Merge tag 'wireless-drivers-next-for-davem-2017-07-03' of https://git.kernel.org...

author David S. Miller <davem@davemloft.net>
Mon, 3 Jul 2017 12:51:45 +0000 (05:51 -0700)
committer David S. Miller <davem@davemloft.net>
Mon, 3 Jul 2017 12:51:45 +0000 (05:51 -0700)
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt

index 42c3bb2d53e88b651a7d39efe00e278c24d9c9b3..01e331a5f3e7491fba25188b5a12e91722057e42 100644 (file)
--- a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
@@ -41,9 +41,9 @@ Required properties:
  Optional properties:
  
  In order to use the GPIO lines in PWM mode, some additional optional
-properties are required. Only Armada 370 and XP support these properties.
+properties are required.
  
-- compatible: Must contain "marvell,armada-370-xp-gpio"
+- compatible: Must contain "marvell,armada-370-gpio"
  
  - reg: an additional register set is needed, for the GPIO Blink
    Counter on/off registers.
@@ -71,7 +71,7 @@ Example:
                 };
  
                 gpio1: gpio@18140 {
-                       compatible = "marvell,armada-370-xp-gpio";
+                       compatible = "marvell,armada-370-gpio";
                         reg = <0x18140 0x40>, <0x181c8 0x08>;
                         reg-names = "gpio", "pwm";
                         ngpios = <17>;
diff --git a/Documentation/devicetree/bindings/mfd/stm32-timers.txt b/Documentation/devicetree/bindings/mfd/stm32-timers.txt

index bbd083f5600a786b23a0ec821f384e8e8a6a3553..1db6e0057a638e09a5346956a70620c31276fdf6 100644 (file)
--- a/Documentation/devicetree/bindings/mfd/stm32-timers.txt
+++ b/Documentation/devicetree/bindings/mfd/stm32-timers.txt
@@ -31,7 +31,7 @@ Example:
                 compatible = "st,stm32-timers";
                 reg = <0x40010000 0x400>;
                 clocks = <&rcc 0 160>;
-               clock-names = "clk_int";
+               clock-names = "int";
  
                 pwm {
                         compatible = "st,stm32-pwm";
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt

index 1506e948610c79a564b1e9e57f69066a0d044db9..27966ae741e09a51180ee45c92bf3da05f879ce9 100644 (file)
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -22,6 +22,7 @@ Required properties:
         Required elements: 'pclk', 'hclk'
         Optional elements: 'tx_clk'
         Optional elements: 'rx_clk' applies to cdns,zynqmp-gem
+       Optional elements: 'tsu_clk'
  - clocks: Phandles to input clocks.
  
  Optional properties for PHY child node:
diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt

index c627bbb3009e5a3f54472d58c2f5ef8f37f871fa..60c833d6218184fb54c1da368400fcdc71ccd9b4 100644 (file)
--- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
@@ -13,14 +13,10 @@ Optional SoC Specific Properties:
  - pinctrl-names: Contains only one value - "default".
  - pintctrl-0: Specifies the pin control groups used for this controller.
  - autosuspend-delay: Specify autosuspend delay in milliseconds.
-- vin-voltage-override: Specify voltage of VIN pin in microvolts.
  - irq-status-read-quirk: Specify that the trf7970a being used has the
    "IRQ Status Read" erratum.
  - en2-rf-quirk: Specify that the trf7970a being used has the "EN2 RF"
    erratum.
-- t5t-rmb-extra-byte-quirk: Specify that the trf7970a has the erratum
-  where an extra byte is returned by Read Multiple Block commands issued
-  to Type 5 tags.
  - vdd-io-supply: Regulator specifying voltage for vdd-io
  - clock-frequency: Set to specify that the input frequency to the trf7970a is 13560000Hz or 27120000Hz
  
@@ -37,15 +33,13 @@ Example (for ARM-based BeagleBone with TRF7970A on SPI1):
                 spi-max-frequency = <2000000>;
                 interrupt-parent = <&gpio2>;
                 interrupts = <14 0>;
-               ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>,
-                                 <&gpio2 5 GPIO_ACTIVE_LOW>;
+               ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>,
+                                 <&gpio2 5 GPIO_ACTIVE_HIGH>;
                 vin-supply = <&ldo3_reg>;
-               vin-voltage-override = <5000000>;
                 vdd-io-supply = <&ldo2_reg>;
                 autosuspend-delay = <30000>;
                 irq-status-read-quirk;
                 en2-rf-quirk;
-               t5t-rmb-extra-byte-quirk;
                 clock-frequency = <27120000>;
                 status = "okay";
         };
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt

index 24196cef7c9104dc0aebe48b03bf91495f7f1176..1fe42a874aae02bec8c8c784389ea48033418c9a 100644 (file)
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -22,9 +22,9 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
         There are no module parameters for this driver and it can be configured
  using IProute2/ip utility.
  
-       ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }
+       ip link add link <master-dev> name <slave-dev> type ipvlan mode { l2 | l3 | l3s }
  
-       e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
+       e.g. ip link add link eth0 name ipvl0 type ipvlan mode l2
  
  
  4. Operating modes:
diff --git a/Documentation/networking/policy-routing.txt b/Documentation/networking/policy-routing.txt

deleted file mode 100644 (file)

index 36f6936..0000000
--- a/Documentation/networking/policy-routing.txt
+++ /dev/null
@@ -1,150 +0,0 @@
-Classes
--------
-
-       "Class" is a complete routing table in common sense.
-       I.e. it is tree of nodes (destination prefix, tos, metric)
-       with attached information: gateway, device etc.
-       This tree is looked up as specified in RFC1812 5.2.4.3
-       1. Basic match
-       2. Longest match
-       3. Weak TOS.
-       4. Metric. (should not be in kernel space, but they are)
-       5. Additional pruning rules. (not in kernel space).
-       
-       We have two special type of nodes:
-       REJECT - abort route lookup and return an error value.
-       THROW  - abort route lookup in this class.
-
-
-       Currently the number of classes is limited to 255
-       (0 is reserved for "not specified class")
-
-       Three classes are builtin:
-
-       RT_CLASS_LOCAL=255 - local interface addresses,
-       broadcasts, nat addresses.
-
-       RT_CLASS_MAIN=254  - all normal routes are put there
-       by default.
-
-       RT_CLASS_DEFAULT=253 - if ip_fib_model==1, then
-       normal default routes are put there, if ip_fib_model==2
-       all gateway routes are put there.
-
-
-Rules
------
-       Rule is a record of (src prefix, src interface, tos, dst prefix)
-       with attached information.
-
-       Rule types:
-       RTP_ROUTE - lookup in attached class
-       RTP_NAT   - lookup in attached class and if a match is found,
-                   translate packet source address.
-       RTP_MASQUERADE - lookup in attached class and if a match is found,
-                   masquerade packet as sourced by us.
-       RTP_DROP   - silently drop the packet.
-       RTP_REJECT - drop the packet and send ICMP NET UNREACHABLE.
-       RTP_PROHIBIT - drop the packet and send ICMP COMM. ADM. PROHIBITED.
-
-       Rule flags:
-       RTRF_LOG - log route creations.
-       RTRF_VALVE - One way route (used with masquerading)
-
-Default setup:
-
-root@amber:/pub/ip-routing # iproute -r
-Kernel routing policy rules
-Pref Source             Destination        TOS Iface   Cl
-   0 default            default            00  *       255
- 254 default            default            00  *       254
- 255 default            default            00  *       253
-
-
-Lookup algorithm
-----------------
-
-       We scan rules list, and if a rule is matched, apply it.
-       If a route is found, return it.
-       If it is not found or a THROW node was matched, continue
-       to scan rules.
-
-Applications
-------------
-
-1.     Just ignore classes. All the routes are put into MAIN class
-       (and/or into DEFAULT class).
-
-       HOWTO:  iproute add PREFIX [ tos TOS ] [ gw GW ] [ dev DEV ]
-               [ metric METRIC ] [ reject ] ... (look at iproute utility)
-
-               or use route utility from current net-tools.
-               
-2.     Opposite case. Just forget all that you know about routing
-       tables. Every rule is supplied with its own gateway, device
-       info. record. This approach is not appropriate for automated
-       route maintenance, but it is ideal for manual configuration.
-
-       HOWTO:  iproute addrule [ from PREFIX ] [ to PREFIX ] [ tos TOS ]
-               [ dev INPUTDEV] [ pref PREFERENCE ] route [ gw GATEWAY ]
-               [ dev OUTDEV ] .....
-
-       Warning: As of now the size of the routing table in this
-       approach is limited to 256. If someone likes this model, I'll
-       relax this limitation.
-
-3.     OSPF classes (see RFC1583, RFC1812 E.3.3)
-       Very clean, stable and robust algorithm for OSPF routing
-       domains. Unfortunately, it is not widely used in the Internet.
-
-       Proposed setup:
-       255 local addresses
-       254 interface routes
-       253 ASE routes with external metric
-       252 ASE routes with internal metric
-       251 inter-area routes
-       250 intra-area routes for 1st area
-       249 intra-area routes for 2nd area
-       etc.
-       
-       Rules:
-       iproute addrule class 253
-       iproute addrule class 252
-       iproute addrule class 251
-       iproute addrule to a-prefix-for-1st-area class 250
-       iproute addrule to another-prefix-for-1st-area class 250
-       ...
-       iproute addrule to a-prefix-for-2nd-area class 249
-       ...
-
-       Area classes must be terminated with reject record.
-       iproute add default reject class 250
-       iproute add default reject class 249
-       ...
-
-4.     The Variant Router Requirements Algorithm (RFC1812 E.3.2)
-       Create 16 classes for different TOS values.
-       It is a funny, but pretty useless algorithm.
-       I listed it just to show the power of new routing code.
-
-5.     All the variety of combinations......
-
-
-GATED
------
-
-       Gated does not understand classes, but it will work
-       happily in MAIN+DEFAULT. All policy routes can be set
-       and maintained manually.
-
-IMPORTANT NOTE
---------------
-       route.c has a compilation time switch CONFIG_IP_LOCAL_RT_POLICY.
-       If it is set, locally originated packets are routed
-       using all the policy list. This is not very convenient and
-       pretty ambiguous when used with NAT and masquerading.
-       I set it to FALSE by default.
-
-
-Alexey Kuznetov
-kuznet@ms2.inr.ac.ru
diff --git a/MAINTAINERS b/MAINTAINERS

index f81e1b765353be7ab56195e063353615228842f4..5bebe20811c4562b655491328a515ab3b82c2f31 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2967,7 +2967,7 @@ F:        sound/pci/oxygen/
  
  C6X ARCHITECTURE
  M:     Mark Salter <msalter@redhat.com>
-M:     Aurelien Jacquiot <a-jacquiot@ti.com>
+M:     Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
  L:     linux-c6x-dev@linux-c6x.org
  W:     http://www.linux-c6x.org/wiki/index.php/Main_Page
  S:     Maintained
@@ -8330,6 +8330,16 @@ Q:       http://patchwork.ozlabs.org/project/netdev/list/
  F:     drivers/net/ethernet/mellanox/mlx5/core/fpga/*
  F:     include/linux/mlx5/mlx5_ifc_fpga.h
  
+MELLANOX ETHERNET INNOVA IPSEC DRIVER
+M:     Ilan Tayari <ilant@mellanox.com>
+R:     Boris Pismenny <borisp@mellanox.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+W:     http://www.mellanox.com
+Q:     http://patchwork.ozlabs.org/project/netdev/list/
+F:     drivers/net/ethernet/mellanox/mlx5/core/en_ipsec/*
+F:     drivers/net/ethernet/mellanox/mlx5/core/ipsec*
+
  MELLANOX ETHERNET SWITCH DRIVERS
  M:     Jiri Pirko <jiri@mellanox.com>
  M:     Ido Schimmel <idosch@mellanox.com>
@@ -9072,9 +9082,6 @@ F:        include/uapi/linux/nfc.h
  F:     drivers/nfc/
  F:     include/linux/platform_data/nfcmrvl.h
  F:     include/linux/platform_data/nxp-nci.h
-F:     include/linux/platform_data/pn544.h
-F:     include/linux/platform_data/st21nfca.h
-F:     include/linux/platform_data/st-nci.h
  F:     Documentation/devicetree/bindings/net/nfc/
  
  NFS, SUNRPC, AND LOCKD CLIENTS
@@ -11409,6 +11416,14 @@ F:     kernel/time/alarmtimer.c
  F:     kernel/time/ntp.c
  F:     tools/testing/selftests/timers/
  
+TI TRF7970A NFC DRIVER
+M:     Mark Greer <mgreer@animalcreek.com>
+L:     linux-wireless@vger.kernel.org
+L:     linux-nfc@lists.01.org (moderated for non-subscribers)
+S:     Supported
+F:     drivers/nfc/trf7970a.c
+F:     Documentation/devicetree/bindings/net/nfc/trf7970a.txt
+
  SC1200 WDT DRIVER
  M:     Zwane Mwaikambo <zwanem@gmail.com>
  S:     Maintained
diff --git a/Makefile b/Makefile

index e40c471abe29fbc9e569bc9f4fca4302ec78cfb9..6d8a984ed9c975e8867b151e8e6e9424f85dea87 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
  VERSION = 4
  PATCHLEVEL = 12
  SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
  NAME = Fearless Coyote
  
  # *DOCUMENTATION*
@@ -1437,7 +1437,7 @@ help:
         @echo  '  make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
         @echo  '  make V=2   [targets] 2 => give reason for rebuild of target'
         @echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
-       @echo  '  make C=1   [targets] Check all c source with $$CHECK (sparse by default)'
+       @echo  '  make C=1   [targets] Check re-compiled c source with $$CHECK (sparse by default)'
         @echo  '  make C=2   [targets] Force check of all c source with $$CHECK'
         @echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
         @echo  '  make W=n   [targets] Enable extra gcc checks, n=1,2,3 where'
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h

index 6e1242da0159e274b7e161f9648e031cbddd7ef4..4104a08392146f6c479710557010135aa4774a15 100644 (file)
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -86,8 +86,6 @@ struct task_struct;
  #define TSK_K_BLINK(tsk)       TSK_K_REG(tsk, 4)
  #define TSK_K_FP(tsk)          TSK_K_REG(tsk, 0)
  
-#define thread_saved_pc(tsk)   TSK_K_BLINK(tsk)
-
  extern void start_thread(struct pt_regs * regs, unsigned long pc,
                          unsigned long usp);
  
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index 4c1a35f1583872d2ce39db5c1cacce28be48ccb1..c0fcab6a550473859b4a2ca525cefc16624e1710 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1416,6 +1416,7 @@ choice
         config VMSPLIT_3G
                 bool "3G/1G user/kernel split"
         config VMSPLIT_3G_OPT
+               depends on !ARM_LPAE
                 bool "3G/1G user/kernel split (for full 1G low memory)"
         config VMSPLIT_2G
                 bool "2G/2G user/kernel split"
diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S

index 3f7d1b74c5e02bd46730c58b0a66756c89b904ab..a17ca8d78656d1012910ffb8a37b3433720a8d44 100644 (file)
--- a/arch/arm/boot/compressed/efi-header.S
+++ b/arch/arm/boot/compressed/efi-header.S
@@ -17,7 +17,8 @@
                 @ there.
                 .inst   'M' | ('Z' << 8) | (0x1310 << 16)   @ tstne r0, #0x4d000
  #else
-               W(mov)  r0, r0
+ AR_CLASS(     mov     r0, r0          )
+  M_CLASS(     nop.w                   )
  #endif
                 .endm
  
diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts

index dd3525a0f06afb80f69143c968f2931fbbef5826..9e8b082c134f61efede523a916b1e60e382e005c 100644 (file)
--- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
+++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts
@@ -57,7 +57,6 @@
         aliases {
                 serial0 = &uart0;
                 /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
-               ethernet0 = &emac;
                 ethernet1 = &xr819;
         };
  
@@ -104,13 +103,6 @@
         status = "okay";
  };
  
-&emac {
-       phy-handle = <&int_mii_phy>;
-       phy-mode = "mii";
-       allwinner,leds-active-low;
-       status = "okay";
-};
-
  &mmc0 {
         pinctrl-names = "default";
         pinctrl-0 = <&mmc0_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts

index 78f6c24952dd128249fd3010d212222832bb060a..8d2cc6e9a03faff3cc71965493e5c54c1359e9f3 100644 (file)
--- a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
+++ b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts
@@ -46,10 +46,3 @@
         model = "FriendlyARM NanoPi NEO";
         compatible = "friendlyarm,nanopi-neo", "allwinner,sun8i-h3";
  };
-
-&emac {
-       phy-handle = <&int_mii_phy>;
-       phy-mode = "mii";
-       allwinner,leds-active-low;
-       status = "okay";
-};
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts

index cedd326b608900b8fe08e0dac03adf3d414b2f00..5b6d14555b7ccb8b537d35ea82a75d01b39e0833 100644 (file)
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts
@@ -54,7 +54,6 @@
         aliases {
                 serial0 = &uart0;
                 /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */
-               ethernet0 = &emac;
                 ethernet1 = &rtl8189;
         };
  
@@ -109,13 +108,6 @@
         status = "okay";
  };
  
-&emac {
-       phy-handle = <&int_mii_phy>;
-       phy-mode = "mii";
-       allwinner,leds-active-low;
-       status = "okay";
-};
-
  &ir {
         pinctrl-names = "default";
         pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts

index 6880268e8b87b0d7385e73dc95c23aaa8f25bd9a..5fea430e0eb1006120dd9b98dd904cdd7af14b67 100644 (file)
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts
@@ -52,7 +52,6 @@
         compatible = "xunlong,orangepi-one", "allwinner,sun8i-h3";
  
         aliases {
-               ethernet0 = &emac;
                 serial0 = &uart0;
         };
  
@@ -98,13 +97,6 @@
         status = "okay";
  };
  
-&emac {
-       phy-handle = <&int_mii_phy>;
-       phy-mode = "mii";
-       allwinner,leds-active-low;
-       status = "okay";
-};
-
  &mmc0 {
         pinctrl-names = "default";
         pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts

index a10281b455f50ccad1f26087ae14884600c19c90..8b93f5c781a70b565ed0012d2c29b35f04987dcd 100644 (file)
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
@@ -53,11 +53,6 @@
         };
  };
  
-&emac {
-       /* LEDs changed to active high on the plus */
-       /delete-property/ allwinner,leds-active-low;
-};
-
  &mmc1 {
         pinctrl-names = "default";
         pinctrl-0 = <&mmc1_pins_a>;
diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts

index 52e65755c51a32b0d6a6511c0ee117d85842be46..f148111c326d073674de51ca921ca97525bf28fd 100644 (file)
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
@@ -52,7 +52,6 @@
         compatible = "xunlong,orangepi-pc", "allwinner,sun8i-h3";
  
         aliases {
-               ethernet0 = &emac;
                 serial0 = &uart0;
         };
  
@@ -110,13 +109,6 @@
         status = "okay";
  };
  
-&emac {
-       phy-handle = <&int_mii_phy>;
-       phy-mode = "mii";
-       allwinner,leds-active-low;
-       status = "okay";
-};
-
  &ir {
         pinctrl-names = "default";
         pinctrl-0 = <&ir_pins_a>;
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi

index a6d4fda544e1dc381b0acfb8250f6163c45f7bc3..d4f600dbb7eb8dc7b5f922e9334c5dddf3528296 100644 (file)
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -83,12 +83,6 @@
                 #size-cells = <1>;
                 ranges;
  
-               syscon: syscon@1c00000 {
-                       compatible = "allwinner,sun8i-h3-system-controller",
-                               "syscon";
-                       reg = <0x01c00000 0x1000>;
-               };
-
                 dma: dma-controller@01c02000 {
                         compatible = "allwinner,sun8i-h3-dma";
                         reg = <0x01c02000 0x1000>;
@@ -285,14 +279,6 @@
                         interrupt-controller;
                         #interrupt-cells = <3>;
  
-                       emac_rgmii_pins: emac0 {
-                               pins = "PD0", "PD1", "PD2", "PD3", "PD4",
-                                      "PD5", "PD7", "PD8", "PD9", "PD10",
-                                      "PD12", "PD13", "PD15", "PD16", "PD17";
-                               function = "emac";
-                               drive-strength = <40>;
-                       };
-
                         i2c0_pins: i2c0 {
                                 pins = "PA11", "PA12";
                                 function = "i2c0";
@@ -389,32 +375,6 @@
                         clocks = <&osc24M>;
                 };
  
-               emac: ethernet@1c30000 {
-                       compatible = "allwinner,sun8i-h3-emac";
-                       syscon = <&syscon>;
-                       reg = <0x01c30000 0x104>;
-                       interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
-                       interrupt-names = "macirq";
-                       resets = <&ccu RST_BUS_EMAC>;
-                       reset-names = "stmmaceth";
-                       clocks = <&ccu CLK_BUS_EMAC>;
-                       clock-names = "stmmaceth";
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-                       status = "disabled";
-
-                       mdio: mdio {
-                               #address-cells = <1>;
-                               #size-cells = <0>;
-                               int_mii_phy: ethernet-phy@1 {
-                                       compatible = "ethernet-phy-ieee802.3-c22";
-                                       reg = <1>;
-                                       clocks = <&ccu CLK_BUS_EPHY>;
-                                       resets = <&ccu RST_BUS_EPHY>;
-                               };
-                       };
-               };
-
                 spi0: spi@01c68000 {
                         compatible = "allwinner,sun8i-h3-spi";
                         reg = <0x01c68000 0x1000>;
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig

index 6da6af8881f729f415925821d3968ea29a4608af..2685e03600b11bc1d48a2a12ca1a80e4de18adea 100644 (file)
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -257,7 +257,6 @@ CONFIG_SMSC911X=y
  CONFIG_STMMAC_ETH=y
  CONFIG_STMMAC_PLATFORM=y
  CONFIG_DWMAC_DWC_QOS_ETH=y
-CONFIG_DWMAC_SUN8I=y
  CONFIG_TI_CPSW=y
  CONFIG_XILINX_EMACLITE=y
  CONFIG_AT803X_PHY=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig

index 504e02238031437f4ccdcbe5c6c0a0da79dea511..5cd5dd70bc836566634393e6d2c36d581d1a4ea9 100644 (file)
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -40,7 +40,6 @@ CONFIG_ATA=y
  CONFIG_AHCI_SUNXI=y
  CONFIG_NETDEVICES=y
  CONFIG_SUN4I_EMAC=y
-CONFIG_DWMAC_SUN8I=y
  # CONFIG_NET_VENDOR_ARC is not set
  # CONFIG_NET_CADENCE is not set
  # CONFIG_NET_VENDOR_BROADCOM is not set
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c

index 32e1a9513dc70eba4787ca1af0ba3e32b9f29d7e..4e80bf7420d4e65fb30e0c68e7bef53932f765b3 100644 (file)
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -315,7 +315,7 @@ static void __init cacheid_init(void)
         if (arch >= CPU_ARCH_ARMv6) {
                 unsigned int cachetype = read_cpuid_cachetype();
  
-               if ((arch == CPU_ARCH_ARMv7M) && !cachetype) {
+               if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) {
                         cacheid = 0;
                 } else if ((cachetype & (7 << 29)) == 4 << 29) {
                         /* ARMv7 register format */
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts

index 0d1f026d831aac7b7ecda69ac5ec0f57e693ec2b..6872135d7f849b1df7e0529b9b95d2b0d9c86478 100644 (file)
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -67,14 +67,6 @@
         };
  };
  
-&emac {
-       pinctrl-names = "default";
-       pinctrl-0 = <&rgmii_pins>;
-       phy-mode = "rgmii";
-       phy-handle = <&ext_rgmii_phy>;
-       status = "okay";
-};
-
  &i2c1 {
         pinctrl-names = "default";
         pinctrl-0 = <&i2c1_pins>;
@@ -85,13 +77,6 @@
         bias-pull-up;
  };
  
-&mdio {
-       ext_rgmii_phy: ethernet-phy@1 {
-               compatible = "ethernet-phy-ieee802.3-c22";
-               reg = <1>;
-       };
-};
-
  &mmc0 {
         pinctrl-names = "default";
         pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts

index 24f1aac366d64355f5b6b37bb8e263bcce7f2e2d..790d14daaa6a68f2b6ebe40594c5e78baea3f52a 100644 (file)
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
@@ -46,20 +46,5 @@
         model = "Pine64+";
         compatible = "pine64,pine64-plus", "allwinner,sun50i-a64";
  
-       /* TODO: Camera, touchscreen, etc. */
-};
-
-&emac {
-       pinctrl-names = "default";
-       pinctrl-0 = <&rgmii_pins>;
-       phy-mode = "rgmii";
-       phy-handle = <&ext_rgmii_phy>;
-       status = "okay";
-};
-
-&mdio {
-       ext_rgmii_phy: ethernet-phy@1 {
-               compatible = "ethernet-phy-ieee802.3-c22";
-               reg = <1>;
-       };
+       /* TODO: Camera, Ethernet PHY, touchscreen, etc. */
  };
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts

index 3b491c0e3b0decbf6b63695d5833755424a1f71d..c680ed385da3565da0291e325b5429fcb1e3eeec 100644 (file)
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
@@ -70,15 +70,6 @@
         status = "okay";
  };
  
-&emac {
-       pinctrl-names = "default";
-       pinctrl-0 = <&rmii_pins>;
-       phy-mode = "rmii";
-       phy-handle = <&ext_rmii_phy1>;
-       status = "okay";
-
-};
-
  &i2c1 {
         pinctrl-names = "default";
         pinctrl-0 = <&i2c1_pins>;
@@ -89,13 +80,6 @@
         bias-pull-up;
  };
  
-&mdio {
-       ext_rmii_phy1: ethernet-phy@1 {
-               compatible = "ethernet-phy-ieee802.3-c22";
-               reg = <1>;
-       };
-};
-
  &mmc0 {
         pinctrl-names = "default";
         pinctrl-0 = <&mmc0_pins>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi

index 769ced01a9980134644da1ee63636e4cd0d64446..166c9ef884dc6e52432e0030d06f1b7036d2c428 100644 (file)
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -129,12 +129,6 @@
                 #size-cells = <1>;
                 ranges;
  
-               syscon: syscon@1c00000 {
-                       compatible = "allwinner,sun50i-a64-system-controller",
-                               "syscon";
-                       reg = <0x01c00000 0x1000>;
-               };
-
                 mmc0: mmc@1c0f000 {
                         compatible = "allwinner,sun50i-a64-mmc";
                         reg = <0x01c0f000 0x1000>;
@@ -287,21 +281,6 @@
                                 bias-pull-up;
                         };
  
-                       rmii_pins: rmii_pins {
-                               pins = "PD10", "PD11", "PD13", "PD14", "PD17",
-                                      "PD18", "PD19", "PD20", "PD22", "PD23";
-                               function = "emac";
-                               drive-strength = <40>;
-                       };
-
-                       rgmii_pins: rgmii_pins {
-                               pins = "PD8", "PD9", "PD10", "PD11", "PD12",
-                                      "PD13", "PD15", "PD16", "PD17", "PD18",
-                                      "PD19", "PD20", "PD21", "PD22", "PD23";
-                               function = "emac";
-                               drive-strength = <40>;
-                       };
-
                         uart0_pins_a: uart0@0 {
                                 pins = "PB8", "PB9";
                                 function = "uart0";
@@ -406,26 +385,6 @@
                         #size-cells = <0>;
                 };
  
-               emac: ethernet@1c30000 {
-                       compatible = "allwinner,sun50i-a64-emac";
-                       syscon = <&syscon>;
-                       reg = <0x01c30000 0x100>;
-                       interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
-                       interrupt-names = "macirq";
-                       resets = <&ccu RST_BUS_EMAC>;
-                       reset-names = "stmmaceth";
-                       clocks = <&ccu CLK_BUS_EMAC>;
-                       clock-names = "stmmaceth";
-                       status = "disabled";
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-
-                       mdio: mdio {
-                               #address-cells = <1>;
-                               #size-cells = <0>;
-                       };
-               };
-
                 gic: interrupt-controller@1c81000 {
                         compatible = "arm,gic-400";
                         reg = <0x01c81000 0x1000>,
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig

index d789858c4f1b7745c42bcf6eb7119e7c342e70e8..97c123e09e45bfd80173029de0da0161dd4be0c7 100644 (file)
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -191,7 +191,6 @@ CONFIG_RAVB=y
  CONFIG_SMC91X=y
  CONFIG_SMSC911X=y
  CONFIG_STMMAC_ETH=m
-CONFIG_DWMAC_SUN8I=m
  CONFIG_MDIO_BUS_MUX_MMIOREG=y
  CONFIG_MESON_GXL_PHY=m
  CONFIG_MICREL_PHY=y
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c

index 41b6e31f8f556f4d4af8ecdafd3867e540a8ed10..d0cb007fa4823791be514b8a5ac65d8d408f099d 100644 (file)
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
                 /* tkr_mono.cycle_last == tkr_raw.cycle_last */
                 vdso_data->cs_cycle_last        = tk->tkr_mono.cycle_last;
                 vdso_data->raw_time_sec         = tk->raw_time.tv_sec;
-               vdso_data->raw_time_nsec        = tk->raw_time.tv_nsec;
+               vdso_data->raw_time_nsec        = (tk->raw_time.tv_nsec <<
+                                                  tk->tkr_raw.shift) +
+                                                 tk->tkr_raw.xtime_nsec;
                 vdso_data->xtime_clock_sec      = tk->xtime_sec;
                 vdso_data->xtime_clock_nsec     = tk->tkr_mono.xtime_nsec;
-               /* tkr_raw.xtime_nsec == 0 */
                 vdso_data->cs_mono_mult         = tk->tkr_mono.mult;
                 vdso_data->cs_raw_mult          = tk->tkr_raw.mult;
                 /* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S

index e00b4671bd7c4af5516b95da00409c7296df1963..76320e9209651fd307659dcbab8092ff7c1c09e2 100644 (file)
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@ monotonic_raw:
         seqcnt_check fail=monotonic_raw
  
         /* All computations are done with left-shifted nsecs. */
-       lsl     x14, x14, x12
         get_nsec_per_sec res=x9
         lsl     x9, x9, x12
  
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h

index 85d4af97c986aee4a7eff90b4347aca444ee1ed9..dbdbb8a558df4acb548b71dbc52f63b3e7373d7f 100644 (file)
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
  {
  }
  
-/*
- * Return saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk)   (tsk->thread.pc)
-
  unsigned long get_wchan(struct task_struct *p);
  
  #define        KSTK_EIP(tsk)                                                   \
diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h

index b9eb3da7f278dac858bfb2191094f9fc837ac5c8..7c87b5be53b5b74c76fbe03829034b1e96ac3c7c 100644 (file)
--- a/arch/c6x/include/asm/processor.h
+++ b/arch/c6x/include/asm/processor.h
@@ -95,11 +95,6 @@ static inline void release_thread(struct task_struct *dead_task)
  #define copy_segments(tsk, mm)         do { } while (0)
  #define release_segments(mm)           do { } while (0)
  
-/*
- * saved PC of a blocked thread.
- */
-#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc)
-
  /*
   * saved kernel SP and DP of a blocked thread.
   */
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c

index e299d30105b53bf5ad0e8b8df7f036f39267ad66..a2cdb1521aca4db4069d449f56094ee822e0313d 100644 (file)
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -69,14 +69,6 @@ void hard_reset_now (void)
         while(1) /* waiting for RETRIBUTION! */ ;
  }
  
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return task_pt_regs(t)->irp;
-}
-
  /* setup the child's kernel stack with a pt_regs and switch_stack on it.
   * it will be un-nested during _resume and _ret_from_sys_call when the
   * new thread is scheduled.
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c

index c530a8fa87ceb751a0c275885e34852dbcd5b6c2..fe87b383fbf3fc45522d8ce74a122c8469347993 100644 (file)
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -84,14 +84,6 @@ hard_reset_now(void)
                 ; /* Wait for reset. */
  }
  
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return task_pt_regs(t)->erp;
-}
-
  /*
   * Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
   * It will be unnested during _resume and _ret_from_sys_call when the new thread
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h

index 15b815df29c165809c4e6e229ede6a077d9e8e71..bc2729e4b2c97e89b5a9dd3daf30df555dee1e0f 100644 (file)
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p);
  
  #define KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->thread.usp)
  
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
  /* Free all resources held by a thread. */
  static inline void release_thread(struct task_struct *dead_task)
  {
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h

index ddaeb9cc9143333d8f00de60291298f6dce5530c..e4d08d74ed9f8dc4f81140fcf60442378f02cf58 100644 (file)
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ...
  #define release_segments(mm)           do { } while (0)
  #define forget_segments()              do { } while (0)
  
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
  unsigned long get_wchan(struct task_struct *p);
  
  #define        KSTK_EIP(tsk)   ((tsk)->thread.frame0->pc)
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c

index 5a4c92abc99ec320b54ef80841bc6f3333a9d3dd..a957b374e3a66b28a30bf5d30b623bfccbc2fb0a 100644 (file)
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p)
         return 0;
  }
  
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       /* Check whether the thread is blocked in resume() */
-       if (in_sched_functions(tsk->thread.pc))
-               return ((unsigned long *)tsk->thread.fp)[2];
-       else
-               return tsk->thread.pc;
-}
-
  int elf_check_arch(const struct elf32_hdr *hdr)
  {
         unsigned long hsr0 = __get_HSR(0);
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h

index 65132d7ae9e5b66fb484014f7930fd91f75cc944..afa53147e66a82e24d8d9e100b4706a9ce2ee760 100644 (file)
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task)
  {
  }
  
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk);
  unsigned long get_wchan(struct task_struct *p);
  
  #define        KSTK_EIP(tsk)   \
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c

index 0f5db5bb561b75cb30bb2871ffa7c00545f84539..d1ddcabbbe8383fc6160db3946a334286676b5ae 100644 (file)
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags,
         return 0;
  }
  
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return ((struct pt_regs *)tsk->thread.esp0)->pc;
-}
-
  unsigned long get_wchan(struct task_struct *p)
  {
         unsigned long fp, pc;
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h

index 45a825402f634ee4b650a08a60efc5c60d50e261..ce67940860a536dce66bea9b59daeab1fcaa103e 100644 (file)
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -33,9 +33,6 @@
  /*  task_struct, defined elsewhere, is the "process descriptor" */
  struct task_struct;
  
-/*  this is defined in arch/process.c  */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
  extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
  
  /*
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c

index de715bab7956c7e38df0b8ab689d11cca8fceb16..656050c2e6a06ab7f60ad6c1a65351dfd29f544f 100644 (file)
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -60,14 +60,6 @@ void arch_cpu_idle(void)
         local_irq_enable();
  }
  
-/*
- *  Return saved PC of a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return 0;
-}
-
  /*
   * Copy architecture-specific thread state
   */
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h

index 26a63d69c599addab0486c2fa08d548dd229ccf9..ab982f07ea681253d42f351c230c976a83258f95 100644 (file)
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -601,23 +601,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat)
         *unat = (*unat & ~mask) | (nat << bit);
  }
  
-/*
- * Return saved PC of a blocked thread.
- * Note that the only way T can block is through a call to schedule() -> switch_to().
- */
-static inline unsigned long
-thread_saved_pc (struct task_struct *t)
-{
-       struct unw_frame_info info;
-       unsigned long ip;
-
-       unw_init_from_blocked_task(&info, t);
-       if (unw_unwind(&info) < 0)
-               return 0;
-       unw_get_ip(&info, &ip);
-       return ip;
-}
-
  /*
   * Get the current instruction/program counter value.
   */
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h

index 5767367550c69637a0b51c7a037b7a5e0c4fbfb4..657874eeeccc262c11268094e0f0ba530e05dd92 100644 (file)
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *);
  extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
  extern void release_segments(struct mm_struct * mm);
  
-extern unsigned long thread_saved_pc(struct task_struct *);
-
  /* Copy and release all segment info associated with a VM */
  #define copy_segments(p, mm)  do { } while (0)
  #define release_segments(mm)  do { } while (0)
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c

index d8ffcfec599cb6a10a6eb649f15d628a97a377f9..8cd7e03f4370c06a633d2a12b5db70299ffa41ae 100644 (file)
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -39,14 +39,6 @@
  
  #include <linux/err.h>
  
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return tsk->thread.lr;
-}
-
  void (*pm_power_off)(void) = NULL;
  EXPORT_SYMBOL(pm_power_off);
  
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h

index 77239e81379b16b52e39cff979d1da4ceb2e1965..94c36030440cc825c4688244a0b973ba60be3863 100644 (file)
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task)
  {
  }
  
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
  unsigned long get_wchan(struct task_struct *p);
  
  #define        KSTK_EIP(tsk)   \
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c

index e475c945c8b2bf199e6147a16fcf42c2d7c0d714..7df92f8b0781dd2651096f83f8c45185a26be803 100644 (file)
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -40,20 +40,6 @@
  asmlinkage void ret_from_fork(void);
  asmlinkage void ret_from_kernel_thread(void);
  
-
-/*
- * Return saved PC from a blocked thread
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp;
-       /* Check whether the thread is blocked in resume() */
-       if (in_sched_functions(sw->retpc))
-               return ((unsigned long *)sw->a6)[1];
-       else
-               return sw->retpc;
-}
-
  void arch_cpu_idle(void)
  {
  #if defined(MACH_ATARI_ONLY)
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h

index 37ef196e45191adb481450c8648e9e9acbb05106..330d556860ba7a8211b767cd4ac03275adcab9f0 100644 (file)
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task)
  {
  }
  
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
  extern unsigned long get_wchan(struct task_struct *p);
  
  # define KSTK_EIP(tsk) (0)
@@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task)
  {
  }
  
-/* Return saved (kernel) PC of a blocked thread.  */
-#  define thread_saved_pc(tsk) \
-       ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0)
-
  unsigned long get_wchan(struct task_struct *p);
  
  /* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c

index e92a817e645fac7bf8782e782b2525429572d5b3..6527ec22f158f16acef89a0b78310c518866ae73 100644 (file)
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
         return 0;
  }
  
-#ifndef CONFIG_MMU
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct cpu_context *ctx =
-               &(((struct thread_info *)(tsk->stack))->cpu_context);
-
-       /* Check whether the thread is blocked in resume() */
-       if (in_sched_functions(ctx->r15))
-               return (unsigned long)ctx->r15;
-       else
-               return ctx->r14;
-}
-#endif
-
  unsigned long get_wchan(struct task_struct *p)
  {
  /* TBD (used by procfs) */
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c

index 7c6336dd2638ce9c12c4ff8be566ff6acb856137..7cd92166a0b9a9bf3c14fe1df33442442ac668ac 100644 (file)
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
  int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
                           bool user, bool kernel)
  {
-       int idx_user, idx_kernel;
+       /*
+        * Initialize idx_user and idx_kernel to work around a bogus
+        * maybe-uninitialized warning when using GCC 6.
+        */
+       int idx_user = 0, idx_kernel = 0;
         unsigned long flags, old_entryhi;
  
         local_irq_save(flags);
diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h

index 18e17abf7664e51c807e3d5649b0585bfd89a3be..3ae479117b42efd07d719282ee1dd287abecb1a4 100644 (file)
--- a/arch/mn10300/include/asm/processor.h
+++ b/arch/mn10300/include/asm/processor.h
@@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs,
  /* Free all resources held by a thread. */
  extern void release_thread(struct task_struct *);
  
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
  unsigned long get_wchan(struct task_struct *p);
  
  #define task_pt_regs(task) ((task)->thread.uregs)
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c

index c9fa42619c6a9aa4f5fa3244b3ce45efdfaeea11..89e8027e07fb327d39de0170c0da61af177c7160 100644 (file)
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -39,14 +39,6 @@
  #include <asm/gdb-stub.h>
  #include "internal.h"
  
-/*
- * return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return ((unsigned long *) tsk->thread.sp)[3];
-}
-
  /*
   * power off function, if any
   */
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h

index 3bbbc3d798e5f4738548690eb8511834050baa76..4944e2e1d8b0677d48c30ebba98629d43db90933 100644 (file)
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task)
  {
  }
  
-/* Return saved PC of a blocked thread. */
-#define thread_saved_pc(tsk)   ((tsk)->thread.kregs->ea)
-
  extern unsigned long get_wchan(struct task_struct *p);
  
  #define task_pt_regs(p) \
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h

index a908e6c30a001e14860fe70f174f1451c6e4ae09..396d8f306c21b6c24f872780b9500cb8ebc1b963 100644 (file)
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
  void release_thread(struct task_struct *);
  unsigned long get_wchan(struct task_struct *p);
  
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
  #define init_stack      (init_thread_union.stack)
  
  #define cpu_relax()     barrier()
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c

index 106859ae27ffba114f9f4b0011151db0f65f98d4..f9b77003f1138ce42dff841814cb280d562481e0 100644 (file)
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs)
         show_registers(regs);
  }
  
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return (unsigned long)user_regs(t->stack)->pc;
-}
-
  void release_thread(struct task_struct *dead_task)
  {
  }
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h

index a3661ee6b060c1d258ab740e5468cfffb665f8d5..4c6694b4e77e0dd12ef6aed95508ea035ccdb10c 100644 (file)
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -163,12 +163,7 @@ struct thread_struct {
         .flags          = 0 \
         }
  
-/*
- * Return saved PC of a blocked thread.  This is used by ps mostly.
- */
-
  struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *t);
  void show_trace(struct task_struct *task, unsigned long *stack);
  
  /*
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c

index 4516a5b53f38ef651c038e4231effa00fd6db19d..b64d7d21646ed50c4a5c1f046b0f8ca758abfcc6 100644 (file)
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
         return 0;
  }
  
-unsigned long thread_saved_pc(struct task_struct *t)
-{
-       return t->thread.regs.kpc;
-}
-
  unsigned long
  get_wchan(struct task_struct *p)
  {
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h

index a83821f33ea36f8c005dab8e80dfdfcfd26a7058..8814a7249cebe29852dd8b4588a61052decac88c 100644 (file)
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
  extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
  extern int kprobe_handler(struct pt_regs *regs);
  extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
  #ifdef CONFIG_KPROBES_ON_FTRACE
  extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
                            struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h

index bb99b651085aaf292e5f98ee23c7cdc53d443cd2..1189d04f3bd1ce6db0f6ed5da3414f25dc3f4c38 100644 (file)
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -378,12 +378,6 @@ struct thread_struct {
  }
  #endif
  
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk)    \
-        ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
  #define task_pt_regs(tsk)      ((struct pt_regs *)(tsk)->thread.regs)
  
  unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S

index ae418b85c17c4bce805227a82350d322259cf06e..b886795060fd2dba727c54d8c7b5e2b47a888f8d 100644 (file)
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
         .balign IFETCH_ALIGN_BYTES
  do_hash_page:
  #ifdef CONFIG_PPC_STD_MMU_64
-       andis.  r0,r4,0xa410            /* weird error? */
+       andis.  r0,r4,0xa450            /* weird error? */
         bne-    handle_page_fault       /* if not, try to insert a HPTE */
-       andis.  r0,r4,DSISR_DABRMATCH@h
-       bne-    handle_dabr_fault
         CURRENT_THREAD_INFO(r11, r1)
         lwz     r0,TI_PREEMPT(r11)      /* If we're in an "NMI" */
         andis.  r0,r0,NMI_MASK@h        /* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1436,16 @@ do_hash_page:
  
         /* Error */
         blt-    13f
+
+       /* Reload DSISR into r4 for the DABR check below */
+       ld      r4,_DSISR(r1)
  #endif /* CONFIG_PPC_STD_MMU_64 */
  
  /* Here we have a page fault that hash_page can't handle. */
  handle_page_fault:
-11:    ld      r4,_DAR(r1)
+11:    andis.  r0,r4,DSISR_DABRMATCH@h
+       bne-    handle_dabr_fault
+       ld      r4,_DAR(r1)
         ld      r5,_DSISR(r1)
         addi    r3,r1,STACK_FRAME_OVERHEAD
         bl      do_page_fault
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c

index fc4343514bed8b0f05a88e64caf0285c44ee8ea0..01addfb0ed0a42216d64c7692c3fea4694c528fb 100644 (file)
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
  
  struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
  
+int is_current_kprobe_addr(unsigned long addr)
+{
+       struct kprobe *p = kprobe_running();
+       return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
  bool arch_within_kprobe_blacklist(unsigned long addr)
  {
         return  (addr >= (unsigned long)__kprobes_text_start &&
@@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
         regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
  #endif
  
+       /*
+        * jprobes use jprobe_return() which skips the normal return
+        * path of the function, and this messes up the accounting of the
+        * function graph tracer.
+        *
+        * Pause function graph tracing while performing the jprobe function.
+        */
+       pause_graph_tracing();
+
         return 1;
  }
  NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
          * saved regs...
          */
         memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+       /* It's OK to start function graph tracing again */
+       unpause_graph_tracing();
         preempt_enable_no_resched();
         return 1;
  }
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c

index a8c1f99e96072530cb1f2d9ed702dffd78665720..4640f6d64f8b406a636d60c4ea2263658dde5be6 100644 (file)
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -615,6 +615,24 @@ void __init exc_lvl_early_init(void)
  }
  #endif
  
+/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+       ti->task = NULL;
+       ti->cpu = cpu;
+       ti->preempt_count = 0;
+       ti->local_flags = 0;
+       ti->flags = 0;
+       klp_init_thread_info(ti);
+}
+
  /*
   * Stack space used when we detect a bad kernel stack pointer, and
   * early in SMP boots before relocation is enabled. Exclusive emergency
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
          * Since we use these as temporary stacks during secondary CPU
          * bringup, we need to get at them in real mode. This means they
          * must also be within the RMO region.
+        *
+        * The IRQ stacks allocated elsewhere in this file are zeroed and
+        * initialized in kernel/irq.c. These are initialized here in order
+        * to have emergency stacks available as early as possible.
          */
         limit = min(safe_stack_limit(), ppc64_rma_size);
  
         for_each_possible_cpu(i) {
                 struct thread_info *ti;
                 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-               klp_init_thread_info(ti);
+               memset(ti, 0, THREAD_SIZE);
+               emerg_stack_init_thread_info(ti, i);
                 paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
  
  #ifdef CONFIG_PPC_BOOK3S_64
                 /* emergency stack for NMI exception handling. */
                 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-               klp_init_thread_info(ti);
+               memset(ti, 0, THREAD_SIZE);
+               emerg_stack_init_thread_info(ti, i);
                 paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
  
                 /* emergency stack for machine check exception handling. */
                 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
-               klp_init_thread_info(ti);
+               memset(ti, 0, THREAD_SIZE);
+               emerg_stack_init_thread_info(ti, i);
                 paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
  #endif
         }
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S

index 7c933a99f5d578bdfd1408b16d4e7fe33e95e4a9..c98e90b4ea7b1f15a2dd7157300376e370774cf2 100644 (file)
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
         stdu    r1,-SWITCH_FRAME_SIZE(r1)
  
         /* Save all gprs to pt_regs */
-       SAVE_8GPRS(0,r1)
-       SAVE_8GPRS(8,r1)
-       SAVE_8GPRS(16,r1)
-       SAVE_8GPRS(24,r1)
+       SAVE_GPR(0, r1)
+       SAVE_10GPRS(2, r1)
+       SAVE_10GPRS(12, r1)
+       SAVE_10GPRS(22, r1)
+
+       /* Save previous stack pointer (r1) */
+       addi    r8, r1, SWITCH_FRAME_SIZE
+       std     r8, GPR1(r1)
  
         /* Load special regs for save below */
         mfmsr   r8
@@ -95,18 +99,44 @@ ftrace_call:
         bl      ftrace_stub
         nop
  
-       /* Load ctr with the possibly modified NIP */
-       ld      r3, _NIP(r1)
-       mtctr   r3
+       /* Load the possibly modified NIP */
+       ld      r15, _NIP(r1)
+
  #ifdef CONFIG_LIVEPATCH
-       cmpd    r14,r3          /* has NIP been altered? */
+       cmpd    r14, r15        /* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+       /* NIP has not been altered, skip over further checks */
+       beq     1f
+
+       /* Check if there is an active kprobe on us */
+       subi    r3, r14, 4
+       bl      is_current_kprobe_addr
+       nop
+
+       /*
+        * If r3 == 1, then this is a kprobe/jprobe;
+        * otherwise, this is a livepatched function.
+        *
+        * The conditional branch for livepatch_handler below will use the
+        * result of this comparison. For kprobe/jprobe, we just need to branch to
+        * the new NIP, not call livepatch_handler. The branch below is bne, so we
+        * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+        * CR0[EQ] = (r3 == 1).
+        */
+       cmpdi   r3, 1
+1:
  #endif
  
+       /* Load CTR with the possibly modified NIP */
+       mtctr   r15
+
         /* Restore gprs */
-       REST_8GPRS(0,r1)
-       REST_8GPRS(8,r1)
-       REST_8GPRS(16,r1)
-       REST_8GPRS(24,r1)
+       REST_GPR(0,r1)
+       REST_10GPRS(2,r1)
+       REST_10GPRS(12,r1)
+       REST_10GPRS(22,r1)
  
         /* Restore possibly modified LR */
         ld      r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
         addi r1, r1, SWITCH_FRAME_SIZE
  
  #ifdef CONFIG_LIVEPATCH
-        /* Based on the cmpd above, if the NIP was altered handle livepatch */
+        /*
+        * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+        * not on a kprobe/jprobe, then handle livepatch.
+        */
         bne-    livepatch_handler
  #endif
  
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index 42b7a4fd57d9a557f8278a9f9a8c228f2758a1e8..8d1a365b8edc45fa9f655b77789a0d8602b462fd 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
                 break;
         case KVM_REG_PPC_TB_OFFSET:
+               /*
+                * POWER9 DD1 has an erratum where writing TBU40 causes
+                * the timebase to lose ticks.  So we don't let the
+                * timebase offset be changed on P9 DD1.  (It is
+                * initialized to zero.)
+                */
+               if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       break;
                 /* round up to multiple of 2^24 */
                 vcpu->arch.vcore->tb_offset =
                         ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
  {
         int r;
         int srcu_idx;
+       unsigned long ebb_regs[3] = {}; /* shut up GCC */
+       unsigned long user_tar = 0;
+       unsigned int user_vrsave;
  
         if (!vcpu->arch.sane) {
                 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                 return -EINVAL;
         }
  
+       /*
+        * Don't allow entry with a suspended transaction, because
+        * the guest entry/exit code will lose it.
+        * If the guest has TM enabled, save away their TM-related SPRs
+        * (they will get restored by the TM unavailable interrupt).
+        */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+           (current->thread.regs->msr & MSR_TM)) {
+               if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+                       run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+                       run->fail_entry.hardware_entry_failure_reason = 0;
+                       return -EINVAL;
+               }
+               current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+               current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+               current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+               current->thread.regs->msr &= ~MSR_TM;
+       }
+#endif
+
         kvmppc_core_prepare_to_enter(vcpu);
  
         /* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
  
         flush_all_to_thread(current);
  
+       /* Save userspace EBB and other register values */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+               ebb_regs[0] = mfspr(SPRN_EBBHR);
+               ebb_regs[1] = mfspr(SPRN_EBBRR);
+               ebb_regs[2] = mfspr(SPRN_BESCR);
+               user_tar = mfspr(SPRN_TAR);
+       }
+       user_vrsave = mfspr(SPRN_VRSAVE);
+
         vcpu->arch.wqp = &vcpu->arch.vcore->wq;
         vcpu->arch.pgdir = current->mm->pgd;
         vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
                 }
         } while (is_kvmppc_resume_guest(r));
  
+       /* Restore userspace EBB and other register values */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+               mtspr(SPRN_EBBHR, ebb_regs[0]);
+               mtspr(SPRN_EBBRR, ebb_regs[1]);
+               mtspr(SPRN_BESCR, ebb_regs[2]);
+               mtspr(SPRN_TAR, user_tar);
+               mtspr(SPRN_FSCR, current->thread.fscr);
+       }
+       mtspr(SPRN_VRSAVE, user_vrsave);
+
   out:
         vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
         atomic_dec(&vcpu->kvm->arch.vcpus_running);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S

index 0fdc4a28970b3c53d821088dae8eec5e2282dca4..404deb512844424d07bba8ead5bd77e70aee82af 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
          * Put whatever is in the decrementer into the
          * hypervisor decrementer.
          */
+BEGIN_FTR_SECTION
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       ld      r6, VCORE_KVM(r5)
+       ld      r9, KVM_HOST_LPCR(r6)
+       andis.  r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         mfspr   r8,SPRN_DEC
         mftb    r7
-       mtspr   SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+       /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+       bne     32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         extsw   r8,r8
+32:    mtspr   SPRN_HDEC,r8
         add     r8,r8,r7
         std     r8,HSTATE_DECEXP(r13)
  
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index bdb3f76ceb6b9ff0e25e5b3b56c3be48dd6f65cc..4888dd494604f101a194a51ff168c44d85c4354d 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,29 @@
  #include <asm/opal.h>
  #include <asm/xive-regs.h>
  
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg)                       \
+BEGIN_FTR_SECTION;                             \
+       extsw   reg, reg;                       \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
  #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
  
  /* Values in HSTATE_NAPPING(r13) */
  #define NAPPING_CEDE   1
  #define NAPPING_NOVCPU 2
  
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS                    144
+#define STACK_SLOT_TRAP                (SFS-4)
+#define STACK_SLOT_TID         (SFS-16)
+#define STACK_SLOT_PSSCR       (SFS-24)
+#define STACK_SLOT_PID         (SFS-32)
+#define STACK_SLOT_IAMR                (SFS-40)
+#define STACK_SLOT_CIABR       (SFS-48)
+#define STACK_SLOT_DAWR                (SFS-56)
+#define STACK_SLOT_DAWRX       (SFS-64)
+
  /*
   * Call kvmppc_hv_entry in real mode.
   * Must be called with interrupts hard-disabled.
@@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  kvmppc_primary_no_guest:
         /* We handle this much like a ceded vcpu */
         /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+       /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+       /* HDEC value came from DEC in the first place, it will fit */
         mfspr   r3, SPRN_HDEC
         mtspr   SPRN_DEC, r3
         /*
@@ -295,8 +314,9 @@ kvm_novcpu_wakeup:
  
         /* See if our timeslice has expired (HDEC is negative) */
         mfspr   r0, SPRN_HDEC
+       EXTEND_HDEC(r0)
         li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
-       cmpwi   r0, 0
+       cmpdi   r0, 0
         blt     kvm_novcpu_exit
  
         /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,10 +339,10 @@ kvm_novcpu_exit:
         bl      kvmhv_accumulate_time
  #endif
  13:    mr      r3, r12
-       stw     r12, 112-4(r1)
+       stw     r12, STACK_SLOT_TRAP(r1)
         bl      kvmhv_commence_exit
         nop
-       lwz     r12, 112-4(r1)
+       lwz     r12, STACK_SLOT_TRAP(r1)
         b       kvmhv_switch_to_host
  
  /*
@@ -390,8 +410,8 @@ kvm_secondary_got_guest:
         lbz     r4, HSTATE_PTID(r13)
         cmpwi   r4, 0
         bne     63f
-       lis     r6, 0x7fff
-       ori     r6, r6, 0xffff
+       LOAD_REG_ADDR(r6, decrementer_max)
+       ld      r6, 0(r6)
         mtspr   SPRN_HDEC, r6
         /* and set per-LPAR registers, if doing dynamic micro-threading */
         ld      r6, HSTATE_SPLIT_MODE(r13)
@@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
   *                                                                            *
   *****************************************************************************/
  
-/* Stack frame offsets */
-#define STACK_SLOT_TID         (112-16)
-#define STACK_SLOT_PSSCR       (112-24)
-#define STACK_SLOT_PID         (112-32)
-
  .global kvmppc_hv_entry
  kvmppc_hv_entry:
  
@@ -565,7 +580,7 @@ kvmppc_hv_entry:
          */
         mflr    r0
         std     r0, PPC_LR_STKOFF(r1)
-       stdu    r1, -112(r1)
+       stdu    r1, -SFS(r1)
  
         /* Save R1 in the PACA */
         std     r1, HSTATE_HOST_R1(r13)
@@ -749,10 +764,20 @@ BEGIN_FTR_SECTION
         mfspr   r5, SPRN_TIDR
         mfspr   r6, SPRN_PSSCR
         mfspr   r7, SPRN_PID
+       mfspr   r8, SPRN_IAMR
         std     r5, STACK_SLOT_TID(r1)
         std     r6, STACK_SLOT_PSSCR(r1)
         std     r7, STACK_SLOT_PID(r1)
+       std     r8, STACK_SLOT_IAMR(r1)
  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+       mfspr   r5, SPRN_CIABR
+       mfspr   r6, SPRN_DAWR
+       mfspr   r7, SPRN_DAWRX
+       std     r5, STACK_SLOT_CIABR(r1)
+       std     r6, STACK_SLOT_DAWR(r1)
+       std     r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  
  BEGIN_FTR_SECTION
         /* Set partition DABR */
@@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
  
         /* Check if HDEC expires soon */
         mfspr   r3, SPRN_HDEC
-       cmpwi   r3, 512         /* 1 microsecond */
+       EXTEND_HDEC(r3)
+       cmpdi   r3, 512         /* 1 microsecond */
         blt     hdec_soon
  
  #ifdef CONFIG_KVM_XICS
@@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
          * set by the guest could disrupt the host.
          */
         li      r0, 0
-       mtspr   SPRN_IAMR, r0
-       mtspr   SPRN_CIABR, r0
-       mtspr   SPRN_DAWRX, r0
+       mtspr   SPRN_PSPB, r0
         mtspr   SPRN_WORT, r0
  BEGIN_FTR_SECTION
+       mtspr   SPRN_IAMR, r0
         mtspr   SPRN_TCSCR, r0
         /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
         li      r0, 1
@@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
         std     r6,VCPU_UAMOR(r9)
         li      r6,0
         mtspr   SPRN_AMR,r6
+       mtspr   SPRN_UAMOR, r6
  
         /* Switch DSCR back to host value */
         mfspr   r8, SPRN_DSCR
@@ -1669,13 +1695,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         ptesync
  
         /* Restore host values of some registers */
+BEGIN_FTR_SECTION
+       ld      r5, STACK_SLOT_CIABR(r1)
+       ld      r6, STACK_SLOT_DAWR(r1)
+       ld      r7, STACK_SLOT_DAWRX(r1)
+       mtspr   SPRN_CIABR, r5
+       mtspr   SPRN_DAWR, r6
+       mtspr   SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  BEGIN_FTR_SECTION
         ld      r5, STACK_SLOT_TID(r1)
         ld      r6, STACK_SLOT_PSSCR(r1)
         ld      r7, STACK_SLOT_PID(r1)
+       ld      r8, STACK_SLOT_IAMR(r1)
         mtspr   SPRN_TIDR, r5
         mtspr   SPRN_PSSCR, r6
         mtspr   SPRN_PID, r7
+       mtspr   SPRN_IAMR, r8
  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
  BEGIN_FTR_SECTION
         PPC_INVALIDATE_ERAT
@@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
         li      r0, KVM_GUEST_MODE_NONE
         stb     r0, HSTATE_IN_GUEST(r13)
  
-       ld      r0, 112+PPC_LR_STKOFF(r1)
-       addi    r1, r1, 112
+       ld      r0, SFS+PPC_LR_STKOFF(r1)
+       addi    r1, r1, SFS
         mtlr    r0
         blr
  
@@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
         mfspr   r3, SPRN_DEC
         mfspr   r4, SPRN_HDEC
         mftb    r5
-       cmpw    r3, r4
+       extsw   r3, r3
+       EXTEND_HDEC(r4)
+       cmpd    r3, r4
         ble     67f
         mtspr   SPRN_DEC, r4
  67:
         /* save expiry time of guest decrementer */
-       extsw   r3, r3
         add     r3, r3, r5
         ld      r4, HSTATE_KVM_VCPU(r13)
         ld      r5, HSTATE_KVM_VCORE(r13)
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c

index cbd82fde57702e2a210608dc2e1800ae574465e0..09ceea6175ba9dc1d99b8b56eadae1367138b166 100644 (file)
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
                         struct pt_regs *regs_user_copy)
  {
         regs_user->regs = task_pt_regs(current);
-       regs_user->abi  = perf_reg_abi(current);
+       regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+                        PERF_SAMPLE_REGS_ABI_NONE;
  }
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c

index e6f444b462079c3c4f4bea059337b92e700488f5..b5d960d6db3d0b18d33273b31ac67e03caebd02b 100644 (file)
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
         return mmio_atsd_reg;
  }
  
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
  {
         unsigned long launch;
  
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
         /* PID */
         launch |= pid << PPC_BITLSHIFT(38);
  
+       /* No flush */
+       launch |= !flush << PPC_BITLSHIFT(39);
+
         /* Invalidating the entire process doesn't use a va */
         return mmio_launch_invalidate(npu, launch, 0);
  }
  
  static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-                       unsigned long pid)
+                       unsigned long pid, bool flush)
  {
         unsigned long launch;
  
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
         /* PID */
         launch |= pid << PPC_BITLSHIFT(38);
  
+       /* No flush */
+       launch |= !flush << PPC_BITLSHIFT(39);
+
         return mmio_launch_invalidate(npu, launch, va);
  }
  
  #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
  
+struct mmio_atsd_reg {
+       struct npu *npu;
+       int reg;
+};
+
+static void mmio_invalidate_wait(
+       struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+       struct npu *npu;
+       int i, reg;
+
+       /* Wait for all invalidations to complete */
+       for (i = 0; i <= max_npu2_index; i++) {
+               if (mmio_atsd_reg[i].reg < 0)
+                       continue;
+
+               /* Wait for completion */
+               npu = mmio_atsd_reg[i].npu;
+               reg = mmio_atsd_reg[i].reg;
+               while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+                       cpu_relax();
+
+               put_mmio_atsd_reg(npu, reg);
+
+               /*
+                * The GPU requires two flush ATSDs to ensure all entries have
+                * been flushed. We use PID 0 as it will never be used for a
+                * process on the GPU.
+                */
+               if (flush)
+                       mmio_invalidate_pid(npu, 0, true);
+       }
+}
+
  /*
   * Invalidate either a single address or an entire PID depending on
   * the value of va.
   */
  static void mmio_invalidate(struct npu_context *npu_context, int va,
-                       unsigned long address)
+                       unsigned long address, bool flush)
  {
-       int i, j, reg;
+       int i, j;
         struct npu *npu;
         struct pnv_phb *nphb;
         struct pci_dev *npdev;
-       struct {
-               struct npu *npu;
-               int reg;
-       } mmio_atsd_reg[NV_MAX_NPUS];
+       struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
         unsigned long pid = npu_context->mm->context.id;
  
         /*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
  
                         if (va)
                                 mmio_atsd_reg[i].reg =
-                                       mmio_invalidate_va(npu, address, pid);
+                                       mmio_invalidate_va(npu, address, pid,
+                                                       flush);
                         else
                                 mmio_atsd_reg[i].reg =
-                                       mmio_invalidate_pid(npu, pid);
+                                       mmio_invalidate_pid(npu, pid, flush);
  
                         /*
                          * The NPU hardware forwards the shootdown to all GPUs
@@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
          */
         flush_tlb_mm(npu_context->mm);
  
-       /* Wait for all invalidations to complete */
-       for (i = 0; i <= max_npu2_index; i++) {
-               if (mmio_atsd_reg[i].reg < 0)
-                       continue;
-
-               /* Wait for completion */
-               npu = mmio_atsd_reg[i].npu;
-               reg = mmio_atsd_reg[i].reg;
-               while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
-                       cpu_relax();
-               put_mmio_atsd_reg(npu, reg);
-       }
+       mmio_invalidate_wait(mmio_atsd_reg, flush);
+       if (flush)
+               /* Wait for the flush to complete */
+               mmio_invalidate_wait(mmio_atsd_reg, false);
  }
  
  static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
          * There should be no more translation requests for this PID, but we
          * need to ensure any entries for it are removed from the TLB.
          */
-       mmio_invalidate(npu_context, 0, 0);
+       mmio_invalidate(npu_context, 0, 0, true);
  }
  
  static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
  {
         struct npu_context *npu_context = mn_to_npu_context(mn);
  
-       mmio_invalidate(npu_context, 1, address);
+       mmio_invalidate(npu_context, 1, address, true);
  }
  
  static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
  {
         struct npu_context *npu_context = mn_to_npu_context(mn);
  
-       mmio_invalidate(npu_context, 1, address);
+       mmio_invalidate(npu_context, 1, address, true);
  }
  
  static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
         struct npu_context *npu_context = mn_to_npu_context(mn);
         unsigned long address;
  
-       for (address = start; address <= end; address += PAGE_SIZE)
-               mmio_invalidate(npu_context, 1, address);
+       for (address = start; address < end; address += PAGE_SIZE)
+               mmio_invalidate(npu_context, 1, address, false);
+
+       /* Do the flush only on the final address == end */
+       mmio_invalidate(npu_context, 1, address, true);
  }
  
  static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                 /* No nvlink associated with this GPU device */
                 return ERR_PTR(-ENODEV);
  
-       if (!mm) {
-               /* kernel thread contexts are not supported */
+       if (!mm || mm->context.id == 0) {
+               /*
+                * Kernel thread contexts are not supported and context id 0 is
+                * reserved on the GPU.
+                */
                 return ERR_PTR(-EINVAL);
         }
  
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h

index 60d395fdc86438e55f49ddf853dca7b6f99582b3..aeac013968f2a00fd0050e772d50a8959dd57e04 100644 (file)
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *);
  /* Free guarded storage control block for current */
  void exit_thread_gs(void);
  
-/*
- * Return saved PC of a blocked thread.
- */
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
  unsigned long get_wchan(struct task_struct *p);
  #define task_pt_regs(tsk) ((struct pt_regs *) \
          (task_stack_page(tsk) + THREAD_SIZE) - 1)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c

index e545ffe5155ab0179327cfe4f9f66e677c604041..8e622bb52f7a95fd59c2f89aec95f04c616633cf 100644 (file)
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -564,8 +564,6 @@ static struct kset *ipl_kset;
  
  static void __ipl_run(void *unused)
  {
-       if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
-               diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
         diag308(DIAG308_LOAD_CLEAR, NULL);
         if (MACHINE_IS_VM)
                 __cpcmd("IPL", NULL, 0, NULL);
@@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused)
                 break;
         case REIPL_METHOD_CCW_DIAG:
                 diag308(DIAG308_SET, reipl_block_ccw);
-               if (MACHINE_IS_LPAR)
-                       diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
-               else
-                       diag308(DIAG308_LOAD_CLEAR, NULL);
+               diag308(DIAG308_LOAD_CLEAR, NULL);
                 break;
         case REIPL_METHOD_FCP_RW_DIAG:
                 diag308(DIAG308_SET, reipl_block_fcp);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c

index 999d7154bbdcd0891f6e2d5e6c55ea4ab62d0554..bb32b8618bf61836888d383c5b3fe1c9a352c0d2 100644 (file)
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -41,31 +41,6 @@
  
  asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
  
-/*
- * Return saved PC of a blocked thread. used in kernel/sched.
- * resume in entry.S does not create a new stack frame, it
- * just stores the registers %r6-%r15 to the frame given by
- * schedule. We want to return the address of the caller of
- * schedule, so we have to walk the backchain one time to
- * find the frame schedule() store its return address.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct stack_frame *sf, *low, *high;
-
-       if (!tsk || !task_stack_page(tsk))
-               return 0;
-       low = task_stack_page(tsk);
-       high = (struct stack_frame *) task_pt_regs(tsk);
-       sf = (struct stack_frame *) tsk->thread.ksp;
-       if (sf <= low || sf > high)
-               return 0;
-       sf = (struct stack_frame *) sf->back_chain;
-       if (sf <= low || sf > high)
-               return 0;
-       return sf->gprs[8];
-}
-
  extern void kernel_thread_starter(void);
  
  /*
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c

index 9da243d94cc3286c5e1dabcfae5e563f991326a0..3b297fa3aa67c59be7fdb2fd2953f431adfbc1d4 100644 (file)
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
         ptr = asce.origin * 4096;
         if (asce.r) {
                 *fake = 1;
+               ptr = 0;
                 asce.dt = ASCE_TYPE_REGION1;
         }
         switch (asce.dt) {
         case ASCE_TYPE_REGION1:
-               if (vaddr.rfx01 > asce.tl && !asce.r)
+               if (vaddr.rfx01 > asce.tl && !*fake)
                         return PGM_REGION_FIRST_TRANS;
                 break;
         case ASCE_TYPE_REGION2:
@@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
                 union region1_table_entry rfte;
  
                 if (*fake) {
-                       /* offset in 16EB guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+                       ptr += (unsigned long) vaddr.rfx << 53;
                         rfte.val = ptr;
                         goto shadow_r2t;
                 }
@@ -1036,8 +1036,7 @@ shadow_r2t:
                 union region2_table_entry rste;
  
                 if (*fake) {
-                       /* offset in 8PB guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+                       ptr += (unsigned long) vaddr.rsx << 42;
                         rste.val = ptr;
                         goto shadow_r3t;
                 }
@@ -1064,8 +1063,7 @@ shadow_r3t:
                 union region3_table_entry rtte;
  
                 if (*fake) {
-                       /* offset in 4TB guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+                       ptr += (unsigned long) vaddr.rtx << 31;
                         rtte.val = ptr;
                         goto shadow_sgt;
                 }
@@ -1101,8 +1099,7 @@ shadow_sgt:
                 union segment_table_entry ste;
  
                 if (*fake) {
-                       /* offset in 2G guest memory block */
-                       ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+                       ptr += (unsigned long) vaddr.sx << 20;
                         ste.val = ptr;
                         goto shadow_pgt;
                 }
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h

index d9a922d8711b2155a2225699e5958236dbb70adb..299274581968d1c850f56341615d81bddae19003 100644 (file)
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@ struct task_struct;
   */
  extern void (*cpu_wait)(void);
  
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
  extern void start_thread(struct pt_regs *regs,
                         unsigned long pc, unsigned long sp);
  extern unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c

index eb64d7a677cb9525afc874a1cb3a21872ad4bddb..6e20241a1ed45c428d7f5bfe9e76c55ecd5e0d07 100644 (file)
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r)
         return 1;
  }
  
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return task_pt_regs(tsk)->cp0_epc;
-}
-
  unsigned long get_wchan(struct task_struct *task)
  {
         if (!task || task == current || task->state == TASK_RUNNING)
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h

index dd27159819ebedce4d0479ec800e91d56706311f..b395e5620c0b986ec808675ccf3062e9a534ae43 100644 (file)
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -67,9 +67,6 @@ struct thread_struct {
         .current_ds = KERNEL_DS, \
  }
  
-/* Return saved PC of a blocked thread. */
-unsigned long thread_saved_pc(struct task_struct *t);
-
  /* Do necessary setup to start up a newly executed thread. */
  static inline void start_thread(struct pt_regs * regs, unsigned long pc,
                                     unsigned long sp)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h

index b58ee90184334224b756360e769c47a0d10e088a..f04dc5a4306245ffc0b53b3c88707332b8a25a0f 100644 (file)
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -89,9 +89,7 @@ struct thread_struct {
  #include <linux/types.h>
  #include <asm/fpumacro.h>
  
-/* Return saved PC of a blocked thread. */
  struct task_struct;
-unsigned long thread_saved_pc(struct task_struct *);
  
  /* On Uniprocessor, even in RMO processes see TSO semantics */
  #ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c

index b6dac8e980f07183f9ea300abcfb033d0251b5c0..9245f93398c76a59a073963253609c1fc54bfeec 100644 (file)
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -176,14 +176,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp)
         printk("\n");
  }
  
-/*
- * Note: sparc64 has a pretty intricated thread_saved_pc, check it out.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return task_thread_info(tsk)->kpc;
-}
-
  /*
   * Free current thread data structures etc..
   */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c

index 1badc493e62ee71c2e538758aec3c446780ea671..b96104da5bd6116b119872eaa79e013636aa328c 100644 (file)
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init);
  
  #endif
  
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct thread_info *ti = task_thread_info(tsk);
-       unsigned long ret = 0xdeadbeefUL;
-       
-       if (ti && ti->ksp) {
-               unsigned long *sp;
-               sp = (unsigned long *)(ti->ksp + STACK_BIAS);
-               if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL &&
-                   sp[14]) {
-                       unsigned long *fp;
-                       fp = (unsigned long *)(sp[14] + STACK_BIAS);
-                       if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL)
-                               ret = fp[15];
-               }
-       }
-       return ret;
-}
-
  /* Free current thread data structures etc.. */
  void exit_thread(struct task_struct *tsk)
  {
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h

index 0bc9968b97a19296a48d4dfb215cd3464b69f9a6..f71e5206650bf91578677720a7701a3d766d0436 100644 (file)
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task)
  
  extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags);
  
-
-/*
- * Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
- */
-#define thread_saved_pc(t)   ((t)->thread.pc)
-
  unsigned long get_wchan(struct task_struct *p);
  
  /* Return initial ksp value for given task. */
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h

index 2d1e0dd5bb0bf55a7e543f9f7f9e0eefd6ab4cac..f6d1a3f747a9b58b3f33ce0ee25c3bf889985c0b 100644 (file)
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task)
  {
  }
  
-extern unsigned long thread_saved_pc(struct task_struct *t);
-
  static inline void mm_copy_segments(struct mm_struct *from_mm,
                                     struct mm_struct *new_mm)
  {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c

index 64a1fd06f3fde02d964c4fedcb9e73328085c94d..7b56401173250e1dbb61208106c35c58e324812b 100644 (file)
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -56,12 +56,6 @@ union thread_union cpu0_irqstack
         __attribute__((__section__(".data..init_irqstack"))) =
                 { INIT_THREAD_INFO(init_task) };
  
-unsigned long thread_saved_pc(struct task_struct *task)
-{
-       /* FIXME: Need to look up userspace_pid by cpu */
-       return os_process_pc(userspace_pid[0]);
-}
-
  /* Changed in setup_arch, which is called in early boot */
  static char host_info[(__NEW_UTS_LEN + 1) * 5];
  
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c

index a6d91d4e37a1f1dadae588a1c084e31e65d08f5d..110ce8238466f7e404d8312dd8d9297fa09355c9 100644 (file)
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids
   [ C(DTLB) ] = {
         [ C(OP_READ) ] = {
                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
-               [ C(RESULT_MISS)   ] = 0x608,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
+               [ C(RESULT_MISS)   ] = 0xe08,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
         },
         [ C(OP_WRITE) ] = {
                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
-               [ C(RESULT_MISS)   ] = 0x649,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
+               [ C(RESULT_MISS)   ] = 0xe49,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
         },
         [ C(OP_PREFETCH) ] = {
                 [ C(RESULT_ACCESS) ] = 0x0,
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h

index 05596261577937d65afcad75574bab676a7effe9..722d0e56886342a3a9f65d7f419d0e240a9cc6ab 100644 (file)
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
  
         bool perm_ok; /* do not check permissions if true */
         bool ud;        /* inject an #UD if host doesn't support insn */
+       bool tf;        /* TF value before instruction (after for syscall/sysret) */
  
         bool have_exception;
         struct x86_exception exception;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h

index fba1007139243b21081b6bfa114bed588e6f4e8e..d5acc27ed1cc79ab7bc621ae0d85ad4076cbc4cc 100644 (file)
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -2,8 +2,7 @@
  #define _ASM_X86_MSHYPER_H
  
  #include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/clocksource.h>
+#include <linux/atomic.h>
  #include <asm/hyperv.h>
  
  /*
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h

index 3cada998a402a7893ffd2fc709916f4fcbc3f970..a28b671f15499590d3b7243eb7909436aa27b814 100644 (file)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task);
  
  #endif /* CONFIG_X86_64 */
  
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
  extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
                                                unsigned long new_sp);
  
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c

index 0bb88428cbf2697c89a60311051cc5351ea55fde..3ca198080ea9294486ae9a1121e7815dfba7cb19 100644 (file)
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -544,17 +544,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
         return randomize_page(mm->brk, 0x02000000);
  }
  
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       struct inactive_task_frame *frame =
-               (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
-       return READ_ONCE_NOCHECK(frame->ret_addr);
-}
-
  /*
   * Called from fs/proc with a reference on @p to find the function
   * which called into schedule(). This needs to be done carefully
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c

index 0816ab2e8adcae2b45f83c95c51e8b95a245b07e..80890dee66cebf370a3815e28f7bd7c34025b0d4 100644 (file)
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
                 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
         }
  
+       ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
         return X86EMUL_CONTINUE;
  }
  
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 87d3cb901935f2b251857f54ed53ca73567f10ef..0e846f0cb83bb214811d0a12d2f700cc96a455f9 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
         kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
  
         ctxt->eflags = kvm_get_rflags(vcpu);
+       ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
         ctxt->eip = kvm_rip_read(vcpu);
         ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
                      (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
@@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
         return dr6;
  }
  
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
  {
         struct kvm_run *kvm_run = vcpu->run;
  
-       /*
-        * rflags is the old, "raw" value of the flags.  The new value has
-        * not been saved yet.
-        *
-        * This is correct even for TF set by the guest, because "the
-        * processor will not generate this exception after the instruction
-        * that sets the TF flag".
-        */
-       if (unlikely(rflags & X86_EFLAGS_TF)) {
-               if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-                       kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
-                                                 DR6_RTM;
-                       kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
-                       kvm_run->debug.arch.exception = DB_VECTOR;
-                       kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
-               } else {
-                       /*
-                        * "Certain debug exceptions may clear bit 0-3.  The
-                        * remaining contents of the DR6 register are never
-                        * cleared by the processor".
-                        */
-                       vcpu->arch.dr6 &= ~15;
-                       vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
-                       kvm_queue_exception(vcpu, DB_VECTOR);
-               }
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+               kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+               kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+               kvm_run->debug.arch.exception = DB_VECTOR;
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               *r = EMULATE_USER_EXIT;
+       } else {
+               /*
+                * "Certain debug exceptions may clear bit 0-3.  The
+                * remaining contents of the DR6 register are never
+                * cleared by the processor".
+                */
+               vcpu->arch.dr6 &= ~15;
+               vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+               kvm_queue_exception(vcpu, DB_VECTOR);
         }
  }
  
@@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
         int r = EMULATE_DONE;
  
         kvm_x86_ops->skip_emulated_instruction(vcpu);
-       kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+
+       /*
+        * rflags is the old, "raw" value of the flags.  The new value has
+        * not been saved yet.
+        *
+        * This is correct even for TF set by the guest, because "the
+        * processor will not generate this exception after the instruction
+        * that sets the TF flag".
+        */
+       if (unlikely(rflags & X86_EFLAGS_TF))
+               kvm_vcpu_do_singlestep(vcpu, &r);
         return r == EMULATE_DONE;
  }
  EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
@@ -5726,8 +5727,9 @@ restart:
                 toggle_interruptibility(vcpu, ctxt->interruptibility);
                 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
                 kvm_rip_write(vcpu, ctxt->eip);
-               if (r == EMULATE_DONE)
-                       kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+               if (r == EMULATE_DONE &&
+                   (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+                       kvm_vcpu_do_singlestep(vcpu, &r);
                 if (!ctxt->have_exception ||
                     exception_type(ctxt->exception.vector) == EXCPT_TRAP)
                         __kvm_set_rflags(vcpu, ctxt->eflags);
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h

index 003eeee3fbc636d91aed849aaa39bb0c24370227..30ee8c608853d4fb4b238a01319589d38ce018b7 100644 (file)
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -213,8 +213,6 @@ struct mm_struct;
  #define release_segments(mm)   do { } while(0)
  #define forget_segments()      do { } while (0)
  
-#define thread_saved_pc(tsk)   (task_pt_regs(tsk)->pc)
-
  extern unsigned long get_wchan(struct task_struct *p);
  
  #define KSTK_EIP(tsk)          (task_pt_regs(tsk)->pc)
diff --git a/block/bio.c b/block/bio.c

index 888e7801c6381edd8d995503643917b2f452282e..26b0810fb8eac14b0a39c27bdf53f2398c767fb3 100644 (file)
--- a/block/bio.c
+++ b/block/bio.c
@@ -240,20 +240,21 @@ fallback:
         return bvl;
  }
  
-static void __bio_free(struct bio *bio)
+void bio_uninit(struct bio *bio)
  {
         bio_disassociate_task(bio);
  
         if (bio_integrity(bio))
                 bio_integrity_free(bio);
  }
+EXPORT_SYMBOL(bio_uninit);
  
  static void bio_free(struct bio *bio)
  {
         struct bio_set *bs = bio->bi_pool;
         void *p;
  
-       __bio_free(bio);
+       bio_uninit(bio);
  
         if (bs) {
                 bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
@@ -271,6 +272,11 @@ static void bio_free(struct bio *bio)
         }
  }
  
+/*
+ * Users of this function have their own bio allocation. Subsequently,
+ * they must remember to pair any call to bio_init() with bio_uninit()
+ * when IO has completed, or when the bio is released.
+ */
  void bio_init(struct bio *bio, struct bio_vec *table,
               unsigned short max_vecs)
  {
@@ -297,7 +303,7 @@ void bio_reset(struct bio *bio)
  {
         unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
  
-       __bio_free(bio);
+       bio_uninit(bio);
  
         memset(bio, 0, BIO_RESET_BYTES);
         bio->bi_flags = flags;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c

index 1f5b692526ae1a7199ee9bbaef305c4b0a42e696..0ded5e846335667406d58ce08e8439360baeb312 100644 (file)
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
                 __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
  }
  
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+               return;
+
+       if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+               struct request_queue *q = hctx->queue;
+
+               if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                       atomic_inc(&q->shared_hctx_restart);
+       } else
+               set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+}
+
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+               return false;
+
+       if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+               struct request_queue *q = hctx->queue;
+
+               if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                       atomic_dec(&q->shared_hctx_restart);
+       } else
+               clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+
+       if (blk_mq_hctx_has_pending(hctx)) {
+               blk_mq_run_hw_queue(hctx, true);
+               return true;
+       }
+
+       return false;
+}
+
  struct request *blk_mq_sched_get_request(struct request_queue *q,
                                          struct bio *bio,
                                          unsigned int op,
@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
         return true;
  }
  
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-               clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-               if (blk_mq_hctx_has_pending(hctx)) {
-                       blk_mq_run_hw_queue(hctx, true);
-                       return true;
-               }
-       }
-       return false;
-}
-
  /**
   * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
   * @pos:    loop cursor.
@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
         unsigned int i, j;
  
         if (set->flags & BLK_MQ_F_TAG_SHARED) {
+               /*
+                * If this is 0, then we know that no hardware queues
+                * have RESTART marked. We're done.
+                */
+               if (!atomic_read(&queue->shared_hctx_restart))
+                       return;
+
                 rcu_read_lock();
                 list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
                                            tag_set_list) {
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h

index edafb5383b7bbdedfd5365ed38f9a5c373ec96ab..5007edece51aced038d3db8f0adbc722c49e3d38 100644 (file)
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
         return false;
  }
  
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-       if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-               set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
  static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
  {
         return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq.c b/block/blk-mq.c

index bb66c96850b18cb419b0e44aab1894169352f9af..958cedaff8b829ceb4c724dbf1c6f6d30d883aeb 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
         }
  }
  
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
  static void queue_set_hctx_shared(struct request_queue *q, bool shared)
  {
         struct blk_mq_hw_ctx *hctx;
         int i;
  
         queue_for_each_hw_ctx(q, hctx, i) {
-               if (shared)
+               if (shared) {
+                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                               atomic_inc(&q->shared_hctx_restart);
                         hctx->flags |= BLK_MQ_F_TAG_SHARED;
-               else
+               } else {
+                       if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+                               atomic_dec(&q->shared_hctx_restart);
                         hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+               }
         }
  }
  
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+                                       bool shared)
  {
         struct request_queue *q;
  
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c

index 8af664f7d27c25493d548752446625515eebc184..be117495eb43b6ef3caa5d49f6404fd073f5c679 100644 (file)
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -877,7 +877,7 @@ static void aead_sock_destruct(struct sock *sk)
         unsigned int ivlen = crypto_aead_ivsize(
                                 crypto_aead_reqtfm(&ctx->aead_req));
  
-       WARN_ON(atomic_read(&sk->sk_refcnt) != 0);
+       WARN_ON(refcount_read(&sk->sk_refcnt) != 0);
         aead_put_sgl(sk);
         sock_kzfree_s(sk, ctx->iv, ivlen);
         sock_kfree_s(sk, ctx, ctx->len);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c

index 3a10d7573477e7dea0139c5f885e9514a1886a7a..d53162997f32002828a6db353bcd1926f2a8dc97 100644 (file)
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1428,6 +1428,37 @@ static void acpi_init_coherency(struct acpi_device *adev)
         adev->flags.coherent_dma = cca;
  }
  
+static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
+{
+       bool *is_spi_i2c_slave_p = data;
+
+       if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
+               return 1;
+
+       /*
+        * devices that are connected to UART still need to be enumerated to
+        * platform bus
+        */
+       if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
+               *is_spi_i2c_slave_p = true;
+
+        /* no need to do more checking */
+       return -1;
+}
+
+static bool acpi_is_spi_i2c_slave(struct acpi_device *device)
+{
+       struct list_head resource_list;
+       bool is_spi_i2c_slave = false;
+
+       INIT_LIST_HEAD(&resource_list);
+       acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
+                              &is_spi_i2c_slave);
+       acpi_dev_free_resource_list(&resource_list);
+
+       return is_spi_i2c_slave;
+}
+
  void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
                              int type, unsigned long long sta)
  {
@@ -1443,6 +1474,7 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
         acpi_bus_get_flags(device);
         device->flags.match_driver = false;
         device->flags.initialized = true;
+       device->flags.spi_i2c_slave = acpi_is_spi_i2c_slave(device);
         acpi_device_clear_enumerated(device);
         device_initialize(&device->dev);
         dev_set_uevent_suppress(&device->dev, true);
@@ -1727,38 +1759,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used,
         return AE_OK;
  }
  
-static int acpi_check_spi_i2c_slave(struct acpi_resource *ares, void *data)
-{
-       bool *is_spi_i2c_slave_p = data;
-
-       if (ares->type != ACPI_RESOURCE_TYPE_SERIAL_BUS)
-               return 1;
-
-       /*
-        * devices that are connected to UART still need to be enumerated to
-        * platform bus
-        */
-       if (ares->data.common_serial_bus.type != ACPI_RESOURCE_SERIAL_TYPE_UART)
-               *is_spi_i2c_slave_p = true;
-
-        /* no need to do more checking */
-       return -1;
-}
-
  static void acpi_default_enumeration(struct acpi_device *device)
  {
-       struct list_head resource_list;
-       bool is_spi_i2c_slave = false;
-
         /*
          * Do not enumerate SPI/I2C slaves as they will be enumerated by their
          * respective parents.
          */
-       INIT_LIST_HEAD(&resource_list);
-       acpi_dev_get_resources(device, &resource_list, acpi_check_spi_i2c_slave,
-                              &is_spi_i2c_slave);
-       acpi_dev_free_resource_list(&resource_list);
-       if (!is_spi_i2c_slave) {
+       if (!device->flags.spi_i2c_slave) {
                 acpi_create_platform_device(device, NULL);
                 acpi_device_set_enumerated(device);
         } else {
@@ -1854,7 +1861,7 @@ static void acpi_bus_attach(struct acpi_device *device)
                 return;
  
         device->flags.match_driver = true;
-       if (ret > 0) {
+       if (ret > 0 && !device->flags.spi_i2c_slave) {
                 acpi_device_set_enumerated(device);
                 goto ok;
         }
@@ -1863,10 +1870,10 @@ static void acpi_bus_attach(struct acpi_device *device)
         if (ret < 0)
                 return;
  
-       if (device->pnp.type.platform_id)
-               acpi_default_enumeration(device);
-       else
+       if (!device->pnp.type.platform_id && !device->flags.spi_i2c_slave)
                 acpi_device_set_enumerated(device);
+       else
+               acpi_default_enumeration(device);
  
   ok:
         list_for_each_entry(child, &device->children, node)
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c

index 7584ae1ded85abb0a4d602a3deb4deaf3f8367c4..f0433adcd8fca8451bad4fc52ccbae6c340e3241 100644 (file)
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -924,12 +924,7 @@ fore200e_tx_irq(struct fore200e* fore200e)
                 else {
                     dev_kfree_skb_any(entry->skb);
                 }
-#if 1
-               /* race fixed by the above incarnation mechanism, but... */
-               if (atomic_read(&sk_atm(vcc)->sk_wmem_alloc) < 0) {
-                   atomic_set(&sk_atm(vcc)->sk_wmem_alloc, 0);
-               }
-#endif
+
                 /* check error condition */
                 if (*entry->status & STATUS_ERROR)
                     atomic_inc(&vcc->stats->tx_err);
@@ -1130,13 +1125,9 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp
         return -ENOMEM;
      }
  
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
      vcc->push(vcc, skb);
      atomic_inc(&vcc->stats->rx);
  
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
-
      return 0;
  }
  
@@ -1572,7 +1563,6 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
      unsigned long           flags;
  
      ASSERT(vcc);
-    ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0);
      ASSERT(fore200e);
      ASSERT(fore200e_vcc);
  
diff --git a/drivers/atm/he.c b/drivers/atm/he.c

index 461da2bce8efde423018897d77c7c0f094efa7e7..37ee21c5a5ca75fd224769dd6c28350cce62d250 100644 (file)
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -2395,7 +2395,7 @@ he_close(struct atm_vcc *vcc)
                  * TBRQ, the host issues the close command to the adapter.
                  */
  
-               while (((tx_inuse = atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
+               while (((tx_inuse = refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) > 1) &&
                        (retry < MAX_RETRY)) {
                         msleep(sleep);
                         if (sleep < 250)
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c

index 4e64de380bda5b0114410cae4a377b046f9f3481..60bacba03d17da46e882b45cd104138bffcc5e51 100644 (file)
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -724,7 +724,7 @@ push_on_scq(struct idt77252_dev *card, struct vc_map *vc, struct sk_buff *skb)
                 struct sock *sk = sk_atm(vcc);
  
                 vc->estimator->cells += (skb->len + 47) / 48;
-               if (atomic_read(&sk->sk_wmem_alloc) >
+               if (refcount_read(&sk->sk_wmem_alloc) >
                     (sk->sk_sndbuf >> 1)) {
                         u32 cps = vc->estimator->maxcps;
  
@@ -2009,7 +2009,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags)
                 atomic_inc(&vcc->stats->tx_err);
                 return -ENOMEM;
         }
-       atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
  
         skb_put_data(skb, cell, 52);
  
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c

index 726c32e35db9c542e6f050ff0a04e31e10fc2b7d..0e824091a12fac8757c2ade5d4e5dae6a1470cbd 100644 (file)
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
         unsigned long timeout;
         int ret;
  
-       xen_blkif_get(blkif);
-
         set_freezable();
         while (!kthread_should_stop()) {
                 if (try_to_freeze())
@@ -665,7 +663,6 @@ purge_gnt_list:
                 print_stats(ring);
  
         ring->xenblkd = NULL;
-       xen_blkif_put(blkif);
  
         return 0;
  }
@@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
  static void make_response(struct xen_blkif_ring *ring, u64 id,
                           unsigned short op, int st)
  {
-       struct blkif_response  resp;
+       struct blkif_response *resp;
         unsigned long     flags;
         union blkif_back_rings *blk_rings;
         int notify;
  
-       resp.id        = id;
-       resp.operation = op;
-       resp.status    = st;
-
         spin_lock_irqsave(&ring->blk_ring_lock, flags);
         blk_rings = &ring->blk_rings;
         /* Place on the response ring for the relevant domain. */
         switch (ring->blkif->blk_protocol) {
         case BLKIF_PROTOCOL_NATIVE:
-               memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->native,
+                                        blk_rings->native.rsp_prod_pvt);
                 break;
         case BLKIF_PROTOCOL_X86_32:
-               memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->x86_32,
+                                        blk_rings->x86_32.rsp_prod_pvt);
                 break;
         case BLKIF_PROTOCOL_X86_64:
-               memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
-                      &resp, sizeof(resp));
+               resp = RING_GET_RESPONSE(&blk_rings->x86_64,
+                                        blk_rings->x86_64.rsp_prod_pvt);
                 break;
         default:
                 BUG();
         }
+
+       resp->id        = id;
+       resp->operation = op;
+       resp->status    = st;
+
         blk_rings->common.rsp_prod_pvt++;
         RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
         spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h

index dea61f6ab8cbdbaffedceb4c64bda239b51a63a4..ecb35fe8ca8dbb54f36a85513a09064819acd67a 100644 (file)
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
  struct blkif_common_request {
         char dummy;
  };
-struct blkif_common_response {
-       char dummy;
-};
+
+/* i386 protocol version */
  
  struct blkif_x86_32_request_rw {
         uint8_t        nr_segments;  /* number of segments                   */
@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
         } u;
  } __attribute__((__packed__));
  
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
-       uint64_t        id;              /* copied from request */
-       uint8_t         operation;       /* copied from request */
-       int16_t         status;          /* BLKIF_RSP_???       */
-};
-#pragma pack(pop)
  /* x86_64 protocol version */
  
  struct blkif_x86_64_request_rw {
@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
         } u;
  } __attribute__((__packed__));
  
-struct blkif_x86_64_response {
-       uint64_t       __attribute__((__aligned__(8))) id;
-       uint8_t         operation;       /* copied from request */
-       int16_t         status;          /* BLKIF_RSP_???       */
-};
-
  DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
-                 struct blkif_common_response);
+                 struct blkif_response);
  DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
-                 struct blkif_x86_32_response);
+                 struct blkif_response __packed);
  DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
-                 struct blkif_x86_64_response);
+                 struct blkif_response);
  
  union blkif_back_rings {
         struct blkif_back_ring        native;
@@ -281,6 +266,7 @@ struct xen_blkif_ring {
  
         wait_queue_head_t       wq;
         atomic_t                inflight;
+       bool                    active;
         /* One thread per blkif ring. */
         struct task_struct      *xenblkd;
         unsigned int            waiting_reqs;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c

index 1f3dfaa54d871a36897408898c1e0e9f22100bb1..792da683e70dafafa6f69e224b8e57272d3e6be1 100644 (file)
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
                 init_waitqueue_head(&ring->shutdown_wq);
                 ring->blkif = blkif;
                 ring->st_print = jiffies;
-               xen_blkif_get(blkif);
+               ring->active = true;
         }
  
         return 0;
@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
                 struct xen_blkif_ring *ring = &blkif->rings[r];
                 unsigned int i = 0;
  
+               if (!ring->active)
+                       continue;
+
                 if (ring->xenblkd) {
                         kthread_stop(ring->xenblkd);
                         wake_up(&ring->shutdown_wq);
-                       ring->xenblkd = NULL;
                 }
  
                 /* The above kthread_stop() guarantees that at this point we
@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
                 BUG_ON(ring->free_pages_num != 0);
                 BUG_ON(ring->persistent_gnt_c != 0);
                 WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
-               xen_blkif_put(blkif);
+               ring->active = false;
         }
         blkif->nr_ring_pages = 0;
         /*
@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
  
  static void xen_blkif_free(struct xen_blkif *blkif)
  {
-
-       xen_blkif_disconnect(blkif);
+       WARN_ON(xen_blkif_disconnect(blkif));
         xen_vbd_free(&blkif->vbd);
+       kfree(blkif->be->mode);
+       kfree(blkif->be);
  
         /* Make sure everything is drained before shutting down */
         kmem_cache_free(xen_blkif_cachep, blkif);
@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
                 xen_blkif_put(be->blkif);
         }
  
-       kfree(be->mode);
-       kfree(be);
         return 0;
  }
  
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c

index 24f8c4e93f4eb7ce34e5280781dab25ea5ffaf40..9ab6cfbb831d81b8f533b35bd4b27161a6f5ef76 100644 (file)
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -295,6 +295,7 @@ static const struct {
         { 0x410e, "BCM43341B0"  },      /* 002.001.014 */
         { 0x4406, "BCM4324B3"   },      /* 002.004.006 */
         { 0x610c, "BCM4354"     },      /* 003.001.012 */
+       { 0x2209, "BCM43430A1"  },      /* 001.002.009 */
         { }
  };
  
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c

index d2e9e2d1b01410fb3bff025752af77625e311b73..6a662d0161b49765d97c1b3d37c236cfecf8244a 100644 (file)
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -419,8 +419,7 @@ finalize:
         if (err)
                 return err;
  
-       err = bcm_request_irq(bcm);
-       if (!err)
+       if (!bcm_request_irq(bcm))
                 err = bcm_setup_sleep(hu);
  
         return err;
@@ -657,6 +656,15 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
                 },
                 .driver_data = &acpi_active_low,
         },
+       {
+               .ident = "Asus T100CHI",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR,
+                                       "ASUSTeK COMPUTER INC."),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"),
+               },
+               .driver_data = &acpi_active_low,
+       },
         {       /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
                 .ident = "Lenovo ThinkPad 8",
                 .matches = {
diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c

index 7de0edc0ff8cd73bca5d81bc1a96794c35234a17..aea930101dd2b9e8415d7666eccc4b971d0620f6 100644 (file)
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -31,7 +31,7 @@
  
  #include "hci_uart.h"
  
-struct serdev_device_ops hci_serdev_client_ops;
+static struct serdev_device_ops hci_serdev_client_ops;
  
  static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type)
  {
@@ -268,7 +268,7 @@ static int hci_uart_receive_buf(struct serdev_device *serdev, const u8 *data,
         return count;
  }
  
-struct serdev_device_ops hci_serdev_client_ops = {
+static struct serdev_device_ops hci_serdev_client_ops = {
         .receive_buf = hci_uart_receive_buf,
         .write_wakeup = hci_uart_write_wakeup,
  };
diff --git a/drivers/char/random.c b/drivers/char/random.c

index e870f329db888c58e06bb854e7cf55d78a8bd313..01a260f67437488b425372c12d33142fce699f84 100644 (file)
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -803,13 +803,13 @@ static int crng_fast_load(const char *cp, size_t len)
                 p[crng_init_cnt % CHACHA20_KEY_SIZE] ^= *cp;
                 cp++; crng_init_cnt++; len--;
         }
+       spin_unlock_irqrestore(&primary_crng.lock, flags);
         if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) {
                 invalidate_batched_entropy();
                 crng_init = 1;
                 wake_up_interruptible(&crng_init_wait);
                 pr_notice("random: fast init done\n");
         }
-       spin_unlock_irqrestore(&primary_crng.lock, flags);
         return 1;
  }
  
@@ -841,6 +841,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
         }
         memzero_explicit(&buf, sizeof(buf));
         crng->init_time = jiffies;
+       spin_unlock_irqrestore(&primary_crng.lock, flags);
         if (crng == &primary_crng && crng_init < 2) {
                 invalidate_batched_entropy();
                 crng_init = 2;
@@ -848,7 +849,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
                 wake_up_interruptible(&crng_init_wait);
                 pr_notice("random: crng init done\n");
         }
-       spin_unlock_irqrestore(&primary_crng.lock, flags);
  }
  
  static inline void crng_wait_ready(void)
@@ -2041,8 +2041,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
  u64 get_random_u64(void)
  {
         u64 ret;
-       bool use_lock = crng_init < 2;
-       unsigned long flags;
+       bool use_lock = READ_ONCE(crng_init) < 2;
+       unsigned long flags = 0;
         struct batched_entropy *batch;
  
  #if BITS_PER_LONG == 64
@@ -2073,8 +2073,8 @@ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
  u32 get_random_u32(void)
  {
         u32 ret;
-       bool use_lock = crng_init < 2;
-       unsigned long flags;
+       bool use_lock = READ_ONCE(crng_init) < 2;
+       unsigned long flags = 0;
         struct batched_entropy *batch;
  
         if (arch_get_random_int(&ret))
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c

index 4bed671e490e0b15d79fd432f3d85dacfd094a96..8b5c30062d995968cc83e6c0511bcccab721873c 100644 (file)
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct arch_timer_mem_frame *frame)
                 return 0;
         }
  
-       rate = readl_relaxed(frame + CNTFRQ);
+       rate = readl_relaxed(base + CNTFRQ);
  
-       iounmap(frame);
+       iounmap(base);
  
         return rate;
  }
diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c

index 44e5e951583bc38fc8c4a6a9b89ea91f7587af34..8e64b8460f113f56e829f69a414e229fea3c722a 100644 (file)
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -18,6 +18,7 @@
  #include <linux/clk.h>
  #include <linux/interrupt.h>
  #include <linux/clockchips.h>
+#include <linux/clocksource.h>
  #include <linux/of_address.h>
  #include <linux/of_irq.h>
  #include <linux/slab.h>
diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c

index 2e9c830ae1cd52d61e38dbb3620b2eb53dd4eb7e..c4656c4d44a6715a25b0b9ddaf432451f31ad32c 100644 (file)
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -12,6 +12,7 @@
  
  #include <linux/clk.h>
  #include <linux/clockchips.h>
+#include <linux/clocksource.h>
  #include <linux/delay.h>
  #include <linux/interrupt.h>
  #include <linux/irq.h>
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c

index 5104b63981390adb878ed27f4ca2d0d758c65307..c83ea68be792df45a354f38dee2438a866a1f29d 100644 (file)
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -721,7 +721,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
         u32 set;
  
         if (!of_device_is_compatible(mvchip->chip.of_node,
-                                    "marvell,armada-370-xp-gpio"))
+                                    "marvell,armada-370-gpio"))
                 return 0;
  
         if (IS_ERR(mvchip->clk))
@@ -852,7 +852,7 @@ static const struct of_device_id mvebu_gpio_of_match[] = {
                 .data       = (void *) MVEBU_GPIO_SOC_VARIANT_ARMADAXP,
         },
         {
-               .compatible = "marvell,armada-370-xp-gpio",
+               .compatible = "marvell,armada-370-gpio",
                 .data       = (void *) MVEBU_GPIO_SOC_VARIANT_ORION,
         },
         {
@@ -1128,7 +1128,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
                                                  mvchip);
         }
  
-       /* Armada 370/XP has simple PWM support for GPIO lines */
+       /* Some MVEBU SoCs have simple PWM support for GPIO lines */
         if (IS_ENABLED(CONFIG_PWM))
                 return mvebu_pwm_probe(pdev, mvchip, id);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c

index 1cf78f4dd339f93ddd971088ec42a5146b9820fe..1e8e1123ddf416f18176cbc6e82fa791b3df9fb5 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -693,6 +693,10 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
                         DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
                                  adev->clock.default_dispclk / 100);
                         adev->clock.default_dispclk = 60000;
+               } else if (adev->clock.default_dispclk <= 60000) {
+                       DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
+                                adev->clock.default_dispclk / 100);
+                       adev->clock.default_dispclk = 62500;
                 }
                 adev->clock.dp_extclk =
                         le16_to_cpu(firmware_info->info_21.usUniphyDPModeExtClkFreq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index f2d705e6a75aa4f092d3d98ff739927e15b6f26b..ab6b0d0febab810ba4941e5a527435dd5d3161d8 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -449,6 +449,7 @@ static const struct pci_device_id pciidlist[] = {
         {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
         {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
         {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+       {0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
         {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
         /* Vega 10 */
         {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT},
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c

index 8c9bc75a9c2db63288f2c6765b02f68f63875194..8a0818b23ea40fadde57f6b469b2153330b710ab 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -165,7 +165,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
         struct drm_device *dev = crtc->dev;
         struct amdgpu_device *adev = dev->dev_private;
         int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-       ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+       ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
  
         memset(&args, 0, sizeof(args));
  
@@ -178,7 +178,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state)
  void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev)
  {
         int index = GetIndexIntoMasterTable(COMMAND, EnableDispPowerGating);
-       ENABLE_DISP_POWER_GATING_PARAMETERS_V2_1 args;
+       ENABLE_DISP_POWER_GATING_PS_ALLOCATION args;
  
         memset(&args, 0, sizeof(args));
  
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c

index 9f847615ac74ab012f6203a141627a5c6f5993e2..48ca2457df8c964977f3f7edae0980bc227b97cf 100644 (file)
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1229,21 +1229,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
         if (!connector)
                 return -ENOENT;
  
-       drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
-       encoder = drm_connector_get_encoder(connector);
-       if (encoder)
-               out_resp->encoder_id = encoder->base.id;
-       else
-               out_resp->encoder_id = 0;
-
-       ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
-                       (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
-                       (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
-                       &out_resp->count_props);
-       drm_modeset_unlock(&dev->mode_config.connection_mutex);
-       if (ret)
-               goto out_unref;
-
         for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
                 if (connector->encoder_ids[i] != 0)
                         encoders_count++;
@@ -1256,7 +1241,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
                                 if (put_user(connector->encoder_ids[i],
                                              encoder_ptr + copied)) {
                                         ret = -EFAULT;
-                                       goto out_unref;
+                                       goto out;
                                 }
                                 copied++;
                         }
@@ -1300,15 +1285,32 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
                         if (copy_to_user(mode_ptr + copied,
                                          &u_mode, sizeof(u_mode))) {
                                 ret = -EFAULT;
+                               mutex_unlock(&dev->mode_config.mutex);
+
                                 goto out;
                         }
                         copied++;
                 }
         }
         out_resp->count_modes = mode_count;
-out:
         mutex_unlock(&dev->mode_config.mutex);
-out_unref:
+
+       drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
+       encoder = drm_connector_get_encoder(connector);
+       if (encoder)
+               out_resp->encoder_id = encoder->base.id;
+       else
+               out_resp->encoder_id = 0;
+
+       /* Only grab properties after probing, to make sure EDID and other
+        * properties reflect the latest status. */
+       ret = drm_mode_object_get_properties(&connector->base, file_priv->atomic,
+                       (uint32_t __user *)(unsigned long)(out_resp->props_ptr),
+                       (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr),
+                       &out_resp->count_props);
+       drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out:
         drm_connector_put(connector);
  
         return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h

index c4a091e874269fd9ac79a025f9d37f250ac95520..e437fba1209d925cca7bf7f33b5651c3eeeda21a 100644 (file)
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -106,9 +106,10 @@ struct etnaviv_gem_submit {
         struct etnaviv_gpu *gpu;
         struct ww_acquire_ctx ticket;
         struct dma_fence *fence;
+       u32 flags;
         unsigned int nr_bos;
         struct etnaviv_gem_submit_bo bos[0];
-       u32 flags;
+       /* No new members here, the previous one is variable-length! */
  };
  
  int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c

index de80ee1b71dfa2e8380b6e74b2d8cc6ed4aa6f25..1013765274da4a4853c21b302dbc646dacba5760 100644 (file)
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -172,7 +172,7 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
         for (i = 0; i < submit->nr_bos; i++) {
                 struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
                 bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
-               bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
+               bool explicit = !!(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
  
                 ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
                                                  explicit);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c

index d689e511744e8f2fc9508e2d7345827c6a70bbb2..4bd1467c17b17c6225e27e05ccbde63444012f39 100644 (file)
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -292,6 +292,8 @@ static int per_file_stats(int id, void *ptr, void *data)
         struct file_stats *stats = data;
         struct i915_vma *vma;
  
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
+
         stats->count++;
         stats->total += obj->base.size;
         if (!obj->bind_count)
@@ -476,6 +478,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
                 struct drm_i915_gem_request *request;
                 struct task_struct *task;
  
+               mutex_lock(&dev->struct_mutex);
+
                 memset(&stats, 0, sizeof(stats));
                 stats.file_priv = file->driver_priv;
                 spin_lock(&file->table_lock);
@@ -487,7 +491,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
                  * still alive (e.g. get_pid(current) => fork() => exit()).
                  * Therefore, we need to protect this ->comm access using RCU.
                  */
-               mutex_lock(&dev->struct_mutex);
                 request = list_first_entry_or_null(&file_priv->mm.request_list,
                                                    struct drm_i915_gem_request,
                                                    client_link);
@@ -497,6 +500,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
                                 PIDTYPE_PID);
                 print_file_stats(m, task ? task->comm : "<unknown>", stats);
                 rcu_read_unlock();
+
                 mutex_unlock(&dev->struct_mutex);
         }
         mutex_unlock(&dev->filelist_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 462031cbd77f714b23a3b7645039c0d8dba71f40..615f0a855222f630d07311c92dce17d3bd371298 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2285,8 +2285,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
         struct page *page;
         unsigned long last_pfn = 0;     /* suppress gcc warning */
         unsigned int max_segment;
+       gfp_t noreclaim;
         int ret;
-       gfp_t gfp;
  
         /* Assert that the object is not currently in any GPU domain. As it
          * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2315,22 +2315,31 @@ rebuild_st:
          * Fail silently without starting the shrinker
          */
         mapping = obj->base.filp->f_mapping;
-       gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
-       gfp |= __GFP_NORETRY | __GFP_NOWARN;
+       noreclaim = mapping_gfp_constraint(mapping,
+                                          ~(__GFP_IO | __GFP_RECLAIM));
+       noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
         sg = st->sgl;
         st->nents = 0;
         for (i = 0; i < page_count; i++) {
-               page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-               if (unlikely(IS_ERR(page))) {
-                       i915_gem_shrink(dev_priv,
-                                       page_count,
-                                       I915_SHRINK_BOUND |
-                                       I915_SHRINK_UNBOUND |
-                                       I915_SHRINK_PURGEABLE);
+               const unsigned int shrink[] = {
+                       I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
+                       0,
+               }, *s = shrink;
+               gfp_t gfp = noreclaim;
+
+               do {
                         page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-               }
-               if (unlikely(IS_ERR(page))) {
-                       gfp_t reclaim;
+                       if (likely(!IS_ERR(page)))
+                               break;
+
+                       if (!*s) {
+                               ret = PTR_ERR(page);
+                               goto err_sg;
+                       }
+
+                       i915_gem_shrink(dev_priv, 2 * page_count, *s++);
+                       cond_resched();
  
                         /* We've tried hard to allocate the memory by reaping
                          * our own buffer, now let the real VM do its job and
@@ -2340,15 +2349,26 @@ rebuild_st:
                          * defer the oom here by reporting the ENOMEM back
                          * to userspace.
                          */
-                       reclaim = mapping_gfp_mask(mapping);
-                       reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
-
-                       page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
-                       if (IS_ERR(page)) {
-                               ret = PTR_ERR(page);
-                               goto err_sg;
+                       if (!*s) {
+                               /* reclaim and warn, but no oom */
+                               gfp = mapping_gfp_mask(mapping);
+
+                               /* Our bo are always dirty and so we require
+                                * kswapd to reclaim our pages (direct reclaim
+                                * does not effectively begin pageout of our
+                                * buffers on its own). However, direct reclaim
+                                * only waits for kswapd when under allocation
+                                * congestion. So as a result __GFP_RECLAIM is
+                                * unreliable and fails to actually reclaim our
+                                * dirty pages -- unless you try over and over
+                                * again with !__GFP_NORETRY. However, we still
+                                * want to fail this allocation rather than
+                                * trigger the out-of-memory killer and for
+                                * this we want the future __GFP_MAYFAIL.
+                                */
                         }
-               }
+               } while (1);
+
                 if (!i ||
                     sg->length >= max_segment ||
                     page_to_pfn(page) != last_pfn + 1) {
@@ -4222,6 +4242,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
  
         mapping = obj->base.filp->f_mapping;
         mapping_set_gfp_mask(mapping, mask);
+       GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
  
         i915_gem_object_init(obj, &i915_gem_object_ops);
  
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

index a3e59c8ef27baf4f3584ff5016635d8005735af6..9ad13eeed904d4d012c3fe93f6124b5ed5884b04 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -546,11 +546,12 @@ repeat:
  }
  
  static int
-i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
+i915_gem_execbuffer_relocate_entry(struct i915_vma *vma,
                                    struct eb_vmas *eb,
                                    struct drm_i915_gem_relocation_entry *reloc,
                                    struct reloc_cache *cache)
  {
+       struct drm_i915_gem_object *obj = vma->obj;
         struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
         struct drm_gem_object *target_obj;
         struct drm_i915_gem_object *target_i915_obj;
@@ -628,6 +629,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                 return -EINVAL;
         }
  
+       /*
+        * If we write into the object, we need to force the synchronisation
+        * barrier, either with an asynchronous clflush or if we executed the
+        * patching using the GPU (though that should be serialised by the
+        * timeline). To be completely sure, and since we are required to
+        * do relocations we are already stalling, disable the user's opt
+        * out of our synchronisation.
+        */
+       vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+
         ret = relocate_entry(obj, reloc, cache, target_offset);
         if (ret)
                 return ret;
@@ -678,7 +689,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
                 do {
                         u64 offset = r->presumed_offset;
  
-                       ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache);
+                       ret = i915_gem_execbuffer_relocate_entry(vma, eb, r, &cache);
                         if (ret)
                                 goto out;
  
@@ -726,7 +737,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
  
         reloc_cache_init(&cache, eb->i915);
         for (i = 0; i < entry->relocation_count; i++) {
-               ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
+               ret = i915_gem_execbuffer_relocate_entry(vma, eb, &relocs[i], &cache);
                 if (ret)
                         break;
         }
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index 5ddbc94997751adf5c9f04f7dd4a37a74d70de24..a74d0ac737cbeb7f9b9c5e93ea712a396e3c09d5 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
          * GPU processing the request, we never over-estimate the
          * position of the head.
          */
-       req->head = req->ring->tail;
+       req->head = req->ring->emit;
  
         /* Check that we didn't interrupt ourselves with a new request */
         GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c

index 1642fff9cf135d5edbe85864d1b327d59002c026..ab5140ba108ddcb2c9c5382cc6439223704f9fda 100644 (file)
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
         GEM_BUG_ON(freespace < wqi_size);
  
         /* The GuC firmware wants the tail index in QWords, not bytes */
-       tail = rq->tail;
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       tail >>= 3;
+       tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
         GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
  
         /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c

index 1aba47024656817190168984f1d076ceea710e9b..f066e2d785f5c9d30fbe544b3caec698a3b4f8d1 100644 (file)
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -650,6 +650,11 @@ int i915_vma_unbind(struct i915_vma *vma)
                                 break;
                 }
  
+               if (!ret) {
+                       ret = i915_gem_active_retire(&vma->last_fence,
+                                                    &vma->vm->i915->drm.struct_mutex);
+               }
+
                 __i915_vma_unpin(vma);
                 if (ret)
                         return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c

index 96b0b01677e26b22f382868f4b8b4c6dd738a4b3..9106ea32b048cac4783ae316d7cc198a4bf8ae88 100644 (file)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -120,7 +120,8 @@ static void intel_crtc_init_scalers(struct intel_crtc *crtc,
  static void skylake_pfit_enable(struct intel_crtc *crtc);
  static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force);
  static void ironlake_pfit_enable(struct intel_crtc *crtc);
-static void intel_modeset_setup_hw_state(struct drm_device *dev);
+static void intel_modeset_setup_hw_state(struct drm_device *dev,
+                                        struct drm_modeset_acquire_ctx *ctx);
  static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc);
  
  struct intel_limit {
@@ -3449,7 +3450,7 @@ __intel_display_resume(struct drm_device *dev,
         struct drm_crtc *crtc;
         int i, ret;
  
-       intel_modeset_setup_hw_state(dev);
+       intel_modeset_setup_hw_state(dev, ctx);
         i915_redisable_vga(to_i915(dev));
  
         if (!state)
@@ -5825,7 +5826,8 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state,
                 intel_update_watermarks(intel_crtc);
  }
  
-static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
+static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
+                                       struct drm_modeset_acquire_ctx *ctx)
  {
         struct intel_encoder *encoder;
         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5855,7 +5857,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
                 return;
         }
  
-       state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+       state->acquire_ctx = ctx;
  
         /* Everything's already locked, -EDEADLK can't happen. */
         crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
@@ -15030,7 +15032,7 @@ int intel_modeset_init(struct drm_device *dev)
         intel_setup_outputs(dev_priv);
  
         drm_modeset_lock_all(dev);
-       intel_modeset_setup_hw_state(dev);
+       intel_modeset_setup_hw_state(dev, dev->mode_config.acquire_ctx);
         drm_modeset_unlock_all(dev);
  
         for_each_intel_crtc(dev, crtc) {
@@ -15067,13 +15069,13 @@ int intel_modeset_init(struct drm_device *dev)
         return 0;
  }
  
-static void intel_enable_pipe_a(struct drm_device *dev)
+static void intel_enable_pipe_a(struct drm_device *dev,
+                               struct drm_modeset_acquire_ctx *ctx)
  {
         struct intel_connector *connector;
         struct drm_connector_list_iter conn_iter;
         struct drm_connector *crt = NULL;
         struct intel_load_detect_pipe load_detect_temp;
-       struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
         int ret;
  
         /* We can't just switch on the pipe A, we need to set things up with a
@@ -15145,7 +15147,8 @@ static bool has_pch_trancoder(struct drm_i915_private *dev_priv,
                 (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A);
  }
  
-static void intel_sanitize_crtc(struct intel_crtc *crtc)
+static void intel_sanitize_crtc(struct intel_crtc *crtc,
+                               struct drm_modeset_acquire_ctx *ctx)
  {
         struct drm_device *dev = crtc->base.dev;
         struct drm_i915_private *dev_priv = to_i915(dev);
@@ -15191,7 +15194,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                 plane = crtc->plane;
                 crtc->base.primary->state->visible = true;
                 crtc->plane = !plane;
-               intel_crtc_disable_noatomic(&crtc->base);
+               intel_crtc_disable_noatomic(&crtc->base, ctx);
                 crtc->plane = plane;
         }
  
@@ -15201,13 +15204,13 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
                  * resume. Force-enable the pipe to fix this, the update_dpms
                  * call below we restore the pipe to the right state, but leave
                  * the required bits on. */
-               intel_enable_pipe_a(dev);
+               intel_enable_pipe_a(dev, ctx);
         }
  
         /* Adjust the state of the output pipe according to whether we
          * have active connectors/encoders. */
         if (crtc->active && !intel_crtc_has_encoders(crtc))
-               intel_crtc_disable_noatomic(&crtc->base);
+               intel_crtc_disable_noatomic(&crtc->base, ctx);
  
         if (crtc->active || HAS_GMCH_DISPLAY(dev_priv)) {
                 /*
@@ -15505,7 +15508,8 @@ get_encoder_power_domains(struct drm_i915_private *dev_priv)
   * and sanitizes it to the current state
   */
  static void
-intel_modeset_setup_hw_state(struct drm_device *dev)
+intel_modeset_setup_hw_state(struct drm_device *dev,
+                            struct drm_modeset_acquire_ctx *ctx)
  {
         struct drm_i915_private *dev_priv = to_i915(dev);
         enum pipe pipe;
@@ -15525,7 +15529,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
         for_each_pipe(dev_priv, pipe) {
                 crtc = intel_get_crtc_for_pipe(dev_priv, pipe);
  
-               intel_sanitize_crtc(crtc);
+               intel_sanitize_crtc(crtc, ctx);
                 intel_dump_pipe_config(crtc, crtc->config,
                                        "[setup_hw_state]");
         }
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c

index 6532e226db29b63da766a8571de231de4f7261f6..40ba3134545ef7e339c5bfe347501eeb7715dac0 100644 (file)
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -119,8 +119,6 @@ static int intel_dp_aux_setup_backlight(struct intel_connector *connector,
         struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
         struct intel_panel *panel = &connector->panel;
  
-       intel_dp_aux_enable_backlight(connector);
-
         if (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT)
                 panel->backlight.max = 0xFFFF;
         else
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index dac4e003c1f317ec402110132bad0c3a734bf52a..62f44d3e7c43c0d90df093050d5af6d3d68fe3a3 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
                 rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
         u32 *reg_state = ce->lrc_reg_state;
  
-       assert_ring_tail_valid(rq->ring, rq->tail);
-       reg_state[CTX_RING_TAIL+1] = rq->tail;
+       reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
  
         /* True 32b PPGTT with dynamic page allocation: update PDP
          * registers and point the unallocated PDPs to scratch page.
@@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
                         ce->state->obj->mm.dirty = true;
                         i915_gem_object_unpin_map(ce->state->obj);
  
-                       ce->ring->head = ce->ring->tail = 0;
-                       intel_ring_update_space(ce->ring);
+                       intel_ring_reset(ce->ring, 0);
                 }
         }
  }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

index 66a2b8b83972691d04f2737337e7ea6cf6a72851..513a0f4b469b32c9d0ac2e87c089bb6f2e4907ba 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
  
  void intel_ring_update_space(struct intel_ring *ring)
  {
-       ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
+       ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
  }
  
  static int
@@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
  
         i915_gem_request_submit(request);
  
-       assert_ring_tail_valid(request->ring, request->tail);
-       I915_WRITE_TAIL(request->engine, request->tail);
+       I915_WRITE_TAIL(request->engine,
+                       intel_ring_set_tail(request->ring, request->tail));
  }
  
  static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
@@ -1316,11 +1316,23 @@ err:
         return PTR_ERR(addr);
  }
  
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+       GEM_BUG_ON(!list_empty(&ring->request_list));
+       ring->tail = tail;
+       ring->head = tail;
+       ring->emit = tail;
+       intel_ring_update_space(ring);
+}
+
  void intel_ring_unpin(struct intel_ring *ring)
  {
         GEM_BUG_ON(!ring->vma);
         GEM_BUG_ON(!ring->vaddr);
  
+       /* Discard any unused bytes beyond that submitted to hw. */
+       intel_ring_reset(ring, ring->tail);
+
         if (i915_vma_is_map_and_fenceable(ring->vma))
                 i915_vma_unpin_iomap(ring->vma);
         else
@@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
         struct intel_engine_cs *engine;
         enum intel_engine_id id;
  
+       /* Restart from the beginning of the rings for convenience */
         for_each_engine(engine, dev_priv, id)
-               engine->buffer->head = engine->buffer->tail;
+               intel_ring_reset(engine->buffer, 0);
  }
  
  static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
                 unsigned space;
  
                 /* Would completion of this request free enough space? */
-               space = __intel_ring_space(target->postfix, ring->tail,
+               space = __intel_ring_space(target->postfix, ring->emit,
                                            ring->size);
                 if (space >= bytes)
                         break;
@@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
  u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
  {
         struct intel_ring *ring = req->ring;
-       int remain_actual = ring->size - ring->tail;
-       int remain_usable = ring->effective_size - ring->tail;
+       int remain_actual = ring->size - ring->emit;
+       int remain_usable = ring->effective_size - ring->emit;
         int bytes = num_dwords * sizeof(u32);
         int total_bytes, wait_bytes;
         bool need_wrap = false;
@@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
  
         if (unlikely(need_wrap)) {
                 GEM_BUG_ON(remain_actual > ring->space);
-               GEM_BUG_ON(ring->tail + remain_actual > ring->size);
+               GEM_BUG_ON(ring->emit + remain_actual > ring->size);
  
                 /* Fill the tail with MI_NOOP */
-               memset(ring->vaddr + ring->tail, 0, remain_actual);
-               ring->tail = 0;
+               memset(ring->vaddr + ring->emit, 0, remain_actual);
+               ring->emit = 0;
                 ring->space -= remain_actual;
         }
  
-       GEM_BUG_ON(ring->tail > ring->size - bytes);
-       cs = ring->vaddr + ring->tail;
-       ring->tail += bytes;
+       GEM_BUG_ON(ring->emit > ring->size - bytes);
+       cs = ring->vaddr + ring->emit;
+       ring->emit += bytes;
         ring->space -= bytes;
         GEM_BUG_ON(ring->space < 0);
  
@@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
  int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
  {
         int num_dwords =
-               (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+               (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
         u32 *cs;
  
         if (num_dwords == 0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h

index a82a0807f64dbd0624728fe3c65215abe3647565..f7144fe0961347826c62e620af879bb4db9f0d77 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -145,6 +145,7 @@ struct intel_ring {
  
         u32 head;
         u32 tail;
+       u32 emit;
  
         int space;
         int size;
@@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  struct intel_ring *
  intel_engine_create_ring(struct intel_engine_cs *engine, int size);
  int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+void intel_ring_update_space(struct intel_ring *ring);
  void intel_ring_unpin(struct intel_ring *ring);
  void intel_ring_free(struct intel_ring *ring);
  
@@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
          * reserved for the command packet (i.e. the value passed to
          * intel_ring_begin()).
          */
-       GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
+       GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
  }
  
  static inline u32
@@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
         GEM_BUG_ON(tail >= ring->size);
  }
  
-void intel_ring_update_space(struct intel_ring *ring);
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+       /* Whilst writes to the tail are strictly ordered, there is no
+        * serialisation between readers and the writers. The tail may be
+        * read by i915_gem_request_retire() just as it is being updated
+        * by execlists, as although the breadcrumb is complete, the context
+        * switch hasn't been seen.
+        */
+       assert_ring_tail_valid(ring, tail);
+       ring->tail = tail;
+       return tail;
+}
  
  void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
  
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c

index 432480ff9d228857d57170b3353c143bf0501c3f..3178ba0c537c1915af3b857aad83efd6371f17ad 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3393,6 +3393,13 @@ void radeon_combios_asic_init(struct drm_device *dev)
             rdev->pdev->subsystem_vendor == 0x103c &&
             rdev->pdev->subsystem_device == 0x280a)
                 return;
+       /* quirk for rs4xx Toshiba Satellite L20-183 laptop to make it resume
+        * - it hangs on resume inside the dynclk 1 table.
+        */
+       if (rdev->family == CHIP_RS400 &&
+           rdev->pdev->subsystem_vendor == 0x1179 &&
+           rdev->pdev->subsystem_device == 0xff31)
+               return;
  
         /* DYN CLK 1 */
         table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c

index 6ecf42783d4b0c45539325edd2e5ffd2fc08e29f..0a6444d72000c434a6b494229a75c1a7d3e41631 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -136,6 +136,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = {
          * https://bugzilla.kernel.org/show_bug.cgi?id=51381
          */
         { PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+       /* Asus K53TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+        * https://bugs.freedesktop.org/show_bug.cgi?id=101491
+        */
+       { PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
         /* macbook pro 8.2 */
         { PCI_VENDOR_ID_ATI, 0x6741, PCI_VENDOR_ID_APPLE, 0x00e2, RADEON_PX_QUIRK_LONG_WAKEUP },
         { 0, 0, 0, 0, 0 },
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c

index 13db8a2851edd475cc1e44adedd44796c3ccbca1..1f013d45c9e9a3959dfa19300ba76fc37820592a 100644 (file)
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
@@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man)
         list_for_each_entry_safe(entry, next, &man->list, head)
                 vmw_cmdbuf_res_free(man, entry);
  
+       drm_ht_remove(&man->resources);
         kfree(man);
  }
  
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c

index 95ed17183e73e904e06b13c383bee28410797172..54a47b40546f69c7ea0d3dbf033c22c95f106516 100644 (file)
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -734,9 +734,9 @@ static int i2c_imx_dma_read(struct imx_i2c_struct *i2c_imx,
                  * the first read operation, otherwise the first read cost
                  * one extra clock cycle.
                  */
-               temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+               temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
                 temp |= I2CR_MTX;
-               writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+               imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
         }
         msgs->buf[msgs->len-1] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
  
@@ -857,9 +857,9 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
                                  * the first read operation, otherwise the first read cost
                                  * one extra clock cycle.
                                  */
-                               temp = readb(i2c_imx->base + IMX_I2C_I2CR);
+                               temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
                                 temp |= I2CR_MTX;
-                               writeb(temp, i2c_imx->base + IMX_I2C_I2CR);
+                               imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
                         }
                 } else if (i == (msgs->len - 2)) {
                         dev_dbg(&i2c_imx->adapter.dev,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c

index 9f7e18612322e212816189a14d5553f95823607e..dc2f59e33971cdb98eb050dfa5e0089a5654205a 100644 (file)
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -223,8 +223,8 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
         return 0;
  }
  
-static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
-                                struct ib_port_attr *props)
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+                               struct ib_port_attr *props)
  {
         struct mlx5_ib_dev *dev = to_mdev(device);
         struct mlx5_core_dev *mdev = dev->mdev;
@@ -232,12 +232,14 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
         enum ib_mtu ndev_ib_mtu;
         u16 qkey_viol_cntr;
         u32 eth_prot_oper;
+       int err;
  
         /* Possible bad flows are checked before filling out props so in case
          * of an error it will still be zeroed out.
          */
-       if (mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num))
-               return;
+       err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num);
+       if (err)
+               return err;
  
         translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
                                  &props->active_width);
@@ -258,7 +260,7 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
  
         ndev = mlx5_ib_get_netdev(device, port_num);
         if (!ndev)
-               return;
+               return 0;
  
         if (mlx5_lag_is_active(dev->mdev)) {
                 rcu_read_lock();
@@ -281,75 +283,49 @@ static void mlx5_query_port_roce(struct ib_device *device, u8 port_num,
         dev_put(ndev);
  
         props->active_mtu       = min(props->max_mtu, ndev_ib_mtu);
+       return 0;
  }
  
-static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
-                                    const struct ib_gid_attr *attr,
-                                    void *mlx5_addr)
+static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
+                        unsigned int index, const union ib_gid *gid,
+                        const struct ib_gid_attr *attr)
  {
-#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
-       char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
-                                              source_l3_address);
-       void *mlx5_addr_mac     = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
-                                              source_mac_47_32);
-
-       if (!gid)
-               return;
+       enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+       u8 roce_version = 0;
+       u8 roce_l3_type = 0;
+       bool vlan = false;
+       u8 mac[ETH_ALEN];
+       u16 vlan_id = 0;
  
-       ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+       if (gid) {
+               gid_type = attr->gid_type;
+               ether_addr_copy(mac, attr->ndev->dev_addr);
  
-       if (is_vlan_dev(attr->ndev)) {
-               MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
-               MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+               if (is_vlan_dev(attr->ndev)) {
+                       vlan = true;
+                       vlan_id = vlan_dev_vlan_id(attr->ndev);
+               }
         }
  
-       switch (attr->gid_type) {
+       switch (gid_type) {
         case IB_GID_TYPE_IB:
-               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+               roce_version = MLX5_ROCE_VERSION_1;
                 break;
         case IB_GID_TYPE_ROCE_UDP_ENCAP:
-               MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+               roce_version = MLX5_ROCE_VERSION_2;
+               if (ipv6_addr_v4mapped((void *)gid))
+                       roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
+               else
+                       roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
                 break;
  
         default:
-               WARN_ON(true);
+               mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
         }
  
-       if (attr->gid_type != IB_GID_TYPE_IB) {
-               if (ipv6_addr_v4mapped((void *)gid))
-                       MLX5_SET_RA(mlx5_addr, roce_l3_type,
-                                   MLX5_ROCE_L3_TYPE_IPV4);
-               else
-                       MLX5_SET_RA(mlx5_addr, roce_l3_type,
-                                   MLX5_ROCE_L3_TYPE_IPV6);
-       }
-
-       if ((attr->gid_type == IB_GID_TYPE_IB) ||
-           !ipv6_addr_v4mapped((void *)gid))
-               memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
-       else
-               memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
-}
-
-static int set_roce_addr(struct ib_device *device, u8 port_num,
-                        unsigned int index,
-                        const union ib_gid *gid,
-                        const struct ib_gid_attr *attr)
-{
-       struct mlx5_ib_dev *dev = to_mdev(device);
-       u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
-       u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
-       void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
-       enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
-
-       if (ll != IB_LINK_LAYER_ETHERNET)
-               return -EINVAL;
-
-       ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
-
-       MLX5_SET(set_roce_address_in, in, roce_address_index, index);
-       MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-       return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+       return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
+                                     roce_l3_type, gid->raw, mac, vlan,
+                                     vlan_id);
  }
  
  static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
@@ -357,13 +333,13 @@ static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
                            const struct ib_gid_attr *attr,
                            __always_unused void **context)
  {
-       return set_roce_addr(device, port_num, index, gid, attr);
+       return set_roce_addr(to_mdev(device), port_num, index, gid, attr);
  }
  
  static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
                            unsigned int index, __always_unused void **context)
  {
-       return set_roce_addr(device, port_num, index, NULL, NULL);
+       return set_roce_addr(to_mdev(device), port_num, index, NULL, NULL);
  }
  
  __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
@@ -978,20 +954,31 @@ out:
  int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
                        struct ib_port_attr *props)
  {
+       unsigned int count;
+       int ret;
+
         switch (mlx5_get_vport_access_method(ibdev)) {
         case MLX5_VPORT_ACCESS_METHOD_MAD:
-               return mlx5_query_mad_ifc_port(ibdev, port, props);
+               ret = mlx5_query_mad_ifc_port(ibdev, port, props);
+               break;
  
         case MLX5_VPORT_ACCESS_METHOD_HCA:
-               return mlx5_query_hca_port(ibdev, port, props);
+               ret = mlx5_query_hca_port(ibdev, port, props);
+               break;
  
         case MLX5_VPORT_ACCESS_METHOD_NIC:
-               mlx5_query_port_roce(ibdev, port, props);
-               return 0;
+               ret = mlx5_query_port_roce(ibdev, port, props);
+               break;
  
         default:
-               return -EINVAL;
+               ret = -EINVAL;
+       }
+
+       if (!ret && props) {
+               count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev);
+               props->gid_tbl_len -= count;
         }
+       return ret;
  }
  
  static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c

index 30b256a2c54ec42dd97b29ff0f0cb15be6d44510..de4025deaa4ad384db5b86f15165aa991dcb45b4 100644 (file)
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -742,7 +742,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
  
         if (type == NES_TIMER_TYPE_SEND) {
                 new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
-               atomic_inc(&new_send->skb->users);
+               refcount_inc(&new_send->skb->users);
                 spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
                 cm_node->send_entry = new_send;
                 add_ref_cm_node(cm_node);
@@ -924,7 +924,7 @@ static void nes_cm_timer_tick(unsigned long pass)
                                                   flags);
                                 break;
                         }
-                       atomic_inc(&send_entry->skb->users);
+                       refcount_inc(&send_entry->skb->users);
                         cm_packets_retrans++;
                         nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
                                   "for node %p, jiffies = %lu, time to send = "
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c

index e37d37273182097d412f30a3878ea0303f9f7a5b..f600f3a7a3c685488e1ede36058439c59f0703dc 100644 (file)
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -248,7 +248,8 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
  
         if (!btns_desc) {
                 dev_err(dev, "ACPI Button Descriptors not found\n");
-               return ERR_PTR(-ENODEV);
+               button_info = ERR_PTR(-ENODEV);
+               goto out;
         }
  
         /* The first package describes the collection */
@@ -264,24 +265,31 @@ static struct soc_button_info *soc_button_get_button_info(struct device *dev)
         }
         if (collection_uid == -1) {
                 dev_err(dev, "Invalid Button Collection Descriptor\n");
-               return ERR_PTR(-ENODEV);
+               button_info = ERR_PTR(-ENODEV);
+               goto out;
         }
  
         /* There are package.count - 1 buttons + 1 terminating empty entry */
         button_info = devm_kcalloc(dev, btns_desc->package.count,
                                    sizeof(*button_info), GFP_KERNEL);
-       if (!button_info)
-               return ERR_PTR(-ENOMEM);
+       if (!button_info) {
+               button_info = ERR_PTR(-ENOMEM);
+               goto out;
+       }
  
         /* Parse the button descriptors */
         for (i = 1, btn = 0; i < btns_desc->package.count; i++, btn++) {
                 if (soc_button_parse_btn_desc(dev,
                                               &btns_desc->package.elements[i],
                                               collection_uid,
-                                             &button_info[btn]))
-                       return ERR_PTR(-ENODEV);
+                                             &button_info[btn])) {
+                       button_info = ERR_PTR(-ENODEV);
+                       goto out;
+               }
         }
  
+out:
+       kfree(buf.pointer);
         return button_info;
  }
  
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c

index dea63e2db3e6213f5e83d6067870a16cc5707e6d..f5206e2c767ebf3579c2468b2a2956cc4bff3dcc 100644 (file)
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -31,9 +31,6 @@
  #define F54_GET_REPORT          1
  #define F54_FORCE_CAL           2
  
-/* Fixed sizes of reports */
-#define F54_QUERY_LEN                  27
-
  /* F54 capabilities */
  #define F54_CAP_BASELINE       (1 << 2)
  #define F54_CAP_IMAGE8         (1 << 3)
@@ -95,7 +92,6 @@ struct rmi_f54_reports {
  struct f54_data {
         struct rmi_function *fn;
  
-       u8 qry[F54_QUERY_LEN];
         u8 num_rx_electrodes;
         u8 num_tx_electrodes;
         u8 capabilities;
@@ -632,22 +628,23 @@ static int rmi_f54_detect(struct rmi_function *fn)
  {
         int error;
         struct f54_data *f54;
+       u8 buf[6];
  
         f54 = dev_get_drvdata(&fn->dev);
  
         error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr,
-                              &f54->qry, sizeof(f54->qry));
+                              buf, sizeof(buf));
         if (error) {
                 dev_err(&fn->dev, "%s: Failed to query F54 properties\n",
                         __func__);
                 return error;
         }
  
-       f54->num_rx_electrodes = f54->qry[0];
-       f54->num_tx_electrodes = f54->qry[1];
-       f54->capabilities = f54->qry[2];
-       f54->clock_rate = f54->qry[3] | (f54->qry[4] << 8);
-       f54->family = f54->qry[5];
+       f54->num_rx_electrodes = buf[0];
+       f54->num_tx_electrodes = buf[1];
+       f54->capabilities = buf[2];
+       f54->clock_rate = buf[3] | (buf[4] << 8);
+       f54->family = buf[5];
  
         rmi_dbg(RMI_DEBUG_FN, &fn->dev, "F54 num_rx_electrodes: %d\n",
                 f54->num_rx_electrodes);
diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h

index 09720d950686c844b49f1d7f32710e160d21624a..f932a83b4990210d8daeb25c1d2482b958c3719e 100644 (file)
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -723,6 +723,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
                         DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"),
                 },
         },
+       {
+               /* Fujitsu UH554 laptop */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK UH544"),
+               },
+       },
         { }
  };
  
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c

index eb7fbe15996304fc9eecca11a2b71b70bffd284d..929f8558bf1c0fe247d5c0ab99e40dfaaea094bd 100644 (file)
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -140,7 +140,7 @@ static inline void gic_map_to_vpe(unsigned int intr, unsigned int vpe)
  }
  
  #ifdef CONFIG_CLKSRC_MIPS_GIC
-u64 gic_read_count(void)
+u64 notrace gic_read_count(void)
  {
         unsigned int hi, hi2, lo;
  
@@ -167,7 +167,7 @@ unsigned int gic_get_count_width(void)
         return bits;
  }
  
-void gic_write_compare(u64 cnt)
+void notrace gic_write_compare(u64 cnt)
  {
         if (mips_cm_is64) {
                 gic_write(GIC_REG(VPE_LOCAL, GIC_VPE_COMPARE), cnt);
@@ -179,7 +179,7 @@ void gic_write_compare(u64 cnt)
         }
  }
  
-void gic_write_cpu_compare(u64 cnt, int cpu)
+void notrace gic_write_cpu_compare(u64 cnt, int cpu)
  {
         unsigned long flags;
  
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c

index 99e5f9751e8b1746835b28c4bd4e2a5d1b53fd14..c5603d1a07d6e86ce7f6913168daacb56b134ea3 100644 (file)
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -155,7 +155,7 @@ mISDN_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
         copied = skb->len + MISDN_HEADER_LEN;
         if (len < copied) {
                 if (flags & MSG_PEEK)
-                       atomic_dec(&skb->users);
+                       refcount_dec(&skb->users);
                 else
                         skb_queue_head(&sk->sk_receive_queue, skb);
                 return -ENOSPC;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c

index 7910bfe50da4469c44b571363cc6696f74f5fa42..93b18108816813bd4d49e0ba3e6eb57be7ae3d9f 100644 (file)
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1105,10 +1105,13 @@ static void schedule_autocommit(struct dm_integrity_c *ic)
  static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *dio)
  {
         struct bio *bio;
-       spin_lock_irq(&ic->endio_wait.lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ic->endio_wait.lock, flags);
         bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
         bio_list_add(&ic->flush_bio_list, bio);
-       spin_unlock_irq(&ic->endio_wait.lock);
+       spin_unlock_irqrestore(&ic->endio_wait.lock, flags);
+
         queue_work(ic->commit_wq, &ic->commit_work);
  }
  
@@ -3040,6 +3043,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 ti->error = "The device is too small";
                 goto bad;
         }
+       if (ti->len > ic->provided_data_sectors) {
+               r = -EINVAL;
+               ti->error = "Not enough provided sectors for requested mapping size";
+               goto bad;
+       }
  
         if (!buffer_sectors)
                 buffer_sectors = 1;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c

index 3702e502466d37a902c64a74a1f5ad7b516770bb..8d5ca30f655123611b5dcd2ba3d35b52e9c6c447 100644 (file)
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -317,8 +317,8 @@ static void do_region(int op, int op_flags, unsigned region,
         else if (op == REQ_OP_WRITE_SAME)
                 special_cmd_max_sectors = q->limits.max_write_same_sectors;
         if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
-            op == REQ_OP_WRITE_SAME)  &&
-           special_cmd_max_sectors == 0) {
+            op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
+               atomic_inc(&io->count);
                 dec_count(io, region, -EOPNOTSUPP);
                 return;
         }
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c

index 7d893228c40f50dd7d0017fca004b504fa27f567..b4b75dad816ad95c0028f1b64b2be73e8993caac 100644 (file)
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1927,7 +1927,7 @@ struct dm_raid_superblock {
         /********************************************************************
          * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
          *
-        * FEATURE_FLAG_SUPPORTS_V190 in the features member indicates that those exist
+        * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
          */
  
         __le32 flags; /* Flags defining array states for reshaping */
@@ -2092,6 +2092,11 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev)
         sb->layout = cpu_to_le32(mddev->layout);
         sb->stripe_sectors = cpu_to_le32(mddev->chunk_sectors);
  
+       /********************************************************************
+        * BELOW FOLLOW V1.9.0 EXTENSIONS TO THE PRISTINE SUPERBLOCK FORMAT!!!
+        *
+        * FEATURE_FLAG_SUPPORTS_V190 in the compat_features member indicates that those exist
+        */
         sb->new_level = cpu_to_le32(mddev->new_level);
         sb->new_layout = cpu_to_le32(mddev->new_layout);
         sb->new_stripe_sectors = cpu_to_le32(mddev->new_chunk_sectors);
@@ -2438,8 +2443,14 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
         mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
  
         if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
-               /* Retrieve device size stored in superblock to be prepared for shrink */
-               rdev->sectors = le64_to_cpu(sb->sectors);
+               /*
+                * Retrieve rdev size stored in superblock to be prepared for shrink.
+                * Check extended superblock members are present otherwise the size
+                * will not be set!
+                */
+               if (le32_to_cpu(sb->compat_features) & FEATURE_FLAG_SUPPORTS_V190)
+                       rdev->sectors = le64_to_cpu(sb->sectors);
+
                 rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
                 if (rdev->recovery_offset == MaxSector)
                         set_bit(In_sync, &rdev->flags);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c

index e61c45047c25a9ba2683c313fbc2151c9051b178..4da8858856fb3019c16d5681c241f8733dca21ec 100644 (file)
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
  
  struct dm_raid1_bio_record {
         struct mirror *m;
+       /* if details.bi_bdev == NULL, details were not saved */
         struct dm_bio_details details;
         region_t write_region;
  };
@@ -1198,6 +1199,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
         struct dm_raid1_bio_record *bio_record =
           dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
  
+       bio_record->details.bi_bdev = NULL;
+
         if (rw == WRITE) {
                 /* Save region for mirror_end_io() handler */
                 bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1256,12 +1259,22 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
         }
  
         if (error == -EOPNOTSUPP)
-               return error;
+               goto out;
  
         if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-               return error;
+               goto out;
  
         if (unlikely(error)) {
+               if (!bio_record->details.bi_bdev) {
+                       /*
+                        * There wasn't enough memory to record necessary
+                        * information for a retry or there was no other
+                        * mirror in-sync.
+                        */
+                       DMERR_LIMIT("Mirror read failed.");
+                       return -EIO;
+               }
+
                 m = bio_record->m;
  
                 DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1277,6 +1290,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                         bd = &bio_record->details;
  
                         dm_bio_restore(bd, bio);
+                       bio_record->details.bi_bdev = NULL;
                         bio->bi_error = 0;
  
                         queue_bio(ms, bio, rw);
@@ -1285,6 +1299,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                 DMERR("All replicated volumes dead, failing I/O");
         }
  
+out:
+       bio_record->details.bi_bdev = NULL;
+
         return error;
  }
  
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c

index 17ad50daed08ef5022b8648ef2e8701208c85a9d..28808e5ec0fd68346609eb33ac552359f5cff760 100644 (file)
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1094,6 +1094,19 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
                 return;
         }
  
+       /*
+        * Increment the unmapped blocks.  This prevents a race between the
+        * passdown io and reallocation of freed blocks.
+        */
+       r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+       if (r) {
+               metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+               bio_io_error(m->bio);
+               cell_defer_no_holder(tc, m->cell);
+               mempool_free(m, pool->mapping_pool);
+               return;
+       }
+
         discard_parent = bio_alloc(GFP_NOIO, 1);
         if (!discard_parent) {
                 DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
@@ -1114,19 +1127,6 @@ static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
                         end_discard(&op, r);
                 }
         }
-
-       /*
-        * Increment the unmapped blocks.  This prevents a race between the
-        * passdown io and reallocation of freed blocks.
-        */
-       r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
-       if (r) {
-               metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
-               bio_io_error(m->bio);
-               cell_defer_no_holder(tc, m->cell);
-               mempool_free(m, pool->mapping_pool);
-               return;
-       }
  }
  
  static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c

index 75488e65cd96cf6484e300d47280df77f1eec77a..8d46e3ad9529d46a2b2e27229668e517ef38552f 100644 (file)
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -245,8 +245,7 @@ static int arizona_poll_reg(struct arizona *arizona,
         int ret;
  
         ret = regmap_read_poll_timeout(arizona->regmap,
-                                      ARIZONA_INTERRUPT_RAW_STATUS_5, val,
-                                      ((val & mask) == target),
+                                      reg, val, ((val & mask) == target),
                                        ARIZONA_REG_POLL_DELAY_US,
                                        timeout_ms * 1000);
         if (ret)
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h

index 20bfb9ba83ea23d38880676198e279513ed70123..cbb4f8566bbe58a72eae8bbec0633b83687c5e13 100644 (file)
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -269,6 +269,10 @@ struct arcnet_local {
  
         struct timer_list       timer;
  
+       struct net_device *dev;
+       int reply_status;
+       struct tasklet_struct reply_tasklet;
+
         /*
          * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of
          * which can be used for either sending or receiving.  The new dynamic
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c

index 62ee439d58829574d732e84b8afba738871a43e5..fcfccbb3d9a2a0e5a6a84ee0daea317e5d2b32b6 100644 (file)
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -51,6 +51,7 @@
  #include <net/arp.h>
  #include <linux/init.h>
  #include <linux/jiffies.h>
+#include <linux/errqueue.h>
  
  #include <linux/leds.h>
  
@@ -391,6 +392,52 @@ static void arcnet_timer(unsigned long data)
         }
  }
  
+static void arcnet_reply_tasklet(unsigned long data)
+{
+       struct arcnet_local *lp = (struct arcnet_local *)data;
+
+       struct sk_buff *ackskb, *skb;
+       struct sock_exterr_skb *serr;
+       struct sock *sk;
+       int ret;
+
+       local_irq_disable();
+       skb = lp->outgoing.skb;
+       if (!skb || !skb->sk) {
+               local_irq_enable();
+               return;
+       }
+
+       sock_hold(skb->sk);
+       sk = skb->sk;
+       ackskb = skb_clone_sk(skb);
+       sock_put(skb->sk);
+
+       if (!ackskb) {
+               local_irq_enable();
+               return;
+       }
+
+       serr = SKB_EXT_ERR(ackskb);
+       memset(serr, 0, sizeof(*serr));
+       serr->ee.ee_errno = ENOMSG;
+       serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+       serr->ee.ee_data = skb_shinfo(skb)->tskey;
+       serr->ee.ee_info = lp->reply_status;
+
+       /* finally erasing outgoing skb */
+       dev_kfree_skb(lp->outgoing.skb);
+       lp->outgoing.skb = NULL;
+
+       ackskb->dev = lp->dev;
+
+       ret = sock_queue_err_skb(sk, ackskb);
+       if (ret)
+               kfree_skb(ackskb);
+
+       local_irq_enable();
+};
+
  struct net_device *alloc_arcdev(const char *name)
  {
         struct net_device *dev;
@@ -401,6 +448,7 @@ struct net_device *alloc_arcdev(const char *name)
         if (dev) {
                 struct arcnet_local *lp = netdev_priv(dev);
  
+               lp->dev = dev;
                 spin_lock_init(&lp->lock);
                 init_timer(&lp->timer);
                 lp->timer.data = (unsigned long) dev;
@@ -436,6 +484,9 @@ int arcnet_open(struct net_device *dev)
                 arc_cont(D_PROTO, "\n");
         }
  
+       tasklet_init(&lp->reply_tasklet, arcnet_reply_tasklet,
+                    (unsigned long)lp);
+
         arc_printk(D_INIT, dev, "arcnet_open: resetting card.\n");
  
         /* try to put the card in a defined state - if it fails the first
@@ -527,6 +578,8 @@ int arcnet_close(struct net_device *dev)
         netif_stop_queue(dev);
         netif_carrier_off(dev);
  
+       tasklet_kill(&lp->reply_tasklet);
+
         /* flush TX and disable RX */
         lp->hw.intmask(dev, 0);
         lp->hw.command(dev, NOTXcmd);   /* stop transmit */
@@ -635,13 +688,13 @@ netdev_tx_t arcnet_send_packet(struct sk_buff *skb,
                 txbuf = -1;
  
         if (txbuf != -1) {
+               lp->outgoing.skb = skb;
                 if (proto->prepare_tx(dev, pkt, skb->len, txbuf) &&
                     !proto->ack_tx) {
                         /* done right away and we don't want to acknowledge
                          *  the package later - forget about it now
                          */
                         dev->stats.tx_bytes += skb->len;
-                       dev_kfree_skb(skb);
                 } else {
                         /* do it the 'split' way */
                         lp->outgoing.proto = proto;
@@ -756,6 +809,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
         struct net_device *dev = dev_id;
         struct arcnet_local *lp;
         int recbuf, status, diagstatus, didsomething, boguscount;
+       unsigned long flags;
         int retval = IRQ_NONE;
  
         arc_printk(D_DURING, dev, "\n");
@@ -765,7 +819,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
         lp = netdev_priv(dev);
         BUG_ON(!lp);
  
-       spin_lock(&lp->lock);
+       spin_lock_irqsave(&lp->lock, flags);
  
         /* RESET flag was enabled - if device is not running, we must
          * clear it right away (but nothing else).
@@ -774,7 +828,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
                 if (lp->hw.status(dev) & RESETflag)
                         lp->hw.command(dev, CFLAGScmd | RESETclear);
                 lp->hw.intmask(dev, 0);
-               spin_unlock(&lp->lock);
+               spin_unlock_irqrestore(&lp->lock, flags);
                 return retval;
         }
  
@@ -842,8 +896,16 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
  
                 /* a transmit finished, and we're interested in it. */
                 if ((status & lp->intmask & TXFREEflag) || lp->timed_out) {
+                       int ackstatus;
                         lp->intmask &= ~(TXFREEflag | EXCNAKflag);
  
+                       if (status & TXACKflag)
+                               ackstatus = 2;
+                       else if (lp->excnak_pending)
+                               ackstatus = 1;
+                       else
+                               ackstatus = 0;
+
                         arc_printk(D_DURING, dev, "TX IRQ (stat=%Xh)\n",
                                    status);
  
@@ -866,18 +928,11 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
  
                                 if (lp->outgoing.proto &&
                                     lp->outgoing.proto->ack_tx) {
-                                       int ackstatus;
-
-                                       if (status & TXACKflag)
-                                               ackstatus = 2;
-                                       else if (lp->excnak_pending)
-                                               ackstatus = 1;
-                                       else
-                                               ackstatus = 0;
-
                                         lp->outgoing.proto
                                                 ->ack_tx(dev, ackstatus);
                                 }
+                               lp->reply_status = ackstatus;
+                               tasklet_hi_schedule(&lp->reply_tasklet);
                         }
                         if (lp->cur_tx != -1)
                                 release_arcbuf(dev, lp->cur_tx);
@@ -998,7 +1053,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id)
         udelay(1);
         lp->hw.intmask(dev, lp->intmask);
  
-       spin_unlock(&lp->lock);
+       spin_unlock_irqrestore(&lp->lock, flags);
         return retval;
  }
  EXPORT_SYMBOL(arcnet_interrupt);
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c

index a80f4eb9262d58305e29af45e71ba352273e8aa5..b780be6f41ff9f72f40c2a56c3fcccaf8b877511 100644 (file)
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -212,7 +212,7 @@ static int ack_tx(struct net_device *dev, int acked)
         ackpkt->soft.cap.proto = 0; /* using protocol 0 for acknowledge */
         ackpkt->soft.cap.mes.ack = acked;
  
-       arc_printk(D_PROTO, dev, "Ackknowledge for cap packet %x.\n",
+       arc_printk(D_PROTO, dev, "Acknowledge for cap packet %x.\n",
                    *((int *)&ackpkt->soft.cap.cookie[0]));
  
         ackskb->protocol = cpu_to_be16(ETH_P_ARCNET);
diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c

index 239de38fbd6a588bbb0e90e3452ea60ca1e5a161..2d956cb59d06d33c2f605a988a5884b1c21a1012 100644 (file)
--- a/drivers/net/arcnet/com20020-pci.c
+++ b/drivers/net/arcnet/com20020-pci.c
@@ -93,6 +93,27 @@ static void led_recon_set(struct led_classdev *led_cdev,
         outb(!!value, priv->misc + ci->leds[card->index].red);
  }
  
+/* Show callback for backplane_mode: prints lp->backplane as true/false. */
+static ssize_t backplane_mode_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct arcnet_local *lp = netdev_priv(to_net_dev(dev));
+       const char *state = lp->backplane ? "true" : "false";
+
+       return sprintf(buf, "%s\n", state);
+}
+static DEVICE_ATTR_RO(backplane_mode);
+
+static struct attribute *com20020_state_attrs[] = {
+       &dev_attr_backplane_mode.attr,  /* attribute behind backplane_mode */
+       NULL,                           /* end-of-list marker */
+};
+
+static const struct attribute_group com20020_state_group = {
+       .name = NULL,                   /* unnamed attribute group */
+       .attrs = com20020_state_attrs,  /* NULL-terminated list above */
+};
+
  static void com20020pci_remove(struct pci_dev *pdev);
  
  static int com20020pci_probe(struct pci_dev *pdev,
@@ -135,6 +156,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
         for (i = 0; i < ci->devcount; i++) {
                 struct com20020_pci_channel_map *cm = &ci->chan_map_tbl[i];
                 struct com20020_dev *card;
+               int dev_id_mask = 0xf;
  
                 dev = alloc_arcdev(device);
                 if (!dev) {
@@ -166,8 +188,10 @@ static int com20020pci_probe(struct pci_dev *pdev,
                 arcnet_outb(0x00, ioaddr, COM20020_REG_W_COMMAND);
                 arcnet_inb(ioaddr, COM20020_REG_R_DIAGSTAT);
  
+               SET_NETDEV_DEV(dev, &pdev->dev);
                 dev->base_addr = ioaddr;
                 dev->dev_addr[0] = node;
+               dev->sysfs_groups[0] = &com20020_state_group;
                 dev->irq = pdev->irq;
                 lp->card_name = "PCI COM20020";
                 lp->card_flags = ci->flags;
@@ -177,10 +201,15 @@ static int com20020pci_probe(struct pci_dev *pdev,
                 lp->timeout = timeout;
                 lp->hw.owner = THIS_MODULE;
  
+               lp->backplane = (inb(priv->misc) >> (2 + i)) & 0x1;
+
+               if (!strncmp(ci->name, "EAE PLX-PCI FB2", 15))
+                       lp->backplane = 1;
+
                 /* Get the dev_id from the PLX rotary coder */
                 if (!strncmp(ci->name, "EAE PLX-PCI MA1", 15))
-                       dev->dev_id = 0xc;
-               dev->dev_id ^= inb(priv->misc + ci->rotary) >> 4;
+                       dev_id_mask = 0x3;
+               dev->dev_id = (inb(priv->misc + ci->rotary) >> 4) & dev_id_mask;
  
                 snprintf(dev->name, sizeof(dev->name), "arc%d-%d", dev->dev_id, i);
  
@@ -361,6 +390,31 @@ static struct com20020_pci_card_info card_info_eae_ma1 = {
         .flags = ARC_CAN_10MBIT,
  };
  
+static struct com20020_pci_card_info card_info_eae_fb2 = {
+       .name = "EAE PLX-PCI FB2",      /* probe matches on this name */
+       .devcount = 1,                  /* probe allocates one arcdev per count */
+       .chan_map_tbl = {               /* indexed by device number in probe */
+               {
+                       .bar = 2,
+                       .offset = 0x00,
+                       .size = 0x08,
+               },
+       },
+       .misc_map = {                   /* presumably backs priv->misc - confirm */
+               .bar = 2,
+               .offset = 0x10,
+               .size = 0x04,
+       },
+       .leds = {                       /* indexed by card->index */
+               {
+                       .green = 0x0,
+                       .red = 0x1,     /* offset added to priv->misc */
+               },
+       },
+       .rotary = 0x0,                  /* priv->misc offset read for dev_id */
+       .flags = ARC_CAN_10MBIT,        /* copied into lp->card_flags */
+};
+
  static const struct pci_device_id com20020pci_id_table[] = {
         {
                 0x1571, 0xa001,
@@ -506,6 +560,12 @@ static const struct pci_device_id com20020pci_id_table[] = {
                 0, 0,
                 (kernel_ulong_t)&card_info_eae_ma1
         },
+       {
+               0x10B5, 0x9050,
+               0x10B5, 0x3294,
+               0, 0,
+               (kernel_ulong_t)&card_info_eae_fb2
+       },
         {
                 0x14BA, 0x6000,
                 PCI_ANY_ID, PCI_ANY_ID,
diff --git a/drivers/net/arcnet/com20020.c b/drivers/net/arcnet/com20020.c

index 13d9ad4b3f5c977e99f3ac2f38d3f244de3ae203..78043a9c5981e5a0b8d562879de0f77e0763aa75 100644 (file)
--- a/drivers/net/arcnet/com20020.c
+++ b/drivers/net/arcnet/com20020.c
@@ -246,8 +246,6 @@ int com20020_found(struct net_device *dev, int shared)
                 return -ENODEV;
         }
  
-       dev->base_addr = ioaddr;
-
         arc_printk(D_NORMAL, dev, "%s: station %02Xh found at %03lXh, IRQ %d.\n",
                    lp->card_name, dev->dev_addr[0], dev->base_addr, dev->irq);
  
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c

index 8ca683396fccebd072133f0ae69b8e2db27a6484..a12d603d41c6616ed3cba926b204f94492678ff3 100644 (file)
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -464,7 +464,7 @@ const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val)
  
  /* Searches for a value in opt's values[] table which matches the flagmask */
  static const struct bond_opt_value *bond_opt_get_flags(const struct bond_option *opt,
-                                                u32 flagmask)
+                                                      u32 flagmask)
  {
         int i;
  
@@ -744,14 +744,14 @@ static int bond_option_mode_set(struct bonding *bond,
                                 const struct bond_opt_value *newval)
  {
         if (!bond_mode_uses_arp(newval->value) && bond->params.arp_interval) {
-               netdev_info(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
-                           newval->string);
+               netdev_dbg(bond->dev, "%s mode is incompatible with arp monitoring, start mii monitoring\n",
+                          newval->string);
                 /* disable arp monitoring */
                 bond->params.arp_interval = 0;
                 /* set miimon to default value */
                 bond->params.miimon = BOND_DEFAULT_MIIMON;
-               netdev_info(bond->dev, "Setting MII monitoring interval to %d\n",
-                           bond->params.miimon);
+               netdev_dbg(bond->dev, "Setting MII monitoring interval to %d\n",
+                          bond->params.miimon);
         }
  
         /* don't cache arp_validate between modes */
@@ -794,7 +794,7 @@ static int bond_option_active_slave_set(struct bonding *bond,
         block_netpoll_tx();
         /* check to see if we are clearing active */
         if (!slave_dev) {
-               netdev_info(bond->dev, "Clearing current active slave\n");
+               netdev_dbg(bond->dev, "Clearing current active slave\n");
                 RCU_INIT_POINTER(bond->curr_active_slave, NULL);
                 bond_select_active_slave(bond);
         } else {
@@ -805,13 +805,13 @@ static int bond_option_active_slave_set(struct bonding *bond,
  
                 if (new_active == old_active) {
                         /* do nothing */
-                       netdev_info(bond->dev, "%s is already the current active slave\n",
-                                   new_active->dev->name);
+                       netdev_dbg(bond->dev, "%s is already the current active slave\n",
+                                  new_active->dev->name);
                 } else {
                         if (old_active && (new_active->link == BOND_LINK_UP) &&
                             bond_slave_is_up(new_active)) {
-                               netdev_info(bond->dev, "Setting %s as active slave\n",
-                                           new_active->dev->name);
+                               netdev_dbg(bond->dev, "Setting %s as active slave\n",
+                                          new_active->dev->name);
                                 bond_change_active_slave(bond, new_active);
                         } else {
                                 netdev_err(bond->dev, "Could not set %s as active slave; either %s is down or the link is down\n",
@@ -833,17 +833,17 @@ static int bond_option_active_slave_set(struct bonding *bond,
  static int bond_option_miimon_set(struct bonding *bond,
                                   const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting MII monitoring interval to %llu\n",
-                   newval->value);
+       netdev_dbg(bond->dev, "Setting MII monitoring interval to %llu\n",
+                  newval->value);
         bond->params.miimon = newval->value;
         if (bond->params.updelay)
-               netdev_info(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n",
-                       bond->params.updelay * bond->params.miimon);
+               netdev_dbg(bond->dev, "Note: Updating updelay (to %d) since it is a multiple of the miimon value\n",
+                          bond->params.updelay * bond->params.miimon);
         if (bond->params.downdelay)
-               netdev_info(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
-                           bond->params.downdelay * bond->params.miimon);
+               netdev_dbg(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
+                          bond->params.downdelay * bond->params.miimon);
         if (newval->value && bond->params.arp_interval) {
-               netdev_info(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
+               netdev_dbg(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
                 bond->params.arp_interval = 0;
                 if (bond->params.arp_validate)
                         bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
@@ -885,8 +885,8 @@ static int bond_option_updelay_set(struct bonding *bond,
                             bond->params.miimon);
         }
         bond->params.updelay = value / bond->params.miimon;
-       netdev_info(bond->dev, "Setting up delay to %d\n",
-                   bond->params.updelay * bond->params.miimon);
+       netdev_dbg(bond->dev, "Setting up delay to %d\n",
+                  bond->params.updelay * bond->params.miimon);
  
         return 0;
  }
@@ -907,8 +907,8 @@ static int bond_option_downdelay_set(struct bonding *bond,
                             bond->params.miimon);
         }
         bond->params.downdelay = value / bond->params.miimon;
-       netdev_info(bond->dev, "Setting down delay to %d\n",
-                   bond->params.downdelay * bond->params.miimon);
+       netdev_dbg(bond->dev, "Setting down delay to %d\n",
+                  bond->params.downdelay * bond->params.miimon);
  
         return 0;
  }
@@ -916,8 +916,8 @@ static int bond_option_downdelay_set(struct bonding *bond,
  static int bond_option_use_carrier_set(struct bonding *bond,
                                        const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting use_carrier to %llu\n",
-                   newval->value);
+       netdev_dbg(bond->dev, "Setting use_carrier to %llu\n",
+                  newval->value);
         bond->params.use_carrier = newval->value;
  
         return 0;
@@ -930,16 +930,16 @@ static int bond_option_use_carrier_set(struct bonding *bond,
  static int bond_option_arp_interval_set(struct bonding *bond,
                                         const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting ARP monitoring interval to %llu\n",
-                   newval->value);
+       netdev_dbg(bond->dev, "Setting ARP monitoring interval to %llu\n",
+                  newval->value);
         bond->params.arp_interval = newval->value;
         if (newval->value) {
                 if (bond->params.miimon) {
-                       netdev_info(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n");
+                       netdev_dbg(bond->dev, "ARP monitoring cannot be used with MII monitoring. Disabling MII monitoring\n");
                         bond->params.miimon = 0;
                 }
                 if (!bond->params.arp_targets[0])
-                       netdev_info(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n");
+                       netdev_dbg(bond->dev, "ARP monitoring has been set up, but no ARP targets have been specified\n");
         }
         if (bond->dev->flags & IFF_UP) {
                 /* If the interface is up, we may need to fire off
@@ -1000,7 +1000,7 @@ static int _bond_option_arp_ip_target_add(struct bonding *bond, __be32 target)
                 return -EINVAL;
         }
  
-       netdev_info(bond->dev, "Adding ARP target %pI4\n", &target);
+       netdev_dbg(bond->dev, "Adding ARP target %pI4\n", &target);
  
         _bond_options_arp_ip_target_set(bond, ind, target, jiffies);
  
@@ -1036,7 +1036,7 @@ static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target)
         if (ind == 0 && !targets[1] && bond->params.arp_interval)
                 netdev_warn(bond->dev, "Removing last arp target with arp_interval on\n");
  
-       netdev_info(bond->dev, "Removing ARP target %pI4\n", &target);
+       netdev_dbg(bond->dev, "Removing ARP target %pI4\n", &target);
  
         bond_for_each_slave(bond, slave, iter) {
                 targets_rx = slave->target_last_arp_rx;
@@ -1088,8 +1088,8 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
  static int bond_option_arp_validate_set(struct bonding *bond,
                                         const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting arp_validate to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting arp_validate to %s (%llu)\n",
+                  newval->string, newval->value);
  
         if (bond->dev->flags & IFF_UP) {
                 if (!newval->value)
@@ -1105,8 +1105,8 @@ static int bond_option_arp_validate_set(struct bonding *bond,
  static int bond_option_arp_all_targets_set(struct bonding *bond,
                                            const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting arp_all_targets to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting arp_all_targets to %s (%llu)\n",
+                  newval->string, newval->value);
         bond->params.arp_all_targets = newval->value;
  
         return 0;
@@ -1126,7 +1126,7 @@ static int bond_option_primary_set(struct bonding *bond,
                 *p = '\0';
         /* check to see if we are clearing primary */
         if (!strlen(primary)) {
-               netdev_info(bond->dev, "Setting primary slave to None\n");
+               netdev_dbg(bond->dev, "Setting primary slave to None\n");
                 RCU_INIT_POINTER(bond->primary_slave, NULL);
                 memset(bond->params.primary, 0, sizeof(bond->params.primary));
                 bond_select_active_slave(bond);
@@ -1135,8 +1135,8 @@ static int bond_option_primary_set(struct bonding *bond,
  
         bond_for_each_slave(bond, slave, iter) {
                 if (strncmp(slave->dev->name, primary, IFNAMSIZ) == 0) {
-                       netdev_info(bond->dev, "Setting %s as primary slave\n",
-                                   slave->dev->name);
+                       netdev_dbg(bond->dev, "Setting %s as primary slave\n",
+                                  slave->dev->name);
                         rcu_assign_pointer(bond->primary_slave, slave);
                         strcpy(bond->params.primary, slave->dev->name);
                         bond_select_active_slave(bond);
@@ -1145,15 +1145,15 @@ static int bond_option_primary_set(struct bonding *bond,
         }
  
         if (rtnl_dereference(bond->primary_slave)) {
-               netdev_info(bond->dev, "Setting primary slave to None\n");
+               netdev_dbg(bond->dev, "Setting primary slave to None\n");
                 RCU_INIT_POINTER(bond->primary_slave, NULL);
                 bond_select_active_slave(bond);
         }
         strncpy(bond->params.primary, primary, IFNAMSIZ);
         bond->params.primary[IFNAMSIZ - 1] = 0;
  
-       netdev_info(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n",
-                   primary, bond->dev->name);
+       netdev_dbg(bond->dev, "Recording %s as primary, but it has not been enslaved to %s yet\n",
+                  primary, bond->dev->name);
  
  out:
         unblock_netpoll_tx();
@@ -1164,8 +1164,8 @@ out:
  static int bond_option_primary_reselect_set(struct bonding *bond,
                                             const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting primary_reselect to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting primary_reselect to %s (%llu)\n",
+                  newval->string, newval->value);
         bond->params.primary_reselect = newval->value;
  
         block_netpoll_tx();
@@ -1178,8 +1178,8 @@ static int bond_option_primary_reselect_set(struct bonding *bond,
  static int bond_option_fail_over_mac_set(struct bonding *bond,
                                          const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting fail_over_mac to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting fail_over_mac to %s (%llu)\n",
+                  newval->string, newval->value);
         bond->params.fail_over_mac = newval->value;
  
         return 0;
@@ -1188,8 +1188,8 @@ static int bond_option_fail_over_mac_set(struct bonding *bond,
  static int bond_option_xmit_hash_policy_set(struct bonding *bond,
                                             const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting xmit hash policy to %s (%llu)\n",
+                  newval->string, newval->value);
         bond->params.xmit_policy = newval->value;
  
         return 0;
@@ -1198,8 +1198,8 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond,
  static int bond_option_resend_igmp_set(struct bonding *bond,
                                        const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting resend_igmp to %llu\n",
-                   newval->value);
+       netdev_dbg(bond->dev, "Setting resend_igmp to %llu\n",
+                  newval->value);
         bond->params.resend_igmp = newval->value;
  
         return 0;
@@ -1237,8 +1237,8 @@ static int bond_option_all_slaves_active_set(struct bonding *bond,
  static int bond_option_min_links_set(struct bonding *bond,
                                      const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting min links value to %llu\n",
-                   newval->value);
+       netdev_dbg(bond->dev, "Setting min links value to %llu\n",
+                  newval->value);
         bond->params.min_links = newval->value;
         bond_set_carrier(bond);
  
@@ -1256,6 +1256,8 @@ static int bond_option_lp_interval_set(struct bonding *bond,
  static int bond_option_pps_set(struct bonding *bond,
                                const struct bond_opt_value *newval)
  {
+       netdev_dbg(bond->dev, "Setting packets per slave to %llu\n",
+                  newval->value);
         bond->params.packets_per_slave = newval->value;
         if (newval->value > 0) {
                 bond->params.reciprocal_packets_per_slave =
@@ -1274,8 +1276,8 @@ static int bond_option_pps_set(struct bonding *bond,
  static int bond_option_lacp_rate_set(struct bonding *bond,
                                      const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting LACP rate to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting LACP rate to %s (%llu)\n",
+                  newval->string, newval->value);
         bond->params.lacp_fast = newval->value;
         bond_3ad_update_lacp_rate(bond);
  
@@ -1285,8 +1287,8 @@ static int bond_option_lacp_rate_set(struct bonding *bond,
  static int bond_option_ad_select_set(struct bonding *bond,
                                      const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting ad_select to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting ad_select to %s (%llu)\n",
+                  newval->string, newval->value);
         bond->params.ad_select = newval->value;
  
         return 0;
@@ -1347,7 +1349,7 @@ out:
         return ret;
  
  err_no_cmd:
-       netdev_info(bond->dev, "invalid input for queue_id set\n");
+       netdev_dbg(bond->dev, "invalid input for queue_id set\n");
         ret = -EPERM;
         goto out;
  
@@ -1369,20 +1371,20 @@ static int bond_option_slaves_set(struct bonding *bond,
  
         dev = __dev_get_by_name(dev_net(bond->dev), ifname);
         if (!dev) {
-               netdev_info(bond->dev, "interface %s does not exist!\n",
-                           ifname);
+               netdev_dbg(bond->dev, "interface %s does not exist!\n",
+                          ifname);
                 ret = -ENODEV;
                 goto out;
         }
  
         switch (command[0]) {
         case '+':
-               netdev_info(bond->dev, "Adding slave %s\n", dev->name);
+               netdev_dbg(bond->dev, "Adding slave %s\n", dev->name);
                 ret = bond_enslave(bond->dev, dev);
                 break;
  
         case '-':
-               netdev_info(bond->dev, "Removing slave %s\n", dev->name);
+               netdev_dbg(bond->dev, "Removing slave %s\n", dev->name);
                 ret = bond_release(bond->dev, dev);
                 break;
  
@@ -1402,8 +1404,8 @@ err_no_cmd:
  static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
                                           const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting dynamic-lb to %s (%llu)\n",
-                   newval->string, newval->value);
+       netdev_dbg(bond->dev, "Setting dynamic-lb to %s (%llu)\n",
+                  newval->string, newval->value);
         bond->params.tlb_dynamic_lb = newval->value;
  
         return 0;
@@ -1412,8 +1414,8 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
  static int bond_option_ad_actor_sys_prio_set(struct bonding *bond,
                                              const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n",
-                   newval->value);
+       netdev_dbg(bond->dev, "Setting ad_actor_sys_prio to %llu\n",
+                  newval->value);
  
         bond->params.ad_actor_sys_prio = newval->value;
         bond_3ad_update_ad_actor_settings(bond);
@@ -1442,7 +1444,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond,
         if (!is_valid_ether_addr(mac))
                 goto err;
  
-       netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac);
+       netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac);
         ether_addr_copy(bond->params.ad_actor_system, mac);
         bond_3ad_update_ad_actor_settings(bond);
  
@@ -1456,8 +1458,8 @@ err:
  static int bond_option_ad_user_port_key_set(struct bonding *bond,
                                             const struct bond_opt_value *newval)
  {
-       netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n",
-                   newval->value);
+       netdev_dbg(bond->dev, "Setting ad_user_port_key to %llu\n",
+                  newval->value);
  
         bond->params.ad_user_port_key = newval->value;
         return 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h

index 127adbeefb105cc031f3782b534d175f29fb7143..9795419aac2da8c5292a4384c751206d98edbfef 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -123,38 +123,13 @@
  #define DMA_ISR                                0x3008
  #define DMA_AXIARCR                    0x3010
  #define DMA_AXIAWCR                    0x3018
+#define DMA_AXIAWARCR                  0x301c
  #define DMA_DSR0                       0x3020
  #define DMA_DSR1                       0x3024
+#define DMA_TXEDMACR                   0x3040
+#define DMA_RXEDMACR                   0x3044
  
  /* DMA register entry bit positions and sizes */
-#define DMA_AXIARCR_DRC_INDEX          0
-#define DMA_AXIARCR_DRC_WIDTH          4
-#define DMA_AXIARCR_DRD_INDEX          4
-#define DMA_AXIARCR_DRD_WIDTH          2
-#define DMA_AXIARCR_TEC_INDEX          8
-#define DMA_AXIARCR_TEC_WIDTH          4
-#define DMA_AXIARCR_TED_INDEX          12
-#define DMA_AXIARCR_TED_WIDTH          2
-#define DMA_AXIARCR_THC_INDEX          16
-#define DMA_AXIARCR_THC_WIDTH          4
-#define DMA_AXIARCR_THD_INDEX          20
-#define DMA_AXIARCR_THD_WIDTH          2
-#define DMA_AXIAWCR_DWC_INDEX          0
-#define DMA_AXIAWCR_DWC_WIDTH          4
-#define DMA_AXIAWCR_DWD_INDEX          4
-#define DMA_AXIAWCR_DWD_WIDTH          2
-#define DMA_AXIAWCR_RPC_INDEX          8
-#define DMA_AXIAWCR_RPC_WIDTH          4
-#define DMA_AXIAWCR_RPD_INDEX          12
-#define DMA_AXIAWCR_RPD_WIDTH          2
-#define DMA_AXIAWCR_RHC_INDEX          16
-#define DMA_AXIAWCR_RHC_WIDTH          4
-#define DMA_AXIAWCR_RHD_INDEX          20
-#define DMA_AXIAWCR_RHD_WIDTH          2
-#define DMA_AXIAWCR_TDC_INDEX          24
-#define DMA_AXIAWCR_TDC_WIDTH          4
-#define DMA_AXIAWCR_TDD_INDEX          28
-#define DMA_AXIAWCR_TDD_WIDTH          2
  #define DMA_ISR_MACIS_INDEX            17
  #define DMA_ISR_MACIS_WIDTH            1
  #define DMA_ISR_MTLIS_INDEX            16
@@ -163,14 +138,31 @@
  #define DMA_MR_INTM_WIDTH              2
  #define DMA_MR_SWR_INDEX               0
  #define DMA_MR_SWR_WIDTH               1
+#define DMA_RXEDMACR_RDPS_INDEX                0
+#define DMA_RXEDMACR_RDPS_WIDTH                3
+#define DMA_SBMR_AAL_INDEX             12
+#define DMA_SBMR_AAL_WIDTH             1
  #define DMA_SBMR_EAME_INDEX            11
  #define DMA_SBMR_EAME_WIDTH            1
-#define DMA_SBMR_BLEN_256_INDEX                7
-#define DMA_SBMR_BLEN_256_WIDTH                1
+#define DMA_SBMR_BLEN_INDEX            1
+#define DMA_SBMR_BLEN_WIDTH            7
+#define DMA_SBMR_RD_OSR_LMT_INDEX      16
+#define DMA_SBMR_RD_OSR_LMT_WIDTH      6
  #define DMA_SBMR_UNDEF_INDEX           0
  #define DMA_SBMR_UNDEF_WIDTH           1
+#define DMA_SBMR_WR_OSR_LMT_INDEX      24
+#define DMA_SBMR_WR_OSR_LMT_WIDTH      6
+#define DMA_TXEDMACR_TDPS_INDEX                0
+#define DMA_TXEDMACR_TDPS_WIDTH                3
  
  /* DMA register values */
+#define DMA_SBMR_BLEN_256              256
+#define DMA_SBMR_BLEN_128              128
+#define DMA_SBMR_BLEN_64               64
+#define DMA_SBMR_BLEN_32               32
+#define DMA_SBMR_BLEN_16               16
+#define DMA_SBMR_BLEN_8                        8
+#define DMA_SBMR_BLEN_4                        4
  #define DMA_DSR_RPS_WIDTH              4
  #define DMA_DSR_TPS_WIDTH              4
  #define DMA_DSR_Q_WIDTH                        (DMA_DSR_RPS_WIDTH + DMA_DSR_TPS_WIDTH)
@@ -959,6 +951,7 @@
  #define XP_DRIVER_INT_RO               0x0064
  #define XP_DRIVER_SCRATCH_0            0x0068
  #define XP_DRIVER_SCRATCH_1            0x006c
+#define XP_INT_REISSUE_EN              0x0074
  #define XP_INT_EN                      0x0078
  #define XP_I2C_MUTEX                   0x0080
  #define XP_MDIO_MUTEX                  0x0084
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c

index 0a98c369df2045ccbb9fbf7a55af848530a5f464..45d92304068eb5ddb4d48a7fa57996d340732429 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -176,8 +176,8 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata)
  
         DBGPR("-->xgbe_free_ring_resources\n");
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 xgbe_free_ring(pdata, channel->tx_ring);
                 xgbe_free_ring(pdata, channel->rx_ring);
         }
@@ -185,34 +185,60 @@ static void xgbe_free_ring_resources(struct xgbe_prv_data *pdata)
         DBGPR("<--xgbe_free_ring_resources\n");
  }
  
+static void *xgbe_alloc_node(size_t size, int node)
+{
+       void *mem;
+
+       mem = kzalloc_node(size, GFP_KERNEL, node);
+       if (!mem)
+               mem = kzalloc(size, GFP_KERNEL);
+
+       return mem;
+}
+
+static void *xgbe_dma_alloc_node(struct device *dev, size_t size,
+                                dma_addr_t *dma, int node)
+{
+       void *mem;
+       int cur_node = dev_to_node(dev);
+
+       set_dev_node(dev, node);
+       mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+       set_dev_node(dev, cur_node);
+
+       if (!mem)
+               mem = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+
+       return mem;
+}
+
  static int xgbe_init_ring(struct xgbe_prv_data *pdata,
                           struct xgbe_ring *ring, unsigned int rdesc_count)
  {
-       DBGPR("-->xgbe_init_ring\n");
+       size_t size;
  
         if (!ring)
                 return 0;
  
         /* Descriptors */
+       size = rdesc_count * sizeof(struct xgbe_ring_desc);
+
         ring->rdesc_count = rdesc_count;
-       ring->rdesc = dma_alloc_coherent(pdata->dev,
-                                        (sizeof(struct xgbe_ring_desc) *
-                                         rdesc_count), &ring->rdesc_dma,
-                                        GFP_KERNEL);
+       ring->rdesc = xgbe_dma_alloc_node(pdata->dev, size, &ring->rdesc_dma,
+                                         ring->node);
         if (!ring->rdesc)
                 return -ENOMEM;
  
         /* Descriptor information */
-       ring->rdata = kcalloc(rdesc_count, sizeof(struct xgbe_ring_data),
-                             GFP_KERNEL);
+       size = rdesc_count * sizeof(struct xgbe_ring_data);
+
+       ring->rdata = xgbe_alloc_node(size, ring->node);
         if (!ring->rdata)
                 return -ENOMEM;
  
         netif_dbg(pdata, drv, pdata->netdev,
-                 "rdesc=%p, rdesc_dma=%pad, rdata=%p\n",
-                 ring->rdesc, &ring->rdesc_dma, ring->rdata);
-
-       DBGPR("<--xgbe_init_ring\n");
+                 "rdesc=%p, rdesc_dma=%pad, rdata=%p, node=%d\n",
+                 ring->rdesc, &ring->rdesc_dma, ring->rdata, ring->node);
  
         return 0;
  }
@@ -223,10 +249,8 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata)
         unsigned int i;
         int ret;
  
-       DBGPR("-->xgbe_alloc_ring_resources\n");
-
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 netif_dbg(pdata, drv, pdata->netdev, "%s - Tx ring:\n",
                           channel->name);
  
@@ -250,8 +274,6 @@ static int xgbe_alloc_ring_resources(struct xgbe_prv_data *pdata)
                 }
         }
  
-       DBGPR("<--xgbe_alloc_ring_resources\n");
-
         return 0;
  
  err_ring:
@@ -261,21 +283,33 @@ err_ring:
  }
  
  static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
-                           struct xgbe_page_alloc *pa, gfp_t gfp, int order)
+                           struct xgbe_page_alloc *pa, int alloc_order,
+                           int node)
  {
         struct page *pages = NULL;
         dma_addr_t pages_dma;
-       int ret;
+       gfp_t gfp;
+       int order, ret;
+
+again:
+       order = alloc_order;
  
         /* Try to obtain pages, decreasing order if necessary */
-       gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
+       gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
         while (order >= 0) {
-               pages = alloc_pages(gfp, order);
+               pages = alloc_pages_node(node, gfp, order);
                 if (pages)
                         break;
  
                 order--;
         }
+
+       /* If we couldn't get local pages, try getting from anywhere */
+       if (!pages && (node != NUMA_NO_NODE)) {
+               node = NUMA_NO_NODE;
+               goto again;
+       }
+
         if (!pages)
                 return -ENOMEM;
  
@@ -327,14 +361,14 @@ static int xgbe_map_rx_buffer(struct xgbe_prv_data *pdata,
         int ret;
  
         if (!ring->rx_hdr_pa.pages) {
-               ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, GFP_ATOMIC, 0);
+               ret = xgbe_alloc_pages(pdata, &ring->rx_hdr_pa, 0, ring->node);
                 if (ret)
                         return ret;
         }
  
         if (!ring->rx_buf_pa.pages) {
-               ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa, GFP_ATOMIC,
-                                      PAGE_ALLOC_COSTLY_ORDER);
+               ret = xgbe_alloc_pages(pdata, &ring->rx_buf_pa,
+                                      PAGE_ALLOC_COSTLY_ORDER, ring->node);
                 if (ret)
                         return ret;
         }
@@ -362,8 +396,8 @@ static void xgbe_wrapper_tx_descriptor_init(struct xgbe_prv_data *pdata)
  
         DBGPR("-->xgbe_wrapper_tx_descriptor_init\n");
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 ring = channel->tx_ring;
                 if (!ring)
                         break;
@@ -403,8 +437,8 @@ static void xgbe_wrapper_rx_descriptor_init(struct xgbe_prv_data *pdata)
  
         DBGPR("-->xgbe_wrapper_rx_descriptor_init\n");
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 ring = channel->rx_ring;
                 if (!ring)
                         break;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c

index 24a687ce4388182716438770c49e2dca7ff81114..06f953e1e9b27d52775a642a49f90918f8be176c 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -174,58 +174,30 @@ static unsigned int xgbe_riwt_to_usec(struct xgbe_prv_data *pdata,
         return ret;
  }
  
-static int xgbe_config_pblx8(struct xgbe_prv_data *pdata)
+static int xgbe_config_pbl_val(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
+       unsigned int pblx8, pbl;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++)
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, PBLX8,
-                                      pdata->pblx8);
-
-       return 0;
-}
-
-static int xgbe_get_tx_pbl_val(struct xgbe_prv_data *pdata)
-{
-       return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_TCR, PBL);
-}
-
-static int xgbe_config_tx_pbl_val(struct xgbe_prv_data *pdata)
-{
-       struct xgbe_channel *channel;
-       unsigned int i;
-
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
-                       break;
+       pblx8 = DMA_PBL_X8_DISABLE;
+       pbl = pdata->pbl;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, PBL,
-                                      pdata->tx_pbl);
+       if (pdata->pbl > 32) {
+               pblx8 = DMA_PBL_X8_ENABLE;
+               pbl >>= 3;
         }
  
-       return 0;
-}
-
-static int xgbe_get_rx_pbl_val(struct xgbe_prv_data *pdata)
-{
-       return XGMAC_DMA_IOREAD_BITS(pdata->channel, DMA_CH_RCR, PBL);
-}
-
-static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata)
-{
-       struct xgbe_channel *channel;
-       unsigned int i;
+       for (i = 0; i < pdata->channel_count; i++) {
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, PBLX8,
+                                      pblx8);
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
-                       break;
+               if (pdata->channel[i]->tx_ring)
+                       XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR,
+                                              PBL, pbl);
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, PBL,
-                                      pdata->rx_pbl);
+               if (pdata->channel[i]->rx_ring)
+                       XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR,
+                                              PBL, pbl);
         }
  
         return 0;
@@ -233,15 +205,13 @@ static int xgbe_config_rx_pbl_val(struct xgbe_prv_data *pdata)
  
  static int xgbe_config_osp_mode(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->tx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, OSP,
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, OSP,
                                        pdata->tx_osp_mode);
         }
  
@@ -292,15 +262,13 @@ static int xgbe_config_tx_threshold(struct xgbe_prv_data *pdata,
  
  static int xgbe_config_rx_coalesce(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->rx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RIWT, RWT,
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RIWT, RWT,
                                        pdata->rx_riwt);
         }
  
@@ -314,44 +282,38 @@ static int xgbe_config_tx_coalesce(struct xgbe_prv_data *pdata)
  
  static void xgbe_config_rx_buffer_size(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->rx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, RBSZ,
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, RBSZ,
                                        pdata->rx_buf_size);
         }
  }
  
  static void xgbe_config_tso_mode(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->tx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, TSE, 1);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, TSE, 1);
         }
  }
  
  static void xgbe_config_sph_mode(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->rx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_CR, SPH, 1);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_CR, SPH, 1);
         }
  
         XGMAC_IOWRITE_BITS(pdata, MAC_RCR, HDSMS, XGBE_SPH_HDSMS_SIZE);
@@ -651,8 +613,9 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
                 XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM,
                                    pdata->channel_irq_mode);
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
+
                 /* Clear all the interrupts which are set */
                 dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
                 XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr);
@@ -1497,26 +1460,37 @@ static void xgbe_rx_desc_init(struct xgbe_channel *channel)
  static void xgbe_update_tstamp_addend(struct xgbe_prv_data *pdata,
                                       unsigned int addend)
  {
+       unsigned int count = 10000;
+
         /* Set the addend register value and tell the device */
         XGMAC_IOWRITE(pdata, MAC_TSAR, addend);
         XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSADDREG, 1);
  
         /* Wait for addend update to complete */
-       while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG))
+       while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSADDREG))
                 udelay(5);
+
+       if (!count)
+               netdev_err(pdata->netdev,
+                          "timed out updating timestamp addend register\n");
  }
  
  static void xgbe_set_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec,
                                  unsigned int nsec)
  {
+       unsigned int count = 10000;
+
         /* Set the time values and tell the device */
         XGMAC_IOWRITE(pdata, MAC_STSUR, sec);
         XGMAC_IOWRITE(pdata, MAC_STNUR, nsec);
         XGMAC_IOWRITE_BITS(pdata, MAC_TSCR, TSINIT, 1);
  
         /* Wait for time update to complete */
-       while (XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT))
+       while (--count && XGMAC_IOREAD_BITS(pdata, MAC_TSCR, TSINIT))
                 udelay(5);
+
+       if (!count)
+               netdev_err(pdata->netdev, "timed out initializing timestamp\n");
  }
  
  static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata)
@@ -2140,37 +2114,38 @@ static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata)
  
  static void xgbe_config_dma_bus(struct xgbe_prv_data *pdata)
  {
+       unsigned int sbmr;
+
+       sbmr = XGMAC_IOREAD(pdata, DMA_SBMR);
+
         /* Set enhanced addressing mode */
-       XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, EAME, 1);
+       XGMAC_SET_BITS(sbmr, DMA_SBMR, EAME, 1);
  
         /* Set the System Bus mode */
-       XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, UNDEF, 1);
-       XGMAC_IOWRITE_BITS(pdata, DMA_SBMR, BLEN_256, 1);
+       XGMAC_SET_BITS(sbmr, DMA_SBMR, UNDEF, 1);
+       XGMAC_SET_BITS(sbmr, DMA_SBMR, BLEN, pdata->blen >> 2);
+       XGMAC_SET_BITS(sbmr, DMA_SBMR, AAL, pdata->aal);
+       XGMAC_SET_BITS(sbmr, DMA_SBMR, RD_OSR_LMT, pdata->rd_osr_limit - 1);
+       XGMAC_SET_BITS(sbmr, DMA_SBMR, WR_OSR_LMT, pdata->wr_osr_limit - 1);
+
+       XGMAC_IOWRITE(pdata, DMA_SBMR, sbmr);
+
+       /* Set descriptor fetching threshold */
+       if (pdata->vdata->tx_desc_prefetch)
+               XGMAC_IOWRITE_BITS(pdata, DMA_TXEDMACR, TDPS,
+                                  pdata->vdata->tx_desc_prefetch);
+
+       if (pdata->vdata->rx_desc_prefetch)
+               XGMAC_IOWRITE_BITS(pdata, DMA_RXEDMACR, RDPS,
+                                  pdata->vdata->rx_desc_prefetch);
  }
  
  static void xgbe_config_dma_cache(struct xgbe_prv_data *pdata)
  {
-       unsigned int arcache, awcache;
-
-       arcache = 0;
-       XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRC, pdata->arcache);
-       XGMAC_SET_BITS(arcache, DMA_AXIARCR, DRD, pdata->axdomain);
-       XGMAC_SET_BITS(arcache, DMA_AXIARCR, TEC, pdata->arcache);
-       XGMAC_SET_BITS(arcache, DMA_AXIARCR, TED, pdata->axdomain);
-       XGMAC_SET_BITS(arcache, DMA_AXIARCR, THC, pdata->arcache);
-       XGMAC_SET_BITS(arcache, DMA_AXIARCR, THD, pdata->axdomain);
-       XGMAC_IOWRITE(pdata, DMA_AXIARCR, arcache);
-
-       awcache = 0;
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWC, pdata->awcache);
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, DWD, pdata->axdomain);
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPC, pdata->awcache);
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RPD, pdata->axdomain);
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHC, pdata->awcache);
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, RHD, pdata->axdomain);
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDC, pdata->awcache);
-       XGMAC_SET_BITS(awcache, DMA_AXIAWCR, TDD, pdata->axdomain);
-       XGMAC_IOWRITE(pdata, DMA_AXIAWCR, awcache);
+       XGMAC_IOWRITE(pdata, DMA_AXIARCR, pdata->arcr);
+       XGMAC_IOWRITE(pdata, DMA_AXIAWCR, pdata->awcr);
+       if (pdata->awarcr)
+               XGMAC_IOWRITE(pdata, DMA_AXIAWARCR, pdata->awarcr);
  }
  
  static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata)
@@ -3202,16 +3177,14 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
  
  static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
         /* Enable each Tx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->tx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1);
         }
  
         /* Enable each Tx queue */
@@ -3225,7 +3198,6 @@ static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
  
  static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
         /* Prepare for Tx DMA channel stop */
@@ -3240,12 +3212,11 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
                 XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0);
  
         /* Disable each Tx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->tx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
         }
  }
  
@@ -3277,16 +3248,14 @@ static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata,
  
  static void xgbe_enable_rx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int reg_val, i;
  
         /* Enable each Rx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->rx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1);
         }
  
         /* Enable each Rx queue */
@@ -3304,7 +3273,6 @@ static void xgbe_enable_rx(struct xgbe_prv_data *pdata)
  
  static void xgbe_disable_rx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
         /* Disable MAC Rx */
@@ -3321,27 +3289,24 @@ static void xgbe_disable_rx(struct xgbe_prv_data *pdata)
         XGMAC_IOWRITE(pdata, MAC_RQC0R, 0);
  
         /* Disable each Rx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->rx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0);
         }
  }
  
  static void xgbe_powerup_tx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
         /* Enable each Tx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->tx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 1);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 1);
         }
  
         /* Enable MAC Tx */
@@ -3350,7 +3315,6 @@ static void xgbe_powerup_tx(struct xgbe_prv_data *pdata)
  
  static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
         /* Prepare for Tx DMA channel stop */
@@ -3361,42 +3325,37 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
         XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
  
         /* Disable each Tx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->tx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_TCR, ST, 0);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
         }
  }
  
  static void xgbe_powerup_rx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
         /* Enable each Rx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->rx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 1);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 1);
         }
  }
  
  static void xgbe_powerdown_rx(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
         /* Disable each Rx DMA channel */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->rx_ring)
+       for (i = 0; i < pdata->channel_count; i++) {
+               if (!pdata->channel[i]->rx_ring)
                         break;
  
-               XGMAC_DMA_IOWRITE_BITS(channel, DMA_CH_RCR, SR, 0);
+               XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_RCR, SR, 0);
         }
  }
  
@@ -3420,9 +3379,7 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
         xgbe_config_dma_bus(pdata);
         xgbe_config_dma_cache(pdata);
         xgbe_config_osp_mode(pdata);
-       xgbe_config_pblx8(pdata);
-       xgbe_config_tx_pbl_val(pdata);
-       xgbe_config_rx_pbl_val(pdata);
+       xgbe_config_pbl_val(pdata);
         xgbe_config_rx_coalesce(pdata);
         xgbe_config_tx_coalesce(pdata);
         xgbe_config_rx_buffer_size(pdata);
@@ -3550,13 +3507,6 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
         /* For TX DMA Operating on Second Frame config */
         hw_if->config_osp_mode = xgbe_config_osp_mode;
  
-       /* For RX and TX PBL config */
-       hw_if->config_rx_pbl_val = xgbe_config_rx_pbl_val;
-       hw_if->get_rx_pbl_val = xgbe_get_rx_pbl_val;
-       hw_if->config_tx_pbl_val = xgbe_config_tx_pbl_val;
-       hw_if->get_tx_pbl_val = xgbe_get_tx_pbl_val;
-       hw_if->config_pblx8 = xgbe_config_pblx8;
-
         /* For MMC statistics support */
         hw_if->tx_mmc_int = xgbe_tx_mmc_int;
         hw_if->rx_mmc_int = xgbe_rx_mmc_int;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c

index a934bd5d05075308f6087c5a4aaa0e099251fda3..ecef3ee87b175e8e90b23672d8d8005ab048cad0 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -158,81 +158,106 @@ static int xgbe_one_poll(struct napi_struct *, int);
  static int xgbe_all_poll(struct napi_struct *, int);
  static void xgbe_stop(struct xgbe_prv_data *);
  
+static void *xgbe_alloc_node(size_t size, int node)
+{
+       void *mem;
+
+       mem = kzalloc_node(size, GFP_KERNEL, node);
+       if (!mem)
+               mem = kzalloc(size, GFP_KERNEL);
+
+       return mem;
+}
+
+static void xgbe_free_channels(struct xgbe_prv_data *pdata)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(pdata->channel); i++) {
+               if (!pdata->channel[i])
+                       continue;
+
+               kfree(pdata->channel[i]->rx_ring);
+               kfree(pdata->channel[i]->tx_ring);
+               kfree(pdata->channel[i]);
+
+               pdata->channel[i] = NULL;
+       }
+
+       pdata->channel_count = 0;
+}
+
  static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel_mem, *channel;
-       struct xgbe_ring *tx_ring, *rx_ring;
+       struct xgbe_channel *channel;
+       struct xgbe_ring *ring;
         unsigned int count, i;
-       int ret = -ENOMEM;
+       unsigned int cpu;
+       int node;
  
         count = max_t(unsigned int, pdata->tx_ring_count, pdata->rx_ring_count);
+       for (i = 0; i < count; i++) {
+               /* Attempt to use a CPU on the node the device is on */
+               cpu = cpumask_local_spread(i, dev_to_node(pdata->dev));
  
-       channel_mem = kcalloc(count, sizeof(struct xgbe_channel), GFP_KERNEL);
-       if (!channel_mem)
-               goto err_channel;
-
-       tx_ring = kcalloc(pdata->tx_ring_count, sizeof(struct xgbe_ring),
-                         GFP_KERNEL);
-       if (!tx_ring)
-               goto err_tx_ring;
+               /* Set the allocation node based on the returned CPU */
+               node = cpu_to_node(cpu);
  
-       rx_ring = kcalloc(pdata->rx_ring_count, sizeof(struct xgbe_ring),
-                         GFP_KERNEL);
-       if (!rx_ring)
-               goto err_rx_ring;
+               channel = xgbe_alloc_node(sizeof(*channel), node);
+               if (!channel)
+                       goto err_mem;
+               pdata->channel[i] = channel;
  
-       for (i = 0, channel = channel_mem; i < count; i++, channel++) {
                 snprintf(channel->name, sizeof(channel->name), "channel-%u", i);
                 channel->pdata = pdata;
                 channel->queue_index = i;
                 channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
                                     (DMA_CH_INC * i);
+               channel->node = node;
+               cpumask_set_cpu(cpu, &channel->affinity_mask);
  
                 if (pdata->per_channel_irq)
                         channel->dma_irq = pdata->channel_irq[i];
  
                 if (i < pdata->tx_ring_count) {
-                       spin_lock_init(&tx_ring->lock);
-                       channel->tx_ring = tx_ring++;
+                       ring = xgbe_alloc_node(sizeof(*ring), node);
+                       if (!ring)
+                               goto err_mem;
+
+                       spin_lock_init(&ring->lock);
+                       ring->node = node;
+
+                       channel->tx_ring = ring;
                 }
  
                 if (i < pdata->rx_ring_count) {
-                       spin_lock_init(&rx_ring->lock);
-                       channel->rx_ring = rx_ring++;
+                       ring = xgbe_alloc_node(sizeof(*ring), node);
+                       if (!ring)
+                               goto err_mem;
+
+                       spin_lock_init(&ring->lock);
+                       ring->node = node;
+
+                       channel->rx_ring = ring;
                 }
  
+               netif_dbg(pdata, drv, pdata->netdev,
+                         "%s: cpu=%u, node=%d\n", channel->name, cpu, node);
+
                 netif_dbg(pdata, drv, pdata->netdev,
                           "%s: dma_regs=%p, dma_irq=%d, tx=%p, rx=%p\n",
                           channel->name, channel->dma_regs, channel->dma_irq,
                           channel->tx_ring, channel->rx_ring);
         }
  
-       pdata->channel = channel_mem;
         pdata->channel_count = count;
  
         return 0;
  
-err_rx_ring:
-       kfree(tx_ring);
-
-err_tx_ring:
-       kfree(channel_mem);
-
-err_channel:
-       return ret;
-}
-
-static void xgbe_free_channels(struct xgbe_prv_data *pdata)
-{
-       if (!pdata->channel)
-               return;
-
-       kfree(pdata->channel->rx_ring);
-       kfree(pdata->channel->tx_ring);
-       kfree(pdata->channel);
+err_mem:
+       xgbe_free_channels(pdata);
  
-       pdata->channel = NULL;
-       pdata->channel_count = 0;
+       return -ENOMEM;
  }
  
  static inline unsigned int xgbe_tx_avail_desc(struct xgbe_ring *ring)
@@ -301,12 +326,10 @@ static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata,
  
  static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++)
-               xgbe_enable_rx_tx_int(pdata, channel);
+       for (i = 0; i < pdata->channel_count; i++)
+               xgbe_enable_rx_tx_int(pdata, pdata->channel[i]);
  }
  
  static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
@@ -329,12 +352,10 @@ static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
  
  static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata)
  {
-       struct xgbe_channel *channel;
         unsigned int i;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++)
-               xgbe_disable_rx_tx_int(pdata, channel);
+       for (i = 0; i < pdata->channel_count; i++)
+               xgbe_disable_rx_tx_int(pdata, pdata->channel[i]);
  }
  
  static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period,
@@ -382,9 +403,9 @@ static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period,
         return false;
  }
  
-static irqreturn_t xgbe_ecc_isr(int irq, void *data)
+static void xgbe_ecc_isr_task(unsigned long data)
  {
-       struct xgbe_prv_data *pdata = data;
+       struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
         unsigned int ecc_isr;
         bool stop = false;
  
@@ -435,12 +456,26 @@ out:
         /* Clear all ECC interrupts */
         XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
  
-       return IRQ_HANDLED;
+       /* Reissue interrupt if status is not clear */
+       if (pdata->vdata->irq_reissue_support)
+               XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 1);
  }
  
-static irqreturn_t xgbe_isr(int irq, void *data)
+static irqreturn_t xgbe_ecc_isr(int irq, void *data)
  {
         struct xgbe_prv_data *pdata = data;
+
+       if (pdata->isr_as_tasklet)
+               tasklet_schedule(&pdata->tasklet_ecc);
+       else
+               xgbe_ecc_isr_task((unsigned long)pdata);
+
+       return IRQ_HANDLED;
+}
+
+static void xgbe_isr_task(unsigned long data)
+{
+       struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
         struct xgbe_hw_if *hw_if = &pdata->hw_if;
         struct xgbe_channel *channel;
         unsigned int dma_isr, dma_ch_isr;
@@ -461,7 +496,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
                 if (!(dma_isr & (1 << i)))
                         continue;
  
-               channel = pdata->channel + i;
+               channel = pdata->channel[i];
  
                 dma_ch_isr = XGMAC_DMA_IOREAD(channel, DMA_CH_SR);
                 netif_dbg(pdata, intr, pdata->netdev, "DMA_CH%u_ISR=%#010x\n",
@@ -543,15 +578,36 @@ static irqreturn_t xgbe_isr(int irq, void *data)
  isr_done:
         /* If there is not a separate AN irq, handle it here */
         if (pdata->dev_irq == pdata->an_irq)
-               pdata->phy_if.an_isr(irq, pdata);
+               pdata->phy_if.an_isr(pdata);
  
         /* If there is not a separate ECC irq, handle it here */
         if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq))
-               xgbe_ecc_isr(irq, pdata);
+               xgbe_ecc_isr_task((unsigned long)pdata);
  
         /* If there is not a separate I2C irq, handle it here */
         if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq))
-               pdata->i2c_if.i2c_isr(irq, pdata);
+               pdata->i2c_if.i2c_isr(pdata);
+
+       /* Reissue interrupt if status is not clear */
+       if (pdata->vdata->irq_reissue_support) {
+               unsigned int reissue_mask;
+
+               reissue_mask = 1 << 0;
+               if (!pdata->per_channel_irq)
+                       reissue_mask |= 0xffff << 4;
+
+               XP_IOWRITE(pdata, XP_INT_REISSUE_EN, reissue_mask);
+       }
+}
+
+static irqreturn_t xgbe_isr(int irq, void *data)
+{
+       struct xgbe_prv_data *pdata = data;
+
+       if (pdata->isr_as_tasklet)
+               tasklet_schedule(&pdata->tasklet_dev);
+       else
+               xgbe_isr_task((unsigned long)pdata);
  
         return IRQ_HANDLED;
  }
@@ -640,8 +696,8 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata)
         setup_timer(&pdata->service_timer, xgbe_service_timer,
                     (unsigned long)pdata);
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 if (!channel->tx_ring)
                         break;
  
@@ -662,8 +718,8 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata)
  
         del_timer_sync(&pdata->service_timer);
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 if (!channel->tx_ring)
                         break;
  
@@ -781,8 +837,8 @@ static void xgbe_napi_enable(struct xgbe_prv_data *pdata, unsigned int add)
         unsigned int i;
  
         if (pdata->per_channel_irq) {
-               channel = pdata->channel;
-               for (i = 0; i < pdata->channel_count; i++, channel++) {
+               for (i = 0; i < pdata->channel_count; i++) {
+                       channel = pdata->channel[i];
                         if (add)
                                 netif_napi_add(pdata->netdev, &channel->napi,
                                                xgbe_one_poll, NAPI_POLL_WEIGHT);
@@ -804,8 +860,8 @@ static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del)
         unsigned int i;
  
         if (pdata->per_channel_irq) {
-               channel = pdata->channel;
-               for (i = 0; i < pdata->channel_count; i++, channel++) {
+               for (i = 0; i < pdata->channel_count; i++) {
+                       channel = pdata->channel[i];
                         napi_disable(&channel->napi);
  
                         if (del)
@@ -826,6 +882,10 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
         unsigned int i;
         int ret;
  
+       tasklet_init(&pdata->tasklet_dev, xgbe_isr_task, (unsigned long)pdata);
+       tasklet_init(&pdata->tasklet_ecc, xgbe_ecc_isr_task,
+                    (unsigned long)pdata);
+
         ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0,
                                netdev->name, pdata);
         if (ret) {
@@ -847,8 +907,8 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
         if (!pdata->per_channel_irq)
                 return 0;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 snprintf(channel->dma_irq_name,
                          sizeof(channel->dma_irq_name) - 1,
                          "%s-TxRx-%u", netdev_name(netdev),
@@ -862,14 +922,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
                                      channel->dma_irq);
                         goto err_dma_irq;
                 }
+
+               irq_set_affinity_hint(channel->dma_irq,
+                                     &channel->affinity_mask);
         }
  
         return 0;
  
  err_dma_irq:
         /* Using an unsigned int, 'i' will go to UINT_MAX and exit */
-       for (i--, channel--; i < pdata->channel_count; i--, channel--)
+       for (i--; i < pdata->channel_count; i--) {
+               channel = pdata->channel[i];
+
+               irq_set_affinity_hint(channel->dma_irq, NULL);
                 devm_free_irq(pdata->dev, channel->dma_irq, channel);
+       }
  
         if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
                 devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
@@ -893,9 +960,12 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
         if (!pdata->per_channel_irq)
                 return;
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++)
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
+
+               irq_set_affinity_hint(channel->dma_irq, NULL);
                 devm_free_irq(pdata->dev, channel->dma_irq, channel);
+       }
  }
  
  void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata)
@@ -930,16 +1000,14 @@ void xgbe_init_rx_coalesce(struct xgbe_prv_data *pdata)
  static void xgbe_free_tx_data(struct xgbe_prv_data *pdata)
  {
         struct xgbe_desc_if *desc_if = &pdata->desc_if;
-       struct xgbe_channel *channel;
         struct xgbe_ring *ring;
         struct xgbe_ring_data *rdata;
         unsigned int i, j;
  
         DBGPR("-->xgbe_free_tx_data\n");
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               ring = channel->tx_ring;
+       for (i = 0; i < pdata->channel_count; i++) {
+               ring = pdata->channel[i]->tx_ring;
                 if (!ring)
                         break;
  
@@ -955,16 +1023,14 @@ static void xgbe_free_tx_data(struct xgbe_prv_data *pdata)
  static void xgbe_free_rx_data(struct xgbe_prv_data *pdata)
  {
         struct xgbe_desc_if *desc_if = &pdata->desc_if;
-       struct xgbe_channel *channel;
         struct xgbe_ring *ring;
         struct xgbe_ring_data *rdata;
         unsigned int i, j;
  
         DBGPR("-->xgbe_free_rx_data\n");
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               ring = channel->rx_ring;
+       for (i = 0; i < pdata->channel_count; i++) {
+               ring = pdata->channel[i]->rx_ring;
                 if (!ring)
                         break;
  
@@ -1140,8 +1206,8 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
  
         hw_if->exit(pdata);
  
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
                 if (!channel->tx_ring)
                         continue;
  
@@ -1212,6 +1278,10 @@ static void xgbe_tx_tstamp(struct work_struct *work)
         u64 nsec;
         unsigned long flags;
  
+       spin_lock_irqsave(&pdata->tstamp_lock, flags);
+       if (!pdata->tx_tstamp_skb)
+               goto unlock;
+
         if (pdata->tx_tstamp) {
                 nsec = timecounter_cyc2time(&pdata->tstamp_tc,
                                             pdata->tx_tstamp);
@@ -1223,8 +1293,9 @@ static void xgbe_tx_tstamp(struct work_struct *work)
  
         dev_kfree_skb_any(pdata->tx_tstamp_skb);
  
-       spin_lock_irqsave(&pdata->tstamp_lock, flags);
         pdata->tx_tstamp_skb = NULL;
+
+unlock:
         spin_unlock_irqrestore(&pdata->tstamp_lock, flags);
  }
  
@@ -1623,7 +1694,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev)
  
         DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len);
  
-       channel = pdata->channel + skb->queue_mapping;
+       channel = pdata->channel[skb->queue_mapping];
         txq = netdev_get_tx_queue(netdev, channel->queue_index);
         ring = channel->tx_ring;
         packet = &ring->packet_data;
@@ -1833,9 +1904,10 @@ static void xgbe_poll_controller(struct net_device *netdev)
         DBGPR("-->xgbe_poll_controller\n");
  
         if (pdata->per_channel_irq) {
-               channel = pdata->channel;
-               for (i = 0; i < pdata->channel_count; i++, channel++)
+               for (i = 0; i < pdata->channel_count; i++) {
+                       channel = pdata->channel[i];
                         xgbe_dma_isr(channel->dma_irq, channel);
+               }
         } else {
                 disable_irq(pdata->dev_irq);
                 xgbe_isr(pdata->dev_irq, pdata);
@@ -2328,8 +2400,9 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget)
         do {
                 last_processed = processed;
  
-               channel = pdata->channel;
-               for (i = 0; i < pdata->channel_count; i++, channel++) {
+               for (i = 0; i < pdata->channel_count; i++) {
+                       channel = pdata->channel[i];
+
                         /* Cleanup Tx ring first */
                         xgbe_tx_poll(channel);
  
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c

index 920566a3a5996c3032e325e42ccd4736844a11de..67a2e52ad25ddb9c54fac01f489094152f646eba 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -247,7 +247,7 @@ static int xgbe_set_pauseparam(struct net_device *netdev,
  
         if (pause->autoneg && (pdata->phy.autoneg != AUTONEG_ENABLE)) {
                 netdev_err(netdev,
-                          "autoneg disabled, pause autoneg not avialable\n");
+                          "autoneg disabled, pause autoneg not available\n");
                 return -EINVAL;
         }
  
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c

index 417bdb5982a93ca252962f7fcd6141a2f53fca37..4d9062d35930f74bfd7864ff989d0dcf4c054325 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -274,13 +274,16 @@ static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata,
                 XI2C_IOREAD(pdata, IC_CLR_STOP_DET);
  }
  
-static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+static void xgbe_i2c_isr_task(unsigned long data)
  {
         struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
         struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
         unsigned int isr;
  
         isr = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT);
+       if (!isr)
+               goto reissue_check;
+
         netif_dbg(pdata, intr, pdata->netdev,
                   "I2C interrupt received: status=%#010x\n", isr);
  
@@ -308,6 +311,21 @@ out:
         if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET))
                 complete(&pdata->i2c_complete);
  
+reissue_check:
+       /* Reissue interrupt if status is not clear */
+       if (pdata->vdata->irq_reissue_support)
+               XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 2);
+}
+
+static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+{
+       struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+       if (pdata->isr_as_tasklet)
+               tasklet_schedule(&pdata->tasklet_i2c);
+       else
+               xgbe_i2c_isr_task((unsigned long)pdata);
+
         return IRQ_HANDLED;
  }
  
@@ -349,12 +367,11 @@ static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr)
         XI2C_IOWRITE(pdata, IC_TAR, addr);
  }
  
-static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata)
+static irqreturn_t xgbe_i2c_combined_isr(struct xgbe_prv_data *pdata)
  {
-       if (!XI2C_IOREAD(pdata, IC_RAW_INTR_STAT))
-               return IRQ_HANDLED;
+       xgbe_i2c_isr_task((unsigned long)pdata);
  
-       return xgbe_i2c_isr(irq, pdata);
+       return IRQ_HANDLED;
  }
  
  static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op)
@@ -445,6 +462,9 @@ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
  
         /* If we have a separate I2C irq, enable it */
         if (pdata->dev_irq != pdata->i2c_irq) {
+               tasklet_init(&pdata->tasklet_i2c, xgbe_i2c_isr_task,
+                            (unsigned long)pdata);
+
                 ret = devm_request_irq(pdata->dev, pdata->i2c_irq,
                                        xgbe_i2c_isr, 0, pdata->i2c_name,
                                        pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c

index 17ac8f9a51a0291a983ba5abbcd0131cd9b93527..500147d9e3c8c1791dba586c71e87661b4d3af09 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -140,14 +140,16 @@ static void xgbe_default_config(struct xgbe_prv_data *pdata)
  {
         DBGPR("-->xgbe_default_config\n");
  
-       pdata->pblx8 = DMA_PBL_X8_ENABLE;
+       pdata->blen = DMA_SBMR_BLEN_64;
+       pdata->pbl = DMA_PBL_128;
+       pdata->aal = 1;
+       pdata->rd_osr_limit = 8;
+       pdata->wr_osr_limit = 8;
         pdata->tx_sf_mode = MTL_TSF_ENABLE;
         pdata->tx_threshold = MTL_TX_THRESHOLD_64;
-       pdata->tx_pbl = DMA_PBL_16;
         pdata->tx_osp_mode = DMA_OSP_ENABLE;
         pdata->rx_sf_mode = MTL_RSF_DISABLE;
         pdata->rx_threshold = MTL_RX_THRESHOLD_64;
-       pdata->rx_pbl = DMA_PBL_16;
         pdata->pause_autoneg = 1;
         pdata->tx_pause = 1;
         pdata->rx_pause = 1;
@@ -277,7 +279,11 @@ int xgbe_config_netdev(struct xgbe_prv_data *pdata)
         pdata->desc_ded_period = jiffies;
  
         /* Issue software reset to device */
-       pdata->hw_if.exit(pdata);
+       ret = pdata->hw_if.exit(pdata);
+       if (ret) {
+               dev_err(dev, "software reset failed\n");
+               return ret;
+       }
  
         /* Set default configuration data */
         xgbe_default_config(pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c

index b672d92495397bb3132c90e25db17872c4589fa6..80684914dd8a7516e57743c30033c5e357395be8 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -665,6 +665,10 @@ static void xgbe_an37_isr(struct xgbe_prv_data *pdata)
         } else {
                 /* Enable AN interrupts */
                 xgbe_an37_enable_interrupts(pdata);
+
+               /* Reissue interrupt if status is not clear */
+               if (pdata->vdata->irq_reissue_support)
+                       XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
         }
  }
  
@@ -684,10 +688,14 @@ static void xgbe_an73_isr(struct xgbe_prv_data *pdata)
         } else {
                 /* Enable AN interrupts */
                 xgbe_an73_enable_interrupts(pdata);
+
+               /* Reissue interrupt if status is not clear */
+               if (pdata->vdata->irq_reissue_support)
+                       XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
         }
  }
  
-static irqreturn_t xgbe_an_isr(int irq, void *data)
+static void xgbe_an_isr_task(unsigned long data)
  {
         struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
  
@@ -705,13 +713,25 @@ static irqreturn_t xgbe_an_isr(int irq, void *data)
         default:
                 break;
         }
+}
+
+static irqreturn_t xgbe_an_isr(int irq, void *data)
+{
+       struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+       if (pdata->isr_as_tasklet)
+               tasklet_schedule(&pdata->tasklet_an);
+       else
+               xgbe_an_isr_task((unsigned long)pdata);
  
         return IRQ_HANDLED;
  }
  
-static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata)
+static irqreturn_t xgbe_an_combined_isr(struct xgbe_prv_data *pdata)
  {
-       return xgbe_an_isr(irq, pdata);
+       xgbe_an_isr_task((unsigned long)pdata);
+
+       return IRQ_HANDLED;
  }
  
  static void xgbe_an_irq_work(struct work_struct *work)
@@ -915,6 +935,10 @@ static void xgbe_an_state_machine(struct work_struct *work)
                 break;
         }
  
+       /* Reissue interrupt if status is not clear */
+       if (pdata->vdata->irq_reissue_support)
+               XP_IOWRITE(pdata, XP_INT_REISSUE_EN, 1 << 3);
+
         mutex_unlock(&pdata->an_mutex);
  }
  
@@ -1379,6 +1403,9 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata)
  
         /* If we have a separate AN irq, enable it */
         if (pdata->dev_irq != pdata->an_irq) {
+               tasklet_init(&pdata->tasklet_an, xgbe_an_isr_task,
+                            (unsigned long)pdata);
+
                 ret = devm_request_irq(pdata->dev, pdata->an_irq,
                                        xgbe_an_isr, 0, pdata->an_name,
                                        pdata);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c

index 38392a5207258e707c7aac9f638fb592ad2bd890..1e56ad7bd9a5fcac9f677e70b9d6e4ef77d0ea2c 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -139,6 +139,7 @@ static int xgbe_config_multi_msi(struct xgbe_prv_data *pdata)
                 return ret;
         }
  
+       pdata->isr_as_tasklet = 1;
         pdata->irq_count = ret;
  
         pdata->dev_irq = pci_irq_vector(pdata->pcidev, 0);
@@ -175,6 +176,7 @@ static int xgbe_config_irqs(struct xgbe_prv_data *pdata)
                 return ret;
         }
  
+       pdata->isr_as_tasklet = pdata->pcidev->msi_enabled ? 1 : 0;
         pdata->irq_count = 1;
         pdata->channel_irq_count = 1;
  
@@ -325,9 +327,9 @@ static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  
         /* Set the DMA coherency values */
         pdata->coherent = 1;
-       pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
-       pdata->arcache = XGBE_DMA_OS_ARCACHE;
-       pdata->awcache = XGBE_DMA_OS_AWCACHE;
+       pdata->arcr = XGBE_DMA_PCI_ARCR;
+       pdata->awcr = XGBE_DMA_PCI_AWCR;
+       pdata->awarcr = XGBE_DMA_PCI_AWARCR;
  
         /* Set the maximum channels and queues */
         reg = XP_IOREAD(pdata, XP_PROP_1);
@@ -445,6 +447,9 @@ static const struct xgbe_version_data xgbe_v2a = {
         .tx_tstamp_workaround           = 1,
         .ecc_support                    = 1,
         .i2c_support                    = 1,
+       .irq_reissue_support            = 1,
+       .tx_desc_prefetch               = 5,
+       .rx_desc_prefetch               = 5,
  };
  
  static const struct xgbe_version_data xgbe_v2b = {
@@ -456,6 +461,9 @@ static const struct xgbe_version_data xgbe_v2b = {
         .tx_tstamp_workaround           = 1,
         .ecc_support                    = 1,
         .i2c_support                    = 1,
+       .irq_reissue_support            = 1,
+       .tx_desc_prefetch               = 5,
+       .rx_desc_prefetch               = 5,
  };
  
  static const struct pci_device_id xgbe_pci_table[] = {
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c

index e707c49cc55a785f9950685f73c03932a1117541..04b5c149cacac286ad5635cb3f32ee0d7d3f6645 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -711,23 +711,39 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
  
+       if (!phy_data->sfp_mod_absent && !phy_data->sfp_changed)
+               return;
+
+       pdata->phy.supported &= ~SUPPORTED_Autoneg;
+       pdata->phy.supported &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+       pdata->phy.supported &= ~SUPPORTED_TP;
+       pdata->phy.supported &= ~SUPPORTED_FIBRE;
+       pdata->phy.supported &= ~SUPPORTED_100baseT_Full;
+       pdata->phy.supported &= ~SUPPORTED_1000baseT_Full;
+       pdata->phy.supported &= ~SUPPORTED_10000baseT_Full;
+
         if (phy_data->sfp_mod_absent) {
                 pdata->phy.speed = SPEED_UNKNOWN;
                 pdata->phy.duplex = DUPLEX_UNKNOWN;
                 pdata->phy.autoneg = AUTONEG_ENABLE;
+               pdata->phy.pause_autoneg = AUTONEG_ENABLE;
+
+               pdata->phy.supported |= SUPPORTED_Autoneg;
+               pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+               pdata->phy.supported |= SUPPORTED_TP;
+               pdata->phy.supported |= SUPPORTED_FIBRE;
+               if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
+                       pdata->phy.supported |= SUPPORTED_100baseT_Full;
+               if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+               if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
+                       pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+
                 pdata->phy.advertising = pdata->phy.supported;
  
                 return;
         }
  
-       pdata->phy.advertising &= ~ADVERTISED_Autoneg;
-       pdata->phy.advertising &= ~ADVERTISED_TP;
-       pdata->phy.advertising &= ~ADVERTISED_FIBRE;
-       pdata->phy.advertising &= ~ADVERTISED_100baseT_Full;
-       pdata->phy.advertising &= ~ADVERTISED_1000baseT_Full;
-       pdata->phy.advertising &= ~ADVERTISED_10000baseT_Full;
-       pdata->phy.advertising &= ~ADVERTISED_10000baseR_FEC;
-
         switch (phy_data->sfp_base) {
         case XGBE_SFP_BASE_1000_T:
         case XGBE_SFP_BASE_1000_SX:
@@ -736,17 +752,25 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
                 pdata->phy.speed = SPEED_UNKNOWN;
                 pdata->phy.duplex = DUPLEX_UNKNOWN;
                 pdata->phy.autoneg = AUTONEG_ENABLE;
-               pdata->phy.advertising |= ADVERTISED_Autoneg;
+               pdata->phy.pause_autoneg = AUTONEG_ENABLE;
+               pdata->phy.supported |= SUPPORTED_Autoneg;
+               pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
                 break;
         case XGBE_SFP_BASE_10000_SR:
         case XGBE_SFP_BASE_10000_LR:
         case XGBE_SFP_BASE_10000_LRM:
         case XGBE_SFP_BASE_10000_ER:
         case XGBE_SFP_BASE_10000_CR:
-       default:
                 pdata->phy.speed = SPEED_10000;
                 pdata->phy.duplex = DUPLEX_FULL;
                 pdata->phy.autoneg = AUTONEG_DISABLE;
+               pdata->phy.pause_autoneg = AUTONEG_DISABLE;
+               break;
+       default:
+               pdata->phy.speed = SPEED_UNKNOWN;
+               pdata->phy.duplex = DUPLEX_UNKNOWN;
+               pdata->phy.autoneg = AUTONEG_DISABLE;
+               pdata->phy.pause_autoneg = AUTONEG_DISABLE;
                 break;
         }
  
@@ -754,36 +778,38 @@ static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
         case XGBE_SFP_BASE_1000_T:
         case XGBE_SFP_BASE_1000_CX:
         case XGBE_SFP_BASE_10000_CR:
-               pdata->phy.advertising |= ADVERTISED_TP;
+               pdata->phy.supported |= SUPPORTED_TP;
                 break;
         default:
-               pdata->phy.advertising |= ADVERTISED_FIBRE;
+               pdata->phy.supported |= SUPPORTED_FIBRE;
         }
  
         switch (phy_data->sfp_speed) {
         case XGBE_SFP_SPEED_100_1000:
                 if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
-                       pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+                       pdata->phy.supported |= SUPPORTED_100baseT_Full;
                 if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
-                       pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
                 break;
         case XGBE_SFP_SPEED_1000:
                 if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
-                       pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
                 break;
         case XGBE_SFP_SPEED_10000:
                 if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
-                       pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+                       pdata->phy.supported |= SUPPORTED_10000baseT_Full;
                 break;
         default:
                 /* Choose the fastest supported speed */
                 if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000)
-                       pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+                       pdata->phy.supported |= SUPPORTED_10000baseT_Full;
                 else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000)
-                       pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
                 else if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100)
-                       pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+                       pdata->phy.supported |= SUPPORTED_100baseT_Full;
         }
+
+       pdata->phy.advertising = pdata->phy.supported;
  }
  
  static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom,
@@ -1095,7 +1121,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
  
         ret = xgbe_phy_sfp_get_mux(pdata);
         if (ret) {
-               netdev_err(pdata->netdev, "I2C error setting SFP MUX\n");
+               dev_err_once(pdata->dev, "%s: I2C error setting SFP MUX\n",
+                            netdev_name(pdata->netdev));
                 return ret;
         }
  
@@ -1105,7 +1132,8 @@ static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
                                 &eeprom_addr, sizeof(eeprom_addr),
                                 &sfp_eeprom, sizeof(sfp_eeprom));
         if (ret) {
-               netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n");
+               dev_err_once(pdata->dev, "%s: I2C error reading SFP EEPROM\n",
+                            netdev_name(pdata->netdev));
                 goto put;
         }
  
@@ -1164,7 +1192,8 @@ static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
                                 &gpio_reg, sizeof(gpio_reg),
                                 gpio_ports, sizeof(gpio_ports));
         if (ret) {
-               netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n");
+               dev_err_once(pdata->dev, "%s: I2C error reading SFP GPIOs\n",
+                            netdev_name(pdata->netdev));
                 return;
         }
  
@@ -1694,19 +1723,25 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
         xgbe_phy_put_comm_ownership(pdata);
  }
  
-static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata)
+static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
+                                       unsigned int cmd, unsigned int sub_cmd)
  {
-       if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
-               return;
+       unsigned int s0 = 0;
+       unsigned int wait;
  
         /* Log if a previous command did not complete */
-       netif_dbg(pdata, link, pdata->netdev,
-                 "firmware mailbox not ready for command\n");
-}
+       if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+               netif_dbg(pdata, link, pdata->netdev,
+                         "firmware mailbox not ready for command\n");
  
-static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
-{
-       unsigned int wait;
+       /* Construct the command */
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, sub_cmd);
+
+       /* Issue the command */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
  
         /* Wait for command to complete */
         wait = XGBE_RATECHANGE_COUNT;
@@ -1723,21 +1758,8 @@ static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
  
  static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
  {
-       unsigned int s0;
-
-       xgbe_phy_start_ratechange(pdata);
-
         /* Receiver Reset Cycle */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5);
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       xgbe_phy_perform_ratechange(pdata, 5, 0);
  
         netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n");
  }
@@ -1746,14 +1768,8 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
  
-       xgbe_phy_start_ratechange(pdata);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       /* Power off */
+       xgbe_phy_perform_ratechange(pdata, 0, 0);
  
         phy_data->cur_mode = XGBE_MODE_UNKNOWN;
  
@@ -1763,33 +1779,21 @@ static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
  static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int s0;
  
         xgbe_phy_set_redrv_mode(pdata);
  
-       xgbe_phy_start_ratechange(pdata);
-
         /* 10G/SFI */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3);
         if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) {
-               XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+               xgbe_phy_perform_ratechange(pdata, 3, 0);
         } else {
                 if (phy_data->sfp_cable_len <= 1)
-                       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
+                       xgbe_phy_perform_ratechange(pdata, 3, 1);
                 else if (phy_data->sfp_cable_len <= 3)
-                       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
+                       xgbe_phy_perform_ratechange(pdata, 3, 2);
                 else
-                       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+                       xgbe_phy_perform_ratechange(pdata, 3, 3);
         }
  
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
-
         phy_data->cur_mode = XGBE_MODE_SFI;
  
         netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n");
@@ -1798,23 +1802,11 @@ static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
  static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int s0;
  
         xgbe_phy_set_redrv_mode(pdata);
  
-       xgbe_phy_start_ratechange(pdata);
-
         /* 1G/X */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       xgbe_phy_perform_ratechange(pdata, 1, 3);
  
         phy_data->cur_mode = XGBE_MODE_X;
  
@@ -1824,23 +1816,11 @@ static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
  static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int s0;
  
         xgbe_phy_set_redrv_mode(pdata);
  
-       xgbe_phy_start_ratechange(pdata);
-
         /* 1G/SGMII */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       xgbe_phy_perform_ratechange(pdata, 1, 2);
  
         phy_data->cur_mode = XGBE_MODE_SGMII_1000;
  
@@ -1850,23 +1830,11 @@ static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
  static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int s0;
  
         xgbe_phy_set_redrv_mode(pdata);
  
-       xgbe_phy_start_ratechange(pdata);
-
-       /* 1G/SGMII */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       /* 100M/SGMII */
+       xgbe_phy_perform_ratechange(pdata, 1, 1);
  
         phy_data->cur_mode = XGBE_MODE_SGMII_100;
  
@@ -1876,23 +1844,11 @@ static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
  static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int s0;
  
         xgbe_phy_set_redrv_mode(pdata);
  
-       xgbe_phy_start_ratechange(pdata);
-
         /* 10G/KR */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4);
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       xgbe_phy_perform_ratechange(pdata, 4, 0);
  
         phy_data->cur_mode = XGBE_MODE_KR;
  
@@ -1902,23 +1858,11 @@ static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
  static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int s0;
  
         xgbe_phy_set_redrv_mode(pdata);
  
-       xgbe_phy_start_ratechange(pdata);
-
         /* 2.5G/KX */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2);
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       xgbe_phy_perform_ratechange(pdata, 2, 0);
  
         phy_data->cur_mode = XGBE_MODE_KX_2500;
  
@@ -1928,23 +1872,11 @@ static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
  static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata)
  {
         struct xgbe_phy_data *phy_data = pdata->phy_data;
-       unsigned int s0;
  
         xgbe_phy_set_redrv_mode(pdata);
  
-       xgbe_phy_start_ratechange(pdata);
-
         /* 1G/KX */
-       s0 = 0;
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
-       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
-
-       /* Call FW to make the change */
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
-       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
-       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
-
-       xgbe_phy_complete_ratechange(pdata);
+       xgbe_phy_perform_ratechange(pdata, 1, 3);
  
         phy_data->cur_mode = XGBE_MODE_KX_1000;
  
@@ -2037,6 +1969,8 @@ static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data,
                 return XGBE_MODE_SGMII_100;
         case SPEED_1000:
                 return XGBE_MODE_SGMII_1000;
+       case SPEED_2500:
+               return XGBE_MODE_KX_2500;
         case SPEED_10000:
                 return XGBE_MODE_KR;
         default:
@@ -2180,6 +2114,9 @@ static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata,
         case XGBE_MODE_SGMII_1000:
                 return xgbe_phy_check_mode(pdata, mode,
                                            ADVERTISED_1000baseT_Full);
+       case XGBE_MODE_KX_2500:
+               return xgbe_phy_check_mode(pdata, mode,
+                                          ADVERTISED_2500baseX_Full);
         case XGBE_MODE_KR:
                 return xgbe_phy_check_mode(pdata, mode,
                                            ADVERTISED_10000baseT_Full);
@@ -2210,6 +2147,8 @@ static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
                 return xgbe_phy_check_mode(pdata, mode,
                                            ADVERTISED_1000baseT_Full);
         case XGBE_MODE_SFI:
+               if (phy_data->sfp_mod_absent)
+                       return true;
                 return xgbe_phy_check_mode(pdata, mode,
                                            ADVERTISED_10000baseT_Full);
         default:
@@ -2287,6 +2226,8 @@ static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data,
         case SPEED_100:
         case SPEED_1000:
                 return true;
+       case SPEED_2500:
+               return (phy_data->port_mode == XGBE_PORT_MODE_NBASE_T);
         case SPEED_10000:
                 return (phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T);
         default:
@@ -3013,9 +2954,6 @@ static int xgbe_phy_init(struct xgbe_prv_data *pdata)
                 if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
                         pdata->phy.supported |= SUPPORTED_10000baseT_Full;
                         phy_data->start_mode = XGBE_MODE_SFI;
-                       if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
-                               pdata->phy.supported |=
-                                       SUPPORTED_10000baseR_FEC;
                 }
  
                 phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c

index 84d4c51cab8c352bcff37647b46649a131fd38ca..d0f3dfb88202966d315d4ec5bf20b7f8fcf02abe 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -448,13 +448,11 @@ static int xgbe_platform_probe(struct platform_device *pdev)
         }
         pdata->coherent = (attr == DEV_DMA_COHERENT);
         if (pdata->coherent) {
-               pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
-               pdata->arcache = XGBE_DMA_OS_ARCACHE;
-               pdata->awcache = XGBE_DMA_OS_AWCACHE;
+               pdata->arcr = XGBE_DMA_OS_ARCR;
+               pdata->awcr = XGBE_DMA_OS_AWCR;
         } else {
-               pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN;
-               pdata->arcache = XGBE_DMA_SYS_ARCACHE;
-               pdata->awcache = XGBE_DMA_SYS_AWCACHE;
+               pdata->arcr = XGBE_DMA_SYS_ARCR;
+               pdata->awcr = XGBE_DMA_SYS_AWCR;
         }
  
         /* Set the maximum fifo amounts */
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c

index a533a6cc2d530258c3875150565d35aa6fa55fc5..d06d260cf1e28ba32953c0ab9d92fa0ad819fd5a 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
@@ -267,7 +267,7 @@ void xgbe_ptp_register(struct xgbe_prv_data *pdata)
                          ktime_to_ns(ktime_get_real()));
  
         /* Disable all timestamping to start */
-       XGMAC_IOWRITE(pdata, MAC_TCR, 0);
+       XGMAC_IOWRITE(pdata, MAC_TSCR, 0);
         pdata->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
         pdata->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
  }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h

index f9a24639f5741bdc0d534cd7b155ad50acdb45c1..0938294f640a64127988310dae4f103051d74dfb 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -128,6 +128,7 @@
  #include <linux/net_tstamp.h>
  #include <net/dcbnl.h>
  #include <linux/completion.h>
+#include <linux/cpumask.h>
  
  #define XGBE_DRV_NAME          "amd-xgbe"
  #define XGBE_DRV_VERSION       "1.0.3"
@@ -163,14 +164,17 @@
  #define XGBE_DMA_STOP_TIMEOUT  1
  
  /* DMA cache settings - Outer sharable, write-back, write-allocate */
-#define XGBE_DMA_OS_AXDOMAIN   0x2
-#define XGBE_DMA_OS_ARCACHE    0xb
-#define XGBE_DMA_OS_AWCACHE    0xf
+#define XGBE_DMA_OS_ARCR       0x002b2b2b
+#define XGBE_DMA_OS_AWCR       0x2f2f2f2f
  
  /* DMA cache settings - System, no caches used */
-#define XGBE_DMA_SYS_AXDOMAIN  0x3
-#define XGBE_DMA_SYS_ARCACHE   0x0
-#define XGBE_DMA_SYS_AWCACHE   0x0
+#define XGBE_DMA_SYS_ARCR      0x00303030
+#define XGBE_DMA_SYS_AWCR      0x30303030
+
+/* DMA cache settings - PCI device */
+#define XGBE_DMA_PCI_ARCR      0x00000003
+#define XGBE_DMA_PCI_AWCR      0x13131313
+#define XGBE_DMA_PCI_AWARCR    0x00000313
  
  /* DMA channel interrupt modes */
  #define XGBE_IRQ_MODE_EDGE     0
@@ -412,6 +416,7 @@ struct xgbe_ring {
         /* Page allocation for RX buffers */
         struct xgbe_page_alloc rx_hdr_pa;
         struct xgbe_page_alloc rx_buf_pa;
+       int node;
  
         /* Ring index values
          *  cur   - Tx: index of descriptor to be used for current transfer
@@ -462,6 +467,9 @@ struct xgbe_channel {
  
         struct xgbe_ring *tx_ring;
         struct xgbe_ring *rx_ring;
+
+       int node;
+       cpumask_t affinity_mask;
  } ____cacheline_aligned;
  
  enum xgbe_state {
@@ -734,13 +742,6 @@ struct xgbe_hw_if {
         /* For TX DMA Operate on Second Frame config */
         int (*config_osp_mode)(struct xgbe_prv_data *);
  
-       /* For RX and TX PBL config */
-       int (*config_rx_pbl_val)(struct xgbe_prv_data *);
-       int (*get_rx_pbl_val)(struct xgbe_prv_data *);
-       int (*config_tx_pbl_val)(struct xgbe_prv_data *);
-       int (*get_tx_pbl_val)(struct xgbe_prv_data *);
-       int (*config_pblx8)(struct xgbe_prv_data *);
-
         /* For MMC statistics */
         void (*rx_mmc_int)(struct xgbe_prv_data *);
         void (*tx_mmc_int)(struct xgbe_prv_data *);
@@ -837,7 +838,7 @@ struct xgbe_phy_if {
         bool (*phy_valid_speed)(struct xgbe_prv_data *, int);
  
         /* For single interrupt support */
-       irqreturn_t (*an_isr)(int, struct xgbe_prv_data *);
+       irqreturn_t (*an_isr)(struct xgbe_prv_data *);
  
         /* PHY implementation specific services */
         struct xgbe_phy_impl_if phy_impl;
@@ -855,7 +856,7 @@ struct xgbe_i2c_if {
         int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *);
  
         /* For single interrupt support */
-       irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *);
+       irqreturn_t (*i2c_isr)(struct xgbe_prv_data *);
  };
  
  struct xgbe_desc_if {
@@ -924,6 +925,9 @@ struct xgbe_version_data {
         unsigned int tx_tstamp_workaround;
         unsigned int ecc_support;
         unsigned int i2c_support;
+       unsigned int irq_reissue_support;
+       unsigned int tx_desc_prefetch;
+       unsigned int rx_desc_prefetch;
  };
  
  struct xgbe_prv_data {
@@ -1001,9 +1005,9 @@ struct xgbe_prv_data {
  
         /* AXI DMA settings */
         unsigned int coherent;
-       unsigned int axdomain;
-       unsigned int arcache;
-       unsigned int awcache;
+       unsigned int arcr;
+       unsigned int awcr;
+       unsigned int awarcr;
  
         /* Service routine support */
         struct workqueue_struct *dev_workqueue;
@@ -1011,7 +1015,7 @@ struct xgbe_prv_data {
         struct timer_list service_timer;
  
         /* Rings for Tx/Rx on a DMA channel */
-       struct xgbe_channel *channel;
+       struct xgbe_channel *channel[XGBE_MAX_DMA_CHANNELS];
         unsigned int tx_max_channel_count;
         unsigned int rx_max_channel_count;
         unsigned int channel_count;
@@ -1026,19 +1030,21 @@ struct xgbe_prv_data {
         unsigned int rx_q_count;
  
         /* Tx/Rx common settings */
-       unsigned int pblx8;
+       unsigned int blen;
+       unsigned int pbl;
+       unsigned int aal;
+       unsigned int rd_osr_limit;
+       unsigned int wr_osr_limit;
  
         /* Tx settings */
         unsigned int tx_sf_mode;
         unsigned int tx_threshold;
-       unsigned int tx_pbl;
         unsigned int tx_osp_mode;
         unsigned int tx_max_fifo_size;
  
         /* Rx settings */
         unsigned int rx_sf_mode;
         unsigned int rx_threshold;
-       unsigned int rx_pbl;
         unsigned int rx_max_fifo_size;
  
         /* Tx coalescing settings */
@@ -1159,6 +1165,12 @@ struct xgbe_prv_data {
  
         unsigned int lpm_ctrl;          /* CTRL1 for resume */
  
+       unsigned int isr_as_tasklet;
+       struct tasklet_struct tasklet_dev;
+       struct tasklet_struct tasklet_ecc;
+       struct tasklet_struct tasklet_i2c;
+       struct tasklet_struct tasklet_an;
+
  #ifdef CONFIG_DEBUG_FS
         struct dentry *xgbe_debugfs;
  
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c

index 7e913d8331c3082d4720e8a4077a85abd4be17c7..8c9986f3fc0186701bd9ae81f27cbb1519e9f25f 100644 (file)
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2252,7 +2252,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
  
         if (atl1c_tx_map(adapter, skb, tpd, type) < 0) {
                 netif_info(adapter, tx_done, adapter->netdev,
-                          "tx-skb droppted due to dma error\n");
+                          "tx-skb dropped due to dma error\n");
                 /* roll back tpd/buffer */
                 atl1c_tx_rollback(adapter, tpd, type);
                 dev_kfree_skb_any(skb);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

index 14c236e5bdb1a456ece5ff135e3bbff8a86955da..c12b4d3e946e17d15705a3ce6b1dcd1d74924e62 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12729,7 +12729,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
         } else {
                 /* If no mc addresses are required, flush the configuration */
                 rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
-               if (rc)
+               if (rc < 0)
                         BNX2X_ERR("Failed to clear multicast configuration %d\n",
                                   rc);
         }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

index 11e8a866a31232e123567fc539b4a1ae45a7cfad..a19f68f5862d7e8632b770eb9faaae430ebeb150 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1311,10 +1311,11 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                 cp_cons = NEXT_CMP(cp_cons);
         }
  
-       if (unlikely(agg_bufs > MAX_SKB_FRAGS)) {
+       if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) {
                 bnxt_abort_tpa(bp, bnapi, cp_cons, agg_bufs);
-               netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
-                           agg_bufs, (int)MAX_SKB_FRAGS);
+               if (agg_bufs > MAX_SKB_FRAGS)
+                       netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n",
+                                   agg_bufs, (int)MAX_SKB_FRAGS);
                 return NULL;
         }
  
@@ -1573,6 +1574,45 @@ next_rx_no_prod:
         return rc;
  }
  
+/* In netpoll mode (poll budget of 0) with a combined completion ring, discard the
+ * rx packet and recycle its buffers: flag the completion as errored, then process it.
+ */
+static int bnxt_force_rx_discard(struct bnxt *bp, struct bnxt_napi *bnapi,
+                                u32 *raw_cons, u8 *event)
+{
+       struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+       u32 tmp_raw_cons = *raw_cons; /* local cursor; *raw_cons is not written in this function */
+       struct rx_cmp_ext *rxcmp1;
+       struct rx_cmp *rxcmp;
+       u16 cp_cons;
+       u8 cmp_type;
+
+       cp_cons = RING_CMP(tmp_raw_cons);
+       rxcmp = (struct rx_cmp *)
+                       &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+       tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons); /* rxcmp1 is the entry that follows rxcmp */
+       cp_cons = RING_CMP(tmp_raw_cons);
+       rxcmp1 = (struct rx_cmp_ext *)
+                       &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+
+       if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons))
+               return -EBUSY; /* the poll loop treats -EBUSY as a partial completion */
+
+       cmp_type = RX_CMP_TYPE(rxcmp);
+       if (cmp_type == CMP_TYPE_RX_L2_CMP) {
+               rxcmp1->rx_cmp_cfa_code_errors_v2 |=
+                       cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR); /* presumably makes bnxt_rx_pkt() drop it - TODO confirm */
+       } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) {
+               struct rx_tpa_end_cmp_ext *tpa_end1;
+
+               tpa_end1 = (struct rx_tpa_end_cmp_ext *)rxcmp1;
+               tpa_end1->rx_tpa_end_cmp_errors_v2 |=
+                       cpu_to_le32(RX_TPA_END_CMP_ERRORS); /* the bits tested by TPA_END_ERRORS() */
+       }
+       return bnxt_rx_pkt(bp, bnapi, raw_cons, event); /* handed the caller's original raw_cons pointer */
+}
+
  #define BNXT_GET_EVENT_PORT(data)      \
         ((data) &                       \
          ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -1755,7 +1795,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
                         if (unlikely(tx_pkts > bp->tx_wake_thresh))
                                 rx_pkts = budget;
                 } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) {
-                       rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+                       if (likely(budget))
+                               rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
+                       else
+                               rc = bnxt_force_rx_discard(bp, bnapi, &raw_cons,
+                                                          &event);
                         if (likely(rc >= 0))
                                 rx_pkts += rc;
                         else if (rc == -EBUSY)  /* partial completion */
@@ -6730,12 +6774,11 @@ static void bnxt_poll_controller(struct net_device *dev)
         struct bnxt *bp = netdev_priv(dev);
         int i;
  
-       for (i = 0; i < bp->cp_nr_rings; i++) {
-               struct bnxt_irq *irq = &bp->irq_tbl[i];
+       /* Only process tx rings/combined rings in netpoll mode. */
+       for (i = 0; i < bp->tx_nr_rings; i++) {
+               struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
  
-               disable_irq(irq->vector);
-               irq->handler(irq->vector, bp->bnapi[i]);
-               enable_irq(irq->vector);
+               napi_schedule(&txr->bnapi->napi);
         }
  }
  #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h

index 5984423499e62b03c985e219460e4a4d418c7801..f872a7db2ca8b6ad6701158ec25a6b953936f731 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -374,12 +374,16 @@ struct rx_tpa_end_cmp_ext {
  
         __le32 rx_tpa_end_cmp_errors_v2;
         #define RX_TPA_END_CMP_V2                               (0x1 << 0)
-       #define RX_TPA_END_CMP_ERRORS                           (0x7fff << 1)
+       #define RX_TPA_END_CMP_ERRORS                           (0x3 << 1)
         #define RX_TPA_END_CMPL_ERRORS_SHIFT                     1
  
         u32 rx_tpa_end_cmp_start_opaque;
  };
  
+#define TPA_END_ERRORS(rx_tpa_end_ext)                                 \
+       ((rx_tpa_end_ext)->rx_tpa_end_cmp_errors_v2 &                   \
+        cpu_to_le32(RX_TPA_END_CMP_ERRORS))
+
  #define DB_IDX_MASK                                            0xffffff
  #define DB_IDX_VALID                                           (0x1 << 26)
  #define DB_IRQ_DIS                                             (0x1 << 27)
diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig

index 608bea171956873508256e024c11e0de2396c119..427d65a1a1261095a402b833660c83da0113e4c7 100644 (file)
--- a/drivers/net/ethernet/cadence/Kconfig
+++ b/drivers/net/ethernet/cadence/Kconfig
@@ -29,7 +29,15 @@ config MACB
           support for the MACB/GEM chip.
  
           To compile this driver as a module, choose M here: the module
-         will be called macb.
+         will be macb.
+
+config MACB_USE_HWSTAMP
+       bool "Use IEEE 1588 hwstamp"
+       depends on MACB
+       default y
+       imply PTP_1588_CLOCK
+       ---help---
+         Enable IEEE 1588 Precision Time Protocol (PTP) support for MACB.
  
  config MACB_PCI
         tristate "Cadence PCI MACB/GEM support"
diff --git a/drivers/net/ethernet/cadence/Makefile b/drivers/net/ethernet/cadence/Makefile

index 4ba75594d5c55cdec41bb1a805317b4af0fdf0a1..1d66ddb6896977511e58922b0b1adaaeefbdb01e 100644 (file)
--- a/drivers/net/ethernet/cadence/Makefile
+++ b/drivers/net/ethernet/cadence/Makefile
@@ -1,6 +1,11 @@
  #
  # Makefile for the Atmel network device drivers.
  #
+macb-y := macb_main.o
+
+ifeq ($(CONFIG_MACB_USE_HWSTAMP),y)
+macb-y += macb_ptp.o
+endif
  
  obj-$(CONFIG_MACB) += macb.o
  obj-$(CONFIG_MACB_PCI) += macb_pci.o
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h

index 2510661102bade67116d5e85bd551d10c64ad54b..c93f3a2dc6c1a3187040038bc0e859aa79d409bf 100644 (file)
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -11,6 +11,12 @@
  #define _MACB_H
  
  #include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/net_tstamp.h>
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) || defined(CONFIG_MACB_USE_HWSTAMP)
+#define MACB_EXT_DESC
+#endif
  
  #define MACB_GREGS_NBR 16
  #define MACB_GREGS_VERSION 2
@@ -86,6 +92,10 @@
  #define GEM_SA3T               0x009C /* Specific3 Top */
  #define GEM_SA4B               0x00A0 /* Specific4 Bottom */
  #define GEM_SA4T               0x00A4 /* Specific4 Top */
+#define GEM_EFTSH              0x00e8 /* PTP Event Frame Transmitted Seconds Register 47:32 */
+#define GEM_EFRSH              0x00ec /* PTP Event Frame Received Seconds Register 47:32 */
+#define GEM_PEFTSH             0x00f0 /* PTP Peer Event Frame Transmitted Seconds Register 47:32 */
+#define GEM_PEFRSH             0x00f4 /* PTP Peer Event Frame Received Seconds Register 47:32 */
  #define GEM_OTX                        0x0100 /* Octets transmitted */
  #define GEM_OCTTXL             0x0100 /* Octets transmitted [31:0] */
  #define GEM_OCTTXH             0x0104 /* Octets transmitted [47:32] */
@@ -155,6 +165,9 @@
  #define GEM_DCFG6              0x0294 /* Design Config 6 */
  #define GEM_DCFG7              0x0298 /* Design Config 7 */
  
+#define GEM_TXBDCTRL   0x04cc /* TX Buffer Descriptor control register */
+#define GEM_RXBDCTRL   0x04d0 /* RX Buffer Descriptor control register */
+
  #define GEM_ISR(hw_q)          (0x0400 + ((hw_q) << 2))
  #define GEM_TBQP(hw_q)         (0x0440 + ((hw_q) << 2))
  #define GEM_TBQPH(hw_q)                (0x04C8)
@@ -191,6 +204,8 @@
  #define MACB_TZQ_OFFSET                12 /* Transmit zero quantum pause frame */
  #define MACB_TZQ_SIZE          1
  #define MACB_SRTSM_OFFSET      15
+#define MACB_OSSMODE_OFFSET 24 /* Enable One Step Synchro Mode */
+#define MACB_OSSMODE_SIZE      1
  
  /* Bitfields in NCFGR */
  #define MACB_SPD_OFFSET                0 /* Speed */
@@ -269,6 +284,10 @@
  #define GEM_RXBS_SIZE          8
  #define GEM_DDRP_OFFSET                24 /* disc_when_no_ahb */
  #define GEM_DDRP_SIZE          1
+#define GEM_RXEXT_OFFSET       28 /* RX extended Buffer Descriptor mode */
+#define GEM_RXEXT_SIZE         1
+#define GEM_TXEXT_OFFSET       29 /* TX extended Buffer Descriptor mode */
+#define GEM_TXEXT_SIZE         1
  #define GEM_ADDR64_OFFSET      30 /* Address bus width - 64b or 32b */
  #define GEM_ADDR64_SIZE                1
  
@@ -425,6 +444,11 @@
  #define GEM_TX_PKT_BUFF_OFFSET                 21
  #define GEM_TX_PKT_BUFF_SIZE                   1
  
+
+/* Bitfields in DCFG5. */
+#define GEM_TSU_OFFSET                         8
+#define GEM_TSU_SIZE                           1
+
  /* Bitfields in DCFG6. */
  #define GEM_PBUF_LSO_OFFSET                    27
  #define GEM_PBUF_LSO_SIZE                      1
@@ -439,6 +463,52 @@
  #define GEM_NSINCR_OFFSET                      0
  #define GEM_NSINCR_SIZE                                8
  
+/* Bitfields in TSH */
+#define GEM_TSH_OFFSET                         0 /* TSU timer value (s). MSB [47:32] of seconds timer count */
+#define GEM_TSH_SIZE                           16
+
+/* Bitfields in TSL */
+#define GEM_TSL_OFFSET                         0 /* TSU timer value (s). LSB [31:0] of seconds timer count */
+#define GEM_TSL_SIZE                           32
+
+/* Bitfields in TN */
+#define GEM_TN_OFFSET                          0 /* TSU timer value (ns) */
+#define GEM_TN_SIZE                                    30
+
+/* Bitfields in TXBDCTRL */
+#define GEM_TXTSMODE_OFFSET                    4 /* TX Descriptor Timestamp Insertion mode */
+#define GEM_TXTSMODE_SIZE                      2
+
+/* Bitfields in RXBDCTRL */
+#define GEM_RXTSMODE_OFFSET                    4 /* RX Descriptor Timestamp Insertion mode */
+#define GEM_RXTSMODE_SIZE                      2
+
+/* Transmit DMA buffer descriptor Word 1 */
+#define GEM_DMA_TXVALID_OFFSET         23 /* timestamp has been captured in the Buffer Descriptor */
+#define GEM_DMA_TXVALID_SIZE           1
+
+/* Receive DMA buffer descriptor Word 0 */
+#define GEM_DMA_RXVALID_OFFSET         2 /* indicates a valid timestamp in the Buffer Descriptor */
+#define GEM_DMA_RXVALID_SIZE           1
+
+/* DMA buffer descriptor Word 2 (32 bit addressing) or Word 4 (64 bit addressing) */
+#define GEM_DMA_SECL_OFFSET                    30 /* Timestamp seconds[1:0]  */
+#define GEM_DMA_SECL_SIZE                      2
+#define GEM_DMA_NSEC_OFFSET                    0 /* Timestamp nanosecs [29:0] */
+#define GEM_DMA_NSEC_SIZE                      30
+
+/* DMA buffer descriptor Word 3 (32 bit addressing) or Word 5 (64 bit addressing) */
+
+/* New hardware supports 12 bit precision of timestamp in DMA buffer descriptor.
+ * Old hardware supports only 6 bit precision but it is enough for PTP.
+ * The lower (6 bit) precision is always used instead of checking the hardware version.
+ */
+#define GEM_DMA_SECH_OFFSET                    0 /* Timestamp seconds[5:2] */
+#define GEM_DMA_SECH_SIZE                      4
+#define GEM_DMA_SEC_WIDTH                      (GEM_DMA_SECH_SIZE + GEM_DMA_SECL_SIZE)
+#define GEM_DMA_SEC_TOP                                (1 << GEM_DMA_SEC_WIDTH)
+#define GEM_DMA_SEC_MASK                       (GEM_DMA_SEC_TOP - 1)
+
  /* Bitfields in ADJ */
  #define GEM_ADDSUB_OFFSET                      31
  #define GEM_ADDSUB_SIZE                                1
@@ -514,6 +584,8 @@
  #define queue_readl(queue, reg)                (queue)->bp->macb_reg_readl((queue)->bp, (queue)->reg)
  #define queue_writel(queue, reg, value)        (queue)->bp->macb_reg_writel((queue)->bp, (queue)->reg, (value))
  
+#define PTP_TS_BUFFER_SIZE             128 /* must be power of 2 */
+
  /* Conditional GEM/MACB macros.  These perform the operation to the correct
   * register dependent on whether the device is a GEM or a MACB.  For registers
   * and bitfields that are common across both devices, use macb_{read,write}l
@@ -546,16 +618,26 @@ struct macb_dma_desc {
         u32     ctrl;
  };
  
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-enum macb_hw_dma_cap {
-       HW_DMA_CAP_32B,
-       HW_DMA_CAP_64B,
-};
+#ifdef MACB_EXT_DESC
+#define HW_DMA_CAP_32B         0
+#define HW_DMA_CAP_64B         (1 << 0)
+#define HW_DMA_CAP_PTP         (1 << 1)
+#define HW_DMA_CAP_64B_PTP     (HW_DMA_CAP_64B | HW_DMA_CAP_PTP)
  
  struct macb_dma_desc_64 {
         u32 addrh;
         u32 resvd;
  };
+
+struct macb_dma_desc_ptp {
+       u32     ts_1;
+       u32     ts_2;
+};
+
+struct gem_tx_ts {
+       struct sk_buff *skb;
+       struct macb_dma_desc_ptp desc_ptp;
+};
  #endif
  
  /* DMA descriptor bitfields */
@@ -871,6 +953,11 @@ struct macb_config {
         int     jumbo_max_len;
  };
  
+struct tsu_incr {
+       u32 sub_ns;
+       u32 ns;
+};
+
  struct macb_queue {
         struct macb             *bp;
         int                     irq;
@@ -887,6 +974,12 @@ struct macb_queue {
         struct macb_tx_skb      *tx_skb;
         dma_addr_t              tx_ring_dma;
         struct work_struct      tx_error_task;
+
+#ifdef CONFIG_MACB_USE_HWSTAMP
+       struct work_struct      tx_ts_task;
+       unsigned int            tx_ts_head, tx_ts_tail;
+       struct gem_tx_ts        tx_timestamps[PTP_TS_BUFFER_SIZE];
+#endif
  };
  
  struct macb {
@@ -955,11 +1048,62 @@ struct macb {
         u32                     wol;
  
         struct macb_ptp_info    *ptp_info;      /* macb-ptp interface */
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-       enum macb_hw_dma_cap hw_dma_cap;
+#ifdef MACB_EXT_DESC
+       uint8_t hw_dma_cap;
  #endif
+       spinlock_t tsu_clk_lock; /* gem tsu clock locking */
+       unsigned int tsu_rate;
+       struct ptp_clock *ptp_clock;
+       struct ptp_clock_info ptp_clock_info;
+       struct tsu_incr tsu_incr;
+       struct hwtstamp_config tstamp_config;
  };
  
+#ifdef CONFIG_MACB_USE_HWSTAMP
+#define GEM_TSEC_SIZE  (GEM_TSH_SIZE + GEM_TSL_SIZE)
+#define TSU_SEC_MAX_VAL (((u64)1 << GEM_TSEC_SIZE) - 1)
+#define TSU_NSEC_MAX_VAL ((1 << GEM_TN_SIZE) - 1)
+
+/* Timestamp mode values written to the TXTSMODE / RXTSMODE fields of the
+ * TXBDCTRL / RXBDCTRL registers.
+ */
+enum macb_bd_control {
+       TSTAMP_DISABLED,
+       TSTAMP_FRAME_PTP_EVENT_ONLY,
+       TSTAMP_ALL_PTP_FRAMES,
+       TSTAMP_ALL_FRAMES,
+};
+
+void gem_ptp_init(struct net_device *ndev);
+void gem_ptp_remove(struct net_device *ndev);
+int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *des);
+void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc);
+/* Hand a completed TX skb to the hardware timestamp path.
+ *
+ * Returns -ENOTSUPP when TX timestamping is not enabled, otherwise the
+ * result of gem_ptp_txstamp(); 0 means the skb is now owned by the
+ * timestamp buffer.
+ */
+static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+       /* NOTE(review): tx_type holds an HWTSTAMP_TX_* value but is compared
+        * with an enum macb_bd_control constant; presumably both "off"
+        * values are 0 - confirm.
+        */
+       if (queue->bp->tstamp_config.tx_type == TSTAMP_DISABLED)
+               return -ENOTSUPP;
+
+       return gem_ptp_txstamp(queue, skb, desc);
+}
+
+/* Attach a hardware RX timestamp to skb via gem_ptp_rxstamp(), unless RX
+ * timestamping is not enabled in bp->tstamp_config.
+ */
+static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+       /* NOTE(review): rx_filter holds an HWTSTAMP_FILTER_* value but is
+        * compared with an enum macb_bd_control constant; presumably both
+        * "none" values are 0 - confirm.
+        */
+       if (bp->tstamp_config.rx_filter == TSTAMP_DISABLED)
+               return;
+
+       gem_ptp_rxstamp(bp, skb, desc);
+}
+int gem_get_hwtst(struct net_device *dev, struct ifreq *rq);
+int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd);
+#else
+/* No-op stand-ins used when CONFIG_MACB_USE_HWSTAMP is not set. */
+static inline void gem_ptp_init(struct net_device *ndev) { }
+static inline void gem_ptp_remove(struct net_device *ndev) { }
+
+/* Always non-zero, so the caller keeps ownership of the skb. */
+static inline int gem_ptp_do_txstamp(struct macb_queue *queue, struct sk_buff *skb, struct macb_dma_desc *desc)
+{
+       return -1;
+}
+
+static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, struct macb_dma_desc *desc) { }
+#endif
+
  static inline bool macb_is_gem(struct macb *bp)
  {
         return !!(bp->caps & MACB_CAPS_MACB_IS_GEM);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb_main.c

similarity index 95%

rename from drivers/net/ethernet/cadence/macb.c

rename to drivers/net/ethernet/cadence/macb_main.c

index 3ae9d8071ded533c317eb5a39d1900bb99510db3..41e5711544fcb8eef85534d4beb0d7601c7e0db7 100644 (file)
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -79,33 +79,84 @@
  #define MACB_HALT_TIMEOUT      1230
  
  /* DMA buffer descriptor might be different size
- * depends on hardware configuration.
+ * depends on hardware configuration:
+ *
+ * 1. dma address width 32 bits:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *
+ * 2. dma address width 64 bits:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *    word 3: upper 32 bit address of Data Buffer
+ *    word 4: unused
+ *
+ * 3. dma address width 32 bits with hardware timestamping:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *    word 3: timestamp word 1
+ *    word 4: timestamp word 2
+ *
+ * 4. dma address width 64 bits with hardware timestamping:
+ *    word 1: 32 bit address of Data Buffer
+ *    word 2: control
+ *    word 3: upper 32 bit address of Data Buffer
+ *    word 4: unused
+ *    word 5: timestamp word 1
+ *    word 6: timestamp word 2
   */
  static unsigned int macb_dma_desc_get_size(struct macb *bp)
  {
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-       if (bp->hw_dma_cap == HW_DMA_CAP_64B)
-               return sizeof(struct macb_dma_desc) + sizeof(struct macb_dma_desc_64);
+#ifdef MACB_EXT_DESC
+       unsigned int desc_size;
+
+       switch (bp->hw_dma_cap) {
+       case HW_DMA_CAP_64B:
+               desc_size = sizeof(struct macb_dma_desc)
+                       + sizeof(struct macb_dma_desc_64);
+               break;
+       case HW_DMA_CAP_PTP:
+               desc_size = sizeof(struct macb_dma_desc)
+                       + sizeof(struct macb_dma_desc_ptp);
+               break;
+       case HW_DMA_CAP_64B_PTP:
+               desc_size = sizeof(struct macb_dma_desc)
+                       + sizeof(struct macb_dma_desc_64)
+                       + sizeof(struct macb_dma_desc_ptp);
+               break;
+       default:
+               desc_size = sizeof(struct macb_dma_desc);
+       }
+       return desc_size;
  #endif
         return sizeof(struct macb_dma_desc);
  }
  
-static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int idx)
+static unsigned int macb_adj_dma_desc_idx(struct macb *bp, unsigned int desc_idx)
  {
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-       /* Dma buffer descriptor is 4 words length (instead of 2 words)
-        * for 64b GEM.
-        */
-       if (bp->hw_dma_cap == HW_DMA_CAP_64B)
-               idx <<= 1;
+#ifdef MACB_EXT_DESC
+       switch (bp->hw_dma_cap) {
+       case HW_DMA_CAP_64B:
+       case HW_DMA_CAP_PTP:
+               desc_idx <<= 1;
+               break;
+       case HW_DMA_CAP_64B_PTP:
+               desc_idx *= 3;
+               break;
+       default:
+               break;
+       }
+       return desc_idx;
  #endif
-       return idx;
+       return desc_idx;
  }
  
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
  static struct macb_dma_desc_64 *macb_64b_desc(struct macb *bp, struct macb_dma_desc *desc)
  {
-       return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
+       if (bp->hw_dma_cap & HW_DMA_CAP_64B)
+               return (struct macb_dma_desc_64 *)((void *)desc + sizeof(struct macb_dma_desc));
+       return NULL;
  }
  #endif
  
@@ -621,7 +672,7 @@ static void macb_set_addr(struct macb *bp, struct macb_dma_desc *desc, dma_addr_
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
         struct macb_dma_desc_64 *desc_64;
  
-       if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+       if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
                 desc_64 = macb_64b_desc(bp, desc);
                 desc_64->addrh = upper_32_bits(addr);
         }
@@ -635,7 +686,7 @@ static dma_addr_t macb_get_addr(struct macb *bp, struct macb_dma_desc *desc)
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
         struct macb_dma_desc_64 *desc_64;
  
-       if (bp->hw_dma_cap == HW_DMA_CAP_64B) {
+       if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
                 desc_64 = macb_64b_desc(bp, desc);
                 addr = ((u64)(desc_64->addrh) << 32);
         }
@@ -734,7 +785,7 @@ static void macb_tx_error_task(struct work_struct *work)
         /* Reinitialize the TX desc queue */
         queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-       if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+       if (bp->hw_dma_cap & HW_DMA_CAP_64B)
                 queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
  #endif
         /* Make TX ring reflect state of hardware */
@@ -796,6 +847,12 @@ static void macb_tx_interrupt(struct macb_queue *queue)
  
                         /* First, update TX stats if needed */
                         if (skb) {
+                               if (gem_ptp_do_txstamp(queue, skb, desc) == 0) {
+                                       /* skb now belongs to timestamp buffer
+                                        * and will be removed later
+                                        */
+                                       tx_skb->skb = NULL;
+                               }
                                 netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
                                             macb_tx_ring_wrap(bp, tail),
                                             skb->data);
@@ -962,6 +1019,8 @@ static int gem_rx(struct macb *bp, int budget)
                 bp->dev->stats.rx_packets++;
                 bp->dev->stats.rx_bytes += skb->len;
  
+               gem_ptp_do_rxstamp(bp, skb, desc);
+
  #if defined(DEBUG) && defined(VERBOSE_DEBUG)
                 netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
                             skb->len, skb->csum);
@@ -1283,7 +1342,6 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
                         if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
                                 queue_writel(queue, ISR, MACB_BIT(HRESP));
                 }
-
                 status = queue_readl(queue, ISR);
         }
  
@@ -1613,7 +1671,6 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
  
         /* Make newly initialized descriptor visible to hardware */
         wmb();
-
         skb_tx_timestamp(skb);
  
         macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
@@ -1942,8 +1999,12 @@ static void macb_configure_dma(struct macb *bp)
                         dmacfg &= ~GEM_BIT(TXCOEN);
  
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-               if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+               if (bp->hw_dma_cap & HW_DMA_CAP_64B)
                         dmacfg |= GEM_BIT(ADDR64);
+#endif
+#ifdef CONFIG_MACB_USE_HWSTAMP
+               if (bp->hw_dma_cap & HW_DMA_CAP_PTP)
+                       dmacfg |= GEM_BIT(RXEXT) | GEM_BIT(TXEXT);
  #endif
                 netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
                            dmacfg);
@@ -1992,13 +2053,13 @@ static void macb_init_hw(struct macb *bp)
         /* Initialize TX and RX buffers */
         macb_writel(bp, RBQP, lower_32_bits(bp->rx_ring_dma));
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-       if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+       if (bp->hw_dma_cap & HW_DMA_CAP_64B)
                 macb_writel(bp, RBQPH, upper_32_bits(bp->rx_ring_dma));
  #endif
         for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
                 queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-               if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+               if (bp->hw_dma_cap & HW_DMA_CAP_64B)
                         queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
  #endif
  
@@ -2467,6 +2528,70 @@ static int macb_set_ringparam(struct net_device *netdev,
         return 0;
  }
  
+#ifdef CONFIG_MACB_USE_HWSTAMP
+/* Return the TSU clock rate: the rate of the "tsu_clk" clock if it can be
+ * obtained, otherwise the rate of bp->pclk.
+ *
+ * NOTE(review): the return type is unsigned, so the -ENOTSUPP returned when
+ * neither clock is usable reaches the caller as a large positive rate.
+ */
+static unsigned int gem_get_tsu_rate(struct macb *bp)
+{
+       struct clk *tsu_clk;
+       unsigned int tsu_rate;
+
+       tsu_clk = devm_clk_get(&bp->pdev->dev, "tsu_clk");
+       if (!IS_ERR(tsu_clk))
+               tsu_rate = clk_get_rate(tsu_clk);
+       /* try pclk instead */
+       else if (!IS_ERR(bp->pclk)) {
+               tsu_clk = bp->pclk;
+               tsu_rate = clk_get_rate(tsu_clk);
+       } else
+               return -ENOTSUPP;
+       return tsu_rate;
+}
+
+/* Maximum frequency adjustment advertised for the GEM PTP clock; the value
+ * is a fixed constant.
+ */
+static s32 gem_get_ptp_max_adj(void)
+{
+       return 64000000;
+}
+
+/* Fill @info with the timestamping capabilities of the device.
+ *
+ * Without the PTP descriptor capability the generic
+ * ethtool_op_get_ts_info() answer is used; otherwise software and hardware
+ * timestamping are advertised together with the PHC index (-1 when no PTP
+ * clock is registered). Always returns 0.
+ */
+static int gem_get_ts_info(struct net_device *dev,
+                          struct ethtool_ts_info *info)
+{
+       struct macb *bp = netdev_priv(dev);
+
+       if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) {
+               ethtool_op_get_ts_info(dev, info);
+               return 0;
+       }
+
+       info->so_timestamping =
+               SOF_TIMESTAMPING_TX_SOFTWARE |
+               SOF_TIMESTAMPING_RX_SOFTWARE |
+               SOF_TIMESTAMPING_SOFTWARE |
+               SOF_TIMESTAMPING_TX_HARDWARE |
+               SOF_TIMESTAMPING_RX_HARDWARE |
+               SOF_TIMESTAMPING_RAW_HARDWARE;
+       info->tx_types =
+               (1 << HWTSTAMP_TX_ONESTEP_SYNC) |
+               (1 << HWTSTAMP_TX_OFF) |
+               (1 << HWTSTAMP_TX_ON);
+       info->rx_filters =
+               (1 << HWTSTAMP_FILTER_NONE) |
+               (1 << HWTSTAMP_FILTER_ALL);
+
+       info->phc_index = bp->ptp_clock ? ptp_clock_index(bp->ptp_clock) : -1;
+
+       return 0;
+}
+
+/* GEM implementation of the macb-ptp interface; installed as bp->ptp_info
+ * when the hardware reports TSU support.
+ */
+static struct macb_ptp_info gem_ptp_info = {
+       .ptp_init        = gem_ptp_init,
+       .ptp_remove      = gem_ptp_remove,
+       .get_ptp_max_adj = gem_get_ptp_max_adj,
+       .get_tsu_rate    = gem_get_tsu_rate,
+       .get_ts_info     = gem_get_ts_info,
+       .get_hwtst       = gem_get_hwtst,
+       .set_hwtst       = gem_set_hwtst,
+};
+#endif
+
  static int macb_get_ts_info(struct net_device *netdev,
                             struct ethtool_ts_info *info)
  {
@@ -2600,6 +2725,16 @@ static void macb_configure_caps(struct macb *bp,
                 dcfg = gem_readl(bp, DCFG2);
                 if ((dcfg & (GEM_BIT(RX_PKT_BUFF) | GEM_BIT(TX_PKT_BUFF))) == 0)
                         bp->caps |= MACB_CAPS_FIFO_MODE;
+#ifdef CONFIG_MACB_USE_HWSTAMP
+               if (gem_has_ptp(bp)) {
+                       if (!GEM_BFEXT(TSU, gem_readl(bp, DCFG5)))
+                               pr_err("GEM doesn't support hardware ptp.\n");
+                       else {
+                               bp->hw_dma_cap |= HW_DMA_CAP_PTP;
+                               bp->ptp_info = &gem_ptp_info;
+                       }
+               }
+#endif
         }
  
         dev_dbg(&bp->pdev->dev, "Cadence caps 0x%08x\n", bp->caps);
@@ -2737,7 +2872,7 @@ static int macb_init(struct platform_device *pdev)
                         queue->IMR  = GEM_IMR(hw_q - 1);
                         queue->TBQP = GEM_TBQP(hw_q - 1);
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-                       if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+                       if (bp->hw_dma_cap & HW_DMA_CAP_64B)
                                 queue->TBQPH = GEM_TBQPH(hw_q - 1);
  #endif
                 } else {
@@ -2748,7 +2883,7 @@ static int macb_init(struct platform_device *pdev)
                         queue->IMR  = MACB_IMR;
                         queue->TBQP = MACB_TBQP;
  #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-                       if (bp->hw_dma_cap == HW_DMA_CAP_64B)
+                       if (bp->hw_dma_cap & HW_DMA_CAP_64B)
                                 queue->TBQPH = MACB_TBQPH;
  #endif
                 }
@@ -3205,7 +3340,9 @@ static const struct macb_config np4_config = {
  };
  
  static const struct macb_config zynqmp_config = {
-       .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
+       .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
+                       MACB_CAPS_JUMBO |
+                       MACB_CAPS_GEM_HAS_PTP,
         .dma_burst_length = 16,
         .clk_init = macb_clk_init,
         .init = macb_init,
@@ -3239,7 +3376,9 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids);
  #endif /* CONFIG_OF */
  
  static const struct macb_config default_gem_config = {
-       .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
+       .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE |
+                       MACB_CAPS_JUMBO |
+                       MACB_CAPS_GEM_HAS_PTP,
         .dma_burst_length = 16,
         .clk_init = macb_clk_init,
         .init = macb_init,
@@ -3328,19 +3467,17 @@ static int macb_probe(struct platform_device *pdev)
                 bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
         device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
  
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-       if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
-               dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
-               bp->hw_dma_cap = HW_DMA_CAP_64B;
-       } else
-               bp->hw_dma_cap = HW_DMA_CAP_32B;
-#endif
-
         spin_lock_init(&bp->lock);
  
         /* setup capabilities */
         macb_configure_caps(bp, macb_config);
  
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+       if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) {
+               dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+               bp->hw_dma_cap |= HW_DMA_CAP_64B;
+       }
+#endif
         platform_set_drvdata(pdev, dev);
  
         dev->irq = platform_get_irq(pdev, 0);
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c

new file mode 100755 (executable)

index 0000000..67cca08
--- /dev/null
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
@@ -0,0 +1,518 @@
+/**
+ * 1588 PTP support for Cadence GEM device.
+ *
+ * Copyright (C) 2017 Cadence Design Systems - http://www.cadence.com
+ *
+ * Authors: Rafal Ozieblo <rafalo@cadence.com>
+ *          Bartosz Folta <bfolta@cadence.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2  of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/time64.h>
+#include <linux/ptp_classify.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
+#include <linux/circ_buf.h>
+#include <linux/spinlock.h>
+
+#include "macb.h"
+
+#define  GEM_PTP_TIMER_NAME "gem-ptp-timer"
+
+/* Locate the timestamp words of @desc.
+ *
+ * They follow the base descriptor directly, or follow the 64-bit address
+ * extension when that is in use as well. Returns NULL when the descriptor
+ * layout carries no timestamp words.
+ */
+static struct macb_dma_desc_ptp *macb_ptp_desc(struct macb *bp,
+                                              struct macb_dma_desc *desc)
+{
+       size_t offset;
+
+       switch (bp->hw_dma_cap) {
+       case HW_DMA_CAP_PTP:
+               offset = sizeof(struct macb_dma_desc);
+               break;
+       case HW_DMA_CAP_64B_PTP:
+               offset = sizeof(struct macb_dma_desc)
+                       + sizeof(struct macb_dma_desc_64);
+               break;
+       default:
+               return NULL;
+       }
+
+       return (struct macb_dma_desc_ptp *)((u8 *)desc + offset);
+}
+
+/* Read the current TSU time into @ts (gettime64 callback).
+ *
+ * TN, TSL, TSH and TN again are read under tsu_clk_lock. If the second
+ * nanosecond read is smaller than the first, the nanosecond counter wrapped
+ * during the sequence and all three registers are read once more.
+ * Always returns 0.
+ */
+static int gem_tsu_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+       struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+       unsigned long flags;
+       long first, second;
+       u32 secl, sech;
+
+       spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+       first = gem_readl(bp, TN);
+       secl = gem_readl(bp, TSL);
+       sech = gem_readl(bp, TSH);
+       second = gem_readl(bp, TN);
+
+       /* test for nsec rollover */
+       if (first > second) {
+               /* if so, use later read & re-read seconds
+                * (assume all done within 1s)
+                */
+               ts->tv_nsec = gem_readl(bp, TN);
+               secl = gem_readl(bp, TSL);
+               sech = gem_readl(bp, TSH);
+       } else {
+               ts->tv_nsec = first;
+       }
+
+       spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+       /* combine the high and low seconds registers into one value */
+       ts->tv_sec = (((u64)sech << GEM_TSL_SIZE) | secl)
+                       & TSU_SEC_MAX_VAL;
+       return 0;
+}
+
+/* Program the TSU with the time in @ts (settime64 callback).
+ *
+ * Under tsu_clk_lock the nanosecond register is zeroed first, then the high
+ * and low seconds registers are written, and finally the nanoseconds.
+ * Always returns 0.
+ */
+static int gem_tsu_set_time(struct ptp_clock_info *ptp,
+                           const struct timespec64 *ts)
+{
+       struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+       unsigned long flags;
+       u32 ns, sech, secl;
+
+       /* split the seconds into the low and high register parts */
+       secl = (u32)ts->tv_sec;
+       sech = (ts->tv_sec >> GEM_TSL_SIZE) & ((1 << GEM_TSH_SIZE) - 1);
+       ns = ts->tv_nsec;
+
+       spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+
+       /* TSH doesn't latch the time and no atomicity! */
+       gem_writel(bp, TN, 0); /* clear to avoid overflow */
+       gem_writel(bp, TSH, sech);
+       /* write lower bits 2nd, for synchronized secs update */
+       gem_writel(bp, TSL, secl);
+       gem_writel(bp, TN, ns);
+
+       spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+
+       return 0;
+}
+
+/* Write the timer increment in @incr_spec to the TISUBN and TI registers,
+ * in that order, under tsu_clk_lock. Always returns 0.
+ */
+static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec)
+{
+       unsigned long flags;
+
+       /* tsu_timer_incr register must be written after
+        * the tsu_timer_incr_sub_ns register and the write operation
+        * will cause the value written to the tsu_timer_incr_sub_ns register
+        * to take effect.
+        */
+       spin_lock_irqsave(&bp->tsu_clk_lock, flags);
+       gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns));
+       gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns));
+       spin_unlock_irqrestore(&bp->tsu_clk_lock, flags);
+
+       return 0;
+}
+
+/* Adjust the clock frequency (adjfine callback).
+ *
+ * The base increment stored in bp->tsu_incr is scaled by @scaled_ppm, added
+ * to or subtracted from the base depending on the sign, and the result is
+ * written to the hardware through gem_tsu_incr_set(). The base increment
+ * itself is not modified. Always returns 0.
+ */
+static int gem_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+       struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+       struct tsu_incr incr_spec;
+       bool neg_adj = false;
+       u32 word;
+       u64 adj;
+
+       /* work with the magnitude; the sign is applied at the end */
+       if (scaled_ppm < 0) {
+               neg_adj = true;
+               scaled_ppm = -scaled_ppm;
+       }
+
+       /* Adjustment is relative to base frequency */
+       incr_spec.sub_ns = bp->tsu_incr.sub_ns;
+       incr_spec.ns = bp->tsu_incr.ns;
+
+       /* scaling: unused(8bit) | ns(8bit) | fractions(16bit) */
+       word = ((u64)incr_spec.ns << GEM_SUBNSINCR_SIZE) + incr_spec.sub_ns;
+       adj = (u64)scaled_ppm * word;
+       /* Divide with rounding, equivalent to floating dividing:
+        * (temp / USEC_PER_SEC) + 0.5
+        */
+       adj += (USEC_PER_SEC >> 1);
+       adj >>= GEM_SUBNSINCR_SIZE; /* remove fractions */
+       adj = div_u64(adj, USEC_PER_SEC);
+       adj = neg_adj ? (word - adj) : (word + adj);
+
+       /* split the fixed-point result back into ns and sub-ns fields */
+       incr_spec.ns = (adj >> GEM_SUBNSINCR_SIZE)
+                       & ((1 << GEM_NSINCR_SIZE) - 1);
+       incr_spec.sub_ns = adj & ((1 << GEM_SUBNSINCR_SIZE) - 1);
+       gem_tsu_incr_set(bp, &incr_spec);
+       return 0;
+}
+
+/* Shift the clock by @delta nanoseconds (adjtime callback).
+ *
+ * Offsets whose magnitude fits the TSU adjust register are applied through
+ * TA (sign in the ADDSUB bit, magnitude in the low bits). Larger offsets are
+ * applied by reading the current time, adding the signed offset and writing
+ * the result back. Always returns 0.
+ */
+static int gem_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+       struct macb *bp = container_of(ptp, struct macb, ptp_clock_info);
+       struct timespec64 now, then = ns_to_timespec64(delta);
+       u32 adj, sign = 0;
+
+       if (delta < 0) {
+               sign = 1;
+               delta = -delta;
+       }
+
+       if (delta > TSU_NSEC_MAX_VAL) {
+               gem_tsu_get_time(&bp->ptp_clock_info, &now);
+               /* 'then' was built from the signed delta, so it is already
+                * negative for a backwards step: always add it. Subtracting
+                * it would move the clock forward instead.
+                */
+               now = timespec64_add(now, then);
+
+               gem_tsu_set_time(&bp->ptp_clock_info,
+                                (const struct timespec64 *)&now);
+       } else {
+               adj = (sign << GEM_ADDSUB_OFFSET) | delta;
+
+               gem_writel(bp, TA, adj);
+       }
+
+       return 0;
+}
+
+/* enable callback: no request is supported, always returns -EOPNOTSUPP. */
+static int gem_ptp_enable(struct ptp_clock_info *ptp,
+                         struct ptp_clock_request *rq, int on)
+{
+       return -EOPNOTSUPP;
+}
+
+/* Template copied into bp->ptp_clock_info by gem_ptp_init(); max_adj is
+ * left at 0 here and filled in there.
+ */
+static struct ptp_clock_info gem_ptp_caps_template = {
+       .owner          = THIS_MODULE,
+       .name           = GEM_PTP_TIMER_NAME,
+       .max_adj        = 0,
+       .n_alarm        = 0,
+       .n_ext_ts       = 0,
+       .n_per_out      = 0,
+       .n_pins         = 0,
+       .pps            = 1,
+       .adjfine        = gem_ptp_adjfine,
+       .adjtime        = gem_ptp_adjtime,
+       .gettime64      = gem_tsu_get_time,
+       .settime64      = gem_tsu_set_time,
+       .enable         = gem_ptp_enable,
+};
+
+/* Derive the base timer increment (nanoseconds per TSU clock tick) from
+ * bp->tsu_rate and store it in bp->tsu_incr. The remainder of the division
+ * is kept as a fixed-point sub-nanosecond fraction.
+ */
+static void gem_ptp_init_timer(struct macb *bp)
+{
+       u32 left = 0;
+       u64 scaled;
+
+       bp->tsu_incr.ns = div_u64_rem(NSEC_PER_SEC, bp->tsu_rate, &left);
+       if (!left) {
+               /* the rate divides one second evenly: no fraction */
+               bp->tsu_incr.sub_ns = 0;
+               return;
+       }
+
+       scaled = (u64)left << GEM_SUBNSINCR_SIZE;
+       bp->tsu_incr.sub_ns = div_u64(scaled, bp->tsu_rate);
+}
+
+/* Start the TSU: load it with the current system real time, program the
+ * base increment from bp->tsu_incr and clear the adjust register.
+ */
+static void gem_ptp_init_tsu(struct macb *bp)
+{
+       struct timespec64 ts;
+
+       /* 1. get current system time */
+       ts = ns_to_timespec64(ktime_to_ns(ktime_get_real()));
+
+       /* 2. set ptp timer */
+       gem_tsu_set_time(&bp->ptp_clock_info, &ts);
+
+       /* 3. set PTP timer increment value to BASE_INCREMENT */
+       gem_tsu_incr_set(bp, &bp->tsu_incr);
+
+       gem_writel(bp, TA, 0);
+}
+
+/* Zero the stored base increment and the increment and adjust registers
+ * (TISUBN, TI, TA).
+ */
+static void gem_ptp_clear_timer(struct macb *bp)
+{
+       bp->tsu_incr.sub_ns = 0;
+       bp->tsu_incr.ns = 0;
+
+       gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, 0));
+       gem_writel(bp, TI, GEM_BF(NSINCR, 0));
+       gem_writel(bp, TA, 0);
+}
+
+/* Build a full timestamp in @ts from the two descriptor timestamp words.
+ *
+ * The descriptor supplies the nanoseconds and only the low bits of the
+ * seconds; the remaining seconds bits are taken from the current TSU time,
+ * with a correction for a wrap of the low bits. Always returns 0.
+ */
+static int gem_hw_timestamp(struct macb *bp, u32 dma_desc_ts_1,
+                           u32 dma_desc_ts_2, struct timespec64 *ts)
+{
+       struct timespec64 tsu;
+
+       ts->tv_sec = (GEM_BFEXT(DMA_SECH, dma_desc_ts_2) << GEM_DMA_SECL_SIZE) |
+                       GEM_BFEXT(DMA_SECL, dma_desc_ts_1);
+       ts->tv_nsec = GEM_BFEXT(DMA_NSEC, dma_desc_ts_1);
+
+       /* TSU overlapping workaround
+        * The timestamp only contains lower few bits of seconds,
+        * so add value from 1588 timer
+        */
+       gem_tsu_get_time(&bp->ptp_clock_info, &tsu);
+
+       /* If the top bit is set in the timestamp,
+        * but not in 1588 timer, it has rolled over,
+        * so subtract max size
+        */
+       if ((ts->tv_sec & (GEM_DMA_SEC_TOP >> 1)) &&
+           !(tsu.tv_sec & (GEM_DMA_SEC_TOP >> 1)))
+               ts->tv_sec -= GEM_DMA_SEC_TOP;
+
+       /* take the upper seconds bits from the TSU time */
+       ts->tv_sec += ((~GEM_DMA_SEC_MASK) & tsu.tv_sec);
+
+       return 0;
+}
+
+/* Attach the hardware RX timestamp of @desc to @skb.
+ *
+ * Nothing is done when the descriptor does not flag a valid timestamp, or
+ * when the descriptor layout carries no timestamp words.
+ */
+void gem_ptp_rxstamp(struct macb *bp, struct sk_buff *skb,
+                    struct macb_dma_desc *desc)
+{
+       struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+       struct macb_dma_desc_ptp *desc_ptp;
+       struct timespec64 ts;
+
+       if (GEM_BFEXT(DMA_RXVALID, desc->addr)) {
+               desc_ptp = macb_ptp_desc(bp, desc);
+               /* macb_ptp_desc() returns NULL when the descriptor has no
+                * timestamp words; never dereference that.
+                */
+               if (!desc_ptp)
+                       return;
+               gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &ts);
+               memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
+               shhwtstamps->hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+       }
+}
+
+/* Convert the saved descriptor timestamp words into a hardware timestamp
+ * and report it for @skb through skb_tstamp_tx().
+ */
+static void gem_tstamp_tx(struct macb *bp, struct sk_buff *skb,
+                         struct macb_dma_desc_ptp *desc_ptp)
+{
+       struct skb_shared_hwtstamps hwts;
+       struct timespec64 stamp;
+
+       memset(&hwts, 0, sizeof(hwts));
+       gem_hw_timestamp(bp, desc_ptp->ts_1, desc_ptp->ts_2, &stamp);
+       hwts.hwtstamp = ktime_set(stamp.tv_sec, stamp.tv_nsec);
+
+       skb_tstamp_tx(skb, &hwts);
+}
+
+/* Queue a transmitted skb and its descriptor timestamp for later delivery.
+ *
+ * The skb and a copy of the timestamp words are stored at the head of the
+ * per-queue timestamp ring and the flush work is scheduled.
+ *
+ * Returns 0 when the skb was queued (the ring now owns it), -EINVAL when the
+ * descriptor holds no valid timestamp or has no timestamp words, and -ENOMEM
+ * when the ring is full.
+ */
+int gem_ptp_txstamp(struct macb_queue *queue, struct sk_buff *skb,
+                   struct macb_dma_desc *desc)
+{
+       unsigned long tail = READ_ONCE(queue->tx_ts_tail);
+       unsigned long head = queue->tx_ts_head;
+       struct macb_dma_desc_ptp *desc_ptp;
+       struct gem_tx_ts *tx_timestamp;
+
+       if (!GEM_BFEXT(DMA_TXVALID, desc->ctrl))
+               return -EINVAL;
+
+       if (CIRC_SPACE(head, tail, PTP_TS_BUFFER_SIZE) == 0)
+               return -ENOMEM;
+
+       /* macb_ptp_desc() returns NULL when the descriptor has no timestamp
+        * words; bail out before the skb is marked as in progress.
+        */
+       desc_ptp = macb_ptp_desc(queue->bp, desc);
+       if (!desc_ptp)
+               return -EINVAL;
+
+       skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+       tx_timestamp = &queue->tx_timestamps[head];
+       tx_timestamp->skb = skb;
+       tx_timestamp->desc_ptp.ts_1 = desc_ptp->ts_1;
+       tx_timestamp->desc_ptp.ts_2 = desc_ptp->ts_2;
+       /* move head */
+       smp_store_release(&queue->tx_ts_head,
+                         (head + 1) & (PTP_TS_BUFFER_SIZE - 1));
+
+       schedule_work(&queue->tx_ts_task);
+       return 0;
+}
+
+/* Work handler that delivers queued TX timestamps.
+ *
+ * Walks the per-queue ring from the tail up to the head sampled on entry.
+ * For each entry the timestamp is reported through gem_tstamp_tx(), the skb
+ * is freed and the tail is advanced.
+ */
+static void gem_tx_timestamp_flush(struct work_struct *work)
+{
+       struct macb_queue *queue =
+                       container_of(work, struct macb_queue, tx_ts_task);
+       unsigned long head, tail;
+       struct gem_tx_ts *tx_ts;
+
+       /* take current head */
+       head = smp_load_acquire(&queue->tx_ts_head);
+       tail = queue->tx_ts_tail;
+
+       while (CIRC_CNT(head, tail, PTP_TS_BUFFER_SIZE)) {
+               tx_ts = &queue->tx_timestamps[tail];
+               gem_tstamp_tx(queue->bp, tx_ts->skb, &tx_ts->desc_ptp);
+               /* cleanup */
+               dev_kfree_skb_any(tx_ts->skb);
+               /* remove old tail */
+               smp_store_release(&queue->tx_ts_tail,
+                                 (tail + 1) & (PTP_TS_BUFFER_SIZE - 1));
+               tail = queue->tx_ts_tail;
+       }
+}
+
+/* Set up PTP support for @dev: compute the base timer increment, initialise
+ * the TSU lock and the per-queue TX timestamp rings, register the PTP clock
+ * and start the TSU.
+ *
+ * On registration failure bp->ptp_clock is left NULL and the TSU is not
+ * started.
+ */
+void gem_ptp_init(struct net_device *dev)
+{
+       struct macb *bp = netdev_priv(dev);
+       struct macb_queue *queue;
+       unsigned int q;
+
+       bp->ptp_clock_info = gem_ptp_caps_template;
+
+       /* nominal frequency and maximum adjustment in ppb */
+       bp->tsu_rate = bp->ptp_info->get_tsu_rate(bp);
+       bp->ptp_clock_info.max_adj = bp->ptp_info->get_ptp_max_adj();
+       gem_ptp_init_timer(bp);
+
+       /* Initialise the lock and the TX timestamp state before the clock is
+        * registered: the clock callbacks take tsu_clk_lock, and
+        * gem_ptp_txstamp() schedules tx_ts_task whether or not registration
+        * succeeded.
+        */
+       spin_lock_init(&bp->tsu_clk_lock);
+       for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+               queue->tx_ts_head = 0;
+               queue->tx_ts_tail = 0;
+               INIT_WORK(&queue->tx_ts_task, gem_tx_timestamp_flush);
+       }
+
+       bp->ptp_clock = ptp_clock_register(&bp->ptp_clock_info, &dev->dev);
+       if (IS_ERR(bp->ptp_clock)) {
+               pr_err("ptp clock register failed: %ld\n",
+                       PTR_ERR(bp->ptp_clock));
+               bp->ptp_clock = NULL;
+               return;
+       } else if (bp->ptp_clock == NULL) {
+               pr_err("ptp clock register failed\n");
+               return;
+       }
+
+       gem_ptp_init_tsu(bp);
+
+       dev_info(&bp->pdev->dev, "%s ptp clock registered.\n",
+                GEM_PTP_TIMER_NAME);
+}
+
+/* Tear down PTP support for @ndev: unregister the PTP clock if one was
+ * registered, then zero the timer increment and adjust registers.
+ *
+ * NOTE(review): pending tx_ts_task work is not cancelled here - confirm that
+ * it cannot still be queued at this point.
+ */
+void gem_ptp_remove(struct net_device *ndev)
+{
+       struct macb *bp = netdev_priv(ndev);
+
+       if (bp->ptp_clock)
+               ptp_clock_unregister(bp->ptp_clock);
+
+       gem_ptp_clear_timer(bp);
+
+       dev_info(&bp->pdev->dev, "%s ptp clock unregistered.\n",
+                GEM_PTP_TIMER_NAME);
+}
+
+/* Write the TX and RX timestamp modes to the TXBDCTRL and RXBDCTRL
+ * registers. Always returns 0.
+ */
+static int gem_ptp_set_ts_mode(struct macb *bp,
+                              enum macb_bd_control tx_bd_control,
+                              enum macb_bd_control rx_bd_control)
+{
+       gem_writel(bp, TXBDCTRL, GEM_BF(TXTSMODE, tx_bd_control));
+       gem_writel(bp, RXBDCTRL, GEM_BF(RXTSMODE, rx_bd_control));
+
+       return 0;
+}
+
+/* Copy the stored hardware timestamping configuration to user space.
+ *
+ * Returns 0 on success, -EOPNOTSUPP without the PTP descriptor capability
+ * and -EFAULT when the copy to user space fails.
+ */
+int gem_get_hwtst(struct net_device *dev, struct ifreq *rq)
+{
+       struct macb *bp = netdev_priv(dev);
+       struct hwtstamp_config *cfg = &bp->tstamp_config;
+
+       if (!(bp->hw_dma_cap & HW_DMA_CAP_PTP))
+               return -EOPNOTSUPP;
+
+       return copy_to_user(rq->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
+}
+
+/* Set (@enable non-zero) or clear the OSSMODE bit in the NCR register.
+ * Always returns 0.
+ */
+static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable)
+{
+       u32 ncr = macb_readl(bp, NCR);
+
+       if (enable)
+               ncr |= MACB_BIT(OSSMODE);
+       else
+               ncr &= ~MACB_BIT(OSSMODE);
+       macb_writel(bp, NCR, ncr);
+
+       return 0;
+}
+
+/* Apply a hardware timestamping configuration supplied by user space.
+ *
+ * The requested tx_type and rx_filter are translated to descriptor
+ * timestamp modes and written to the hardware; the configuration, with
+ * rx_filter replaced by the filter actually applied, is copied back.
+ *
+ * Returns 0 on success, -EOPNOTSUPP without the PTP descriptor capability,
+ * -EFAULT when a user copy fails, -EINVAL when flags is non-zero and
+ * -ERANGE for an unsupported tx_type or rx_filter.
+ *
+ * NOTE(review): the request is copied straight into bp->tstamp_config, so a
+ * request rejected below still overwrites the stored configuration -
+ * confirm whether that is intended.
+ */
+int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+       enum macb_bd_control tx_bd_control = TSTAMP_DISABLED;
+       enum macb_bd_control rx_bd_control = TSTAMP_DISABLED;
+       struct hwtstamp_config *tstamp_config;
+       struct macb *bp = netdev_priv(dev);
+       u32 regval;
+
+       tstamp_config = &bp->tstamp_config;
+       if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0)
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(tstamp_config, ifr->ifr_data,
+                          sizeof(*tstamp_config)))
+               return -EFAULT;
+
+       /* reserved for future extensions */
+       if (tstamp_config->flags)
+               return -EINVAL;
+
+       switch (tstamp_config->tx_type) {
+       case HWTSTAMP_TX_OFF:
+               break;
+       case HWTSTAMP_TX_ONESTEP_SYNC:
+               if (gem_ptp_set_one_step_sync(bp, 1) != 0)
+                       return -ERANGE;
+               tx_bd_control = TSTAMP_ALL_FRAMES;
+               break;
+       case HWTSTAMP_TX_ON:
+               /* leave one-step mode if an earlier request enabled it */
+               if (gem_ptp_set_one_step_sync(bp, 0) != 0)
+                       return -ERANGE;
+               tx_bd_control = TSTAMP_ALL_FRAMES;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       switch (tstamp_config->rx_filter) {
+       case HWTSTAMP_FILTER_NONE:
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+               /* every PTPv2 filter is widened to "all PTP frames" */
+               rx_bd_control =  TSTAMP_ALL_PTP_FRAMES;
+               tstamp_config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+               regval = macb_readl(bp, NCR);
+               macb_writel(bp, NCR, (regval | MACB_BIT(SRTSM)));
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+       case HWTSTAMP_FILTER_ALL:
+               rx_bd_control = TSTAMP_ALL_FRAMES;
+               tstamp_config->rx_filter = HWTSTAMP_FILTER_ALL;
+               break;
+       default:
+               tstamp_config->rx_filter = HWTSTAMP_FILTER_NONE;
+               return -ERANGE;
+       }
+
+       if (gem_ptp_set_ts_mode(bp, tx_bd_control, rx_bd_control) != 0)
+               return -ERANGE;
+
+       if (copy_to_user(ifr->ifr_data, tstamp_config, sizeof(*tstamp_config)))
+               return -EFAULT;
+       else
+               return 0;
+}
+
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c

index 573755b0a51b7f48c8565f6e5d638fbf86f99afd..49b80da51ba7307eb0d7fff8116203723b30cc9b 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -227,15 +227,14 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
                 nic->speed = mbx.link_status.speed;
                 nic->mac_type = mbx.link_status.mac_type;
                 if (nic->link_up) {
-                       netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
-                                   nic->netdev->name, nic->speed,
+                       netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
+                                   nic->speed,
                                     nic->duplex == DUPLEX_FULL ?
-                               "Full duplex" : "Half duplex");
+                                   "Full" : "Half");
                         netif_carrier_on(nic->netdev);
                         netif_tx_start_all_queues(nic->netdev);
                 } else {
-                       netdev_info(nic->netdev, "%s: Link is Down\n",
-                                   nic->netdev->name);
+                       netdev_info(nic->netdev, "Link is Down\n");
                         netif_carrier_off(nic->netdev);
                         netif_tx_stop_all_queues(nic->netdev);
                 }
@@ -721,8 +720,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
                 return;
  
         if (netif_msg_pktdata(nic)) {
-               netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name,
-                           skb, skb->len);
+               netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
                                skb->data, skb->len, true);
         }
@@ -854,10 +852,8 @@ done:
                         netif_tx_wake_queue(txq);
                         nic = nic->pnicvf;
                         this_cpu_inc(nic->drv_stats->txq_wake);
-                       if (netif_msg_tx_err(nic))
-                               netdev_warn(netdev,
-                                           "%s: Transmit queue wakeup SQ%d\n",
-                                           netdev->name, txq_idx);
+                       netif_warn(nic, tx_err, netdev,
+                                  "Transmit queue wakeup SQ%d\n", txq_idx);
                 }
         }
  
@@ -928,9 +924,8 @@ static void nicvf_handle_qs_err(unsigned long data)
  
  static void nicvf_dump_intr_status(struct nicvf *nic)
  {
-       if (netif_msg_intr(nic))
-               netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n",
-                           nic->netdev->name, nicvf_reg_read(nic, NIC_VF_INT));
+       netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
+                  nicvf_reg_read(nic, NIC_VF_INT));
  }
  
  static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
@@ -1212,10 +1207,8 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
                         netif_tx_wake_queue(txq);
                 } else {
                         this_cpu_inc(nic->drv_stats->txq_stop);
-                       if (netif_msg_tx_err(nic))
-                               netdev_warn(netdev,
-                                           "%s: Transmit ring full, stopping SQ%d\n",
-                                           netdev->name, qid);
+                       netif_warn(nic, tx_err, netdev,
+                                  "Transmit ring full, stopping SQ%d\n", qid);
                 }
                 return NETDEV_TX_BUSY;
         }
@@ -1600,9 +1593,7 @@ static void nicvf_tx_timeout(struct net_device *dev)
  {
         struct nicvf *nic = netdev_priv(dev);
  
-       if (netif_msg_tx_err(nic))
-               netdev_warn(dev, "%s: Transmit timed out, resetting\n",
-                           dev->name);
+       netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");
  
         this_cpu_inc(nic->drv_stats->tx_timeout);
         schedule_work(&nic->reset_task);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c

index 2b181762ad4908476c4fb78acd7ed6ea650762c5..d4496e9afcdf37d12043a9db6beb2dbe76e6daa6 100644 (file)
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -1811,11 +1811,9 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
  /* Check for errors in the receive cmp.queue entry */
  int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
  {
-       if (netif_msg_rx_err(nic))
-               netdev_err(nic->netdev,
-                          "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n",
-                          nic->netdev->name,
-                          cqe_rx->err_level, cqe_rx->err_opcode);
+       netif_err(nic, rx_err, nic->netdev,
+                 "RX error CQE err_level 0x%x err_opcode 0x%x\n",
+                 cqe_rx->err_level, cqe_rx->err_opcode);
  
         switch (cqe_rx->err_opcode) {
         case CQ_RX_ERROP_RE_PARTIAL:
diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig

index dc0850b3b517b9b02e3cd9a42cf98425a55d0df3..8870a9a798ca4e0245e6b05ac8d4ee2cf8499b46 100644 (file)
--- a/drivers/net/ethernet/freescale/fman/Kconfig
+++ b/drivers/net/ethernet/freescale/fman/Kconfig
@@ -2,6 +2,7 @@ config FSL_FMAN
         tristate "FMan support"
         depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
         select GENERIC_ALLOCATOR
+       depends on HAS_DMA
         select PHYLIB
         default n
         help
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c

index a79e257bc338f24e947e3be8f67b8d72e829ad5c..c4b4b0a1bbf0a60b9d757811b19e45e44afd3685 100644 (file)
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1718,7 +1718,7 @@ static int gfar_restore(struct device *dev)
         return 0;
  }
  
-static struct dev_pm_ops gfar_pm_ops = {
+static const struct dev_pm_ops gfar_pm_ops = {
         .suspend = gfar_suspend,
         .resume = gfar_resume,
         .freeze = gfar_suspend,
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h

index 04211ac73b36a3152b6642a4c797f738076bd601..7ba653af19cb980a5e28a59ab2844cac1d7813c5 100644 (file)
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -360,6 +360,7 @@ enum hnae_loop {
         MAC_INTERNALLOOP_MAC = 0,
         MAC_INTERNALLOOP_SERDES,
         MAC_INTERNALLOOP_PHY,
+       MAC_LOOP_PHY_NONE,
         MAC_LOOP_NONE,
  };
  
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c

index 00e57bbaf122775c20b11d5fd9eaedf80e2cd40f..a8db27e86a112cde0ba71dfe9cc3d39a2ae1d270 100644 (file)
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -259,67 +259,27 @@ static const char hns_nic_test_strs[][ETH_GSTRING_LEN] = {
  
  static int hns_nic_config_phy_loopback(struct phy_device *phy_dev, u8 en)
  {
-#define COPPER_CONTROL_REG 0
-#define PHY_POWER_DOWN BIT(11)
-#define PHY_LOOP_BACK BIT(14)
-       u16 val = 0;
-
-       if (phy_dev->is_c45) /* c45 branch adding for XGE PHY */
-               return -ENOTSUPP;
+       int err;
  
         if (en) {
-               /* speed : 1000M */
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 2);
-               phy_write(phy_dev, 21, 0x1046);
-
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-               /* Force Master */
-               phy_write(phy_dev, 9, 0x1F00);
-
-               /* Soft-reset */
-               phy_write(phy_dev, 0, 0x9140);
-               /* If autoneg disabled,two soft-reset operations */
-               phy_write(phy_dev, 0, 0x9140);
-
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
-
-               /* Default is 0x0400 */
-               phy_write(phy_dev, 1, 0x418);
-
-               /* Force 1000M Link, Default is 0x0200 */
-               phy_write(phy_dev, 7, 0x20C);
-
-               /* Powerup Fiber */
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
-               val = phy_read(phy_dev, COPPER_CONTROL_REG);
-               val &= ~PHY_POWER_DOWN;
-               phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
-               /* Enable Phy Loopback */
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-               val = phy_read(phy_dev, COPPER_CONTROL_REG);
-               val |= PHY_LOOP_BACK;
-               val &= ~PHY_POWER_DOWN;
-               phy_write(phy_dev, COPPER_CONTROL_REG, val);
+               /* Doing phy loopback in offline state, phy resuming is
+                * needed to power up the device.
+                */
+               err = phy_resume(phy_dev);
+               if (err)
+                       goto out;
+
+               err = phy_loopback(phy_dev, true);
         } else {
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 0xFA);
-               phy_write(phy_dev, 1, 0x400);
-               phy_write(phy_dev, 7, 0x200);
-
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 1);
-               val = phy_read(phy_dev, COPPER_CONTROL_REG);
-               val |= PHY_POWER_DOWN;
-               phy_write(phy_dev, COPPER_CONTROL_REG, val);
-
-               phy_write(phy_dev, HNS_PHY_PAGE_REG, 0);
-               phy_write(phy_dev, 9, 0xF00);
-
-               val = phy_read(phy_dev, COPPER_CONTROL_REG);
-               val &= ~PHY_LOOP_BACK;
-               val |= PHY_POWER_DOWN;
-               phy_write(phy_dev, COPPER_CONTROL_REG, val);
+               err = phy_loopback(phy_dev, false);
+               if (err)
+                       goto out;
+
+               err = phy_suspend(phy_dev);
         }
-       return 0;
+
+out:
+       return err;
  }
  
  static int __lb_setup(struct net_device *ndev,
@@ -332,10 +292,9 @@ static int __lb_setup(struct net_device *ndev,
  
         switch (loop) {
         case MAC_INTERNALLOOP_PHY:
-               if ((phy_dev) && (!phy_dev->is_c45)) {
-                       ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
-                       ret |= h->dev->ops->set_loopback(h, loop, 0x1);
-               }
+               ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
+               if (!ret)
+                       ret = h->dev->ops->set_loopback(h, loop, 0x1);
                 break;
         case MAC_INTERNALLOOP_MAC:
                 if ((h->dev->ops->set_loopback) &&
@@ -346,17 +305,17 @@ static int __lb_setup(struct net_device *ndev,
                 if (h->dev->ops->set_loopback)
                         ret = h->dev->ops->set_loopback(h, loop, 0x1);
                 break;
+       case MAC_LOOP_PHY_NONE:
+               ret = hns_nic_config_phy_loopback(phy_dev, 0x0);
         case MAC_LOOP_NONE:
-               if ((phy_dev) && (!phy_dev->is_c45))
-                       ret |= hns_nic_config_phy_loopback(phy_dev, 0x0);
-
-               if (h->dev->ops->set_loopback) {
+               if (!ret && h->dev->ops->set_loopback) {
                         if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII)
-                               ret |= h->dev->ops->set_loopback(h,
+                               ret = h->dev->ops->set_loopback(h,
                                         MAC_INTERNALLOOP_MAC, 0x0);
  
-                       ret |= h->dev->ops->set_loopback(h,
-                               MAC_INTERNALLOOP_SERDES, 0x0);
+                       if (!ret)
+                               ret = h->dev->ops->set_loopback(h,
+                                       MAC_INTERNALLOOP_SERDES, 0x0);
                 }
                 break;
         default:
@@ -582,13 +541,16 @@ static int __lb_run_test(struct net_device *ndev,
         return ret_val;
  }
  
-static int __lb_down(struct net_device *ndev)
+static int __lb_down(struct net_device *ndev, enum hnae_loop loop)
  {
         struct hns_nic_priv *priv = netdev_priv(ndev);
         struct hnae_handle *h = priv->ae_handle;
         int ret;
  
-       ret = __lb_setup(ndev, MAC_LOOP_NONE);
+       if (loop == MAC_INTERNALLOOP_PHY)
+               ret = __lb_setup(ndev, MAC_LOOP_PHY_NONE);
+       else
+               ret = __lb_setup(ndev, MAC_LOOP_NONE);
         if (ret)
                 netdev_err(ndev, "%s: __lb_setup return error(%d)!\n",
                            __func__,
@@ -644,7 +606,8 @@ static void hns_nic_self_test(struct net_device *ndev,
                         if (!data[test_index]) {
                                 data[test_index] = __lb_run_test(
                                         ndev, (enum hnae_loop)st_param[i][0]);
-                               (void)__lb_down(ndev);
+                               (void)__lb_down(ndev,
+                                               (enum hnae_loop)st_param[i][0]);
                         }
  
                         if (data[test_index])
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c

index 9a74c4e2e1932607f7665ded28c42fd5243080db..3e0a695537e285c676e449b6ed5981d41cc3c5a9 100644 (file)
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1914,7 +1914,7 @@ static struct vio_device_id ibmveth_device_table[] = {
  };
  MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
  
-static struct dev_pm_ops ibmveth_pm_ops = {
+static const struct dev_pm_ops ibmveth_pm_ops = {
         .resume = ibmveth_resume
  };
  
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c

index 87db1eb5cc444d168643ba6f3c67652d5b3ec83f..a3e6946796350d0a3bb79410d1f354a844ab7f60 100644 (file)
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -763,12 +763,6 @@ static int init_resources(struct ibmvnic_adapter *adapter)
         if (rc)
                 return rc;
  
-       rc = init_sub_crq_irqs(adapter);
-       if (rc) {
-               netdev_err(netdev, "failed to initialize sub crq irqs\n");
-               return -1;
-       }
-
         rc = init_stats_token(adapter);
         if (rc)
                 return rc;
@@ -1803,7 +1797,6 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
                         return rc;
         }
  
-       rc = init_sub_crq_irqs(adapter);
         return rc;
  }
  
@@ -3669,6 +3662,13 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
         if (rc) {
                 dev_err(dev, "Initialization of sub crqs failed\n");
                 release_crq_queue(adapter);
+               return rc;
+       }
+
+       rc = init_sub_crq_irqs(adapter);
+       if (rc) {
+               dev_err(dev, "Failed to initialize sub crq irqs\n");
+               release_crq_queue(adapter);
         }
  
         return rc;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c

index c1af47e45d3f23221f730f64375b9be771cef7c6..674773b28b2e55284080b3a591bf2ca5581fb7e8 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3280,7 +3280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
  
         if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
                 mlx4_dbg(dev,
-                        "updating vf %d port %d no link state HW enforcment\n",
+                        "updating vf %d port %d no link state HW enforcement\n",
                          vf, port);
         return 0;
  }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c

index 1dae8e40fb25f7da4ba7b0b7c9e4623cce1f63dc..5f41dc92aa6848aafd3e3a4b7a735ad33da984dd 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -238,7 +238,7 @@ static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state)
                 priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
         }
  
-       if (mlx4_en_setup_tc(dev, num_tcs))
+       if (mlx4_en_alloc_tx_queue_per_tc(dev, num_tcs))
                 return 1;
  
         return 0;
@@ -303,7 +303,7 @@ static int mlx4_en_ets_validate(struct mlx4_en_priv *priv, struct ieee_ets *ets)
         int has_ets_tc = 0;
  
         for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-               if (ets->prio_tc[i] >= MLX4_EN_NUM_UP) {
+               if (ets->prio_tc[i] >= MLX4_EN_NUM_UP_HIGH) {
                         en_err(priv, "Bad priority in UP <=> TC mapping. TC: %d, UP: %d\n",
                                         i, ets->prio_tc[i]);
                         return -EINVAL;
@@ -472,7 +472,7 @@ static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
                         goto err;
                 if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc))
                         goto err;
-               if (mlx4_en_setup_tc(dev, 0))
+               if (mlx4_en_alloc_tx_queue_per_tc(dev, 0))
                         goto err;
         }
  
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c

index e97fbf32759465ff5ee3c8ffa2de74c793233a8c..c751a1d434ad7167e6b65a62f46b7295044860f8 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1750,7 +1750,8 @@ static void mlx4_en_get_channels(struct net_device *dev,
         channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
  
         channel->rx_count = priv->rx_ring_num;
-       channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP;
+       channel->tx_count = priv->tx_ring_num[TX] /
+                           priv->prof->num_up;
  }
  
  static int mlx4_en_set_channels(struct net_device *dev,
@@ -1763,6 +1764,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
         int port_up = 0;
         int xdp_count;
         int err = 0;
+       u8 up;
  
         if (!channel->tx_count || !channel->rx_count)
                 return -EINVAL;
@@ -1773,18 +1775,19 @@ static int mlx4_en_set_channels(struct net_device *dev,
  
         mutex_lock(&mdev->state_lock);
         xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
-       if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) {
+       if (channel->tx_count * priv->prof->num_up + xdp_count >
+           MAX_TX_RINGS) {
                 err = -EINVAL;
                 en_err(priv,
                        "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
-                      channel->tx_count * MLX4_EN_NUM_UP + xdp_count,
+                      channel->tx_count * priv->prof->num_up  + xdp_count,
                        MAX_TX_RINGS);
                 goto out;
         }
  
         memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
         new_prof.num_tx_rings_p_up = channel->tx_count;
-       new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP;
+       new_prof.tx_ring_num[TX] = channel->tx_count * priv->prof->num_up;
         new_prof.tx_ring_num[TX_XDP] = xdp_count;
         new_prof.rx_ring_num = channel->rx_count;
  
@@ -1799,11 +1802,11 @@ static int mlx4_en_set_channels(struct net_device *dev,
  
         mlx4_en_safe_replace_resources(priv, tmp);
  
-       netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
         netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
  
-       if (netdev_get_num_tc(dev))
-               mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
+       up = (priv->prof->num_up == MLX4_EN_NUM_UP_LOW) ?
+                                   0 : priv->prof->num_up;
+       mlx4_en_setup_tc(dev, up);
  
         en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
         en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c

index 56cdf38d150e714d4b260a15e6921c5940216da8..2b0cbca4beb5f6bf97804b593ce1a493365adba6 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -169,8 +169,10 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
                 params->prof[i].tx_ppp = pfctx;
                 params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
                 params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+               params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
+               params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up;
                 params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
-                       MLX4_EN_NUM_UP;
+                       params->prof[i].num_up;
                 params->prof[i].rss_rings = 0;
                 params->prof[i].inline_thold = inline_thold;
         }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c

index 9da76e3be2fcda54dfcfd5ddf4429e68167c185f..3a291fc1780ab9193d7202be106f36ca8854b258 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -60,11 +60,11 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
         int i;
         unsigned int offset = 0;
  
-       if (up && up != MLX4_EN_NUM_UP)
+       if (up && up != MLX4_EN_NUM_UP_HIGH)
                 return -EINVAL;
  
         netdev_set_num_tc(dev, up);
-
+       netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
         /* Partition Tx queues evenly amongst UP's */
         for (i = 0; i < up; i++) {
                 netdev_set_tc_queue(dev, i, priv->num_tx_rings_p_up, offset);
@@ -86,6 +86,50 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up)
         return 0;
  }
  
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       struct mlx4_en_port_profile new_prof;
+       struct mlx4_en_priv *tmp;
+       int port_up = 0;
+       int err = 0;
+
+       tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+       if (!tmp)
+               return -ENOMEM;
+
+       mutex_lock(&mdev->state_lock);
+       memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+       new_prof.num_up = (tc == 0) ? MLX4_EN_NUM_UP_LOW :
+                                     MLX4_EN_NUM_UP_HIGH;
+       new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up *
+                                  new_prof.num_up;
+       err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
+       if (err)
+               goto out;
+
+       if (priv->port_up) {
+               port_up = 1;
+               mlx4_en_stop_port(dev, 1);
+       }
+
+       mlx4_en_safe_replace_resources(priv, tmp);
+       if (port_up) {
+               err = mlx4_en_start_port(dev);
+               if (err) {
+                       en_err(priv, "Failed starting port for setup TC\n");
+                       goto out;
+               }
+       }
+
+       err = mlx4_en_setup_tc(dev, tc);
+out:
+       mutex_unlock(&mdev->state_lock);
+       kfree(tmp);
+       return err;
+}
+
  static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
                               u32 chain_index, __be16 proto,
                               struct tc_to_netdev *tc)
@@ -93,9 +137,12 @@ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle,
         if (tc->type != TC_SETUP_MQPRIO)
                 return -EINVAL;
  
+       if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH)
+               return -EINVAL;
+
         tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
  
-       return mlx4_en_setup_tc(dev, tc->mqprio->num_tc);
+       return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc);
  }
  
  #ifdef CONFIG_RFS_ACCEL
@@ -2144,7 +2191,7 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
  
         memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
                sizeof(dst->hwtstamp_config));
-       dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
+       dst->num_tx_rings_p_up = prof->num_tx_rings_p_up;
         dst->rx_ring_num = prof->rx_ring_num;
         dst->flags = prof->flags;
         dst->mdev = src->mdev;
@@ -2197,6 +2244,7 @@ static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
                 dst->tx_ring[t] = src->tx_ring[t];
                 dst->tx_cq[t] = src->tx_cq[t];
         }
+       dst->num_tx_rings_p_up = src->num_tx_rings_p_up;
         dst->rx_ring_num = src->rx_ring_num;
         memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
  }
@@ -2780,7 +2828,7 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
         if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) {
                 tx_changed = 1;
                 new_prof.tx_ring_num[TX] =
-                       MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP);
+                       MAX_TX_RINGS - ALIGN(xdp_ring_num, priv->prof->num_up);
                 en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
         }
  
@@ -3271,7 +3319,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
                 priv->flags |= MLX4_EN_DCB_ENABLED;
                 priv->cee_config.pfc_state = false;
  
-               for (i = 0; i < MLX4_EN_NUM_UP; i++)
+               for (i = 0; i < MLX4_EN_NUM_UP_HIGH; i++)
                         priv->cee_config.dcb_pfc[i] = pfc_disabled;
  
                 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c

index a6b0db0e038373348fcbe05d6490fcb35bb3c7fb..86d2d42d658de422b241c617adb8d107c51a1b0b 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -63,7 +63,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
         context->local_qpn = cpu_to_be32(qpn);
         context->pri_path.ackto = 1 & 0x07;
         context->pri_path.sched_queue = 0x83 | (priv->port - 1) << 6;
-       if (user_prio >= 0) {
+       /* force user priority per tx ring */
+       if (user_prio >= 0 && priv->prof->num_up == MLX4_EN_NUM_UP_HIGH) {
                 context->pri_path.sched_queue |= user_prio << 3;
                 context->pri_path.feup = MLX4_FEUP_FORCE_ETH_UP;
         }
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c

index 7d69d939ee2dceb09ce3b9b06c5db11875f3c525..4f3a9b27ce4ad647a8a932001f5b2e16e3525b92 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -691,15 +691,11 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
  {
         struct mlx4_en_priv *priv = netdev_priv(dev);
         u16 rings_p_up = priv->num_tx_rings_p_up;
-       u8 up = 0;
  
         if (netdev_get_num_tc(dev))
                 return skb_tx_hash(dev, skb);
  
-       if (skb_vlan_tag_present(skb))
-               up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
-
-       return fallback(dev, skb) % rings_p_up + up * rings_p_up;
+       return fallback(dev, skb) % rings_p_up;
  }
  
  static void mlx4_bf_copy(void __iomem *dst, const void *src,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c

index 457e070bca46ea41587a3d28f2363e52debaab0c..a27c9c13a36ed11d577e7cd9cff1e2a9daec137d 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -91,7 +91,7 @@ module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
  MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
                            "probe_vf=port1,port2,port1+2");
  
-int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
+static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
  module_param_named(log_num_mgm_entry_size,
                         mlx4_log_num_mgm_entry_size, int, 0444);
  MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h

index 6ea2b7a0c34d355aec75ae35031b5cd3a620af8d..30616cd0140d573573f5f334b937522a9a7c0974 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -230,7 +230,6 @@ do {                                                                        \
  #define mlx4_warn(mdev, format, ...)                                   \
         dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
  
-extern int mlx4_log_num_mgm_entry_size;
  extern int log_mtts_per_seg;
  extern int mlx4_internal_err_reset;
  
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

index 963b77d51b482545b0c6797b6afa76651abf4607..d350b2158104e933921a986c7ca84c2e634e1498 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -115,11 +115,12 @@
  #define MLX4_EN_SMALL_PKT_SIZE         64
  #define MLX4_EN_MIN_TX_RING_P_UP       1
  #define MLX4_EN_MAX_TX_RING_P_UP       32
-#define MLX4_EN_NUM_UP                 8
+#define MLX4_EN_NUM_UP_LOW             1
+#define MLX4_EN_NUM_UP_HIGH            8
  #define MLX4_EN_DEF_RX_RING_SIZE       1024
  #define MLX4_EN_DEF_TX_RING_SIZE       MLX4_EN_DEF_RX_RING_SIZE
  #define MAX_TX_RINGS                   (MLX4_EN_MAX_TX_RING_P_UP * \
-                                        MLX4_EN_NUM_UP)
+                                        MLX4_EN_NUM_UP_HIGH)
  
  #define MLX4_EN_DEFAULT_TX_WORK                256
  
@@ -386,6 +387,7 @@ struct mlx4_en_port_profile {
         u8 rx_ppp;
         u8 tx_pause;
         u8 tx_ppp;
+       u8 num_up;
         int rss_rings;
         int inline_thold;
         struct hwtstamp_config hwtstamp_config;
@@ -485,7 +487,7 @@ enum dcb_pfc_type {
  
  struct mlx4_en_cee_config {
         bool    pfc_state;
-       enum    dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP];
+       enum    dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP_HIGH];
  };
  #endif
  
@@ -761,6 +763,7 @@ extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops;
  #endif
  
  int mlx4_en_setup_tc(struct net_device *dev, u8 up);
+int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc);
  
  #ifdef CONFIG_RFS_ACCEL
  void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig

index cf1ef48bfd8d2330c08cbef9fab63217bb2bc2a5..5aee05992f278c91f1ef4a8d051a8dae3e19823c 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -11,9 +11,13 @@ config MLX5_CORE
           Core driver for low level functionality of the ConnectX-4 and
           Connect-IB cards by Mellanox Technologies.
  
+config MLX5_ACCEL
+       bool
+
  config MLX5_FPGA
          bool "Mellanox Technologies Innova support"
          depends on MLX5_CORE
+       select MLX5_ACCEL
          ---help---
            Build support for the Innova family of network cards by Mellanox
            Technologies. Innova network cards are comprised of a ConnectX chip
@@ -48,3 +52,15 @@ config MLX5_CORE_IPOIB
         default n
         ---help---
           MLX5 IPoIB offloads & acceleration support.
+
+config MLX5_EN_IPSEC
+       bool "IPSec XFRM cryptography-offload acceleration"
+       depends on MLX5_ACCEL
+       depends on MLX5_CORE_EN
+       depends on XFRM_OFFLOAD
+       depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+       default n
+       ---help---
+         Build support for IPsec cryptography-offload acceleration in the NIC.
+         Note: Support for hardware with this capability needs to be selected
+         for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile

index 5ad093a21a6e6d70152917be6e0f34cd901c88c6..ca367445f8642efdd79ac31dae9ecac8ef376a93 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,9 +4,12 @@ subdir-ccflags-y += -I$(src)
  mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                 health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
                 mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-               fs_counters.o rl.o lag.o dev.o
+               fs_counters.o rl.o lag.o dev.o lib/gid.o
  
-mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o
+mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
+
+mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
+               fpga/ipsec.o
  
  mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
                 en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
@@ -16,3 +19,6 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
  mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
  
  mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+
+mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
+               en_accel/ipsec_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c

new file mode 100644 (file)

index 0000000..53e69ed
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "accel/ipsec.h"
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+
+/* Hand an SADB command to the FPGA backend when @mdev reports the IPSec
+ * device capability; otherwise return ERR_PTR(-EOPNOTSUPP).
+ */
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+                                  struct mlx5_accel_ipsec_sa *cmd)
+{
+       if (MLX5_IPSEC_DEV(mdev))
+               return mlx5_fpga_ipsec_sa_cmd_exec(mdev, cmd);
+
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
+/* Pass-through to mlx5_fpga_ipsec_sa_cmd_wait() for the context returned by
+ * mlx5_accel_ipsec_sa_cmd_exec(); returns its result. Unlike the exec call,
+ * no device capability check is made here.
+ */
+int mlx5_accel_ipsec_sa_cmd_wait(void *ctx)
+{
+       return mlx5_fpga_ipsec_sa_cmd_wait(ctx);
+}
+
+/* Return the IPSec capability bits of @mdev as obtained from
+ * mlx5_fpga_ipsec_device_caps(). MLX5_IPSEC_DEV() tests this value against
+ * MLX5_ACCEL_IPSEC_DEVICE.
+ */
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+       return mlx5_fpga_ipsec_device_caps(mdev);
+}
+
+/* Pass-through to mlx5_fpga_ipsec_counters_count(); returns its result.
+ * NOTE(review): presumably the number of counters that
+ * mlx5_accel_ipsec_counters_read() can fill - confirm against the FPGA code.
+ */
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+       return mlx5_fpga_ipsec_counters_count(mdev);
+}
+
+/* Pass-through to mlx5_fpga_ipsec_counters_read() with @counters and @count
+ * forwarded unchanged; returns its result.
+ */
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+                                  unsigned int count)
+{
+       return mlx5_fpga_ipsec_counters_read(mdev, counters, count);
+}
+
+/* Pass-through to mlx5_fpga_ipsec_init(); returns its result */
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+       return mlx5_fpga_ipsec_init(mdev);
+}
+
+/* Pass-through to mlx5_fpga_ipsec_cleanup() */
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+       mlx5_fpga_ipsec_cleanup(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h

new file mode 100644 (file)

index 0000000..d6e20fe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_ACCEL_IPSEC_H__
+#define __MLX5_ACCEL_IPSEC_H__
+
+#ifdef CONFIG_MLX5_ACCEL
+
+#include <linux/mlx5/driver.h>
+
+/* Capability bits returned by mlx5_accel_ipsec_device_caps() */
+enum {
+       MLX5_ACCEL_IPSEC_DEVICE = BIT(1), /* Tested by MLX5_IPSEC_DEV() */
+       MLX5_ACCEL_IPSEC_IPV6 = BIT(2),
+       MLX5_ACCEL_IPSEC_ESP = BIT(3),
+       MLX5_ACCEL_IPSEC_LSO = BIT(4),
+};
+
+/* Bits of the 'flags' byte in struct mlx5_accel_ipsec_sa */
+#define MLX5_IPSEC_SADB_IP_AH       BIT(7)
+#define MLX5_IPSEC_SADB_IP_ESP      BIT(6)
+#define MLX5_IPSEC_SADB_SA_VALID    BIT(5)
+#define MLX5_IPSEC_SADB_SPI_EN      BIT(4)
+#define MLX5_IPSEC_SADB_DIR_SX      BIT(3)
+#define MLX5_IPSEC_SADB_IPV6        BIT(2)
+
+/* Opcodes carried in the 'cmd' field of struct mlx5_accel_ipsec_sa */
+enum {
+       MLX5_IPSEC_CMD_ADD_SA = 0,
+       MLX5_IPSEC_CMD_DEL_SA = 1,
+};
+
+/* Values for the 'enc_mode' byte of struct mlx5_accel_ipsec_sa */
+enum mlx5_accel_ipsec_enc_mode {
+       MLX5_IPSEC_SADB_MODE_NONE = 0,
+       MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128 = 1,
+       MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128 = 3,
+};
+
+/* Non-zero when @mdev reports the MLX5_ACCEL_IPSEC_DEVICE capability */
+#define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \
+                             MLX5_ACCEL_IPSEC_DEVICE)
+
+/* SADB command as passed to mlx5_accel_ipsec_sa_cmd_exec(). The structure is
+ * packed and its multi-byte fields are big-endian.
+ */
+struct mlx5_accel_ipsec_sa {
+       __be32 cmd; /* MLX5_IPSEC_CMD_* opcode */
+       u8 key_enc[32]; /* A 128 bit key is stored twice, once per half */
+       u8 key_auth[32];
+       __be32 sip[4]; /* Source address; IPv4 uses only sip[3] */
+       __be32 dip[4]; /* Destination address; IPv4 uses only dip[3] */
+       union {
+               struct {
+                       __be32 reserved;
+                       u8 salt_iv[8];
+                       __be32 salt;
+               } __packed gcm;
+               struct {
+                       u8 salt[16];
+               } __packed cbc;
+       };
+       __be32 spi;
+       __be32 sw_sa_handle; /* SADB_RX handle of the driver's SA entry */
+       __be16 tfclen;
+       u8 enc_mode; /* enum mlx5_accel_ipsec_enc_mode */
+       u8 sip_masklen; /* Source prefix length in bits */
+       u8 dip_masklen; /* Destination prefix length in bits */
+       u8 flags; /* MLX5_IPSEC_SADB_* bits */
+       u8 reserved[2];
+} __packed;
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_exec - Execute an IPSec SADB command
+ * @mdev: mlx5 device
+ * @cmd: command to execute
+ * May be called from atomic context. Returns context pointer, or error
+ * Caller must eventually call mlx5_accel_ipsec_sa_cmd_wait from non-atomic
+ * context, to cleanup the context pointer
+ */
+void *mlx5_accel_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+                                  struct mlx5_accel_ipsec_sa *cmd);
+
+/**
+ * mlx5_accel_ipsec_sa_cmd_wait - Wait for command execution completion
+ * @context: Context pointer returned from call to mlx5_accel_ipsec_sa_cmd_exec
+ * Sleeps (killable) until command execution is complete.
+ * Returns the command result, or -EINTR if killed
+ */
+int mlx5_accel_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_accel_ipsec_device_caps(struct mlx5_core_dev *mdev);
+
+unsigned int mlx5_accel_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+                                  unsigned int count);
+
+int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+#define MLX5_IPSEC_DEV(mdev) false
+
+/* Stub for builds without CONFIG_MLX5_ACCEL: nothing to set up, so report
+ * success.
+ */
+static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev)
+{
+       return 0;
+}
+
+/* Stub for builds without CONFIG_MLX5_ACCEL: nothing to tear down */
+static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif
+
+#endif /* __MLX5_ACCEL_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c

index 4d5bd01f1ebb76b9f32f96eb4c4e0dedf2cdc66e..f5a2c605749ff2dad1db091bb93d3b6957fa4058 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -307,6 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
         case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
         case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
         case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
+       case MLX5_CMD_OP_FPGA_DESTROY_QP:
                 return MLX5_CMD_STAT_OK;
  
         case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,6 +420,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
         case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
         case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
         case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+       case MLX5_CMD_OP_FPGA_CREATE_QP:
+       case MLX5_CMD_OP_FPGA_MODIFY_QP:
+       case MLX5_CMD_OP_FPGA_QUERY_QP:
+       case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
                 *status = MLX5_DRIVER_STATUS_ABORTED;
                 *synd = MLX5_DRIVER_SYND;
                 return -EIO;
@@ -585,6 +590,11 @@ const char *mlx5_command_str(int command)
         MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
         MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
         MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
+       MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
+       MLX5_COMMAND_STR_CASE(FPGA_MODIFY_QP);
+       MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
+       MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
+       MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
         default: return "unknown command opcode";
         }
  }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h

index eef0a50e2388e812f4d760378a53660bb6c76a94..e1b7ddfecd011436c1520edc93eb30e8e15221d4 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -328,6 +328,7 @@ struct mlx5e_sq_dma {
  
  enum {
         MLX5E_SQ_STATE_ENABLED,
+       MLX5E_SQ_STATE_IPSEC,
  };
  
  struct mlx5e_sq_wqe_info {
@@ -784,6 +785,9 @@ struct mlx5e_priv {
  
         const struct mlx5e_profile *profile;
         void                      *ppriv;
+#ifdef CONFIG_MLX5_EN_IPSEC
+       struct mlx5e_ipsec        *ipsec;
+#endif
  };
  
  struct mlx5e_profile {
@@ -833,7 +837,6 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
  void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
  void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
  void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
  
  void mlx5e_rx_am(struct mlx5e_rq *rq);
  void mlx5e_rx_am_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c

new file mode 100644 (file)

index 0000000..bac5103
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/aead.h>
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+
+/* Driver bookkeeping for one offloaded xfrm state; a pointer to it is kept
+ * in x->xso.offload_handle.
+ */
+struct mlx5e_ipsec_sa_entry {
+       struct hlist_node hlist; /* Item in SADB_RX hashtable */
+       unsigned int handle; /* Handle in SADB_RX */
+       struct xfrm_state *x; /* The xfrm state this entry was created for */
+       struct mlx5e_ipsec *ipsec; /* IPSec context of the offloading netdev */
+       void *context; /* DEL_SA command issued by del_state, waited for
+                       * in free_state
+                       */
+};
+
+/* Find the xfrm state registered in SADB_RX under @handle.
+ *
+ * Returns the state with an extra reference taken (xfrm_state_hold), or
+ * NULL when no entry carries that handle.
+ */
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
+                                             unsigned int handle)
+{
+       struct mlx5e_ipsec_sa_entry *entry;
+       struct xfrm_state *found = NULL;
+
+       rcu_read_lock();
+       hash_for_each_possible_rcu(ipsec->sadb_rx, entry, hlist, handle) {
+               /* Different handles may share a bucket */
+               if (entry->handle != handle)
+                       continue;
+
+               found = entry->x;
+               xfrm_state_hold(found);
+               break;
+       }
+       rcu_read_unlock();
+
+       return found;
+}
+
+/* Allocate a handle for @sa_entry and publish the entry in the SADB_RX
+ * hashtable, where mlx5e_ipsec_sadb_rx_lookup() can find it.
+ *
+ * The handle is allocated before sadb_rx_lock is taken: ida_simple_get() is
+ * called with GFP_KERNEL and may sleep, which is not allowed while holding a
+ * spinlock with interrupts disabled. The IDA serializes concurrent callers
+ * internally, so it does not need the SADB lock.
+ *
+ * Returns 0 on success, or the negative errno from ida_simple_get().
+ */
+static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+       struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+       unsigned long flags;
+       int ret;
+
+       ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
+       if (ret < 0)
+               return ret;
+
+       spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
+       sa_entry->handle = ret;
+       hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
+       spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
+
+       return 0;
+}
+
+/* Unlink @sa_entry from the SADB_RX hashtable. The handle itself is only
+ * released later, by mlx5e_ipsec_sadb_rx_free().
+ */
+static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+       spinlock_t *lock = &sa_entry->ipsec->sadb_rx_lock;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(lock, irq_flags);
+       hash_del_rcu(&sa_entry->hlist);
+       spin_unlock_irqrestore(lock, irq_flags);
+}
+
+/* Give the SADB_RX handle of @sa_entry back to the allocator */
+static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
+{
+       struct mlx5e_ipsec *ctx = sa_entry->ipsec;
+       unsigned long irq_flags;
+
+       /* The entry was unlinked with hash_del_rcu() in sadb_rx_del; wait
+        * until that removal is visible to the data path before the handle
+        * is released.
+        */
+       synchronize_rcu();
+
+       spin_lock_irqsave(&ctx->sadb_rx_lock, irq_flags);
+       ida_simple_remove(&ctx->halloc, sa_entry->handle);
+       spin_unlock_irqrestore(&ctx->sadb_rx_lock, irq_flags);
+}
+
+/* Map the AEAD key of @x to the device encryption mode.
+ *
+ * alg_key_len is in bits; it is rounded up to bytes and the 4 trailing salt
+ * bytes are subtracted to get the cipher key length. 16 and 32 byte keys
+ * select the AES-GCM-128 and AES-GCM-256 modes. Any other length is logged
+ * and -1 is returned.
+ */
+static enum mlx5_accel_ipsec_enc_mode mlx5e_ipsec_enc_mode(struct xfrm_state *x)
+{
+       unsigned int key_len = (x->aead->alg_key_len + 7) / 8 - 4;
+
+       switch (key_len) {
+       case 16:
+               return MLX5_IPSEC_SADB_MODE_AES_GCM_128_AUTH_128;
+       case 32:
+               return MLX5_IPSEC_SADB_MODE_AES_GCM_256_AUTH_128;
+       default:
+               /* key_len is unsigned, so it is printed with %u */
+               netdev_warn(x->xso.dev, "Bad key len: %u for alg %s\n",
+                           key_len, x->aead->alg_name);
+               return -1;
+       }
+}
+
+/* Fill @hw_sa, the SADB command for opcode @op, from the xfrm state attached
+ * to @sa_entry.
+ *
+ * @hw_sa is zeroed first. Key material (cipher key, IV seed and salt) is
+ * copied only for MLX5_IPSEC_CMD_ADD_SA; addresses, SPI, handle, protocol,
+ * encryption mode and direction are filled in for every opcode.
+ */
+static void mlx5e_ipsec_build_hw_sa(u32 op, struct mlx5e_ipsec_sa_entry *sa_entry,
+                                   struct mlx5_accel_ipsec_sa *hw_sa)
+{
+       struct xfrm_state *x = sa_entry->x;
+       struct aead_geniv_ctx *geniv_ctx;
+       unsigned int crypto_data_len;
+       struct crypto_aead *aead;
+       unsigned int key_len;
+       int ivsize;
+
+       memset(hw_sa, 0, sizeof(*hw_sa));
+
+       if (op == MLX5_IPSEC_CMD_ADD_SA) {
+               crypto_data_len = (x->aead->alg_key_len + 7) / 8;
+               key_len = crypto_data_len - 4; /* 4 bytes salt at end */
+               aead = x->data;
+               geniv_ctx = crypto_aead_ctx(aead);
+               ivsize = crypto_aead_ivsize(aead);
+
+               memcpy(&hw_sa->key_enc, x->aead->alg_key, key_len);
+               /* Duplicate 128 bit key twice according to HW layout */
+               if (key_len == 16)
+                       memcpy(&hw_sa->key_enc[16], x->aead->alg_key, key_len);
+               memcpy(&hw_sa->gcm.salt_iv, geniv_ctx->salt, ivsize);
+               /* Copy the salt bytewise instead of loading it through a
+                * cast __be32 pointer, which relied on the key buffer being
+                * 4-byte aligned.
+                */
+               memcpy(&hw_sa->gcm.salt, x->aead->alg_key + key_len,
+                      sizeof(hw_sa->gcm.salt));
+       }
+
+       hw_sa->cmd = htonl(op);
+       hw_sa->flags |= MLX5_IPSEC_SADB_SA_VALID | MLX5_IPSEC_SADB_SPI_EN;
+       if (x->props.family == AF_INET) {
+               /* IPv4 addresses occupy the last word of the address fields */
+               hw_sa->sip[3] = x->props.saddr.a4;
+               hw_sa->dip[3] = x->id.daddr.a4;
+               hw_sa->sip_masklen = 32;
+               hw_sa->dip_masklen = 32;
+       } else {
+               memcpy(hw_sa->sip, x->props.saddr.a6, sizeof(hw_sa->sip));
+               memcpy(hw_sa->dip, x->id.daddr.a6, sizeof(hw_sa->dip));
+               hw_sa->sip_masklen = 128;
+               hw_sa->dip_masklen = 128;
+               hw_sa->flags |= MLX5_IPSEC_SADB_IPV6;
+       }
+       hw_sa->spi = x->id.spi;
+       hw_sa->sw_sa_handle = htonl(sa_entry->handle);
+       switch (x->id.proto) {
+       case IPPROTO_ESP:
+               hw_sa->flags |= MLX5_IPSEC_SADB_IP_ESP;
+               break;
+       case IPPROTO_AH:
+               hw_sa->flags |= MLX5_IPSEC_SADB_IP_AH;
+               break;
+       default:
+               break;
+       }
+       hw_sa->enc_mode = mlx5e_ipsec_enc_mode(x);
+       /* Everything that is not an inbound SA is marked with DIR_SX */
+       if (!(x->xso.flags & XFRM_OFFLOAD_INBOUND))
+               hw_sa->flags |= MLX5_IPSEC_SADB_DIR_SX;
+}
+
+/* Check that xfrm state @x is one this driver can offload.
+ *
+ * Each unsupported property is reported through netdev_info() on the
+ * offloading netdev and rejected with -EINVAL. Returns 0 when every check
+ * passes.
+ */
+static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
+{
+       struct net_device *netdev = x->xso.dev;
+       struct mlx5e_priv *priv;
+
+       priv = netdev_priv(netdev);
+
+       if (x->props.aalgo != SADB_AALG_NONE) {
+               netdev_info(netdev, "Cannot offload authenticated xfrm states\n");
+               return -EINVAL;
+       }
+       if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
+               netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n");
+               return -EINVAL;
+       }
+       if (x->props.calgo != SADB_X_CALG_NONE) {
+               netdev_info(netdev, "Cannot offload compressed xfrm states\n");
+               return -EINVAL;
+       }
+       if (x->props.flags & XFRM_STATE_ESN) {
+               netdev_info(netdev, "Cannot offload ESN xfrm states\n");
+               return -EINVAL;
+       }
+       if (x->props.family != AF_INET &&
+           x->props.family != AF_INET6) {
+               netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n");
+               return -EINVAL;
+       }
+       if (x->props.mode != XFRM_MODE_TRANSPORT &&
+           x->props.mode != XFRM_MODE_TUNNEL) {
+               /* Logged with netdev_info() like every other rejection */
+               netdev_info(netdev, "Only transport and tunnel xfrm states may be offloaded\n");
+               return -EINVAL;
+       }
+       if (x->id.proto != IPPROTO_ESP) {
+               netdev_info(netdev, "Only ESP xfrm state may be offloaded\n");
+               return -EINVAL;
+       }
+       if (x->encap) {
+               netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n");
+               return -EINVAL;
+       }
+       /* x->aead must be checked before its fields are read below */
+       if (!x->aead) {
+               netdev_info(netdev, "Cannot offload xfrm states without aead\n");
+               return -EINVAL;
+       }
+       if (x->aead->alg_icv_len != 128) {
+               netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n");
+               return -EINVAL;
+       }
+       /* Key length in bits, including the 32 bit salt */
+       if ((x->aead->alg_key_len != 128 + 32) &&
+           (x->aead->alg_key_len != 256 + 32)) {
+               netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+               return -EINVAL;
+       }
+       if (x->tfcpad) {
+               netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n");
+               return -EINVAL;
+       }
+       if (!x->geniv) {
+               netdev_info(netdev, "Cannot offload xfrm states without geniv\n");
+               return -EINVAL;
+       }
+       if (strcmp(x->geniv, "seqiv")) {
+               netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n");
+               return -EINVAL;
+       }
+       if (x->props.family == AF_INET6 &&
+           !(mlx5_accel_ipsec_device_caps(priv->mdev) & MLX5_ACCEL_IPSEC_IPV6)) {
+               netdev_info(netdev, "IPv6 xfrm state offload is not supported by this device\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/* xdo_dev_state_add callback: validate @x, create its SA entry and issue
+ * the ADD_SA command to the device.
+ *
+ * On success the entry is stored in x->xso.offload_handle and 0 is returned.
+ * On failure the steps taken so far are undone and a negative errno is
+ * returned.
+ */
+static int mlx5e_xfrm_add_state(struct xfrm_state *x)
+{
+       struct net_device *netdev = x->xso.dev;
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_ipsec_sa_entry *entry;
+       struct mlx5_accel_ipsec_sa hw_sa;
+       void *cmd_ctx;
+       int err;
+
+       err = mlx5e_xfrm_validate_state(x);
+       if (err)
+               return err;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->x = x;
+       entry->ipsec = priv->ipsec;
+
+       /* Inbound SAs are entered into SADB_RX up front, so that incoming
+        * packets processed before the add SA completion is received can
+        * still be handled.
+        */
+       if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+               err = mlx5e_ipsec_sadb_rx_add(entry);
+               if (err) {
+                       netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
+                       goto err_entry;
+               }
+       }
+
+       mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, entry, &hw_sa);
+       cmd_ctx = mlx5_accel_ipsec_sa_cmd_exec(entry->ipsec->en_priv->mdev, &hw_sa);
+       if (IS_ERR(cmd_ctx)) {
+               err = PTR_ERR(cmd_ctx);
+               goto err_sadb_rx;
+       }
+
+       err = mlx5_accel_ipsec_sa_cmd_wait(cmd_ctx);
+       if (err)
+               goto err_sadb_rx;
+
+       x->xso.offload_handle = (unsigned long)entry;
+       return 0;
+
+err_sadb_rx:
+       if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+               mlx5e_ipsec_sadb_rx_del(entry);
+               mlx5e_ipsec_sadb_rx_free(entry);
+       }
+err_entry:
+       kfree(entry);
+       return err;
+}
+
+/* xdo_dev_state_delete callback: unlink an inbound SA from SADB_RX and issue
+ * the DEL_SA command without waiting for it. The command context is saved in
+ * the entry; mlx5e_xfrm_free_state() waits on it.
+ */
+static void mlx5e_xfrm_del_state(struct xfrm_state *x)
+{
+       struct mlx5e_ipsec_sa_entry *entry;
+       struct mlx5_accel_ipsec_sa hw_sa;
+       void *cmd_ctx;
+
+       entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+       if (!entry)
+               return;
+
+       WARN_ON(entry->x != x);
+
+       if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+               mlx5e_ipsec_sadb_rx_del(entry);
+
+       mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, entry, &hw_sa);
+       cmd_ctx = mlx5_accel_ipsec_sa_cmd_exec(entry->ipsec->en_priv->mdev, &hw_sa);
+       /* A failed command leaves entry->context untouched */
+       if (!IS_ERR(cmd_ctx))
+               entry->context = cmd_ctx;
+}
+
+/* xdo_dev_state_free callback: wait for the DEL_SA command issued by
+ * mlx5e_xfrm_del_state(), then release the SADB_RX handle (inbound SAs only)
+ * and the SA entry.
+ *
+ * NOTE(review): entry->context is still NULL here if the DEL_SA command
+ * could not be issued - confirm that mlx5_accel_ipsec_sa_cmd_wait() copes
+ * with a NULL context.
+ */
+static void mlx5e_xfrm_free_state(struct xfrm_state *x)
+{
+       struct mlx5e_ipsec_sa_entry *entry;
+       int err;
+
+       entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
+       if (!entry)
+               return;
+
+       WARN_ON(entry->x != x);
+
+       err = mlx5_accel_ipsec_sa_cmd_wait(entry->context);
+       entry->context = NULL;
+       if (err) {
+               /* Leftover object will leak */
+               return;
+       }
+
+       if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
+               mlx5e_ipsec_sadb_rx_free(entry);
+
+       kfree(entry);
+}
+
+/* Allocate the IPSec context of @priv and attach it as priv->ipsec.
+ *
+ * Devices without the IPSec capability are left alone and 0 is returned.
+ * Returns -ENOMEM when the context cannot be allocated, 0 otherwise.
+ */
+int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+       struct mlx5e_ipsec *ctx;
+
+       if (!MLX5_IPSEC_DEV(priv->mdev)) {
+               netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
+               return 0;
+       }
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       ctx->en_priv = priv;
+       hash_init(ctx->sadb_rx);
+       spin_lock_init(&ctx->sadb_rx_lock);
+       ida_init(&ctx->halloc);
+
+       priv->ipsec = ctx;
+       netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
+       return 0;
+}
+
+/* Release the IPSec context of @priv, if one was attached by
+ * mlx5e_ipsec_init(), and clear priv->ipsec.
+ */
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+       struct mlx5e_ipsec *ctx = priv->ipsec;
+
+       if (ctx) {
+               ida_destroy(&ctx->halloc);
+               kfree(ctx);
+               priv->ipsec = NULL;
+       }
+}
+
+/* xdo_dev_offload_ok callback: tell whether @skb may be offloaded for @x.
+ * Packets with IPv4 options or with an IPv6 extension header are refused.
+ */
+static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+       /* Offload with IPv4 options is not supported yet */
+       if (x->props.family == AF_INET)
+               return ip_hdr(skb)->ihl <= 5;
+
+       /* Offload with IPv6 extension headers is not supported yet */
+       return !ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr);
+}
+
+/* xfrm offload callbacks; installed as netdev->xfrmdev_ops by
+ * mlx5e_ipsec_build_netdev()
+ */
+static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
+       .xdo_dev_state_add      = mlx5e_xfrm_add_state,
+       .xdo_dev_state_delete   = mlx5e_xfrm_del_state,
+       .xdo_dev_state_free     = mlx5e_xfrm_free_state,
+       .xdo_dev_offload_ok     = mlx5e_ipsec_offload_ok,
+};
+
+/* Advertise the IPSec offload features of @priv's netdev.
+ *
+ * Features are enabled in three steps, each depending on the previous one:
+ * ESP offload, ESP TX checksum, and ESP GSO. The function returns at the
+ * first step whose device capabilities are missing.
+ */
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+       struct net_device *netdev = priv->netdev;
+       struct mlx5_core_dev *mdev = priv->mdev;
+
+       /* No IPSec context was attached to this netdev */
+       if (!priv->ipsec)
+               return;
+
+       if (!((mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_ESP) &&
+             MLX5_CAP_ETH(mdev, swp))) {
+               mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
+               return;
+       }
+
+       mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
+       netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
+       netdev->features |= NETIF_F_HW_ESP;
+       netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+       if (!MLX5_CAP_ETH(mdev, swp_csum)) {
+               mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
+               return;
+       }
+
+       netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
+       netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
+
+       if (!((mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_LSO) &&
+             MLX5_CAP_ETH(mdev, swp_lso))) {
+               mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
+               return;
+       }
+
+       mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
+       netdev->features |= NETIF_F_GSO_ESP;
+       netdev->hw_features |= NETIF_F_GSO_ESP;
+       netdev->hw_enc_features |= NETIF_F_GSO_ESP;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h

new file mode 100644 (file)

index 0000000..56e00ba
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_H__
+#define __MLX5E_IPSEC_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/mlx5/device.h>
+#include <net/xfrm.h>
+#include <linux/idr.h>
+
+/* Size, in bits, of the mlx5e_ipsec::sadb_rx hashtable (DECLARE_HASHTABLE) */
+#define MLX5E_IPSEC_SADB_RX_BITS 10
+/* NOTE(review): presumably the ethertype and the length in bytes of the
+ * IPSec metadata header - the users are not visible in this part of the
+ * file; confirm.
+ */
+#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
+#define MLX5E_METADATA_ETHER_LEN 8
+
+struct mlx5e_priv;
+
+/* Driver-side IPSec counters, kept as atomic64_t.
+ * NOTE(review): the code updating them is outside this part of the file;
+ * the names suggest one counter per RX/TX drop reason - confirm.
+ */
+struct mlx5e_ipsec_sw_stats {
+       atomic64_t ipsec_rx_drop_sp_alloc;
+       atomic64_t ipsec_rx_drop_sadb_miss;
+       atomic64_t ipsec_rx_drop_syndrome;
+       atomic64_t ipsec_tx_drop_bundle;
+       atomic64_t ipsec_tx_drop_no_state;
+       atomic64_t ipsec_tx_drop_not_ip;
+       atomic64_t ipsec_tx_drop_trailer;
+       atomic64_t ipsec_tx_drop_metadata;
+};
+
+/* IPSec counters stored as plain u64 values.
+ * NOTE(review): presumably filled from mlx5_accel_ipsec_counters_read(), in
+ * which case the field order would have to match the device counter order -
+ * the code doing so is not visible here; confirm.
+ */
+struct mlx5e_ipsec_stats {
+       u64 ipsec_dec_in_packets;
+       u64 ipsec_dec_out_packets;
+       u64 ipsec_dec_bypass_packets;
+       u64 ipsec_enc_in_packets;
+       u64 ipsec_enc_out_packets;
+       u64 ipsec_enc_bypass_packets;
+       u64 ipsec_dec_drop_packets;
+       u64 ipsec_dec_auth_fail_packets;
+       u64 ipsec_enc_drop_packets;
+       u64 ipsec_add_sa_success;
+       u64 ipsec_add_sa_fail;
+       u64 ipsec_del_sa_success;
+       u64 ipsec_del_sa_fail;
+       u64 ipsec_cmd_drop;
+};
+
+/* IPSec offload context of one mlx5e netdev; allocated by mlx5e_ipsec_init()
+ * and reachable through mlx5e_priv::ipsec.
+ */
+struct mlx5e_ipsec {
+       struct mlx5e_priv *en_priv; /* Back-pointer to the owning priv */
+       DECLARE_HASHTABLE(sadb_rx, MLX5E_IPSEC_SADB_RX_BITS); /* SA entries by handle */
+       spinlock_t sadb_rx_lock; /* Protects sadb_rx and halloc */
+       struct ida halloc; /* Allocator of SADB_RX handles */
+       struct mlx5e_ipsec_sw_stats sw_stats;
+       struct mlx5e_ipsec_stats stats;
+};
+
+void mlx5e_ipsec_build_inverse_table(void);
+int mlx5e_ipsec_init(struct mlx5e_priv *priv);
+void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
+void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
+
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
+
+struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
+                                             unsigned int handle);
+
+#else
+
+/* No-op stub for builds without CONFIG_MLX5_EN_IPSEC */
+static inline void mlx5e_ipsec_build_inverse_table(void)
+{
+}
+
+/* Stub for builds without CONFIG_MLX5_EN_IPSEC: nothing to set up, so
+ * report success.
+ */
+static inline int mlx5e_ipsec_init(struct mlx5e_priv *priv)
+{
+       return 0;
+}
+
+/* No-op stub for builds without CONFIG_MLX5_EN_IPSEC */
+static inline void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
+{
+}
+
+/* No-op stub for builds without CONFIG_MLX5_EN_IPSEC: no IPSec features are
+ * added to the netdev.
+ */
+static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+/* Stub for builds without CONFIG_MLX5_EN_IPSEC: always returns 0 */
+static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+       return 0;
+}
+
+/* Stub for builds without CONFIG_MLX5_EN_IPSEC: leaves @data untouched and
+ * returns 0.
+ */
+static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
+                                         uint8_t *data)
+{
+       return 0;
+}
+
+/* No-op stub for builds without CONFIG_MLX5_EN_IPSEC */
+static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+}
+
+/* Stub for builds without CONFIG_MLX5_EN_IPSEC: leaves @data untouched and
+ * returns 0.
+ */
+static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+       return 0;
+}
+
+#endif
+
+#endif /* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c

new file mode 100644 (file)

index 0000000..4a78aef
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <crypto/aead.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+
+#include "en_accel/ipsec_rxtx.h"
+#include "en_accel/ipsec.h"
+#include "en.h"
+
+/* Metadata syndrome values accepted on receive. mlx5e_ipsec_build_sp() maps
+ * them to CRYPTO_SUCCESS / CRYPTO_TUNNEL_ESP_AUTH_FAILED and drops the packet
+ * on any other value.
+ */
+enum {
+       MLX5E_IPSEC_RX_SYNDROME_DECRYPTED = 0x11,
+       MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED = 0x12,
+};
+
+/* Receive-side view of the 5-byte metadata payload. Packed, so sa_handle
+ * starts at byte offset 1; it is big-endian and converted with be32_to_cpu()
+ * before the SADB lookup.
+ */
+struct mlx5e_ipsec_rx_metadata {
+       unsigned char   reserved;
+       __be32          sa_handle;
+} __packed;
+
+/* Metadata syndrome values written on transmit by mlx5e_ipsec_set_metadata():
+ * the LSO variant is used for GSO skbs, the plain one otherwise.
+ */
+enum {
+       MLX5E_IPSEC_TX_SYNDROME_OFFLOAD = 0x8,
+       MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP = 0x9,
+};
+
+/* Transmit-side view of the 5-byte metadata payload (2 + 2 + 1 bytes, packed).
+ * mss_inv and seq are only filled in for GSO skbs; they stay zero otherwise
+ * because the payload is cleared when the metadata is inserted.
+ */
+struct mlx5e_ipsec_tx_metadata {
+       __be16 mss_inv;         /* 1/MSS in 16bit fixed point, only for LSO */
+       __be16 seq;             /* LSBs of the first TCP seq, only for LSO */
+       u8     esp_next_proto;  /* Next protocol of ESP */
+} __packed;
+
+/* Inline metadata carried in the frame right after an Ethernet header whose
+ * ethertype is MLX5E_METADATA_ETHER_TYPE. Packed layout: 1 byte syndrome,
+ * 5 bytes of direction-specific content, 2 bytes ethertype.
+ */
+struct mlx5e_ipsec_metadata {
+       unsigned char syndrome;
+       union {
+               unsigned char raw[5];
+               /* from FPGA to host, on successful decrypt */
+               struct mlx5e_ipsec_rx_metadata rx;
+               /* from host to FPGA */
+               struct mlx5e_ipsec_tx_metadata tx;
+       } __packed content;
+       /* packet type ID field */
+       /* On transmit this field overlays the packet's original ethertype,
+        * which mlx5e_ipsec_add_metadata() leaves in place.
+        */
+       __be16 ethertype;
+} __packed;
+
+#define MAX_LSO_MSS 2048
+
+/* Pre-calculated (Q0.16) fixed-point inverse 1/x function */
+static __be16 mlx5e_ipsec_inverse_table[MAX_LSO_MSS];
+
+/* Return 1/gso_size of @skb as a big-endian Q0.16 value for the TX metadata.
+ *
+ * Values below MAX_LSO_MSS come from the pre-calculated table. Nothing in
+ * this file bounds gso_size by MAX_LSO_MSS, so larger values are computed
+ * directly instead of reading past the end of the table. The direct form
+ * yields the same value the table would hold: floor(2^16 / mss).
+ */
+static inline __be16 mlx5e_ipsec_mss_inv(struct sk_buff *skb)
+{
+       unsigned int mss = skb_shinfo(skb)->gso_size;
+
+       if (unlikely(mss >= MAX_LSO_MSS))
+               return htons((1U << 16) / mss);
+
+       return mlx5e_ipsec_inverse_table[mss];
+}
+
+/* Insert an (uninitialised-syndrome) metadata header between the MAC
+ * addresses and the original ethertype of @skb.
+ *
+ * Returns a pointer to the metadata inside the skb, or ERR_PTR(-ENOMEM) when
+ * the headroom could not be made available. The payload bytes are zeroed;
+ * the syndrome is left for the caller to fill in.
+ */
+static struct mlx5e_ipsec_metadata *mlx5e_ipsec_add_metadata(struct sk_buff *skb)
+{
+       struct mlx5e_ipsec_metadata *mdata;
+       struct ethhdr *eth;
+
+       /* Any skb_cow_head() failure is reported as -ENOMEM */
+       if (unlikely(skb_cow_head(skb, sizeof(*mdata))))
+               return ERR_PTR(-ENOMEM);
+
+       /* Grow the packet at the front and keep mac_header on the new start */
+       eth = (struct ethhdr *)skb_push(skb, sizeof(*mdata));
+       skb->mac_header -= sizeof(*mdata);
+       mdata = (struct mlx5e_ipsec_metadata *)(eth + 1);
+
+       /* Only the two MAC addresses move forward; the original ethertype
+        * stays where it was and ends up as the tail of the metadata.
+        */
+       memmove(skb->data, skb->data + sizeof(*mdata),
+               2 * ETH_ALEN);
+
+       eth->h_proto = cpu_to_be16(MLX5E_METADATA_ETHER_TYPE);
+
+       memset(mdata->content.raw, 0, sizeof(mdata->content.raw));
+       return mdata;
+}
+
+/* Cut the trailer off the end of @skb and shrink the IP length to match.
+ *
+ * The trailer length is the authentication tag size of @x plus the pad
+ * length (a single byte read from alen + 2 bytes before the end of the
+ * packet) plus 2.
+ *
+ * Returns 0 on success or a negative errno when the pad length cannot be
+ * read or the skb cannot be trimmed; the caller drops the packet then.
+ */
+static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+{
+       unsigned int alen = crypto_aead_authsize(x->data);
+       unsigned int trailer_len;
+       u8 plen;
+       int ret;
+
+       ret = skb_copy_bits(skb, skb->len - alen - 2, &plen, 1);
+       if (unlikely(ret))
+               return ret;
+
+       trailer_len = alen + plen + 2;
+
+       /* Trimming can fail; do not patch the IP header of an untrimmed skb */
+       ret = pskb_trim(skb, skb->len - trailer_len);
+       if (unlikely(ret))
+               return ret;
+
+       /* Look the header up only now: trimming may have reallocated the
+        * skb head, which would leave an earlier pointer dangling.
+        */
+       if (skb->protocol == htons(ETH_P_IP)) {
+               struct iphdr *ipv4hdr = ip_hdr(skb);
+
+               ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+               ip_send_check(ipv4hdr);
+       } else {
+               struct ipv6hdr *ipv6hdr = ipv6_hdr(skb);
+
+               ipv6hdr->payload_len = htons(ntohs(ipv6hdr->payload_len) -
+                                            trailer_len);
+       }
+       return 0;
+}
+
+/* Fill the SWP (offsets and flags) fields of the WQE Ethernet segment @eseg
+ * from the header offsets of @skb, for XFRM mode @mode. @xo supplies the
+ * protocol carried inside ESP.
+ */
+static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
+                               struct mlx5_wqe_eth_seg *eseg, u8 mode,
+                               struct xfrm_offload *xo)
+{
+       u8 proto;
+
+       /* Tunnel Mode:
+        * SWP:      OutL3       InL3  InL4
+        * Pkt: MAC  IP     ESP  IP    L4
+        *
+        * Transport Mode:
+        * SWP:      OutL3       InL4
+        *           InL3
+        * Pkt: MAC  IP     ESP  L4
+        *
+        * Offsets are in 2-byte words, counting from start of frame
+        */
+       eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+       if (skb->protocol == htons(ETH_P_IPV6))
+               eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+
+       if (mode == XFRM_MODE_TUNNEL) {
+               /* The L4 protocol comes from the inner IP header */
+               eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+               if (xo->proto == IPPROTO_IPV6) {
+                       eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+                       proto = inner_ipv6_hdr(skb)->nexthdr;
+               } else {
+                       proto = inner_ip_hdr(skb)->protocol;
+               }
+       } else {
+               /* Any other mode: inner L3 is the same header as outer L3 */
+               eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+               if (skb->protocol == htons(ETH_P_IPV6))
+                       eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+               proto = xo->proto;
+       }
+       /* Only UDP and TCP get an inner L4 offset; for every other protocol
+        * swp_inner_l4_offset is left as the caller provided it.
+        */
+       switch (proto) {
+       case IPPROTO_UDP:
+               eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+               /* Fall through */
+       case IPPROTO_TCP:
+               eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+               break;
+       }
+}
+
+/* Store the 64-bit sequence number of @xo, big-endian, in the 8 bytes that
+ * follow the ESP header of @skb (the IV field).
+ */
+static void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_offload *xo)
+{
+       u64 sn = ((u64)xo->seq.hi << 32) + xo->seq.low;
+       __be64 iv = cpu_to_be64(sn);
+       int esp_offset = skb_transport_offset(skb);
+
+       /* The IV sits directly behind the ESP header */
+       skb_store_bits(skb, esp_offset + sizeof(struct ip_esp_hdr), &iv,
+                      sizeof(iv));
+}
+
+/* Fill the transmit metadata @mdata for @skb: choose the syndrome, add the
+ * LSO fields for GSO packets and record the protocol carried inside ESP.
+ */
+static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
+                                    struct mlx5e_ipsec_metadata *mdata,
+                                    struct xfrm_offload *xo)
+{
+       if (!skb_is_gso(skb)) {
+               mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD;
+       } else {
+               /* Add LSO metadata indication */
+               struct ip_esp_hdr *esph = ip_esp_hdr(skb);
+               struct tcphdr *tcph = inner_tcp_hdr(skb);
+
+               netdev_dbg(skb->dev, "   Offloading GSO packet outer L3 %u; L4 %u; Inner L3 %u; L4 %u\n",
+                          skb->network_header,
+                          skb->transport_header,
+                          skb->inner_network_header,
+                          skb->inner_transport_header);
+               netdev_dbg(skb->dev, "   Offloading GSO packet of len %u; mss %u; TCP sp %u dp %u seq 0x%x ESP seq 0x%x\n",
+                          skb->len, skb_shinfo(skb)->gso_size,
+                          ntohs(tcph->source), ntohs(tcph->dest),
+                          ntohl(tcph->seq), ntohl(esph->seq_no));
+
+               mdata->syndrome = MLX5E_IPSEC_TX_SYNDROME_OFFLOAD_WITH_LSO_TCP;
+               mdata->content.tx.mss_inv = mlx5e_ipsec_mss_inv(skb);
+               /* Only the low 16 bits of the first TCP sequence number */
+               mdata->content.tx.seq = htons(ntohl(tcph->seq) & 0xFFFF);
+       }
+
+       mdata->content.tx.esp_next_proto = xo->proto;
+
+       netdev_dbg(skb->dev, "   TX metadata syndrome %u proto %u mss_inv %04x seq %04x\n",
+                  mdata->syndrome, mdata->content.tx.esp_next_proto,
+                  ntohs(mdata->content.tx.mss_inv),
+                  ntohs(mdata->content.tx.seq));
+}
+
+/* Transmit hook: prepare an skb that carries XFRM offload state for the
+ * IPSec accelerator.
+ *
+ * Returns @skb (possibly modified) on success or when it has no offload
+ * state. Returns NULL after freeing @skb and bumping the matching
+ * ipsec_tx_drop_* counter when the packet cannot be offloaded.
+ */
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+                                         struct mlx5e_tx_wqe *wqe,
+                                         struct sk_buff *skb)
+{
+       struct xfrm_offload *xo = xfrm_offload(skb);
+       struct mlx5e_ipsec_metadata *mdata;
+       struct mlx5e_priv *priv;
+       struct xfrm_state *x;
+       bool is_ip;
+
+       /* Nothing to offload: hand the packet back untouched */
+       if (!xo)
+               return skb;
+
+       priv = netdev_priv(netdev);
+
+       /* Exactly one state in the secpath is supported */
+       if (unlikely(skb->sp->len != 1)) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle);
+               goto drop;
+       }
+
+       x = xfrm_input_state(skb);
+       if (unlikely(!x)) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_no_state);
+               goto drop;
+       }
+
+       is_ip = skb->protocol == htons(ETH_P_IP) ||
+               skb->protocol == htons(ETH_P_IPV6);
+       if (unlikely(!x->xso.offload_handle || !is_ip)) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
+               goto drop;
+       }
+
+       /* The trailer is only stripped for non-GSO packets */
+       if (!skb_is_gso(skb) &&
+           unlikely(mlx5e_ipsec_remove_trailer(skb, x))) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
+               goto drop;
+       }
+
+       mdata = mlx5e_ipsec_add_metadata(skb);
+       if (unlikely(IS_ERR(mdata))) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
+               goto drop;
+       }
+
+       mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+       mlx5e_ipsec_set_iv(skb, xo);
+       mlx5e_ipsec_set_metadata(skb, mdata, xo);
+
+       return skb;
+
+drop:
+       kfree_skb(skb);
+       return NULL;
+}
+
+/* Attach a secpath to a received @skb from its inline metadata @mdata.
+ *
+ * Looks up the xfrm_state by the SA handle in the metadata, appends it to
+ * the skb's secpath and records the crypto result in the offload state.
+ *
+ * Returns the xfrm_state, or NULL after bumping the matching
+ * ipsec_rx_drop_* counter. The skb is not freed here; that is left to the
+ * caller.
+ */
+static inline struct xfrm_state *
+mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb,
+                    struct mlx5e_ipsec_metadata *mdata)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct xfrm_offload *xo;
+       struct xfrm_state *xs;
+       u32 sa_handle;
+
+       skb->sp = secpath_dup(skb->sp);
+       if (unlikely(!skb->sp)) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc);
+               return NULL;
+       }
+
+       sa_handle = be32_to_cpu(mdata->content.rx.sa_handle);
+       xs = mlx5e_ipsec_sadb_rx_lookup(priv->ipsec, sa_handle);
+       if (unlikely(!xs)) {
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
+               return NULL;
+       }
+
+       /* The state is appended before the syndrome is checked, so on the
+        * unknown-syndrome path below it is already part of the secpath.
+        */
+       skb->sp->xvec[skb->sp->len++] = xs;
+       skb->sp->olen++;
+
+       xo = xfrm_offload(skb);
+       xo->flags = CRYPTO_DONE;
+       switch (mdata->syndrome) {
+       case MLX5E_IPSEC_RX_SYNDROME_DECRYPTED:
+               xo->status = CRYPTO_SUCCESS;
+               break;
+       case MLX5E_IPSEC_RX_SYNDROME_AUTH_FAILED:
+               xo->status = CRYPTO_TUNNEL_ESP_AUTH_FAILED;
+               break;
+       default:
+               atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_syndrome);
+               return NULL;
+       }
+       return xs;
+}
+
+/* Receive hook: consume the inline IPSec metadata of @skb, if present.
+ *
+ * Packets that are too short or whose ethertype is not
+ * MLX5E_METADATA_ETHER_TYPE are returned unchanged. Otherwise the secpath is
+ * built from the metadata and the metadata is removed from the frame.
+ *
+ * Returns the skb to pass up the stack, or NULL when the packet was dropped
+ * (and freed) because the secpath could not be built.
+ */
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+                                         struct sk_buff *skb)
+{
+       struct mlx5e_ipsec_metadata *mdata;
+       struct ethhdr *old_eth;
+       struct ethhdr *new_eth;
+       struct xfrm_state *xs;
+       __be16 *ethtype;
+
+       /* Detect inline metadata */
+       if (skb->len < ETH_HLEN + MLX5E_METADATA_ETHER_LEN)
+               return skb;
+       ethtype = (__be16 *)(skb->data + ETH_ALEN * 2);
+       if (*ethtype != cpu_to_be16(MLX5E_METADATA_ETHER_TYPE))
+               return skb;
+
+       /* Use the metadata */
+       mdata = (struct mlx5e_ipsec_metadata *)(skb->data + ETH_HLEN);
+       xs = mlx5e_ipsec_build_sp(netdev, skb, mdata);
+       if (unlikely(!xs)) {
+               kfree_skb(skb);
+               return NULL;
+       }
+
+       /* Remove the metadata from the buffer */
+       /* Slide the two MAC addresses towards the payload so they end up
+        * right in front of the ethertype stored at the end of the metadata.
+        */
+       old_eth = (struct ethhdr *)skb->data;
+       new_eth = (struct ethhdr *)(skb->data + MLX5E_METADATA_ETHER_LEN);
+       memmove(new_eth, old_eth, 2 * ETH_ALEN);
+       /* Ethertype is already in its new place */
+       skb_pull_inline(skb, MLX5E_METADATA_ETHER_LEN);
+
+       return skb;
+}
+
+/* Report whether @skb is bound to an offloaded state: true only when its
+ * secpath is non-empty and the first state has a non-zero offload handle.
+ * @netdev and @features are not consulted.
+ */
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+                              netdev_features_t features)
+{
+       struct xfrm_state *x;
+
+       if (!skb->sp || !skb->sp->len)
+               return false;
+
+       x = skb->sp->xvec[0];
+       return x && x->xso.offload_handle;
+}
+
+/* Populate mlx5e_ipsec_inverse_table[]; called once from mlx5e_init().
+ * Entry 0 is left at its zero initial value.
+ */
+void mlx5e_ipsec_build_inverse_table(void)
+{
+       u32 mss;
+
+       /* Calculate 1/x inverse table for use in GSO data path.
+        * Using this table, we provide the IPSec accelerator with the value of
+        * 1/gso_size so that it can infer the position of each segment inside
+        * the GSO, and increment the ESP sequence number, and generate the IV.
+        * The HW needs this value in Q0.16 fixed-point number format
+        */
+       /* 1/1 does not fit in Q0.16; use the largest representable value */
+       mlx5e_ipsec_inverse_table[1] = htons(0xFFFF);
+
+       /* floor(2^16 / mss) equals (2^32 / mss) >> 16 for every mss >= 1 and
+        * needs no 64-bit division, which a plain '/' cannot provide on
+        * 32-bit kernel builds.
+        */
+       for (mss = 2; mss < MAX_LSO_MSS; mss++)
+               mlx5e_ipsec_inverse_table[mss] = htons((1U << 16) / mss);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h

new file mode 100644 (file)

index 0000000..e37ae25
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5E_IPSEC_RXTX_H__
+#define __MLX5E_IPSEC_RXTX_H__
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+#include <linux/skbuff.h>
+#include "en.h"
+
+/* Receive path: strip the inline metadata and attach the secpath. Returns
+ * the skb to deliver, or NULL when it was dropped and freed.
+ */
+struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
+                                         struct sk_buff *skb);
+/* RX completion handler installed on RQs when IPSec is enabled */
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+
+/* True when @skb is bound to a state with a non-zero offload handle */
+bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
+                              netdev_features_t features);
+/* Transmit path: add the inline metadata and fill the WQE SWP fields.
+ * Returns the skb to send, or NULL when it was dropped and freed.
+ */
+struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+                                         struct mlx5e_tx_wqe *wqe,
+                                         struct sk_buff *skb);
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
+
+#endif /* __MLX5E_IPSEC_RXTX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c

new file mode 100644 (file)

index 0000000..6fea592
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <net/sock.h>
+
+#include "en.h"
+#include "accel/ipsec.h"
+#include "fpga/sdk.h"
+#include "en_accel/ipsec.h"
+
+/* Device counters exposed through ethtool, in reporting order.
+ * The number of entries is also the counter count handed to
+ * mlx5_accel_ipsec_counters_read(), which writes into struct
+ * mlx5e_ipsec_stats viewed as a u64 array.
+ * NOTE(review): this presumably requires one entry per field of that
+ * struct - confirm.
+ */
+static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_out_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_bypass_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_in_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_out_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_bypass_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_drop_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_auth_fail_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_enc_drop_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_success) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_add_sa_fail) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_success) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_del_sa_fail) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_cmd_drop) },
+};
+
+/* Driver-maintained drop counters exposed through ethtool, reported after
+ * the device counters. They are read with MLX5E_READ_CTR_ATOMIC64().
+ */
+static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sp_alloc) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_sadb_miss) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_rx_drop_syndrome) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_bundle) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_no_state) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_not_ip) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_trailer) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_sw_stats, ipsec_tx_drop_metadata) },
+};
+
+/* Read counter @i of descriptor table @dsc from the stats block at @ptr,
+ * treating the field at the descriptor's byte offset as an atomic64_t.
+ */
+#define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \
+       atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset))
+
+/* Table sizes; the ethtool helpers report hardware counters first */
+#define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
+#define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
+
+#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS)
+
+/* Number of IPSec ethtool counters for @priv: the full set when the IPSec
+ * context exists, otherwise none.
+ */
+int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+{
+       return priv->ipsec ? NUM_IPSEC_COUNTERS : 0;
+}
+
+/* Copy the IPSec counter names into @data, one per ETH_GSTRING_LEN slot,
+ * hardware counters first and software counters after them.
+ *
+ * Returns the number of names written, or 0 (leaving @data untouched) when
+ * @priv has no IPSec context.
+ */
+int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+       uint8_t *slot = data;
+       unsigned int i;
+
+       if (!priv->ipsec)
+               return 0;
+
+       for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, mlx5e_ipsec_hw_stats_desc[i].format);
+
+       for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, mlx5e_ipsec_sw_stats_desc[i].format);
+
+       return NUM_IPSEC_COUNTERS;
+}
+
+/* Refresh the cached device counters of @priv. Does nothing without an
+ * IPSec context; when the read fails the cached counters are zeroed.
+ */
+void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+{
+       struct mlx5e_ipsec *ipsec = priv->ipsec;
+
+       if (!ipsec)
+               return;
+
+       /* The stats struct is handed over as a flat array of u64 counters */
+       if (mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&ipsec->stats,
+                                          NUM_IPSEC_HW_COUNTERS))
+               memset(&ipsec->stats, 0, sizeof(ipsec->stats));
+}
+
+/* Write the IPSec counter values to @data in the same order as the names
+ * from mlx5e_ipsec_get_strings(): hardware counters, then software ones.
+ *
+ * Returns the number of values written, or 0 (leaving @data untouched) when
+ * @priv has no IPSec context.
+ */
+int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+{
+       u64 *out = data;
+       int i;
+
+       if (!priv->ipsec)
+               return 0;
+
+       for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+               *out++ = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
+                                             mlx5e_ipsec_hw_stats_desc, i);
+
+       for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+               *out++ = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+                                                mlx5e_ipsec_sw_stats_desc, i);
+
+       return NUM_IPSEC_COUNTERS;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c

index 16b1e96a7050bbfbb91e21781691f48adfa1fec0..917fade5f5d55aa1a89c5abaadf73d9e5f37d612 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -31,6 +31,7 @@
   */
  
  #include "en.h"
+#include "en_accel/ipsec.h"
  
  void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
                                struct ethtool_drvinfo *drvinfo)
@@ -186,7 +187,8 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
                        MLX5E_NUM_SQ_STATS(priv) +
                        MLX5E_NUM_PFC_COUNTERS(priv) +
                        ARRAY_SIZE(mlx5e_pme_status_desc) +
-                      ARRAY_SIZE(mlx5e_pme_error_desc);
+                      ARRAY_SIZE(mlx5e_pme_error_desc) +
+                      mlx5e_ipsec_get_count(priv);
  
         case ETH_SS_PRIV_FLAGS:
                 return ARRAY_SIZE(mlx5e_priv_flags);
@@ -275,6 +277,9 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
         for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
                 strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
  
+       /* IPSec counters */
+       idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN);
+
         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
                 return;
  
@@ -403,6 +408,9 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
                 data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
                                                    mlx5e_pme_error_desc, i);
  
+       /* IPSec counters */
+       idx += mlx5e_ipsec_get_stats(priv, data + idx);
+
         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
                 return;
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c

index 9f99f624004fd874240beae13ef7ea42d21d279c..1eac5003084fb9d131392ab7e87ec82d7c4d20b7 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -39,6 +39,9 @@
  #include "en.h"
  #include "en_tc.h"
  #include "en_rep.h"
+#include "en_accel/ipsec.h"
+#include "en_accel/ipsec_rxtx.h"
+#include "accel/ipsec.h"
  #include "vxlan.h"
  
  struct mlx5e_rq_param {
@@ -115,7 +118,7 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
  static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
  {
         u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
-                   !params->xdp_prog ?
+                   !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
                     MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
                     MLX5_WQ_TYPE_LINKED_LIST;
         mlx5e_set_rq_type_params(mdev, params, rq_type);
@@ -328,8 +331,10 @@ static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
  
  void mlx5e_update_stats(struct mlx5e_priv *priv, bool full)
  {
-       if (full)
+       if (full) {
                 mlx5e_update_pcie_counters(priv);
+               mlx5e_ipsec_update_stats(priv);
+       }
         mlx5e_update_pport_counters(priv, full);
         mlx5e_update_vport_counters(priv);
         mlx5e_update_q_counter(priv);
@@ -592,6 +597,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                 rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
  
                 rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+               if (MLX5_IPSEC_DEV(mdev)) {
+                       err = -EINVAL;
+                       netdev_err(c->netdev, "MPWQE RQ with IPSec offload not supported\n");
+                       goto err_rq_wq_destroy;
+               }
+#endif
                 if (!rq->handle_rx_cqe) {
                         err = -EINVAL;
                         netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err);
@@ -624,7 +636,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                 rq->alloc_wqe = mlx5e_alloc_rx_wqe;
                 rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
  
-               rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
+#ifdef CONFIG_MLX5_EN_IPSEC
+               if (c->priv->ipsec)
+                       rq->handle_rx_cqe = mlx5e_ipsec_handle_rx_cqe;
+               else
+#endif
+                       rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe;
                 if (!rq->handle_rx_cqe) {
                         kfree(rq->wqe.frag_info);
                         err = -EINVAL;
@@ -635,6 +652,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                 rq->buff.wqe_sz = params->lro_en  ?
                                 params->lro_wqe_sz :
                                 MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu);
+#ifdef CONFIG_MLX5_EN_IPSEC
+               if (MLX5_IPSEC_DEV(mdev))
+                       rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN;
+#endif
                 rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en;
                 byte_count = rq->buff.wqe_sz;
  
@@ -1095,6 +1116,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
         sq->uar_map   = mdev->mlx5e_res.bfreg.map;
         sq->max_inline      = params->tx_max_inline;
         sq->min_inline_mode = params->tx_min_inline_mode;
+       if (MLX5_IPSEC_DEV(c->priv->mdev))
+               set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
  
         param->wq.db_numa_node = cpu_to_node(c->cpu);
         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
@@ -1914,6 +1937,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
  
         mlx5e_build_sq_param_common(priv, param);
         MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+       MLX5_SET(sqc, sqc, allow_swp, !!MLX5_IPSEC_DEV(priv->mdev));
  }
  
  static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -3070,8 +3094,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
                 PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
         stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
         stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
-       stats->tx_carrier_errors =
-               PPORT_802_3_GET(pstats, a_symbol_error_during_carrier);
         stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
                            stats->rx_frame_errors;
         stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
@@ -3508,6 +3530,11 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
         features = vlan_features_check(skb, features);
         features = vxlan_features_check(skb, features);
  
+#ifdef CONFIG_MLX5_EN_IPSEC
+       if (mlx5e_ipsec_feature_check(skb, netdev, features))
+               return features;
+#endif
+
         /* Validate if the tunneled packet is being offloaded by HW */
         if (skb->encapsulation &&
             (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
@@ -3555,6 +3582,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
                 goto unlock;
         }
  
+       if ((netdev->features & NETIF_F_HW_ESP) && prog) {
+               netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+               err = -EINVAL;
+               goto unlock;
+       }
+
         was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
         /* no need for full reset when exchanging programs */
         reset = (!priv->channels.params.xdp_prog || !prog);
@@ -4046,6 +4079,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
         if (MLX5_CAP_GEN(mdev, vport_group_manager))
                 netdev->switchdev_ops = &mlx5e_switchdev_ops;
  #endif
+
+       mlx5e_ipsec_build_netdev(priv);
  }
  
  static void mlx5e_create_q_counter(struct mlx5e_priv *priv)
@@ -4074,14 +4109,19 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
                            void *ppriv)
  {
         struct mlx5e_priv *priv = netdev_priv(netdev);
+       int err;
  
         mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+       err = mlx5e_ipsec_init(priv);
+       if (err)
+               mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
         mlx5e_build_nic_netdev(netdev);
         mlx5e_vxlan_init(priv);
  }
  
  static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
  {
+       mlx5e_ipsec_cleanup(priv);
         mlx5e_vxlan_cleanup(priv);
  
         if (priv->channels.params.xdp_prog)
@@ -4473,6 +4513,7 @@ static struct mlx5_interface mlx5e_interface = {
  
  void mlx5e_init(void)
  {
+       mlx5e_ipsec_build_inverse_table();
         mlx5e_build_ptys2ethtool_map();
         mlx5_register_interface(&mlx5e_interface);
  }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

index 5f3c138c948d3b7bfefa7867786096a74de36743..325b2c8c1c6d18c8d8544ee15d0fcf0a347f6ee8 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -41,6 +41,7 @@
  #include "eswitch.h"
  #include "en_rep.h"
  #include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
  
  static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
  {
@@ -996,7 +997,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
                 work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
  
         for (; work_done < budget; work_done++) {
-               struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);
+               struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq);
  
                 if (!cqe)
                         break;
@@ -1050,7 +1051,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
                 u16 wqe_counter;
                 bool last_wqe;
  
-               cqe = mlx5e_get_cqe(cq);
+               cqe = mlx5_cqwq_get_cqe(&cq->wq);
                 if (!cqe)
                         break;
  
@@ -1183,3 +1184,43 @@ wq_free_wqe:
  }
  
  #endif /* CONFIG_MLX5_CORE_IPOIB */
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+
+/* IPSec flavour of the RX completion handler: build an skb for the completed
+ * WQE, run it through mlx5e_ipsec_handle_rx_skb(), hand it to GRO, and in all
+ * cases pop the WQE from the linked-list work queue.
+ */
+void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+       __be16 counter_be = cqe->wqe_counter;
+       u16 counter = be16_to_cpu(counter_be);
+       struct mlx5e_rx_wqe *rx_wqe = mlx5_wq_ll_get_wqe(&rq->wq, counter);
+       struct mlx5e_wqe_frag_info *frag = &rq->wqe.frag_info[counter];
+       u32 byte_cnt = be32_to_cpu(cqe->byte_cnt);
+       struct sk_buff *skb;
+
+       skb = skb_from_cqe(rq, cqe, frag, byte_cnt);
+       if (likely(skb))
+               skb = mlx5e_ipsec_handle_rx_skb(rq->netdev, skb);
+
+       if (unlikely(!skb)) {
+               /* No skb to deliver (either step returned NULL): free the
+                * WQE without going through the page-reuse checks.
+                */
+               mlx5e_free_rx_wqe(rq, frag);
+       } else {
+               mlx5e_complete_rx_cqe(rq, cqe, byte_cnt, skb);
+               napi_gro_receive(rq->cq.napi, skb);
+               mlx5e_free_rx_wqe_reuse(rq, frag);
+       }
+
+       mlx5_wq_ll_pop(&rq->wq, counter_be, &rx_wqe->next.next_wqe_index);
+}
+
+#endif /* CONFIG_MLX5_EN_IPSEC */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

index 0433d69429f3d4fb4e1e3265875bb73666398ba7..aaa0f4ebba9aee5229cfd7cd22088bfc3ac3027c 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -34,6 +34,7 @@
  #include <linux/if_vlan.h>
  #include "en.h"
  #include "ipoib/ipoib.h"
+#include "en_accel/ipsec_rxtx.h"
  
  #define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
  #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
@@ -299,12 +300,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
         }
  }
  
-static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+                                struct mlx5e_tx_wqe *wqe, u16 pi)
  {
-       struct mlx5_wq_cyc       *wq   = &sq->wq;
-
-       u16 pi = sq->pc & wq->sz_m1;
-       struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
         struct mlx5e_tx_wqe_info *wi   = &sq->db.wqe_info[pi];
  
         struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
@@ -319,8 +317,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb)
         u16 ds_cnt;
         u16 ihs;
  
-       memset(wqe, 0, sizeof(*wqe));
-
         mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
  
         if (skb_is_gso(skb)) {
@@ -375,8 +371,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
  {
         struct mlx5e_priv *priv = netdev_priv(dev);
         struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+       struct mlx5_wq_cyc *wq = &sq->wq;
+       u16 pi = sq->pc & wq->sz_m1;
+       struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+
+       memset(wqe, 0, sizeof(*wqe));
+
+#ifdef CONFIG_MLX5_EN_IPSEC
+       if (sq->state & BIT(MLX5E_SQ_STATE_IPSEC)) {
+               skb = mlx5e_ipsec_handle_tx_skb(dev, wqe, skb);
+               if (unlikely(!skb))
+                       return NETDEV_TX_OK;
+       }
+#endif
  
-       return mlx5e_sq_xmit(sq, skb);
+       return mlx5e_sq_xmit(sq, skb, wqe, pi);
  }
  
  bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
@@ -409,7 +418,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                 u16 wqe_counter;
                 bool last_wqe;
  
-               cqe = mlx5e_get_cqe(cq);
+               cqe = mlx5_cqwq_get_cqe(&cq->wq);
                 if (!cqe)
                         break;
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c

index 5ca6714e3e02bd976515de96fa010d348b349458..92db28a9ed43a89edcaf92c8d43db4d71ad68976 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -32,23 +32,6 @@
  
  #include "en.h"
  
-struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq)
-{
-       struct mlx5_cqwq *wq = &cq->wq;
-       u32 ci = mlx5_cqwq_get_ci(wq);
-       struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);
-       u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK;
-       u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1;
-
-       if (cqe_ownership_bit != sw_ownership_val)
-               return NULL;
-
-       /* ensure cqe content is read after cqe ownership bit */
-       dma_rmb();
-
-       return cqe;
-}
-
  static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq,
                                              struct mlx5e_icosq *sq,
                                              struct mlx5_cqe64 *cqe,
@@ -89,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
         if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
                 return;
  
-       cqe = mlx5e_get_cqe(cq);
+       cqe = mlx5_cqwq_get_cqe(&cq->wq);
         if (likely(!cqe))
                 return;
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c

index 99cba644b4fc9454a8f17fe85e083204d5f1d462..e37453d838dbb669c76c9506fb3afd3723788a85 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -33,10 +33,44 @@
  #include <linux/etherdevice.h>
  #include <linux/mlx5/cmd.h>
  #include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
  
  #include "mlx5_core.h"
  #include "fpga/cmd.h"
  
+#define MLX5_FPGA_ACCESS_REG_SZ (MLX5_ST_SZ_DW(fpga_access_reg) + \
+                                MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+
+/* Read or write @size bytes at FPGA address @addr through the
+ * FPGA_ACCESS_REG register.
+ *
+ * @size and @addr must both be multiples of 4 and @size may not exceed
+ * MLX5_FPGA_ACCESS_REG_SIZE_MAX, otherwise -EINVAL is returned.
+ * When @write is true @buf is the data source; otherwise @buf receives the
+ * data on success. Returns 0 or the mlx5_core_access_reg() error.
+ */
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+                        void *buf, bool write)
+{
+       u32 reg_in[MLX5_FPGA_ACCESS_REG_SZ] = {0};
+       u32 reg_out[MLX5_FPGA_ACCESS_REG_SZ];
+       int err;
+
+       if ((size & 3) || (addr & 3) || size > MLX5_FPGA_ACCESS_REG_SIZE_MAX)
+               return -EINVAL;
+
+       MLX5_SET(fpga_access_reg, reg_in, size, size);
+       MLX5_SET64(fpga_access_reg, reg_in, address, addr);
+       if (write)
+               memcpy(MLX5_ADDR_OF(fpga_access_reg, reg_in, data), buf, size);
+
+       err = mlx5_core_access_reg(dev, reg_in, sizeof(reg_in),
+                                  reg_out, sizeof(reg_out),
+                                  MLX5_REG_FPGA_ACCESS_REG, 0, write);
+       if (err)
+               return err;
+
+       if (write)
+               return 0;
+
+       memcpy(buf, MLX5_ADDR_OF(fpga_access_reg, reg_out, data), size);
+       return 0;
+}
+
  int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
  {
         u32 in[MLX5_ST_SZ_DW(fpga_cap)] = {0};
@@ -46,6 +80,49 @@ int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps)
                                     MLX5_REG_FPGA_CAP, 0, 0);
  }
  
+/* Write @op into the operation field of the FPGA_CTRL register.
+ * Returns the result of mlx5_core_access_reg().
+ */
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op)
+{
+       u32 ctrl_out[MLX5_ST_SZ_DW(fpga_ctrl)];
+       u32 ctrl_in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
+       int err;
+
+       MLX5_SET(fpga_ctrl, ctrl_in, operation, op);
+
+       err = mlx5_core_access_reg(dev, ctrl_in, sizeof(ctrl_in),
+                                  ctrl_out, sizeof(ctrl_out),
+                                  MLX5_REG_FPGA_CTRL, 0, true);
+       return err;
+}
+
+/* Copy the FPGA sandbox (SBU) extended capabilities into @caps.
+ *
+ * The capability length and device address are taken from the FPGA caps
+ * (sandbox_extended_caps_len / sandbox_extended_caps_addr) and the data is
+ * read in chunks of at most MLX5_FPGA_ACCESS_REG_SIZE_MAX bytes.
+ *
+ * @caps: destination buffer
+ * @size: size of @caps in bytes; must be non-negative and at least as large
+ *        as the reported capability length
+ *
+ * Returns 0 on success, -EINVAL if the buffer is too small (or @size is
+ * negative), or the error returned by mlx5_fpga_access_reg().
+ */
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size)
+{
+       unsigned int cap_size = MLX5_CAP_FPGA(dev, sandbox_extended_caps_len);
+       u64 addr = MLX5_CAP64_FPGA(dev, sandbox_extended_caps_addr);
+       unsigned int read;
+       int ret = 0;
+
+       /* Reject negative sizes explicitly: comparing them against the
+        * unsigned cap_size would convert them to a huge value and let the
+        * reads below overrun the buffer.
+        */
+       if (size < 0 || cap_size > (unsigned int)size) {
+               mlx5_core_warn(dev, "Not enough buffer %d for FPGA SBU caps %u\n",
+                              size, cap_size);
+               return -EINVAL;
+       }
+
+       while (cap_size > 0) {
+               read = min_t(unsigned int, cap_size,
+                            MLX5_FPGA_ACCESS_REG_SIZE_MAX);
+
+               ret = mlx5_fpga_access_reg(dev, read, addr, caps, false);
+               if (ret) {
+                       mlx5_core_warn(dev, "Error reading FPGA SBU caps %u bytes at address 0x%llx: %d\n",
+                                      read, addr, ret);
+                       return ret;
+               }
+
+               cap_size -= read;
+               addr += read;
+               caps += read;
+       }
+
+       return ret;
+}
+
  int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
  {
         u32 in[MLX5_ST_SZ_DW(fpga_ctrl)] = {0};
@@ -62,3 +139,100 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
         query->oper_image = MLX5_GET(fpga_ctrl, out, flash_select_oper);
         return 0;
  }
+
+/* Execute the FPGA_CREATE_QP command.
+ *
+ * @fpga_qpc: QP context sent to the device; on success it is overwritten
+ *            with the context returned in the command output
+ * @fpga_qpn: on success, receives the QP number from the command output
+ *
+ * Returns 0 on success or the mlx5_cmd_exec() error.
+ */
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+                       u32 *fpga_qpn)
+{
+       u32 cmd_out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+       u32 cmd_in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
+       int err;
+
+       MLX5_SET(fpga_create_qp_in, cmd_in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
+       memcpy(MLX5_ADDR_OF(fpga_create_qp_in, cmd_in, fpga_qpc), fpga_qpc,
+              MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
+
+       err = mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
+                           cmd_out, sizeof(cmd_out));
+       if (err)
+               return err;
+
+       memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_create_qp_out, cmd_out, fpga_qpc),
+              MLX5_FLD_SZ_BYTES(fpga_create_qp_out, fpga_qpc));
+       *fpga_qpn = MLX5_GET(fpga_create_qp_out, cmd_out, fpga_qpn);
+
+       return 0;
+}
+
+/* Execute the FPGA_MODIFY_QP command for QP @fpga_qpn.
+ *
+ * @fields:   written to the command's field_select
+ * @fpga_qpc: QP context copied into the command input
+ *
+ * Returns the mlx5_cmd_exec() result.
+ */
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+                       enum mlx5_fpga_qpc_field_select fields,
+                       void *fpga_qpc)
+{
+       u32 cmd_out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+       u32 cmd_in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
+       int err;
+
+       MLX5_SET(fpga_modify_qp_in, cmd_in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
+       MLX5_SET(fpga_modify_qp_in, cmd_in, field_select, fields);
+       MLX5_SET(fpga_modify_qp_in, cmd_in, fpga_qpn, fpga_qpn);
+       memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, cmd_in, fpga_qpc), fpga_qpc,
+              MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
+
+       err = mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
+                           cmd_out, sizeof(cmd_out));
+       return err;
+}
+
+/* Execute the FPGA_QUERY_QP command for QP @fpga_qpn and, on success, copy
+ * the returned QP context into @fpga_qpc.
+ *
+ * Returns 0 on success or the mlx5_cmd_exec() error.
+ */
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
+                      u32 fpga_qpn, void *fpga_qpc)
+{
+       u32 cmd_out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+       u32 cmd_in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
+       int err;
+
+       MLX5_SET(fpga_query_qp_in, cmd_in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
+       MLX5_SET(fpga_query_qp_in, cmd_in, fpga_qpn, fpga_qpn);
+
+       err = mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
+                           cmd_out, sizeof(cmd_out));
+       if (err)
+               return err;
+
+       memcpy(fpga_qpc, MLX5_ADDR_OF(fpga_query_qp_out, cmd_out, fpga_qpc),
+              MLX5_FLD_SZ_BYTES(fpga_query_qp_out, fpga_qpc));
+
+       return 0;
+}
+
+/* Execute the FPGA_DESTROY_QP command for QP @fpga_qpn.
+ * Returns the mlx5_cmd_exec() result.
+ */
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
+{
+       u32 cmd_out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+       u32 cmd_in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
+       int err;
+
+       MLX5_SET(fpga_destroy_qp_in, cmd_in, opcode,
+                MLX5_CMD_OP_FPGA_DESTROY_QP);
+       MLX5_SET(fpga_destroy_qp_in, cmd_in, fpga_qpn, fpga_qpn);
+
+       err = mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
+                           cmd_out, sizeof(cmd_out));
+       return err;
+}
+
+/* Execute the FPGA_QUERY_QP_COUNTERS command for QP @fpga_qpn.
+ *
+ * @clear: written to the command's clear field
+ * @data:  on success, filled with the five 64-bit counters from the output
+ *
+ * Returns 0 on success or the mlx5_cmd_exec() error, in which case @data is
+ * left untouched.
+ */
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+                               bool clear, struct mlx5_fpga_qp_counters *data)
+{
+       u32 cmd_out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+       u32 cmd_in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
+       int err;
+
+       MLX5_SET(fpga_query_qp_counters_in, cmd_in, opcode,
+                MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS);
+       MLX5_SET(fpga_query_qp_counters_in, cmd_in, clear, clear);
+       MLX5_SET(fpga_query_qp_counters_in, cmd_in, fpga_qpn, fpga_qpn);
+
+       err = mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
+                           cmd_out, sizeof(cmd_out));
+       if (err)
+               return err;
+
+       data->rx_ack_packets =
+               MLX5_GET64(fpga_query_qp_counters_out, cmd_out, rx_ack_packets);
+       data->rx_send_packets =
+               MLX5_GET64(fpga_query_qp_counters_out, cmd_out, rx_send_packets);
+       data->tx_ack_packets =
+               MLX5_GET64(fpga_query_qp_counters_out, cmd_out, tx_ack_packets);
+       data->tx_send_packets =
+               MLX5_GET64(fpga_query_qp_counters_out, cmd_out, tx_send_packets);
+       data->rx_total_drop =
+               MLX5_GET64(fpga_query_qp_counters_out, cmd_out, rx_total_drop);
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h

index a74396a61bc37fa9af81b134186003eb82de5661..94bdfd47c3f094a167edc185468a3f8d10c1a1c0 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.h
@@ -53,7 +53,32 @@ struct mlx5_fpga_query {
         enum mlx5_fpga_status status;
  };
  
+enum mlx5_fpga_qpc_field_select { /* type of the "fields" argument of mlx5_fpga_modify_qp() */
+       MLX5_FPGA_QPC_STATE = BIT(0), /* presumably selects the QPC state field - TODO confirm */
+};
+
+struct mlx5_fpga_qp_counters { /* output of mlx5_fpga_query_qp_counters() */
+       u64 rx_ack_packets; /* rx_ack_packets field of the command output */
+       u64 rx_send_packets; /* rx_send_packets field of the command output */
+       u64 tx_ack_packets; /* tx_ack_packets field of the command output */
+       u64 tx_send_packets; /* tx_send_packets field of the command output */
+       u64 rx_total_drop; /* rx_total_drop field of the command output */
+};
+
  int mlx5_fpga_caps(struct mlx5_core_dev *dev, u32 *caps);
  int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query);
+int mlx5_fpga_ctrl_op(struct mlx5_core_dev *dev, u8 op);
+int mlx5_fpga_access_reg(struct mlx5_core_dev *dev, u8 size, u64 addr,
+                        void *buf, bool write);
+int mlx5_fpga_sbu_caps(struct mlx5_core_dev *dev, void *caps, int size);
+
+int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
+                       u32 *fpga_qpn);
+int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
+                       enum mlx5_fpga_qpc_field_select fields, void *fpga_qpc);
+int mlx5_fpga_query_qp(struct mlx5_core_dev *dev, u32 fpga_qpn, void *fpga_qpc);
+int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
+                               bool clear, struct mlx5_fpga_qp_counters *data);
+int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn);
  
  #endif /* __MLX5_FPGA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c

new file mode 100644 (file)

index 0000000..c4392f7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -0,0 +1,1042 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <net/addrconf.h>
+#include <linux/etherdevice.h>
+#include <linux/mlx5/vport.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+#include "fpga/conn.h"
+
+#define MLX5_FPGA_PKEY 0xFFFF
+#define MLX5_FPGA_PKEY_INDEX 0 /* RoCE PKEY 0xFFFF is always at index 0 */
+#define MLX5_FPGA_RECV_SIZE 2048
+#define MLX5_FPGA_PORT_NUM 1
+#define MLX5_FPGA_CQ_BUDGET 64
+
+/* DMA-map the (up to two) scatter entries of @buf in direction buf->dma_dir.
+ *
+ * An entry is only mapped when its data pointer is set; if sg[0] has no data
+ * nothing is mapped and 0 is returned. If mapping sg[1] fails, sg[0] is
+ * unmapped again.
+ *
+ * Returns 0 on success or -ENOMEM on a mapping error.
+ */
+static int mlx5_fpga_conn_map_buf(struct mlx5_fpga_conn *conn,
+                                 struct mlx5_fpga_dma_buf *buf)
+{
+       struct device *dev;
+       int rc;
+
+       if (unlikely(!buf->sg[0].data))
+               return 0;
+
+       dev = &conn->fdev->mdev->pdev->dev;
+
+       buf->sg[0].dma_addr = dma_map_single(dev, buf->sg[0].data,
+                                            buf->sg[0].size, buf->dma_dir);
+       rc = dma_mapping_error(dev, buf->sg[0].dma_addr);
+       if (unlikely(rc)) {
+               mlx5_fpga_warn(conn->fdev, "DMA error on sg 0: %d\n", rc);
+               return -ENOMEM;
+       }
+
+       if (!buf->sg[1].data)
+               return 0;
+
+       buf->sg[1].dma_addr = dma_map_single(dev, buf->sg[1].data,
+                                            buf->sg[1].size, buf->dma_dir);
+       rc = dma_mapping_error(dev, buf->sg[1].dma_addr);
+       if (likely(!rc))
+               return 0;
+
+       mlx5_fpga_warn(conn->fdev, "DMA error on sg 1: %d\n", rc);
+       /* Roll back the first mapping so the caller sees all-or-nothing */
+       dma_unmap_single(dev, buf->sg[0].dma_addr,
+                        buf->sg[0].size, buf->dma_dir);
+       return -ENOMEM;
+}
+
+/* Undo mlx5_fpga_conn_map_buf(): unmap sg[1] and then sg[0] of @buf, each
+ * only if its data pointer is set.
+ */
+static void mlx5_fpga_conn_unmap_buf(struct mlx5_fpga_conn *conn,
+                                    struct mlx5_fpga_dma_buf *buf)
+{
+       struct device *dev = &conn->fdev->mdev->pdev->dev;
+
+       if (buf->sg[1].data) {
+               dma_unmap_single(dev, buf->sg[1].dma_addr,
+                                buf->sg[1].size, buf->dma_dir);
+       }
+
+       if (likely(buf->sg[0].data)) {
+               dma_unmap_single(dev, buf->sg[0].dma_addr,
+                                buf->sg[0].size, buf->dma_dir);
+       }
+}
+
+/* Map @buf and post its first scatter entry as a receive WQE.
+ *
+ * Returns 0 on success, the mlx5_fpga_conn_map_buf() error if mapping fails,
+ * or -EBUSY (after unmapping @buf) when the receive queue is full.
+ */
+static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
+                                   struct mlx5_fpga_dma_buf *buf)
+{
+       struct mlx5_wqe_data_seg *seg;
+       unsigned int slot;
+       int rc;
+
+       rc = mlx5_fpga_conn_map_buf(conn, buf);
+       if (unlikely(rc))
+               return rc;
+
+       if (unlikely(conn->qp.rq.pc - conn->qp.rq.cc >= conn->qp.rq.size)) {
+               mlx5_fpga_conn_unmap_buf(conn, buf);
+               return -EBUSY;
+       }
+
+       slot = conn->qp.rq.pc & (conn->qp.rq.size - 1);
+       seg = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, slot);
+       seg->byte_count = cpu_to_be32(buf->sg[0].size);
+       seg->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+       seg->addr = cpu_to_be64(buf->sg[0].dma_addr);
+
+       conn->qp.rq.pc++;
+       conn->qp.rq.bufs[slot] = buf;
+
+       /* The descriptor must be written before the doorbell record. */
+       dma_wmb();
+       *conn->qp.wq.rq.db = cpu_to_be32(conn->qp.rq.pc & 0xffff);
+
+       return 0;
+}
+
+static void mlx5_fpga_conn_notify_hw(struct mlx5_fpga_conn *conn, void *wqe)
+{
+       /* Notify the device about send work: store the SQ producer counter
+        * (conn->qp.sq.pc) in the SQ doorbell record, then pass @wqe to
+        * mlx5_write64() targeting the UAR mapping at MLX5_BF_OFFSET.
+        *
+        * Ensure wqe is visible to device before updating doorbell record.
+        */
+       dma_wmb();
+       *conn->qp.wq.sq.db = cpu_to_be32(conn->qp.sq.pc);
+       /* Make sure that doorbell record is visible before ringing;
+        * the order of these two barriers and stores must not be changed.
+        */
+       wmb();
+       mlx5_write64(wqe, conn->fdev->conn_res.uar->map + MLX5_BF_OFFSET, NULL);
+}
+
+/* Build a SEND WQE for @buf in the next send-queue slot and notify the
+ * device. One data segment is emitted per scatter entry that has data,
+ * stopping at the first entry without data. The caller must have verified
+ * that the send queue has room.
+ */
+static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
+                                    struct mlx5_fpga_dma_buf *buf)
+{
+       struct mlx5_wqe_ctrl_seg *cseg;
+       struct mlx5_wqe_data_seg *dseg;
+       unsigned int slot, i;
+       int ds_cnt = 1; /* starts at 1 to account for the control segment */
+
+       slot = conn->qp.sq.pc & (conn->qp.sq.size - 1);
+
+       cseg = mlx5_wq_cyc_get_wqe(&conn->qp.wq.sq, slot);
+       dseg = (void *)(cseg + 1);
+
+       for (i = 0; i < ARRAY_SIZE(buf->sg) && buf->sg[i].data; i++) {
+               dseg->byte_count = cpu_to_be32(buf->sg[i].size);
+               dseg->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+               dseg->addr = cpu_to_be64(buf->sg[i].dma_addr);
+               dseg++;
+               ds_cnt++;
+       }
+
+       cseg->imm = 0;
+       cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+       cseg->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
+                                            MLX5_OPCODE_SEND);
+       cseg->qpn_ds = cpu_to_be32(ds_cnt | (conn->qp.mqp.qpn << 8));
+
+       conn->qp.sq.pc++;
+       conn->qp.sq.bufs[slot] = buf;
+       mlx5_fpga_conn_notify_hw(conn, cseg);
+}
+
+/* Send @buf over @conn.
+ *
+ * The buffer is DMA-mapped and then, under the send-queue lock, either
+ * posted immediately or appended to the backlog list when the send queue is
+ * full.
+ *
+ * Returns 0 when the buffer was posted or queued, -ENOTCONN when the QP is
+ * not active, or the mlx5_fpga_conn_map_buf() error.
+ */
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+                       struct mlx5_fpga_dma_buf *buf)
+{
+       unsigned long irq_flags;
+       int rc;
+
+       if (!conn->qp.active)
+               return -ENOTCONN;
+
+       rc = mlx5_fpga_conn_map_buf(conn, buf);
+       if (rc)
+               return rc;
+
+       spin_lock_irqsave(&conn->qp.sq.lock, irq_flags);
+
+       if (conn->qp.sq.pc - conn->qp.sq.cc >= conn->qp.sq.size)
+               list_add_tail(&buf->list, &conn->qp.sq.backlog);
+       else
+               mlx5_fpga_conn_post_send(conn, buf);
+
+       spin_unlock_irqrestore(&conn->qp.sq.lock, irq_flags);
+
+       return 0;
+}
+
+/* Allocate a receive buffer of MLX5_FPGA_RECV_SIZE bytes (stored directly
+ * after its descriptor in the same allocation) and post it to the receive
+ * queue. The buffer is freed again if posting fails.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the
+ * mlx5_fpga_conn_post_recv() error.
+ */
+static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_dma_buf *rx_buf;
+       int rc;
+
+       rx_buf = kzalloc(sizeof(*rx_buf) + MLX5_FPGA_RECV_SIZE, 0);
+       if (!rx_buf)
+               return -ENOMEM;
+
+       /* Payload area starts right behind the descriptor */
+       rx_buf->sg[0].data = (void *)(rx_buf + 1);
+       rx_buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+       rx_buf->dma_dir = DMA_FROM_DEVICE;
+
+       rc = mlx5_fpga_conn_post_recv(conn, rx_buf);
+       if (!rc)
+               return 0;
+
+       kfree(rx_buf);
+       return rc;
+}
+
+/* Create a memory key in protection domain @pdn with access mode
+ * MLX5_MKC_ACCESS_MODE_PA, local read/write enabled and length64 set.
+ *
+ * Returns 0 on success, -ENOMEM if the command buffer cannot be allocated,
+ * or the mlx5_core_create_mkey() error.
+ */
+static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+                                     struct mlx5_core_mkey *mkey)
+{
+       int cmd_len = MLX5_ST_SZ_BYTES(create_mkey_in);
+       u32 *cmd;
+       void *mkc;
+       int rc;
+
+       cmd = kvzalloc(cmd_len, GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       mkc = MLX5_ADDR_OF(create_mkey_in, cmd, memory_key_mkey_entry);
+
+       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+       MLX5_SET(mkc, mkc, lw, 1);
+       MLX5_SET(mkc, mkc, lr, 1);
+       MLX5_SET(mkc, mkc, pd, pdn);
+       MLX5_SET(mkc, mkc, length64, 1);
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+       rc = mlx5_core_create_mkey(mdev, mkey, cmd, cmd_len);
+       kvfree(cmd);
+
+       return rc;
+}
+
+/* Handle a receive completion.
+ *
+ * The completed buffer is taken out of the RQ ring and unmapped. On error
+ * status, or when the QP is no longer active, the QP is marked inactive and
+ * the buffer is freed. Otherwise the buffer is passed to the connection's
+ * recv_cb and then re-posted with its size reset to MLX5_FPGA_RECV_SIZE;
+ * it is freed if re-posting fails.
+ *
+ * @status: 0 for a successful completion, else the error CQE syndrome
+ */
+static void mlx5_fpga_conn_rq_cqe(struct mlx5_fpga_conn *conn,
+                                 struct mlx5_cqe64 *cqe, u8 status)
+{
+       struct mlx5_fpga_dma_buf *rx_buf;
+       int slot, rc;
+
+       slot = be16_to_cpu(cqe->wqe_counter) & (conn->qp.rq.size - 1);
+       rx_buf = conn->qp.rq.bufs[slot];
+       conn->qp.rq.bufs[slot] = NULL;
+       if (!status)
+               rx_buf->sg[0].size = be32_to_cpu(cqe->byte_cnt);
+       conn->qp.rq.cc++;
+
+       /* Flush errors are expected during teardown, so only other error
+        * syndromes are worth a warning.
+        */
+       if (likely(!status || status == MLX5_CQE_SYNDROME_WR_FLUSH_ERR))
+               mlx5_fpga_dbg(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+                             rx_buf, conn->fpga_qpn, status);
+       else
+               mlx5_fpga_warn(conn->fdev, "RQ buf %p on FPGA QP %u completion status %d\n",
+                              rx_buf, conn->fpga_qpn, status);
+
+       mlx5_fpga_conn_unmap_buf(conn, rx_buf);
+
+       if (unlikely(status || !conn->qp.active)) {
+               conn->qp.active = false;
+               kfree(rx_buf);
+               return;
+       }
+
+       mlx5_fpga_dbg(conn->fdev, "Message with %u bytes received successfully\n",
+                     rx_buf->sg[0].size);
+       conn->recv_cb(conn->cb_arg, rx_buf);
+
+       rx_buf->sg[0].size = MLX5_FPGA_RECV_SIZE;
+       rc = mlx5_fpga_conn_post_recv(conn, rx_buf);
+       if (likely(!rc))
+               return;
+
+       mlx5_fpga_warn(conn->fdev,
+                      "Failed to re-post recv buf: %d\n", rc);
+       kfree(rx_buf);
+}
+
+/* Handle a send completion.
+ *
+ * Under the send-queue lock the completed buffer is removed from the SQ ring
+ * and, if the QP is active, the first backlogged buffer is posted into the
+ * freed slot. Afterwards the completed buffer is unmapped, its completion
+ * callback (if any) is invoked, and the QP is marked inactive on error.
+ *
+ * @status: 0 for a successful completion, else the error CQE syndrome
+ */
+static void mlx5_fpga_conn_sq_cqe(struct mlx5_fpga_conn *conn,
+                                 struct mlx5_cqe64 *cqe, u8 status)
+{
+       struct mlx5_fpga_dma_buf *done, *pending;
+       unsigned long irq_flags;
+       int slot;
+
+       spin_lock_irqsave(&conn->qp.sq.lock, irq_flags);
+
+       slot = be16_to_cpu(cqe->wqe_counter) & (conn->qp.sq.size - 1);
+       done = conn->qp.sq.bufs[slot];
+       conn->qp.sq.bufs[slot] = NULL;
+       conn->qp.sq.cc++;
+
+       /* The backlog is drained while still holding the lock so that
+        * messages are posted in the order they were submitted.
+        */
+       if (unlikely(!list_empty(&conn->qp.sq.backlog)) &&
+           likely(conn->qp.active)) {
+               pending = list_first_entry(&conn->qp.sq.backlog,
+                                          struct mlx5_fpga_dma_buf, list);
+               list_del(&pending->list);
+               mlx5_fpga_conn_post_send(conn, pending);
+       }
+
+       spin_unlock_irqrestore(&conn->qp.sq.lock, irq_flags);
+
+       if (likely(!status || status == MLX5_CQE_SYNDROME_WR_FLUSH_ERR))
+               mlx5_fpga_dbg(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+                             done, conn->fpga_qpn, status);
+       else
+               mlx5_fpga_warn(conn->fdev, "SQ buf %p on FPGA QP %u completion status %d\n",
+                              done, conn->fpga_qpn, status);
+
+       mlx5_fpga_conn_unmap_buf(conn, done);
+
+       if (likely(done->complete))
+               done->complete(conn, conn->fdev, done, status);
+
+       if (unlikely(status))
+               conn->qp.active = false;
+}
+
+/* Dispatch one CQE to the send or receive completion handler according to
+ * its opcode. For error opcodes the syndrome from the error CQE layout is
+ * passed as the status; otherwise the status is 0. Unknown opcodes are only
+ * logged.
+ */
+static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn,
+                                     struct mlx5_cqe64 *cqe)
+{
+       u8 opcode = cqe->op_own >> 4;
+       u8 status = 0;
+
+       if (opcode == MLX5_CQE_REQ_ERR || opcode == MLX5_CQE_RESP_ERR)
+               status = ((struct mlx5_err_cqe *)cqe)->syndrome;
+
+       switch (opcode) {
+       case MLX5_CQE_REQ_ERR:
+       case MLX5_CQE_REQ:
+               mlx5_fpga_conn_sq_cqe(conn, cqe, status);
+               break;
+       case MLX5_CQE_RESP_ERR:
+       case MLX5_CQE_RESP_SEND:
+               mlx5_fpga_conn_rq_cqe(conn, cqe, status);
+               break;
+       default:
+               mlx5_fpga_warn(conn->fdev, "Unexpected cqe opcode %u\n",
+                              opcode);
+               break;
+       }
+}
+
+/* Arm the connection's CQ (MLX5_CQ_DB_REQ_NOT) at its current consumer
+ * counter, using the connection resources' UAR mapping.
+ */
+static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_core_cq *mcq = &conn->cq.mcq;
+
+       mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, conn->fdev->conn_res.uar->map,
+                   conn->cq.wq.cc);
+}
+
+/* CQ event callback: only logs the event and the CQ number. */
+static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq,
+                                   enum mlx5_event event)
+{
+       struct mlx5_fpga_conn *conn =
+               container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+
+       mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn);
+}
+
+/* QP event callback: only logs the event and the QP number. */
+static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event)
+{
+       struct mlx5_fpga_conn *conn =
+               container_of(mqp, struct mlx5_fpga_conn, qp.mqp);
+
+       mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn);
+}
+
+/* Process up to @budget completions from the connection's CQ.
+ *
+ * If the budget is used up, the CQ tasklet is scheduled to continue polling
+ * and the CQ is not re-armed; otherwise the CQ is re-armed.
+ */
+static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
+                                      unsigned int budget)
+{
+       struct mlx5_cqe64 *cqe;
+
+       for (; budget; budget--) {
+               cqe = mlx5_cqwq_get_cqe(&conn->cq.wq);
+               if (!cqe)
+                       break;
+
+               mlx5_cqwq_pop(&conn->cq.wq);
+               mlx5_fpga_conn_handle_cqe(conn, cqe);
+               mlx5_cqwq_update_db_record(&conn->cq.wq);
+       }
+
+       if (budget) {
+               mlx5_fpga_dbg(conn->fdev, "Re-arming CQ with cc# %u\n",
+                             conn->cq.wq.cc);
+               /* CQ space must be released before more CQEs are enabled */
+               wmb();
+               mlx5_fpga_conn_arm_cq(conn);
+               return;
+       }
+
+       /* Budget exhausted: more completions may be pending */
+       tasklet_schedule(&conn->cq.tasklet);
+}
+
+/* Tasklet body: continue polling the CQ of the connection passed in @data,
+ * unless its QP is no longer active.
+ */
+static void mlx5_fpga_conn_cq_tasklet(unsigned long data)
+{
+       struct mlx5_fpga_conn *conn = (void *)data;
+
+       if (likely(conn->qp.active))
+               mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+/* CQ completion callback: poll the owning connection's CQ, unless its QP is
+ * no longer active.
+ */
+static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq)
+{
+       struct mlx5_fpga_conn *conn =
+               container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
+
+       if (likely(conn->qp.active))
+               mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET);
+}
+
+/* Create the completion queue of @conn.
+ *
+ * @cq_size is rounded up to a power of two. The CQ work queue is allocated,
+ * every CQE is initialised as invalid, and the CQ object is created on the
+ * EQ returned by mlx5_vector2eqn() for the current CPU. On success the
+ * completion/event callbacks and the polling tasklet are set up.
+ *
+ * Returns 0 on success or a negative errno; on failure the CQ work queue is
+ * destroyed and the command buffer is freed.
+ */
+static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+       struct mlx5_core_dev *mdev = fdev->mdev;
+       u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0};
+       struct mlx5_wq_param wqp;
+       struct mlx5_cqe64 *cqe;
+       int inlen, err, eqn;
+       unsigned int irqn;
+       void *cqc, *in;
+       __be64 *pas;
+       u32 i;
+
+       cq_size = roundup_pow_of_two(cq_size);
+       MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size));
+
+       wqp.buf_numa_node = mdev->priv.numa_node;
+       wqp.db_numa_node  = mdev->priv.numa_node;
+
+       err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &conn->cq.wq,
+                              &conn->cq.wq_ctrl);
+       if (err)
+               return err;
+
+       /* Mark every CQE invalid before the CQ is handed to the device */
+       for (i = 0; i < mlx5_cqwq_get_size(&conn->cq.wq); i++) {
+               cqe = mlx5_cqwq_get_wqe(&conn->cq.wq, i);
+               cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
+       }
+
+       inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+               sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_cqwq;
+       }
+
+       err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+       if (err) {
+               /* The command buffer is already allocated here; release it
+                * before unwinding or it is leaked.
+                */
+               kvfree(in);
+               goto err_cqwq;
+       }
+
+       cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+       MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
+       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
+       MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift -
+                          MLX5_ADAPTER_PAGE_SHIFT);
+       MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
+
+       pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+       mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas);
+
+       err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
+       kvfree(in);
+
+       if (err)
+               goto err_cqwq;
+
+       conn->cq.mcq.cqe_sz     = 64;
+       conn->cq.mcq.set_ci_db  = conn->cq.wq_ctrl.db.db;
+       conn->cq.mcq.arm_db     = conn->cq.wq_ctrl.db.db + 1;
+       *conn->cq.mcq.set_ci_db = 0;
+       *conn->cq.mcq.arm_db    = 0;
+       conn->cq.mcq.vector     = 0;
+       conn->cq.mcq.comp       = mlx5_fpga_conn_cq_complete;
+       conn->cq.mcq.event      = mlx5_fpga_conn_cq_event;
+       conn->cq.mcq.irqn       = irqn;
+       conn->cq.mcq.uar        = fdev->conn_res.uar;
+       tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
+                    (unsigned long)conn);
+
+       mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
+
+       goto out;
+
+err_cqwq:
+       mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+out:
+       return err;
+}
+
+/* Tear down the completion queue of a connection.
+ *
+ * The CQ tasklet is disabled and killed first, then the CQ object is
+ * destroyed through mlx5_core_destroy_cq() and the CQ work queue is
+ * destroyed through mlx5_cqwq_destroy().
+ */
+static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
+{
+       struct tasklet_struct *cq_tasklet = &conn->cq.tasklet;
+       struct mlx5_core_dev *mdev = conn->fdev->mdev;
+
+       tasklet_disable(cq_tasklet);
+       tasklet_kill(cq_tasklet);
+       mlx5_core_destroy_cq(mdev, &conn->cq.mcq);
+       mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+}
+
+/* Allocate the work queue that backs the connection's QP.
+ *
+ * @conn: connection whose qp.wq and qp.wq_ctrl are filled in
+ * @qpc:  QP context handed to mlx5_wq_qp_create(); the caller in this file
+ *        sets log_rq_stride, log_rq_size and log_sq_size in it beforehand
+ *
+ * Buffer and doorbell memory are both requested on the device's NUMA
+ * node.  Returns the result of mlx5_wq_qp_create().
+ */
+static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
+{
+       struct mlx5_core_dev *mdev = conn->fdev->mdev;
+       /* The designated initializer zeroes every member not named here, so
+        * no uninitialized stack data is passed to mlx5_wq_qp_create().
+        */
+       struct mlx5_wq_param wqp = {
+               .buf_numa_node = mdev->priv.numa_node,
+               .db_numa_node  = mdev->priv.numa_node,
+       };
+
+       return mlx5_wq_qp_create(mdev, &wqp, qpc, &conn->qp.wq,
+                                &conn->qp.wq_ctrl);
+}
+
+/* Create the host-side RC QP of an FPGA connection.
+ *
+ * @conn:    connection; conn->cq.mcq.cqn is used as both the send and the
+ *           receive CQ number of the new QP
+ * @tx_size: requested number of send queue entries
+ * @rx_size: requested number of receive queue entries
+ *
+ * Both sizes are rounded up to a power of two.  Besides the QP itself this
+ * allocates the work queue and the rq/sq buffer-pointer arrays.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error
+ * returned by mlx5_fpga_conn_create_wq() / mlx5_core_create_qp().  On
+ * failure everything allocated here has been released again.
+ */
+static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
+                                   unsigned int tx_size, unsigned int rx_size)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+       struct mlx5_core_dev *mdev = fdev->mdev;
+       u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0};
+       void *in = NULL, *qpc;
+       int err, inlen;
+
+       conn->qp.rq.pc = 0;
+       conn->qp.rq.cc = 0;
+       conn->qp.rq.size = roundup_pow_of_two(rx_size);
+       conn->qp.sq.pc = 0;
+       conn->qp.sq.cc = 0;
+       conn->qp.sq.size = roundup_pow_of_two(tx_size);
+
+       /* Scratch QP context: carries only the queue geometry needed to
+        * allocate the work queue; the real context is built in "in" below.
+        */
+       MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+       MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(conn->qp.rq.size));
+       MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(conn->qp.sq.size));
+       err = mlx5_fpga_conn_create_wq(conn, temp_qpc);
+       if (err)
+               goto out;
+
+       /* One buffer pointer per receive queue entry */
+       conn->qp.rq.bufs = kvzalloc(sizeof(conn->qp.rq.bufs[0]) *
+                                   conn->qp.rq.size, GFP_KERNEL);
+       if (!conn->qp.rq.bufs) {
+               err = -ENOMEM;
+               goto err_wq;
+       }
+
+       /* One buffer pointer per send queue entry */
+       conn->qp.sq.bufs = kvzalloc(sizeof(conn->qp.sq.bufs[0]) *
+                                   conn->qp.sq.size, GFP_KERNEL);
+       if (!conn->qp.sq.bufs) {
+               err = -ENOMEM;
+               goto err_rq_bufs;
+       }
+
+       /* Command size: the fixed create_qp_in layout followed by one pas[]
+        * entry per page of the work queue buffer.
+        */
+       inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+               MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+               conn->qp.wq_ctrl.buf.npages;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               err = -ENOMEM;
+               goto err_sq_bufs;
+       }
+
+       qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+       MLX5_SET(qpc, qpc, uar_page, fdev->conn_res.uar->index);
+       MLX5_SET(qpc, qpc, log_page_size,
+                conn->qp.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+       MLX5_SET(qpc, qpc, fre, 1);
+       MLX5_SET(qpc, qpc, rlky, 1);
+       MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+       MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+       MLX5_SET(qpc, qpc, pd, fdev->conn_res.pdn);
+       /* Same geometry as written to temp_qpc above */
+       MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
+       MLX5_SET(qpc, qpc, log_rq_size, ilog2(conn->qp.rq.size));
+       MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+       MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
+       /* A single CQ serves send and receive completions */
+       MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+       MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+       MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+       if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+               MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+
+       mlx5_fill_page_array(&conn->qp.wq_ctrl.buf,
+                            (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+
+       err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
+       if (err)
+               goto err_sq_bufs;
+
+       conn->qp.mqp.event = mlx5_fpga_conn_event;
+       mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
+
+       goto out;
+
+err_sq_bufs:
+       kvfree(conn->qp.sq.bufs);
+err_rq_bufs:
+       kvfree(conn->qp.rq.bufs);
+err_wq:
+       mlx5_wq_destroy(&conn->qp.wq_ctrl);
+out:
+       /* Shared by the success and error paths; "in" is still NULL when
+        * the command buffer was never allocated.
+        */
+       kvfree(in);
+       return err;
+}
+
+/* Unmap and free every buffer still held in the receive queue's buffer
+ * array, clearing each slot as it is released.  Empty slots are skipped.
+ */
+static void mlx5_fpga_conn_free_recv_bufs(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_dma_buf *buf;
+       int slot;
+
+       for (slot = 0; slot < conn->qp.rq.size; slot++) {
+               buf = conn->qp.rq.bufs[slot];
+               if (buf) {
+                       mlx5_fpga_conn_unmap_buf(conn, buf);
+                       kfree(buf);
+                       conn->qp.rq.bufs[slot] = NULL;
+               }
+       }
+}
+
+/* Flush all pending transmit buffers of a connection.
+ *
+ * Every buffer found in the send queue's buffer array, and then every
+ * buffer on the backlog list, is unmapped; if it has a completion
+ * callback, that callback is invoked with MLX5_CQE_SYNDROME_WR_FLUSH_ERR.
+ * Send queue slots are cleared before their buffer is completed.
+ */
+static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_dma_buf *cur, *next;
+       int slot;
+
+       for (slot = 0; slot < conn->qp.sq.size; slot++) {
+               cur = conn->qp.sq.bufs[slot];
+               if (!cur)
+                       continue;
+               conn->qp.sq.bufs[slot] = NULL;
+               mlx5_fpga_conn_unmap_buf(conn, cur);
+               if (cur->complete)
+                       cur->complete(conn, conn->fdev, cur,
+                                     MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+       }
+       list_for_each_entry_safe(cur, next, &conn->qp.sq.backlog, list) {
+               mlx5_fpga_conn_unmap_buf(conn, cur);
+               if (cur->complete)
+                       cur->complete(conn, conn->fdev, cur,
+                                     MLX5_CQE_SYNDROME_WR_FLUSH_ERR);
+       }
+}
+
+/* Destroy the host-side QP of a connection and release what belongs to it.
+ *
+ * The QP object is destroyed first; afterwards the remaining receive
+ * buffers are freed, pending send buffers are flushed, and the
+ * buffer-pointer arrays and the work queue are released.
+ */
+static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_core_dev *mdev = conn->fdev->mdev;
+
+       mlx5_core_destroy_qp(mdev, &conn->qp.mqp);
+
+       mlx5_fpga_conn_free_recv_bufs(conn);
+       mlx5_fpga_conn_flush_send_bufs(conn);
+
+       kvfree(conn->qp.sq.bufs);
+       kvfree(conn->qp.rq.bufs);
+       mlx5_wq_destroy(&conn->qp.wq_ctrl);
+}
+
+/* Move the host QP to the RESET state.
+ *
+ * Returns the result of the MLX5_CMD_OP_2RST_QP modify command.
+ */
+static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+
+       mlx5_fpga_dbg(fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
+
+       return mlx5_core_qp_modify(fdev->mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
+                                  &conn->qp.mqp);
+}
+
+/* Move the host QP from RESET to INIT.
+ *
+ * A temporary QP context is filled with the service type, path migration
+ * state, pkey index, port, PD, CQ numbers and doorbell address, and is
+ * passed to the RST2INIT modify command.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, or
+ * the error from mlx5_core_qp_modify() (which is also logged).
+ */
+static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+       struct mlx5_core_dev *mdev = fdev->mdev;
+       u32 *qpc;
+       int err;
+
+       mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
+
+       qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+       if (!qpc)
+               return -ENOMEM;
+
+       MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+       MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+       MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+       MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+       MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn);
+       MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
+       MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+       MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
+
+       err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
+                                 &conn->qp.mqp);
+       if (err)
+               mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
+
+       kfree(qpc);
+       return err;
+}
+
+/* Move the host QP from INIT to RTR.
+ *
+ * The remote QP number, remote MAC, remote IP and the next expected
+ * receive PSN are taken from the FPGA QP (conn->fpga_qpn and
+ * conn->fpga_qpc); the source GID index is conn->qp.sgid_index.
+ *
+ * Returns 0 on success, -ENOMEM if the QP context cannot be allocated, or
+ * the error from mlx5_core_qp_modify() (which is also logged).
+ */
+static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+       struct mlx5_core_dev *mdev = fdev->mdev;
+       u32 *qpc;
+       int err;
+
+       mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
+
+       qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+       if (!qpc)
+               return -ENOMEM;
+
+       MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
+       MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
+       MLX5_SET(qpc, qpc, remote_qpn, conn->fpga_qpn);
+       /* What the FPGA sends next is what this QP must expect next */
+       MLX5_SET(qpc, qpc, next_rcv_psn,
+                MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
+       MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
+       MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM);
+       ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
+                       MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
+       MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
+                MLX5_CAP_ROCE(mdev, r_roce_min_src_udp_port));
+       MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
+                conn->qp.sgid_index);
+       MLX5_SET(qpc, qpc, primary_address_path.hop_limit, 0);
+       memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
+              MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
+              MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
+
+       err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
+                                 &conn->qp.mqp);
+       /* Report the transition that was actually attempted */
+       if (err)
+               mlx5_fpga_warn(fdev, "qp_modify INIT2RTR failed: %d\n", err);
+
+       kfree(qpc);
+       return err;
+}
+
+/* Move the host QP from RTR to RTS.
+ *
+ * The first PSN to send is the PSN the FPGA QP expects to receive next
+ * (next_rcv_psn of conn->fpga_qpc).
+ *
+ * Returns 0 on success, -ENOMEM if the QP context cannot be allocated, or
+ * the error from mlx5_core_qp_modify() (which is also logged).
+ */
+static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+       struct mlx5_core_dev *mdev = fdev->mdev;
+       u32 opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT;
+       u32 *qpc;
+       int err;
+
+       mlx5_fpga_dbg(conn->fdev, "QP RTS\n");
+
+       qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+       if (!qpc)
+               return -ENOMEM;
+
+       MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
+       MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
+       MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x12); /* ~1.07s */
+       MLX5_SET(qpc, qpc, next_send_psn,
+                MLX5_GET(fpga_qpc, conn->fpga_qpc, next_rcv_psn));
+       MLX5_SET(qpc, qpc, retry_count, 7);
+       MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+
+       err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc,
+                                 &conn->qp.mqp);
+       /* Report the transition that was actually attempted */
+       if (err)
+               mlx5_fpga_warn(fdev, "qp_modify RTR2RTS failed: %d\n", err);
+
+       kfree(qpc);
+       return err;
+}
+
+/* Bring both ends of the connection up.
+ *
+ * The FPGA QP is switched to ACTIVE, then the host QP is walked through
+ * RESET -> INIT -> RTR -> RTS.  Receive buffers are posted after INIT and
+ * before RTR.
+ *
+ * Returns 0 on success or the error of the failing step.  On failure the
+ * FPGA QP is put back to INIT (best effort, a failure there is only
+ * logged) and, if they were already posted, the receive buffers are freed.
+ */
+static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+       int err;
+
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_ACTIVE);
+       err = mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+                                 MLX5_FPGA_QPC_STATE, &conn->fpga_qpc);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to activate FPGA RC QP: %d\n", err);
+               goto out;
+       }
+
+       err = mlx5_fpga_conn_reset_qp(conn);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to change QP state to reset\n");
+               goto err_fpga_qp;
+       }
+
+       err = mlx5_fpga_conn_init_qp(conn);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to modify QP from RESET to INIT\n");
+               goto err_fpga_qp;
+       }
+       conn->qp.active = true;
+
+       /* Keep posting receive buffers until the helper reports non-zero */
+       while (!mlx5_fpga_conn_post_recv_buf(conn))
+               ;
+
+       err = mlx5_fpga_conn_rtr_qp(conn);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to change QP state from INIT to RTR\n");
+               goto err_recv_bufs;
+       }
+
+       err = mlx5_fpga_conn_rts_qp(conn);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to change QP state from RTR to RTS\n");
+               goto err_recv_bufs;
+       }
+       goto out;
+
+err_recv_bufs:
+       mlx5_fpga_conn_free_recv_bufs(conn);
+err_fpga_qp:
+       /* Undo the ACTIVE transition made at the top of this function */
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+       if (mlx5_fpga_modify_qp(conn->fdev->mdev, conn->fpga_qpn,
+                               MLX5_FPGA_QPC_STATE, &conn->fpga_qpc))
+               mlx5_fpga_err(fdev, "Failed to revert FPGA QP to INIT\n");
+out:
+       return err;
+}
+
+/* Create an RC connection between the host and the FPGA.
+ *
+ * @fdev:    FPGA device the connection belongs to
+ * @attr:    connection attributes; recv_cb is mandatory, cb_arg is stored
+ *           alongside it, tx_size and rx_size give the queue depths
+ * @qp_type: value written to the qp_type field of the FPGA QP context
+ *
+ * Sets up, in this order: a reserved RoCE GID derived from the NIC vport
+ * MAC address, the CQ, the host QP, the FPGA QP, and finally connects the
+ * two QPs.
+ *
+ * Returns the new connection on success.  On failure everything set up so
+ * far is undone and an ERR_PTR() is returned (-EINVAL without recv_cb,
+ * -ENOMEM on allocation failure, otherwise the failing step's error).
+ */
+struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+                                            struct mlx5_fpga_conn_attr *attr,
+                                            enum mlx5_ifc_fpga_qp_type qp_type)
+{
+       struct mlx5_fpga_conn *ret, *conn;
+       u8 *remote_mac, *remote_ip;
+       int err;
+
+       if (!attr->recv_cb)
+               return ERR_PTR(-EINVAL);
+
+       conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+       if (!conn)
+               return ERR_PTR(-ENOMEM);
+
+       conn->fdev = fdev;
+       INIT_LIST_HEAD(&conn->qp.sq.backlog);
+
+       spin_lock_init(&conn->qp.sq.lock);
+
+       conn->recv_cb = attr->recv_cb;
+       conn->cb_arg = attr->cb_arg;
+
+       /* The NIC's own MAC is stored in the "remote" fields of the FPGA QP
+        * context (NOTE(review): presumably "remote" as seen from the FPGA)
+        */
+       remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32);
+       err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err);
+               ret = ERR_PTR(err);
+               goto err;
+       }
+
+       /* Build Modified EUI-64 IPv6 address from the MAC address */
+       remote_ip = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_ip);
+       remote_ip[0] = 0xfe;
+       remote_ip[1] = 0x80;
+       addrconf_addr_eui48(&remote_ip[8], remote_mac);
+
+       err = mlx5_core_reserved_gid_alloc(fdev->mdev, &conn->qp.sgid_index);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to allocate SGID: %d\n", err);
+               ret = ERR_PTR(err);
+               goto err;
+       }
+
+       err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
+                                    MLX5_ROCE_VERSION_2,
+                                    MLX5_ROCE_L3_TYPE_IPV6,
+                                    remote_ip, remote_mac, true, 0);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
+               ret = ERR_PTR(err);
+               goto err_rsvd_gid;
+       }
+       mlx5_fpga_dbg(fdev, "Reserved SGID index %u\n", conn->qp.sgid_index);
+
+       /* Allow for one cqe per rx/tx wqe, plus one cqe for the next wqe,
+        * created during processing of the cqe
+        */
+       err = mlx5_fpga_conn_create_cq(conn,
+                                      (attr->tx_size + attr->rx_size) * 2);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to create CQ: %d\n", err);
+               ret = ERR_PTR(err);
+               goto err_gid;
+       }
+
+       mlx5_fpga_conn_arm_cq(conn);
+
+       err = mlx5_fpga_conn_create_qp(conn, attr->tx_size, attr->rx_size);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to create QP: %d\n", err);
+               ret = ERR_PTR(err);
+               goto err_cq;
+       }
+
+       /* Describe the FPGA-side QP; its remote QP is the host QP just
+        * created above
+        */
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, state, MLX5_FPGA_QPC_STATE_INIT);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, qp_type, qp_type);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, st, MLX5_FPGA_QPC_ST_RC);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, ether_type, ETH_P_8021Q);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, vid, 0);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
+       MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
+
+       err = mlx5_fpga_create_qp(fdev->mdev, &conn->fpga_qpc,
+                                 &conn->fpga_qpn);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to create FPGA RC QP: %d\n", err);
+               ret = ERR_PTR(err);
+               goto err_qp;
+       }
+
+       err = mlx5_fpga_conn_connect(conn);
+       if (err) {
+               ret = ERR_PTR(err);
+               goto err_conn;
+       }
+
+       mlx5_fpga_dbg(fdev, "FPGA QPN is %u\n", conn->fpga_qpn);
+       ret = conn;
+       goto out;
+
+       /* Unwind in reverse order of setup */
+err_conn:
+       mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
+err_qp:
+       mlx5_fpga_conn_destroy_qp(conn);
+err_cq:
+       mlx5_fpga_conn_destroy_cq(conn);
+err_gid:
+       mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
+                              NULL, false, 0);
+err_rsvd_gid:
+       mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
+err:
+       kfree(conn);
+out:
+       return ret;
+}
+
+/* Destroy a connection created by mlx5_fpga_conn_create() and free it.
+ *
+ * The connection is marked inactive and its CQ tasklet is disabled before
+ * the FPGA QP is destroyed.  The host QP is then moved to the error state
+ * (a failure is only logged) and destroyed together with the CQ, and the
+ * reserved GID is cleared and released.
+ */
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+       struct mlx5_fpga_device *fdev = conn->fdev;
+       struct mlx5_core_dev *mdev = fdev->mdev;
+       int rc;
+
+       conn->qp.active = false;
+       tasklet_disable(&conn->cq.tasklet);
+       synchronize_irq(conn->cq.mcq.irqn);
+
+       mlx5_fpga_destroy_qp(mdev, conn->fpga_qpn);
+
+       rc = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL,
+                                &conn->qp.mqp);
+       if (rc)
+               mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", rc);
+
+       mlx5_fpga_conn_destroy_qp(conn);
+       mlx5_fpga_conn_destroy_cq(conn);
+
+       mlx5_core_roce_gid_set(mdev, conn->qp.sgid_index, 0, 0,
+                              NULL, NULL, false, 0);
+       mlx5_core_reserved_gid_free(mdev, conn->qp.sgid_index);
+       kfree(conn);
+}
+
+/* Set up the per-device resources used by FPGA connections.
+ *
+ * Enables RoCE on the NIC vport, then obtains a UAR page, a protection
+ * domain and a memory key, storing them in fdev->conn_res.
+ *
+ * Returns 0 on success.  On failure the steps already completed are
+ * undone in reverse order and the error of the failing step is returned.
+ */
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
+{
+       struct mlx5_core_dev *mdev = fdev->mdev;
+       int err;
+
+       err = mlx5_nic_vport_enable_roce(mdev);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to enable RoCE: %d\n", err);
+               return err;
+       }
+
+       fdev->conn_res.uar = mlx5_get_uars_page(mdev);
+       if (IS_ERR(fdev->conn_res.uar)) {
+               err = PTR_ERR(fdev->conn_res.uar);
+               mlx5_fpga_err(fdev, "get_uars_page failed, %d\n", err);
+               goto undo_roce;
+       }
+       mlx5_fpga_dbg(fdev, "Allocated UAR index %u\n",
+                     fdev->conn_res.uar->index);
+
+       err = mlx5_core_alloc_pd(mdev, &fdev->conn_res.pdn);
+       if (err) {
+               mlx5_fpga_err(fdev, "alloc pd failed, %d\n", err);
+               goto undo_uar;
+       }
+       mlx5_fpga_dbg(fdev, "Allocated PD %u\n", fdev->conn_res.pdn);
+
+       err = mlx5_fpga_conn_create_mkey(mdev, fdev->conn_res.pdn,
+                                        &fdev->conn_res.mkey);
+       if (err) {
+               mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
+               goto undo_pd;
+       }
+       mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+
+       return 0;
+
+undo_pd:
+       mlx5_core_dealloc_pd(mdev, fdev->conn_res.pdn);
+undo_uar:
+       mlx5_put_uars_page(mdev, fdev->conn_res.uar);
+undo_roce:
+       mlx5_nic_vport_disable_roce(mdev);
+       return err;
+}
+
+/* Release the per-device connection resources in the reverse order of
+ * mlx5_fpga_conn_device_init(): memory key, protection domain, UAR page,
+ * and finally RoCE on the NIC vport.
+ */
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
+{
+       struct mlx5_core_dev *mdev = fdev->mdev;
+
+       mlx5_core_destroy_mkey(mdev, &fdev->conn_res.mkey);
+       mlx5_core_dealloc_pd(mdev, fdev->conn_res.pdn);
+       mlx5_put_uars_page(mdev, fdev->conn_res.uar);
+       mlx5_nic_vport_disable_roce(mdev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h

new file mode 100644 (file)

index 0000000..44bd9ec
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_CONN_H__
+#define __MLX5_FPGA_CONN_H__
+
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
+
+#include "fpga/core.h"
+#include "fpga/sdk.h"
+#include "wq.h"
+
+/* State of one RC connection between the host and the FPGA */
+struct mlx5_fpga_conn {
+       struct mlx5_fpga_device *fdev;
+
+       /* Receive callback and its opaque argument, both taken from the
+        * attributes passed to mlx5_fpga_conn_create()
+        */
+       void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+       void *cb_arg;
+
+       /* FPGA QP */
+       u32 fpga_qpc[MLX5_ST_SZ_DW(fpga_qpc)];
+       u32 fpga_qpn;
+
+       /* CQ */
+       struct {
+               struct mlx5_cqwq wq;
+               struct mlx5_frag_wq_ctrl wq_ctrl;
+               struct mlx5_core_cq mcq;
+               struct tasklet_struct tasklet;
+       } cq;
+
+       /* QP */
+       struct {
+               /* Set once the QP reached INIT, cleared on destroy */
+               bool active;
+               /* Index of the reserved RoCE GID used as source GID */
+               int sgid_index;
+               struct mlx5_wq_qp wq;
+               struct mlx5_wq_ctrl wq_ctrl;
+               struct mlx5_core_qp mqp;
+               struct {
+                       spinlock_t lock; /* Protects all SQ state */
+                       /* NOTE(review): pc/cc are presumably the producer
+                        * and consumer counters - confirm against users
+                        */
+                       unsigned int pc;
+                       unsigned int cc;
+                       /* Number of entries, a power of two */
+                       unsigned int size;
+                       /* One pointer per SQ entry, NULL when unused */
+                       struct mlx5_fpga_dma_buf **bufs;
+                       /* Buffers completed with a flush error on destroy
+                        * in addition to those in bufs
+                        */
+                       struct list_head backlog;
+               } sq;
+               struct {
+                       unsigned int pc;
+                       unsigned int cc;
+                       /* Number of entries, a power of two */
+                       unsigned int size;
+                       /* One pointer per RQ entry, NULL when unused */
+                       struct mlx5_fpga_dma_buf **bufs;
+               } rq;
+       } qp;
+};
+
+/* Set up / release the per-device resources shared by all connections */
+int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev);
+void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev);
+/* Returns the new connection or an ERR_PTR() value, never NULL */
+struct mlx5_fpga_conn *
+mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
+                     struct mlx5_fpga_conn_attr *attr,
+                     enum mlx5_ifc_fpga_qp_type qp_type);
+/* Tears the connection down and frees @conn */
+void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn);
+/* NOTE(review): presumably queues @buf for transmission on @conn - confirm
+ * against the implementation in conn.c
+ */
+int mlx5_fpga_conn_send(struct mlx5_fpga_conn *conn,
+                       struct mlx5_fpga_dma_buf *buf);
+
+#endif /* __MLX5_FPGA_CONN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c

index d88b332e96697b0db682cc51272709d7fb01e5d1..31e5a2627eb8a6a2ec9aad78a6e2ab3d8aa5ab4f 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -35,7 +35,9 @@
  #include <linux/mlx5/driver.h>
  
  #include "mlx5_core.h"
+#include "lib/mlx5.h"
  #include "fpga/core.h"
+#include "fpga/conn.h"
  
  static const char *const mlx5_fpga_error_strings[] = {
         "Null Syndrome",
@@ -100,10 +102,34 @@ static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
         return 0;
  }
  
+/* Reset the FPGA sandbox: turn sandbox bypass on, reset the sandbox, then
+ * turn bypass off again.
+ *
+ * NOTE(review): "brb" presumably abbreviates bypass-reset-bypass.
+ *
+ * Returns 0 on success.  The sequence stops at the first failing control
+ * operation and returns its error; steps already applied are not undone.
+ */
+int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
+{
+       int err;
+       struct mlx5_core_dev *mdev = fdev->mdev;
+
+       err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
+               return err;
+       }
+       err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
+               return err;
+       }
+       err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
+               return err;
+       }
+       return 0;
+}
+
  int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
  {
         struct mlx5_fpga_device *fdev = mdev->fpga;
         unsigned long flags;
+       unsigned int max_num_qps;
         int err;
  
         if (!fdev)
@@ -123,6 +149,28 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
                        mlx5_fpga_image_name(fdev->last_oper_image),
                        MLX5_CAP_FPGA(fdev->mdev, image_version));
  
+       max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+       err = mlx5_core_reserve_gids(mdev, max_num_qps);
+       if (err)
+               goto out;
+
+       err = mlx5_fpga_conn_device_init(fdev);
+       if (err)
+               goto err_rsvd_gid;
+
+       if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+               err = mlx5_fpga_device_brb(fdev);
+               if (err)
+                       goto err_conn_init;
+       }
+
+       goto out;
+
+err_conn_init:
+       mlx5_fpga_conn_device_cleanup(fdev);
+
+err_rsvd_gid:
+       mlx5_core_unreserve_gids(mdev, max_num_qps);
  out:
         spin_lock_irqsave(&fdev->state_lock, flags);
         fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
@@ -130,7 +178,7 @@ out:
         return err;
  }
  
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+int mlx5_fpga_init(struct mlx5_core_dev *mdev)
  {
         struct mlx5_fpga_device *fdev = NULL;
  
@@ -151,9 +199,42 @@ int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
         return 0;
  }
  
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+/* Stop a previously started FPGA device.
+ *
+ * Does nothing when the device has no FPGA or when its state is not
+ * MLX5_FPGA_STATUS_SUCCESS.  Otherwise the state is set to
+ * MLX5_FPGA_STATUS_NONE, sandbox bypass is turned on when the user image
+ * is running (a failure is only logged), the connection resources are
+ * released and the reserved GIDs are returned.
+ */
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       unsigned int max_num_qps;
+       unsigned long flags;
+       int err;
+
+       if (!fdev)
+               return;
+
+       /* Check and update the state under the lock */
+       spin_lock_irqsave(&fdev->state_lock, flags);
+       if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
+               spin_unlock_irqrestore(&fdev->state_lock, flags);
+               return;
+       }
+       fdev->state = MLX5_FPGA_STATUS_NONE;
+       spin_unlock_irqrestore(&fdev->state_lock, flags);
+
+       if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
+               err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
+               if (err)
+                       mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
+                                     err);
+       }
+
+       mlx5_fpga_conn_device_cleanup(fdev);
+       /* Same count as the one passed to mlx5_core_reserve_gids() on start */
+       max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
+       mlx5_core_unreserve_gids(mdev, max_num_qps);
+}
+
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
  {
-       kfree(mdev->fpga);
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+
+       mlx5_fpga_device_stop(mdev);
+       kfree(fdev);
         mdev->fpga = NULL;
  }
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h

index c55044d667787d5938811dff9cce8fb6dbe90ec7..82405ed847255030d1c042420569d4c61b7a1c47 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -44,6 +44,15 @@ struct mlx5_fpga_device {
         enum mlx5_fpga_status state;
         enum mlx5_fpga_image last_admin_image;
         enum mlx5_fpga_image last_oper_image;
+
+       /* QP Connection resources */
+       struct {
+               u32 pdn;
+               struct mlx5_core_mkey mkey;
+               struct mlx5_uars_page *uar;
+       } conn_res;
+
+       struct mlx5_fpga_ipsec *ipsec;
  };
  
  #define mlx5_fpga_dbg(__adev, format, ...) \
@@ -68,19 +77,20 @@ struct mlx5_fpga_device {
  #define mlx5_fpga_info(__adev, format, ...) \
         dev_info(&(__adev)->mdev->pdev->dev, "FPGA: " format, ##__VA_ARGS__)
  
-int mlx5_fpga_device_init(struct mlx5_core_dev *mdev);
-void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev);
+int mlx5_fpga_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev);
  int mlx5_fpga_device_start(struct mlx5_core_dev *mdev);
+void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev);
  void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data);
  
  #else
  
-static inline int mlx5_fpga_device_init(struct mlx5_core_dev *mdev)
+static inline int mlx5_fpga_init(struct mlx5_core_dev *mdev)
  {
         return 0;
  }
  
-static inline void mlx5_fpga_device_cleanup(struct mlx5_core_dev *mdev)
+static inline void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
  {
  }
  
@@ -89,6 +99,10 @@ static inline int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
         return 0;
  }
  
+static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
+{
+}
+
  static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event,
                                    void *data)
  {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c

new file mode 100644 (file)

index 0000000..42970e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/driver.h>
+
+#include "mlx5_core.h"
+#include "fpga/ipsec.h"
+#include "fpga/sdk.h"
+#include "fpga/core.h"
+
+#define SBU_QP_QUEUE_SIZE 8
+
+/* Syndrome values decoded from mlx5_ipsec_sadb_resp.syndrome; translated to
+ * errno values by syndrome_to_errno().
+ */
+enum mlx5_ipsec_response_syndrome {
+       MLX5_IPSEC_RESPONSE_SUCCESS = 0,
+       MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST = 1,
+       MLX5_IPSEC_RESPONSE_SADB_ISSUE = 2,
+       MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE = 3,
+};
+
+/* State of a queued SA command (mlx5_ipsec_command_context.status) */
+enum mlx5_fpga_ipsec_sacmd_status {
+       /* Set by mlx5_fpga_ipsec_sa_cmd_exec() before the command is sent */
+       MLX5_FPGA_IPSEC_SACMD_PENDING,
+       /* Set by the send-completion callback when it reports a failure */
+       MLX5_FPGA_IPSEC_SACMD_SEND_FAIL,
+       /* Set by the receive callback once a response has been processed */
+       MLX5_FPGA_IPSEC_SACMD_COMPLETE,
+};
+
+/* Book-keeping for one SA command in flight. Allocated by
+ * mlx5_fpga_ipsec_sa_cmd_exec() and freed by mlx5_fpga_ipsec_sa_cmd_wait().
+ */
+struct mlx5_ipsec_command_context {
+       /* TX buffer handed to the connection; sg[0] points at @sa */
+       struct mlx5_fpga_dma_buf buf;
+       /* Private copy of the caller's command */
+       struct mlx5_accel_ipsec_sa sa;
+       enum mlx5_fpga_ipsec_sacmd_status status;
+       /* errno derived from the response syndrome; valid when COMPLETE */
+       int status_code;
+       /* Signalled on send failure or when the response is processed */
+       struct completion complete;
+       struct mlx5_fpga_device *dev;
+       struct list_head list; /* Item in pending_cmds */
+};
+
+/* Response message received on the IPSec command connection.
+ * Both words are big-endian; the receive callback converts them with ntohl().
+ */
+struct mlx5_ipsec_sadb_resp {
+       __be32 syndrome;
+       /* Compared against the sw_sa_handle of the oldest pending command */
+       __be32 sw_sa_handle;
+       u8 reserved[24];
+} __packed;
+
+/* Per-device IPSec state, allocated by mlx5_fpga_ipsec_init() and hung off
+ * mlx5_fpga_device.ipsec.
+ */
+struct mlx5_fpga_ipsec {
+       /* Commands sent (or about to be sent) and awaiting a response, FIFO */
+       struct list_head pending_cmds;
+       spinlock_t pending_cmds_lock; /* Protects pending_cmds */
+       /* Raw ipsec_extended_cap block read via mlx5_fpga_get_sbu_caps() */
+       u32 caps[MLX5_ST_SZ_DW(ipsec_extended_cap)];
+       /* SBU connection used for SA commands and their responses */
+       struct mlx5_fpga_conn *conn;
+};
+
+/* Report whether @mdev has FPGA state, advertises the fpga capability, and
+ * its FPGA caps carry the MLNX sandbox vendor id together with the IPSEC
+ * sandbox product id.
+ */
+static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
+{
+       bool has_fpga = mdev->fpga && MLX5_CAP_GEN(mdev, fpga);
+
+       if (!has_fpga)
+               return false;
+
+       return MLX5_CAP_FPGA(mdev, ieee_vendor_id) ==
+                      MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX &&
+              MLX5_CAP_FPGA(mdev, sandbox_product_id) ==
+                      MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC;
+}
+
+/* TX completion callback installed on every command buffer.
+ * A zero @status needs no action here: the command stays pending until the
+ * receive callback sees its response. A non-zero @status marks the command
+ * as SEND_FAIL and wakes whoever waits on it.
+ */
+static void mlx5_fpga_ipsec_send_complete(struct mlx5_fpga_conn *conn,
+                                         struct mlx5_fpga_device *fdev,
+                                         struct mlx5_fpga_dma_buf *buf,
+                                         u8 status)
+{
+       struct mlx5_ipsec_command_context *cmd_ctx;
+
+       if (!status)
+               return;
+
+       /* buf is embedded in the command context */
+       cmd_ctx = container_of(buf, struct mlx5_ipsec_command_context, buf);
+       mlx5_fpga_warn(fdev, "IPSec command send failed with status %u\n",
+                      status);
+       cmd_ctx->status = MLX5_FPGA_IPSEC_SACMD_SEND_FAIL;
+       complete(&cmd_ctx->complete);
+}
+
+/* Map a response syndrome to 0 or a negative errno.
+ * Anything that is not explicitly recognised is reported as -EIO.
+ */
+static inline int syndrome_to_errno(enum mlx5_ipsec_response_syndrome syndrome)
+{
+       switch (syndrome) {
+       case MLX5_IPSEC_RESPONSE_SUCCESS:
+               return 0;
+       case MLX5_IPSEC_RESPONSE_ILLEGAL_REQUEST:
+               return -EINVAL;
+       case MLX5_IPSEC_RESPONSE_SADB_ISSUE:
+               return -EEXIST;
+       case MLX5_IPSEC_RESPONSE_WRITE_RESPONSE_ISSUE:
+               break;
+       }
+
+       return -EIO;
+}
+
+/* Receive callback of the IPSec command connection.
+ * @cb_arg: the mlx5_fpga_device registered in mlx5_fpga_ipsec_init()
+ * @buf: received packet; sg[0] holds a struct mlx5_ipsec_sadb_resp
+ *
+ * Each response is paired with the oldest entry of pending_cmds. That entry
+ * is unlinked, its status and status_code are filled in, and its waiter is
+ * woken. Short packets and responses with no pending command are dropped
+ * with a warning.
+ */
+static void mlx5_fpga_ipsec_recv(void *cb_arg, struct mlx5_fpga_dma_buf *buf)
+{
+       struct mlx5_ipsec_sadb_resp *resp = buf->sg[0].data;
+       struct mlx5_ipsec_command_context *context;
+       enum mlx5_ipsec_response_syndrome syndrome;
+       struct mlx5_fpga_device *fdev = cb_arg;
+       unsigned long flags;
+
+       if (buf->sg[0].size < sizeof(*resp)) {
+               mlx5_fpga_warn(fdev, "Short receive from FPGA IPSec: %u < %zu bytes\n",
+                              buf->sg[0].size, sizeof(*resp));
+               return;
+       }
+
+       mlx5_fpga_dbg(fdev, "mlx5_ipsec recv_cb syndrome %08x sa_id %x\n",
+                     ntohl(resp->syndrome), ntohl(resp->sw_sa_handle));
+
+       spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+       context = list_first_entry_or_null(&fdev->ipsec->pending_cmds,
+                                          struct mlx5_ipsec_command_context,
+                                          list);
+       if (context)
+               list_del(&context->list);
+       spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+       if (!context) {
+               mlx5_fpga_warn(fdev, "Received IPSec offload response without pending command request\n");
+               return;
+       }
+       mlx5_fpga_dbg(fdev, "Handling response for %p\n", context);
+
+       if (context->sa.sw_sa_handle != resp->sw_sa_handle) {
+               mlx5_fpga_err(fdev, "mismatch SA handle. cmd 0x%08x vs resp 0x%08x\n",
+                             ntohl(context->sa.sw_sa_handle),
+                             ntohl(resp->sw_sa_handle));
+               /* The command has already been unlinked and can never be
+                * matched again. Fail it so its waiter wakes up and frees
+                * the context instead of blocking forever.
+                */
+               context->status_code = -EIO;
+               context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+               complete(&context->complete);
+               return;
+       }
+
+       syndrome = ntohl(resp->syndrome);
+       context->status_code = syndrome_to_errno(syndrome);
+       context->status = MLX5_FPGA_IPSEC_SACMD_COMPLETE;
+
+       if (context->status_code)
+               mlx5_fpga_warn(fdev, "IPSec SADB command failed with syndrome %08x\n",
+                              syndrome);
+       complete(&context->complete);
+}
+
+/* Queue an SA command for transmission to the FPGA.
+ * @mdev: core device
+ * @cmd: command to send; copied, so the caller keeps ownership
+ *
+ * Return: an opaque context to pass to mlx5_fpga_ipsec_sa_cmd_wait(), or
+ * ERR_PTR(-EOPNOTSUPP) when there is no IPSec FPGA state, ERR_PTR(-ENOMEM)
+ * on allocation failure, or ERR_PTR() of the send error.
+ */
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+                                 struct mlx5_accel_ipsec_sa *cmd)
+{
+       struct mlx5_ipsec_command_context *context;
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       unsigned long flags;
+       int res = 0;
+
+       BUILD_BUG_ON((sizeof(struct mlx5_accel_ipsec_sa) & 3) != 0);
+       if (!fdev || !fdev->ipsec)
+               return ERR_PTR(-EOPNOTSUPP);
+
+       context = kzalloc(sizeof(*context), GFP_ATOMIC);
+       if (!context)
+               return ERR_PTR(-ENOMEM);
+
+       memcpy(&context->sa, cmd, sizeof(*cmd));
+       context->buf.complete = mlx5_fpga_ipsec_send_complete;
+       context->buf.sg[0].size = sizeof(context->sa);
+       context->buf.sg[0].data = &context->sa;
+       init_completion(&context->complete);
+       context->dev = fdev;
+       /* Enqueue before sending so the receive callback can find the
+        * command even if the response arrives immediately.
+        */
+       spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+       list_add_tail(&context->list, &fdev->ipsec->pending_cmds);
+       spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+
+       context->status = MLX5_FPGA_IPSEC_SACMD_PENDING;
+
+       res = mlx5_fpga_sbu_conn_sendmsg(fdev->ipsec->conn, &context->buf);
+       if (res) {
+               mlx5_fpga_warn(fdev, "Failure sending IPSec command: %d\n",
+                              res);
+               /* Never sent: undo the enqueue and release the context */
+               spin_lock_irqsave(&fdev->ipsec->pending_cmds_lock, flags);
+               list_del(&context->list);
+               spin_unlock_irqrestore(&fdev->ipsec->pending_cmds_lock, flags);
+               kfree(context);
+               return ERR_PTR(res);
+       }
+       /* Context will be freed by wait func after completion */
+       return context;
+}
+
+/* Block until the command behind @ctx finishes and report its outcome.
+ * @ctx: context returned by mlx5_fpga_ipsec_sa_cmd_exec()
+ *
+ * Return: the errno derived from the response when the command completed,
+ * -EIO when it ended in any other state, or -EINTR when the wait was killed.
+ *
+ * NOTE(review): on -EINTR the context is not freed here, and nothing visible
+ * in this file frees it later -- confirm whether that is intended.
+ */
+int mlx5_fpga_ipsec_sa_cmd_wait(void *ctx)
+{
+       struct mlx5_ipsec_command_context *context = ctx;
+       int res;
+
+       if (wait_for_completion_killable(&context->complete)) {
+               mlx5_fpga_warn(context->dev, "Failure waiting for IPSec command response\n");
+               return -EINTR;
+       }
+
+       res = -EIO;
+       if (context->status == MLX5_FPGA_IPSEC_SACMD_COMPLETE)
+               res = context->status_code;
+
+       kfree(context);
+       return res;
+}
+
+/* Build the MLX5_ACCEL_IPSEC_* capability mask for @mdev.
+ *
+ * Return: 0 for a non-IPSec device; MLX5_ACCEL_IPSEC_DEVICE alone when the
+ * IPSec state has not been set up; otherwise that bit plus one bit for each
+ * of the esp, ipv6 and lso fields set in the extended capabilities.
+ */
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       u32 caps;
+
+       if (!mlx5_fpga_is_ipsec_device(mdev))
+               return 0;
+
+       caps = MLX5_ACCEL_IPSEC_DEVICE;
+       if (!fdev->ipsec)
+               return caps;
+
+       if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, esp))
+               caps |= MLX5_ACCEL_IPSEC_ESP;
+       if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, ipv6))
+               caps |= MLX5_ACCEL_IPSEC_IPV6;
+       if (MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps, lso))
+               caps |= MLX5_ACCEL_IPSEC_LSO;
+
+       return caps;
+}
+
+/* Return the number_of_ipsec_counters field of the extended capabilities,
+ * or 0 when @mdev has no FPGA/IPSec state.
+ */
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+
+       if (fdev && fdev->ipsec)
+               return MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+                               number_of_ipsec_counters);
+
+       return 0;
+}
+
+/* Read the IPSec counters from FPGA memory.
+ * @mdev: core device
+ * @counters: output array, filled in host byte order
+ * @counters_count: capacity of @counters; at most this many are written
+ *
+ * The counter block address comes from the ipsec_counters_addr_low/high
+ * capability fields. The whole block is always read; the result is then
+ * truncated to @counters_count entries.
+ *
+ * Return: 0 on success, or when there is no IPSec state or no counters;
+ * -ENOMEM on allocation failure; the error from mlx5_fpga_mem_read().
+ */
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+                                 unsigned int counters_count)
+{
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       unsigned int i;
+       u32 *data;
+       u32 count;
+       u64 addr;
+       int ret;
+
+       if (!fdev || !fdev->ipsec)
+               return 0;
+
+       addr = (u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+                            ipsec_counters_addr_low) +
+              ((u64)MLX5_GET(ipsec_extended_cap, fdev->ipsec->caps,
+                            ipsec_counters_addr_high) << 32);
+
+       count = mlx5_fpga_ipsec_counters_count(mdev);
+       /* Nothing to fetch; avoid a zero-length allocation and read */
+       if (!count)
+               return 0;
+
+       /* Two 32-bit words per counter; kcalloc checks the multiplication */
+       data = kcalloc(count, sizeof(u64), GFP_KERNEL);
+       if (!data) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = mlx5_fpga_mem_read(fdev, count * sizeof(u64), addr, data,
+                                MLX5_FPGA_ACCESS_TYPE_DONTCARE);
+       if (ret < 0) {
+               mlx5_fpga_err(fdev, "Failed to read IPSec counters from HW: %d\n",
+                             ret);
+               goto out;
+       }
+       /* mlx5_fpga_mem_read() returns the byte count on success */
+       ret = 0;
+
+       if (count > counters_count)
+               count = counters_count;
+
+       /* Each counter is low word, then high. But each word is big-endian */
+       for (i = 0; i < count; i++)
+               counters[i] = (u64)ntohl(data[i * 2]) |
+                             ((u64)ntohl(data[i * 2 + 1]) << 32);
+
+out:
+       kfree(data);
+       return ret;
+}
+
+/* Set up IPSec offload state for @mdev.
+ * Does nothing (and returns 0) when the device is not an IPSec FPGA.
+ * Otherwise allocates fdev->ipsec, reads the SBU capabilities into it and
+ * creates the command connection.
+ *
+ * Return: 0 on success, -ENOMEM, or the error from the caps query or the
+ * connection creation. On failure fdev->ipsec is left NULL.
+ */
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_fpga_conn_attr init_attr = {0};
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+       struct mlx5_fpga_conn *conn;
+       int err;
+
+       if (!mlx5_fpga_is_ipsec_device(mdev))
+               return 0;
+
+       fdev->ipsec = kzalloc(sizeof(*fdev->ipsec), GFP_KERNEL);
+       if (!fdev->ipsec)
+               return -ENOMEM;
+
+       err = mlx5_fpga_get_sbu_caps(fdev, sizeof(fdev->ipsec->caps),
+                                    fdev->ipsec->caps);
+       if (err) {
+               mlx5_fpga_err(fdev, "Failed to retrieve IPSec extended capabilities: %d\n",
+                             err);
+               goto error;
+       }
+
+       /* The receive callback walks pending_cmds under its lock, so both
+        * are initialised before the connection is created.
+        */
+       INIT_LIST_HEAD(&fdev->ipsec->pending_cmds);
+       spin_lock_init(&fdev->ipsec->pending_cmds_lock);
+
+       init_attr.rx_size = SBU_QP_QUEUE_SIZE;
+       init_attr.tx_size = SBU_QP_QUEUE_SIZE;
+       init_attr.recv_cb = mlx5_fpga_ipsec_recv;
+       init_attr.cb_arg = fdev;
+       conn = mlx5_fpga_sbu_conn_create(fdev, &init_attr);
+       if (IS_ERR(conn)) {
+               err = PTR_ERR(conn);
+               mlx5_fpga_err(fdev, "Error creating IPSec command connection %d\n",
+                             err);
+               goto error;
+       }
+       fdev->ipsec->conn = conn;
+       return 0;
+
+error:
+       kfree(fdev->ipsec);
+       fdev->ipsec = NULL;
+       return err;
+}
+
+/* Tear down what mlx5_fpga_ipsec_init() set up: destroy the command
+ * connection and free the IPSec state. Safe to call on a non-IPSec device
+ * or when init never succeeded.
+ */
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_fpga_device *fdev = mdev->fpga;
+
+       if (!mlx5_fpga_is_ipsec_device(mdev))
+               return;
+
+       /* mlx5_fpga_ipsec_init() leaves ipsec NULL when it fails; the other
+        * entry points in this file check for that, so do the same here.
+        */
+       if (!fdev->ipsec)
+               return;
+
+       mlx5_fpga_sbu_conn_destroy(fdev->ipsec->conn);
+       kfree(fdev->ipsec);
+       fdev->ipsec = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h

new file mode 100644 (file)

index 0000000..26a3e4b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __MLX5_FPGA_IPSEC_H__
+#define __MLX5_FPGA_IPSEC_H__
+
+#include "accel/ipsec.h"
+
+#ifdef CONFIG_MLX5_FPGA
+
+void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+                                 struct mlx5_accel_ipsec_sa *cmd);
+int mlx5_fpga_ipsec_sa_cmd_wait(void *context);
+
+u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
+unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
+int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
+                                 unsigned int counters_count);
+
+int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
+void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev);
+
+#else
+
+/* Stub used when CONFIG_MLX5_FPGA is not defined: always unsupported */
+static inline void *mlx5_fpga_ipsec_sa_cmd_exec(struct mlx5_core_dev *mdev,
+                                               struct mlx5_accel_ipsec_sa *cmd)
+{
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not defined: always unsupported */
+static inline int mlx5_fpga_ipsec_sa_cmd_wait(void *context)
+{
+       return -EOPNOTSUPP;
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not defined: no capabilities */
+static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+       return 0;
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not defined: no counters */
+static inline unsigned int
+mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev)
+{
+       return 0;
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not defined: nothing is read.
+ * Takes the same three parameters as the real prototype so that callers
+ * build identically in both configurations.
+ */
+static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev,
+                                               u64 *counters,
+                                               unsigned int counters_count)
+{
+       return 0;
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not defined: nothing to set up */
+static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
+{
+       return 0;
+}
+
+/* Stub used when CONFIG_MLX5_FPGA is not defined: nothing to tear down */
+static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
+{
+}
+
+#endif /* CONFIG_MLX5_FPGA */
+
+#endif /* __MLX5_FPGA_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c

new file mode 100644 (file)

index 0000000..3c11d6e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/mlx5/device.h>
+
+#include "fpga/core.h"
+#include "fpga/conn.h"
+#include "fpga/sdk.h"
+
+/* Create a connection of type MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP by
+ * delegating to mlx5_fpga_conn_create(); the result is returned unchanged.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+                         struct mlx5_fpga_conn_attr *attr)
+{
+       return mlx5_fpga_conn_create(fdev, attr, MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_create);
+
+/* Destroy an SBU connection; thin wrapper over mlx5_fpga_conn_destroy() */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn)
+{
+       mlx5_fpga_conn_destroy(conn);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_destroy);
+
+/* Send @buf on @conn; thin wrapper returning mlx5_fpga_conn_send()'s result */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+                              struct mlx5_fpga_dma_buf *buf)
+{
+       return mlx5_fpga_conn_send(conn, buf);
+}
+EXPORT_SYMBOL(mlx5_fpga_sbu_conn_sendmsg);
+
+/* Read @size bytes starting at FPGA address @addr into @buf, split into
+ * register accesses of at most MLX5_FPGA_ACCESS_REG_SIZE_MAX bytes each.
+ *
+ * Return: 0 on success, -EINVAL for a zero @size, -ENOTCONN when the FPGA
+ * device has no core device, or the first mlx5_fpga_access_reg() error.
+ */
+static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
+                                 u64 addr, u8 *buf)
+{
+       size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+       size_t bytes_done = 0;
+       u8 actual_size;
+       int err;
+
+       /* With nothing to read the loop would never assign err */
+       if (!size)
+               return -EINVAL;
+
+       if (!fdev->mdev)
+               return -ENOTCONN;
+
+       while (bytes_done < size) {
+               actual_size = min(max_size, (size - bytes_done));
+
+               err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+                                          addr + bytes_done,
+                                          buf + bytes_done, false);
+               if (err) {
+                       mlx5_fpga_err(fdev, "Failed to read over I2C: %d\n",
+                                     err);
+                       break;
+               }
+
+               bytes_done += actual_size;
+       }
+
+       return err;
+}
+
+/* Write @size bytes from @buf to FPGA address @addr, split into register
+ * accesses of at most MLX5_FPGA_ACCESS_REG_SIZE_MAX bytes each.
+ *
+ * Return: 0 on success, -EINVAL for a zero @size, -ENOTCONN when the FPGA
+ * device has no core device, or the first mlx5_fpga_access_reg() error.
+ */
+static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
+                                  u64 addr, u8 *buf)
+{
+       size_t max_size = MLX5_FPGA_ACCESS_REG_SIZE_MAX;
+       size_t bytes_done = 0;
+       u8 actual_size;
+       int err;
+
+       /* With nothing to write the loop would never assign err */
+       if (!size)
+               return -EINVAL;
+
+       if (!fdev->mdev)
+               return -ENOTCONN;
+
+       while (bytes_done < size) {
+               actual_size = min(max_size, (size - bytes_done));
+
+               err = mlx5_fpga_access_reg(fdev->mdev, actual_size,
+                                          addr + bytes_done,
+                                          buf + bytes_done, true);
+               if (err) {
+                       mlx5_fpga_err(fdev, "Failed to write FPGA crspace\n");
+                       break;
+               }
+
+               bytes_done += actual_size;
+       }
+
+       return err;
+}
+
+/* Read from FPGA memory using the requested access method.
+ * Only MLX5_FPGA_ACCESS_TYPE_I2C is implemented; any other value is
+ * rejected with -EACCES.
+ *
+ * Return: @size on success, otherwise a negative error value.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+                      void *buf, enum mlx5_fpga_access_type access_type)
+{
+       int err;
+
+       if (access_type != MLX5_FPGA_ACCESS_TYPE_I2C) {
+               mlx5_fpga_warn(fdev, "Unexpected read access_type %u\n",
+                              access_type);
+               return -EACCES;
+       }
+
+       err = mlx5_fpga_mem_read_i2c(fdev, size, addr, buf);
+       if (err)
+               return err;
+
+       return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_read);
+
+/* Write to FPGA memory using the requested access method.
+ * Only MLX5_FPGA_ACCESS_TYPE_I2C is implemented; any other value is
+ * rejected with -EACCES.
+ *
+ * Return: @size on success, otherwise a negative error value.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+                       void *buf, enum mlx5_fpga_access_type access_type)
+{
+       int err;
+
+       if (access_type != MLX5_FPGA_ACCESS_TYPE_I2C) {
+               mlx5_fpga_warn(fdev, "Unexpected write access_type %u\n",
+                              access_type);
+               return -EACCES;
+       }
+
+       err = mlx5_fpga_mem_write_i2c(fdev, size, addr, buf);
+       if (err)
+               return err;
+
+       return size;
+}
+EXPORT_SYMBOL(mlx5_fpga_mem_write);
+
+/* Fetch the SBU capabilities into @buf (@size bytes) by delegating to
+ * mlx5_fpga_sbu_caps() on the underlying core device. Note that the
+ * argument order of the callee is (buf, size).
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf)
+{
+       return mlx5_fpga_sbu_caps(fdev->mdev, buf, size);
+}
+EXPORT_SYMBOL(mlx5_fpga_get_sbu_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h

new file mode 100644 (file)

index 0000000..baa537e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MLX5_FPGA_SDK_H
+#define MLX5_FPGA_SDK_H
+
+#include <linux/types.h>
+#include <linux/dma-direction.h>
+
+/**
+ * DOC: Innova SDK
+ * This header defines the in-kernel API for Innova FPGA client drivers.
+ */
+
+/* Access method for mlx5_fpga_mem_read() / mlx5_fpga_mem_write().
+ * DONTCARE shares the value of I2C, so it selects the I2C path.
+ */
+enum mlx5_fpga_access_type {
+       MLX5_FPGA_ACCESS_TYPE_I2C = 0x0,
+       MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0,
+};
+
+struct mlx5_fpga_conn;
+struct mlx5_fpga_device;
+
+/**
+ * struct mlx5_fpga_dma_entry - A scatter-gather DMA entry
+ */
+struct mlx5_fpga_dma_entry {
+       /** @data: Virtual address pointer to the data */
+       void *data;
+       /** @size: Size in bytes of the data */
+       unsigned int size;
+       /** @dma_addr: Private member. Physical DMA-mapped address of the data */
+       dma_addr_t dma_addr;
+};
+
+/**
+ * struct mlx5_fpga_dma_buf - A packet buffer
+ * May contain up to 2 scatter-gather data entries
+ */
+struct mlx5_fpga_dma_buf {
+       /** @dma_dir: DMA direction */
+       enum dma_data_direction dma_dir;
+       /** @sg: Scatter-gather entries pointing to the data in memory */
+       struct mlx5_fpga_dma_entry sg[2];
+       /** @list: Item in SQ backlog, for TX packets */
+       struct list_head list;
+       /**
+        * @complete: Completion routine, for TX packets
+        * @conn: FPGA Connection this packet was sent to
+        * @fdev: FPGA device this packet was sent to
+        * @buf: The packet buffer
+        * @status: 0 if successful, or an error code otherwise
+        */
+       void (*complete)(struct mlx5_fpga_conn *conn,
+                        struct mlx5_fpga_device *fdev,
+                        struct mlx5_fpga_dma_buf *buf, u8 status);
+};
+
+/**
+ * struct mlx5_fpga_conn_attr - FPGA connection attributes
+ * Describes the attributes of a connection
+ */
+struct mlx5_fpga_conn_attr {
+       /** @tx_size: Size of connection TX queue, in packets */
+       unsigned int tx_size;
+       /** @rx_size: Size of connection RX queue, in packets */
+       unsigned int rx_size;
+       /**
+        * @recv_cb: Callback function which is called for received packets
+        * @cb_arg: The value provided in mlx5_fpga_conn_attr.cb_arg
+        * @buf: A buffer containing a received packet
+        *
+        * buf is guaranteed to only contain a single scatter-gather entry.
+        * The size of the actual packet received is specified in buf.sg[0].size
+        * When this callback returns, the packet buffer may be re-used for
+        * subsequent receives.
+        */
+       void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf);
+       /** @cb_arg: Opaque value passed as the first argument of recv_cb */
+       void *cb_arg;
+};
+
+/**
+ * mlx5_fpga_sbu_conn_create() - Initialize a new FPGA SBU connection
+ * @fdev: The FPGA device
+ * @attr: Attributes of the new connection
+ *
+ * Sets up a new FPGA SBU connection with the specified attributes.
+ * The receive callback function may be called for incoming messages even
+ * before this function returns.
+ *
+ * The caller must eventually destroy the connection by calling
+ * mlx5_fpga_sbu_conn_destroy.
+ *
+ * Return: A new connection, or ERR_PTR() error value otherwise.
+ */
+struct mlx5_fpga_conn *
+mlx5_fpga_sbu_conn_create(struct mlx5_fpga_device *fdev,
+                         struct mlx5_fpga_conn_attr *attr);
+
+/**
+ * mlx5_fpga_sbu_conn_destroy() - Destroy an FPGA SBU connection
+ * @conn: The FPGA SBU connection to destroy
+ *
+ * Cleans up an FPGA SBU connection which was previously created with
+ * mlx5_fpga_sbu_conn_create.
+ */
+void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn);
+
+/**
+ * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet
+ * @conn: An FPGA SBU connection
+ * @buf: The packet buffer
+ *
+ * Queues a packet for transmission over an FPGA SBU connection.
+ * The buffer should not be modified or freed until completion.
+ * Upon completion, the buf's complete() callback is invoked, indicating the
+ * success or error status of the transmission.
+ *
+ * Return: 0 if successful, or an error value otherwise.
+ */
+int mlx5_fpga_sbu_conn_sendmsg(struct mlx5_fpga_conn *conn,
+                              struct mlx5_fpga_dma_buf *buf);
+
+/**
+ * mlx5_fpga_mem_read() - Read from FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to read, in bytes
+ * @addr: Starting address to read from, in FPGA address space
+ * @buf: Buffer to read into
+ * @access_type: Method for reading
+ *
+ * Reads from the specified address into the specified buffer.
+ * The address may point to configuration space or to DDR.
+ * Large reads may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: @size if successful, or a negative error value otherwise.
+ */
+int mlx5_fpga_mem_read(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+                      void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_mem_write() - Write to FPGA memory address space
+ * @fdev: The FPGA device
+ * @size: Size of chunk to write, in bytes
+ * @addr: Starting address to write to, in FPGA address space
+ * @buf: Buffer which contains data to write
+ * @access_type: Method for writing
+ *
+ * Writes the specified buffer data to FPGA memory at the specified address.
+ * The address may point to configuration space or to DDR.
+ * Large writes may be performed internally as several non-atomic operations.
+ * This function may sleep, so should not be called from atomic contexts.
+ *
+ * Return: @size if successful, or a negative error value otherwise.
+ */
+int mlx5_fpga_mem_write(struct mlx5_fpga_device *fdev, size_t size, u64 addr,
+                       void *buf, enum mlx5_fpga_access_type access_type);
+
+/**
+ * mlx5_fpga_get_sbu_caps() - Read the SBU capabilities
+ * @fdev: The FPGA device
+ * @size: Size of the buffer to read into
+ * @buf: Buffer to read the capabilities into
+ *
+ * Reads the FPGA SBU capabilities into the specified buffer.
+ * The format of the capabilities buffer is SBU-dependent.
+ *
+ * Return: 0 if successful
+ *         -EINVAL if the buffer is not large enough to contain SBU caps
+ *         or any other error value otherwise.
+ */
+int mlx5_fpga_get_sbu_caps(struct mlx5_fpga_device *fdev, int size, void *buf);
+
+#endif /* MLX5_FPGA_SDK_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c

index 0648a659b21da281f5661d633e0cba9e09e5a6c6..4b6b03d6297f9884412c2ee9768edd0462c34fd3 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -67,6 +67,7 @@ enum {
  
  enum {
         MLX5_DROP_NEW_HEALTH_WORK,
+       MLX5_DROP_NEW_RECOVERY_WORK,
  };
  
  static u8 get_nic_state(struct mlx5_core_dev *dev)
@@ -194,7 +195,7 @@ static void health_care(struct work_struct *work)
         mlx5_handle_bad_state(dev);
  
         spin_lock_irqsave(&health->wq_lock, flags);
-       if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+       if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags))
                 schedule_delayed_work(&health->recover_work, recover_delay);
         else
                 dev_err(&dev->pdev->dev,
@@ -322,6 +323,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
         init_timer(&health->timer);
         health->sick = 0;
         clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+       clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
         health->health = &dev->iseg->health;
         health->health_counter = &dev->iseg->health_counter;
  
@@ -345,11 +347,22 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
  
         spin_lock_irqsave(&health->wq_lock, flags);
         set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+       set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
         spin_unlock_irqrestore(&health->wq_lock, flags);
         cancel_delayed_work_sync(&health->recover_work);
         cancel_work_sync(&health->work);
  }
  
+/* Stop the delayed recovery work only: set MLX5_DROP_NEW_RECOVERY_WORK so
+ * health_care() will not schedule it again, then cancel any instance that
+ * is already queued or running.
+ */
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_health *health = &dev->priv.health;
+       unsigned long flags;
+
+       /* The other users of wq_lock take it with interrupts disabled;
+        * a plain spin_lock() here could deadlock against them.
+        */
+       spin_lock_irqsave(&health->wq_lock, flags);
+       set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+       spin_unlock_irqrestore(&health->wq_lock, flags);
+       cancel_delayed_work_sync(&health->recover_work);
+}
+
  void mlx5_health_cleanup(struct mlx5_core_dev *dev)
  {
         struct mlx5_core_health *health = &dev->priv.health;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c

new file mode 100644 (file)

index 0000000..de2aed4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include <linux/etherdevice.h>
+#include <linux/idr.h>
+#include "mlx5_core.h"
+
+/* Initialise reserved-GID bookkeeping: an empty ID allocator, zero reserved
+ * entries, and a window start equal to the RoCE address table size.
+ */
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev)
+{
+       unsigned int table_size;
+
+       table_size = MLX5_CAP_ROCE(dev, roce_address_table_size);
+
+       ida_init(&dev->roce.reserved_gids.ida);
+       dev->roce.reserved_gids.count = 0;
+       dev->roce.reserved_gids.start = table_size;
+}
+
+/* Tear down reserved-GID bookkeeping.  Warns if any reserved GID is still
+ * allocated, then resets the window and destroys the ID allocator.
+ */
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
+{
+       bool ids_leaked = !ida_is_empty(&dev->roce.reserved_gids.ida);
+
+       WARN_ON(ids_leaked);
+
+       dev->roce.reserved_gids.count = 0;
+       dev->roce.reserved_gids.start = 0;
+       ida_destroy(&dev->roce.reserved_gids.ida);
+}
+
+/**
+ * mlx5_core_reserve_gids - set aside @count more GID table entries
+ * @dev: mlx5 core device
+ * @count: number of additional entries to reserve
+ *
+ * Moves the start of the reserved window down by @count and adds @count to
+ * the reserved total.
+ *
+ * Return: 0 on success, -EPERM if MLX5_INTERFACE_STATE_UP is set, -ENOMEM if
+ * fewer than @count entries remain below the window or the total would
+ * exceed MLX5_MAX_RESERVED_GIDS.
+ */
+int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+       if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+               mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
+               return -EPERM;
+       }
+       if (dev->roce.reserved_gids.start < count) {
+               /* count is unsigned, so it is printed with %u */
+               mlx5_core_warn(dev, "GID table exhausted attempting to reserve %u more GIDs\n",
+                              count);
+               return -ENOMEM;
+       }
+       if (dev->roce.reserved_gids.count + count > MLX5_MAX_RESERVED_GIDS) {
+               mlx5_core_warn(dev, "Unable to reserve %u more GIDs\n", count);
+               return -ENOMEM;
+       }
+
+       dev->roce.reserved_gids.start -= count;
+       dev->roce.reserved_gids.count += count;
+       /* Reports the running total, not just this request. */
+       mlx5_core_dbg(dev, "Reserved %u GIDs starting at %u\n",
+                     dev->roce.reserved_gids.count,
+                     dev->roce.reserved_gids.start);
+       return 0;
+}
+
+/**
+ * mlx5_core_unreserve_gids - give back @count reserved GID table entries
+ * @dev: mlx5 core device
+ * @count: number of entries to release
+ *
+ * Warns if MLX5_INTERFACE_STATE_UP is set.  A request for more entries than
+ * are currently reserved is warned about and ignored, because applying it
+ * would underflow the reserved counter and move the window start too far.
+ */
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
+{
+       WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
+       /* WARN() evaluates to its condition, so bail out on a bogus count. */
+       if (WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
+                count, dev->roce.reserved_gids.count))
+               return;
+
+       dev->roce.reserved_gids.start += count;
+       dev->roce.reserved_gids.count -= count;
+       mlx5_core_dbg(dev, "%u GIDs starting at %u left reserved\n",
+                     dev->roce.reserved_gids.count,
+                     dev->roce.reserved_gids.start);
+}
+
+/**
+ * mlx5_core_reserved_gid_alloc - take one free index from the reserved window
+ * @dev: mlx5 core device
+ * @gid_index: written with the allocated index on success, untouched on error
+ *
+ * Asks the ID allocator for an index between reserved_gids.start and
+ * start + count.
+ *
+ * Return: 0 on success, or the negative error from ida_simple_get().
+ */
+int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
+{
+       int limit = dev->roce.reserved_gids.start +
+                   dev->roce.reserved_gids.count;
+       int id;
+
+       id = ida_simple_get(&dev->roce.reserved_gids.ida,
+                           dev->roce.reserved_gids.start, limit, GFP_KERNEL);
+       if (id >= 0) {
+               mlx5_core_dbg(dev, "Allocating reserved GID %u\n", id);
+               *gid_index = id;
+               return 0;
+       }
+
+       return id;
+}
+
+/* Return @gid_index, previously handed out by the reserved-GID allocator,
+ * to the pool.
+ */
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
+{
+       struct ida *pool = &dev->roce.reserved_gids.ida;
+
+       mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
+       ida_simple_remove(pool, gid_index);
+}
+
+/* Report how many GID table entries are currently reserved on @dev. */
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
+{
+       unsigned int nr_reserved = dev->roce.reserved_gids.count;
+
+       return nr_reserved;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
+
+/**
+ * mlx5_core_roce_gid_set - write one RoCE address table entry
+ * @dev: mlx5 core device
+ * @index: table index to write
+ * @roce_version: value for the entry's roce_version field
+ * @roce_l3_type: value for the entry's roce_l3_type field
+ * @gid: source L3 address to store, or NULL to send an all-zero entry
+ * @mac: source MAC to store; only read when @gid is non-NULL
+ * @vlan: whether the entry carries a VLAN
+ * @vlan_id: VLAN id, only used when @vlan is true
+ *
+ * Return: -EINVAL if the port type is not Ethernet, otherwise the result of
+ * executing the SET_ROCE_ADDRESS command.
+ */
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+                          u8 roce_version, u8 roce_l3_type, const u8 *gid,
+                          const u8 *mac, bool vlan, u16 vlan_id)
+{
+       u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+       u32  in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
+       void *entry = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+       void *mac_dst = MLX5_ADDR_OF(roce_addr_layout, entry,
+                                    source_mac_47_32);
+       char *l3_dst = MLX5_ADDR_OF(roce_addr_layout, entry,
+                                   source_l3_address);
+       int l3_len = MLX5_FLD_SZ_BYTES(roce_addr_layout, source_l3_address);
+
+       if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+               return -EINVAL;
+
+       /* Without a GID the entry stays zeroed. */
+       if (gid) {
+               if (vlan) {
+                       MLX5_SET(roce_addr_layout, entry, vlan_valid, 1);
+                       MLX5_SET(roce_addr_layout, entry, vlan_id, vlan_id);
+               }
+
+               ether_addr_copy(mac_dst, mac);
+               MLX5_SET(roce_addr_layout, entry, roce_version, roce_version);
+               MLX5_SET(roce_addr_layout, entry, roce_l3_type, roce_l3_type);
+               memcpy(l3_dst, gid, l3_len);
+       }
+
+       MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+       MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_roce_gid_set);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h

new file mode 100644 (file)

index 0000000..7550b1c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_MLX5_H__
+#define __LIB_MLX5_H__
+
+/* Forward declaration so the prototypes below do not depend on which
+ * headers the includer pulled in first.
+ */
+struct mlx5_core_dev;
+
+/* Reserved-GID bookkeeping: init/cleanup of the per-device state. */
+void mlx5_init_reserved_gids(struct mlx5_core_dev *dev);
+void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev);
+/* Grow/shrink the reserved window by @count entries. */
+int  mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
+/* Allocate/free a single index inside the reserved window. */
+int  mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
+void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+
+#endif /* __LIB_MLX5_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c

index c7f75e12c13b44f9bc54d8fb57b9e3b5af50a7ae..c065132b956d6ba772f812bff21a190d5759bf13 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -56,7 +56,9 @@
  #ifdef CONFIG_MLX5_CORE_EN
  #include "eswitch.h"
  #endif
+#include "lib/mlx5.h"
  #include "fpga/core.h"
+#include "accel/ipsec.h"
  
  MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
  MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -936,6 +938,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
  
         mlx5_init_mkey_table(dev);
  
+       mlx5_init_reserved_gids(dev);
+
         err = mlx5_init_rl_table(dev);
         if (err) {
                 dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -956,8 +960,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                 goto err_eswitch_cleanup;
         }
  
+       err = mlx5_fpga_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
+               goto err_sriov_cleanup;
+       }
+
         return 0;
  
+err_sriov_cleanup:
+       mlx5_sriov_cleanup(dev);
  err_eswitch_cleanup:
  #ifdef CONFIG_MLX5_CORE_EN
         mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -981,11 +993,13 @@ out:
  
  static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
  {
+       mlx5_fpga_cleanup(dev);
         mlx5_sriov_cleanup(dev);
  #ifdef CONFIG_MLX5_CORE_EN
         mlx5_eswitch_cleanup(dev->priv.eswitch);
  #endif
         mlx5_cleanup_rl_table(dev);
+       mlx5_cleanup_reserved_gids(dev);
         mlx5_cleanup_mkey_table(dev);
         mlx5_cleanup_srq_table(dev);
         mlx5_cleanup_qp_table(dev);
@@ -1020,7 +1034,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
         if (err) {
                 dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
                         FW_PRE_INIT_TIMEOUT_MILI);
-               goto out;
+               goto out_err;
         }
  
         err = mlx5_cmd_init(dev);
@@ -1117,16 +1131,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                 goto err_disable_msix;
         }
  
-       err = mlx5_fpga_device_init(dev);
-       if (err) {
-               dev_err(&pdev->dev, "fpga device init failed %d\n", err);
-               goto err_put_uars;
-       }
-
         err = mlx5_start_eqs(dev);
         if (err) {
                 dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
-               goto err_fpga_init;
+               goto err_put_uars;
         }
  
         err = alloc_comp_eqs(dev);
@@ -1160,7 +1168,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
         err = mlx5_fpga_device_start(dev);
         if (err) {
                 dev_err(&pdev->dev, "fpga device start failed %d\n", err);
-               goto err_reg_dev;
+               goto err_fpga_start;
+       }
+       err = mlx5_accel_ipsec_init(dev);
+       if (err) {
+               dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
+               goto err_ipsec_start;
         }
  
         if (mlx5_device_registered(dev)) {
@@ -1181,6 +1194,11 @@ out:
         return 0;
  
  err_reg_dev:
+       mlx5_accel_ipsec_cleanup(dev);
+err_ipsec_start:
+       mlx5_fpga_device_stop(dev);
+
+err_fpga_start:
         mlx5_sriov_detach(dev);
  
  err_sriov:
@@ -1198,9 +1216,6 @@ err_affinity_hints:
  err_stop_eqs:
         mlx5_stop_eqs(dev);
  
-err_fpga_init:
-       mlx5_fpga_device_cleanup(dev);
-
  err_put_uars:
         mlx5_put_uars_page(dev, priv->uar);
  
@@ -1243,7 +1258,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
         int err = 0;
  
         if (cleanup)
-               mlx5_drain_health_wq(dev);
+               mlx5_drain_health_recovery(dev);
  
         mutex_lock(&dev->intf_state_mutex);
         if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
@@ -1254,9 +1269,15 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                 goto out;
         }
  
+       clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+       set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
+
         if (mlx5_device_registered(dev))
                 mlx5_detach_device(dev);
  
+       mlx5_accel_ipsec_cleanup(dev);
+       mlx5_fpga_device_stop(dev);
+
         mlx5_sriov_detach(dev);
  #ifdef CONFIG_MLX5_CORE_EN
         mlx5_eswitch_detach(dev->priv.eswitch);
@@ -1265,7 +1286,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
         mlx5_irq_clear_affinity_hints(dev);
         free_comp_eqs(dev);
         mlx5_stop_eqs(dev);
-       mlx5_fpga_device_cleanup(dev);
         mlx5_put_uars_page(dev, priv->uar);
         mlx5_disable_msix(dev);
         if (cleanup)
@@ -1282,8 +1302,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
         mlx5_cmd_cleanup(dev);
  
  out:
-       clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
-       set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
         mutex_unlock(&dev->intf_state_mutex);
         return err;
  }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c

index 06019d00ab7ba2078b082ec1ade123b88934c0da..5abfec1c3399547fc41469e65158359a30ae98fb 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -926,12 +926,16 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
  
  int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
  {
+       if (atomic_inc_return(&mdev->roce.roce_en) != 1)
+               return 0;
         return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
  }
  EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
  
  int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
  {
+       if (atomic_dec_return(&mdev->roce.roce_en) != 0)
+               return 0;
         return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
  }
  EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c

index 921673c42bc98b3335ab65ed9b63737fe18f29e1..6bcfc25350f564d39b038a7ba5bf3054994f40c8 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -54,6 +54,12 @@ static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
         return mlx5_wq_cyc_get_size(wq) << wq->log_stride;
  }
  
+/* Total buffer size in bytes for a QP work queue: RQ bytes plus SQ bytes. */
+static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
+{
+       u32 rq_bytes = mlx5_wq_cyc_get_byte_size(&wq->rq);
+       u32 sq_bytes = mlx5_wq_cyc_get_byte_size(&wq->sq);
+
+       return rq_bytes + sq_bytes;
+}
+
  static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
  {
         return mlx5_cqwq_get_size(wq) << wq->log_stride;
@@ -99,6 +105,46 @@ err_db_free:
         return err;
  }
  
+/**
+ * mlx5_wq_qp_create - allocate doorbell and buffer for a QP's RQ and SQ
+ * @mdev: mlx5 core device
+ * @param: NUMA node hints for the doorbell and buffer allocations
+ * @qpc: QP context the queue sizes and RQ stride are read from
+ * @wq: QP work queue to initialise
+ * @wq_ctrl: receives the doorbell record, the buffer and @mdev
+ *
+ * The RQ occupies the start of the buffer and the SQ follows directly
+ * after it.
+ *
+ * Return: 0 on success or the error from the failed allocation; the
+ * doorbell is released again if the buffer allocation fails.
+ */
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+                     void *qpc, struct mlx5_wq_qp *wq,
+                     struct mlx5_wq_ctrl *wq_ctrl)
+{
+       struct mlx5_wq_cyc *rq = &wq->rq;
+       struct mlx5_wq_cyc *sq = &wq->sq;
+       int err;
+
+       /* NOTE(review): the "+ 4" presumably means log_rq_stride is encoded
+        * relative to 16 bytes - confirm against the device spec.
+        */
+       rq->log_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+       rq->sz_m1 = (1 << MLX5_GET(qpc, qpc, log_rq_size)) - 1;
+
+       sq->log_stride = ilog2(MLX5_SEND_WQE_BB);
+       sq->sz_m1 = (1 << MLX5_GET(qpc, qpc, log_sq_size)) - 1;
+
+       err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
+               return err;
+       }
+
+       err = mlx5_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+                                 &wq_ctrl->buf, param->buf_numa_node);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+               mlx5_db_free(mdev, &wq_ctrl->db);
+               return err;
+       }
+
+       rq->buf = wq_ctrl->buf.direct.buf;
+       rq->db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
+       sq->buf = rq->buf + mlx5_wq_cyc_get_byte_size(rq);
+       sq->db  = &wq_ctrl->db.db[MLX5_SND_DBR];
+
+       wq_ctrl->mdev = mdev;
+
+       return 0;
+}
+
  int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *cqc, struct mlx5_cqwq *wq,
                      struct mlx5_frag_wq_ctrl *wq_ctrl)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h

index d8afed898c31d3719d6d44d3d46649e52e9baa10..718589d0cec283c8ef2c94968fa658c57b43170a 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -34,6 +34,8 @@
  #define __MLX5_WQ_H__
  
  #include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
  
  struct mlx5_wq_param {
         int             linear;
@@ -60,6 +62,11 @@ struct mlx5_wq_cyc {
         u8                      log_stride;
  };
  
+/* The pair of cyclic work queues belonging to one QP; both are set up by
+ * mlx5_wq_qp_create().
+ */
+struct mlx5_wq_qp {
+       struct mlx5_wq_cyc      rq;     /* receive queue */
+       struct mlx5_wq_cyc      sq;     /* send queue */
+};
+
  struct mlx5_cqwq {
         struct mlx5_frag_buf    frag_buf;
         __be32                  *db;
@@ -87,6 +94,10 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                        struct mlx5_wq_ctrl *wq_ctrl);
  u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
  
+int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
+                     void *qpc, struct mlx5_wq_qp *wq,
+                     struct mlx5_wq_ctrl *wq_ctrl);
+
  int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *cqc, struct mlx5_cqwq *wq,
                      struct mlx5_frag_wq_ctrl *wq_ctrl);
@@ -146,6 +157,22 @@ static inline void mlx5_cqwq_update_db_record(struct mlx5_cqwq *wq)
         *wq->db = cpu_to_be32(wq->cc & 0xffffff);
  }
  
+/* Return the CQE at the current consumer index if its ownership bit matches
+ * the low bit of the queue's wrap count, otherwise NULL.
+ */
+static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
+{
+       struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, mlx5_cqwq_get_ci(wq));
+       u8 hw_owner = cqe->op_own & MLX5_CQE_OWNER_MASK;
+       u8 sw_owner = mlx5_cqwq_get_wrap_cnt(wq) & 1;
+
+       if (hw_owner == sw_owner) {
+               /* ensure cqe content is read after cqe ownership bit */
+               dma_rmb();
+               return cqe;
+       }
+
+       return NULL;
+}
+
  static inline int mlx5_wq_ll_is_full(struct mlx5_wq_ll *wq)
  {
         return wq->cur_sz == wq->sz_m1;
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h

index 9ca85383aa35e64b842b7013bcfa3d07efdd9920..7a712b6b09ec11852444eb8ce2df18823b155be2 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h
@@ -96,7 +96,7 @@ struct mlxfw_dev {
         u16 psid_size;
  };
  
-#if IS_ENABLED(CONFIG_MLXFW)
+#if IS_REACHABLE(CONFIG_MLXFW)
  int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
                          const struct firmware *firmware);
  #else
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

index 700cc8c6aa5be69c9c6135c0572259434143955e..192cb93e7669be9587d850bc3c5a661d9816af24 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3301,6 +3301,9 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
         u16 vid = vlan_dev_vlan_id(vlan_dev);
  
+       if (netif_is_bridge_port(vlan_dev))
+               return 0;
+
         if (mlxsw_sp_port_dev_check(real_dev))
                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
                                                          event, vid);
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig

index 0d5a7b9203a41d5235cb8a725ac3a81d4fc4c936..0e331e2f685ad25029aabbcd7112a51b086a7a2c 100644 (file)
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -25,6 +25,16 @@ config NFP
           cards working as a advanced Ethernet NIC.  It works with both
           SR-IOV physical and virtual functions.
  
+config NFP_APP_FLOWER
+       bool "NFP4000/NFP6000 TC Flower offload support"
+       depends on NFP
+       depends on NET_SWITCHDEV
+       ---help---
+         Enable driver support for TC Flower offload on NFP4000 and NFP6000.
+         Say Y, if you are planning to make use of TC Flower offload
+         either directly, with Open vSwitch, or any other way.  Note that
+         TC Flower offload requires specific FW to work.
+
  config NFP_DEBUG
         bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers"
         depends on NFP
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile

index 10b556b2c59d277fb73b7b27cbd03c852eec6a36..b8e1358868bd0da2e745b5155b6b9d977e111666 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -27,9 +27,17 @@ nfp-objs := \
             nfp_port.o \
             bpf/main.o \
             bpf/offload.o \
+           nic/main.o
+
+ifeq ($(CONFIG_NFP_APP_FLOWER),y)
+nfp-objs += \
+           flower/action.o \
             flower/cmsg.o \
             flower/main.o \
-           nic/main.o
+           flower/match.o \
+           flower/metadata.o \
+           flower/offload.o
+endif
  
  ifeq ($(CONFIG_BPF_SYSCALL),y)
  nfp-objs += \
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c

new file mode 100644 (file)

index 0000000..db97506
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <net/pkt_cls.h>
+#include <net/switchdev.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_net_repr.h"
+
+/* Fill @pop_vlan with a pop-VLAN action: the opcode word carries the action
+ * length (shifted by NFP_FL_LW_SIZ) and the POP_VLAN jump id.
+ */
+static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
+{
+       size_t len_lw = sizeof(struct nfp_fl_pop_vlan) >> NFP_FL_LW_SIZ;
+       u16 op;
+
+       op = FIELD_PREP(NFP_FL_ACT_LEN_LW, len_lw);
+       op |= FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN);
+
+       pop_vlan->reserved = 0;
+       pop_vlan->a_op = cpu_to_be16(op);
+}
+
+/* Fill @push_vlan with a push-VLAN action built from TC vlan action @action:
+ * TPID from the action's push protocol, TCI from its priority and VID with
+ * the CFI bit always set.
+ */
+static void
+nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
+                const struct tc_action *action)
+{
+       size_t len_lw = sizeof(struct nfp_fl_push_vlan) >> NFP_FL_LW_SIZ;
+       struct tcf_vlan *vlan = to_vlan(action);
+       u16 op, tci;
+
+       op = FIELD_PREP(NFP_FL_ACT_LEN_LW, len_lw);
+       op |= FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN);
+       push_vlan->a_op = cpu_to_be16(op);
+
+       /* Action parameters. */
+       push_vlan->reserved = 0;
+       push_vlan->vlan_tpid = tcf_vlan_push_proto(action);
+
+       tci = NFP_FL_PUSH_VLAN_CFI;
+       tci |= FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio);
+       tci |= FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid);
+       push_vlan->vlan_tci = cpu_to_be16(tci);
+}
+
+/* Fill @output with an output action for mirred action @action.
+ *
+ * @last selects NFP_FL_OUT_FLAGS_LAST.  The target netdev is looked up by
+ * the mirred ifindex in @in_dev's namespace.  On success the flow's shortcut
+ * is set to the output port.
+ *
+ * Returns 0 on success, or -EOPNOTSUPP if the target device is missing, does
+ * not share a switchdev parent id with @in_dev, or yields port id 0.
+ */
+static int
+nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
+             struct nfp_fl_payload *nfp_flow, bool last,
+             struct net_device *in_dev)
+{
+       size_t len_lw = sizeof(struct nfp_fl_output) >> NFP_FL_LW_SIZ;
+       struct net_device *egress;
+       u16 op;
+
+       /* Opcode word: action length plus the OUTPUT jump id. */
+       op = FIELD_PREP(NFP_FL_ACT_LEN_LW, len_lw);
+       op |= FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT);
+       output->a_op = cpu_to_be16(op);
+
+       /* Action parameters. */
+       output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0);
+
+       egress = __dev_get_by_index(dev_net(in_dev),
+                                   tcf_mirred_ifindex(action));
+
+       /* Only offload if the egress port is on the same device as the
+        * ingress port.
+        */
+       if (!egress || !switchdev_port_same_parent_id(in_dev, egress))
+               return -EOPNOTSUPP;
+
+       output->port = cpu_to_be32(nfp_repr_get_port_id(egress));
+       if (!output->port)
+               return -EOPNOTSUPP;
+
+       nfp_flow->meta.shortcut = output->port;
+
+       return 0;
+}
+
+/* Append one output action for mirred action @a at offset *@a_len of the
+ * flow's action data; *@a_len is advanced only on success.
+ */
+static int
+nfp_fl_append_output(const struct tc_action *a, struct nfp_fl_payload *nfp_fl,
+                    int *a_len, struct net_device *netdev, bool last)
+{
+       struct nfp_fl_output *output;
+       int err;
+
+       if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
+               return -EOPNOTSUPP;
+
+       output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
+       err = nfp_fl_output(output, a, nfp_fl, last, netdev);
+       if (err)
+               return err;
+
+       *a_len += sizeof(struct nfp_fl_output);
+       return 0;
+}
+
+/* Translate a single TC action @a into the flow's action data.
+ *
+ * Handles drop, mirred egress redirect/mirror and vlan pop/push.  *@a_len is
+ * the number of action bytes used so far and grows by the size of whatever
+ * is appended.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for any other action or when the action
+ * would not fit in NFP_FL_MAX_A_SIZ, or the error from nfp_fl_output().
+ */
+static int
+nfp_flower_loop_action(const struct tc_action *a,
+                      struct nfp_fl_payload *nfp_fl, int *a_len,
+                      struct net_device *netdev)
+{
+       struct nfp_fl_push_vlan *psh_v;
+       struct nfp_fl_pop_vlan *pop_v;
+
+       if (is_tcf_gact_shot(a)) {
+               /* A drop needs no action data, only the shortcut. */
+               nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP);
+               return 0;
+       }
+
+       /* A redirect is emitted as the last output, a mirror is not. */
+       if (is_tcf_mirred_egress_redirect(a))
+               return nfp_fl_append_output(a, nfp_fl, a_len, netdev, true);
+       if (is_tcf_mirred_egress_mirror(a))
+               return nfp_fl_append_output(a, nfp_fl, a_len, netdev, false);
+
+       /* Currently we do not handle any other actions. */
+       if (!is_tcf_vlan(a))
+               return -EOPNOTSUPP;
+
+       switch (tcf_vlan_action(a)) {
+       case TCA_VLAN_ACT_POP:
+               if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ)
+                       return -EOPNOTSUPP;
+
+               pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len];
+               nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV);
+
+               nfp_fl_pop_vlan(pop_v);
+               *a_len += sizeof(struct nfp_fl_pop_vlan);
+               return 0;
+       case TCA_VLAN_ACT_PUSH:
+               if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ)
+                       return -EOPNOTSUPP;
+
+               psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len];
+               nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+               nfp_fl_push_vlan(psh_v, a);
+               *a_len += sizeof(struct nfp_fl_push_vlan);
+               return 0;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+/**
+ * nfp_flower_compile_action - build a flow's action data from its TC actions
+ * @flow: TC flower offload request whose action list is translated
+ * @netdev: ingress netdev, passed on to the per-action translation
+ * @nfp_flow: payload whose action_data and meta fields are filled in
+ *
+ * Clears the action area, translates every action in order and records the
+ * resulting length in meta.act_len.
+ *
+ * Return: 0 on success, or the first error from nfp_flower_loop_action(), in
+ * which case meta.act_len is left at 0.
+ */
+int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+                             struct net_device *netdev,
+                             struct nfp_fl_payload *nfp_flow)
+{
+       const struct tc_action *act;
+       int n_actions = 0;
+       int used = 0;
+       LIST_HEAD(actions);
+       int err;
+
+       /* Start from an empty, zeroed action area. */
+       nfp_flow->meta.act_len = 0;
+       memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
+
+       tcf_exts_to_list(flow->exts, &actions);
+       list_for_each_entry(act, &actions, list) {
+               err = nfp_flower_loop_action(act, nfp_flow, &used, netdev);
+               if (err)
+                       return err;
+               n_actions++;
+       }
+
+       /* The shortcut is an optimisation for small action lists; it cannot
+        * be used once the list holds more than one action.
+        */
+       if (n_actions > 1)
+               nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+
+       nfp_flow->meta.act_len = used;
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c

index 7761be436726da430e0b0c40938a80c39bda2d0c..dd7fa9cf225ff3c9e60d6f62e3f7f17317f2ea52 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -36,6 +36,7 @@
  #include <linux/skbuff.h>
  #include <net/dst_metadata.h>
  
+#include "main.h"
  #include "../nfpcore/nfp_cpp.h"
  #include "../nfp_net_repr.h"
  #include "./cmsg.h"
@@ -52,12 +53,7 @@ nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
         return (struct nfp_flower_cmsg_hdr *)skb->data;
  }
  
-static void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
-{
-       return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
-}
-
-static struct sk_buff *
+struct sk_buff *
  nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
                       enum nfp_flower_cmsg_type_port type)
  {
@@ -79,9 +75,8 @@ nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
         return skb;
  }
  
-int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok)
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok)
  {
-       struct nfp_repr *repr = netdev_priv(netdev);
         struct nfp_flower_cmsg_portmod *msg;
         struct sk_buff *skb;
  
@@ -94,7 +89,7 @@ int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok)
         msg->portnum = cpu_to_be32(repr->dst->u.port_info.port_id);
         msg->reserved = 0;
         msg->info = carrier_ok;
-       msg->mtu = cpu_to_be16(netdev->mtu);
+       msg->mtu = cpu_to_be16(repr->netdev->mtu);
  
         nfp_ctrl_tx(repr->app->ctrl, skb);
  
@@ -149,6 +144,9 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
         case NFP_FLOWER_CMSG_TYPE_PORT_MOD:
                 nfp_flower_cmsg_portmod_rx(app, skb);
                 break;
+       case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
+               nfp_flower_rx_flow_stats(app, skb);
+               break;
         default:
                 nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
                                      type);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h

index 2eeddada7f4dca96e3f168d06e4631a54e15c139..cf738de170ab6d11309223571ea85cfcc2aadf60 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -40,6 +40,196 @@
  
  #include "../nfp_app.h"
  
+#define NFP_FLOWER_LAYER_META          BIT(0)
+#define NFP_FLOWER_LAYER_PORT          BIT(1)
+#define NFP_FLOWER_LAYER_MAC           BIT(2)
+#define NFP_FLOWER_LAYER_TP            BIT(3)
+#define NFP_FLOWER_LAYER_IPV4          BIT(4)
+#define NFP_FLOWER_LAYER_IPV6          BIT(5)
+#define NFP_FLOWER_LAYER_CT            BIT(6)
+#define NFP_FLOWER_LAYER_VXLAN         BIT(7)
+
+#define NFP_FLOWER_LAYER_ETHER         BIT(3)
+#define NFP_FLOWER_LAYER_ARP           BIT(4)
+
+#define NFP_FLOWER_MASK_VLAN_PRIO      GENMASK(15, 13)
+#define NFP_FLOWER_MASK_VLAN_CFI       BIT(12)
+#define NFP_FLOWER_MASK_VLAN_VID       GENMASK(11, 0)
+
+#define NFP_FL_SC_ACT_DROP             0x80000000
+#define NFP_FL_SC_ACT_USER             0x7D000000
+#define NFP_FL_SC_ACT_POPV             0x6A000000
+#define NFP_FL_SC_ACT_NULL             0x00000000
+
+/* The maximum action list size (in bytes) supported by the NFP.
+ */
+#define NFP_FL_MAX_A_SIZ               1216
+#define NFP_FL_LW_SIZ                  2
+
+/* Action opcodes */
+#define NFP_FL_ACTION_OPCODE_OUTPUT    0
+#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1
+#define NFP_FL_ACTION_OPCODE_POP_VLAN  2
+#define NFP_FL_ACTION_OPCODE_NUM       32
+
+#define NFP_FL_ACT_JMP_ID              GENMASK(15, 8)
+#define NFP_FL_ACT_LEN_LW              GENMASK(7, 0)
+
+#define NFP_FL_OUT_FLAGS_LAST          BIT(15)
+#define NFP_FL_OUT_FLAGS_USE_TUN       BIT(4)
+#define NFP_FL_OUT_FLAGS_TYPE_IDX      GENMASK(2, 0)
+
+#define NFP_FL_PUSH_VLAN_PRIO          GENMASK(15, 13)
+#define NFP_FL_PUSH_VLAN_CFI           BIT(12)
+#define NFP_FL_PUSH_VLAN_VID           GENMASK(11, 0)
+
+struct nfp_fl_output { /* presumably the NFP_FL_ACTION_OPCODE_OUTPUT action - confirm */
+       __be16 a_op;  /* presumably NFP_FL_ACT_JMP_ID | NFP_FL_ACT_LEN_LW - confirm */
+       __be16 flags; /* presumably NFP_FL_OUT_FLAGS_* bits - confirm */
+       __be32 port;  /* stored big-endian */
+};
+
+struct nfp_fl_push_vlan { /* presumably the NFP_FL_ACTION_OPCODE_PUSH_VLAN action - confirm */
+       __be16 a_op;      /* presumably NFP_FL_ACT_JMP_ID | NFP_FL_ACT_LEN_LW - confirm */
+       __be16 reserved;
+       __be16 vlan_tpid;
+       __be16 vlan_tci;  /* presumably NFP_FL_PUSH_VLAN_{PRIO,CFI,VID} layout - confirm */
+};
+
+struct nfp_fl_pop_vlan { /* presumably the NFP_FL_ACTION_OPCODE_POP_VLAN action - confirm */
+       __be16 a_op;     /* presumably NFP_FL_ACT_JMP_ID | NFP_FL_ACT_LEN_LW - confirm */
+       __be16 reserved;
+};
+
+/* Metadata without L2 (1W/4B)
+ * ----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  key_layers   |    mask_id    |           reserved            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * nfp_flower_compile_flow_match() emits this frame when
+ * NFP_FLOWER_LAYER_PORT is not set in the key layers. It is filled by
+ * nfp_flower_compile_meta(), which stores the key layer bitmap and writes
+ * mask_id and reserved as 0.
+ */
+struct nfp_flower_meta_one {
+       u8 nfp_flow_key_layer;
+       u8 mask_id;
+       u16 reserved;
+};
+
+/* Metadata with L2 (1W/4B)
+ * ----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  key_layers   |    mask_id    | PCP |p|   vlan outermost VID  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *                                 ^                               ^
+ *                           NOTE: |             TCI               |
+ *                                 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * nfp_flower_compile_flow_match() emits this frame when
+ * NFP_FLOWER_LAYER_PORT is set in the key layers. The TCI is assembled by
+ * nfp_flower_compile_meta_tci() from NFP_FLOWER_MASK_VLAN_PRIO (PCP),
+ * NFP_FLOWER_MASK_VLAN_CFI (the "p" bit, bit 12) and
+ * NFP_FLOWER_MASK_VLAN_VID.
+ */
+struct nfp_flower_meta_two {
+       u8 nfp_flow_key_layer;
+       u8 mask_id;
+       __be16 tci;
+};
+
+/* Port details (1W/4B)
+ * ----------------------------------------------------------------
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         port_ingress                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Filled by nfp_flower_compile_port(): the exact-match copy carries the
+ * port id passed in by the caller, the mask copy is all-ones.
+ */
+struct nfp_flower_in_port {
+       __be32 in_port;
+};
+
+/* L2 details (4W/16B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     mac_addr_dst, 31 - 0                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      mac_addr_dst, 47 - 32    |     mac_addr_src, 15 - 0      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     mac_addr_src, 47 - 16                     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |       mpls outermost label            |  TC |B|   reserved  |q|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Filled by nfp_flower_compile_mac(): the frame is zeroed, both MAC
+ * addresses are copied from the dissector key (or mask), and mpls_lse is
+ * left 0 in the exact-match copy and set to all-ones in the mask copy.
+ */
+struct nfp_flower_mac_mpls {
+       u8 mac_dst[6];
+       u8 mac_src[6];
+       __be32 mpls_lse;
+};
+
+/* L4 ports (for UDP, TCP, SCTP) (1W/4B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |            port_src           |           port_dst            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Filled by nfp_flower_compile_tport(), which copies both ports from the
+ * FLOW_DISSECTOR_KEY_PORTS key (or mask) without any byte swapping.
+ */
+struct nfp_flower_tp_ports {
+       __be16 port_src;
+       __be16 port_dst;
+};
+
+/* L3 IPv4 details (3W/12B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    DSCP   |ECN|   protocol    |      ttl      |   reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        ipv4_addr_src                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        ipv4_addr_dst                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Filled by nfp_flower_compile_ipv4(), which currently writes tos, ttl
+ * and reserved as 0 in both the exact-match and the mask copy.
+ */
+struct nfp_flower_ipv4 {
+       u8 tos;
+       u8 proto;
+       u8 ttl;
+       u8 reserved;
+       __be32 ipv4_src;
+       __be32 ipv4_dst;
+};
+
+/* L3 IPv6 details (10W/40B)
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    DSCP   |ECN|   protocol    |      ttl      |   reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   ipv6_exthdr   | res |            ipv6_flow_label            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src,   31 - 0                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src,  63 - 32                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src,  95 - 64                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_src, 127 - 96                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst,   31 - 0                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst,  63 - 32                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst,  95 - 64                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  ipv6_addr_dst, 127 - 96                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Filled by nfp_flower_compile_ipv6(), which currently writes tos, ttl,
+ * reserved and the flow label/exthdr word as 0 in both the exact-match
+ * and the mask copy.
+ */
+struct nfp_flower_ipv6 {
+       u8 tos;
+       u8 proto;
+       u8 ttl;
+       u8 reserved;
+       __be32 ipv6_flow_label_exthdr;
+       struct in6_addr ipv6_src;
+       struct in6_addr ipv6_dst;
+};
+
  /* The base header for a control message packet.
   * Defines an 8-bit version, and an 8-bit type, padded
   * to a 32-bit word. Rest of the packet is type-specific.
@@ -55,7 +245,10 @@ struct nfp_flower_cmsg_hdr {
  
  /* Types defined for port related control messages  */
  enum nfp_flower_cmsg_type_port {
+       NFP_FLOWER_CMSG_TYPE_FLOW_ADD =         0,
+       NFP_FLOWER_CMSG_TYPE_FLOW_DEL =         2,
         NFP_FLOWER_CMSG_TYPE_PORT_MOD =         8,
+       NFP_FLOWER_CMSG_TYPE_FLOW_STATS =       15,
         NFP_FLOWER_CMSG_TYPE_PORT_ECHO =        16,
         NFP_FLOWER_CMSG_TYPE_MAX =              32,
  };
@@ -110,7 +303,15 @@ nfp_flower_cmsg_pcie_port(u8 nfp_pcie, enum nfp_flower_cmsg_port_vnic_type type,
                            NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT);
  }
  
-int nfp_flower_cmsg_portmod(struct net_device *netdev, bool carrier_ok);
+/* Return a pointer to the message body, i.e. the skb data just past the
+ * NFP_FLOWER_CMSG_HLEN bytes of control message header.
+ */
+static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
+{
+       unsigned char *payload = (unsigned char *)skb->data;
+
+       payload += NFP_FLOWER_CMSG_HLEN;
+
+       return payload;
+}
+
+int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok);
  void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb);
+struct sk_buff *
+nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size,
+                     enum nfp_flower_cmsg_type_port type);
  
  #endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c

index 8e5ca6b4bb3367c08762ca38e40ad00c1971b6fa..5fe6d3582597d24ebe9995ab8b7c8c6709af09fb 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -34,10 +34,13 @@
  #include <linux/etherdevice.h>
  #include <linux/pci.h>
  #include <linux/skbuff.h>
+#include <linux/vmalloc.h>
  #include <net/devlink.h>
  #include <net/dst_metadata.h>
  
+#include "main.h"
  #include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nffw.h"
  #include "../nfpcore/nfp_nsp.h"
  #include "../nfp_app.h"
  #include "../nfp_main.h"
@@ -46,13 +49,7 @@
  #include "../nfp_port.h"
  #include "./cmsg.h"
  
-/**
- * struct nfp_flower_priv - Flower APP per-vNIC priv data
- * @nn:                     Pointer to vNIC
- */
-struct nfp_flower_priv {
-       struct nfp_net *nn;
-};
+#define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL
  
  static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn)
  {
@@ -104,51 +101,30 @@ nfp_flower_repr_get(struct nfp_app *app, u32 port_id)
         return reprs->reprs[port];
  }
  
-static void
-nfp_flower_repr_netdev_get_stats64(struct net_device *netdev,
-                                  struct rtnl_link_stats64 *stats)
-{
-       struct nfp_repr *repr = netdev_priv(netdev);
-       enum nfp_repr_type type;
-       u32 port_id;
-       u8 port = 0;
-
-       port_id = repr->dst->u.port_info.port_id;
-       type = nfp_flower_repr_get_type_and_port(repr->app, port_id, &port);
-       nfp_repr_get_stats64(repr->app, type, port, stats);
-}
-
-static int nfp_flower_repr_netdev_open(struct net_device *netdev)
+static int
+nfp_flower_repr_netdev_open(struct nfp_app *app, struct nfp_repr *repr)
  {
         int err;
  
-       err = nfp_flower_cmsg_portmod(netdev, true);
+       err = nfp_flower_cmsg_portmod(repr, true);
         if (err)
                 return err;
  
-       netif_carrier_on(netdev);
-       netif_tx_wake_all_queues(netdev);
+       netif_carrier_on(repr->netdev);
+       netif_tx_wake_all_queues(repr->netdev);
  
         return 0;
  }
  
-static int nfp_flower_repr_netdev_stop(struct net_device *netdev)
+static int
+nfp_flower_repr_netdev_stop(struct nfp_app *app, struct nfp_repr *repr)
  {
-       netif_carrier_off(netdev);
-       netif_tx_disable(netdev);
+       netif_carrier_off(repr->netdev);
+       netif_tx_disable(repr->netdev);
  
-       return nfp_flower_cmsg_portmod(netdev, false);
+       return nfp_flower_cmsg_portmod(repr, false);
  }
  
-static const struct net_device_ops nfp_flower_repr_netdev_ops = {
-       .ndo_open               = nfp_flower_repr_netdev_open,
-       .ndo_stop               = nfp_flower_repr_netdev_stop,
-       .ndo_start_xmit         = nfp_repr_xmit,
-       .ndo_get_stats64        = nfp_flower_repr_netdev_get_stats64,
-       .ndo_has_offload_stats  = nfp_repr_has_offload_stats,
-       .ndo_get_offload_stats  = nfp_repr_get_offload_stats,
-};
-
  static void nfp_flower_sriov_disable(struct nfp_app *app)
  {
         nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_VF);
@@ -162,14 +138,19 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
         u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp);
         struct nfp_flower_priv *priv = app->priv;
         struct nfp_reprs *reprs, *old_reprs;
+       enum nfp_port_type port_type;
         const u8 queue = 0;
         int i, err;
  
+       port_type = repr_type == NFP_REPR_TYPE_PF ? NFP_PORT_PF_PORT :
+                                                   NFP_PORT_VF_PORT;
+
         reprs = nfp_reprs_alloc(cnt);
         if (!reprs)
                 return -ENOMEM;
  
         for (i = 0; i < cnt; i++) {
+               struct nfp_port *port;
                 u32 port_id;
  
                 reprs->reprs[i] = nfp_repr_alloc(app);
@@ -178,15 +159,24 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app,
                         goto err_reprs_clean;
                 }
  
+               port = nfp_port_alloc(app, port_type, reprs->reprs[i]);
+               if (repr_type == NFP_REPR_TYPE_PF) {
+                       port->pf_id = i;
+               } else {
+                       port->pf_id = 0; /* For now we only support 1 PF */
+                       port->vf_id = i;
+               }
+
                 eth_hw_addr_random(reprs->reprs[i]);
  
                 port_id = nfp_flower_cmsg_pcie_port(nfp_pcie, vnic_type,
                                                     i, queue);
                 err = nfp_repr_init(app, reprs->reprs[i],
-                                   &nfp_flower_repr_netdev_ops,
-                                   port_id, NULL, priv->nn->dp.netdev);
-               if (err)
+                                   port_id, port, priv->nn->dp.netdev);
+               if (err) {
+                       nfp_port_free(port);
                         goto err_reprs_clean;
+               }
  
                 nfp_info(app->cpp, "%s%d Representor(%s) created\n",
                          repr_type == NFP_REPR_TYPE_PF ? "PF" : "VF", i,
@@ -260,7 +250,6 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv)
  
                 cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port);
                 err = nfp_repr_init(app, reprs->reprs[phys_port],
-                                   &nfp_flower_repr_netdev_ops,
                                     cmsg_port_id, port, priv->nn->dp.netdev);
                 if (err) {
                         nfp_port_free(port);
@@ -296,26 +285,16 @@ static int nfp_flower_start(struct nfp_app *app)
                                            NFP_REPR_TYPE_PF, 1);
  }
  
-static void nfp_flower_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
-{
-       kfree(app->priv);
-       app->priv = NULL;
-}
-
  static int nfp_flower_vnic_init(struct nfp_app *app, struct nfp_net *nn,
                                 unsigned int id)
  {
-       struct nfp_flower_priv *priv;
+       struct nfp_flower_priv *priv = app->priv;
  
         if (id > 0) {
                 nfp_warn(app->cpp, "FlowerNIC doesn't support more than one data vNIC\n");
                 goto err_invalid_port;
         }
  
-       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return -ENOMEM;
-       app->priv = priv;
         priv->nn = nn;
  
         eth_hw_addr_random(nn->dp.netdev);
@@ -331,6 +310,8 @@ err_invalid_port:
  static int nfp_flower_init(struct nfp_app *app)
  {
         const struct nfp_pf *pf = app->pf;
+       u64 version;
+       int err;
  
         if (!pf->eth_tbl) {
                 nfp_warn(app->cpp, "FlowerNIC requires eth table\n");
@@ -347,7 +328,37 @@ static int nfp_flower_init(struct nfp_app *app)
                 return -EINVAL;
         }
  
+       version = nfp_rtsym_read_le(app->pf->rtbl, "hw_flower_version", &err);
+       if (err) {
+               nfp_warn(app->cpp, "FlowerNIC requires hw_flower_version memory symbol\n");
+               return err;
+       }
+
+       /* We need to ensure hardware has enough flower capabilities. */
+       if (version != NFP_FLOWER_ALLOWED_VER) {
+               nfp_warn(app->cpp, "FlowerNIC: unsupported firmware version\n");
+               return -EINVAL;
+       }
+
+       app->priv = vzalloc(sizeof(struct nfp_flower_priv));
+       if (!app->priv)
+               return -ENOMEM;
+
+       err = nfp_flower_metadata_init(app);
+       if (err)
+               goto err_free_app_priv;
+
         return 0;
+
+err_free_app_priv:
+       vfree(app->priv);
+       return err;
+}
+
+/* Tear down the app-wide state set up by nfp_flower_init(). */
+static void nfp_flower_clean(struct nfp_app *app)
+{
+       /* Pair nfp_flower_metadata_init() with its cleanup counterpart while
+        * the priv area it works on still exists; freeing priv alone would
+        * drop the only reference to whatever the metadata init allocated.
+        */
+       nfp_flower_metadata_cleanup(app);
+
+       vfree(app->priv);
+       app->priv = NULL;
  }
  
  const struct nfp_app_type app_flower = {
@@ -358,9 +369,12 @@ const struct nfp_app_type app_flower = {
         .extra_cap      = nfp_flower_extra_cap,
  
         .init           = nfp_flower_init,
+       .clean          = nfp_flower_clean,
  
         .vnic_init      = nfp_flower_vnic_init,
-       .vnic_clean     = nfp_flower_vnic_clean,
+
+       .repr_open      = nfp_flower_repr_netdev_open,
+       .repr_stop      = nfp_flower_repr_netdev_stop,
  
         .start          = nfp_flower_start,
         .stop           = nfp_flower_stop,
@@ -372,4 +386,6 @@ const struct nfp_app_type app_flower = {
  
         .eswitch_mode_get  = eswitch_mode_get,
         .repr_get       = nfp_flower_repr_get,
+
+       .setup_tc       = nfp_flower_setup_tc,
  };
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h

new file mode 100644 (file)

index 0000000..9e64c04
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_FLOWER_H__
+#define __NFP_FLOWER_H__ 1
+
+#include <linux/circ_buf.h>
+#include <linux/hashtable.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+struct tc_to_netdev;
+struct net_device;
+struct nfp_app;
+
+#define NFP_FL_STATS_ENTRY_RS          BIT(20)
+#define NFP_FL_STATS_ELEM_RS           4
+#define NFP_FL_REPEATED_HASH_MAX       BIT(17)
+#define NFP_FLOWER_HASH_BITS           19
+#define NFP_FLOWER_MASK_ENTRY_RS       256
+#define NFP_FLOWER_MASK_ELEMENT_RS     1
+#define NFP_FLOWER_MASK_HASH_BITS      10
+
+#define NFP_FL_META_FLAG_NEW_MASK      128
+#define NFP_FL_META_FLAG_LAST_MASK     1
+
+#define NFP_FL_MASK_REUSE_TIME_NS      40000
+#define NFP_FL_MASK_ID_LOCATION                1
+
+struct nfp_fl_mask_id { /* embedded in struct nfp_flower_priv as mask_ids */
+       struct circ_buf mask_id_free_list;
+       struct timespec64 *last_used; /* presumably per mask id, see NFP_FL_MASK_REUSE_TIME_NS - confirm */
+       u8 init_unallocated;
+};
+
+struct nfp_fl_stats_id { /* embedded in struct nfp_flower_priv as stats_ids */
+       struct circ_buf free_list;
+       u32 init_unalloc;
+       u8 repeated_em_count; /* presumably bounded by NFP_FL_REPEATED_HASH_MAX - confirm */
+};
+
+/**
+ * struct nfp_flower_priv - Flower APP per-vNIC priv data
+ * @nn:                        Pointer to vNIC
+ * @mask_id_seed:      Seed used for mask hash table
+ * @flower_version:    HW version of flower
+ * @stats_ids:         List of free stats ids
+ * @mask_ids:          List of free mask ids
+ * @mask_table:                Hash table used to store masks
+ * @flow_table:                Hash table used to store flower rules
+ *
+ * One instance is allocated with vzalloc() in nfp_flower_init() and stored
+ * in app->priv; nfp_flower_clean() frees it. @nn is filled in later by
+ * nfp_flower_vnic_init(). The structure is large: @flow_table alone has
+ * 2^NFP_FLOWER_HASH_BITS buckets and @mask_table has
+ * 2^NFP_FLOWER_MASK_HASH_BITS.
+ */
+struct nfp_flower_priv {
+       struct nfp_net *nn;
+       u32 mask_id_seed;
+       u64 flower_version;
+       struct nfp_fl_stats_id stats_ids;
+       struct nfp_fl_mask_id mask_ids;
+       DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
+       DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
+};
+
+struct nfp_fl_key_ls { /* key layout handed to nfp_flower_compile_flow_match() */
+       u32 key_layer_two; /* presumably NFP_FLOWER_LAYER_ETHER/ARP bits - confirm */
+       u8 key_layer;      /* NFP_FLOWER_LAYER_* bits tested by the match compiler */
+       int key_size;      /* bytes zeroed in both unmasked_data and mask_data */
+};
+
+struct nfp_fl_rule_metadata { /* first member (meta) of struct nfp_fl_payload */
+       u8 key_len;
+       u8 mask_len;
+       u8 act_len; /* NOTE(review): u8, but NFP_FL_MAX_A_SIZ is 1216 - confirm unit/range */
+       u8 flags;   /* presumably NFP_FL_META_FLAG_* bits - confirm */
+       __be32 host_ctx_id;
+       __be64 host_cookie __packed;
+       __be64 flow_version __packed;
+       __be32 shortcut; /* NFP_FL_SC_ACT_* value; forced to NFP_FL_SC_ACT_NULL for multi-action lists */
+};
+
+struct nfp_fl_stats { /* embedded in struct nfp_fl_payload next to its "lock stats" spinlock */
+       u64 pkts;
+       u64 bytes;
+       u64 used; /* presumably a last-used timestamp - confirm unit */
+};
+
+struct nfp_fl_payload { /* driver-side state for one flow being compiled/offloaded */
+       struct nfp_fl_rule_metadata meta;
+       unsigned long tc_flower_cookie; /* same type as the search/remove table lookup argument */
+       struct hlist_node link;
+       struct rcu_head rcu;
+       spinlock_t lock; /* lock stats */
+       struct nfp_fl_stats stats;
+       char *unmasked_data; /* exact-match key, written by nfp_flower_compile_flow_match() */
+       char *mask_data;     /* mask for the key above, same length (key_size) */
+       char *action_data;
+};
+
+struct nfp_fl_stats_frame { /* presumably the NFP_FLOWER_CMSG_TYPE_FLOW_STATS payload - confirm */
+       __be32 stats_con_id;
+       __be32 pkt_count;
+       __be64 byte_count;
+       __be64 stats_cookie;
+};
+
+int nfp_flower_metadata_init(struct nfp_app *app);
+void nfp_flower_metadata_cleanup(struct nfp_app *app);
+
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+                       u32 handle, __be16 proto, struct tc_to_netdev *tc);
+int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+                                 struct nfp_fl_key_ls *key_ls,
+                                 struct net_device *netdev,
+                                 struct nfp_fl_payload *nfp_flow);
+int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
+                             struct net_device *netdev,
+                             struct nfp_fl_payload *nfp_flow);
+int nfp_compile_flow_metadata(struct nfp_app *app,
+                             struct tc_cls_flower_offload *flow,
+                             struct nfp_fl_payload *nfp_flow);
+int nfp_modify_flow_metadata(struct nfp_app *app,
+                            struct nfp_fl_payload *nfp_flow);
+
+struct nfp_fl_payload *
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+struct nfp_fl_payload *
+nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
+
+void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c

new file mode 100644 (file)

index 0000000..0e08404
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitfield.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+
+/* Fill the "metadata with L2" frame: key layer bitmap, mask id and the
+ * outermost VLAN TCI.
+ *
+ * @frame:        frame to populate
+ * @flow:         TC flower offload request the key is taken from
+ * @key_type:     key layer bitmap stored in the frame
+ * @mask_version: true when @frame is the mask copy; the TCI is then
+ *                all-ones and the dissector key is not consulted
+ *
+ * For the exact-match copy the TCI is 0 unless the dissector carries a
+ * VLAN key with a non-zero VLAN id.
+ */
+static void
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_two *frame,
+                           struct tc_cls_flower_offload *flow, u8 key_type,
+                           bool mask_version)
+{
+       struct flow_dissector_key_vlan *flow_vlan;
+       u16 tmp_tci = 0;
+
+       /* Populate the metadata frame. */
+       frame->nfp_flow_key_layer = key_type;
+       frame->mask_id = ~0;
+
+       if (mask_version) {
+               frame->tci = cpu_to_be16(~0);
+               return;
+       }
+
+       /* Only dereference the VLAN key when the dissector actually uses it.
+        * skb_flow_dissector_target() does not fail for an unused key id, so
+        * without this check the VLAN fields would be read from storage that
+        * belongs to some other key.
+        */
+       if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+               flow_vlan = skb_flow_dissector_target(flow->dissector,
+                                                     FLOW_DISSECTOR_KEY_VLAN,
+                                                     flow->key);
+
+               /* Populate the tci field. */
+               if (flow_vlan->vlan_id)
+                       tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+                                            flow_vlan->vlan_priority) |
+                                 FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+                                            flow_vlan->vlan_id) |
+                                 NFP_FLOWER_MASK_VLAN_CFI;
+       }
+       frame->tci = cpu_to_be16(tmp_tci);
+}
+
+/* Fill the "metadata without L2" frame: only the key layer bitmap is set,
+ * mask_id and reserved are written as zero.
+ */
+static void
+nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
+{
+       frame->reserved = 0;
+       frame->mask_id = 0;
+       frame->nfp_flow_key_layer = key_type;
+}
+
+/* Store the ingress port in @frame. The mask copy is all-ones, the
+ * exact-match copy carries @cmsg_port. Always returns 0.
+ */
+static int
+nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
+                       bool mask_version)
+{
+       u32 port_val = cmsg_port;
+
+       if (mask_version)
+               port_val = ~0;
+
+       frame->in_port = cpu_to_be32(port_val);
+
+       return 0;
+}
+
+/* Fill the L2 frame from the Ethernet address key (or its mask when
+ * @mask_version is set). The MPLS word stays zero in the exact-match copy
+ * and becomes all-ones in the mask copy.
+ */
+static void
+nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
+                      struct tc_cls_flower_offload *flow,
+                      bool mask_version)
+{
+       struct flow_dissector_key_eth_addrs *eth;
+       struct fl_flow_key *src_key;
+
+       if (mask_version)
+               src_key = flow->mask;
+       else
+               src_key = flow->key;
+
+       eth = skb_flow_dissector_target(flow->dissector,
+                                       FLOW_DISSECTOR_KEY_ETH_ADDRS,
+                                       src_key);
+
+       /* Start from an all-zero frame, then copy in both addresses. */
+       memset(frame, 0, sizeof(*frame));
+       ether_addr_copy(frame->mac_dst, eth->dst);
+       ether_addr_copy(frame->mac_src, eth->src);
+
+       if (!mask_version)
+               return;
+
+       frame->mpls_lse = cpu_to_be32(~0);
+}
+
+/* Copy the L4 source/destination ports from the dissector ports key (or
+ * its mask when @mask_version is set) into @frame, unmodified.
+ */
+static void
+nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame,
+                        struct tc_cls_flower_offload *flow,
+                        bool mask_version)
+{
+       struct flow_dissector_key_ports *ports;
+       struct fl_flow_key *src_key;
+
+       if (mask_version)
+               src_key = flow->mask;
+       else
+               src_key = flow->key;
+
+       ports = skb_flow_dissector_target(flow->dissector,
+                                         FLOW_DISSECTOR_KEY_PORTS,
+                                         src_key);
+
+       frame->port_dst = ports->dst;
+       frame->port_src = ports->src;
+}
+
+/* Fill the IPv4 frame from the dissector's IPv4 address and basic keys (or
+ * their masks when @mask_version is set). TOS, TTL and the reserved byte
+ * are written as zero.
+ */
+static void
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
+                       struct tc_cls_flower_offload *flow,
+                       bool mask_version)
+{
+       struct flow_dissector_key_ipv4_addrs *addrs;
+       struct flow_dissector_key_basic *basic;
+       struct fl_flow_key *src_key;
+
+       if (mask_version)
+               src_key = flow->mask;
+       else
+               src_key = flow->key;
+
+       addrs = skb_flow_dissector_target(flow->dissector,
+                                         FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+                                         src_key);
+       basic = skb_flow_dissector_target(flow->dissector,
+                                         FLOW_DISSECTOR_KEY_BASIC,
+                                         src_key);
+
+       /* Wildcard TOS/TTL for now. */
+       frame->tos = 0;
+       frame->ttl = 0;
+       frame->reserved = 0;
+
+       /* Protocol and addresses come straight from the dissector data. */
+       frame->proto = basic->ip_proto;
+       frame->ipv4_src = addrs->src;
+       frame->ipv4_dst = addrs->dst;
+}
+
+/* Fill the IPv6 frame from the dissector's IPv6 address and basic keys (or
+ * their masks when @mask_version is set). TOS, TTL, the reserved byte and
+ * the flow label/exthdr word are written as zero.
+ */
+static void
+nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
+                       struct tc_cls_flower_offload *flow,
+                       bool mask_version)
+{
+       struct flow_dissector_key_ipv6_addrs *addrs;
+       struct flow_dissector_key_basic *basic;
+       struct fl_flow_key *src_key;
+
+       if (mask_version)
+               src_key = flow->mask;
+       else
+               src_key = flow->key;
+
+       addrs = skb_flow_dissector_target(flow->dissector,
+                                         FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+                                         src_key);
+       basic = skb_flow_dissector_target(flow->dissector,
+                                         FLOW_DISSECTOR_KEY_BASIC,
+                                         src_key);
+
+       /* Wildcard LABEL/TOS/TTL for now. */
+       frame->ipv6_flow_label_exthdr = 0;
+       frame->tos = 0;
+       frame->ttl = 0;
+       frame->reserved = 0;
+
+       /* Protocol and addresses come straight from the dissector data. */
+       frame->proto = basic->ip_proto;
+       frame->ipv6_src = addrs->src;
+       frame->ipv6_dst = addrs->dst;
+}
+
+/* Build the exact-match key and the mask for a flower rule.
+ *
+ * Zeroes key_ls->key_size bytes of both nfp_flow->unmasked_data and
+ * nfp_flow->mask_data, then appends, in this order, the sections selected
+ * by key_ls->key_layer: metadata (followed by the ingress port when the
+ * PORT layer is set), MAC, transport ports, IPv4, IPv6.  Every section is
+ * written twice: once from the key (exact) and once from the mask.
+ *
+ * Returns 0 on success, the error from nfp_flower_compile_port(), or
+ * -EOPNOTSUPP when the META layer is requested.
+ */
+int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
+                                 struct nfp_fl_key_ls *key_ls,
+                                 struct net_device *netdev,
+                                 struct nfp_fl_payload *nfp_flow)
+{
+       int err;
+       u8 *ext;
+       u8 *msk;
+
+       memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
+       memset(nfp_flow->mask_data, 0, key_ls->key_size);
+
+       /* ext and msk are write cursors that advance in lock-step. */
+       ext = nfp_flow->unmasked_data;
+       msk = nfp_flow->mask_data;
+       if (NFP_FLOWER_LAYER_PORT & key_ls->key_layer) {
+               /* Populate Exact Metadata. */
+               nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)ext,
+                                           flow, key_ls->key_layer, false);
+               /* Populate Mask Metadata. */
+               nfp_flower_compile_meta_tci((struct nfp_flower_meta_two *)msk,
+                                           flow, key_ls->key_layer, true);
+               ext += sizeof(struct nfp_flower_meta_two);
+               msk += sizeof(struct nfp_flower_meta_two);
+
+               /* Populate Exact Port data. */
+               err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
+                                             nfp_repr_get_port_id(netdev),
+                                             false);
+               if (err)
+                       return err;
+
+               /* Populate Mask Port Data. */
+               err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
+                                             nfp_repr_get_port_id(netdev),
+                                             true);
+               if (err)
+                       return err;
+
+               ext += sizeof(struct nfp_flower_in_port);
+               msk += sizeof(struct nfp_flower_in_port);
+       } else {
+               /* Populate Exact Metadata. */
+               nfp_flower_compile_meta((struct nfp_flower_meta_one *)ext,
+                                       key_ls->key_layer);
+               /* Populate Mask Metadata. */
+               nfp_flower_compile_meta((struct nfp_flower_meta_one *)msk,
+                                       key_ls->key_layer);
+               ext += sizeof(struct nfp_flower_meta_one);
+               msk += sizeof(struct nfp_flower_meta_one);
+       }
+
+       if (NFP_FLOWER_LAYER_META & key_ls->key_layer) {
+               /* Additional Metadata Fields.
+                * Currently unsupported.
+                */
+               return -EOPNOTSUPP;
+       }
+
+       if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
+               /* Populate Exact MAC Data. */
+               nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
+                                      flow, false);
+               /* Populate Mask MAC Data. */
+               nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)msk,
+                                      flow, true);
+               ext += sizeof(struct nfp_flower_mac_mpls);
+               msk += sizeof(struct nfp_flower_mac_mpls);
+       }
+
+       if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) {
+               /* Populate Exact TP Data. */
+               nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext,
+                                        flow, false);
+               /* Populate Mask TP Data. */
+               nfp_flower_compile_tport((struct nfp_flower_tp_ports *)msk,
+                                        flow, true);
+               ext += sizeof(struct nfp_flower_tp_ports);
+               msk += sizeof(struct nfp_flower_tp_ports);
+       }
+
+       if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) {
+               /* Populate Exact IPv4 Data. */
+               nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext,
+                                       flow, false);
+               /* Populate Mask IPv4 Data. */
+               nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)msk,
+                                       flow, true);
+               ext += sizeof(struct nfp_flower_ipv4);
+               msk += sizeof(struct nfp_flower_ipv4);
+       }
+
+       if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) {
+               /* Populate Exact IPv6 Data. */
+               nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext,
+                                       flow, false);
+               /* Populate Mask IPv6 Data. */
+               nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)msk,
+                                       flow, true);
+               ext += sizeof(struct nfp_flower_ipv6);
+               msk += sizeof(struct nfp_flower_ipv6);
+       }
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c

new file mode 100644 (file)

index 0000000..fec0ff2
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/vmalloc.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_app.h"
+
+/* One entry of the mask hash table: maps a mask (by hash) to a mask id. */
+struct nfp_mask_id_table {
+       /* Linkage into priv->mask_table. */
+       struct hlist_node link;
+       /* jhash of the mask data, seeded with priv->mask_id_seed. */
+       u32 hash_key;
+       /* Number of flows currently sharing this mask. */
+       u32 ref_cnt;
+       /* Id obtained from nfp_mask_alloc() for this mask. */
+       u8 mask_id;
+};
+
+/* Return a stats context id to the free-list ring.
+ *
+ * Copies NFP_FL_STATS_ELEM_RS bytes of @stats_context_id into the ring at
+ * head, then advances head, wrapping at
+ * NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS bytes.
+ *
+ * Returns 0 on success or -ENOBUFS when CIRC_SPACE() reports no room.
+ */
+static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct circ_buf *ring;
+
+       ring = &priv->stats_ids.free_list;
+       /* Check if buffer is full. */
+       if (!CIRC_SPACE(ring->head, ring->tail, NFP_FL_STATS_ENTRY_RS *
+                       NFP_FL_STATS_ELEM_RS -
+                       NFP_FL_STATS_ELEM_RS + 1))
+               return -ENOBUFS;
+
+       memcpy(&ring->buf[ring->head], &stats_context_id, NFP_FL_STATS_ELEM_RS);
+       ring->head = (ring->head + NFP_FL_STATS_ELEM_RS) %
+                    (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+
+       return 0;
+}
+
+/* Obtain a stats context id.
+ *
+ * Ids that have never been handed out are used first (counting down);
+ * after that, ids are popped from the tail of the free-list ring.
+ *
+ * Returns 0 with the id in @stats_context_id, or -ENOENT with
+ * @stats_context_id set to NFP_FL_STATS_ENTRY_RS when none is available.
+ */
+static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct circ_buf *free_ring = &priv->stats_ids.free_list;
+       u32 marker_id = NFP_FL_STATS_ENTRY_RS;
+       u32 recycled_id;
+
+       /* Never-used ids take priority over recycled ones. */
+       if (priv->stats_ids.init_unalloc > 0) {
+               priv->stats_ids.init_unalloc--;
+               *stats_context_id = priv->stats_ids.init_unalloc;
+               return 0;
+       }
+
+       /* head == tail: no id has been released back to the ring. */
+       if (free_ring->head == free_ring->tail) {
+               *stats_context_id = marker_id;
+               return -ENOENT;
+       }
+
+       /* Pop the id at tail and stamp the vacated slot with the marker. */
+       memcpy(&recycled_id, &free_ring->buf[free_ring->tail],
+              NFP_FL_STATS_ELEM_RS);
+       *stats_context_id = recycled_id;
+       memcpy(&free_ring->buf[free_ring->tail], &marker_id,
+              NFP_FL_STATS_ELEM_RS);
+       free_ring->tail = (free_ring->tail + NFP_FL_STATS_ELEM_RS) %
+                         (NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+
+       return 0;
+}
+
+/* Look up an offloaded flow by its TC flower cookie.
+ *
+ * Must be called with either RTNL or rcu_read_lock held.
+ * Returns the matching entry of priv->flow_table, or NULL if there is none.
+ */
+struct nfp_fl_payload *
+nfp_flower_search_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct nfp_fl_payload *entry;
+
+       hash_for_each_possible_rcu(priv->flow_table, entry, link,
+                                  tc_flower_cookie) {
+               if (entry->tc_flower_cookie != tc_flower_cookie)
+                       continue;
+
+               return entry;
+       }
+
+       return NULL;
+}
+
+/* Fold one stats frame received from the firmware into its flow.
+ *
+ * The flow is found by cookie under rcu_read_lock(); the frame is ignored
+ * when no flow matches or when its context id differs from the flow's.
+ */
+static void
+nfp_flower_update_stats(struct nfp_app *app, struct nfp_fl_stats_frame *stats)
+{
+       unsigned long cookie = be64_to_cpu(stats->stats_cookie);
+       struct nfp_fl_payload *entry;
+
+       rcu_read_lock();
+
+       entry = nfp_flower_search_fl_table(app, cookie);
+       if (entry && entry->meta.host_ctx_id == stats->stats_con_id) {
+               /* Counters are accumulated under the per-flow lock. */
+               spin_lock(&entry->lock);
+               entry->stats.pkts += be32_to_cpu(stats->pkt_count);
+               entry->stats.bytes += be64_to_cpu(stats->byte_count);
+               entry->stats.used = jiffies;
+               spin_unlock(&entry->lock);
+       }
+
+       rcu_read_unlock();
+}
+
+/* Handle a stats control message: the payload after the cmsg header is
+ * treated as an array of struct nfp_fl_stats_frame and every complete
+ * frame is passed to nfp_flower_update_stats().
+ */
+void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
+{
+       unsigned int payload_len = skb->len - NFP_FLOWER_CMSG_HLEN;
+       struct nfp_fl_stats_frame *frames;
+       size_t nframes, idx;
+
+       frames = (struct nfp_fl_stats_frame *)nfp_flower_cmsg_get_data(skb);
+       nframes = payload_len / sizeof(*frames);
+
+       for (idx = 0; idx < nframes; idx++)
+               nfp_flower_update_stats(app, &frames[idx]);
+}
+
+/* Return a mask id to the free-list ring and record when it was released.
+ *
+ * The id is written at the ring head, head is advanced (wrapping at
+ * NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS) and the current
+ * time is stored in priv->mask_ids.last_used[mask_id].
+ *
+ * Returns 0 on success or -ENOBUFS when the ring has no space.
+ */
+static int nfp_release_mask_id(struct nfp_app *app, u8 mask_id)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct circ_buf *ring;
+       struct timespec64 now;
+
+       ring = &priv->mask_ids.mask_id_free_list;
+       /* Checking if buffer is full. */
+       if (CIRC_SPACE(ring->head, ring->tail, NFP_FLOWER_MASK_ENTRY_RS) == 0)
+               return -ENOBUFS;
+
+       memcpy(&ring->buf[ring->head], &mask_id, NFP_FLOWER_MASK_ELEMENT_RS);
+       ring->head = (ring->head + NFP_FLOWER_MASK_ELEMENT_RS) %
+                    (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS);
+
+       /* The timestamp is read back by nfp_mask_alloc() before reuse. */
+       getnstimeofday64(&now);
+       priv->mask_ids.last_used[mask_id] = now;
+
+       return 0;
+}
+
+/* Allocate a mask id.
+ *
+ * Never-used ids are handed out first.  Otherwise the id at the tail of
+ * the free-list ring is considered, but it is only taken if it was
+ * released at least NFP_FL_MASK_REUSE_TIME_NS ago.
+ *
+ * Returns 0 with the id in @mask_id, or -ENOENT with @mask_id set to
+ * NFP_FLOWER_MASK_ENTRY_RS - 1 when no id can be handed out.
+ */
+static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct timespec64 delta, now;
+       struct circ_buf *ring;
+       u8 temp_id, freed_id;
+
+       ring = &priv->mask_ids.mask_id_free_list;
+       /* Value reported on failure and written over consumed ring slots. */
+       freed_id = NFP_FLOWER_MASK_ENTRY_RS - 1;
+       /* Checking for unallocated entries first. */
+       if (priv->mask_ids.init_unallocated > 0) {
+               *mask_id = priv->mask_ids.init_unallocated;
+               priv->mask_ids.init_unallocated--;
+               return 0;
+       }
+
+       /* Checking if buffer is empty. */
+       if (ring->head == ring->tail)
+               goto err_not_found;
+
+       /* Peek at the oldest released id; tail is not advanced yet. */
+       memcpy(&temp_id, &ring->buf[ring->tail], NFP_FLOWER_MASK_ELEMENT_RS);
+       *mask_id = temp_id;
+
+       getnstimeofday64(&now);
+       delta = timespec64_sub(now, priv->mask_ids.last_used[*mask_id]);
+
+       /* Released too recently: leave it in the ring and report failure. */
+       if (timespec64_to_ns(&delta) < NFP_FL_MASK_REUSE_TIME_NS)
+               goto err_not_found;
+
+       /* Consume the slot: overwrite it and move tail forward. */
+       memcpy(&ring->buf[ring->tail], &freed_id, NFP_FLOWER_MASK_ELEMENT_RS);
+       ring->tail = (ring->tail + NFP_FLOWER_MASK_ELEMENT_RS) %
+                    (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS);
+
+       return 0;
+
+err_not_found:
+       *mask_id = freed_id;
+       return -ENOENT;
+}
+
+/* Allocate a mask id for a new mask and insert it into the mask table
+ * with a reference count of one.
+ *
+ * Returns the mask id (>= 0) on success, -ENOENT when no id could be
+ * allocated, or -ENOMEM when the table entry could not be allocated (the
+ * id is released again in that case).
+ */
+static int
+nfp_add_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct nfp_mask_id_table *entry;
+       unsigned long key;
+       u8 new_id;
+
+       if (nfp_mask_alloc(app, &new_id))
+               return -ENOENT;
+
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry) {
+               nfp_release_mask_id(app, new_id);
+               return -ENOMEM;
+       }
+
+       key = jhash(mask_data, mask_len, priv->mask_id_seed);
+
+       INIT_HLIST_NODE(&entry->link);
+       entry->hash_key = key;
+       entry->mask_id = new_id;
+       entry->ref_cnt = 1;
+       hash_add(priv->mask_table, &entry->link, key);
+
+       return new_id;
+}
+
+/* Find the mask table entry for a mask.
+ *
+ * Entries are matched on the jhash of the mask data (seeded with
+ * priv->mask_id_seed) alone.  Returns the entry or NULL.
+ */
+static struct nfp_mask_id_table *
+nfp_search_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct nfp_mask_id_table *entry;
+       unsigned long key;
+
+       key = jhash(mask_data, mask_len, priv->mask_id_seed);
+
+       hash_for_each_possible(priv->mask_table, entry, link, key) {
+               if (entry->hash_key != key)
+                       continue;
+
+               return entry;
+       }
+
+       return NULL;
+}
+
+/* Take a reference on an already known mask.
+ *
+ * Returns the mask id widened to int (>= 0) after bumping the entry's
+ * reference count, or -ENOENT when the mask is not in the table.
+ */
+static int
+nfp_find_in_mask_table(struct nfp_app *app, char *mask_data, u32 mask_len)
+{
+       struct nfp_mask_id_table *entry =
+               nfp_search_mask_table(app, mask_data, mask_len);
+
+       if (entry) {
+               entry->ref_cnt++;
+               return entry->mask_id;
+       }
+
+       return -ENOENT;
+}
+
+/* Get a mask id for a mask, creating a table entry if needed.
+ *
+ * An existing mask is reused (its reference count is incremented); a new
+ * one is added to the table and NFP_FL_META_FLAG_NEW_MASK is set in
+ * @meta_flags.  The id is stored in @mask_id.
+ *
+ * Returns true on success, false when a new mask could not be added.
+ */
+static bool
+nfp_check_mask_add(struct nfp_app *app, char *mask_data, u32 mask_len,
+                  u8 *meta_flags, u8 *mask_id)
+{
+       int found;
+
+       found = nfp_find_in_mask_table(app, mask_data, mask_len);
+       if (found >= 0) {
+               *mask_id = found;
+               return true;
+       }
+
+       found = nfp_add_mask_table(app, mask_data, mask_len);
+       if (found < 0)
+               return false;
+
+       *meta_flags |= NFP_FL_META_FLAG_NEW_MASK;
+       *mask_id = found;
+
+       return true;
+}
+
+/* Drop one reference on a mask.
+ *
+ * The mask id is stored in @mask_id.  When the last reference goes away
+ * the entry is unhashed and freed, the id is released and, if @meta_flags
+ * is not NULL, NFP_FL_META_FLAG_LAST_MASK is set in it.
+ *
+ * Returns false when the mask is not in the table, true otherwise.
+ */
+static bool
+nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
+                     u8 *meta_flags, u8 *mask_id)
+{
+       struct nfp_mask_id_table *entry;
+
+       entry = nfp_search_mask_table(app, mask_data, mask_len);
+       if (!entry)
+               return false;
+
+       *mask_id = entry->mask_id;
+
+       entry->ref_cnt--;
+       if (entry->ref_cnt)
+               return true;
+
+       /* That was the last user of this mask. */
+       hash_del(&entry->link);
+       nfp_release_mask_id(app, *mask_id);
+       kfree(entry);
+       if (meta_flags)
+               *meta_flags |= NFP_FL_META_FLAG_LAST_MASK;
+
+       return true;
+}
+
+/* Fill in the metadata of a flow that is about to be offloaded.
+ *
+ * Allocates a stats context id and a mask id (shared with other flows
+ * using the same mask), stamps the flow with the current flower version,
+ * writes the mask id into the key at NFP_FL_MASK_ID_LOCATION and resets
+ * the flow's software stats.
+ *
+ * Returns 0 on success, -ENOENT when no stats context or mask id is
+ * available, -EEXIST when a flow with the same cookie is already in the
+ * flow table (the resources taken here are released again), or -EINVAL
+ * when releasing those resources fails.
+ */
+int nfp_compile_flow_metadata(struct nfp_app *app,
+                             struct tc_cls_flower_offload *flow,
+                             struct nfp_fl_payload *nfp_flow)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct nfp_fl_payload *check_entry;
+       u8 new_mask_id;
+       u32 stats_cxt;
+
+       if (nfp_get_stats_entry(app, &stats_cxt))
+               return -ENOENT;
+
+       nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt);
+       nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie);
+
+       new_mask_id = 0;
+       if (!nfp_check_mask_add(app, nfp_flow->mask_data,
+                               nfp_flow->meta.mask_len,
+                               &nfp_flow->meta.flags, &new_mask_id)) {
+               /* No mask id: give the stats context back. */
+               if (nfp_release_stats_entry(app, stats_cxt))
+                       return -EINVAL;
+               return -ENOENT;
+       }
+
+       nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+       priv->flower_version++;
+
+       /* Update flow payload with mask ids. */
+       nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
+       nfp_flow->stats.pkts = 0;
+       nfp_flow->stats.bytes = 0;
+       nfp_flow->stats.used = jiffies;
+
+       /* Reject duplicates: undo the stats and mask allocations above. */
+       check_entry = nfp_flower_search_fl_table(app, flow->cookie);
+       if (check_entry) {
+               if (nfp_release_stats_entry(app, stats_cxt))
+                       return -EINVAL;
+
+               if (!nfp_check_mask_remove(app, nfp_flow->mask_data,
+                                          nfp_flow->meta.mask_len,
+                                          NULL, &new_mask_id))
+                       return -EINVAL;
+
+               return -EEXIST;
+       }
+
+       return 0;
+}
+
+/* Update a flow's metadata for removal.
+ *
+ * Drops the flow's mask reference (meta.flags is updated when it was the
+ * last one), stamps the flow with a fresh flower version, rewrites the
+ * mask id in the key and hands the stats context id back.
+ *
+ * Returns the result of nfp_release_stats_entry().
+ */
+int nfp_modify_flow_metadata(struct nfp_app *app,
+                            struct nfp_fl_payload *nfp_flow)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       u8 mask_id = 0;
+       u32 ctx_id;
+
+       nfp_check_mask_remove(app, nfp_flow->mask_data,
+                             nfp_flow->meta.mask_len, &nfp_flow->meta.flags,
+                             &mask_id);
+
+       nfp_flow->meta.flow_version = cpu_to_be64(priv->flower_version);
+       priv->flower_version++;
+
+       /* The key carries the mask id at a fixed location. */
+       nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = mask_id;
+
+       /* host_ctx_id is kept in big endian; the free list wants CPU order. */
+       ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
+
+       return nfp_release_stats_entry(app, ctx_id);
+}
+
+/* Set up the flower app's flow and mask bookkeeping.
+ *
+ * Initialises the mask and flow hash tables, seeds the mask hash and
+ * allocates the mask id free list, the per mask id release timestamps and
+ * the stats id free list.
+ *
+ * Returns 0 on success or -ENOMEM, in which case everything allocated
+ * here has been freed again.
+ */
+int nfp_flower_metadata_init(struct nfp_app *app)
+{
+       struct nfp_flower_priv *priv = app->priv;
+
+       hash_init(priv->mask_table);
+       hash_init(priv->flow_table);
+       get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed));
+
+       /* Init ring buffer and unallocated mask_ids. */
+       priv->mask_ids.mask_id_free_list.buf =
+               kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
+                             NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL);
+       if (!priv->mask_ids.mask_id_free_list.buf)
+               return -ENOMEM;
+
+       priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1;
+
+       /* Init timestamps for mask id */
+       priv->mask_ids.last_used =
+               kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS,
+                             sizeof(*priv->mask_ids.last_used), GFP_KERNEL);
+       if (!priv->mask_ids.last_used)
+               goto err_free_mask_id;
+
+       /* Init ring buffer and unallocated stats_ids. */
+       priv->stats_ids.free_list.buf =
+               vmalloc(NFP_FL_STATS_ENTRY_RS * NFP_FL_STATS_ELEM_RS);
+       if (!priv->stats_ids.free_list.buf)
+               goto err_free_last_used;
+
+       priv->stats_ids.init_unalloc = NFP_FL_REPEATED_HASH_MAX;
+
+       return 0;
+
+err_free_last_used:
+       /* Only the stats ring failed to allocate, so the timestamp array
+        * is what has to be unwound at this point.
+        */
+       kfree(priv->mask_ids.last_used);
+err_free_mask_id:
+       kfree(priv->mask_ids.mask_id_free_list.buf);
+       return -ENOMEM;
+}
+
+/* Free the buffers allocated by nfp_flower_metadata_init().
+ * Does nothing when the app has no private data.
+ */
+void nfp_flower_metadata_cleanup(struct nfp_app *app)
+{
+       struct nfp_flower_priv *priv = app->priv;
+
+       if (priv) {
+               kfree(priv->mask_ids.mask_id_free_list.buf);
+               kfree(priv->mask_ids.last_used);
+               vfree(priv->stats_ids.free_list.buf);
+       }
+}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c

new file mode 100644 (file)

index 0000000..4ad10bd
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+#include <net/devlink.h>
+#include <net/pkt_cls.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfpcore/nfp_cpp.h"
+#include "../nfpcore/nfp_nsp.h"
+#include "../nfp_app.h"
+#include "../nfp_main.h"
+#include "../nfp_net.h"
+#include "../nfp_port.h"
+
+/* Send a flow (metadata, key, mask and actions) to the firmware.
+ *
+ * @netdev:   representor the flow belongs to
+ * @nfp_flow: flow payload; its meta.*_len fields are in bytes
+ * @mtype:    control message type
+ *
+ * The metadata is copied into the message with its lengths expressed in
+ * long words; @nfp_flow itself always keeps its lengths in bytes,
+ * including when this function fails.
+ *
+ * Returns 0 on success or -ENOMEM when the control message cannot be
+ * allocated.
+ */
+static int
+nfp_flower_xmit_flow(struct net_device *netdev,
+                    struct nfp_fl_payload *nfp_flow, u8 mtype)
+{
+       u32 meta_len, key_len, mask_len, act_len, tot_len;
+       struct nfp_repr *priv = netdev_priv(netdev);
+       struct sk_buff *skb;
+       unsigned char *msg;
+
+       meta_len = sizeof(struct nfp_fl_rule_metadata);
+       key_len = nfp_flow->meta.key_len;
+       mask_len = nfp_flow->meta.mask_len;
+       act_len = nfp_flow->meta.act_len;
+
+       tot_len = meta_len + key_len + mask_len + act_len;
+
+       /* Allocate before touching the metadata so that a failure leaves
+        * the flow's lengths untouched.
+        */
+       skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype);
+       if (!skb)
+               return -ENOMEM;
+
+       msg = nfp_flower_cmsg_get_data(skb);
+
+       /* Convert to long words as firmware expects
+        * lengths in units of NFP_FL_LW_SIZ.
+        */
+       nfp_flow->meta.key_len >>= NFP_FL_LW_SIZ;
+       nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ;
+       nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ;
+
+       memcpy(msg, &nfp_flow->meta, meta_len);
+
+       /* Convert back to bytes as software expects
+        * lengths in units of bytes.
+        */
+       nfp_flow->meta.key_len <<= NFP_FL_LW_SIZ;
+       nfp_flow->meta.mask_len <<= NFP_FL_LW_SIZ;
+       nfp_flow->meta.act_len <<= NFP_FL_LW_SIZ;
+
+       memcpy(&msg[meta_len], nfp_flow->unmasked_data, key_len);
+       memcpy(&msg[meta_len + key_len], nfp_flow->mask_data, mask_len);
+       memcpy(&msg[meta_len + key_len + mask_len],
+              nfp_flow->action_data, act_len);
+
+       nfp_ctrl_tx(priv->app->ctrl, skb);
+
+       return 0;
+}
+
+/* Tell whether the rule matches on anything above the MAC layer, i.e.
+ * whether its dissector uses IPv4/IPv6 addresses, ports or ICMP.
+ */
+static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
+{
+       static const enum flow_dissector_key_id upper_keys[] = {
+               FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+               FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+               FLOW_DISSECTOR_KEY_PORTS,
+               FLOW_DISSECTOR_KEY_ICMP,
+       };
+       unsigned int idx;
+
+       for (idx = 0; idx < ARRAY_SIZE(upper_keys); idx++)
+               if (dissector_uses_key(f->dissector, upper_keys[idx]))
+                       return true;
+
+       return false;
+}
+
+/* Work out which key layers a flower rule needs and how big the key is.
+ *
+ * @ret_key_ls: filled in with the layer bitmaps and the key size
+ * @flow:       TC flower offload request
+ *
+ * The PORT and MAC layers are always present.  IPv4/IPv6 is added
+ * according to the matched ethertype and the TP layer according to the
+ * matched IP protocol.  Dissector keys are only inspected when the rule
+ * actually uses them.
+ *
+ * Returns 0 on success or -EOPNOTSUPP for rules that cannot be offloaded
+ * (tunnels, ARP, unknown ethertypes combined with higher layer matches,
+ * unsupported IP protocols).
+ */
+static int
+nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
+                               struct tc_cls_flower_offload *flow)
+{
+       struct flow_dissector_key_basic *mask_basic = NULL;
+       struct flow_dissector_key_basic *key_basic = NULL;
+       struct flow_dissector_key_control *mask_enc_ctl;
+       u32 key_layer_two;
+       u8 key_layer;
+       int key_size;
+
+       if (dissector_uses_key(flow->dissector,
+                              FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+               mask_enc_ctl = skb_flow_dissector_target(flow->dissector,
+                                                        FLOW_DISSECTOR_KEY_ENC_CONTROL,
+                                                        flow->mask);
+               /* We are expecting a tunnel. For now we ignore offloading. */
+               if (mask_enc_ctl->addr_type)
+                       return -EOPNOTSUPP;
+       }
+
+       if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+               mask_basic = skb_flow_dissector_target(flow->dissector,
+                                                      FLOW_DISSECTOR_KEY_BASIC,
+                                                      flow->mask);
+
+               key_basic = skb_flow_dissector_target(flow->dissector,
+                                                     FLOW_DISSECTOR_KEY_BASIC,
+                                                     flow->key);
+       }
+
+       key_layer_two = 0;
+       key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
+       key_size = sizeof(struct nfp_flower_meta_one) +
+                  sizeof(struct nfp_flower_in_port) +
+                  sizeof(struct nfp_flower_mac_mpls);
+
+       if (mask_basic && mask_basic->n_proto) {
+               /* Ethernet type is present in the key. */
+               switch (key_basic->n_proto) {
+               case cpu_to_be16(ETH_P_IP):
+                       key_layer |= NFP_FLOWER_LAYER_IPV4;
+                       key_size += sizeof(struct nfp_flower_ipv4);
+                       break;
+
+               case cpu_to_be16(ETH_P_IPV6):
+                       key_layer |= NFP_FLOWER_LAYER_IPV6;
+                       key_size += sizeof(struct nfp_flower_ipv6);
+                       break;
+
+               /* Currently we do not offload ARP
+                * because we rely on it to get to the host.
+                */
+               case cpu_to_be16(ETH_P_ARP):
+                       return -EOPNOTSUPP;
+
+               /* Will be included in layer 2. */
+               case cpu_to_be16(ETH_P_8021Q):
+                       break;
+
+               default:
+                       /* Other ethtype - we need check the masks for the
+                        * remainder of the key to ensure we can offload.
+                        */
+                       if (nfp_flower_check_higher_than_mac(flow))
+                               return -EOPNOTSUPP;
+                       break;
+               }
+       }
+
+       if (mask_basic && mask_basic->ip_proto) {
+               /* IP protocol is present in the key. */
+               switch (key_basic->ip_proto) {
+               case IPPROTO_TCP:
+               case IPPROTO_UDP:
+               case IPPROTO_SCTP:
+               case IPPROTO_ICMP:
+               case IPPROTO_ICMPV6:
+                       key_layer |= NFP_FLOWER_LAYER_TP;
+                       key_size += sizeof(struct nfp_flower_tp_ports);
+                       break;
+               default:
+                       /* Other ip proto - not supported for offload. */
+                       return -EOPNOTSUPP;
+               }
+       }
+
+       ret_key_ls->key_layer = key_layer;
+       ret_key_ls->key_layer_two = key_layer_two;
+       ret_key_ls->key_size = key_size;
+
+       return 0;
+}
+
+/* Allocate a flow payload with key and mask buffers of
+ * key_layer->key_size bytes each and an action buffer of NFP_FL_MAX_A_SIZ
+ * bytes.  meta.key_len and meta.mask_len are set to the key size,
+ * meta.flags is cleared and the per-flow lock is initialised.
+ *
+ * Returns the payload, or NULL when any allocation fails.
+ */
+static struct nfp_fl_payload *
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
+{
+       struct nfp_fl_payload *payload;
+
+       payload = kmalloc(sizeof(*payload), GFP_KERNEL);
+       if (!payload)
+               return NULL;
+
+       /* Exact-match key buffer. */
+       payload->meta.key_len = key_layer->key_size;
+       payload->unmasked_data = kmalloc(key_layer->key_size, GFP_KERNEL);
+       if (!payload->unmasked_data)
+               goto free_payload;
+
+       /* Mask buffer, same size as the key. */
+       payload->meta.mask_len = key_layer->key_size;
+       payload->mask_data = kmalloc(key_layer->key_size, GFP_KERNEL);
+       if (!payload->mask_data)
+               goto free_key;
+
+       /* Action buffer. */
+       payload->action_data = kmalloc(NFP_FL_MAX_A_SIZ, GFP_KERNEL);
+       if (!payload->action_data)
+               goto free_mask;
+
+       payload->meta.flags = 0;
+       spin_lock_init(&payload->lock);
+
+       return payload;
+
+free_mask:
+       kfree(payload->mask_data);
+free_key:
+       kfree(payload->unmasked_data);
+free_payload:
+       kfree(payload);
+       return NULL;
+}
+
+/**
+ * nfp_flower_add_offload() - Adds a new flow to hardware.
+ * @app:       Pointer to the APP handle
+ * @netdev:    netdev structure.
+ * @flow:      TC flower classifier offload structure.
+ *
+ * Adds a new flow to the repeated hash structure and action payload.
+ * On failure every resource taken along the way, including the stats
+ * context id and the mask reference, is released again.
+ *
+ * Return: negative value on error, 0 if configured successfully.
+ */
+static int
+nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
+                      struct tc_cls_flower_offload *flow)
+{
+       struct nfp_flower_priv *priv = app->priv;
+       struct nfp_fl_payload *flow_pay;
+       struct nfp_fl_key_ls *key_layer;
+       int err;
+
+       key_layer = kmalloc(sizeof(*key_layer), GFP_KERNEL);
+       if (!key_layer)
+               return -ENOMEM;
+
+       err = nfp_flower_calculate_key_layers(key_layer, flow);
+       if (err)
+               goto err_free_key_ls;
+
+       flow_pay = nfp_flower_allocate_new(key_layer);
+       if (!flow_pay) {
+               err = -ENOMEM;
+               goto err_free_key_ls;
+       }
+
+       err = nfp_flower_compile_flow_match(flow, key_layer, netdev, flow_pay);
+       if (err)
+               goto err_destroy_flow;
+
+       err = nfp_flower_compile_action(flow, netdev, flow_pay);
+       if (err)
+               goto err_destroy_flow;
+
+       err = nfp_compile_flow_metadata(app, flow, flow_pay);
+       if (err)
+               goto err_destroy_flow;
+
+       err = nfp_flower_xmit_flow(netdev, flow_pay,
+                                  NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
+       if (err)
+               goto err_release_metadata;
+
+       INIT_HLIST_NODE(&flow_pay->link);
+       flow_pay->tc_flower_cookie = flow->cookie;
+       hash_add_rcu(priv->flow_table, &flow_pay->link, flow->cookie);
+
+       /* The flow payload now lives in the flow table until the rule is
+        * destroyed; only the key layer scratch data is freed here.
+        */
+       kfree(key_layer);
+
+       return 0;
+
+err_release_metadata:
+       /* Hand back the stats context id and the mask reference taken by
+        * nfp_compile_flow_metadata().  The mask lookup hashes
+        * meta.mask_len bytes, so make sure that length is in bytes.
+        */
+       flow_pay->meta.mask_len = key_layer->key_size;
+       nfp_modify_flow_metadata(app, flow_pay);
+err_destroy_flow:
+       kfree(flow_pay->action_data);
+       kfree(flow_pay->mask_data);
+       kfree(flow_pay->unmasked_data);
+       kfree(flow_pay);
+err_free_key_ls:
+       kfree(key_layer);
+       return err;
+}
+
+/**
+ * nfp_flower_del_offload() - Removes a flow from hardware.
+ * @app:       Pointer to the APP handle
+ * @netdev:    netdev structure.
+ * @flow:      TC flower classifier offload structure
+ *
+ * Looks the flow up by cookie, updates its metadata for removal and, if
+ * that worked, sends the delete message.  The flow is unhashed and freed
+ * whether or not those steps succeed.
+ *
+ * Return: negative value on error, 0 if removed successfully.
+ */
+static int
+nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
+                      struct tc_cls_flower_offload *flow)
+{
+       struct nfp_fl_payload *entry;
+       int err;
+
+       entry = nfp_flower_search_fl_table(app, flow->cookie);
+       if (!entry)
+               return -ENOENT;
+
+       err = nfp_modify_flow_metadata(app, entry);
+       if (!err)
+               err = nfp_flower_xmit_flow(netdev, entry,
+                                          NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+
+       /* Tear the flow down regardless of the outcome above. */
+       hash_del_rcu(&entry->link);
+       kfree(entry->action_data);
+       kfree(entry->mask_data);
+       kfree(entry->unmasked_data);
+       kfree_rcu(entry, rcu);
+
+       return err;
+}
+
+/**
+ * nfp_flower_get_stats() - Populates flow stats obtained from hardware.
+ * @app:       Pointer to the APP handle
+ * @flow:      TC flower classifier offload structure
+ *
+ * Reports the counters accumulated for the flow identified by the cookie
+ * to TC and then resets the packet and byte counters.
+ *
+ * Return: negative value on error, 0 if stats populated successfully.
+ */
+static int
+nfp_flower_get_stats(struct nfp_app *app, struct tc_cls_flower_offload *flow)
+{
+       struct nfp_fl_payload *entry;
+
+       entry = nfp_flower_search_fl_table(app, flow->cookie);
+       if (!entry)
+               return -EINVAL;
+
+       spin_lock_bh(&entry->lock);
+
+       tcf_exts_stats_update(flow->exts, entry->stats.bytes,
+                             entry->stats.pkts, entry->stats.used);
+       /* Counters are deltas since the previous report. */
+       entry->stats.pkts = 0;
+       entry->stats.bytes = 0;
+
+       spin_unlock_bh(&entry->lock);
+
+       return 0;
+}
+
+/* Dispatch a flower offload command to its add/delete/stats handler.
+ * Returns the handler's result, or -EOPNOTSUPP for any other command.
+ */
+static int
+nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
+                       struct tc_cls_flower_offload *flower)
+{
+       int ret;
+
+       switch (flower->command) {
+       case TC_CLSFLOWER_REPLACE:
+               ret = nfp_flower_add_offload(app, netdev, flower);
+               break;
+       case TC_CLSFLOWER_DESTROY:
+               ret = nfp_flower_del_offload(app, netdev, flower);
+               break;
+       case TC_CLSFLOWER_STATS:
+               ret = nfp_flower_get_stats(app, flower);
+               break;
+       default:
+               ret = -EOPNOTSUPP;
+               break;
+       }
+
+       return ret;
+}
+
+/* TC setup entry point of the flower app.
+ *
+ * Only ingress handles with an 802.3 protocol are accepted (otherwise
+ * -EOPNOTSUPP); any request type other than TC_SETUP_CLSFLOWER yields
+ * -EINVAL.  Accepted requests are passed to nfp_flower_repr_offload().
+ */
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+                       u32 handle, __be16 proto, struct tc_to_netdev *tc)
+{
+       if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS) ||
+           !eth_proto_is_802_3(proto))
+               return -EOPNOTSUPP;
+
+       if (tc->type == TC_SETUP_CLSFLOWER)
+               return nfp_flower_repr_offload(app, netdev, tc->cls_flower);
+
+       return -EINVAL;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c

index 5620de05c9969275681166423ef41eeef79165c4..c704c022574f419ead4f7b4c9a9b54e1b273eb77 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -43,7 +43,9 @@
  static const struct nfp_app_type *apps[] = {
         &app_nic,
         &app_bpf,
+#ifdef CONFIG_NFP_APP_FLOWER
         &app_flower,
+#endif
  };
  
  const char *nfp_app_mip_name(struct nfp_app *app)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h

index ae2d02753d1a073d03590d4288a26ad454406989..5d714e10d9a947fcabded2642547aab22c3a08c4 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -47,6 +47,7 @@ struct sk_buff;
  struct nfp_app;
  struct nfp_cpp;
  struct nfp_pf;
+struct nfp_repr;
  struct nfp_net;
  
  enum nfp_app_id {
@@ -66,10 +67,13 @@ extern const struct nfp_app_type app_flower;
   * @ctrl_has_meta:  control messages have prepend of type:5/port:CTRL
   *
   * Callbacks
- * @init:      perform basic app checks
+ * @init:      perform basic app checks and init
+ * @clean:     clean app state
   * @extra_cap: extra capabilities string
   * @vnic_init: init vNICs (assign port types, etc.)
   * @vnic_clean:        clean up app's vNIC state
+ * @repr_open: representor netdev open callback
+ * @repr_stop: representor netdev stop callback
   * @start:     start application logic
   * @stop:      stop application logic
   * @ctrl_msg_rx:    control message handler
@@ -88,6 +92,7 @@ struct nfp_app_type {
         bool ctrl_has_meta;
  
         int (*init)(struct nfp_app *app);
+       void (*clean)(struct nfp_app *app);
  
         const char *(*extra_cap)(struct nfp_app *app, struct nfp_net *nn);
  
@@ -95,6 +100,9 @@ struct nfp_app_type {
                          unsigned int id);
         void (*vnic_clean)(struct nfp_app *app, struct nfp_net *nn);
  
+       int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr);
+       int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr);
+
         int (*start)(struct nfp_app *app);
         void (*stop)(struct nfp_app *app);
  
@@ -144,6 +152,12 @@ static inline int nfp_app_init(struct nfp_app *app)
         return app->type->init(app);
  }
  
+static inline void nfp_app_clean(struct nfp_app *app)
+{
+       if (app->type->clean)
+               app->type->clean(app);
+}
+
  static inline int nfp_app_vnic_init(struct nfp_app *app, struct nfp_net *nn,
                                     unsigned int id)
  {
@@ -156,6 +170,20 @@ static inline void nfp_app_vnic_clean(struct nfp_app *app, struct nfp_net *nn)
                 app->type->vnic_clean(app, nn);
  }
  
+static inline int nfp_app_repr_open(struct nfp_app *app, struct nfp_repr *repr)
+{
+       if (!app->type->repr_open)
+               return -EINVAL;
+       return app->type->repr_open(app, repr);
+}
+
+static inline int nfp_app_repr_stop(struct nfp_app *app, struct nfp_repr *repr)
+{
+       if (!app->type->repr_stop)
+               return -EINVAL;
+       return app->type->repr_stop(app, repr);
+}
+
  static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl)
  {
         app->ctrl = ctrl;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c

index 748e54cc885e280405bf0e635089c7cb437f79d1..d67969d3e484682c102a965c4abf5908dc1fc6fc 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -107,17 +107,18 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
                 goto err_unlock;
         }
  
-       err = nfp_app_sriov_enable(pf->app, num_vfs);
+       err = pci_enable_sriov(pdev, num_vfs);
         if (err) {
-               dev_warn(&pdev->dev, "App specific PCI sriov configuration failed: %d\n",
-                        err);
+               dev_warn(&pdev->dev, "Failed to enable PCI SR-IOV: %d\n", err);
                 goto err_unlock;
         }
  
-       err = pci_enable_sriov(pdev, num_vfs);
+       err = nfp_app_sriov_enable(pf->app, num_vfs);
         if (err) {
-               dev_warn(&pdev->dev, "Failed to enable PCI sriov: %d\n", err);
-               goto err_app_sriov_disable;
+               dev_warn(&pdev->dev,
+                        "App specific PCI SR-IOV configuration failed: %d\n",
+                        err);
+               goto err_sriov_disable;
         }
  
         pf->num_vfs = num_vfs;
@@ -127,8 +128,8 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs)
         mutex_unlock(&pf->lock);
         return num_vfs;
  
-err_app_sriov_disable:
-       nfp_app_sriov_disable(pf->app);
+err_sriov_disable:
+       pci_disable_sriov(pdev);
  err_unlock:
         mutex_unlock(&pf->lock);
         return err;
@@ -136,17 +137,20 @@ err_unlock:
         return 0;
  }
  
-static int __nfp_pcie_sriov_disable(struct pci_dev *pdev)
+static int nfp_pcie_sriov_disable(struct pci_dev *pdev)
  {
  #ifdef CONFIG_PCI_IOV
         struct nfp_pf *pf = pci_get_drvdata(pdev);
  
+       mutex_lock(&pf->lock);
+
         /* If the VFs are assigned we cannot shut down SR-IOV without
          * causing issues, so just leave the hardware available but
          * disabled
          */
         if (pci_vfs_assigned(pdev)) {
                 dev_warn(&pdev->dev, "Disabling while VFs assigned - VFs will not be deallocated\n");
+               mutex_unlock(&pf->lock);
                 return -EPERM;
         }
  
@@ -156,20 +160,10 @@ static int __nfp_pcie_sriov_disable(struct pci_dev *pdev)
  
         pci_disable_sriov(pdev);
         dev_dbg(&pdev->dev, "Removed VFs.\n");
-#endif
-       return 0;
-}
-
-static int nfp_pcie_sriov_disable(struct pci_dev *pdev)
-{
-       struct nfp_pf *pf = pci_get_drvdata(pdev);
-       int err;
  
-       mutex_lock(&pf->lock);
-       err = __nfp_pcie_sriov_disable(pdev);
         mutex_unlock(&pf->lock);
-
-       return err;
+#endif
+       return 0;
  }
  
  static int nfp_pcie_sriov_configure(struct pci_dev *pdev, int num_vfs)
@@ -382,6 +376,12 @@ static int nfp_pci_probe(struct pci_dev *pdev,
         pci_set_drvdata(pdev, pf);
         pf->pdev = pdev;
  
+       pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev));
+       if (!pf->wq) {
+               err = -ENOMEM;
+               goto err_pci_priv_unset;
+       }
+
         pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev);
         if (IS_ERR_OR_NULL(pf->cpp)) {
                 err = PTR_ERR(pf->cpp);
@@ -414,6 +414,16 @@ static int nfp_pci_probe(struct pci_dev *pdev,
         if (err)
                 goto err_fw_unload;
  
+       pf->num_vfs = pci_num_vf(pdev);
+       if (pf->num_vfs > pf->limit_vfs) {
+               dev_err(&pdev->dev,
+                       "Error: %d VFs already enabled, but loaded FW can only support %d\n",
+                       pf->num_vfs, pf->limit_vfs);
+               /* err is still 0 here, don't let probe report success */
+               err = -EINVAL;
+               goto err_fw_unload;
+       }
+
         err = nfp_net_pci_probe(pf);
         if (err)
                 goto err_sriov_unlimit;
@@ -443,6 +451,8 @@ err_hwinfo_free:
         kfree(pf->hwinfo);
         nfp_cpp_free(pf->cpp);
  err_disable_msix:
+       destroy_workqueue(pf->wq);
+err_pci_priv_unset:
         pci_set_drvdata(pdev, NULL);
         mutex_destroy(&pf->lock);
         devlink_free(devlink);
@@ -463,11 +473,11 @@ static void nfp_pci_remove(struct pci_dev *pdev)
  
         devlink = priv_to_devlink(pf);
  
+       nfp_net_pci_remove(pf);
+
         nfp_pcie_sriov_disable(pdev);
         pci_sriov_set_totalvfs(pf->pdev, 0);
  
-       nfp_net_pci_remove(pf);
-
         devlink_unregister(devlink);
  
         kfree(pf->rtbl);
@@ -475,6 +485,7 @@ static void nfp_pci_remove(struct pci_dev *pdev)
         if (pf->fw_loaded)
                 nfp_fw_unload(pf);
  
+       destroy_workqueue(pf->wq);
         pci_set_drvdata(pdev, NULL);
         kfree(pf->hwinfo);
         nfp_cpp_free(pf->cpp);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h

index edc14dc78674e9c5d9e960a3b33e449ec1c6dd60..a08cfba7e68ed41c69b6e92a6167d229366f479b 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h
@@ -89,6 +89,7 @@ struct nfp_rtsym_table;
   * @num_vnics:         Number of vNICs spawned
   * @vnics:             Linked list of vNIC structures (struct nfp_net)
   * @ports:             Linked list of port structures (struct nfp_port)
+ * @wq:                        Workqueue for running works which need to grab @lock
   * @port_refresh_work: Work entry for taking netdevs out
   * @lock:              Protects all fields which may change after probe
   */
@@ -131,7 +132,10 @@ struct nfp_pf {
  
         struct list_head vnics;
         struct list_head ports;
+
+       struct workqueue_struct *wq;
         struct work_struct port_refresh_work;
+
         struct mutex lock;
  };
  
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c

index 2e728543e8402b46db04c4c614b08256a6f03e78..30f82b41d400cd4956f279dec5b3c18f0d04a358 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -64,6 +64,7 @@
  #include <linux/vmalloc.h>
  #include <linux/ktime.h>
  
+#include <net/switchdev.h>
  #include <net/vxlan.h>
  
  #include "nfpcore/nfp_nsp.h"
@@ -3096,18 +3097,6 @@ static void nfp_net_stat64(struct net_device *netdev,
         }
  }
  
-static int
-nfp_net_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
-                __be16 proto, struct tc_to_netdev *tc)
-{
-       struct nfp_net *nn = netdev_priv(netdev);
-
-       if (chain_index)
-               return -EOPNOTSUPP;
-
-       return nfp_app_setup_tc(nn->app, netdev, handle, proto, tc);
-}
-
  static int nfp_net_set_features(struct net_device *netdev,
                                 netdev_features_t features)
  {
@@ -3423,7 +3412,7 @@ const struct net_device_ops nfp_net_netdev_ops = {
         .ndo_get_stats64        = nfp_net_stat64,
         .ndo_vlan_rx_add_vid    = nfp_net_vlan_rx_add_vid,
         .ndo_vlan_rx_kill_vid   = nfp_net_vlan_rx_kill_vid,
-       .ndo_setup_tc           = nfp_net_setup_tc,
+       .ndo_setup_tc           = nfp_port_setup_tc,
         .ndo_tx_timeout         = nfp_net_tx_timeout,
         .ndo_set_rx_mode        = nfp_net_set_rx_mode,
         .ndo_change_mtu         = nfp_net_change_mtu,
@@ -3703,6 +3692,8 @@ static void nfp_net_netdev_init(struct nfp_net *nn)
         netdev->netdev_ops = &nfp_net_netdev_ops;
         netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
  
+       SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
+
         /* MTU range: 68 - hw-specific max */
         netdev->min_mtu = ETH_MIN_MTU;
         netdev->max_mtu = nn->max_mtu;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c

index cfcbc3b9a9aa72b09170cdde5f948534f9554f15..c85a2f18c4df9df095427b33b03f248bce218bc3 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -80,58 +80,6 @@ static int nfp_is_ready(struct nfp_pf *pf)
         return state == 15;
  }
  
-/**
- * nfp_net_map_area() - Help function to map an area
- * @cpp:    NFP CPP handler
- * @name:   Name for the area
- * @target: CPP target
- * @addr:   CPP address
- * @size:   Size of the area
- * @area:   Area handle (returned).
- *
- * This function is primarily to simplify the code in the main probe
- * function. To undo the effect of this functions call
- * @nfp_cpp_area_release_free(*area);
- *
- * Return: Pointer to memory mapped area or ERR_PTR
- */
-static u8 __iomem *nfp_net_map_area(struct nfp_cpp *cpp,
-                                   const char *name, int isl, int target,
-                                   unsigned long long addr, unsigned long size,
-                                   struct nfp_cpp_area **area)
-{
-       u8 __iomem *res;
-       u32 dest;
-       int err;
-
-       dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, isl);
-
-       *area = nfp_cpp_area_alloc_with_name(cpp, dest, name, addr, size);
-       if (!*area) {
-               err = -EIO;
-               goto err_area;
-       }
-
-       err = nfp_cpp_area_acquire(*area);
-       if (err < 0)
-               goto err_acquire;
-
-       res = nfp_cpp_area_iomem(*area);
-       if (!res) {
-               err = -EIO;
-               goto err_map;
-       }
-
-       return res;
-
-err_map:
-       nfp_cpp_area_release(*area);
-err_acquire:
-       nfp_cpp_area_free(*area);
-err_area:
-       return (u8 __iomem *)ERR_PTR(err);
-}
-
  /**
   * nfp_net_get_mac_addr() - Get the MAC address.
   * @pf:       NFP PF handle
@@ -226,31 +174,12 @@ static u8 __iomem *
  nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt,
                      unsigned int min_size, struct nfp_cpp_area **area)
  {
-       const struct nfp_rtsym *sym;
         char pf_symbol[256];
-       u8 __iomem *mem;
  
         snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt,
                  nfp_cppcore_pcie_unit(pf->cpp));
  
-       sym = nfp_rtsym_lookup(pf->rtbl, pf_symbol);
-       if (!sym)
-               return (u8 __iomem *)ERR_PTR(-ENOENT);
-
-       if (sym->size < min_size) {
-               nfp_err(pf->cpp, "PF symbol %s too small\n", pf_symbol);
-               return (u8 __iomem *)ERR_PTR(-EINVAL);
-       }
-
-       mem = nfp_net_map_area(pf->cpp, name, sym->domain, sym->target,
-                              sym->addr, sym->size, area);
-       if (IS_ERR(mem)) {
-               nfp_err(pf->cpp, "Failed to map PF symbol %s: %ld\n",
-                       pf_symbol, PTR_ERR(mem));
-               return mem;
-       }
-
-       return mem;
+       return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area);
  }
  
  static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn)
@@ -485,7 +414,7 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
         if (IS_ERR(ctrl_bar)) {
                 nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
                 err = PTR_ERR(ctrl_bar);
-               goto err_free;
+               goto err_app_clean;
         }
  
         pf->ctrl_vnic = nfp_net_pf_alloc_vnic(pf, false, ctrl_bar, qc_bar,
@@ -499,8 +428,11 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride)
  
  err_unmap:
         nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
+err_app_clean:
+       nfp_app_clean(pf->app);
  err_free:
         nfp_app_free(pf->app);
+       pf->app = NULL;
         return err;
  }
  
@@ -510,6 +442,7 @@ static void nfp_net_pf_app_clean(struct nfp_pf *pf)
                 nfp_net_pf_free_vnic(pf, pf->ctrl_vnic);
                 nfp_cpp_area_release_free(pf->ctrl_vnic_bar);
         }
+       nfp_app_clean(pf->app);
         nfp_app_free(pf->app);
         pf->app = NULL;
  }
@@ -555,8 +488,16 @@ static int nfp_net_pf_app_start(struct nfp_pf *pf)
         if (err)
                 goto err_ctrl_stop;
  
+       if (pf->num_vfs) {
+               err = nfp_app_sriov_enable(pf->app, pf->num_vfs);
+               if (err)
+                       goto err_app_stop;
+       }
+
         return 0;
  
+err_app_stop:
+       nfp_app_stop(pf->app);
  err_ctrl_stop:
         nfp_net_pf_app_stop_ctrl(pf);
         return err;
@@ -564,6 +505,8 @@ err_ctrl_stop:
  
  static void nfp_net_pf_app_stop(struct nfp_pf *pf)
  {
+       if (pf->num_vfs)
+               nfp_app_sriov_disable(pf->app);
         nfp_app_stop(pf->app);
         nfp_net_pf_app_stop_ctrl(pf);
  }
@@ -580,26 +523,22 @@ static void nfp_net_pci_unmap_mem(struct nfp_pf *pf)
  
  static int nfp_net_pci_map_mem(struct nfp_pf *pf)
  {
-       u32 ctrl_bar_sz;
         u8 __iomem *mem;
+       u32 min_size;
         int err;
  
-       ctrl_bar_sz = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
+       min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE;
         mem = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%d_net_bar0",
-                                  ctrl_bar_sz, &pf->data_vnic_bar);
+                                  min_size, &pf->data_vnic_bar);
         if (IS_ERR(mem)) {
                 nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n");
-               err = PTR_ERR(mem);
-               if (!pf->fw_loaded && err == -ENOENT)
-                       err = -EPROBE_DEFER;
-               return err;
+               return PTR_ERR(mem);
         }
  
-       pf->mac_stats_mem = nfp_net_pf_map_rtsym(pf, "net.macstats",
-                                                "_mac_stats",
-                                                NFP_MAC_STATS_SIZE *
-                                                (pf->eth_tbl->max_index + 1),
-                                                &pf->mac_stats_bar);
+       min_size =  NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1);
+       pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats",
+                                         "net.macstats", min_size,
+                                         &pf->mac_stats_bar);
         if (IS_ERR(pf->mac_stats_mem)) {
                 if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) {
                         err = PTR_ERR(pf->mac_stats_mem);
@@ -620,7 +559,7 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf)
                 pf->vf_cfg_mem = NULL;
         }
  
-       mem = nfp_net_map_area(pf->cpp, "net.qc", 0, 0,
+       mem = nfp_cpp_map_area(pf->cpp, "net.qc", 0, 0,
                                NFP_PCIE_QUEUE(0), NFP_QCP_QUEUE_AREA_SZ,
                                &pf->qc_area);
         if (IS_ERR(mem)) {
@@ -743,7 +682,7 @@ void nfp_net_refresh_port_table(struct nfp_port *port)
  
         set_bit(NFP_PORT_CHANGED, &port->flags);
  
-       schedule_work(&pf->port_refresh_work);
+       queue_work(pf->wq, &pf->port_refresh_work);
  }
  
  int nfp_net_refresh_eth_port(struct nfp_port *port)
@@ -786,6 +725,12 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
                 return -EINVAL;
         }
  
+       if (!pf->rtbl) {
+               nfp_err(pf->cpp, "No %s, giving up.\n",
+                       pf->fw_loaded ? "symbol table" : "firmware found");
+               return -EPROBE_DEFER;
+       }
+
         mutex_lock(&pf->lock);
         pf->max_data_vnics = nfp_net_pf_get_num_ports(pf);
         if ((int)pf->max_data_vnics < 0) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c

index 44adcc5df11ec8ad1f6d0c834a8f6c83d9b6bab3..8ec5474f4b186f46baadafa33a4cab18cabea95d 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -35,8 +35,10 @@
  #include <linux/io-64-nonatomic-hi-lo.h>
  #include <linux/lockdep.h>
  #include <net/dst_metadata.h>
+#include <net/switchdev.h>
  
  #include "nfpcore/nfp_cpp.h"
+#include "nfpcore/nfp_nsp.h"
  #include "nfp_app.h"
  #include "nfp_main.h"
  #include "nfp_net_ctrl.h"
@@ -135,25 +137,34 @@ nfp_repr_pf_get_stats64(const struct nfp_app *app, u8 pf,
         stats->rx_dropped = readq(mem + NFP_NET_CFG_STATS_TX_DISCARDS);
  }
  
-void
-nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type,
-                    u8 port, struct rtnl_link_stats64 *stats)
+static void
+nfp_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
  {
-       switch (type) {
-       case NFP_REPR_TYPE_PHYS_PORT:
-               nfp_repr_phy_port_get_stats64(app, port, stats);
+       struct nfp_repr *repr = netdev_priv(netdev);
+       struct nfp_eth_table_port *eth_port;
+       struct nfp_app *app = repr->app;
+
+       if (WARN_ON(!repr->port))
+               return;
+
+       switch (repr->port->type) {
+       case NFP_PORT_PHYS_PORT:
+               eth_port = __nfp_port_get_eth_port(repr->port);
+               if (!eth_port)
+                       break;
+               nfp_repr_phy_port_get_stats64(app, eth_port->index, stats);
                 break;
-       case NFP_REPR_TYPE_PF:
-               nfp_repr_pf_get_stats64(app, port, stats);
+       case NFP_PORT_PF_PORT:
+               nfp_repr_pf_get_stats64(app, repr->port->pf_id, stats);
                 break;
-       case NFP_REPR_TYPE_VF:
-               nfp_repr_vf_get_stats64(app, port, stats);
+       case NFP_PORT_VF_PORT:
+               nfp_repr_vf_get_stats64(app, repr->port->vf_id, stats);
         default:
                 break;
         }
  }
  
-bool
+static bool
  nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id)
  {
         switch (attr_id) {
@@ -196,8 +207,9 @@ nfp_repr_get_host_stats64(const struct net_device *netdev,
         return 0;
  }
  
-int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
-                              void *stats)
+static int
+nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
+                          void *stats)
  {
         switch (attr_id) {
         case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -207,7 +219,7 @@ int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
         return -EINVAL;
  }
  
-netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
  {
         struct nfp_repr *repr = netdev_priv(netdev);
         unsigned int len = skb->len;
@@ -224,6 +236,31 @@ netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev)
         return ret;
  }
  
+static int nfp_repr_stop(struct net_device *netdev)
+{
+       struct nfp_repr *repr = netdev_priv(netdev);
+
+       return nfp_app_repr_stop(repr->app, repr);
+}
+
+static int nfp_repr_open(struct net_device *netdev)
+{
+       struct nfp_repr *repr = netdev_priv(netdev);
+
+       return nfp_app_repr_open(repr->app, repr);
+}
+
+const struct net_device_ops nfp_repr_netdev_ops = {
+       .ndo_open               = nfp_repr_open,
+       .ndo_stop               = nfp_repr_stop,
+       .ndo_start_xmit         = nfp_repr_xmit,
+       .ndo_get_stats64        = nfp_repr_get_stats64,
+       .ndo_has_offload_stats  = nfp_repr_has_offload_stats,
+       .ndo_get_offload_stats  = nfp_repr_get_offload_stats,
+       .ndo_get_phys_port_name = nfp_port_get_phys_port_name,
+       .ndo_setup_tc           = nfp_port_setup_tc,
+};
+
  static void nfp_repr_clean(struct nfp_repr *repr)
  {
         unregister_netdev(repr->netdev);
@@ -248,8 +285,8 @@ static void nfp_repr_set_lockdep_class(struct net_device *dev)
  }
  
  int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
-                 const struct net_device_ops *netdev_ops, u32 cmsg_port_id,
-                 struct nfp_port *port, struct net_device *pf_netdev)
+                 u32 cmsg_port_id, struct nfp_port *port,
+                 struct net_device *pf_netdev)
  {
         struct nfp_repr *repr = netdev_priv(netdev);
         int err;
@@ -263,7 +300,13 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
         repr->dst->u.port_info.port_id = cmsg_port_id;
         repr->dst->u.port_info.lower_dev = pf_netdev;
  
-       netdev->netdev_ops = netdev_ops;
+       netdev->netdev_ops = &nfp_repr_netdev_ops;
+       SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
+
+       if (nfp_app_has_tc(app)) {
+               netdev->features |= NETIF_F_HW_TC;
+               netdev->hw_features |= NETIF_F_HW_TC;
+       }
  
         err = register_netdev(netdev);
         if (err)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h

index c5ed6611f7087be9288d18695171cf5e0c15a83b..32179cad062ab3227f2c9758f20ca58d329d9053 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -38,6 +38,8 @@ struct metadata_dst;
  struct nfp_net;
  struct nfp_port;
  
+#include <net/dst_metadata.h>
+
  /**
   * struct nfp_reprs - container for representor netdevs
   * @num_reprs: Number of elements in reprs array
@@ -97,16 +99,22 @@ enum nfp_repr_type {
  };
  #define NFP_REPR_TYPE_MAX (__NFP_REPR_TYPE_MAX - 1)
  
+extern const struct net_device_ops nfp_repr_netdev_ops;
+
+static inline bool nfp_netdev_is_nfp_repr(struct net_device *netdev)
+{
+       return netdev->netdev_ops == &nfp_repr_netdev_ops;
+}
+
+static inline int nfp_repr_get_port_id(struct net_device *netdev)
+{
+       struct nfp_repr *priv = netdev_priv(netdev);
+
+       return priv->dst->u.port_info.port_id;
+}
+
  void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len);
-void
-nfp_repr_get_stats64(const struct nfp_app *app, enum nfp_repr_type type,
-                    u8 port, struct rtnl_link_stats64 *stats);
-bool nfp_repr_has_offload_stats(const struct net_device *dev, int attr_id);
-int nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev,
-                              void *stats);
-netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev);
  int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
-                 const struct net_device_ops *netdev_ops,
                   u32 cmsg_port_id, struct nfp_port *port,
                   struct net_device *pf_netdev);
  struct net_device *nfp_repr_alloc(struct nfp_app *app);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c

index 19bceeb822258b4703d8cbca83199551de4336b0..776e54dd5dd0508df072dd09c61d4da755147270 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -32,6 +32,7 @@
   */
  
  #include <linux/lockdep.h>
+#include <net/switchdev.h>
  
  #include "nfpcore/nfp_cpp.h"
  #include "nfpcore/nfp_nsp.h"
@@ -42,13 +43,64 @@
  
  struct nfp_port *nfp_port_from_netdev(struct net_device *netdev)
  {
-       struct nfp_net *nn;
+       if (nfp_netdev_is_nfp_net(netdev)) {
+               struct nfp_net *nn = netdev_priv(netdev);
  
-       if (WARN_ON(!nfp_netdev_is_nfp_net(netdev)))
-               return NULL;
-       nn = netdev_priv(netdev);
+               return nn->port;
+       }
+
+       if (nfp_netdev_is_nfp_repr(netdev)) {
+               struct nfp_repr *repr = netdev_priv(netdev);
+
+               return repr->port;
+       }
  
-       return nn->port;
+       WARN(1, "Unknown netdev type for nfp_port\n");
+
+       return NULL;
+}
+
+static int
+nfp_port_attr_get(struct net_device *netdev, struct switchdev_attr *attr)
+{
+       struct nfp_port *port;
+
+       port = nfp_port_from_netdev(netdev);
+       if (!port)
+               return -EOPNOTSUPP;
+
+       switch (attr->id) {
+       case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: {
+               const u8 *serial;
+               /* N.B: attr->u.ppid.id is binary data */
+               attr->u.ppid.id_len = nfp_cpp_serial(port->app->cpp, &serial);
+               memcpy(&attr->u.ppid.id, serial, attr->u.ppid.id_len);
+               break;
+       }
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+const struct switchdev_ops nfp_port_switchdev_ops = {
+       .switchdev_port_attr_get        = nfp_port_attr_get,
+};
+
+int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+                     __be16 proto, struct tc_to_netdev *tc)
+{
+       struct nfp_port *port;
+
+       if (chain_index)
+               return -EOPNOTSUPP;
+
+       port = nfp_port_from_netdev(netdev);
+       if (!port)
+               return -EOPNOTSUPP;
+
+       return nfp_app_setup_tc(port->app, netdev, handle, proto, tc);
  }
  
  struct nfp_port *
@@ -98,15 +150,31 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
         int n;
  
         port = nfp_port_from_netdev(netdev);
-       eth_port = __nfp_port_get_eth_port(port);
-       if (!eth_port)
+       if (!port)
+               return -EOPNOTSUPP;
+
+       switch (port->type) {
+       case NFP_PORT_PHYS_PORT:
+               eth_port = __nfp_port_get_eth_port(port);
+               if (!eth_port)
+                       return -EOPNOTSUPP;
+
+               if (!eth_port->is_split)
+                       n = snprintf(name, len, "p%d", eth_port->label_port);
+               else
+                       n = snprintf(name, len, "p%ds%d", eth_port->label_port,
+                                    eth_port->label_subport);
+               break;
+       case NFP_PORT_PF_PORT:
+               n = snprintf(name, len, "pf%d", port->pf_id);
+               break;
+       case NFP_PORT_VF_PORT:
+               n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id);
+               break;
+       default:
                 return -EOPNOTSUPP;
+       }
  
-       if (!eth_port->is_split)
-               n = snprintf(name, len, "p%d", eth_port->label_port);
-       else
-               n = snprintf(name, len, "p%ds%d", eth_port->label_port,
-                            eth_port->label_subport);
         if (n >= len)
                 return -EINVAL;
  
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h

index f472bea4ec2bc3f2b3a40489afb74e7a5d6a3f03..a33d22e18f94f80fbccc3e66e0ef9fb342ebecf7 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -36,6 +36,7 @@
  
  #include <net/devlink.h>
  
+struct tc_to_netdev;
  struct net_device;
  struct nfp_app;
  struct nfp_pf;
@@ -47,10 +48,14 @@ struct nfp_port;
   *                     state when port disappears because of FW fault or config
   *                     change
   * @NFP_PORT_PHYS_PORT:        external NIC port
+ * @NFP_PORT_PF_PORT:  logical port of PCI PF
+ * @NFP_PORT_VF_PORT:  logical port of PCI VF
   */
  enum nfp_port_type {
         NFP_PORT_INVALID,
         NFP_PORT_PHYS_PORT,
+       NFP_PORT_PF_PORT,
+       NFP_PORT_VF_PORT,
  };
  
  /**
@@ -72,6 +77,8 @@ enum nfp_port_flags {
   * @dl_port:   devlink port structure
   * @eth_id:    for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme
   * @eth_port:  for %NFP_PORT_PHYS_PORT translated ETH Table port entry
+ * @pf_id:     for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3)
+ * @vf_id:     for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id
   * @port_list: entry on pf's list of ports
   */
  struct nfp_port {
@@ -84,12 +91,27 @@ struct nfp_port {
  
         struct devlink_port dl_port;
  
-       unsigned int eth_id;
-       struct nfp_eth_table_port *eth_port;
+       union {
+               /* NFP_PORT_PHYS_PORT */
+               struct {
+                       unsigned int eth_id;
+                       struct nfp_eth_table_port *eth_port;
+               };
+               /* NFP_PORT_PF_PORT, NFP_PORT_VF_PORT */
+               struct {
+                       unsigned int pf_id;
+                       unsigned int vf_id;
+               };
+       };
  
         struct list_head port_list;
  };
  
+extern const struct switchdev_ops nfp_port_switchdev_ops;
+
+int nfp_port_setup_tc(struct net_device *netdev, u32 handle, u32 chain_index,
+                     __be16 proto, struct tc_to_netdev *tc);
+
  struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
  struct nfp_port *
  nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id);
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h

index 25a967158ce9cb6706e617d79d642f05630bcc99..5798adc57cbc9962ec36a74e4f133c5c22d5c78c 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h
@@ -230,6 +230,9 @@ struct nfp_cpp_area *nfp_cpp_area_alloc_with_name(struct nfp_cpp *cpp,
  struct nfp_cpp_area *nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 cpp_id,
                                         unsigned long long address,
                                         unsigned long size);
+struct nfp_cpp_area *
+nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 cpp_id,
+                          unsigned long long address, unsigned long size);
  void nfp_cpp_area_free(struct nfp_cpp_area *area);
  int nfp_cpp_area_acquire(struct nfp_cpp_area *area);
  int nfp_cpp_area_acquire_nonblocking(struct nfp_cpp_area *area);
@@ -239,8 +242,6 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area, unsigned long offset,
                       void *buffer, size_t length);
  int nfp_cpp_area_write(struct nfp_cpp_area *area, unsigned long offset,
                        const void *buffer, size_t length);
-int nfp_cpp_area_check_range(struct nfp_cpp_area *area,
-                            unsigned long long offset, unsigned long size);
  const char *nfp_cpp_area_name(struct nfp_cpp_area *cpp_area);
  void *nfp_cpp_area_priv(struct nfp_cpp_area *cpp_area);
  struct nfp_cpp *nfp_cpp_area_cpp(struct nfp_cpp_area *cpp_area);
@@ -278,6 +279,10 @@ int nfp_cpp_readq(struct nfp_cpp *cpp, u32 cpp_id,
  int nfp_cpp_writeq(struct nfp_cpp *cpp, u32 cpp_id,
                    unsigned long long address, u64 value);
  
+u8 __iomem *
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
+                u64 addr, unsigned long size, struct nfp_cpp_area **area);
+
  struct nfp_cpp_mutex;
  
  int nfp_cpp_mutex_init(struct nfp_cpp *cpp, int target,
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c

index 9b69dcf87be934b10d57727626388a2b0b76e958..04dd5758ecf54223cdc80b9632f7e3b7f7d709fe 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c
@@ -360,6 +360,41 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest,
         return nfp_cpp_area_alloc_with_name(cpp, dest, NULL, address, size);
  }
  
+/**
+ * nfp_cpp_area_alloc_acquire() - allocate a new CPP area and lock it down
+ * @cpp:       CPP handle
+ * @name:      Name of region
+ * @dest:      CPP id
+ * @address:   Start address on CPP target
+ * @size:      Size of area
+ *
+ * Allocate and initialize a CPP area structure, and lock it down so
+ * that it can be accessed directly.
+ *
+ * NOTE: @address and @size must be 32-bit aligned values.
+ *
+ * NOTE: The area must also be 'released' when the structure is freed.
+ *
+ * Return: NFP CPP Area handle, or NULL
+ */
+struct nfp_cpp_area *
+nfp_cpp_area_alloc_acquire(struct nfp_cpp *cpp, const char *name, u32 dest,
+                          unsigned long long address, unsigned long size)
+{
+       struct nfp_cpp_area *area;
+
+       area = nfp_cpp_area_alloc_with_name(cpp, dest, name, address, size);
+       if (!area)
+               return NULL;
+
+       if (nfp_cpp_area_acquire(area)) {
+               nfp_cpp_area_free(area);
+               return NULL;
+       }
+
+       return area;
+}
+
  /**
   * nfp_cpp_area_free() - free up the CPP area
   * @area:      CPP area handle
@@ -535,27 +570,6 @@ int nfp_cpp_area_write(struct nfp_cpp_area *area,
         return area->cpp->op->area_write(area, kernel_vaddr, offset, length);
  }
  
-/**
- * nfp_cpp_area_check_range() - check if address range fits in CPP area
- * @area:      CPP area handle
- * @offset:    offset into CPP target
- * @length:    size of address range in bytes
- *
- * Check if address range fits within CPP area.  Return 0 if area
- * fits or -EFAULT on error.
- *
- * Return: 0, or -ERRNO
- */
-int nfp_cpp_area_check_range(struct nfp_cpp_area *area,
-                            unsigned long long offset, unsigned long length)
-{
-       if (offset < area->offset ||
-           offset + length > area->offset + area->size)
-               return -EFAULT;
-
-       return 0;
-}
-
  /**
   * nfp_cpp_area_name() - return name of a CPP area
   * @cpp_area:  CPP area handle
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c

index 0ba0379b8f7581bab817b1c5ab65fe024b55451e..ab86bceb93f2dbc907bcf88bb1653a7d8a8b87c4 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c
@@ -279,3 +279,43 @@ exit_release:
  
         return err;
  }
+
+/**
+ * nfp_cpp_map_area() - Helper function to map an area
+ * @cpp:    NFP CPP handler
+ * @name:   Name for the area
+ * @domain: CPP domain
+ * @target: CPP target
+ * @addr:   CPP address
+ * @size:   Size of the area
+ * @area:   Area handle (output)
+ *
+ * Map an area of IOMEM access.  To undo the effect of this function, call
+ * nfp_cpp_area_release_free(*area).
+ *
+ * Return: Pointer to memory mapped area or ERR_PTR
+ */
+u8 __iomem *
+nfp_cpp_map_area(struct nfp_cpp *cpp, const char *name, int domain, int target,
+                u64 addr, unsigned long size, struct nfp_cpp_area **area)
+{
+       u8 __iomem *res;
+       u32 dest;
+
+       dest = NFP_CPP_ISLAND_ID(target, NFP_CPP_ACTION_RW, 0, domain);
+
+       *area = nfp_cpp_area_alloc_acquire(cpp, name, dest, addr, size);
+       if (!*area)
+               goto err_eio;
+
+       res = nfp_cpp_area_iomem(*area);
+       if (!res)
+               goto err_release_free;
+
+       return res;
+
+err_release_free:
+       nfp_cpp_area_release_free(*area);
+err_eio:
+       return (u8 __iomem *)ERR_PTR(-EIO);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h

index d27d29782a1262be31f7e067122ec2f2bcca2f18..c9724fb7ea4b23cf695dc28d6dc1e2922930262a 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.h
@@ -97,7 +97,11 @@ int nfp_rtsym_count(struct nfp_rtsym_table *rtbl);
  const struct nfp_rtsym *nfp_rtsym_get(struct nfp_rtsym_table *rtbl, int idx);
  const struct nfp_rtsym *
  nfp_rtsym_lookup(struct nfp_rtsym_table *rtbl, const char *name);
+
  u64 nfp_rtsym_read_le(struct nfp_rtsym_table *rtbl, const char *name,
                       int *error);
+u8 __iomem *
+nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
+             unsigned int min_size, struct nfp_cpp_area **area);
  
  #endif /* NFP_NFFW_H */
diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c

index 203f9cbae0fbc79271f4e6c724a90acb89a20137..ecda474ac7c3e256730e1319766caf40f44d8053 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_rtsym.c
@@ -289,3 +289,30 @@ exit:
                 return ~0ULL;
         return val;
  }
+
+u8 __iomem *
+nfp_rtsym_map(struct nfp_rtsym_table *rtbl, const char *name, const char *id,
+             unsigned int min_size, struct nfp_cpp_area **area)
+{
+       const struct nfp_rtsym *sym;
+       u8 __iomem *mem;
+
+       sym = nfp_rtsym_lookup(rtbl, name);
+       if (!sym)
+               return (u8 __iomem *)ERR_PTR(-ENOENT);
+
+       if (sym->size < min_size) {
+               nfp_err(rtbl->cpp, "Symbol %s too small\n", name);
+               return (u8 __iomem *)ERR_PTR(-EINVAL);
+       }
+
+       mem = nfp_cpp_map_area(rtbl->cpp, id, sym->domain, sym->target,
+                              sym->addr, sym->size, area);
+       if (IS_ERR(mem)) {
+               nfp_err(rtbl->cpp, "Failed to map symbol %s: %ld\n",
+                       name, PTR_ERR(mem));
+               return mem;
+       }
+
+       return mem;
+}
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c

index e306765155290a31e390f5ddcf88b4f0aa8f973c..6cec2a6a3dcc3214c5d200484d2c1046bee44854 100644 (file)
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -174,7 +174,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter)
  {
         int err = 0, i;
         u32 *template, *tmp_buf;
-       struct netxen_minidump_template_hdr *hdr;
         err = netxen_get_minidump_template_size(adapter);
         if (err) {
                 adapter->mdump.fw_supports_md = 0;
@@ -218,8 +217,6 @@ netxen_setup_minidump(struct netxen_adapter *adapter)
         template = (u32 *) adapter->mdump.md_template;
         for (i = 0; i < adapter->mdump.md_template_size/sizeof(u32); i++)
                 *template++ = __le32_to_cpu(*tmp_buf++);
-       hdr = (struct netxen_minidump_template_hdr *)
-                               adapter->mdump.md_template;
         adapter->mdump.md_capture_buff = NULL;
         adapter->mdump.fw_supports_md = 1;
         adapter->mdump.md_enabled = 0;
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile

index 67452380b60eb6b420d386d5c01bb7f69b44ac4d..82dd47068e1861bdbc368eeb625294f036b8b651 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -5,6 +5,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
          qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o
  qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
  qed-$(CONFIG_QED_LL2) += qed_ll2.o
-qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o
+qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o
  qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
  qed-$(CONFIG_QED_FCOE) += qed_fcoe.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h

index 14b08ee9e3ade1db0bf5053de37fa9e76f140e2b..91003bc6f00bd8599d3908811956fc5459e902ae 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -210,14 +210,16 @@ struct qed_tunn_update_params {
  
  /* The PCI personality is not quite synonymous to protocol ID:
   * 1. All personalities need CORE connections
- * 2. The Ethernet personality may support also the RoCE protocol
+ * 2. The Ethernet personality may also support the RoCE/iWARP protocols
   */
  enum qed_pci_personality {
         QED_PCI_ETH,
         QED_PCI_FCOE,
         QED_PCI_ISCSI,
         QED_PCI_ETH_ROCE,
-       QED_PCI_DEFAULT /* default in shmem */
+       QED_PCI_ETH_IWARP,
+       QED_PCI_ETH_RDMA,
+       QED_PCI_DEFAULT, /* default in shmem */
  };
  
  /* All VFs are symmetric, all counters are PF + all VFs */
@@ -277,6 +279,7 @@ enum qed_dev_cap {
         QED_DEV_CAP_FCOE,
         QED_DEV_CAP_ISCSI,
         QED_DEV_CAP_ROCE,
+       QED_DEV_CAP_IWARP,
  };
  
  enum qed_wol_support {
@@ -286,7 +289,24 @@ enum qed_wol_support {
  
  struct qed_hw_info {
         /* PCI personality */
-       enum qed_pci_personality        personality;
+       enum qed_pci_personality personality;
+#define QED_IS_RDMA_PERSONALITY(dev)                       \
+       ((dev)->hw_info.personality == QED_PCI_ETH_ROCE ||  \
+        (dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+        (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_ROCE_PERSONALITY(dev)                      \
+       ((dev)->hw_info.personality == QED_PCI_ETH_ROCE || \
+        (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_IWARP_PERSONALITY(dev)                      \
+       ((dev)->hw_info.personality == QED_PCI_ETH_IWARP || \
+        (dev)->hw_info.personality == QED_PCI_ETH_RDMA)
+#define QED_IS_L2_PERSONALITY(dev)                   \
+       ((dev)->hw_info.personality == QED_PCI_ETH || \
+        QED_IS_RDMA_PERSONALITY(dev))
+#define QED_IS_FCOE_PERSONALITY(dev) \
+       ((dev)->hw_info.personality == QED_PCI_FCOE)
+#define QED_IS_ISCSI_PERSONALITY(dev) \
+       ((dev)->hw_info.personality == QED_PCI_ISCSI)
  
         /* Resource Allocation scheme results */
         u32                             resc_start[QED_MAX_RESC];
@@ -759,7 +779,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
  }
  
  #define PURE_LB_TC 8
-#define OOO_LB_TC 9
+#define PKT_LB_TC 9
  
  int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate);
  void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
@@ -769,6 +789,8 @@ void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
  void qed_clean_wfq_db(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
  int qed_device_num_engines(struct qed_dev *cdev);
  int qed_device_get_port_id(struct qed_dev *cdev);
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+                        __le16 *fw_mid, __le16 *fw_lsb, u8 *mac);
  
  #define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
  
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c

index e201214764db298081db01b0137028780b4ce453..af106be8cc080bcd57986e4bbbb14bc2c09cc547 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -246,14 +246,16 @@ struct qed_cxt_mngr {
  static bool src_proto(enum protocol_type type)
  {
         return type == PROTOCOLID_ISCSI ||
-              type == PROTOCOLID_FCOE;
+              type == PROTOCOLID_FCOE ||
+              type == PROTOCOLID_IWARP;
  }
  
  static bool tm_cid_proto(enum protocol_type type)
  {
         return type == PROTOCOLID_ISCSI ||
                type == PROTOCOLID_FCOE ||
-              type == PROTOCOLID_ROCE;
+              type == PROTOCOLID_ROCE ||
+              type == PROTOCOLID_IWARP;
  }
  
  static bool tm_tid_proto(enum protocol_type type)
@@ -853,7 +855,7 @@ u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines)
         if (!excess_lines)
                 return 0;
  
-       if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+       if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
                 return 0;
  
         p_mngr = p_hwfn->p_cxt_mngr;
@@ -1033,7 +1035,7 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn,
         u32 lines, line, sz_left, lines_to_skip = 0;
  
         /* Special handling for RoCE that supports dynamic allocation */
-       if ((p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) &&
+       if (QED_IS_RDMA_PERSONALITY(p_hwfn) &&
             ((ilt_client == ILT_CLI_CDUT) || ilt_client == ILT_CLI_TSDM))
                 return 0;
  
@@ -1833,7 +1835,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn)
                 tm_offset += tm_iids.pf_tids[i];
         }
  
-       if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE)
+       if (QED_IS_RDMA_PERSONALITY(p_hwfn))
                 active_seg_mask = 0;
  
         STORE_RT_REG(p_hwfn, TM_REG_PF_ENABLE_TASK_RT_OFFSET, active_seg_mask);
@@ -2068,6 +2070,11 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
         num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs);
  
         switch (p_hwfn->hw_info.personality) {
+       case QED_PCI_ETH_IWARP:
+               /* Each QP requires one connection */
+               num_cons = min_t(u32, IWARP_MAX_QPS, p_params->num_qps);
+               proto = PROTOCOLID_IWARP;
+               break;
         case QED_PCI_ETH_ROCE:
                 num_qps = min_t(u32, ROCE_MAX_QPS, p_params->num_qps);
                 num_cons = num_qps * 2; /* each QP requires two connections */
@@ -2103,6 +2110,8 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks)
         qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0);
  
         switch (p_hwfn->hw_info.personality) {
+       case QED_PCI_ETH_RDMA:
+       case QED_PCI_ETH_IWARP:
         case QED_PCI_ETH_ROCE:
         {
                         qed_rdma_set_pf_params(p_hwfn,
@@ -2344,7 +2353,7 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
                        last_cid_allocated - 1);
  
                 if (!p_hwfn->b_rdma_enabled_in_prs) {
-                       /* Enable RoCE search */
+                       /* Enable RDMA search */
                         qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
                         p_hwfn->b_rdma_enabled_in_prs = true;
                 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c

index 49667ad9042da9db095694d206634c74c4a46f4a..6c87bed13bd233328d2cdc12223ab5654c57d061 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -216,6 +216,10 @@ static u32 qed_get_pq_flags(struct qed_hwfn *p_hwfn)
         case QED_PCI_ETH_ROCE:
                 flags |= PQ_FLAGS_MCOS | PQ_FLAGS_OFLD | PQ_FLAGS_LLT;
                 break;
+       case QED_PCI_ETH_IWARP:
+               flags |= PQ_FLAGS_MCOS | PQ_FLAGS_ACK | PQ_FLAGS_OOO |
+                   PQ_FLAGS_OFLD;
+               break;
         default:
                 DP_ERR(p_hwfn,
                        "unknown personality %d\n", p_hwfn->hw_info.personality);
@@ -936,9 +940,16 @@ int qed_resc_alloc(struct qed_dev *cdev)
  
                 /* EQ */
                 n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain);
-               if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+               if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
+                       enum protocol_type rdma_proto;
+
+                       if (QED_IS_ROCE_PERSONALITY(p_hwfn))
+                               rdma_proto = PROTOCOLID_ROCE;
+                       else
+                               rdma_proto = PROTOCOLID_IWARP;
+
                         num_cons = qed_cxt_get_proto_cid_count(p_hwfn,
-                                                              PROTOCOLID_ROCE,
+                                                              rdma_proto,
                                                                NULL) * 2;
                         n_eqes += num_cons + 2 * MAX_NUM_VFS_BB;
                 } else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
@@ -2057,7 +2068,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
         qed_int_get_num_sbs(p_hwfn, &sb_cnt);
  
         if (IS_ENABLED(CONFIG_QED_RDMA) &&
-           p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+           QED_IS_RDMA_PERSONALITY(p_hwfn)) {
                 /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide
                  * the status blocks equally between L2 / RoCE but with
                  * consideration as to how many l2 queues / cnqs we have.
@@ -2068,9 +2079,7 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
  
                 non_l2_sbs = feat_num[QED_RDMA_CNQ];
         }
-
-       if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE ||
-           p_hwfn->hw_info.personality == QED_PCI_ETH) {
+       if (QED_IS_L2_PERSONALITY(p_hwfn)) {
                 /* Start by allocating VF queues, then PF's */
                 feat_num[QED_VF_L2_QUE] = min_t(u32,
                                                 RESC_NUM(p_hwfn, QED_L2_QUEUE),
@@ -2083,12 +2092,12 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
                                                          QED_VF_L2_QUE));
         }
  
-       if (p_hwfn->hw_info.personality == QED_PCI_FCOE)
+       if (QED_IS_FCOE_PERSONALITY(p_hwfn))
                 feat_num[QED_FCOE_CQ] =  min_t(u32, sb_cnt.cnt,
                                                RESC_NUM(p_hwfn,
                                                         QED_CMDQS_CQS));
  
-       if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+       if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
                 feat_num[QED_ISCSI_CQ] = min_t(u32, sb_cnt.cnt,
                                                RESC_NUM(p_hwfn,
                                                         QED_CMDQS_CQS));
@@ -4122,3 +4131,14 @@ int qed_device_get_port_id(struct qed_dev *cdev)
  {
         return (QED_LEADING_HWFN(cdev)->abs_pf_id) % qed_device_num_ports(cdev);
  }
+
+void qed_set_fw_mac_addr(__le16 *fw_msb,
+                        __le16 *fw_mid, __le16 *fw_lsb, u8 *mac)
+{
+       ((u8 *)fw_msb)[0] = mac[1];
+       ((u8 *)fw_msb)[1] = mac[0];
+       ((u8 *)fw_mid)[0] = mac[3];
+       ((u8 *)fw_mid)[1] = mac[2];
+       ((u8 *)fw_lsb)[0] = mac[5];
+       ((u8 *)fw_lsb)[1] = mac[4];
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h

index 3bf3614b30846ef55952820189e0b22ac221f702..31fb0bffa098376cfd91e99ecae0d7dc22dc0245 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -46,6 +46,7 @@
  #include <linux/qed/fcoe_common.h>
  #include <linux/qed/eth_common.h>
  #include <linux/qed/iscsi_common.h>
+#include <linux/qed/iwarp_common.h>
  #include <linux/qed/rdma_common.h>
  #include <linux/qed/roce_common.h>
  #include <linux/qed/qed_fcoe_if.h>
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c

new file mode 100644 (file)

index 0000000..5cd20da
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -0,0 +1,2409 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/spinlock.h>
+#include <linux/tcp.h>
+#include "qed_cxt.h"
+#include "qed_hw.h"
+#include "qed_ll2.h"
+#include "qed_rdma.h"
+#include "qed_reg_addr.h"
+#include "qed_sp.h"
+
+#define QED_IWARP_ORD_DEFAULT          32
+#define QED_IWARP_IRD_DEFAULT          32
+#define QED_IWARP_MAX_FW_MSS           4120
+
+#define QED_EP_SIG 0xecabcdef
+
+struct mpa_v2_hdr {
+       __be16 ird;
+       __be16 ord;
+};
+
+#define MPA_V2_PEER2PEER_MODEL  0x8000
+#define MPA_V2_SEND_RTR         0x4000 /* on ird */
+#define MPA_V2_READ_RTR         0x4000 /* on ord */
+#define MPA_V2_WRITE_RTR        0x8000
+#define MPA_V2_IRD_ORD_MASK     0x3FFF
+
+#define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
+
+#define QED_IWARP_INVALID_TCP_CID      0xffffffff
+#define QED_IWARP_RCV_WND_SIZE_DEF     (256 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_MIN     (64 * 1024)
+#define TIMESTAMP_HEADER_SIZE          (12)
+
+#define QED_IWARP_TS_EN                        BIT(0)
+#define QED_IWARP_DA_EN                        BIT(1)
+#define QED_IWARP_PARAM_CRC_NEEDED     (1)
+#define QED_IWARP_PARAM_P2P            (1)
+
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+                                u8 fw_event_code, u16 echo,
+                                union event_ring_data *data,
+                                u8 fw_return_code);
+
+/* Override devinfo with iWARP specific values */
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn)
+{
+       struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+
+       dev->max_inline = IWARP_REQ_MAX_INLINE_DATA_SIZE;
+       dev->max_qp = min_t(u32,
+                           IWARP_MAX_QPS,
+                           p_hwfn->p_rdma_info->num_qps) -
+                     QED_IWARP_PREALLOC_CNT;
+
+       dev->max_cq = dev->max_qp;
+
+       dev->max_qp_resp_rd_atomic_resc = QED_IWARP_IRD_DEFAULT;
+       dev->max_qp_req_rd_atomic_resc = QED_IWARP_ORD_DEFAULT;
+}
+
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_TCP;
+       qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 1);
+       p_hwfn->b_rdma_enabled_in_prs = true;
+}
+
+/* We have two cid maps: one for tcp, which should be used only from passive
+ * syn processing and replacing a pre-allocated ep in the list; the second
+ * is for active tcp and for QPs.
+ */
+static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid)
+{
+       cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+       spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+       if (cid < QED_IWARP_PREALLOC_CNT)
+               qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+                                   cid);
+       else
+               qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+
+       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+       int rc;
+
+       spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+       rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, cid);
+       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+       if (rc) {
+               DP_NOTICE(p_hwfn, "Failed in allocating iwarp cid\n");
+               return rc;
+       }
+       *cid += qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+       rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *cid);
+       if (rc)
+               qed_iwarp_cid_cleaned(p_hwfn, *cid);
+
+       return rc;
+}
+
+static void qed_iwarp_set_tcp_cid(struct qed_hwfn *p_hwfn, u32 cid)
+{
+       cid -= qed_cxt_get_proto_cid_start(p_hwfn, p_hwfn->p_rdma_info->proto);
+
+       spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+       qed_bmap_set_id(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+/* This function allocates a cid for passive tcp (called from syn receive).
+ * It is separate from the regular cid allocation because these cids are
+ * assured to already have ilt allocated: they are preallocated to ensure
+ * that we won't need to allocate memory during syn processing.
+ */
+static int qed_iwarp_alloc_tcp_cid(struct qed_hwfn *p_hwfn, u32 *cid)
+{
+       int rc;
+
+       spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+       rc = qed_rdma_bmap_alloc_id(p_hwfn,
+                                   &p_hwfn->p_rdma_info->tcp_cid_map, cid);
+
+       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+       if (rc) {
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "can't allocate iwarp tcp cid max-count=%d\n",
+                          p_hwfn->p_rdma_info->tcp_cid_map.max_count);
+
+               *cid = QED_IWARP_INVALID_TCP_CID;
+               return rc;
+       }
+
+       *cid += qed_cxt_get_proto_cid_start(p_hwfn,
+                                           p_hwfn->p_rdma_info->proto);
+       return 0;
+}
+
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+                       struct qed_rdma_qp *qp,
+                       struct qed_rdma_create_qp_out_params *out_params)
+{
+       struct iwarp_create_qp_ramrod_data *p_ramrod;
+       struct qed_sp_init_data init_data;
+       struct qed_spq_entry *p_ent;
+       u16 physical_queue;
+       u32 cid;
+       int rc;
+
+       qp->shared_queue = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+                                             IWARP_SHARED_QUEUE_PAGE_SIZE,
+                                             &qp->shared_queue_phys_addr,
+                                             GFP_KERNEL);
+       if (!qp->shared_queue)
+               return -ENOMEM;
+
+       out_params->sq_pbl_virt = (u8 *)qp->shared_queue +
+           IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+       out_params->sq_pbl_phys = qp->shared_queue_phys_addr +
+           IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET;
+       out_params->rq_pbl_virt = (u8 *)qp->shared_queue +
+           IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+       out_params->rq_pbl_phys = qp->shared_queue_phys_addr +
+           IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET;
+
+       rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+       if (rc)
+               goto err1;
+
+       qp->icid = (u16)cid;
+
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.cid = qp->icid;
+       init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                IWARP_RAMROD_CMD_ID_CREATE_QP,
+                                PROTOCOLID_IWARP, &init_data);
+       if (rc)
+               goto err2;
+
+       p_ramrod = &p_ent->ramrod.iwarp_create_qp;
+
+       SET_FIELD(p_ramrod->flags,
+                 IWARP_CREATE_QP_RAMROD_DATA_FMR_AND_RESERVED_EN,
+                 qp->fmr_and_reserved_lkey);
+
+       SET_FIELD(p_ramrod->flags,
+                 IWARP_CREATE_QP_RAMROD_DATA_SIGNALED_COMP, qp->signal_all);
+
+       SET_FIELD(p_ramrod->flags,
+                 IWARP_CREATE_QP_RAMROD_DATA_RDMA_RD_EN,
+                 qp->incoming_rdma_read_en);
+
+       SET_FIELD(p_ramrod->flags,
+                 IWARP_CREATE_QP_RAMROD_DATA_RDMA_WR_EN,
+                 qp->incoming_rdma_write_en);
+
+       SET_FIELD(p_ramrod->flags,
+                 IWARP_CREATE_QP_RAMROD_DATA_ATOMIC_EN,
+                 qp->incoming_atomic_en);
+
+       SET_FIELD(p_ramrod->flags,
+                 IWARP_CREATE_QP_RAMROD_DATA_SRQ_FLG, qp->use_srq);
+
+       p_ramrod->pd = qp->pd;
+       p_ramrod->sq_num_pages = qp->sq_num_pages;
+       p_ramrod->rq_num_pages = qp->rq_num_pages;
+
+       p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
+       p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
+
+       p_ramrod->cq_cid_for_sq =
+           cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->sq_cq_id);
+       p_ramrod->cq_cid_for_rq =
+           cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id);
+
+       p_ramrod->dpi = cpu_to_le16(qp->dpi);
+
+       physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+       p_ramrod->physical_q0 = cpu_to_le16(physical_queue);
+       physical_queue = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+       p_ramrod->physical_q1 = cpu_to_le16(physical_queue);
+
+       rc = qed_spq_post(p_hwfn, p_ent, NULL);
+       if (rc)
+               goto err2;
+
+       return rc;
+
+err2:
+       qed_iwarp_cid_cleaned(p_hwfn, cid);
+err1:
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         IWARP_SHARED_QUEUE_PAGE_SIZE,
+                         qp->shared_queue, qp->shared_queue_phys_addr);
+
+       return rc;
+}
+
+/* Post an IWARP_RAMROD_CMD_ID_MODIFY_QP ramrod for @qp.
+ *
+ * The requested firmware transition is CLOSING when qp->iwarp_state is
+ * QED_IWARP_QP_STATE_CLOSING and ERROR for every other state.
+ *
+ * Returns the qed_sp_init_request() error if the SPQ entry cannot be
+ * obtained, otherwise the result of qed_spq_post().
+ */
+static int qed_iwarp_modify_fw(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+       struct iwarp_modify_qp_ramrod_data *p_ramrod;
+       struct qed_sp_init_data init_data;
+       struct qed_spq_entry *p_ent;
+       int rc;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = qp->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                IWARP_RAMROD_CMD_ID_MODIFY_QP,
+                                p_hwfn->p_rdma_info->proto, &init_data);
+       if (rc)
+               return rc;
+
+       p_ramrod = &p_ent->ramrod.iwarp_modify_qp;
+       /* Flag the ramrod as carrying a state transition */
+       SET_FIELD(p_ramrod->flags, IWARP_MODIFY_QP_RAMROD_DATA_STATE_TRANS_EN,
+                 0x1);
+       if (qp->iwarp_state == QED_IWARP_QP_STATE_CLOSING)
+               p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_CLOSING;
+       else
+               p_ramrod->transition_to_state = IWARP_MODIFY_QP_STATE_ERROR;
+
+       rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x)rc=%d\n", qp->icid, rc);
+
+       return rc;
+}
+
+/* Translate a RoCE QP state into the corresponding iWARP QP state.
+ *
+ * RESET, INIT and RTR all collapse to IDLE. ERR, and any state without an
+ * explicit mapping, is reported as ERROR.
+ */
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state)
+{
+       enum qed_iwarp_qp_state iw_state = QED_IWARP_QP_STATE_ERROR;
+
+       switch (state) {
+       case QED_ROCE_QP_STATE_RESET:
+       case QED_ROCE_QP_STATE_INIT:
+       case QED_ROCE_QP_STATE_RTR:
+               iw_state = QED_IWARP_QP_STATE_IDLE;
+               break;
+       case QED_ROCE_QP_STATE_RTS:
+               iw_state = QED_IWARP_QP_STATE_RTS;
+               break;
+       case QED_ROCE_QP_STATE_SQD:
+               iw_state = QED_IWARP_QP_STATE_CLOSING;
+               break;
+       case QED_ROCE_QP_STATE_SQE:
+               iw_state = QED_IWARP_QP_STATE_TERMINATE;
+               break;
+       case QED_ROCE_QP_STATE_ERR:
+       default:
+               /* keep the ERROR default */
+               break;
+       }
+
+       return iw_state;
+}
+
+/* Translate an iWARP QP state into the corresponding RoCE QP state.
+ *
+ * IDLE maps to INIT. ERROR, and any state without an explicit mapping, is
+ * reported as ERR.
+ */
+static enum qed_roce_qp_state
+qed_iwarp2roce_state(enum qed_iwarp_qp_state state)
+{
+       enum qed_roce_qp_state roce_state = QED_ROCE_QP_STATE_ERR;
+
+       switch (state) {
+       case QED_IWARP_QP_STATE_IDLE:
+               roce_state = QED_ROCE_QP_STATE_INIT;
+               break;
+       case QED_IWARP_QP_STATE_RTS:
+               roce_state = QED_ROCE_QP_STATE_RTS;
+               break;
+       case QED_IWARP_QP_STATE_TERMINATE:
+               roce_state = QED_ROCE_QP_STATE_SQE;
+               break;
+       case QED_IWARP_QP_STATE_CLOSING:
+               roce_state = QED_ROCE_QP_STATE_SQD;
+               break;
+       case QED_IWARP_QP_STATE_ERROR:
+       default:
+               /* keep the ERR default */
+               break;
+       }
+
+       return roce_state;
+}
+
+/* Printable names for iWARP QP states, used by the debug print in
+ * qed_iwarp_modify_qp(), which indexes this table directly with
+ * enum qed_iwarp_qp_state values.
+ * NOTE(review): the entry order presumably mirrors the enum declaration
+ * order - confirm against the enum definition when adding states.
+ */
+const char *iwarp_state_names[] = {
+       "IDLE",
+       "RTS",
+       "TERMINATE",
+       "CLOSING",
+       "ERROR",
+};
+
+/* Move @qp from its current iWARP state towards @new_state.
+ *
+ * The transition is evaluated under iwarp.qp_lock. When @internal is false
+ * and the transition is IDLE->ERROR, RTS->CLOSING or RTS->ERROR, a modify-QP
+ * ramrod is posted after the lock has been dropped. Transitions that are
+ * not listed for the current state leave qp->iwarp_state untouched.
+ *
+ * Returns 0 when nothing had to change or no ramrod was needed, -EINVAL for
+ * an unsupported transition out of ERROR (unless a ramrod result overrides
+ * it), otherwise the result of qed_iwarp_modify_fw().
+ */
+int
+qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn,
+                   struct qed_rdma_qp *qp,
+                   enum qed_iwarp_qp_state new_state, bool internal)
+{
+       enum qed_iwarp_qp_state prev_iw_state;
+       bool modify_fw = false;
+       int rc = 0;
+
+       /* modify QP can be called from upper-layer or as a result of async
+        * RST/FIN... therefore need to protect
+        */
+       spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+       prev_iw_state = qp->iwarp_state;
+
+       /* Already in the requested state: nothing to do */
+       if (prev_iw_state == new_state) {
+               spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+               return 0;
+       }
+
+       switch (prev_iw_state) {
+       case QED_IWARP_QP_STATE_IDLE:
+               switch (new_state) {
+               case QED_IWARP_QP_STATE_RTS:
+                       qp->iwarp_state = QED_IWARP_QP_STATE_RTS;
+                       break;
+               case QED_IWARP_QP_STATE_ERROR:
+                       qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+                       if (!internal)
+                               modify_fw = true;
+                       break;
+               default:
+                       break;
+               }
+               break;
+       case QED_IWARP_QP_STATE_RTS:
+               switch (new_state) {
+               case QED_IWARP_QP_STATE_CLOSING:
+                       if (!internal)
+                               modify_fw = true;
+
+                       qp->iwarp_state = QED_IWARP_QP_STATE_CLOSING;
+                       break;
+               case QED_IWARP_QP_STATE_ERROR:
+                       if (!internal)
+                               modify_fw = true;
+                       qp->iwarp_state = QED_IWARP_QP_STATE_ERROR;
+                       break;
+               default:
+                       break;
+               }
+               break;
+       case QED_IWARP_QP_STATE_ERROR:
+               switch (new_state) {
+               case QED_IWARP_QP_STATE_IDLE:
+
+                       qp->iwarp_state = new_state;
+                       break;
+               case QED_IWARP_QP_STATE_CLOSING:
+                       /* could happen due to race... do nothing.... */
+                       break;
+               default:
+                       rc = -EINVAL;
+               }
+               break;
+       case QED_IWARP_QP_STATE_TERMINATE:
+       case QED_IWARP_QP_STATE_CLOSING:
+               /* Any target state is accepted from TERMINATE/CLOSING */
+               qp->iwarp_state = new_state;
+               break;
+       default:
+               break;
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) %s --> %s%s\n",
+                  qp->icid,
+                  iwarp_state_names[prev_iw_state],
+                  iwarp_state_names[qp->iwarp_state],
+                  internal ? "internal" : "");
+
+       spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+
+       /* The ramrod is posted only after the spinlock has been released */
+       if (modify_fw)
+               rc = qed_iwarp_modify_fw(p_hwfn, qp);
+
+       return rc;
+}
+
+/* Post an IWARP_RAMROD_CMD_ID_DESTROY_QP ramrod for @qp. The ramrod carries
+ * no payload beyond the init data (cid, opaque_fid, completion mode).
+ *
+ * Returns the qed_sp_init_request() error if the SPQ entry cannot be
+ * obtained, otherwise the result of qed_spq_post().
+ */
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+       struct qed_sp_init_data init_data;
+       struct qed_spq_entry *p_ent;
+       int rc;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = qp->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                IWARP_RAMROD_CMD_ID_DESTROY_QP,
+                                p_hwfn->p_rdma_info->proto, &init_data);
+       if (rc)
+               return rc;
+
+       rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) rc = %d\n", qp->icid, rc);
+
+       return rc;
+}
+
+/* Release an endpoint: free its DMA-coherent buffer, optionally unlink it
+ * from the list it is on (under iwarp.iw_lock), clear the back-pointer held
+ * by its QP (if any) and free the ep object itself.
+ *
+ * @remove_from_active_list: when true, ep->list_entry is removed from its
+ * list before the ep is freed.
+ */
+static void qed_iwarp_destroy_ep(struct qed_hwfn *p_hwfn,
+                                struct qed_iwarp_ep *ep,
+                                bool remove_from_active_list)
+{
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         sizeof(*ep->ep_buffer_virt),
+                         ep->ep_buffer_virt, ep->ep_buffer_phys);
+
+       if (remove_from_active_list) {
+               spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+               list_del(&ep->list_entry);
+               spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+       }
+
+       /* Make sure the QP no longer references the ep being freed */
+       if (ep->qp)
+               ep->qp->ep = NULL;
+
+       kfree(ep);
+}
+
+/* Tear down an iWARP QP.
+ *
+ * Moves the QP to ERROR if it is not there yet, waits for the attached
+ * endpoint (if any) to reach QED_IWARP_EP_CLOSED, destroys that endpoint,
+ * posts the destroy-QP ramrod and finally frees the shared queue page.
+ *
+ * Returns the qed_iwarp_modify_qp() error if moving to ERROR fails (nothing
+ * is freed in that case), otherwise the result of qed_iwarp_fw_destroy().
+ */
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+       struct qed_iwarp_ep *ep = qp->ep;
+       int wait_count = 0;
+       int rc = 0;
+
+       if (qp->iwarp_state != QED_IWARP_QP_STATE_ERROR) {
+               rc = qed_iwarp_modify_qp(p_hwfn, qp,
+                                        QED_IWARP_QP_STATE_ERROR, false);
+               if (rc)
+                       return rc;
+       }
+
+       /* Make sure ep is closed before returning and freeing memory. */
+       if (ep) {
+               /* Poll in 100 ms steps, at most 200 sleeps (about 20 s) */
+               while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+                       msleep(100);
+
+               /* A timeout is only logged; the ep is destroyed regardless */
+               if (ep->state != QED_IWARP_EP_CLOSED)
+                       DP_NOTICE(p_hwfn, "ep state close timeout state=%x\n",
+                                 ep->state);
+
+               qed_iwarp_destroy_ep(p_hwfn, ep, false);
+       }
+
+       rc = qed_iwarp_fw_destroy(p_hwfn, qp);
+
+       /* The shared queue is freed even if the destroy ramrod failed */
+       if (qp->shared_queue)
+               dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                                 IWARP_SHARED_QUEUE_PAGE_SIZE,
+                                 qp->shared_queue, qp->shared_queue_phys_addr);
+
+       return rc;
+}
+
+/* Allocate a zero-initialized endpoint together with its DMA-coherent
+ * buffer, mark it QED_IWARP_EP_INIT / QED_EP_SIG and hand it back through
+ * @ep_out.
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails, in which
+ * case nothing is leaked and @ep_out is left untouched.
+ */
+static int
+qed_iwarp_create_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep **ep_out)
+{
+       struct qed_iwarp_ep *new_ep = kzalloc(sizeof(*new_ep), GFP_KERNEL);
+
+       if (!new_ep)
+               return -ENOMEM;
+
+       new_ep->state = QED_IWARP_EP_INIT;
+
+       new_ep->ep_buffer_virt =
+               dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+                                  sizeof(*new_ep->ep_buffer_virt),
+                                  &new_ep->ep_buffer_phys, GFP_KERNEL);
+       if (!new_ep->ep_buffer_virt) {
+               /* Undo the ep allocation before reporting the failure */
+               kfree(new_ep);
+               return -ENOMEM;
+       }
+
+       new_ep->sig = QED_EP_SIG;
+       *ep_out = new_ep;
+
+       return 0;
+}
+
+/* Dump the TCP section of a TCP-offload ramrod at QED_MSG_RDMA verbosity:
+ * MAC addresses, IP addresses and ports (IPv4 or IPv6 formatting chosen by
+ * tcp.ip_version), VLAN, miscellaneous TCP parameters and the SYN buffer
+ * description. Purely diagnostic; the ramrod is not modified.
+ */
+static void
+qed_iwarp_print_tcp_ramrod(struct qed_hwfn *p_hwfn,
+                          struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod)
+{
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "local_mac=%x %x %x, remote_mac=%x %x %x\n",
+                  p_tcp_ramrod->tcp.local_mac_addr_lo,
+                  p_tcp_ramrod->tcp.local_mac_addr_mid,
+                  p_tcp_ramrod->tcp.local_mac_addr_hi,
+                  p_tcp_ramrod->tcp.remote_mac_addr_lo,
+                  p_tcp_ramrod->tcp.remote_mac_addr_mid,
+                  p_tcp_ramrod->tcp.remote_mac_addr_hi);
+
+       if (p_tcp_ramrod->tcp.ip_version == TCP_IPV4) {
+               /* "ip:port" for both ends, matching the IPv6 branch below */
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "local_ip=%pI4h:%x, remote_ip=%pI4h:%x, vlan=%x\n",
+                          p_tcp_ramrod->tcp.local_ip,
+                          p_tcp_ramrod->tcp.local_port,
+                          p_tcp_ramrod->tcp.remote_ip,
+                          p_tcp_ramrod->tcp.remote_port,
+                          p_tcp_ramrod->tcp.vlan_id);
+       } else {
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "local_ip=%pI6h:%x, remote_ip=%pI6h:%x, vlan=%x\n",
+                          p_tcp_ramrod->tcp.local_ip,
+                          p_tcp_ramrod->tcp.local_port,
+                          p_tcp_ramrod->tcp.remote_ip,
+                          p_tcp_ramrod->tcp.remote_port,
+                          p_tcp_ramrod->tcp.vlan_id);
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "flow_label=%x, ttl=%x, tos_or_tc=%x, mss=%x, rcv_wnd_scale=%x, connect_mode=%x, flags=%x\n",
+                  p_tcp_ramrod->tcp.flow_label,
+                  p_tcp_ramrod->tcp.ttl,
+                  p_tcp_ramrod->tcp.tos_or_tc,
+                  p_tcp_ramrod->tcp.mss,
+                  p_tcp_ramrod->tcp.rcv_wnd_scale,
+                  p_tcp_ramrod->tcp.connect_mode,
+                  p_tcp_ramrod->tcp.flags);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "syn_ip_payload_length=%x, lo=%x, hi=%x\n",
+                  p_tcp_ramrod->tcp.syn_ip_payload_length,
+                  p_tcp_ramrod->tcp.syn_phy_addr_lo,
+                  p_tcp_ramrod->tcp.syn_phy_addr_hi);
+}
+
+/* Build and post the IWARP_RAMROD_CMD_ID_TCP_OFFLOAD ramrod for @ep.
+ *
+ * The ramrod carries the DMA addresses of the endpoint's in_pdata and
+ * async_output areas, the ep pointer as the async handle, the physical
+ * queues, and the TCP parameters (MACs, VLAN, IPs, ports, MSS, flags).
+ * Passive connections use QED_SPQ_MODE_CB completion and additionally pass
+ * the SYN buffer; every other connect mode uses QED_SPQ_MODE_EBLOCK.
+ *
+ * Returns the qed_sp_init_request() error if the SPQ entry cannot be
+ * obtained, otherwise the result of qed_spq_post().
+ */
+static int
+qed_iwarp_tcp_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       struct iwarp_tcp_offload_ramrod_data *p_tcp_ramrod;
+       struct tcp_offload_params_opt2 *tcp;
+       struct qed_sp_init_data init_data;
+       struct qed_spq_entry *p_ent;
+       dma_addr_t async_output_phys;
+       dma_addr_t in_pdata_phys;
+       u16 physical_q;
+       u8 tcp_flags;
+       int rc;
+       int i;
+
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = ep->tcp_cid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       if (ep->connect_mode == TCP_CONNECT_PASSIVE)
+               init_data.comp_mode = QED_SPQ_MODE_CB;
+       else
+               init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                IWARP_RAMROD_CMD_ID_TCP_OFFLOAD,
+                                PROTOCOLID_IWARP, &init_data);
+       if (rc)
+               return rc;
+
+       p_tcp_ramrod = &p_ent->ramrod.iwarp_tcp_offload;
+
+       /* Incoming private-data area inside the ep's DMA buffer */
+       in_pdata_phys = ep->ep_buffer_phys +
+                       offsetof(struct qed_iwarp_ep_memory, in_pdata);
+       DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.incoming_ulp_buffer.addr,
+                      in_pdata_phys);
+
+       p_tcp_ramrod->iwarp.incoming_ulp_buffer.len =
+           cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+
+       /* Async output area inside the same DMA buffer */
+       async_output_phys = ep->ep_buffer_phys +
+                           offsetof(struct qed_iwarp_ep_memory, async_output);
+       DMA_REGPAIR_LE(p_tcp_ramrod->iwarp.async_eqe_output_buf,
+                      async_output_phys);
+
+       /* The ep pointer itself is passed as the async handle */
+       p_tcp_ramrod->iwarp.handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+       p_tcp_ramrod->iwarp.handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+       physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OFLD);
+       p_tcp_ramrod->iwarp.physical_q0 = cpu_to_le16(physical_q);
+       physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_ACK);
+       p_tcp_ramrod->iwarp.physical_q1 = cpu_to_le16(physical_q);
+       p_tcp_ramrod->iwarp.mpa_mode = iwarp_info->mpa_rev;
+
+       tcp = &p_tcp_ramrod->tcp;
+       qed_set_fw_mac_addr(&tcp->remote_mac_addr_hi,
+                           &tcp->remote_mac_addr_mid,
+                           &tcp->remote_mac_addr_lo, ep->remote_mac_addr);
+       qed_set_fw_mac_addr(&tcp->local_mac_addr_hi, &tcp->local_mac_addr_mid,
+                           &tcp->local_mac_addr_lo, ep->local_mac_addr);
+
+       tcp->vlan_id = cpu_to_le16(ep->cm_info.vlan);
+
+       /* Translate the driver-level TCP flags into ramrod flag bits */
+       tcp_flags = p_hwfn->p_rdma_info->iwarp.tcp_flags;
+       tcp->flags = 0;
+       SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_TS_EN,
+                 !!(tcp_flags & QED_IWARP_TS_EN));
+
+       SET_FIELD(tcp->flags, TCP_OFFLOAD_PARAMS_OPT2_DA_EN,
+                 !!(tcp_flags & QED_IWARP_DA_EN));
+
+       tcp->ip_version = ep->cm_info.ip_version;
+
+       /* All four 32-bit address words are copied for either IP version */
+       for (i = 0; i < 4; i++) {
+               tcp->remote_ip[i] = cpu_to_le32(ep->cm_info.remote_ip[i]);
+               tcp->local_ip[i] = cpu_to_le32(ep->cm_info.local_ip[i]);
+       }
+
+       tcp->remote_port = cpu_to_le16(ep->cm_info.remote_port);
+       tcp->local_port = cpu_to_le16(ep->cm_info.local_port);
+       tcp->mss = cpu_to_le16(ep->mss);
+       tcp->flow_label = 0;
+       tcp->ttl = 0x40;
+       tcp->tos_or_tc = 0;
+
+       tcp->rcv_wnd_scale = (u8)p_hwfn->p_rdma_info->iwarp.rcv_wnd_scale;
+       tcp->connect_mode = ep->connect_mode;
+
+       /* Only the passive side supplies the SYN buffer */
+       if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+               tcp->syn_ip_payload_length =
+                       cpu_to_le16(ep->syn_ip_payload_length);
+               tcp->syn_phy_addr_hi = DMA_HI_LE(ep->syn_phy_addr);
+               tcp->syn_phy_addr_lo = DMA_LO_LE(ep->syn_phy_addr);
+       }
+
+       qed_iwarp_print_tcp_ramrod(p_hwfn, p_tcp_ramrod);
+
+       rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "EP(0x%x) Offload completed rc=%d\n", ep->tcp_cid, rc);
+
+       return rc;
+}
+
+/* Handle an incoming MPA request on @ep.
+ *
+ * Derives ord/ird, rtr_type and mpa_rev from the handshake mode reported in
+ * the async output buffer (and, for enhanced negotiation, from the MPA v2
+ * header at the start of in_pdata), points cm_info at the private data that
+ * follows that header, moves the ep to QED_IWARP_EP_MPA_REQ_RCVD and
+ * delivers QED_IWARP_EVENT_MPA_REQUEST through ep->event_cb.
+ */
+static void
+qed_iwarp_mpa_received(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       struct qed_iwarp_cm_event_params params;
+       struct mpa_v2_hdr *mpa_v2;
+       union async_output *async_data;
+       u16 mpa_ord, mpa_ird;
+       u8 mpa_hdr_size = 0;
+       u8 mpa_rev;
+
+       async_data = &ep->ep_buffer_virt->async_output;
+
+       mpa_rev = async_data->mpa_request.mpa_handshake_mode;
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "private_data_len=%x handshake_mode=%x private_data=(%x)\n",
+                  async_data->mpa_request.ulp_data_len,
+                  mpa_rev, *((u32 *)(ep->ep_buffer_virt->in_pdata)));
+
+       if (mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+               /* Read ord/ird values from private data buffer */
+               mpa_v2 = (struct mpa_v2_hdr *)ep->ep_buffer_virt->in_pdata;
+               mpa_hdr_size = sizeof(*mpa_v2);
+
+               mpa_ord = ntohs(mpa_v2->ord);
+               mpa_ird = ntohs(mpa_v2->ird);
+
+               /* Temporarily store the requested incoming ord/ird in
+                * cm_info; they are replaced with the negotiated values
+                * during accept. Both are capped at the driver defaults.
+                */
+               ep->cm_info.ord = (u8)min_t(u16,
+                                           (mpa_ord & MPA_V2_IRD_ORD_MASK),
+                                           QED_IWARP_ORD_DEFAULT);
+
+               ep->cm_info.ird = (u8)min_t(u16,
+                                           (mpa_ird & MPA_V2_IRD_ORD_MASK),
+                                           QED_IWARP_IRD_DEFAULT);
+
+               /* Peer2Peer negotiation */
+               ep->rtr_type = MPA_RTR_TYPE_NONE;
+               if (mpa_ird & MPA_V2_PEER2PEER_MODEL) {
+                       if (mpa_ord & MPA_V2_WRITE_RTR)
+                               ep->rtr_type |= MPA_RTR_TYPE_ZERO_WRITE;
+
+                       if (mpa_ord & MPA_V2_READ_RTR)
+                               ep->rtr_type |= MPA_RTR_TYPE_ZERO_READ;
+
+                       if (mpa_ird & MPA_V2_SEND_RTR)
+                               ep->rtr_type |= MPA_RTR_TYPE_ZERO_SEND;
+
+                       /* Keep only the RTR types we support as well */
+                       ep->rtr_type &= iwarp_info->rtr_type;
+
+                       /* if we're left with no match send our capabilities */
+                       if (ep->rtr_type == MPA_RTR_TYPE_NONE)
+                               ep->rtr_type = iwarp_info->rtr_type;
+               }
+
+               ep->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+       } else {
+               ep->cm_info.ord = QED_IWARP_ORD_DEFAULT;
+               ep->cm_info.ird = QED_IWARP_IRD_DEFAULT;
+               ep->mpa_rev = MPA_NEGOTIATION_TYPE_BASIC;
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x rtr:0x%x ulp_data_len = %x mpa_hdr_size = %x\n",
+                  mpa_rev, ep->cm_info.ord, ep->cm_info.ird, ep->rtr_type,
+                  async_data->mpa_request.ulp_data_len, mpa_hdr_size);
+
+       /* Strip mpa v2 hdr from private data before sending to upper layer */
+       ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_hdr_size;
+
+       /* NOTE(review): ulp_data_len is not checked against mpa_hdr_size
+        * here - confirm it can never be smaller than the MPA v2 header.
+        */
+       ep->cm_info.private_data_len = async_data->mpa_request.ulp_data_len -
+                                      mpa_hdr_size;
+
+       params.event = QED_IWARP_EVENT_MPA_REQUEST;
+       params.cm_info = &ep->cm_info;
+       params.ep_context = ep;
+       params.status = 0;
+
+       /* State is updated before the upper layer is notified */
+       ep->state = QED_IWARP_EP_MPA_REQ_RCVD;
+       ep->event_cb(ep->cb_context, &params);
+}
+
+/* Build and post the IWARP_RAMROD_CMD_ID_MPA_OFFLOAD ramrod for @ep.
+ *
+ * An ep without an attached QP is treated as a reject: the ramrod is sent
+ * on the TCP cid with common.reject set. Otherwise it is sent on the QP's
+ * icid together with the shared queue address and stats counter, and
+ * ep->cid is switched to the QP's icid after the post.
+ * Active connections use QED_SPQ_MODE_CB completion; every other connect
+ * mode uses QED_SPQ_MODE_EBLOCK.
+ *
+ * Returns -EINVAL if @ep is NULL, the qed_sp_init_request() error if the
+ * SPQ entry cannot be obtained, otherwise the result of qed_spq_post().
+ */
+static int
+qed_iwarp_mpa_offload(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+       struct iwarp_mpa_offload_ramrod_data *p_mpa_ramrod;
+       struct qed_sp_init_data init_data;
+       dma_addr_t async_output_phys;
+       struct qed_spq_entry *p_ent;
+       dma_addr_t out_pdata_phys;
+       dma_addr_t in_pdata_phys;
+       struct qed_rdma_qp *qp;
+       bool reject;
+       int rc;
+
+       if (!ep)
+               return -EINVAL;
+
+       qp = ep->qp;
+       reject = !qp;
+
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = reject ? ep->tcp_cid : qp->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+
+       if (ep->connect_mode == TCP_CONNECT_ACTIVE)
+               init_data.comp_mode = QED_SPQ_MODE_CB;
+       else
+               init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                IWARP_RAMROD_CMD_ID_MPA_OFFLOAD,
+                                PROTOCOLID_IWARP, &init_data);
+       if (rc)
+               return rc;
+
+       p_mpa_ramrod = &p_ent->ramrod.iwarp_mpa_offload;
+       out_pdata_phys = ep->ep_buffer_phys +
+                        offsetof(struct qed_iwarp_ep_memory, out_pdata);
+       DMA_REGPAIR_LE(p_mpa_ramrod->common.outgoing_ulp_buffer.addr,
+                      out_pdata_phys);
+       /* Little-endian like the incoming buffer length set below */
+       p_mpa_ramrod->common.outgoing_ulp_buffer.len =
+           cpu_to_le16(ep->cm_info.private_data_len);
+       p_mpa_ramrod->common.crc_needed = p_hwfn->p_rdma_info->iwarp.crc_needed;
+
+       p_mpa_ramrod->common.out_rq.ord = ep->cm_info.ord;
+       p_mpa_ramrod->common.out_rq.ird = ep->cm_info.ird;
+
+       /* opaque_fid in the upper 16 bits, TCP cid in the lower ones */
+       p_mpa_ramrod->tcp_cid =
+           cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | ep->tcp_cid);
+
+       in_pdata_phys = ep->ep_buffer_phys +
+                       offsetof(struct qed_iwarp_ep_memory, in_pdata);
+       p_mpa_ramrod->tcp_connect_side = ep->connect_mode;
+       DMA_REGPAIR_LE(p_mpa_ramrod->incoming_ulp_buffer.addr,
+                      in_pdata_phys);
+       p_mpa_ramrod->incoming_ulp_buffer.len =
+           cpu_to_le16(sizeof(ep->ep_buffer_virt->in_pdata));
+       async_output_phys = ep->ep_buffer_phys +
+                           offsetof(struct qed_iwarp_ep_memory, async_output);
+       DMA_REGPAIR_LE(p_mpa_ramrod->async_eqe_output_buf,
+                      async_output_phys);
+       /* The ep pointer itself is passed as the async handle */
+       p_mpa_ramrod->handle_for_async.hi = cpu_to_le32(PTR_HI(ep));
+       p_mpa_ramrod->handle_for_async.lo = cpu_to_le32(PTR_LO(ep));
+
+       if (!reject) {
+               DMA_REGPAIR_LE(p_mpa_ramrod->shared_queue_addr,
+                              qp->shared_queue_phys_addr);
+               p_mpa_ramrod->stats_counter_id =
+                   RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + qp->stats_queue;
+       } else {
+               p_mpa_ramrod->common.reject = 1;
+       }
+
+       p_mpa_ramrod->mode = ep->mpa_rev;
+       SET_FIELD(p_mpa_ramrod->rtr_pref,
+                 IWARP_MPA_OFFLOAD_RAMROD_DATA_RTR_SUPPORTED, ep->rtr_type);
+
+       /* State is updated before the ramrod is posted */
+       ep->state = QED_IWARP_EP_MPA_OFFLOADED;
+       rc = qed_spq_post(p_hwfn, p_ent, NULL);
+       if (!reject)
+               ep->cid = qp->icid;     /* Now they're migrated. */
+
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_RDMA,
+                  "QP(0x%x) EP(0x%x) MPA Offload rc = %d IRD=0x%x ORD=0x%x rtr_type=%d mpa_rev=%d reject=%d\n",
+                  reject ? 0xffff : qp->icid,
+                  ep->tcp_cid,
+                  rc,
+                  ep->cm_info.ird,
+                  ep->cm_info.ord, ep->rtr_type, ep->mpa_rev, reject);
+       return rc;
+}
+
+/* Recycle @ep: reset it to QED_IWARP_EP_INIT, detach it from its QP, clear
+ * cm_info, try to give it a fresh TCP cid if the current one is invalid,
+ * and move it to the tail of iwarp.ep_free_list under iwarp.iw_lock.
+ */
+static void
+qed_iwarp_return_ep(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+       ep->state = QED_IWARP_EP_INIT;
+       /* Break the ep <-> qp link in both directions */
+       if (ep->qp)
+               ep->qp->ep = NULL;
+       ep->qp = NULL;
+       memset(&ep->cm_info, 0, sizeof(ep->cm_info));
+
+       if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID) {
+               /* We don't care about the return code, it's ok if tcp_cid
+                * remains invalid...in this case we'll defer allocation
+                */
+               qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid);
+       }
+       spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+       /* Unlink from the current list, then queue on the free list */
+       list_del(&ep->list_entry);
+       list_add_tail(&ep->list_entry,
+                     &p_hwfn->p_rdma_info->iwarp.ep_free_list);
+
+       spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+}
+
+/* Fill ep->cm_info from the data received in the endpoint buffer.
+ *
+ * When the function-level mpa_rev is MPA v2, the MPA v2 header at the start
+ * of in_pdata supplies ird/ord and is skipped; private_data then points
+ * just past it. private_data_len is the ulp_data_len reported in the async
+ * output minus the skipped header size.
+ */
+void
+qed_iwarp_parse_private_data(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+       struct mpa_v2_hdr *mpa_v2_params;
+       union async_output *async_data;
+       u16 mpa_ird, mpa_ord;
+       u8 mpa_data_size = 0;
+
+       if (MPA_REV2(p_hwfn->p_rdma_info->iwarp.mpa_rev)) {
+               mpa_v2_params =
+                       (struct mpa_v2_hdr *)(ep->ep_buffer_virt->in_pdata);
+               mpa_data_size = sizeof(*mpa_v2_params);
+               mpa_ird = ntohs(mpa_v2_params->ird);
+               mpa_ord = ntohs(mpa_v2_params->ord);
+
+               /* Header ord feeds our ird and header ird feeds our ord.
+                * NOTE(review): presumably because the header is written
+                * from the peer's point of view - confirm.
+                */
+               ep->cm_info.ird = (u8)(mpa_ord & MPA_V2_IRD_ORD_MASK);
+               ep->cm_info.ord = (u8)(mpa_ird & MPA_V2_IRD_ORD_MASK);
+       }
+       async_data = &ep->ep_buffer_virt->async_output;
+
+       ep->cm_info.private_data = ep->ep_buffer_virt->in_pdata + mpa_data_size;
+       ep->cm_info.private_data_len = async_data->mpa_response.ulp_data_len -
+                                      mpa_data_size;
+}
+
+/* Handle an MPA reply on an active-side endpoint: parse the received
+ * private data into cm_info, remember that the reply has been processed
+ * and deliver QED_IWARP_EVENT_ACTIVE_MPA_REPLY through ep->event_cb.
+ * On a passive-side endpoint the event is only logged and ignored.
+ */
+void
+qed_iwarp_mpa_reply_arrived(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+       struct qed_iwarp_cm_event_params params;
+
+       if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+               DP_NOTICE(p_hwfn,
+                         "MPA reply event not expected on passive side!\n");
+               return;
+       }
+
+       params.event = QED_IWARP_EVENT_ACTIVE_MPA_REPLY;
+
+       qed_iwarp_parse_private_data(p_hwfn, ep);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+                  ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+       params.cm_info = &ep->cm_info;
+       params.ep_context = ep;
+       params.status = 0;
+
+       /* Lets qed_iwarp_mpa_complete() skip parsing the data again */
+       ep->mpa_reply_processed = true;
+
+       ep->event_cb(ep->cb_context, &params);
+}
+
+/* Printable name of an endpoint's connect mode, for log messages.
+ * The whole expansion is parenthesized so the conditional cannot bind to
+ * operators surrounding the macro invocation.
+ */
+#define QED_IWARP_CONNECT_MODE_STRING(ep) \
+       (((ep)->connect_mode == TCP_CONNECT_PASSIVE) ? "Passive" : "Active")
+
+/* Called as a result of the event:
+ * IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE
+ *
+ * Maps @fw_return_code to an errno-style status, updates the ep (and, on
+ * success, the QP) state and reports QED_IWARP_EVENT_ACTIVE_COMPLETE or
+ * QED_IWARP_EVENT_PASSIVE_COMPLETE through ep->event_cb. On failure the ep
+ * is additionally taken off its list, and a rejected passive ep (no QP) is
+ * returned to the free pool.
+ */
+static void
+qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
+                      struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+       struct qed_iwarp_cm_event_params params;
+
+       if (ep->connect_mode == TCP_CONNECT_ACTIVE)
+               params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
+       else
+               params.event = QED_IWARP_EVENT_PASSIVE_COMPLETE;
+
+       /* Active side: parse the reply data unless the MPA reply handler
+        * already did so.
+        */
+       if (ep->connect_mode == TCP_CONNECT_ACTIVE && !ep->mpa_reply_processed)
+               qed_iwarp_parse_private_data(p_hwfn, ep);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "MPA_NEGOTIATE (v%d): ORD: 0x%x IRD: 0x%x\n",
+                  ep->mpa_rev, ep->cm_info.ord, ep->cm_info.ird);
+
+       params.cm_info = &ep->cm_info;
+
+       params.ep_context = ep;
+
+       /* Default to CLOSED; only the success case below overrides it */
+       ep->state = QED_IWARP_EP_CLOSED;
+
+       switch (fw_return_code) {
+       case RDMA_RETURN_OK:
+               /* NOTE(review): ep->qp is dereferenced without a NULL check;
+                * presumably a successful completion implies a QP is
+                * attached - confirm.
+                */
+               ep->qp->max_rd_atomic_req = ep->cm_info.ord;
+               ep->qp->max_rd_atomic_resp = ep->cm_info.ird;
+               qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_RTS, 1);
+               ep->state = QED_IWARP_EP_ESTABLISHED;
+               params.status = 0;
+               break;
+       case IWARP_CONN_ERROR_MPA_TIMEOUT:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA timeout\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -EBUSY;
+               break;
+       case IWARP_CONN_ERROR_MPA_ERROR_REJECT:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA Reject\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -ECONNREFUSED;
+               break;
+       case IWARP_CONN_ERROR_MPA_RST:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA reset(tcp cid: 0x%x)\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid,
+                         ep->tcp_cid);
+               params.status = -ECONNRESET;
+               break;
+       case IWARP_CONN_ERROR_MPA_FIN:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA received FIN\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -ECONNREFUSED;
+               break;
+       case IWARP_CONN_ERROR_MPA_INSUF_IRD:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA insufficient ird\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -ECONNREFUSED;
+               break;
+       case IWARP_CONN_ERROR_MPA_RTR_MISMATCH:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA RTR MISMATCH\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -ECONNREFUSED;
+               break;
+       case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -ECONNREFUSED;
+               break;
+       case IWARP_CONN_ERROR_MPA_LOCAL_ERROR:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA Local Error\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -ECONNREFUSED;
+               break;
+       case IWARP_CONN_ERROR_MPA_TERMINATE:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA TERMINATE\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->cid);
+               params.status = -ECONNREFUSED;
+               break;
+       default:
+               /* Unrecognized firmware codes are reported as a reset */
+               params.status = -ECONNRESET;
+               break;
+       }
+
+       ep->event_cb(ep->cb_context, &params);
+
+       /* on passive side, if there is no associated QP (REJECT) we need to
+        * return the ep to the pool, (in the regular case we add an element
+        * in accept instead of this one).
+        * In both cases we need to remove it from the ep_list.
+        */
+       if (fw_return_code != RDMA_RETURN_OK) {
+               ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+               if ((ep->connect_mode == TCP_CONNECT_PASSIVE) &&
+                   (!ep->qp)) {        /* Rejected */
+                       qed_iwarp_return_ep(p_hwfn, ep);
+               } else {
+                       spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+                       list_del(&ep->list_entry);
+                       spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+               }
+       }
+}
+
+/* For an MPA v2 endpoint, write the MPA v2 header (ird/ord plus the
+ * peer-to-peer / RTR flag bits derived from ep->rtr_type) at the start of
+ * the outgoing private-data buffer, in network byte order.
+ *
+ * @mpa_data_size: set to the size of the header written, or to 0 when
+ * ep->mpa_rev is not MPA v2 (nothing is written in that case).
+ */
+static void
+qed_iwarp_mpa_v2_set_private(struct qed_hwfn *p_hwfn,
+                            struct qed_iwarp_ep *ep, u8 *mpa_data_size)
+{
+       struct mpa_v2_hdr *mpa_v2_params;
+       u16 mpa_ird, mpa_ord;
+
+       *mpa_data_size = 0;
+       if (MPA_REV2(ep->mpa_rev)) {
+               mpa_v2_params =
+                   (struct mpa_v2_hdr *)ep->ep_buffer_virt->out_pdata;
+               *mpa_data_size = sizeof(*mpa_v2_params);
+
+               mpa_ird = (u16)ep->cm_info.ird;
+               mpa_ord = (u16)ep->cm_info.ord;
+
+               /* PEER2PEER and SEND_RTR are carried in the ird word,
+                * WRITE_RTR and READ_RTR in the ord word.
+                */
+               if (ep->rtr_type != MPA_RTR_TYPE_NONE) {
+                       mpa_ird |= MPA_V2_PEER2PEER_MODEL;
+
+                       if (ep->rtr_type & MPA_RTR_TYPE_ZERO_SEND)
+                               mpa_ird |= MPA_V2_SEND_RTR;
+
+                       if (ep->rtr_type & MPA_RTR_TYPE_ZERO_WRITE)
+                               mpa_ord |= MPA_V2_WRITE_RTR;
+
+                       if (ep->rtr_type & MPA_RTR_TYPE_ZERO_READ)
+                               mpa_ord |= MPA_V2_READ_RTR;
+               }
+
+               mpa_v2_params->ird = htons(mpa_ird);
+               mpa_v2_params->ord = htons(mpa_ord);
+
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_RDMA,
+                          "MPA_NEGOTIATE Header: [%x ord:%x ird] %x ord:%x ird:%x peer2peer:%x rtr_send:%x rtr_write:%x rtr_read:%x\n",
+                          mpa_v2_params->ird,
+                          mpa_v2_params->ord,
+                          *((u32 *)mpa_v2_params),
+                          mpa_ord & MPA_V2_IRD_ORD_MASK,
+                          mpa_ird & MPA_V2_IRD_ORD_MASK,
+                          !!(mpa_ird & MPA_V2_PEER2PEER_MODEL),
+                          !!(mpa_ird & MPA_V2_SEND_RTR),
+                          !!(mpa_ord & MPA_V2_WRITE_RTR),
+                          !!(mpa_ord & MPA_V2_READ_RTR));
+       }
+}
+
+/* Start an active-side iWARP connection for the QP given in @iparams.
+ *
+ * Rejects ord/ird above the driver defaults, allocates a cid and an
+ * endpoint, links the endpoint on the active ep list, fills it from
+ * @iparams and the per-function iWARP settings, and starts the TCP
+ * offload. oparams->ep_context is set to the endpoint before the offload
+ * is attempted.
+ *
+ * Returns 0 on success, -EINVAL for out-of-range ord/ird, otherwise the
+ * error returned by the failing helper.
+ */
+int qed_iwarp_connect(void *rdma_cxt,
+                     struct qed_iwarp_connect_in *iparams,
+                     struct qed_iwarp_connect_out *oparams)
+{
+       struct qed_hwfn *p_hwfn = rdma_cxt;
+       struct qed_iwarp_info *iwarp_info;
+       struct qed_iwarp_ep *ep;
+       u8 mpa_data_size = 0;
+       u8 ts_hdr_size;
+       u32 cid;
+       int rc;
+
+       if (iparams->cm_info.ord > QED_IWARP_ORD_DEFAULT ||
+           iparams->cm_info.ird > QED_IWARP_IRD_DEFAULT) {
+               DP_NOTICE(p_hwfn,
+                         "QP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+                         iparams->qp->icid, iparams->cm_info.ord,
+                         iparams->cm_info.ird);
+
+               return -EINVAL;
+       }
+
+       iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+       rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+       if (rc)
+               return rc;
+
+       rc = qed_iwarp_create_ep(p_hwfn, &ep);
+       if (rc)
+               goto err_release_cid;
+
+       ep->tcp_cid = cid;
+
+       /* Publish the endpoint on the active list */
+       spin_lock_bh(&iwarp_info->iw_lock);
+       list_add_tail(&ep->list_entry, &iwarp_info->ep_list);
+       spin_unlock_bh(&iwarp_info->iw_lock);
+
+       /* Cross-link the endpoint and the QP */
+       ep->qp = iparams->qp;
+       ep->qp->ep = ep;
+
+       ether_addr_copy(ep->remote_mac_addr, iparams->remote_mac_addr);
+       ether_addr_copy(ep->local_mac_addr, iparams->local_mac_addr);
+
+       memcpy(&ep->cm_info, &iparams->cm_info, sizeof(ep->cm_info));
+       ep->cm_info.ord = iparams->cm_info.ord;
+       ep->cm_info.ird = iparams->cm_info.ird;
+
+       /* RTR is only used when peer2peer is enabled */
+       if (iwarp_info->peer2peer)
+               ep->rtr_type = iwarp_info->rtr_type;
+       else
+               ep->rtr_type = MPA_RTR_TYPE_NONE;
+
+       /* With MPA_RTR_TYPE_ZERO_READ an ord of 0 is raised to 1 */
+       if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) && !ep->cm_info.ord)
+               ep->cm_info.ord = 1;
+
+       ep->mpa_rev = iwarp_info->mpa_rev;
+
+       qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+       /* Caller's private data is placed after the first mpa_data_size
+        * bytes of the outgoing private-data buffer.
+        */
+       ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+       ep->cm_info.private_data_len = iparams->cm_info.private_data_len +
+                                      mpa_data_size;
+
+       memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+              iparams->cm_info.private_data,
+              iparams->cm_info.private_data_len);
+
+       ts_hdr_size = (iwarp_info->tcp_flags & QED_IWARP_TS_EN) ?
+                     TIMESTAMP_HEADER_SIZE : 0;
+
+       ep->mss = iparams->mss - ts_hdr_size;
+       ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+       ep->event_cb = iparams->event_cb;
+       ep->cb_context = iparams->cb_context;
+       ep->connect_mode = TCP_CONNECT_ACTIVE;
+
+       oparams->ep_context = ep;
+
+       rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x) rc = %d\n",
+                  iparams->qp->icid, ep->tcp_cid, rc);
+
+       if (!rc)
+               return 0;
+
+       qed_iwarp_destroy_ep(p_hwfn, ep, true);
+
+err_release_cid:
+       qed_iwarp_cid_cleaned(p_hwfn, cid);
+
+       return rc;
+}
+
+/* Detach and return the first endpoint of the pre-allocated free list.
+ *
+ * Returns NULL if the free list is empty, or if the first entry has no
+ * valid tcp cid and one cannot be allocated now (the entry then stays on
+ * the free list). The list is accessed under iw_lock.
+ */
+static struct qed_iwarp_ep *qed_iwarp_get_free_ep(struct qed_hwfn *p_hwfn)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       struct qed_iwarp_ep *ep = NULL;
+
+       spin_lock_bh(&iwarp_info->iw_lock);
+
+       if (list_empty(&iwarp_info->ep_free_list)) {
+               DP_ERR(p_hwfn, "Ep list is empty\n");
+               goto unlock;
+       }
+
+       ep = list_first_entry(&iwarp_info->ep_free_list,
+                             struct qed_iwarp_ep, list_entry);
+
+       /* The tcp cid allocation may have failed when this entry was added
+        * (accept / failure flows), so retry it here. This is not the common
+        * case; on failure we give up instead of searching the list for
+        * another entry that already owns a valid tcp_cid.
+        */
+       if (ep->tcp_cid == QED_IWARP_INVALID_TCP_CID &&
+           qed_iwarp_alloc_tcp_cid(p_hwfn, &ep->tcp_cid)) {
+               ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+               ep = NULL;
+       } else {
+               list_del(&ep->list_entry);
+       }
+
+unlock:
+       spin_unlock_bh(&iwarp_info->iw_lock);
+       return ep;
+}
+
+#define QED_IWARP_MAX_CID_CLEAN_TIME  100
+#define QED_IWARP_MAX_NO_PROGRESS_CNT 5
+
+/* Poll @bmap until no bit is set.
+ *
+ * The bitmap is sampled every QED_IWARP_MAX_CID_CLEAN_TIME msec. Any change
+ * in the number of set bits counts as progress and restarts the stall
+ * counter; once more than QED_IWARP_MAX_NO_PROGRESS_CNT consecutive samples
+ * show no change the wait is abandoned.
+ *
+ * Returns 0 when the bitmap is clear, -EBUSY on a stalled wait.
+ */
+static int
+qed_iwarp_wait_cid_map_cleared(struct qed_hwfn *p_hwfn, struct qed_bmap *bmap)
+{
+       int stalled_polls = 0;
+       int last_pending;
+       int pending;
+
+       pending = bitmap_weight(bmap->bitmap, bmap->max_count);
+       last_pending = pending;
+
+       while (pending) {
+               msleep(QED_IWARP_MAX_CID_CLEAN_TIME);
+
+               pending = bitmap_weight(bmap->bitmap, bmap->max_count);
+
+               if (pending != last_pending) {
+                       last_pending = pending;
+                       stalled_polls = 0;
+               } else {
+                       stalled_polls++;
+               }
+
+               if (stalled_polls > QED_IWARP_MAX_NO_PROGRESS_CNT) {
+                       DP_NOTICE(p_hwfn,
+                                 "%s bitmap wait timed out (%d cids pending)\n",
+                                 bmap->name, pending);
+                       return -EBUSY;
+               }
+       }
+
+       return 0;
+}
+
+/* Wait for the tcp cid map to drain, then release the first
+ * QED_IWARP_PREALLOC_CNT ids of the main cid map and wait for that map
+ * to drain as well.
+ *
+ * Returns 0 on success or the error of the first wait that fails.
+ */
+static int qed_iwarp_wait_for_all_cids(struct qed_hwfn *p_hwfn)
+{
+       int idx;
+       int rc;
+
+       rc = qed_iwarp_wait_cid_map_cleared(p_hwfn,
+                                           &p_hwfn->p_rdma_info->tcp_cid_map);
+       if (!rc) {
+               /* The tcp cids can now be dropped from the main cid map */
+               for (idx = 0; idx < QED_IWARP_PREALLOC_CNT; idx++)
+                       qed_bmap_release_id(p_hwfn,
+                                           &p_hwfn->p_rdma_info->cid_map,
+                                           idx);
+
+               /* Finally wait for every remaining cid to complete */
+               rc = qed_iwarp_wait_cid_map_cleared(p_hwfn,
+                                                   &p_hwfn->p_rdma_info->cid_map);
+       }
+
+       return rc;
+}
+
+/* Destroy every endpoint left on the pre-allocated free list.
+ *
+ * Each entry is unlinked under iw_lock; its tcp cid (when valid) is
+ * reported as cleaned and the endpoint is destroyed outside the lock.
+ *
+ * The emptiness test and the removal are done in the same critical
+ * section. list_first_entry() never yields NULL, so the lookup uses
+ * list_first_entry_or_null() to detect an empty list reliably.
+ */
+static void qed_iwarp_free_prealloc_ep(struct qed_hwfn *p_hwfn)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       struct qed_iwarp_ep *ep;
+
+       for (;;) {
+               spin_lock_bh(&iwarp_info->iw_lock);
+
+               ep = list_first_entry_or_null(&iwarp_info->ep_free_list,
+                                             struct qed_iwarp_ep,
+                                             list_entry);
+               if (ep)
+                       list_del(&ep->list_entry);
+
+               spin_unlock_bh(&iwarp_info->iw_lock);
+
+               if (!ep)
+                       break;
+
+               if (ep->tcp_cid != QED_IWARP_INVALID_TCP_CID)
+                       qed_iwarp_cid_cleaned(p_hwfn, ep->tcp_cid);
+
+               qed_iwarp_destroy_ep(p_hwfn, ep, false);
+       }
+}
+
+/* Create endpoints and queue them on the pre-allocated free list.
+ *
+ * With @init set, QED_IWARP_PREALLOC_CNT endpoints are created and each
+ * gets a cid from the main pool; otherwise a single endpoint is created
+ * and its cid comes from the tcp cid pool.
+ *
+ * Returns 0 on success or the error of the failing allocation.
+ */
+static int qed_iwarp_prealloc_ep(struct qed_hwfn *p_hwfn, bool init)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       int num_eps = init ? QED_IWARP_PREALLOC_CNT : 1;
+       struct qed_iwarp_ep *ep;
+       int rc = 0;
+       u32 cid;
+       int idx;
+
+       for (idx = 0; idx < num_eps; idx++) {
+               rc = qed_iwarp_create_ep(p_hwfn, &ep);
+               if (rc)
+                       return rc;
+
+               if (!init) {
+                       /* After initialization only the tcp_cid pool is
+                        * used. The return code is ignored on purpose: a
+                        * tcp_cid that remains invalid just means the
+                        * allocation is deferred.
+                        */
+                       qed_iwarp_alloc_tcp_cid(p_hwfn, &cid);
+               } else {
+                       /* During initialization take the cid from the main
+                        * pool.
+                        */
+                       rc = qed_iwarp_alloc_cid(p_hwfn, &cid);
+                       if (rc) {
+                               qed_iwarp_destroy_ep(p_hwfn, ep, false);
+                               return rc;
+                       }
+                       qed_iwarp_set_tcp_cid(p_hwfn, cid);
+               }
+
+               ep->tcp_cid = cid;
+
+               spin_lock_bh(&iwarp_info->iw_lock);
+               list_add_tail(&ep->list_entry, &iwarp_info->ep_free_list);
+               spin_unlock_bh(&iwarp_info->iw_lock);
+       }
+
+       return rc;
+}
+
+/* Allocate the iWARP resources of @p_hwfn: the tcp cid bitmap, the free
+ * endpoint list with its lock, and the initial set of pre-allocated
+ * endpoints.
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       int rc;
+
+       /* The tcp cid bitmap lets the passive side pick, during dpc, a tcp
+        * cid that was pre-acquired and thus needs no dynamic ilt
+        * allocation.
+        */
+       rc = qed_rdma_bmap_alloc(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map,
+                                QED_IWARP_PREALLOC_CNT, "TCP_CID");
+       if (rc) {
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "Failed to allocate tcp cid, rc = %d\n", rc);
+               return rc;
+       }
+
+       spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+       INIT_LIST_HEAD(&iwarp_info->ep_free_list);
+
+       return qed_iwarp_prealloc_ep(p_hwfn, true);
+}
+
+/* Release the tcp cid bitmap allocated by qed_iwarp_alloc(). */
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
+{
+       struct qed_bmap *tcp_cid_map = &p_hwfn->p_rdma_info->tcp_cid_map;
+
+       qed_rdma_bmap_free(p_hwfn, tcp_cid_map, 1);
+}
+
+/* Accept the connection request represented by iparams->ep_context.
+ *
+ * Binds the QP to the endpoint, settles the final ord/ird (for enhanced
+ * MPA negotiation the requested ord is capped by the ird stored in the
+ * endpoint's cm_info), builds the outgoing private data and starts the
+ * MPA offload. iparams->ord / iparams->ird may be adjusted in place.
+ *
+ * Returns 0 on success, -EINVAL for a NULL endpoint or out-of-range
+ * ord/ird, otherwise the qed_iwarp_mpa_offload() error, in which case the
+ * QP is moved to the error state.
+ */
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
+{
+       struct qed_hwfn *p_hwfn = rdma_cxt;
+       struct qed_iwarp_ep *ep;
+       u8 mpa_data_size = 0;
+       int rc;
+
+       ep = iparams->ep_context;
+       if (!ep) {
+               DP_ERR(p_hwfn, "Ep Context receive in accept is NULL\n");
+               return -EINVAL;
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+                  iparams->qp->icid, ep->tcp_cid);
+
+       if ((iparams->ord > QED_IWARP_ORD_DEFAULT) ||
+           (iparams->ird > QED_IWARP_IRD_DEFAULT)) {
+               /* Log both offending values: ord first, then ird */
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_RDMA,
+                          "QP(0x%x) EP(0x%x) ERROR: Invalid ord(0x%x)/ird(0x%x)\n",
+                          iparams->qp->icid,
+                          ep->tcp_cid, iparams->ord, iparams->ird);
+               return -EINVAL;
+       }
+
+       /* Add one endpoint to the free list; the result is not checked */
+       qed_iwarp_prealloc_ep(p_hwfn, false);
+
+       ep->cb_context = iparams->cb_context;
+       ep->qp = iparams->qp;
+       ep->qp->ep = ep;
+
+       if (ep->mpa_rev == MPA_NEGOTIATION_TYPE_ENHANCED) {
+               /* Negotiate ord/ird: if upperlayer requested ord larger than
+                * ird advertised by remote, we need to decrease our ord
+                */
+               if (iparams->ord > ep->cm_info.ird)
+                       iparams->ord = ep->cm_info.ird;
+
+               /* With MPA_RTR_TYPE_ZERO_READ an ird of 0 is raised to 1 */
+               if ((ep->rtr_type & MPA_RTR_TYPE_ZERO_READ) &&
+                   (iparams->ird == 0))
+                       iparams->ird = 1;
+       }
+
+       /* Update cm_info ord/ird to be negotiated values */
+       ep->cm_info.ord = iparams->ord;
+       ep->cm_info.ird = iparams->ird;
+
+       qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+       /* Caller's private data is placed after the first mpa_data_size
+        * bytes of the outgoing private-data buffer.
+        */
+       ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+       ep->cm_info.private_data_len = iparams->private_data_len +
+                                      mpa_data_size;
+
+       memcpy((u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size,
+              iparams->private_data, iparams->private_data_len);
+
+       rc = qed_iwarp_mpa_offload(p_hwfn, ep);
+       if (rc)
+               qed_iwarp_modify_qp(p_hwfn,
+                                   iparams->qp, QED_IWARP_QP_STATE_ERROR, 1);
+
+       return rc;
+}
+
+/* Reject the connection request represented by iparams->ep_context.
+ *
+ * Detaches any QP from the endpoint, builds the outgoing private data and
+ * starts the MPA offload.
+ *
+ * Returns -EINVAL for a NULL endpoint, otherwise the result of
+ * qed_iwarp_mpa_offload().
+ */
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams)
+{
+       struct qed_iwarp_ep *ep = iparams->ep_context;
+       struct qed_hwfn *p_hwfn = rdma_cxt;
+       u8 mpa_data_size = 0;
+       u8 *user_pdata;
+
+       if (!ep) {
+               DP_ERR(p_hwfn, "Ep Context receive in reject is NULL\n");
+               return -EINVAL;
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x)\n", ep->tcp_cid);
+
+       ep->cb_context = iparams->cb_context;
+       ep->qp = NULL;
+
+       qed_iwarp_mpa_v2_set_private(p_hwfn, ep, &mpa_data_size);
+
+       ep->cm_info.private_data = ep->ep_buffer_virt->out_pdata;
+       ep->cm_info.private_data_len = iparams->private_data_len +
+                                      mpa_data_size;
+
+       /* Caller's private data goes after the first mpa_data_size bytes */
+       user_pdata = (u8 *)ep->ep_buffer_virt->out_pdata + mpa_data_size;
+       memcpy(user_pdata, iparams->private_data, iparams->private_data_len);
+
+       return qed_iwarp_mpa_offload(p_hwfn, ep);
+}
+
+/* Dump @cm_info (ip version, addresses, ports, vlan, private data length,
+ * ord and ird) to the verbose RDMA log.
+ */
+static void
+qed_iwarp_print_cm_info(struct qed_hwfn *p_hwfn,
+                       struct qed_iwarp_cm_info *cm_info)
+{
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "ip_version = %d\n",
+                  cm_info->ip_version);
+
+       /* Anything that is not IPv4 is printed with the IPv6 format */
+       if (cm_info->ip_version != QED_TCP_IPV4) {
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "remote_ip %pI6h:%x, local_ip %pI6h:%x vlan=%x\n",
+                          cm_info->remote_ip, cm_info->remote_port,
+                          cm_info->local_ip, cm_info->local_port,
+                          cm_info->vlan);
+       } else {
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "remote_ip %pI4h:%x, local_ip %pI4h:%x vlan=%x\n",
+                          cm_info->remote_ip, cm_info->remote_port,
+                          cm_info->local_ip, cm_info->local_port,
+                          cm_info->vlan);
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "private_data_len = %x ord = %d, ird = %d\n",
+                  cm_info->private_data_len, cm_info->ord, cm_info->ird);
+}
+
+/* Post @buf as an rx buffer on the LL2 connection @handle.
+ *
+ * If posting fails the buffer is freed here (both its DMA memory and the
+ * descriptor), so the caller must not touch @buf after an error.
+ *
+ * Returns the result of qed_ll2_post_rx_buffer().
+ */
+static int
+qed_iwarp_ll2_post_rx(struct qed_hwfn *p_hwfn,
+                     struct qed_iwarp_ll2_buff *buf, u8 handle)
+{
+       int rc;
+
+       rc = qed_ll2_post_rx_buffer(p_hwfn, handle, buf->data_phys_addr,
+                                   (u16)buf->buff_size, buf, 1);
+       if (!rc)
+               return 0;
+
+       DP_NOTICE(p_hwfn,
+                 "Failed to repost rx buffer to ll2 rc = %d, handle=%d\n",
+                 rc, handle);
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev, buf->buff_size,
+                         buf->data, buf->data_phys_addr);
+       kfree(buf);
+
+       return rc;
+}
+
+/* Check whether an endpoint with the same local/remote port, vlan and
+ * local/remote ip as @cm_info is already on the active ep list.
+ *
+ * A match is logged (together with @cm_info) and reported as true.
+ */
+static bool
+qed_iwarp_ep_exists(struct qed_hwfn *p_hwfn, struct qed_iwarp_cm_info *cm_info)
+{
+       struct qed_iwarp_ep *ep;
+
+       list_for_each_entry(ep,
+                           &p_hwfn->p_rdma_info->iwarp.ep_list,
+                           list_entry) {
+               if (ep->cm_info.local_port != cm_info->local_port ||
+                   ep->cm_info.remote_port != cm_info->remote_port ||
+                   ep->cm_info.vlan != cm_info->vlan)
+                       continue;
+
+               if (memcmp(&ep->cm_info.local_ip, cm_info->local_ip,
+                          sizeof(cm_info->local_ip)))
+                       continue;
+
+               if (memcmp(&ep->cm_info.remote_ip, cm_info->remote_ip,
+                          sizeof(cm_info->remote_ip)))
+                       continue;
+
+               DP_NOTICE(p_hwfn,
+                         "SYN received on active connection - dropping\n");
+               qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+               return true;
+       }
+
+       return false;
+}
+
+/* Find the listener that should handle a connection described by @cm_info.
+ *
+ * A listener matches when its port equals cm_info->local_port and either
+ * its ip address is all zeroes, or both its ip address and vlan equal the
+ * local ip and vlan in @cm_info. The first match on the listen list wins.
+ *
+ * Returns the listener, or NULL when none matches.
+ */
+static struct qed_iwarp_listener *
+qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
+                      struct qed_iwarp_cm_info *cm_info)
+{
+       static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+       struct qed_iwarp_listener *listener;
+
+       qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+       list_for_each_entry(listener,
+                           &p_hwfn->p_rdma_info->iwarp.listen_list,
+                           list_entry) {
+               if (listener->port != cm_info->local_port)
+                       continue;
+
+               /* All-zero address first, then exact ip + vlan */
+               if (!memcmp(listener->ip_addr, ip_zero, sizeof(ip_zero)) ||
+                   (!memcmp(listener->ip_addr, cm_info->local_ip,
+                            sizeof(cm_info->local_ip)) &&
+                    listener->vlan == cm_info->vlan)) {
+                       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                                  "listener found = %p\n", listener);
+                       return listener;
+               }
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "listener not found\n");
+       return NULL;
+}
+
+/* Parse the Ethernet/IP/TCP headers of a received packet.
+ *
+ * Fills @cm_info with the vlan id (when an 802.1Q tag is present), the
+ * ip version, the local/remote addresses and ports (all converted to host
+ * byte order), copies the source/destination MACs to @remote_mac_addr /
+ * @local_mac_addr, and reports the ip payload length and the offset of the
+ * TCP header from the start of @buf.
+ *
+ * Returns 0 on success, -EINVAL if the ethertype is neither IPv4 nor IPv6
+ * or if the TCP SYN flag is not set.
+ */
+static int
+qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
+                      struct qed_iwarp_cm_info *cm_info,
+                      void *buf,
+                      u8 *remote_mac_addr,
+                      u8 *local_mac_addr,
+                      int *payload_len, int *tcp_start_offset)
+{
+       struct ethhdr *ethh = buf;
+       struct vlan_ethhdr *vethh;
+       int eth_hlen = ETH_HLEN;
+       struct ipv6hdr *ip6h;
+       struct tcphdr *tcph;
+       struct iphdr *iph;
+       int eth_type;
+       int ip_hlen;
+       int word;
+
+       eth_type = ntohs(ethh->h_proto);
+       if (eth_type == ETH_P_8021Q) {
+               /* Tagged frame: the real ethertype follows the vlan tag */
+               vethh = (struct vlan_ethhdr *)ethh;
+               cm_info->vlan = ntohs(vethh->h_vlan_TCI) & VLAN_VID_MASK;
+               eth_type = ntohs(vethh->h_vlan_encapsulated_proto);
+               eth_hlen += sizeof(u32);
+       }
+
+       ether_addr_copy(remote_mac_addr, ethh->h_source);
+       ether_addr_copy(local_mac_addr, ethh->h_dest);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_type =%d source mac: %pM\n",
+                  eth_type, ethh->h_source);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "eth_hlen=%d destination mac: %pM\n",
+                  eth_hlen, ethh->h_dest);
+
+       iph = (struct iphdr *)((u8 *)(ethh) + eth_hlen);
+
+       switch (eth_type) {
+       case ETH_P_IP:
+               cm_info->local_ip[0] = ntohl(iph->daddr);
+               cm_info->remote_ip[0] = ntohl(iph->saddr);
+               cm_info->ip_version = TCP_IPV4;
+
+               ip_hlen = (iph->ihl) * sizeof(u32);
+               *payload_len = ntohs(iph->tot_len) - ip_hlen;
+               break;
+       case ETH_P_IPV6:
+               ip6h = (struct ipv6hdr *)iph;
+               for (word = 0; word < 4; word++) {
+                       cm_info->local_ip[word] =
+                           ntohl(ip6h->daddr.in6_u.u6_addr32[word]);
+                       cm_info->remote_ip[word] =
+                           ntohl(ip6h->saddr.in6_u.u6_addr32[word]);
+               }
+               cm_info->ip_version = TCP_IPV6;
+
+               ip_hlen = sizeof(*ip6h);
+               *payload_len = ntohs(ip6h->payload_len);
+               break;
+       default:
+               DP_NOTICE(p_hwfn, "Unexpected ethertype on ll2 %x\n", eth_type);
+               return -EINVAL;
+       }
+
+       tcph = (struct tcphdr *)((u8 *)iph + ip_hlen);
+
+       if (!tcph->syn) {
+               DP_NOTICE(p_hwfn,
+                         "Only SYN type packet expected on this ll2 conn, iph->ihl=%d source=%d dest=%d\n",
+                         iph->ihl, tcph->source, tcph->dest);
+               return -EINVAL;
+       }
+
+       cm_info->local_port = ntohs(tcph->dest);
+       cm_info->remote_port = ntohs(tcph->source);
+
+       qed_iwarp_print_cm_info(p_hwfn, cm_info);
+
+       *tcp_start_offset = eth_hlen + ip_hlen;
+
+       return 0;
+}
+
+/* LL2 rx completion handler for packets arriving on the SYN connection.
+ *
+ * The packet is parsed; if no listener matches it is sent back to the chip
+ * through the LL2 tx path (tx_dest = QED_LL2_TX_DEST_LB). Otherwise a free
+ * endpoint is taken, filled from the packet and the listener, and the TCP
+ * offload is started with the SYN buffer attached to the endpoint.
+ *
+ * On every failure path the rx buffer is reposted on the SYN connection.
+ * The SYN handle is therefore read up front, before any check that may
+ * jump to the error label.
+ */
+static void
+qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
+{
+       struct qed_iwarp_ll2_buff *buf = data->cookie;
+       struct qed_iwarp_listener *listener;
+       struct qed_ll2_tx_pkt_info tx_pkt;
+       struct qed_iwarp_cm_info cm_info;
+       struct qed_hwfn *p_hwfn = cxt;
+       u8 remote_mac_addr[ETH_ALEN];
+       u8 local_mac_addr[ETH_ALEN];
+       struct qed_iwarp_ep *ep;
+       int tcp_start_offset;
+       u8 ts_hdr_size = 0;
+       u8 ll2_syn_handle;
+       int payload_len;
+       u32 hdr_size;
+       int rc;
+
+       memset(&cm_info, 0, sizeof(cm_info));
+
+       /* Needed by the error path below, so fetch it before any goto */
+       ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+
+       if (GET_FIELD(data->parse_flags,
+                     PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) &&
+           GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) {
+               DP_NOTICE(p_hwfn, "Syn packet received with checksum error\n");
+               goto err;
+       }
+
+       rc = qed_iwarp_parse_rx_pkt(p_hwfn, &cm_info, (u8 *)(buf->data) +
+                                   data->u.placement_offset, remote_mac_addr,
+                                   local_mac_addr, &payload_len,
+                                   &tcp_start_offset);
+       if (rc)
+               goto err;
+
+       /* Check if there is a listener for this 4-tuple+vlan */
+       listener = qed_iwarp_get_listener(p_hwfn, &cm_info);
+       if (!listener) {
+               DP_VERBOSE(p_hwfn,
+                          QED_MSG_RDMA,
+                          "SYN received on tuple not listened on parse_flags=%d packet len=%d\n",
+                          data->parse_flags, data->length.packet_length);
+
+               /* Nobody listens on this tuple: hand the packet back to the
+                * chip through the LL2 tx path, reusing the rx buffer.
+                */
+               memset(&tx_pkt, 0, sizeof(tx_pkt));
+               tx_pkt.num_of_bds = 1;
+               tx_pkt.vlan = data->vlan;
+
+               if (GET_FIELD(data->parse_flags,
+                             PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
+                       SET_FIELD(tx_pkt.bd_flags,
+                                 CORE_TX_BD_DATA_VLAN_INSERTION, 1);
+
+               tx_pkt.l4_hdr_offset_w = (data->length.packet_length) >> 2;
+               tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+               tx_pkt.first_frag = buf->data_phys_addr +
+                                   data->u.placement_offset;
+               tx_pkt.first_frag_len = data->length.packet_length;
+               tx_pkt.cookie = buf;
+
+               rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_syn_handle,
+                                              &tx_pkt, true);
+
+               if (rc) {
+                       DP_NOTICE(p_hwfn,
+                                 "Can't post SYN back to chip rc=%d\n", rc);
+                       goto err;
+               }
+               return;
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Received syn on listening port\n");
+       /* There may be an open ep on this connection if this is a syn
+        * retransmit... need to make sure there isn't...
+        */
+       if (qed_iwarp_ep_exists(p_hwfn, &cm_info))
+               goto err;
+
+       ep = qed_iwarp_get_free_ep(p_hwfn);
+       if (!ep)
+               goto err;
+
+       spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+       list_add_tail(&ep->list_entry, &p_hwfn->p_rdma_info->iwarp.ep_list);
+       spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+       ether_addr_copy(ep->remote_mac_addr, remote_mac_addr);
+       ether_addr_copy(ep->local_mac_addr, local_mac_addr);
+
+       memcpy(&ep->cm_info, &cm_info, sizeof(ep->cm_info));
+
+       if (p_hwfn->p_rdma_info->iwarp.tcp_flags & QED_IWARP_TS_EN)
+               ts_hdr_size = TIMESTAMP_HEADER_SIZE;
+
+       /* mss = max mtu minus 40 (IPv4) or 60 (otherwise) header bytes and
+        * the timestamp header when enabled, capped at the FW maximum.
+        */
+       hdr_size = ((cm_info.ip_version == QED_TCP_IPV4) ? 40 : 60) +
+                  ts_hdr_size;
+       ep->mss = p_hwfn->p_rdma_info->iwarp.max_mtu - hdr_size;
+       ep->mss = min_t(u16, QED_IWARP_MAX_FW_MSS, ep->mss);
+
+       ep->event_cb = listener->event_cb;
+       ep->cb_context = listener->cb_context;
+       ep->connect_mode = TCP_CONNECT_PASSIVE;
+
+       /* Keep the SYN buffer with the endpoint for the offload */
+       ep->syn = buf;
+       ep->syn_ip_payload_length = (u16)payload_len;
+       ep->syn_phy_addr = buf->data_phys_addr + data->u.placement_offset +
+                          tcp_start_offset;
+
+       rc = qed_iwarp_tcp_offload(p_hwfn, ep);
+       if (rc) {
+               qed_iwarp_return_ep(p_hwfn, ep);
+               goto err;
+       }
+
+       return;
+err:
+       qed_iwarp_ll2_post_rx(p_hwfn, buf, ll2_syn_handle);
+}
+
+/* LL2 rx release callback: free the rx buffer passed as @cookie, both
+ * its DMA memory and its descriptor.
+ */
+static void qed_iwarp_ll2_rel_rx_pkt(void *cxt, u8 connection_handle,
+                                    void *cookie, dma_addr_t rx_buf_addr,
+                                    bool b_last_packet)
+{
+       struct qed_hwfn *p_hwfn = cxt;
+       struct device *dev = &p_hwfn->cdev->pdev->dev;
+       struct qed_iwarp_ll2_buff *rx_buf = cookie;
+
+       dma_free_coherent(dev, rx_buf->buff_size, rx_buf->data,
+                         rx_buf->data_phys_addr);
+       kfree(rx_buf);
+}
+
+/* LL2 tx completion callback: the transmitted buffer started life as an
+ * rx buffer, so it is posted back to the rx queue of the same connection.
+ */
+static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle,
+                                     void *cookie, dma_addr_t first_frag_addr,
+                                     bool b_last_fragment, bool b_last_packet)
+{
+       struct qed_iwarp_ll2_buff *rx_buf = cookie;
+
+       qed_iwarp_ll2_post_rx((struct qed_hwfn *)cxt, rx_buf,
+                             connection_handle);
+}
+
+/* LL2 tx release callback: free the buffer passed as @cookie, both its
+ * DMA memory and its descriptor. A NULL cookie is ignored.
+ */
+static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
+                                    void *cookie, dma_addr_t first_frag_addr,
+                                    bool b_last_fragment, bool b_last_packet)
+{
+       struct qed_iwarp_ll2_buff *tx_buf = cookie;
+       struct qed_hwfn *p_hwfn = cxt;
+
+       if (tx_buf) {
+               dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                                 tx_buf->buff_size, tx_buf->data,
+                                 tx_buf->data_phys_addr);
+
+               kfree(tx_buf);
+       }
+}
+
+/* Tear down the LL2 SYN connection (when one is held) and remove the
+ * iWARP MAC filter.
+ *
+ * A failure to terminate the connection is logged and returned, but the
+ * connection is still released and the filter still removed.
+ */
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       int rc = 0;
+
+       if (iwarp_info->ll2_syn_handle == QED_IWARP_HANDLE_INVAL)
+               goto remove_filter;
+
+       rc = qed_ll2_terminate_connection(p_hwfn, iwarp_info->ll2_syn_handle);
+       if (rc)
+               DP_INFO(p_hwfn, "Failed to terminate syn connection\n");
+
+       qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_syn_handle);
+       iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+
+remove_filter:
+       qed_llh_remove_mac_filter(p_hwfn, p_ptt, iwarp_info->mac_addr);
+
+       return rc;
+}
+
+/* Allocate @num_rx_bufs DMA-coherent buffers of @buff_size bytes and post
+ * each one as an rx buffer on LL2 connection @ll2_handle.
+ *
+ * Stops at the first failure. Returns 0 on success, -ENOMEM when an
+ * allocation fails, or the error from posting a buffer.
+ */
+static int
+qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
+                           int num_rx_bufs, int buff_size, u8 ll2_handle)
+{
+       struct device *dev = &p_hwfn->cdev->pdev->dev;
+       struct qed_iwarp_ll2_buff *rx_buf;
+       int idx;
+       int rc;
+
+       for (idx = 0; idx < num_rx_bufs; idx++) {
+               rx_buf = kzalloc(sizeof(*rx_buf), GFP_KERNEL);
+               if (!rx_buf)
+                       return -ENOMEM;
+
+               rx_buf->data = dma_alloc_coherent(dev, buff_size,
+                                                 &rx_buf->data_phys_addr,
+                                                 GFP_KERNEL);
+               if (!rx_buf->data) {
+                       kfree(rx_buf);
+                       return -ENOMEM;
+               }
+
+               rx_buf->buff_size = buff_size;
+
+               /* On failure qed_iwarp_ll2_post_rx() frees this buffer;
+                * buffers posted earlier are left for qed_ll2 to
+                * deallocate.
+                */
+               rc = qed_iwarp_ll2_post_rx(p_hwfn, rx_buf, ll2_handle);
+               if (rc)
+                       return rc;
+       }
+
+       return 0;
+}
+
+/* Buffer size for a given @mtu: the mtu plus an Ethernet header, two VLAN
+ * headers, 2 extra bytes and one cache line, rounded up to a multiple of
+ * ETH_CACHE_LINE_SIZE.
+ * NOTE(review): the purpose of the extra 2 bytes and the additional cache
+ * line is not visible here - presumably placement offset / alignment
+ * slack; confirm against the users of this macro.
+ */
+#define QED_IWARP_MAX_BUF_SIZE(mtu)                                 \
+       ALIGN((mtu) + ETH_HLEN + 2 * VLAN_HLEN + 2 + ETH_CACHE_LINE_SIZE, \
+               ETH_CACHE_LINE_SIZE)
+
+/* Bring up the LL2 connection used to receive SYN packets.
+ *
+ * Records the max mtu and MAC address from @params, installs the MAC
+ * filter, acquires and establishes the LL2 connection with the iWARP
+ * rx/tx callbacks, and posts the initial rx buffers.
+ *
+ * Returns 0 on success or the error of the failing step. A failed acquire
+ * removes the MAC filter directly; later failures are unwound through
+ * qed_iwarp_ll2_stop().
+ */
+static int
+qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
+                   struct qed_rdma_start_in_params *params,
+                   struct qed_ptt *p_ptt)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       struct qed_ll2_acquire_data data;
+       struct qed_ll2_cbs cbs;
+       int rc;
+
+       iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+       iwarp_info->max_mtu = params->max_mtu;
+       ether_addr_copy(iwarp_info->mac_addr, params->mac_addr);
+
+       rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+       if (rc)
+               return rc;
+
+       /* Callbacks of the SYN connection */
+       cbs.rx_comp_cb = qed_iwarp_ll2_comp_syn_pkt;
+       cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt;
+       cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt;
+       cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt;
+       cbs.cookie = p_hwfn;
+
+       /* Parameters of the SYN connection */
+       memset(&data, 0, sizeof(data));
+       data.input.conn_type = QED_LL2_TYPE_IWARP;
+       data.input.mtu = QED_IWARP_MAX_SYN_PKT_SIZE;
+       data.input.rx_num_desc = QED_IWARP_LL2_SYN_RX_SIZE;
+       data.input.tx_num_desc = QED_IWARP_LL2_SYN_TX_SIZE;
+       data.input.tx_max_bds_per_packet = 1;   /* will never be fragmented */
+       data.input.tx_tc = PKT_LB_TC;
+       data.input.tx_dest = QED_LL2_TX_DEST_LB;
+       data.p_connection_handle = &iwarp_info->ll2_syn_handle;
+       data.cbs = &cbs;
+
+       rc = qed_ll2_acquire_connection(p_hwfn, &data);
+       if (rc) {
+               DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
+               qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+               return rc;
+       }
+
+       rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_syn_handle);
+       if (rc) {
+               DP_NOTICE(p_hwfn, "Failed to establish LL2 connection\n");
+               goto err_stop;
+       }
+
+       rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
+                                        QED_IWARP_LL2_SYN_RX_SIZE,
+                                        QED_IWARP_MAX_SYN_PKT_SIZE,
+                                        iwarp_info->ll2_syn_handle);
+       if (!rc)
+               return 0;
+
+err_stop:
+       qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+
+       return rc;
+}
+
+/* Initialize the per-function iWARP state and start the LL2 SYN path.
+ *
+ * Sets the TCP flags, receive window scale, CRC, MPA revision, peer2peer
+ * and RTR defaults, initializes the qp lock and the active ep / listener
+ * lists, registers the iWARP async event callback and calls
+ * qed_iwarp_ll2_start(), whose result is returned.
+ */
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+                   struct qed_rdma_start_in_params *params)
+{
+       struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+       u32 rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF;
+
+       iwarp_info->tcp_flags = QED_IWARP_TS_EN;
+
+       /* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */
+       iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) -
+           ilog2(QED_IWARP_RCV_WND_SIZE_MIN);
+
+       iwarp_info->crc_needed = QED_IWARP_PARAM_CRC_NEEDED;
+       iwarp_info->mpa_rev = MPA_NEGOTIATION_TYPE_ENHANCED;
+       iwarp_info->peer2peer = QED_IWARP_PARAM_P2P;
+       iwarp_info->rtr_type =  MPA_RTR_TYPE_ZERO_SEND |
+                               MPA_RTR_TYPE_ZERO_WRITE |
+                               MPA_RTR_TYPE_ZERO_READ;
+
+       spin_lock_init(&p_hwfn->p_rdma_info->iwarp.qp_lock);
+       INIT_LIST_HEAD(&iwarp_info->ep_list);
+       INIT_LIST_HEAD(&iwarp_info->listen_list);
+
+       qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
+                                 qed_iwarp_async_event);
+
+       return qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
+}
+
+/* Stop iWARP on @p_hwfn: free the pre-allocated endpoints, wait for all
+ * cids to drain, unregister the async event callback and stop the LL2
+ * SYN path.
+ *
+ * If the cid wait fails its error is returned and the remaining steps
+ * are skipped.
+ */
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       int rc;
+
+       qed_iwarp_free_prealloc_ep(p_hwfn);
+
+       rc = qed_iwarp_wait_for_all_cids(p_hwfn);
+       if (!rc) {
+               qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
+               rc = qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+       }
+
+       return rc;
+}
+
+void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
+                          struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+       struct qed_iwarp_cm_event_params params;
+
+       qed_iwarp_modify_qp(p_hwfn, ep->qp, QED_IWARP_QP_STATE_ERROR, true);
+
+       params.event = QED_IWARP_EVENT_CLOSE;
+       params.ep_context = ep;
+       params.cm_info = &ep->cm_info;
+       params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ?
+                        0 : -ECONNRESET;
+
+       ep->state = QED_IWARP_EP_CLOSED;
+       spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+       list_del(&ep->list_entry);
+       spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock);
+
+       ep->event_cb(ep->cb_context, &params);
+}
+
+/* Translate a firmware exception code into a CM event and deliver it
+ * through the endpoint's event callback.
+ *
+ * Only the LLP closed/reset exceptions carry an explicit status (0 and
+ * -ECONNRESET respectively); every other recognised exception is delivered
+ * with status 0. Unrecognised codes are logged and dropped without invoking
+ * the callback.
+ */
+void qed_iwarp_exception_received(struct qed_hwfn *p_hwfn,
+                                 struct qed_iwarp_ep *ep, int fw_ret_code)
+{
+       struct qed_iwarp_cm_event_params params;
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "EP(0x%x) fw_ret_code=%d\n",
+                  ep->cid, fw_ret_code);
+
+       /* Start from a zeroed event so that status is well defined for the
+        * exceptions below that do not set it; otherwise uninitialised stack
+        * data would be handed to the callback.
+        */
+       memset(&params, 0, sizeof(params));
+
+       switch (fw_ret_code) {
+       case IWARP_EXCEPTION_DETECTED_LLP_CLOSED:
+               params.status = 0;
+               params.event = QED_IWARP_EVENT_DISCONNECT;
+               break;
+       case IWARP_EXCEPTION_DETECTED_LLP_RESET:
+               params.status = -ECONNRESET;
+               params.event = QED_IWARP_EVENT_DISCONNECT;
+               break;
+       case IWARP_EXCEPTION_DETECTED_RQ_EMPTY:
+               params.event = QED_IWARP_EVENT_RQ_EMPTY;
+               break;
+       case IWARP_EXCEPTION_DETECTED_IRQ_FULL:
+               params.event = QED_IWARP_EVENT_IRQ_FULL;
+               break;
+       case IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT:
+               params.event = QED_IWARP_EVENT_LLP_TIMEOUT;
+               break;
+       case IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR:
+               params.event = QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR;
+               break;
+       case IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW:
+               params.event = QED_IWARP_EVENT_CQ_OVERFLOW;
+               break;
+       case IWARP_EXCEPTION_DETECTED_LOCAL_CATASTROPHIC:
+               params.event = QED_IWARP_EVENT_QP_CATASTROPHIC;
+               break;
+       case IWARP_EXCEPTION_DETECTED_LOCAL_ACCESS_ERROR:
+               params.event = QED_IWARP_EVENT_LOCAL_ACCESS_ERROR;
+               break;
+       case IWARP_EXCEPTION_DETECTED_REMOTE_OPERATION_ERROR:
+               params.event = QED_IWARP_EVENT_REMOTE_OPERATION_ERROR;
+               break;
+       case IWARP_EXCEPTION_DETECTED_TERMINATE_RECEIVED:
+               params.event = QED_IWARP_EVENT_TERMINATE_RECEIVED;
+               break;
+       default:
+               /* Nothing to report to the upper layer */
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "Unhandled exception received...fw_ret_code=%d\n",
+                          fw_ret_code);
+               return;
+       }
+
+       params.ep_context = ep;
+       params.cm_info = &ep->cm_info;
+       ep->event_cb(ep->cb_context, &params);
+}
+
+/* Handle a TCP connection that failed to establish.
+ *
+ * The endpoint is marked closed and the firmware return code is logged and
+ * mapped to an errno. A passive-side endpoint has its tcp_cid invalidated
+ * and is handed to qed_iwarp_return_ep() without raising any event. An
+ * active-side endpoint receives an ACTIVE_COMPLETE event carrying the error
+ * and is then removed from its list under iw_lock.
+ */
+static void
+qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn,
+                                  struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+       struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
+       struct qed_iwarp_cm_event_params evt;
+
+       memset(&evt, 0, sizeof(evt));
+       evt.event = QED_IWARP_EVENT_ACTIVE_COMPLETE;
+       evt.ep_context = ep;
+       evt.cm_info = &ep->cm_info;
+       /* Reset is the common outcome; the cases below override it */
+       evt.status = -ECONNRESET;
+       ep->state = QED_IWARP_EP_CLOSED;
+
+       switch (fw_return_code) {
+       case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET:
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "%s(0x%x) TCP connect got invalid packet\n",
+                          QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+               break;
+       case IWARP_CONN_ERROR_TCP_CONNECTION_RST:
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "%s(0x%x) TCP Connection Reset\n",
+                          QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+               break;
+       case IWARP_CONN_ERROR_TCP_CONNECT_TIMEOUT:
+               DP_NOTICE(p_hwfn, "%s(0x%x) TCP timeout\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+               evt.status = -EBUSY;
+               break;
+       case IWARP_CONN_ERROR_MPA_NOT_SUPPORTED_VER:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA not supported VER\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+               evt.status = -ECONNREFUSED;
+               break;
+       case IWARP_CONN_ERROR_MPA_INVALID_PACKET:
+               DP_NOTICE(p_hwfn, "%s(0x%x) MPA Invalid Packet\n",
+                         QED_IWARP_CONNECT_MODE_STRING(ep), ep->tcp_cid);
+               break;
+       default:
+               DP_ERR(p_hwfn,
+                      "%s(0x%x) Unexpected return code tcp connect: %d\n",
+                      QED_IWARP_CONNECT_MODE_STRING(ep),
+                      ep->tcp_cid, fw_return_code);
+               break;
+       }
+
+       if (ep->connect_mode == TCP_CONNECT_PASSIVE) {
+               /* No event is raised for the passive side */
+               ep->tcp_cid = QED_IWARP_INVALID_TCP_CID;
+               qed_iwarp_return_ep(p_hwfn, ep);
+               return;
+       }
+
+       ep->event_cb(ep->cb_context, &evt);
+
+       spin_lock_bh(&iwarp->iw_lock);
+       list_del(&ep->list_entry);
+       spin_unlock_bh(&iwarp->iw_lock);
+}
+
+/* Handle the async completion of the TCP connect stage.
+ *
+ * On the passive side the SYN buffer is first posted back to the LL2 rx
+ * queue. A failed connect is then passed to
+ * qed_iwarp_tcp_connect_unsuccessful() on either side; a successful one
+ * continues with qed_iwarp_mpa_received() (passive) or
+ * qed_iwarp_mpa_offload() (active).
+ */
+void
+qed_iwarp_connect_complete(struct qed_hwfn *p_hwfn,
+                          struct qed_iwarp_ep *ep, u8 fw_return_code)
+{
+       bool passive = ep->connect_mode == TCP_CONNECT_PASSIVE;
+
+       if (passive) {
+               /* Done with the SYN packet, post back to ll2 rx */
+               qed_iwarp_ll2_post_rx(p_hwfn, ep->syn,
+                                     p_hwfn->p_rdma_info->iwarp.ll2_syn_handle);
+               ep->syn = NULL;
+       }
+
+       /* If a passive connect failed - upper layer doesn't know about it */
+       if (fw_return_code != RDMA_RETURN_OK) {
+               qed_iwarp_tcp_connect_unsuccessful(p_hwfn, ep, fw_return_code);
+               return;
+       }
+
+       if (passive)
+               qed_iwarp_mpa_received(p_hwfn, ep);
+       else
+               qed_iwarp_mpa_offload(p_hwfn, ep);
+}
+
+/* Sanity-check an endpoint pointer taken from an async event: it must be
+ * non-NULL and carry the QED_EP_SIG signature. A failure is logged.
+ */
+static inline bool
+qed_iwarp_check_ep_ok(struct qed_hwfn *p_hwfn, struct qed_iwarp_ep *ep)
+{
+       if (ep && ep->sig == QED_EP_SIG)
+               return true;
+
+       DP_ERR(p_hwfn, "ERROR ON ASYNC ep=%p\n", ep);
+       return false;
+}
+
+/* iWARP async-event handler (registered for PROTOCOLID_IWARP).
+ *
+ * For endpoint-related events the async handle holds the endpoint pointer,
+ * which is validated before the event is dispatched to its handler.
+ * CID_CLEANED takes the cid from the low dword of the handle and
+ * CQ_OVERFLOW is forwarded to the RDMA affiliated-event callback.
+ *
+ * Returns 0 on success, -EINVAL for an invalid endpoint or an unexpected
+ * event code.
+ */
+static int qed_iwarp_async_event(struct qed_hwfn *p_hwfn,
+                                u8 fw_event_code, u16 echo,
+                                union event_ring_data *data,
+                                u8 fw_return_code)
+{
+       struct regpair *handle = &data->rdma_data.async_handle;
+       struct qed_iwarp_ep *ep;
+       u16 cid;
+
+       ep = (struct qed_iwarp_ep *)(uintptr_t)HILO_64(handle->hi,
+                                                      handle->lo);
+
+       /* Events that reference an endpoint are rejected up front when the
+        * endpoint does not pass the sanity check.
+        */
+       switch (fw_event_code) {
+       case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE:
+       case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED:
+       case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE:
+       case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED:
+       case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE:
+               if (!qed_iwarp_check_ep_ok(p_hwfn, ep))
+                       return -EINVAL;
+               break;
+       default:
+               break;
+       }
+
+       switch (fw_event_code) {
+       case IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE:
+               /* Async completion after TCP 3-way handshake */
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "EP(0x%x) IWARP_EVENT_TYPE_ASYNC_CONNECT_COMPLETE fw_ret_code=%d\n",
+                          ep->tcp_cid, fw_return_code);
+               qed_iwarp_connect_complete(p_hwfn, ep, fw_return_code);
+               break;
+       case IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED:
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED fw_ret_code=%d\n",
+                          ep->cid, fw_return_code);
+               qed_iwarp_exception_received(p_hwfn, ep, fw_return_code);
+               break;
+       case IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE:
+               /* Async completion for Close Connection ramrod */
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE fw_ret_code=%d\n",
+                          ep->cid, fw_return_code);
+               qed_iwarp_qp_in_error(p_hwfn, ep, fw_return_code);
+               break;
+       case IWARP_EVENT_TYPE_ASYNC_ENHANCED_MPA_REPLY_ARRIVED:
+               /* Async event for active side only */
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_MPA_REPLY_ARRIVED fw_ret_code=%d\n",
+                          ep->cid, fw_return_code);
+               qed_iwarp_mpa_reply_arrived(p_hwfn, ep);
+               break;
+       case IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE:
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "QP(0x%x) IWARP_EVENT_TYPE_ASYNC_MPA_HANDSHAKE_COMPLETE fw_ret_code=%d\n",
+                          ep->cid, fw_return_code);
+               qed_iwarp_mpa_complete(p_hwfn, ep, fw_return_code);
+               break;
+       case IWARP_EVENT_TYPE_ASYNC_CID_CLEANED:
+               /* Here the handle carries a cid rather than an endpoint */
+               cid = (u16)le32_to_cpu(handle->lo);
+               DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                          "(0x%x)IWARP_EVENT_TYPE_ASYNC_CID_CLEANED\n", cid);
+               qed_iwarp_cid_cleaned(p_hwfn, cid);
+               break;
+       case IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW:
+               DP_NOTICE(p_hwfn, "IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW\n");
+
+               p_hwfn->p_rdma_info->events.affiliated_event(
+                       p_hwfn->p_rdma_info->events.context,
+                       QED_IWARP_EVENT_CQ_OVERFLOW,
+                       (void *)handle);
+               break;
+       default:
+               DP_ERR(p_hwfn, "Received unexpected async iwarp event %d\n",
+                      fw_event_code);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Create a listener from the caller's parameters and add it to the iWARP
+ * listen list.
+ *
+ * @rdma_cxt: the qed_hwfn the listener belongs to
+ * @iparams:  address, port, vlan, backlog and event callback to listen with
+ * @oparams:  receives the listener handle, to be passed later to
+ *            qed_iwarp_destroy_listen()
+ *
+ * Returns 0 on success or -ENOMEM if the listener cannot be allocated.
+ */
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+                       struct qed_iwarp_listen_in *iparams,
+                       struct qed_iwarp_listen_out *oparams)
+{
+       struct qed_hwfn *p_hwfn = rdma_cxt;
+       struct qed_iwarp_listener *listener;
+       struct qed_iwarp_info *iwarp;
+
+       listener = kzalloc(sizeof(*listener), GFP_KERNEL);
+       if (!listener)
+               return -ENOMEM;
+
+       /* Where to listen */
+       listener->ip_version = iparams->ip_version;
+       memcpy(listener->ip_addr, iparams->ip_addr, sizeof(listener->ip_addr));
+       listener->port = iparams->port;
+       listener->vlan = iparams->vlan;
+       listener->max_backlog = iparams->max_backlog;
+
+       /* Whom to notify */
+       listener->event_cb = iparams->event_cb;
+       listener->cb_context = iparams->cb_context;
+
+       oparams->handle = listener;
+
+       iwarp = &p_hwfn->p_rdma_info->iwarp;
+       spin_lock_bh(&iwarp->iw_lock);
+       list_add_tail(&listener->list_entry, &iwarp->listen_list);
+       spin_unlock_bh(&iwarp->iw_lock);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+                  "callback=%p handle=%p ip=%x:%x:%x:%x port=0x%x vlan=0x%x\n",
+                  listener->event_cb, listener,
+                  listener->ip_addr[0], listener->ip_addr[1],
+                  listener->ip_addr[2], listener->ip_addr[3],
+                  listener->port, listener->vlan);
+
+       return 0;
+}
+
+/* Remove a listener from the iWARP listen list and free it.
+ *
+ * @rdma_cxt: the qed_hwfn the listener belongs to
+ * @handle:   the listener handle returned by qed_iwarp_create_listen()
+ *
+ * Always returns 0.
+ */
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle)
+{
+       struct qed_hwfn *p_hwfn = rdma_cxt;
+       struct qed_iwarp_info *iwarp = &p_hwfn->p_rdma_info->iwarp;
+       struct qed_iwarp_listener *listener = handle;
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "handle=%p\n", handle);
+
+       /* Unlink under the lock, free outside of it */
+       spin_lock_bh(&iwarp->iw_lock);
+       list_del(&listener->list_entry);
+       spin_unlock_bh(&iwarp->iw_lock);
+
+       kfree(listener);
+
+       return 0;
+}
+
+/* Post the MPA_OFFLOAD_SEND_RTR ramrod for the QP attached to an endpoint.
+ *
+ * @rdma_cxt: the qed_hwfn to post the ramrod on
+ * @iparams:  carries the endpoint context
+ *
+ * Returns -EINVAL if the endpoint context is NULL, otherwise the result of
+ * initialising and posting the slow-path request.
+ */
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams)
+{
+       struct qed_iwarp_ep *ep = iparams->ep_context;
+       struct qed_hwfn *p_hwfn = rdma_cxt;
+       struct qed_sp_init_data init_data;
+       struct qed_spq_entry *p_ent;
+       int rc;
+
+       if (!ep) {
+               DP_ERR(p_hwfn, "Ep Context receive in send_rtr is NULL\n");
+               return -EINVAL;
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP(0x%x) EP(0x%x)\n",
+                  ep->qp->icid, ep->tcp_cid);
+
+       /* The ramrod is addressed to the QP's icid */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = ep->qp->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = QED_SPQ_MODE_CB;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR,
+                                PROTOCOLID_IWARP, &init_data);
+       if (rc)
+               return rc;
+
+       rc = qed_spq_post(p_hwfn, p_ent, NULL);
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = 0x%x\n", rc);
+
+       return rc;
+}
+
+/* Report the QP state to the caller: the QP's iWARP state is converted with
+ * qed_iwarp2roce_state() and stored in out_params->state.
+ */
+void
+qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+                  struct qed_rdma_query_qp_out_params *out_params)
+{
+       out_params->state = qed_iwarp2roce_state(qp->iwarp_state);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h

new file mode 100644 (file)

index 0000000..148ef3c
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -0,0 +1,189 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QED_IWARP_H
+#define _QED_IWARP_H
+
+enum qed_iwarp_qp_state { /* iWARP QP states; see qed_roce2iwarp_state() */
+       QED_IWARP_QP_STATE_IDLE,
+       QED_IWARP_QP_STATE_RTS,
+       QED_IWARP_QP_STATE_TERMINATE,
+       QED_IWARP_QP_STATE_CLOSING,
+       QED_IWARP_QP_STATE_ERROR,       /* forced by qed_iwarp_qp_in_error() */
+};
+
+enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
+
+#define QED_IWARP_PREALLOC_CNT  (256)
+
+#define QED_IWARP_LL2_SYN_TX_SIZE       (128)
+#define QED_IWARP_LL2_SYN_RX_SIZE       (256)
+#define QED_IWARP_MAX_SYN_PKT_SIZE      (128)
+#define QED_IWARP_HANDLE_INVAL                 (0xff)
+
+struct qed_iwarp_ll2_buff { /* buffer posted on an iWARP LL2 connection */
+       void *data;                     /* CPU address (TODO confirm) */
+       dma_addr_t data_phys_addr;      /* DMA address of data (TODO confirm) */
+       u32 buff_size;                  /* size in bytes (TODO confirm) */
+};
+
+struct qed_iwarp_info { /* per-hwfn iWARP state, embedded in p_rdma_info */
+       struct list_head listen_list;   /* qed_iwarp_listener */
+       struct list_head ep_list;       /* qed_iwarp_ep */
+       struct list_head ep_free_list;  /* pre-allocated ep's */
+       spinlock_t iw_lock;     /* for iwarp resources */
+       spinlock_t qp_lock;     /* for teardown races */
+       u32 rcv_wnd_scale;      /* ilog2(window) - ilog2(minimal window) */
+       u16 max_mtu;
+       u8 mac_addr[ETH_ALEN];
+       u8 crc_needed;          /* defaults to QED_IWARP_PARAM_CRC_NEEDED */
+       u8 tcp_flags;           /* defaults to QED_IWARP_TS_EN */
+       u8 ll2_syn_handle;      /* LL2 connection SYN buffers are posted to */
+       u8 peer2peer;           /* defaults to QED_IWARP_PARAM_P2P */
+       enum mpa_negotiation_mode mpa_rev;      /* default: enhanced */
+       enum mpa_rtr_type rtr_type;     /* default: zero send|write|read */
+};
+
+enum qed_iwarp_ep_state { /* connection state of a qed_iwarp_ep */
+       QED_IWARP_EP_INIT,
+       QED_IWARP_EP_MPA_REQ_RCVD,
+       QED_IWARP_EP_MPA_OFFLOADED,
+       QED_IWARP_EP_ESTABLISHED,
+       QED_IWARP_EP_CLOSED /* set on qp-in-error and on failed TCP connect */
+};
+
+union async_output { /* NOTE(review): presumably written by firmware - confirm */
+       struct iwarp_eqe_data_mpa_async_completion mpa_response;
+       struct iwarp_eqe_data_tcp_async_completion mpa_request;
+};
+
+#define QED_MAX_PRIV_DATA_LEN (512)
+struct qed_iwarp_ep_memory { /* per-endpoint buffer, see ep_buffer_virt/phys */
+       u8 in_pdata[QED_MAX_PRIV_DATA_LEN];     /* private data, inbound (TODO confirm) */
+       u8 out_pdata[QED_MAX_PRIV_DATA_LEN];    /* private data, outbound (TODO confirm) */
+       union async_output async_output;
+};
+
+/* Endpoint structure represents a TCP connection. This connection can be
+ * associated with a QP or not (in which case QP==NULL)
+ */
+struct qed_iwarp_ep {
+       struct list_head list_entry;    /* list linkage, changed under iw_lock */
+       struct qed_rdma_qp *qp;
+       struct qed_iwarp_ep_memory *ep_buffer_virt;
+       dma_addr_t ep_buffer_phys;
+       enum qed_iwarp_ep_state state;
+       int sig;        /* must equal QED_EP_SIG; checked on async events */
+       struct qed_iwarp_cm_info cm_info;       /* passed along with CM events */
+       enum tcp_connect_mode connect_mode;     /* passive or active side */
+       enum mpa_rtr_type rtr_type;
+       enum mpa_negotiation_mode mpa_rev;
+       u32 tcp_cid;    /* QED_IWARP_INVALID_TCP_CID after a failed passive connect */
+       u32 cid;
+       u16 mss;
+       u8 remote_mac_addr[6];
+       u8 local_mac_addr[6];
+       bool mpa_reply_processed;
+
+       /* For Passive side - syn packet related data */
+       u16 syn_ip_payload_length;
+       struct qed_iwarp_ll2_buff *syn; /* re-posted to LL2 rx on connect completion */
+       dma_addr_t syn_phy_addr;
+
+       /* The event_cb function is called for asynchronous events associated
+        * with the ep. It is initialized at different entry points depending
+        * on whether the ep is the tcp connection active side or passive side
+        * The cb_context is passed to the event_cb function.
+        */
+       iwarp_event_handler event_cb;
+       void *cb_context;
+};
+
+struct qed_iwarp_listener { /* filled from qed_iwarp_listen_in on creation */
+       struct list_head list_entry;    /* entry in iwarp.listen_list */
+
+       /* The event_cb function is called for connection requests.
+        * The cb_context is passed to the event_cb function.
+        */
+       iwarp_event_handler event_cb;
+       void *cb_context;
+       u32 max_backlog;
+       u32 ip_addr[4];
+       u16 port;
+       u16 vlan;
+       u8 ip_version;
+};
+
+int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
+
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+                   struct qed_rdma_start_in_params *params);
+
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_devinfo(struct qed_hwfn *p_hwfn);
+
+void qed_iwarp_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+int qed_iwarp_create_qp(struct qed_hwfn *p_hwfn,
+                       struct qed_rdma_qp *qp,
+                       struct qed_rdma_create_qp_out_params *out_params);
+
+int qed_iwarp_modify_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp,
+                       enum qed_iwarp_qp_state new_state, bool internal);
+
+int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+int qed_iwarp_fw_destroy(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp);
+
+void qed_iwarp_query_qp(struct qed_rdma_qp *qp,
+                       struct qed_rdma_query_qp_out_params *out_params);
+
+int
+qed_iwarp_connect(void *rdma_cxt,
+                 struct qed_iwarp_connect_in *iparams,
+                 struct qed_iwarp_connect_out *oparams);
+
+int
+qed_iwarp_create_listen(void *rdma_cxt,
+                       struct qed_iwarp_listen_in *iparams,
+                       struct qed_iwarp_listen_out *oparams);
+
+int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams);
+
+int qed_iwarp_reject(void *rdma_cxt, struct qed_iwarp_reject_in *iparams);
+int qed_iwarp_destroy_listen(void *rdma_cxt, void *handle);
+
+int qed_iwarp_send_rtr(void *rdma_cxt, struct qed_iwarp_send_rtr_in *iparams);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c

index e57699bfbdfa415cd9eb56c04e31f2fe0ad20c80..0ba5ec8a9814571a121539f260aecaeefcd783b3 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -79,8 +79,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn)
         unsigned long **pp_qids;
         u32 i;
  
-       if (p_hwfn->hw_info.personality != QED_PCI_ETH &&
-           p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+       if (!QED_IS_L2_PERSONALITY(p_hwfn))
                 return 0;
  
         p_l2_info = kzalloc(sizeof(*p_l2_info), GFP_KERNEL);
@@ -1228,19 +1227,6 @@ static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
         return action;
  }
  
-static void qed_set_fw_mac_addr(__le16 *fw_msb,
-                               __le16 *fw_mid,
-                               __le16 *fw_lsb,
-                               u8 *mac)
-{
-       ((u8 *)fw_msb)[0] = mac[1];
-       ((u8 *)fw_msb)[1] = mac[0];
-       ((u8 *)fw_mid)[0] = mac[3];
-       ((u8 *)fw_mid)[1] = mac[2];
-       ((u8 *)fw_lsb)[0] = mac[5];
-       ((u8 *)fw_lsb)[1] = mac[4];
-}
-
  static int
  qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
                         u16 opaque_fid,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c

index 17f9b0a7b55332e645ad02cfc9ed238dad38f672..c06ad4f0758eb755ad9d82ffccecc3def154fc6b 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -309,7 +309,7 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                 list_del(&p_pkt->list_entry);
                 b_last_packet = list_empty(&p_tx->active_descq);
                 list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
-               if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+               if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
                         struct qed_ooo_buffer *p_buffer;
  
                         p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -532,7 +532,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
  
                 list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
  
-               if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+               if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO) {
                         struct qed_ooo_buffer *p_buffer;
  
                         p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
@@ -893,11 +893,11 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
         p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
         p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
         p_ramrod->queue_id = p_ll2_conn->queue_id;
-       p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0
-                                                                         : 1;
+       p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
  
         if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
-           p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
+           p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
+           (conn_type != QED_LL2_TYPE_IWARP)) {
                 p_ramrod->mf_si_bcast_accept_all = 1;
                 p_ramrod->mf_si_mcast_accept_all = 1;
         } else {
@@ -924,7 +924,7 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
         if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
                 return 0;
  
-       if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+       if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
                 p_ll2_conn->tx_stats_en = 0;
         else
                 p_ll2_conn->tx_stats_en = 1;
@@ -955,10 +955,10 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
         p_ramrod->pbl_size = cpu_to_le16(pbl_size);
  
         switch (p_ll2_conn->input.tx_tc) {
-       case LB_TC:
+       case PURE_LB_TC:
                 pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
                 break;
-       case OOO_LB_TC:
+       case PKT_LB_TC:
                 pq_id = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_OOO);
                 break;
         default:
@@ -973,12 +973,20 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
                 p_ramrod->conn_type = PROTOCOLID_FCOE;
                 break;
         case QED_LL2_TYPE_ISCSI:
-       case QED_LL2_TYPE_ISCSI_OOO:
                 p_ramrod->conn_type = PROTOCOLID_ISCSI;
                 break;
         case QED_LL2_TYPE_ROCE:
                 p_ramrod->conn_type = PROTOCOLID_ROCE;
                 break;
+       case QED_LL2_TYPE_IWARP:
+               p_ramrod->conn_type = PROTOCOLID_IWARP;
+               break;
+       case QED_LL2_TYPE_OOO:
+               if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
+                       p_ramrod->conn_type = PROTOCOLID_ISCSI;
+               else
+                       p_ramrod->conn_type = PROTOCOLID_IWARP;
+               break;
         default:
                 p_ramrod->conn_type = PROTOCOLID_ETH;
                 DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type);
@@ -1142,7 +1150,7 @@ qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
         u16 buf_idx;
         int rc = 0;
  
-       if (p_ll2_info->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+       if (p_ll2_info->input.conn_type != QED_LL2_TYPE_OOO)
                 return rc;
  
         /* Correct number of requested OOO buffers if needed */
@@ -1280,7 +1288,7 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
                 goto q_allocate_fail;
  
         /* Register callbacks for the Rx/Tx queues */
-       if (data->input.conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+       if (data->input.conn_type == QED_LL2_TYPE_OOO) {
                 comp_rx_cb = qed_ll2_lb_rxq_completion;
                 comp_tx_cb = qed_ll2_lb_txq_completion;
         } else {
@@ -1339,7 +1347,7 @@ static void
  qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
                                  struct qed_ll2_info *p_ll2_conn)
  {
-       if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+       if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
                 return;
  
         qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -1421,7 +1429,7 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
         if (rc)
                 goto out;
  
-       if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+       if (!QED_IS_RDMA_PERSONALITY(p_hwfn))
                 qed_wr(p_hwfn, p_ptt, PRS_REG_USE_LIGHT_L2, 1);
  
         qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
@@ -1794,7 +1802,7 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
                 qed_ll2_rxq_flush(p_hwfn, connection_handle);
         }
  
-       if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_ISCSI_OOO)
+       if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_OOO)
                 qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
  
         if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
@@ -1816,7 +1824,7 @@ static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
  {
         struct qed_ooo_buffer *p_buffer;
  
-       if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_ISCSI_OOO)
+       if (p_ll2_conn->input.conn_type != QED_LL2_TYPE_OOO)
                 return;
  
         qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
@@ -2063,7 +2071,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
         ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
  
         if (lb) {
-               data->input.tx_tc = OOO_LB_TC;
+               data->input.tx_tc = PKT_LB_TC;
                 data->input.tx_dest = QED_LL2_TX_DEST_LB;
         } else {
                 data->input.tx_tc = 0;
@@ -2080,7 +2088,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
         int rc;
  
         qed_ll2_set_conn_data(cdev, &data, params,
-                             QED_LL2_TYPE_ISCSI_OOO, handle, true);
+                             QED_LL2_TYPE_OOO, handle, true);
  
         rc = qed_ll2_acquire_connection(hwfn, &data);
         if (rc) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c

index 16cc30b11cce34602ca68973f23ba2713c713ac9..b11399606990ae5b950cc933fcceeae966375058 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -237,6 +237,8 @@ err0:
  int qed_fill_dev_info(struct qed_dev *cdev,
                       struct qed_dev_info *dev_info)
  {
+       struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+       struct qed_hw_info *hw_info = &p_hwfn->hw_info;
         struct qed_tunnel_info *tun = &cdev->tunnel;
         struct qed_ptt  *ptt;
  
@@ -260,11 +262,10 @@ int qed_fill_dev_info(struct qed_dev *cdev,
         dev_info->pci_mem_start = cdev->pci_params.mem_start;
         dev_info->pci_mem_end = cdev->pci_params.mem_end;
         dev_info->pci_irq = cdev->pci_params.irq;
-       dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality ==
-                                   QED_PCI_ETH_ROCE);
+       dev_info->rdma_supported = QED_IS_RDMA_PERSONALITY(p_hwfn);
         dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
         dev_info->dev_type = cdev->type;
-       ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
+       ether_addr_copy(dev_info->hw_mac, hw_info->hw_mac_addr);
  
         if (IS_PF(cdev)) {
                 dev_info->fw_major = FW_MAJOR_VERSION;
@@ -274,8 +275,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
                 dev_info->mf_mode = cdev->mf_mode;
                 dev_info->tx_switching = true;
  
-               if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
-                   QED_WOL_SUPPORT_PME)
+               if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
                         dev_info->wol_support = true;
  
                 dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
@@ -304,7 +304,7 @@ int qed_fill_dev_info(struct qed_dev *cdev,
                                     &dev_info->mfw_rev, NULL);
         }
  
-       dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu;
+       dev_info->mtu = hw_info->mtu;
  
         return 0;
  }
@@ -790,7 +790,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
                                        cdev->num_hwfns;
  
         if (!IS_ENABLED(CONFIG_QED_RDMA) ||
-           QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH_ROCE)
+           !QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev)))
                 return 0;
  
         for_each_hwfn(cdev, i)
@@ -931,8 +931,7 @@ static void qed_update_pf_params(struct qed_dev *cdev,
         /* In case we might support RDMA, don't allow qede to be greedy
          * with the L2 contexts. Allow for 64 queues [rx, tx, xdp] per hwfn.
          */
-       if (QED_LEADING_HWFN(cdev)->hw_info.personality ==
-           QED_PCI_ETH_ROCE) {
+       if (QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) {
                 u16 *num_cons;
  
                 num_cons = &params->eth_pf_params.num_cons;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c

index df76e212f86e6b73c8dd541dc40fe199883aa065..6fb99518a61fdef22b42b61ad1b93946257beec4 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -161,7 +161,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
         num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto,
                                                NULL);
  
-       p_rdma_info->num_qps = num_cons / 2;
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               p_rdma_info->num_qps = num_cons;
+       else
+               p_rdma_info->num_qps = num_cons / 2; /* 2 cids per qp */
  
         num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE);
  
@@ -252,6 +255,13 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
                            "Failed to allocate real cid bitmap, rc = %d\n", rc);
                 goto free_cid_map;
         }
+
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               rc = qed_iwarp_alloc(p_hwfn);
+
+       if (rc)
+               goto free_cid_map;
+
         DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n");
         return 0;
  
@@ -329,6 +339,9 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
  {
         struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
  
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               qed_iwarp_resc_free(p_hwfn);
+
         qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->cid_map, 1);
         qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->pd_map, 1);
         qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, 1);
@@ -470,6 +483,9 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
  
         if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
                 SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
+
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               qed_iwarp_init_devinfo(p_hwfn);
  }
  
  static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
@@ -490,29 +506,17 @@ static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
  
  static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
-       u32 ll2_ethertype_en;
+       int rc = 0;
  
         DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n");
         p_hwfn->b_rdma_enabled_in_prs = false;
  
-       qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
-
-       p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
-
-       /* We delay writing to this reg until first cid is allocated. See
-        * qed_cxt_dynamic_ilt_alloc function for more details
-        */
-       ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
-       qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
-              (ll2_ethertype_en | 0x01));
-
-       if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
-               DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
-               return -EINVAL;
-       }
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               qed_iwarp_init_hw(p_hwfn, p_ptt);
+       else
+               rc = qed_roce_init_hw(p_hwfn, p_ptt);
  
-       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
-       return 0;
+       return rc;
  }
  
  static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
@@ -544,7 +548,10 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
         if (rc)
                 return rc;
  
-       p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma;
+       else
+               p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
  
         p_params_header = &p_ramrod->params_header;
         p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
@@ -641,7 +648,15 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
         if (rc)
                 return rc;
  
-       qed_roce_setup(p_hwfn);
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+               rc = qed_iwarp_setup(p_hwfn, p_ptt, params);
+               if (rc)
+                       return rc;
+       } else {
+               rc = qed_roce_setup(p_hwfn);
+               if (rc)
+                       return rc;
+       }
  
         return qed_rdma_start_fw(p_hwfn, params, p_ptt);
  }
@@ -675,7 +690,16 @@ int qed_rdma_stop(void *rdma_cxt)
         qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
                (ll2_ethertype_en & 0xFFFE));
  
-       qed_roce_stop(p_hwfn);
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+               rc = qed_iwarp_stop(p_hwfn, p_ptt);
+               if (rc) {
+                       qed_ptt_release(p_hwfn, p_ptt);
+                       return rc;
+               }
+       } else {
+               qed_roce_stop(p_hwfn);
+       }
+
         qed_ptt_release(p_hwfn, p_ptt);
  
         /* Get SPQ entry */
@@ -810,7 +834,9 @@ static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
  
         memset(info, 0, sizeof(*info));
  
-       info->rdma_type = QED_RDMA_TYPE_ROCE;
+       info->rdma_type = QED_IS_ROCE_PERSONALITY(p_hwfn) ?
+           QED_RDMA_TYPE_ROCE : QED_RDMA_TYPE_IWARP;
+
         info->user_dpm_enabled = (p_hwfn->db_bar_no_edpm == 0);
  
         qed_fill_dev_info(cdev, &info->common);
@@ -1112,7 +1138,7 @@ static int qed_rdma_query_qp(void *rdma_cxt,
                              struct qed_rdma_query_qp_out_params *out_params)
  {
         struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
-       int rc;
+       int rc = 0;
  
         DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
  
@@ -1138,7 +1164,10 @@ static int qed_rdma_query_qp(void *rdma_cxt,
         out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp;
         out_params->sqd_async = qp->sqd_async;
  
-       rc = qed_roce_query_qp(p_hwfn, qp, out_params);
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               qed_iwarp_query_qp(qp, out_params);
+       else
+               rc = qed_roce_query_qp(p_hwfn, qp, out_params);
  
         DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc);
         return rc;
@@ -1151,7 +1180,10 @@ static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
  
         DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
  
-       rc = qed_roce_destroy_qp(p_hwfn, qp);
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+               rc = qed_iwarp_destroy_qp(p_hwfn, qp);
+       else
+               rc = qed_roce_destroy_qp(p_hwfn, qp);
  
         /* free qp params struct */
         kfree(qp);
@@ -1190,20 +1222,27 @@ qed_rdma_create_qp(void *rdma_cxt,
                 return NULL;
         }
  
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+               if (in_params->sq_num_pages * sizeof(struct regpair) >
+                   IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE) {
+                       DP_NOTICE(p_hwfn->cdev,
+                                 "Sq num pages: %d exceeds maximum\n",
+                                 in_params->sq_num_pages);
+                       return NULL;
+               }
+               if (in_params->rq_num_pages * sizeof(struct regpair) >
+                   IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE) {
+                       DP_NOTICE(p_hwfn->cdev,
+                                 "Rq num pages: %d exceeds maximum\n",
+                                 in_params->rq_num_pages);
+                       return NULL;
+               }
+       }
+
         qp = kzalloc(sizeof(*qp), GFP_KERNEL);
         if (!qp)
                 return NULL;
  
-       rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
-       qp->qpid = ((0xFF << 16) | qp->icid);
-
-       DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid);
-
-       if (rc) {
-               kfree(qp);
-               return NULL;
-       }
-
         qp->cur_state = QED_ROCE_QP_STATE_RESET;
         qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi);
         qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo);
@@ -1226,6 +1265,19 @@ qed_rdma_create_qp(void *rdma_cxt,
         qp->e2e_flow_control_en = qp->use_srq ? false : true;
         qp->stats_queue = in_params->stats_queue;
  
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+               rc = qed_iwarp_create_qp(p_hwfn, qp, out_params);
+               qp->qpid = qp->icid;
+       } else {
+               rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
+               qp->qpid = ((0xFF << 16) | qp->icid);
+       }
+
+       if (rc) {
+               kfree(qp);
+               return NULL;
+       }
+
         out_params->icid = qp->icid;
         out_params->qp_id = qp->qpid;
  
@@ -1324,7 +1376,14 @@ static int qed_rdma_modify_qp(void *rdma_cxt,
                            qp->cur_state);
         }
  
-       rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+       if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+               enum qed_iwarp_qp_state new_state =
+                   qed_roce2iwarp_state(qp->cur_state);
+
+               rc = qed_iwarp_modify_qp(p_hwfn, qp, new_state, 0);
+       } else {
+               rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+       }
  
         DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc);
         return rc;
@@ -1713,6 +1772,12 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
         .ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
         .ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
         .ll2_get_stats = &qed_ll2_get_stats,
+       .iwarp_connect = &qed_iwarp_connect,
+       .iwarp_create_listen = &qed_iwarp_create_listen,
+       .iwarp_destroy_listen = &qed_iwarp_destroy_listen,
+       .iwarp_accept = &qed_iwarp_accept,
+       .iwarp_reject = &qed_iwarp_reject,
+       .iwarp_send_rtr = &qed_iwarp_send_rtr,
  };
  
  const struct qed_rdma_ops *qed_get_rdma_ops(void)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h

index d91e5c4069a6a2ece9ca22a8c7b07bd3706aff20..18ec9cbd84f597e75e44af4a57c668e35a57b3fc 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -42,6 +42,7 @@
  #include "qed.h"
  #include "qed_dev_api.h"
  #include "qed_hsi.h"
+#include "qed_iwarp.h"
  #include "qed_roce.h"
  
  #define QED_RDMA_MAX_FMR                    (RDMA_MAX_TIDS)
@@ -84,6 +85,7 @@ struct qed_rdma_info {
         struct qed_bmap qp_map;
         struct qed_bmap srq_map;
         struct qed_bmap cid_map;
+       struct qed_bmap tcp_cid_map;
         struct qed_bmap real_cid_map;
         struct qed_bmap dpi_map;
         struct qed_bmap toggle_bits;
@@ -97,6 +99,7 @@ struct qed_rdma_info {
         u16 queue_zone_base;
         u16 max_queue_zones;
         enum protocol_type proto;
+       struct qed_iwarp_info iwarp;
  };
  
  struct qed_rdma_qp {
@@ -105,6 +108,7 @@ struct qed_rdma_qp {
         u32 qpid;
         u16 icid;
         enum qed_roce_qp_state cur_state;
+       enum qed_iwarp_qp_state iwarp_state;
         bool use_srq;
         bool signal_all;
         bool fmr_and_reserved_lkey;
@@ -164,6 +168,7 @@ struct qed_rdma_qp {
  
         void *shared_queue;
         dma_addr_t shared_queue_phys_addr;
+       struct qed_iwarp_ep *ep;
  };
  
  #if IS_ENABLED(CONFIG_QED_RDMA)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c

index e53adc3d009b8a1b52ecf23083399f4bf499d8fd..fb7c2d1562ae7a1fca7a5345e2aecf4a8dc2da7d 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -1149,3 +1149,23 @@ int qed_roce_setup(struct qed_hwfn *p_hwfn)
                                          qed_roce_async_event);
  }
  
+int qed_roce_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+       u32 ll2_ethertype_en;
+
+       qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
+
+       p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
+
+       ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
+       qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
+              (ll2_ethertype_en | 0x01));
+
+       if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
+               DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
+               return -EINVAL;
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
+       return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h

index 56c95fb9a26d527e54dd96338d618618eede1ee7..ab4ad8a1e2a5e3a9e1a846d82341468719073ea1 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -104,12 +104,17 @@ union ramrod_data {
         struct roce_query_qp_req_ramrod_data roce_query_qp_req;
         struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp;
         struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req;
+       struct roce_init_func_ramrod_data roce_init_func;
         struct rdma_create_cq_ramrod_data rdma_create_cq;
         struct rdma_destroy_cq_ramrod_data rdma_destroy_cq;
         struct rdma_srq_create_ramrod_data rdma_create_srq;
         struct rdma_srq_destroy_ramrod_data rdma_destroy_srq;
         struct rdma_srq_modify_ramrod_data rdma_modify_srq;
-       struct roce_init_func_ramrod_data roce_init_func;
+       struct iwarp_create_qp_ramrod_data iwarp_create_qp;
+       struct iwarp_tcp_offload_ramrod_data iwarp_tcp_offload;
+       struct iwarp_mpa_offload_ramrod_data iwarp_mpa_offload;
+       struct iwarp_modify_qp_ramrod_data iwarp_modify_qp;
+       struct iwarp_init_func_ramrod_data iwarp_init_func;
         struct fcoe_init_ramrod_params fcoe_init;
         struct fcoe_conn_offload_ramrod_params fcoe_conn_ofld;
         struct fcoe_conn_terminate_ramrod_params fcoe_conn_terminate;
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c

index bd0e3f157e9e8629eaf0ebe591a475eb9e7b5979..600e30e8f0be3156b764fdc7c762504cb74bab7a 100644 (file)
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1409,8 +1409,8 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
                 *index = entry->index;
                 resolved = false;
         } else if (removing) {
-               ofdpa_neigh_del(found);
                 *index = found->index;
+               ofdpa_neigh_del(found);
         } else if (updating) {
                 ofdpa_neigh_update(found, NULL, false);
                 resolved = !is_zero_ether_addr(found->eth_dst);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c

index ad9c4ded2b901f2f339dafab57ec7eed0b5f8059..761c518b2f92e0f91ee4671ede1185a41f5a5528 100644 (file)
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -4172,7 +4172,7 @@ found:
          * recipients
          */
         if (is_mc_recip) {
-               MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+               MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
                 unsigned int depth, i;
  
                 memset(inbuf, 0, sizeof(inbuf));
@@ -4320,7 +4320,7 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
                         efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
                 } else {
                         efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
-                                              MC_CMD_FILTER_OP_IN_LEN,
+                                              MC_CMD_FILTER_OP_EXT_IN_LEN,
                                                NULL, 0, rc);
                 }
         }
@@ -4453,7 +4453,7 @@ static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx,
                                       struct efx_filter_spec *spec)
  {
         struct efx_ef10_filter_table *table = efx->filter_state;
-       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
         struct efx_filter_spec *saved_spec;
         unsigned int hash, i, depth = 1;
         bool replacing = false;
@@ -4940,7 +4940,7 @@ not_restored:
  static void efx_ef10_filter_table_remove(struct efx_nic *efx)
  {
         struct efx_ef10_filter_table *table = efx->filter_state;
-       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_IN_LEN);
+       MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
         struct efx_filter_spec *spec;
         unsigned int filter_idx;
         int rc;
@@ -5105,6 +5105,7 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
  
         /* Insert/renew filters */
         for (i = 0; i < addr_count; i++) {
+               EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
                 efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
                 efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
                 rc = efx_ef10_filter_insert(efx, &spec, true);
@@ -5122,11 +5123,11 @@ static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
                                 }
                                 return rc;
                         } else {
-                               /* mark as not inserted, and carry on */
-                               rc = EFX_EF10_FILTER_ID_INVALID;
+                               /* keep invalid ID, and carry on */
                         }
+               } else {
+                       ids[i] = efx_ef10_filter_get_unsafe_id(rc);
                 }
-               ids[i] = efx_ef10_filter_get_unsafe_id(rc);
         }
  
         if (multicast && rollback) {
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c

index b9422450deb8e91b4a34bee26e8648d6e3a6ac8b..3df872f56289a2be0de52e98e641501d51fbf174 100644 (file)
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1301,7 +1301,7 @@ static void efx_mcdi_abandon(struct efx_nic *efx)
         efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
  }
  
-/* Called from  falcon_process_eventq for MCDI events */
+/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */
  void efx_mcdi_process_event(struct efx_channel *channel,
                             efx_qword_t *event)
  {
@@ -1389,8 +1389,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
                                 MCDI_EVENT_FIELD(*event, PROXY_RESPONSE_RC));
                 break;
         default:
-               netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
-                         code);
+               netif_err(efx, hw, efx->net_dev,
+                         "Unknown MCDI event " EFX_QWORD_FMT "\n",
+                         EFX_QWORD_VAL(*event));
         }
  }
  
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c

index 0d230b125c6c8576c7eeecf06703a6de3288ce5a..0804287628584ec24faa12a827f179a346cae0cb 100644 (file)
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2485,7 +2485,7 @@ static int smc_drv_resume(struct device *dev)
         return 0;
  }
  
-static struct dev_pm_ops smc_drv_pm_ops = {
+static const struct dev_pm_ops smc_drv_pm_ops = {
         .suspend        = smc_drv_suspend,
         .resume         = smc_drv_resume,
  };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c

index fffd6d5fc907b01d2277370f80b23af5a9288e8d..6c2d1da0558889f3361129a77b55cba9df6ab0ee 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -638,7 +638,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
  {
         struct sunxi_priv_data *gmac = priv->plat->bsp_priv;
         struct device_node *node = priv->device->of_node;
-       int ret;
+       int ret, phy_interface;
         u32 reg, val;
  
         regmap_read(gmac->regmap, SYSCON_EMAC_REG, &val);
@@ -718,7 +718,11 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
         if (gmac->variant->support_rmii)
                 reg &= ~SYSCON_RMII_EN;
  
-       switch (priv->plat->interface) {
+       phy_interface = priv->plat->interface;
+       /* if PHY is internal, select the mode (xMII) used by the SoC */
+       if (gmac->use_internal_phy)
+               phy_interface = gmac->variant->internal_phy;
+       switch (phy_interface) {
         case PHY_INTERFACE_MODE_MII:
                 /* default */
                 break;
@@ -932,7 +936,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
         }
  
         plat_dat->interface = of_get_phy_mode(dev->of_node);
-       if (plat_dat->interface == gmac->variant->internal_phy) {
+       if (plat_dat->interface == PHY_INTERFACE_MODE_INTERNAL) {
                 dev_info(&pdev->dev, "Will use internal PHY\n");
                 gmac->use_internal_phy = true;
                 gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c

index 471a9aa6ac94c14d46d4dcf2d956965948193c56..22cf6353ba0418ab5c64ad1fbb343c797d82f18d 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -205,8 +205,8 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
  {
         int i;
  
-       for (i = 0; i < 22; i++)
-               if ((i < 9) || (i > 17))
+       for (i = 0; i < 23; i++)
+               if ((i < 12) || (i > 17))
                         reg_space[DMA_BUS_MODE / 4 + i] =
                                 readl(ioaddr + DMA_BUS_MODE + i * 4);
  }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c

index 743170d57f62258f68faf4706d04113faea2fc70..babb39c646ff2f64c96880b437cbca5fc5668453 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -29,7 +29,7 @@
  #include "stmmac.h"
  #include "dwmac_dma.h"
  
-#define REG_SPACE_SIZE 0x1054
+#define REG_SPACE_SIZE 0x1060
  #define MAC100_ETHTOOL_NAME    "st_mac100"
  #define GMAC_ETHTOOL_NAME      "st_gmac"
  
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c

index 1562ab4151e192a079fc2a54dec7f8c101bcd109..56ba411421f0a77bae5b4568fb273464472741f5 100644 (file)
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -90,7 +90,7 @@ int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
         if (of_device_is_compatible(dev->of_node, "ti,dm816-emac"))
                 return cpsw_am33xx_cm_get_macid(dev, 0x30, slave, mac_addr);
  
-       if (of_machine_is_compatible("ti,am4372"))
+       if (of_machine_is_compatible("ti,am43"))
                 return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
  
         if (of_machine_is_compatible("ti,dra7"))
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c

index b7a0f5eeab620b30c5abcf8798f2bd3fdb2fb401..1850e348f5555b67f253c7a8fc9cb927d9694f16 100644 (file)
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1236,6 +1236,7 @@ static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
  {
         struct cpsw_common *cpsw = priv->cpsw;
  
+       skb_tx_timestamp(skb);
         return cpdma_chan_submit(txch, skb, skb->data, skb->len,
                                  priv->emac_port + cpsw->data.dual_emac);
  }
@@ -1597,6 +1598,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
  {
         struct cpsw_priv *priv = netdev_priv(ndev);
         struct cpsw_common *cpsw = priv->cpsw;
+       struct cpts *cpts = cpsw->cpts;
         struct netdev_queue *txq;
         struct cpdma_chan *txch;
         int ret, q_idx;
@@ -1608,11 +1610,9 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
         }
  
         if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-           cpts_is_tx_enabled(cpsw->cpts))
+           cpts_is_tx_enabled(cpts) && cpts_can_timestamp(cpts, skb))
                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
  
-       skb_tx_timestamp(skb);
-
         q_idx = skb_get_queue_mapping(skb);
         if (q_idx >= cpsw->tx_ch_num)
                 q_idx = q_idx % cpsw->tx_ch_num;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h

index c96eca2b1b46033a67198c0bd346b8dbc4929fed..01ea82ba9cdca7e83a03f36d9ef1f43ec4267bc0 100644 (file)
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -30,6 +30,7 @@
  #include <linux/of.h>
  #include <linux/ptp_clock_kernel.h>
  #include <linux/skbuff.h>
+#include <linux/ptp_classify.h>
  #include <linux/timecounter.h>
  
  struct cpsw_cpts {
@@ -155,6 +156,16 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
         return !!cpts->tx_enable;
  }
  
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+       unsigned int class = ptp_classify_raw(skb);
+
+       if (class == PTP_CLASS_NONE)
+               return false;
+
+       return true;
+}
+
  #else
  struct cpts;
  
@@ -203,6 +214,11 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
  {
         return false;
  }
+
+static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
+{
+       return false;
+}
  #endif
  
  
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c

index 0847a8f48cfe1d11d5001c1da2e0a33200a05116..28cb38af1a348799687149e07084cd55cf4aa1a9 100644 (file)
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -2503,24 +2503,8 @@ static bool gbe_need_txtstamp(struct gbe_intf *gbe_intf,
                               const struct netcp_packet *p_info)
  {
         struct sk_buff *skb = p_info->skb;
-       unsigned int class = ptp_classify_raw(skb);
  
-       if (class == PTP_CLASS_NONE)
-               return false;
-
-       switch (class) {
-       case PTP_CLASS_V1_IPV4:
-       case PTP_CLASS_V1_IPV6:
-       case PTP_CLASS_V2_IPV4:
-       case PTP_CLASS_V2_IPV6:
-       case PTP_CLASS_V2_L2:
-       case (PTP_CLASS_V2_VLAN | PTP_CLASS_L2):
-       case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV4):
-       case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV6):
-               return true;
-       }
-
-       return false;
+       return cpts_can_timestamp(gbe_intf->gbe_dev->cpts, skb);
  }
  
  static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf,
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c

index eb77201cb71836c33453c58362115461e937e9dd..de8156c6b2925741534a45a6c3a28a3afe9d1ad6 100644 (file)
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -45,9 +45,17 @@ struct geneve_net {
  
  static unsigned int geneve_net_id;
  
+struct geneve_dev_node {
+       struct hlist_node hlist;
+       struct geneve_dev *geneve;
+};
+
  /* Pseudo network device */
  struct geneve_dev {
-       struct hlist_node  hlist;       /* vni hash table */
+       struct geneve_dev_node hlist4;  /* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+       struct geneve_dev_node hlist6;  /* vni hash table for IPv6 socket */
+#endif
         struct net         *net;        /* netns for packet i/o */
         struct net_device  *dev;        /* netdev for geneve tunnel */
         struct ip_tunnel_info info;
@@ -123,16 +131,16 @@ static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
                                         __be32 addr, u8 vni[])
  {
         struct hlist_head *vni_list_head;
-       struct geneve_dev *geneve;
+       struct geneve_dev_node *node;
         __u32 hash;
  
         /* Find the device for this VNI */
         hash = geneve_net_vni_hash(vni);
         vni_list_head = &gs->vni_list[hash];
-       hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
-               if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
-                   addr == geneve->info.key.u.ipv4.dst)
-                       return geneve;
+       hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
+               if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
+                   addr == node->geneve->info.key.u.ipv4.dst)
+                       return node->geneve;
         }
         return NULL;
  }
@@ -142,16 +150,16 @@ static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
                                          struct in6_addr addr6, u8 vni[])
  {
         struct hlist_head *vni_list_head;
-       struct geneve_dev *geneve;
+       struct geneve_dev_node *node;
         __u32 hash;
  
         /* Find the device for this VNI */
         hash = geneve_net_vni_hash(vni);
         vni_list_head = &gs->vni_list[hash];
-       hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
-               if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
-                   ipv6_addr_equal(&addr6, &geneve->info.key.u.ipv6.dst))
-                       return geneve;
+       hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
+               if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
+                   ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst))
+                       return node->geneve;
         }
         return NULL;
  }
@@ -591,6 +599,7 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
  {
         struct net *net = geneve->net;
         struct geneve_net *gn = net_generic(net, geneve_net_id);
+       struct geneve_dev_node *node;
         struct geneve_sock *gs;
         __u8 vni[3];
         __u32 hash;
@@ -609,15 +618,20 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
  out:
         gs->collect_md = geneve->collect_md;
  #if IS_ENABLED(CONFIG_IPV6)
-       if (ipv6)
+       if (ipv6) {
                 rcu_assign_pointer(geneve->sock6, gs);
-       else
+               node = &geneve->hlist6;
+       } else
  #endif
+       {
                 rcu_assign_pointer(geneve->sock4, gs);
+               node = &geneve->hlist4;
+       }
+       node->geneve = geneve;
  
         tunnel_id_to_vni(geneve->info.key.tun_id, vni);
         hash = geneve_net_vni_hash(vni);
-       hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
+       hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
         return 0;
  }
  
@@ -644,8 +658,10 @@ static int geneve_stop(struct net_device *dev)
  {
         struct geneve_dev *geneve = netdev_priv(dev);
  
-       if (!hlist_unhashed(&geneve->hlist))
-               hlist_del_rcu(&geneve->hlist);
+       hlist_del_init_rcu(&geneve->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+       hlist_del_init_rcu(&geneve->hlist6.hlist);
+#endif
         geneve_sock_release(geneve);
         return 0;
  }
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c

index 9913721504630f5dcee29536407bf7ecc7f16017..63c98bbbc596dbe11cc74ba71c384da02c5d763b 100644 (file)
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -752,7 +752,7 @@ static int netvsc_set_channels(struct net_device *net,
             channels->rx_count || channels->tx_count || channels->other_count)
                 return -EINVAL;
  
-       if (count > net->num_tx_queues || count > net->num_rx_queues)
+       if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
                 return -EINVAL;
  
         if (!nvdev || nvdev->destroy)
@@ -1179,7 +1179,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
         rndis_dev = ndev->extension;
         if (indir) {
                 for (i = 0; i < ITAB_NUM; i++)
-                       if (indir[i] >= dev->num_rx_queues)
+                       if (indir[i] >= VRSS_CHANNEL_MAX)
                                 return -EINVAL;
  
                 for (i = 0; i < ITAB_NUM; i++)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c

index 9ffff0362a11ceff45ad040c95c0b6a931ef44c1..0f581ee74fe43b5127e338ecdc1da2139b986692 100644 (file)
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -39,16 +39,20 @@
  #define MACVLAN_HASH_SIZE      (1<<MACVLAN_HASH_BITS)
  #define MACVLAN_BC_QUEUE_LEN   1000
  
+#define MACVLAN_F_PASSTHRU     1
+#define MACVLAN_F_ADDRCHANGE   2
+
  struct macvlan_port {
         struct net_device       *dev;
         struct hlist_head       vlan_hash[MACVLAN_HASH_SIZE];
         struct list_head        vlans;
         struct sk_buff_head     bc_queue;
         struct work_struct      bc_work;
-       bool                    passthru;
+       u32                     flags;
         int                     count;
         struct hlist_head       vlan_source_hash[MACVLAN_HASH_SIZE];
         DECLARE_BITMAP(mc_filter, MACVLAN_MC_FILTER_SZ);
+       unsigned char           perm_addr[ETH_ALEN];
  };
  
  struct macvlan_source_entry {
@@ -66,6 +70,31 @@ struct macvlan_skb_cb {
  
  static void macvlan_port_destroy(struct net_device *dev);
  
+static inline bool macvlan_passthru(const struct macvlan_port *port)
+{
+       return port->flags & MACVLAN_F_PASSTHRU;
+}
+
+static inline void macvlan_set_passthru(struct macvlan_port *port)
+{
+       port->flags |= MACVLAN_F_PASSTHRU;
+}
+
+static inline bool macvlan_addr_change(const struct macvlan_port *port)
+{
+       return port->flags & MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_set_addr_change(struct macvlan_port *port)
+{
+       port->flags |= MACVLAN_F_ADDRCHANGE;
+}
+
+static inline void macvlan_clear_addr_change(struct macvlan_port *port)
+{
+       port->flags &= ~MACVLAN_F_ADDRCHANGE;
+}
+
  /* Hash Ethernet address */
  static u32 macvlan_eth_hash(const unsigned char *addr)
  {
@@ -181,11 +210,12 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan,
  static bool macvlan_addr_busy(const struct macvlan_port *port,
                               const unsigned char *addr)
  {
-       /* Test to see if the specified multicast address is
+       /* Test to see if the specified address is
          * currently in use by the underlying device or
          * another macvlan.
          */
-       if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
+       if (!macvlan_passthru(port) && !macvlan_addr_change(port) &&
+           ether_addr_equal_64bits(port->dev->dev_addr, addr))
                 return true;
  
         if (macvlan_hash_lookup(port, addr))
@@ -445,7 +475,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
         }
  
         macvlan_forward_source(skb, port, eth->h_source);
-       if (port->passthru)
+       if (macvlan_passthru(port))
                 vlan = list_first_or_null_rcu(&port->vlans,
                                               struct macvlan_dev, list);
         else
@@ -574,7 +604,7 @@ static int macvlan_open(struct net_device *dev)
         struct net_device *lowerdev = vlan->lowerdev;
         int err;
  
-       if (vlan->port->passthru) {
+       if (macvlan_passthru(vlan->port)) {
                 if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
                         err = dev_set_promiscuity(lowerdev, 1);
                         if (err < 0)
@@ -649,7 +679,7 @@ static int macvlan_stop(struct net_device *dev)
         dev_uc_unsync(lowerdev, dev);
         dev_mc_unsync(lowerdev, dev);
  
-       if (vlan->port->passthru) {
+       if (macvlan_passthru(vlan->port)) {
                 if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
                         dev_set_promiscuity(lowerdev, -1);
                 goto hash_del;
@@ -672,6 +702,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
  {
         struct macvlan_dev *vlan = netdev_priv(dev);
         struct net_device *lowerdev = vlan->lowerdev;
+       struct macvlan_port *port = vlan->port;
         int err;
  
         if (!(dev->flags & IFF_UP)) {
@@ -682,7 +713,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
                 if (macvlan_addr_busy(vlan->port, addr))
                         return -EBUSY;
  
-               if (!vlan->port->passthru) {
+               if (!macvlan_passthru(port)) {
                         err = dev_uc_add(lowerdev, addr);
                         if (err)
                                 return err;
@@ -692,6 +723,15 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
  
                 macvlan_hash_change_addr(vlan, addr);
         }
+       if (macvlan_passthru(port) && !macvlan_addr_change(port)) {
+               /* Since addr_change isn't set, we are here due to lower
+                * device change.  Save the lower-dev address so we can
+                * restore it later.
+                */
+               ether_addr_copy(vlan->port->perm_addr,
+                               lowerdev->dev_addr);
+       }
+       macvlan_clear_addr_change(port);
         return 0;
  }
  
@@ -703,8 +743,14 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
         if (!is_valid_ether_addr(addr->sa_data))
                 return -EADDRNOTAVAIL;
  
-       if (vlan->mode == MACVLAN_MODE_PASSTHRU)
+       /* If the addresses are the same, this is a no-op */
+       if (ether_addr_equal(dev->dev_addr, addr->sa_data))
+               return 0;
+
+       if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
+               macvlan_set_addr_change(vlan->port);
                 return dev_set_mac_address(vlan->lowerdev, addr);
+       }
  
         return macvlan_sync_address(dev, addr->sa_data);
  }
@@ -926,7 +972,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
         /* Support unicast filter only on passthru devices.
          * Multicast filter should be allowed on all devices.
          */
-       if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+       if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
                 return -EOPNOTSUPP;
  
         if (flags & NLM_F_REPLACE)
@@ -950,7 +996,7 @@ static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
         /* Support unicast filter only on passthru devices.
          * Multicast filter should be allowed on all devices.
          */
-       if (!vlan->port->passthru && is_unicast_ether_addr(addr))
+       if (!macvlan_passthru(vlan->port) && is_unicast_ether_addr(addr))
                 return -EOPNOTSUPP;
  
         if (is_unicast_ether_addr(addr))
@@ -1118,8 +1164,8 @@ static int macvlan_port_create(struct net_device *dev)
         if (port == NULL)
                 return -ENOMEM;
  
-       port->passthru = false;
         port->dev = dev;
+       ether_addr_copy(port->perm_addr, dev->dev_addr);
         INIT_LIST_HEAD(&port->vlans);
         for (i = 0; i < MACVLAN_HASH_SIZE; i++)
                 INIT_HLIST_HEAD(&port->vlan_hash[i]);
@@ -1159,6 +1205,18 @@ static void macvlan_port_destroy(struct net_device *dev)
                 kfree_skb(skb);
         }
  
+       /* If the lower device address has been changed by passthru
+        * macvlan, put it back.
+        */
+       if (macvlan_passthru(port) &&
+           !ether_addr_equal(port->dev->dev_addr, port->perm_addr)) {
+               struct sockaddr sa;
+
+               sa.sa_family = port->dev->type;
+               memcpy(&sa.sa_data, port->perm_addr, port->dev->addr_len);
+               dev_set_mac_address(port->dev, &sa);
+       }
+
         kfree(port);
  }
  
@@ -1325,7 +1383,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
         port = macvlan_port_get_rtnl(lowerdev);
  
         /* Only 1 macvlan device can be created in passthru mode */
-       if (port->passthru) {
+       if (macvlan_passthru(port)) {
                 /* The macvlan port must be not created this time,
                  * still goto destroy_macvlan_port for readability.
                  */
@@ -1351,7 +1409,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
                         err = -EINVAL;
                         goto destroy_macvlan_port;
                 }
-               port->passthru = true;
+               macvlan_set_passthru(port);
                 eth_hw_addr_inherit(dev, lowerdev);
         }
  
@@ -1435,7 +1493,7 @@ static int macvlan_changelink(struct net_device *dev,
         if (data && data[IFLA_MACVLAN_FLAGS]) {
                 __u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
                 bool promisc = (flags ^ vlan->flags) & MACVLAN_FLAG_NOPROMISC;
-               if (vlan->port->passthru && promisc) {
+               if (macvlan_passthru(vlan->port) && promisc) {
                         int err;
  
                         if (flags & MACVLAN_FLAG_NOPROMISC)
@@ -1598,7 +1656,7 @@ static int macvlan_device_event(struct notifier_block *unused,
                 }
                 break;
         case NETDEV_CHANGEADDR:
-               if (!port->passthru)
+               if (!macvlan_passthru(port))
                         return NOTIFY_DONE;
  
                 vlan = list_first_entry_or_null(&port->vlans,
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c

index ed0d10f54f2607533868dfd10e6bc9d0e09050de..c3065236ffcca6839d1326e60b96ac280787d2ee 100644 (file)
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -908,7 +908,7 @@ static void decode_txts(struct dp83640_private *dp83640,
         if (overflow) {
                 pr_debug("tx timestamp queue overflow, count %d\n", overflow);
                 while (skb) {
-                       skb_complete_tx_timestamp(skb, NULL);
+                       kfree_skb(skb);
                         skb = skb_dequeue(&dp83640->tx_queue);
                 }
                 return;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c

index 8400403b3f622ec148e36193ce191d63b8dfae30..5d314f143aea8634ddd150a236c7eb92cfdc4930 100644 (file)
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2171,6 +2171,7 @@ static struct phy_driver marvell_drivers[] = {
                 .get_sset_count = marvell_get_sset_count,
                 .get_strings = marvell_get_strings,
                 .get_stats = marvell_get_stats,
+               .set_loopback = genphy_loopback,
         },
         {
                 .phy_id = MARVELL_PHY_ID_88E1540,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c

index 9365b07923098c37d187ebe4c316f1d12c7ab003..fdb43dd9b5cd424f4dde02f1257070ffe4b50fb1 100644 (file)
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -620,6 +620,8 @@ static int ksz9031_read_status(struct phy_device *phydev)
         if ((regval & 0xFF) == 0xFF) {
                 phy_init_hw(phydev);
                 phydev->link = 0;
+               if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
+                       phydev->drv->config_intr(phydev);
         }
  
         return 0;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c

index acf00f071c9a0914e8d3554dbea126ed1310ba1f..1790f7fec12573fe21d088f9865126784efc0c08 100644 (file)
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1136,6 +1136,39 @@ int phy_resume(struct phy_device *phydev)
  }
  EXPORT_SYMBOL(phy_resume);
  
+int phy_loopback(struct phy_device *phydev, bool enable)
+{
+       struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
+       int ret = 0;
+
+       mutex_lock(&phydev->lock);
+
+       if (enable && phydev->loopback_enabled) {
+               ret = -EBUSY;
+               goto out;
+       }
+
+       if (!enable && !phydev->loopback_enabled) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (phydev->drv && phydrv->set_loopback)
+               ret = phydrv->set_loopback(phydev, enable);
+       else
+               ret = -EOPNOTSUPP;
+
+       if (ret)
+               goto out;
+
+       phydev->loopback_enabled = enable;
+
+out:
+       mutex_unlock(&phydev->lock);
+       return ret;
+}
+EXPORT_SYMBOL(phy_loopback);
+
  /* Generic PHY support and helper functions */
  
  /**
@@ -1584,6 +1617,23 @@ int genphy_resume(struct phy_device *phydev)
  }
  EXPORT_SYMBOL(genphy_resume);
  
+int genphy_loopback(struct phy_device *phydev, bool enable)
+{
+       int value;
+
+       value = phy_read(phydev, MII_BMCR);
+       if (value < 0)
+               return value;
+
+       if (enable)
+               value |= BMCR_LOOPBACK;
+       else
+               value &= ~BMCR_LOOPBACK;
+
+       return phy_write(phydev, MII_BMCR, value);
+}
+EXPORT_SYMBOL(genphy_loopback);
+
  static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
  {
         /* The default values for phydev->supported are provided by the PHY
@@ -1829,6 +1879,7 @@ static struct phy_driver genphy_driver = {
         .read_status    = genphy_read_status,
         .suspend        = genphy_suspend,
         .resume         = genphy_resume,
+       .set_loopback   = genphy_loopback,
  };
  
  static int __init phy_init(void)
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c

index 300bb1479b3a45bc919d053a799ea058164a0e2b..e9f101c9bae2ce1d9bde5dbe0d473119ead760e6 100644 (file)
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -201,7 +201,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                                 rionet_queue_tx_msg(skb, ndev,
                                         nets[rnet->mport->id].active[i]);
                                 if (count)
-                                       atomic_inc(&skb->users);
+                                       refcount_inc(&skb->users);
                                 count++;
                         }
         } else if (RIONET_MAC_MATCH(eth->h_dest)) {
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c

index 793ce900dffa09fd5c13410d56a99382d63ca2d0..f32261ecd2150036d3575986ab2e1971def5086f 100644 (file)
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1725,6 +1725,18 @@ static const struct driver_info lenovo_info = {
         .tx_fixup = ax88179_tx_fixup,
  };
  
+static const struct driver_info belkin_info = {
+       .description = "Belkin USB Ethernet Adapter",
+       .bind   = ax88179_bind,
+       .unbind = ax88179_unbind,
+       .status = ax88179_status,
+       .link_reset = ax88179_link_reset,
+       .reset  = ax88179_reset,
+       .flags  = FLAG_ETHER | FLAG_FRAMING_AX,
+       .rx_fixup = ax88179_rx_fixup,
+       .tx_fixup = ax88179_tx_fixup,
+};
+
  static const struct usb_device_id products[] = {
  {
         /* ASIX AX88179 10/100/1000 */
@@ -1754,6 +1766,10 @@ static const struct usb_device_id products[] = {
         /* Lenovo OneLinkDock Gigabit LAN */
         USB_DEVICE(0x17ef, 0x304b),
         .driver_info = (unsigned long)&lenovo_info,
+}, {
+       /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
+       USB_DEVICE(0x050d, 0x0128),
+       .driver_info = (unsigned long)&belkin_info,
  },
         { },
  };
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c

index 18fa45fc979b63810d993c9ca47a65b2009c5fb4..7220cd62071726b171ab2ae792f672d7b513343b 100644 (file)
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -643,6 +643,13 @@ static const struct usb_device_id mbim_devs[] = {
           .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
         },
  
+       /* The HP lt4132 (03f0:a31d) is a rebranded Huawei ME906s-158,
+        * therefore it too requires the above "NDP to end" quirk.
+        */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+         .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
+       },
+
         /* Telit LE922A6 in MBIM composition */
         { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1041, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
           .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c

index 2067743f51ca11a583960b0470cfa465b1995eaa..d103a1d4fb36713dac529bbc8703dd3023c6ab66 100644 (file)
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -89,6 +89,8 @@ static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = {
         CDC_NCM_SIMPLE_STAT(rx_ntbs),
  };
  
+#define CDC_NCM_LOW_MEM_MAX_CNT 10
+
  static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset)
  {
         switch (sset) {
@@ -1055,10 +1057,10 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
  
         /* align new NDP */
         if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END))
-               cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+               cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
  
         /* verify that there is room for the NDP and the datagram (reserve) */
-       if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size)
+       if ((ctx->tx_curr_size - skb->len - reserve) < ctx->max_ndp_size)
                 return NULL;
  
         /* link to it */
@@ -1111,13 +1113,41 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
  
         /* allocate a new OUT skb */
         if (!skb_out) {
-               skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC);
+               if (ctx->tx_low_mem_val == 0) {
+                       ctx->tx_curr_size = ctx->tx_max;
+                       skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+                       /* If the memory allocation fails we will wait longer
+                        * each time before attempting another full size
+                        * allocation again to not overload the system
+                        * further.
+                        */
+                       if (skb_out == NULL) {
+                               ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
+                                                             (unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
+                               ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
+                       }
+               }
                 if (skb_out == NULL) {
-                       if (skb != NULL) {
-                               dev_kfree_skb_any(skb);
-                               dev->net->stats.tx_dropped++;
+                       /* See if a very small allocation is possible.
+                        * We will send this packet immediately and hope
+                        * that there is more memory available later.
+                        */
+                       if (skb)
+                               ctx->tx_curr_size = max(skb->len,
+                                       (u32)USB_CDC_NCM_NTB_MIN_OUT_SIZE);
+                       else
+                               ctx->tx_curr_size = USB_CDC_NCM_NTB_MIN_OUT_SIZE;
+                       skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);
+
+                       /* No allocation possible so we will abort */
+                       if (skb_out == NULL) {
+                               if (skb != NULL) {
+                                       dev_kfree_skb_any(skb);
+                                       dev->net->stats.tx_dropped++;
+                               }
+                               goto exit_no_skb;
                         }
-                       goto exit_no_skb;
+                       ctx->tx_low_mem_val--;
                 }
                 /* fill out the initial 16-bit NTB header */
                 nth16 = skb_put_zero(skb_out, sizeof(struct usb_cdc_ncm_nth16));
@@ -1148,10 +1178,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
                 ndp16 = cdc_ncm_ndp(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder);
  
                 /* align beginning of next frame */
-               cdc_ncm_align_tail(skb_out,  ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max);
+               cdc_ncm_align_tail(skb_out,  ctx->tx_modulus, ctx->tx_remainder, ctx->tx_curr_size);
  
                 /* check if we had enough room left for both NDP and frame */
-               if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) {
+               if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_curr_size) {
                         if (n == 0) {
                                 /* won't fit, MTU problem? */
                                 dev_kfree_skb_any(skb);
@@ -1227,7 +1257,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
         /* If requested, put NDP at end of frame. */
         if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) {
                 nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data;
-               cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max);
+               cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size);
                 nth16->wNdpIndex = cpu_to_le16(skb_out->len);
                 skb_put_data(skb_out, ctx->delayed_ndp16, ctx->max_ndp_size);
  
@@ -1246,9 +1276,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
          */
         if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
             skb_out->len > ctx->min_tx_pkt) {
-               padding_count = ctx->tx_max - skb_out->len;
+               padding_count = ctx->tx_curr_size - skb_out->len;
                 skb_put_zero(skb_out, padding_count);
-       } else if (skb_out->len < ctx->tx_max &&
+       } else if (skb_out->len < ctx->tx_curr_size &&
                    (skb_out->len % dev->maxpacket) == 0) {
                 skb_put_u8(skb_out, 0); /* force short packet */
         }
diff --git a/drivers/net/veth.c b/drivers/net/veth.c

index b33553b1e19cd9c89bcaf5817a4cb08d54e5c434..f5438d0978cab397455e20aa66744646ffe34c5f 100644 (file)
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -385,7 +385,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
                 tbp = tb;
         }
  
-       if (tbp[IFLA_IFNAME]) {
+       if (ifmp && tbp[IFLA_IFNAME]) {
                 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
                 name_assign_type = NET_NAME_USER;
         } else {
@@ -404,7 +404,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
                 return PTR_ERR(peer);
         }
  
-       if (tbp[IFLA_ADDRESS] == NULL)
+       if (!ifmp || !tbp[IFLA_ADDRESS])
                 eth_hw_addr_random(peer);
  
         if (ifmp && (dev->ifindex != 0))
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

index 5c6388fb7dd13922e310aba329325e84fb434474..2e69bcdc5b0754827ebcada5b16f39710cab3cf1 100644 (file)
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1802,6 +1802,7 @@ static void virtnet_freeze_down(struct virtio_device *vdev)
         flush_work(&vi->config_work);
  
         netif_device_detach(vi->dev);
+       netif_tx_disable(vi->dev);
         cancel_delayed_work_sync(&vi->refill);
  
         if (netif_running(vi->dev)) {
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c

index 0dafd8e6c6658820c8be6b16cbf75283805898d4..b04e103350fb7bce54c541efb3ab9b9e505b9d73 100644 (file)
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -229,25 +229,25 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
  static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
                                            __be32 vni)
  {
-       struct vxlan_dev *vxlan;
+       struct vxlan_dev_node *node;
  
         /* For flow based devices, map all packets to VNI 0 */
         if (vs->flags & VXLAN_F_COLLECT_METADATA)
                 vni = 0;
  
-       hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
-               if (vxlan->default_dst.remote_vni != vni)
+       hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
+               if (node->vxlan->default_dst.remote_vni != vni)
                         continue;
  
                 if (IS_ENABLED(CONFIG_IPV6)) {
-                       const struct vxlan_config *cfg = &vxlan->cfg;
+                       const struct vxlan_config *cfg = &node->vxlan->cfg;
  
                         if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
                             cfg->remote_ifindex != ifindex)
                                 continue;
                 }
  
-               return vxlan;
+               return node->vxlan;
         }
  
         return NULL;
@@ -2387,17 +2387,22 @@ static void vxlan_vs_del_dev(struct vxlan_dev *vxlan)
         struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
  
         spin_lock(&vn->sock_lock);
-       hlist_del_init_rcu(&vxlan->hlist);
+       hlist_del_init_rcu(&vxlan->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+       hlist_del_init_rcu(&vxlan->hlist6.hlist);
+#endif
         spin_unlock(&vn->sock_lock);
  }
  
-static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
+static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan,
+                            struct vxlan_dev_node *node)
  {
         struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
         __be32 vni = vxlan->default_dst.remote_vni;
  
+       node->vxlan = vxlan;
         spin_lock(&vn->sock_lock);
-       hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+       hlist_add_head_rcu(&node->hlist, vni_head(vs, vni));
         spin_unlock(&vn->sock_lock);
  }
  
@@ -2656,7 +2661,6 @@ static void vxlan_setup(struct net_device *dev)
         vxlan->age_timer.data = (unsigned long) vxlan;
  
         vxlan->dev = dev;
-       vxlan->net = dev_net(dev);
  
         gro_cells_init(&vxlan->gro_cells, dev);
  
@@ -2727,7 +2731,7 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
         }
  
         if (tb[IFLA_MTU]) {
-               u32 mtu = nla_get_u32(data[IFLA_MTU]);
+               u32 mtu = nla_get_u32(tb[IFLA_MTU]);
  
                 if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU)
                         return -EINVAL;
@@ -2850,6 +2854,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
  {
         struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
         struct vxlan_sock *vs = NULL;
+       struct vxlan_dev_node *node;
  
         if (!vxlan->cfg.no_share) {
                 spin_lock(&vn->sock_lock);
@@ -2867,12 +2872,16 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
         if (IS_ERR(vs))
                 return PTR_ERR(vs);
  #if IS_ENABLED(CONFIG_IPV6)
-       if (ipv6)
+       if (ipv6) {
                 rcu_assign_pointer(vxlan->vn6_sock, vs);
-       else
+               node = &vxlan->hlist6;
+       } else
  #endif
+       {
                 rcu_assign_pointer(vxlan->vn4_sock, vs);
-       vxlan_vs_add_dev(vs, vxlan);
+               node = &vxlan->hlist4;
+       }
+       vxlan_vs_add_dev(vs, vxlan, node);
         return 0;
  }
  
@@ -3028,7 +3037,9 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
  
  static void vxlan_config_apply(struct net_device *dev,
                                struct vxlan_config *conf,
-                              struct net_device *lowerdev, bool changelink)
+                              struct net_device *lowerdev,
+                              struct net *src_net,
+                              bool changelink)
  {
         struct vxlan_dev *vxlan = netdev_priv(dev);
         struct vxlan_rdst *dst = &vxlan->default_dst;
@@ -3044,6 +3055,8 @@ static void vxlan_config_apply(struct net_device *dev,
  
                 if (conf->mtu)
                         dev->mtu = conf->mtu;
+
+               vxlan->net = src_net;
         }
  
         dst->remote_vni = conf->vni;
@@ -3086,7 +3099,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
         if (ret)
                 return ret;
  
-       vxlan_config_apply(dev, conf, lowerdev, changelink);
+       vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
  
         return 0;
  }
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h

index 530586be05b4357dc8ee6439b7a9e225bce012c6..5b1d2e8402d9d5482085018186d947b59cbb35a4 100644 (file)
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -199,6 +199,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
         unsigned long   remaining_credit;
         struct timer_list credit_timeout;
         u64 credit_window_start;
+       bool rate_limited;
  
         /* Statistics */
         struct xenvif_stats stats;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c

index 8397f6c9245158e8b3ff005bc58a419e4250169d..e322a862ddfe70b4e1b2fbdbffd8cd78cbdc4b24 100644 (file)
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -106,7 +106,11 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
  
         if (work_done < budget) {
                 napi_complete_done(napi, work_done);
-               xenvif_napi_schedule_or_enable_events(queue);
+               /* If the queue is rate-limited, it shall be
+                * rescheduled in the timer callback.
+                */
+               if (likely(!queue->rate_limited))
+                       xenvif_napi_schedule_or_enable_events(queue);
         }
  
         return work_done;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c

index 602d408fa25e98a4651716b1390d2507bced4605..5042ff8d449af70b2a05ac6e166eb8acbf7ae44c 100644 (file)
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -180,6 +180,7 @@ static void tx_add_credit(struct xenvif_queue *queue)
                 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
  
         queue->remaining_credit = min(max_credit, max_burst);
+       queue->rate_limited = false;
  }
  
  void xenvif_tx_credit_callback(unsigned long data)
@@ -686,8 +687,10 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
                 msecs_to_jiffies(queue->credit_usec / 1000);
  
         /* Timer could already be pending in rare cases. */
-       if (timer_pending(&queue->credit_timeout))
+       if (timer_pending(&queue->credit_timeout)) {
+               queue->rate_limited = true;
                 return true;
+       }
  
         /* Passed the point where we can replenish credit? */
         if (time_after_eq64(now, next_credit)) {
@@ -702,6 +705,7 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
                 mod_timer(&queue->credit_timeout,
                           next_credit);
                 queue->credit_window_start = next_credit;
+               queue->rate_limited = true;
  
                 return true;
         }
diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig

index c4208487fadc5693d7c4d7dfda9cc3c5f614ab61..b065eb6052150e5c88a6c8ceb13bfa44c29a821e 100644 (file)
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -7,7 +7,7 @@ menu "Near Field Communication (NFC) devices"
  
  config NFC_TRF7970A
         tristate "Texas Instruments TRF7970a NFC driver"
-       depends on SPI && NFC_DIGITAL
+       depends on SPI && NFC_DIGITAL && GPIOLIB
         help
           This option enables the NFC driver for Texas Instruments' TRF7970a
           device. Such device supports 5 different protocols: ISO14443A,
diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c

index badd8167ac737103edb999d3eea6c334cf63846d..ec50027b0d8ba327b5b125249e1fdc142426b7e9 100644 (file)
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -749,11 +749,9 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
         u32 protocols;
         int r;
  
-       info = kzalloc(sizeof(struct fdp_nci_info), GFP_KERNEL);
-       if (!info) {
-               r = -ENOMEM;
-               goto err_info_alloc;
-       }
+       info = devm_kzalloc(dev, sizeof(struct fdp_nci_info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
  
         info->phy = phy;
         info->phy_ops = phy_ops;
@@ -775,8 +773,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
                                    tx_tailroom);
         if (!ndev) {
                 nfc_err(dev, "Cannot allocate nfc ndev\n");
-               r = -ENOMEM;
-               goto err_alloc_ndev;
+               return -ENOMEM;
         }
  
         r = nci_register_device(ndev);
@@ -792,9 +789,6 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
  
  err_regdev:
         nci_free_device(ndev);
-err_alloc_ndev:
-       kfree(info);
-err_info_alloc:
         return r;
  }
  EXPORT_SYMBOL(fdp_nci_probe);
@@ -808,7 +802,6 @@ void fdp_nci_remove(struct nci_dev *ndev)
  
         nci_unregister_device(ndev);
         nci_free_device(ndev);
-       kfree(info);
  }
  EXPORT_SYMBOL(fdp_nci_remove);
  
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c

index e0baec848ff277d15da5bd35c91b06d2de38efb4..c4da50e07bbcbe2318a27f55c5e49baf5f3c077b 100644 (file)
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -27,7 +27,6 @@
  
  #define FDP_I2C_DRIVER_NAME    "fdp_nci_i2c"
  
-#define FDP_DP_POWER_GPIO_NAME "power"
  #define FDP_DP_CLOCK_TYPE_NAME "clock-type"
  #define FDP_DP_CLOCK_FREQ_NAME "clock-freq"
  #define FDP_DP_FW_VSC_CFG_NAME "fw-vsc-cfg"
@@ -281,8 +280,14 @@ vsc_read_err:
                 *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no");
  }
  
-static int fdp_nci_i2c_probe(struct i2c_client *client,
-                            const struct i2c_device_id *id)
+static const struct acpi_gpio_params power_gpios = { 0, 0, false };
+
+static const struct acpi_gpio_mapping acpi_fdp_gpios[] = {
+       { "power-gpios", &power_gpios, 1 },
+       {},
+};
+
+static int fdp_nci_i2c_probe(struct i2c_client *client)
  {
         struct fdp_i2c_phy *phy;
         struct device *dev = &client->dev;
@@ -304,8 +309,7 @@ static int fdp_nci_i2c_probe(struct i2c_client *client,
                 return -ENODEV;
         }
  
-       phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy),
-                          GFP_KERNEL);
+       phy = devm_kzalloc(dev, sizeof(struct fdp_i2c_phy), GFP_KERNEL);
         if (!phy)
                 return -ENOMEM;
  
@@ -313,19 +317,22 @@ static int fdp_nci_i2c_probe(struct i2c_client *client,
         phy->next_read_size = FDP_NCI_I2C_MIN_PAYLOAD;
         i2c_set_clientdata(client, phy);
  
-       r = request_threaded_irq(client->irq, NULL, fdp_nci_i2c_irq_thread_fn,
-                                IRQF_TRIGGER_RISING | IRQF_ONESHOT,
-                                FDP_I2C_DRIVER_NAME, phy);
+       r = devm_request_threaded_irq(dev, client->irq,
+                                     NULL, fdp_nci_i2c_irq_thread_fn,
+                                     IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+                                     FDP_I2C_DRIVER_NAME, phy);
  
         if (r < 0) {
                 nfc_err(&client->dev, "Unable to register IRQ handler\n");
                 return r;
         }
  
-       /* Requesting the power gpio */
-       phy->power_gpio = devm_gpiod_get(dev, FDP_DP_POWER_GPIO_NAME,
-                                        GPIOD_OUT_LOW);
+       r = devm_acpi_dev_add_driver_gpios(dev, acpi_fdp_gpios);
+       if (r)
+               dev_dbg(dev, "Unable to add GPIO mapping table\n");
  
+       /* Requesting the power gpio */
+       phy->power_gpio = devm_gpiod_get(dev, "power", GPIOD_OUT_LOW);
         if (IS_ERR(phy->power_gpio)) {
                 nfc_err(dev, "Power GPIO request failed\n");
                 return PTR_ERR(phy->power_gpio);
@@ -360,12 +367,6 @@ static int fdp_nci_i2c_remove(struct i2c_client *client)
         return 0;
  }
  
-static struct i2c_device_id fdp_nci_i2c_id_table[] = {
-       {"int339a", 0},
-       {}
-};
-MODULE_DEVICE_TABLE(i2c, fdp_nci_i2c_id_table);
-
  static const struct acpi_device_id fdp_nci_i2c_acpi_match[] = {
         {"INT339A", 0},
         {}
@@ -377,8 +378,7 @@ static struct i2c_driver fdp_nci_i2c_driver = {
                    .name = FDP_I2C_DRIVER_NAME,
                    .acpi_match_table = ACPI_PTR(fdp_nci_i2c_acpi_match),
                   },
-       .id_table = fdp_nci_i2c_id_table,
-       .probe = fdp_nci_i2c_probe,
+       .probe_new = fdp_nci_i2c_probe,
         .remove = fdp_nci_i2c_remove,
  };
  module_i2c_driver(fdp_nci_i2c_driver);
diff --git a/drivers/nfc/nfcmrvl/fw_dnld.c b/drivers/nfc/nfcmrvl/fw_dnld.c

index f9f000c546d1182f768b969f5a131d90785673d7..7f8960a46aab0a6a494458f2c0a89cca1716e26d 100644 (file)
--- a/drivers/nfc/nfcmrvl/fw_dnld.c
+++ b/drivers/nfc/nfcmrvl/fw_dnld.c
@@ -457,7 +457,7 @@ int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv)
  
         INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work);
         snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq",
-                dev_name(priv->dev));
+                dev_name(&priv->ndev->nfc_dev->dev));
         priv->fw_dnld.rx_wq = create_singlethread_workqueue(name);
         if (!priv->fw_dnld.rx_wq)
                 return -ENOMEM;
@@ -494,6 +494,7 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name)
  {
         struct nfcmrvl_private *priv = nci_get_drvdata(ndev);
         struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld;
+       int res;
  
         if (!priv->support_fw_dnld)
                 return -ENOTSUPP;
@@ -509,7 +510,9 @@ int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name)
          */
  
         /* Retrieve FW binary */
-       if (request_firmware(&fw_dnld->fw, firmware_name, priv->dev) < 0) {
+       res = request_firmware(&fw_dnld->fw, firmware_name,
+                              &ndev->nfc_dev->dev);
+       if (res < 0) {
                 nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name);
                 return -ENOENT;
         }
diff --git a/drivers/nfc/nfcmrvl/main.c b/drivers/nfc/nfcmrvl/main.c

index c5038e6447bda4e35a6e46ed1cea43c1e6721acb..e65d027b91fafbbd752970cff0afdc9e8cfb0d7c 100644 (file)
--- a/drivers/nfc/nfcmrvl/main.c
+++ b/drivers/nfc/nfcmrvl/main.c
@@ -123,13 +123,14 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
  
         memcpy(&priv->config, pdata, sizeof(*pdata));
  
-       if (priv->config.reset_n_io) {
-               rc = devm_gpio_request_one(dev,
-                                          priv->config.reset_n_io,
-                                          GPIOF_OUT_INIT_LOW,
-                                          "nfcmrvl_reset_n");
-               if (rc < 0)
+       if (gpio_is_valid(priv->config.reset_n_io)) {
+               rc = gpio_request_one(priv->config.reset_n_io,
+                                     GPIOF_OUT_INIT_LOW,
+                                     "nfcmrvl_reset_n");
+               if (rc < 0) {
+                       priv->config.reset_n_io = -EINVAL;
                         nfc_err(dev, "failed to request reset_n io\n");
+               }
         }
  
         if (phy == NFCMRVL_PHY_SPI) {
@@ -154,7 +155,13 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
         if (!priv->ndev) {
                 nfc_err(dev, "nci_allocate_device failed\n");
                 rc = -ENOMEM;
-               goto error;
+               goto error_free_gpio;
+       }
+
+       rc = nfcmrvl_fw_dnld_init(priv);
+       if (rc) {
+               nfc_err(dev, "failed to initialize FW download %d\n", rc);
+               goto error_free_dev;
         }
  
         nci_set_drvdata(priv->ndev, priv);
@@ -162,24 +169,22 @@ struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
         rc = nci_register_device(priv->ndev);
         if (rc) {
                 nfc_err(dev, "nci_register_device failed %d\n", rc);
-               goto error_free_dev;
+               goto error_fw_dnld_deinit;
         }
  
         /* Ensure that controller is powered off */
         nfcmrvl_chip_halt(priv);
  
-       rc = nfcmrvl_fw_dnld_init(priv);
-       if (rc) {
-               nfc_err(dev, "failed to initialize FW download %d\n", rc);
-               goto error_free_dev;
-       }
-
         nfc_info(dev, "registered with nci successfully\n");
         return priv;
  
+error_fw_dnld_deinit:
+       nfcmrvl_fw_dnld_deinit(priv);
  error_free_dev:
         nci_free_device(priv->ndev);
-error:
+error_free_gpio:
+       if (gpio_is_valid(priv->config.reset_n_io))
+               gpio_free(priv->config.reset_n_io);
         kfree(priv);
         return ERR_PTR(rc);
  }
@@ -194,8 +199,8 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv)
  
         nfcmrvl_fw_dnld_deinit(priv);
  
-       if (priv->config.reset_n_io)
-               devm_gpio_free(priv->dev, priv->config.reset_n_io);
+       if (gpio_is_valid(priv->config.reset_n_io))
+               gpio_free(priv->config.reset_n_io);
  
         nci_unregister_device(ndev);
         nci_free_device(ndev);
@@ -262,7 +267,6 @@ int nfcmrvl_parse_dt(struct device_node *node,
         reset_n_io = of_get_named_gpio(node, "reset-n-io", 0);
         if (reset_n_io < 0) {
                 pr_info("no reset-n-io config\n");
-               reset_n_io = 0;
         } else if (!gpio_is_valid(reset_n_io)) {
                 pr_err("invalid reset-n-io GPIO\n");
                 return reset_n_io;
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c

index 83a99e38e7bd316d949e44d1bc530865ac22befe..91162f8e0366c87ab7d3e26f2f02344599fb100b 100644 (file)
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -84,6 +84,7 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
         ret = nfcmrvl_parse_dt(matched_node, pdata);
         if (ret < 0) {
                 pr_err("Failed to get generic entries\n");
+               of_node_put(matched_node);
                 return ret;
         }
  
@@ -97,6 +98,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
         else
                 pdata->break_control = 0;
  
+       of_node_put(matched_node);
+
         return 0;
  }
  
@@ -109,6 +112,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
         struct nfcmrvl_private *priv;
         struct nfcmrvl_platform_data *pdata = NULL;
         struct nfcmrvl_platform_data config;
+       struct device *dev = nu->tty->dev;
  
         /*
          * Platform data cannot be used here since usually it is already used
@@ -116,9 +120,8 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
          * and check if DT entries were added.
          */
  
-       if (nu->tty->dev->parent && nu->tty->dev->parent->of_node)
-               if (nfcmrvl_uart_parse_dt(nu->tty->dev->parent->of_node,
-                                         &config) == 0)
+       if (dev && dev->parent && dev->parent->of_node)
+               if (nfcmrvl_uart_parse_dt(dev->parent->of_node, &config) == 0)
                         pdata = &config;
  
         if (!pdata) {
@@ -131,7 +134,7 @@ static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
         }
  
         priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_UART, nu, &uart_ops,
-                                       nu->tty->dev, pdata);
+                                       dev, pdata);
         if (IS_ERR(priv))
                 return PTR_ERR(priv);
  
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c

index 699aa9d1657532c7d15e46c2c2e178f02c175517..bd35eab652be7cf503adc7d253ecb5ccab7d9ad0 100644 (file)
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -341,15 +341,13 @@ static int nfcmrvl_probe(struct usb_interface *intf,
         init_usb_anchor(&drv_data->deferred);
  
         priv = nfcmrvl_nci_register_dev(NFCMRVL_PHY_USB, drv_data, &usb_ops,
-                                       &drv_data->udev->dev, &config);
+                                       &intf->dev, &config);
         if (IS_ERR(priv))
                 return PTR_ERR(priv);
  
         drv_data->priv = priv;
         drv_data->priv->support_fw_dnld = false;
  
-       priv->dev = &drv_data->udev->dev;
-
         usb_set_intfdata(intf, drv_data);
  
         return 0;
diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c

index a466e79784668ef44955aad0d2b61cc9e1a5b3b6..33449820e75422e70753d767c5b95a061fdfe19a 100644 (file)
--- a/drivers/nfc/nfcsim.c
+++ b/drivers/nfc/nfcsim.c
@@ -482,8 +482,10 @@ static int __init nfcsim_init(void)
  exit_err:
         pr_err("Failed to initialize nfcsim driver (%d)\n", rc);
  
-       nfcsim_link_free(link0);
-       nfcsim_link_free(link1);
+       if (link0)
+               nfcsim_link_free(link0);
+       if (link1)
+               nfcsim_link_free(link1);
  
         return rc;
  }
diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c

index fedde9d46ab648e87fd686e6e00d28d4bc130f2b..4b14740edb672c734239eb4c26282c036009d415 100644 (file)
--- a/drivers/nfc/pn544/i2c.c
+++ b/drivers/nfc/pn544/i2c.c
@@ -904,7 +904,7 @@ static int pn544_hci_i2c_probe(struct i2c_client *client,
         phy->i2c_dev = client;
         i2c_set_clientdata(client, phy);
  
-       r = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), acpi_pn544_gpios);
+       r = devm_acpi_dev_add_driver_gpios(dev, acpi_pn544_gpios);
         if (r)
                 dev_dbg(dev, "Unable to add GPIO mapping table\n");
  
@@ -958,7 +958,6 @@ static int pn544_hci_i2c_remove(struct i2c_client *client)
         if (phy->powered)
                 pn544_hci_i2c_disable(phy);
  
-       acpi_dev_remove_driver_gpios(ACPI_COMPANION(&client->dev));
         return 0;
  }
  
diff --git a/drivers/nfc/st-nci/i2c.c b/drivers/nfc/st-nci/i2c.c

index 9dfae0efa922b099e30b984784c82dd12c33996e..515f08d037fb40c0656dd07808c67e6ffffc1f0e 100644 (file)
--- a/drivers/nfc/st-nci/i2c.c
+++ b/drivers/nfc/st-nci/i2c.c
@@ -19,15 +19,12 @@
  
  #include <linux/module.h>
  #include <linux/i2c.h>
-#include <linux/gpio.h>
  #include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
  #include <linux/acpi.h>
  #include <linux/interrupt.h>
  #include <linux/delay.h>
  #include <linux/nfc.h>
-#include <linux/platform_data/st-nci.h>
+#include <linux/of.h>
  
  #include "st-nci.h"
  
@@ -40,18 +37,16 @@
  #define ST_NCI_I2C_MIN_SIZE 4   /* PCB(1) + NCI Packet header(3) */
  #define ST_NCI_I2C_MAX_SIZE 250 /* req 4.2.1 */
  
+#define ST_NCI_DRIVER_NAME "st_nci"
  #define ST_NCI_I2C_DRIVER_NAME "st_nci_i2c"
  
-#define ST_NCI_GPIO_NAME_RESET "reset"
-
  struct st_nci_i2c_phy {
         struct i2c_client *i2c_dev;
         struct llt_ndlc *ndlc;
  
         bool irq_active;
  
-       unsigned int gpio_reset;
-       unsigned int irq_polarity;
+       struct gpio_desc *gpiod_reset;
  
         struct st_nci_se_status se_status;
  };
@@ -60,9 +55,9 @@ static int st_nci_i2c_enable(void *phy_id)
  {
         struct st_nci_i2c_phy *phy = phy_id;
  
-       gpio_set_value(phy->gpio_reset, 0);
+       gpiod_set_value(phy->gpiod_reset, 0);
         usleep_range(10000, 15000);
-       gpio_set_value(phy->gpio_reset, 1);
+       gpiod_set_value(phy->gpiod_reset, 1);
         usleep_range(80000, 85000);
  
         if (phy->ndlc->powered == 0 && phy->irq_active == 0) {
@@ -208,114 +203,18 @@ static struct nfc_phy_ops i2c_phy_ops = {
         .disable = st_nci_i2c_disable,
  };
  
-static int st_nci_i2c_acpi_request_resources(struct i2c_client *client)
-{
-       struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
-       struct gpio_desc *gpiod_reset;
-       struct device *dev = &client->dev;
-       u8 tmp;
-
-       /* Get RESET GPIO from ACPI */
-       gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1,
-                                          GPIOD_OUT_HIGH);
-       if (IS_ERR(gpiod_reset)) {
-               nfc_err(dev, "Unable to get RESET GPIO\n");
-               return -ENODEV;
-       }
-
-       phy->gpio_reset = desc_to_gpio(gpiod_reset);
-
-       phy->irq_polarity = irq_get_trigger_type(client->irq);
-
-       phy->se_status.is_ese_present = false;
-       phy->se_status.is_uicc_present = false;
-
-       if (device_property_present(dev, "ese-present")) {
-               device_property_read_u8(dev, "ese-present", &tmp);
-               phy->se_status.is_ese_present = tmp;
-       }
-
-       if (device_property_present(dev, "uicc-present")) {
-               device_property_read_u8(dev, "uicc-present", &tmp);
-               phy->se_status.is_uicc_present = tmp;
-       }
-
-       return 0;
-}
-
-static int st_nci_i2c_of_request_resources(struct i2c_client *client)
-{
-       struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
-       struct device_node *pp;
-       int gpio;
-       int r;
-
-       pp = client->dev.of_node;
-       if (!pp)
-               return -ENODEV;
-
-       /* Get GPIO from device tree */
-       gpio = of_get_named_gpio(pp, "reset-gpios", 0);
-       if (gpio < 0) {
-               nfc_err(&client->dev,
-                       "Failed to retrieve reset-gpios from device tree\n");
-               return gpio;
-       }
-
-       /* GPIO request and configuration */
-       r = devm_gpio_request_one(&client->dev, gpio,
-                               GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET);
-       if (r) {
-               nfc_err(&client->dev, "Failed to request reset pin\n");
-               return r;
-       }
-       phy->gpio_reset = gpio;
-
-       phy->irq_polarity = irq_get_trigger_type(client->irq);
-
-       phy->se_status.is_ese_present =
-                               of_property_read_bool(pp, "ese-present");
-       phy->se_status.is_uicc_present =
-                               of_property_read_bool(pp, "uicc-present");
-
-       return 0;
-}
-
-static int st_nci_i2c_request_resources(struct i2c_client *client)
-{
-       struct st_nci_nfc_platform_data *pdata;
-       struct st_nci_i2c_phy *phy = i2c_get_clientdata(client);
-       int r;
-
-       pdata = client->dev.platform_data;
-       if (pdata == NULL) {
-               nfc_err(&client->dev, "No platform data\n");
-               return -EINVAL;
-       }
+static const struct acpi_gpio_params reset_gpios = { 1, 0, false };
  
-       /* store for later use */
-       phy->gpio_reset = pdata->gpio_reset;
-       phy->irq_polarity = pdata->irq_polarity;
-
-       r = devm_gpio_request_one(&client->dev,
-                       phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
-                       ST_NCI_GPIO_NAME_RESET);
-       if (r) {
-               pr_err("%s : reset gpio_request failed\n", __FILE__);
-               return r;
-       }
-
-       phy->se_status.is_ese_present = pdata->is_ese_present;
-       phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
-       return 0;
-}
+static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = {
+       { "reset-gpios", &reset_gpios, 1 },
+       {},
+};
  
  static int st_nci_i2c_probe(struct i2c_client *client,
                                   const struct i2c_device_id *id)
  {
+       struct device *dev = &client->dev;
         struct st_nci_i2c_phy *phy;
-       struct st_nci_nfc_platform_data *pdata;
         int r;
  
         dev_dbg(&client->dev, "%s\n", __func__);
@@ -326,8 +225,7 @@ static int st_nci_i2c_probe(struct i2c_client *client,
                 return -ENODEV;
         }
  
-       phy = devm_kzalloc(&client->dev, sizeof(struct st_nci_i2c_phy),
-                          GFP_KERNEL);
+       phy = devm_kzalloc(dev, sizeof(struct st_nci_i2c_phy), GFP_KERNEL);
         if (!phy)
                 return -ENOMEM;
  
@@ -335,32 +233,22 @@ static int st_nci_i2c_probe(struct i2c_client *client,
  
         i2c_set_clientdata(client, phy);
  
-       pdata = client->dev.platform_data;
-       if (!pdata && client->dev.of_node) {
-               r = st_nci_i2c_of_request_resources(client);
-               if (r) {
-                       nfc_err(&client->dev, "No platform data\n");
-                       return r;
-               }
-       } else if (pdata) {
-               r = st_nci_i2c_request_resources(client);
-               if (r) {
-                       nfc_err(&client->dev,
-                               "Cannot get platform resources\n");
-                       return r;
-               }
-       } else if (ACPI_HANDLE(&client->dev)) {
-               r = st_nci_i2c_acpi_request_resources(client);
-               if (r) {
-                       nfc_err(&client->dev, "Cannot get ACPI data\n");
-                       return r;
-               }
-       } else {
-               nfc_err(&client->dev,
-                       "st_nci platform resources not available\n");
+       r = devm_acpi_dev_add_driver_gpios(dev, acpi_st_nci_gpios);
+       if (r)
+               dev_dbg(dev, "Unable to add GPIO mapping table\n");
+
+       /* Get RESET GPIO */
+       phy->gpiod_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(phy->gpiod_reset)) {
+               nfc_err(dev, "Unable to get RESET GPIO\n");
                 return -ENODEV;
         }
  
+       phy->se_status.is_ese_present =
+                               device_property_read_bool(dev, "ese-present");
+       phy->se_status.is_uicc_present =
+                               device_property_read_bool(dev, "uicc-present");
+
         r = ndlc_probe(phy, &i2c_phy_ops, &client->dev,
                         ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM,
                         &phy->ndlc, &phy->se_status);
@@ -372,7 +260,7 @@ static int st_nci_i2c_probe(struct i2c_client *client,
         phy->irq_active = true;
         r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
                                 st_nci_irq_thread_fn,
-                               phy->irq_polarity | IRQF_ONESHOT,
+                               IRQF_ONESHOT,
                                 ST_NCI_DRIVER_NAME, phy);
         if (r < 0)
                 nfc_err(&client->dev, "Unable to register IRQ handler\n");
diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c

index 89e341eba3ebe97407412c1cec99a3868849493a..14705591b0fb9bcf80de490d796d7d196f971961 100644 (file)
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -19,16 +19,13 @@
  
  #include <linux/module.h>
  #include <linux/spi/spi.h>
-#include <linux/gpio.h>
  #include <linux/gpio/consumer.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
  #include <linux/acpi.h>
  #include <linux/interrupt.h>
  #include <linux/delay.h>
  #include <linux/nfc.h>
+#include <linux/of.h>
  #include <net/nfc/nci.h>
-#include <linux/platform_data/st-nci.h>
  
  #include "st-nci.h"
  
@@ -41,18 +38,16 @@
  #define ST_NCI_SPI_MIN_SIZE 4   /* PCB(1) + NCI Packet header(3) */
  #define ST_NCI_SPI_MAX_SIZE 250 /* req 4.2.1 */
  
+#define ST_NCI_DRIVER_NAME "st_nci"
  #define ST_NCI_SPI_DRIVER_NAME "st_nci_spi"
  
-#define ST_NCI_GPIO_NAME_RESET "reset"
-
  struct st_nci_spi_phy {
         struct spi_device *spi_dev;
         struct llt_ndlc *ndlc;
  
         bool irq_active;
  
-       unsigned int gpio_reset;
-       unsigned int irq_polarity;
+       struct gpio_desc *gpiod_reset;
  
         struct st_nci_se_status se_status;
  };
@@ -61,9 +56,9 @@ static int st_nci_spi_enable(void *phy_id)
  {
         struct st_nci_spi_phy *phy = phy_id;
  
-       gpio_set_value(phy->gpio_reset, 0);
+       gpiod_set_value(phy->gpiod_reset, 0);
         usleep_range(10000, 15000);
-       gpio_set_value(phy->gpio_reset, 1);
+       gpiod_set_value(phy->gpiod_reset, 1);
         usleep_range(80000, 85000);
  
         if (phy->ndlc->powered == 0 && phy->irq_active == 0) {
@@ -223,113 +218,16 @@ static struct nfc_phy_ops spi_phy_ops = {
         .disable = st_nci_spi_disable,
  };
  
-static int st_nci_spi_acpi_request_resources(struct spi_device *spi_dev)
-{
-       struct st_nci_spi_phy *phy = spi_get_drvdata(spi_dev);
-       struct gpio_desc *gpiod_reset;
-       struct device *dev = &spi_dev->dev;
-       u8 tmp;
-
-       /* Get RESET GPIO from ACPI */
-       gpiod_reset = devm_gpiod_get_index(dev, ST_NCI_GPIO_NAME_RESET, 1,
-                                          GPIOD_OUT_HIGH);
-       if (IS_ERR(gpiod_reset)) {
-               nfc_err(dev, "Unable to get RESET GPIO\n");
-               return -ENODEV;
-       }
-
-       phy->gpio_reset = desc_to_gpio(gpiod_reset);
-
-       phy->irq_polarity = irq_get_trigger_type(spi_dev->irq);
-
-       phy->se_status.is_ese_present = false;
-       phy->se_status.is_uicc_present = false;
-
-       if (device_property_present(dev, "ese-present")) {
-               device_property_read_u8(dev, "ese-present", &tmp);
-               tmp = phy->se_status.is_ese_present;
-       }
-
-       if (device_property_present(dev, "uicc-present")) {
-               device_property_read_u8(dev, "uicc-present", &tmp);
-               tmp = phy->se_status.is_uicc_present;
-       }
-
-       return 0;
-}
-
-static int st_nci_spi_of_request_resources(struct spi_device *dev)
-{
-       struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
-       struct device_node *pp;
-       int gpio;
-       int r;
-
-       pp = dev->dev.of_node;
-       if (!pp)
-               return -ENODEV;
-
-       /* Get GPIO from device tree */
-       gpio = of_get_named_gpio(pp, "reset-gpios", 0);
-       if (gpio < 0) {
-               nfc_err(&dev->dev,
-                       "Failed to retrieve reset-gpios from device tree\n");
-               return gpio;
-       }
-
-       /* GPIO request and configuration */
-       r = devm_gpio_request_one(&dev->dev, gpio,
-                               GPIOF_OUT_INIT_HIGH, ST_NCI_GPIO_NAME_RESET);
-       if (r) {
-               nfc_err(&dev->dev, "Failed to request reset pin\n");
-               return r;
-       }
-       phy->gpio_reset = gpio;
-
-       phy->irq_polarity = irq_get_trigger_type(dev->irq);
+static const struct acpi_gpio_params reset_gpios = { 1, 0, false };
  
-       phy->se_status.is_ese_present =
-                               of_property_read_bool(pp, "ese-present");
-       phy->se_status.is_uicc_present =
-                               of_property_read_bool(pp, "uicc-present");
-
-       return 0;
-}
-
-static int st_nci_spi_request_resources(struct spi_device *dev)
-{
-       struct st_nci_nfc_platform_data *pdata;
-       struct st_nci_spi_phy *phy = spi_get_drvdata(dev);
-       int r;
-
-       pdata = dev->dev.platform_data;
-       if (pdata == NULL) {
-               nfc_err(&dev->dev, "No platform data\n");
-               return -EINVAL;
-       }
-
-       /* store for later use */
-       phy->gpio_reset = pdata->gpio_reset;
-       phy->irq_polarity = pdata->irq_polarity;
-
-       r = devm_gpio_request_one(&dev->dev,
-                       phy->gpio_reset, GPIOF_OUT_INIT_HIGH,
-                       ST_NCI_GPIO_NAME_RESET);
-       if (r) {
-               pr_err("%s : reset gpio_request failed\n", __FILE__);
-               return r;
-       }
-
-       phy->se_status.is_ese_present = pdata->is_ese_present;
-       phy->se_status.is_uicc_present = pdata->is_uicc_present;
-
-       return 0;
-}
+static const struct acpi_gpio_mapping acpi_st_nci_gpios[] = {
+       { "reset-gpios", &reset_gpios, 1 },
+       {},
+};
  
  static int st_nci_spi_probe(struct spi_device *dev)
  {
         struct st_nci_spi_phy *phy;
-       struct st_nci_nfc_platform_data *pdata;
         int r;
  
         dev_dbg(&dev->dev, "%s\n", __func__);
@@ -351,32 +249,22 @@ static int st_nci_spi_probe(struct spi_device *dev)
  
         spi_set_drvdata(dev, phy);
  
-       pdata = dev->dev.platform_data;
-       if (!pdata && dev->dev.of_node) {
-               r = st_nci_spi_of_request_resources(dev);
-               if (r) {
-                       nfc_err(&dev->dev, "No platform data\n");
-                       return r;
-               }
-       } else if (pdata) {
-               r = st_nci_spi_request_resources(dev);
-               if (r) {
-                       nfc_err(&dev->dev,
-                               "Cannot get platform resources\n");
-                       return r;
-               }
-       } else if (ACPI_HANDLE(&dev->dev)) {
-               r = st_nci_spi_acpi_request_resources(dev);
-               if (r) {
-                       nfc_err(&dev->dev, "Cannot get ACPI data\n");
-                       return r;
-               }
-       } else {
-               nfc_err(&dev->dev,
-                       "st_nci platform resources not available\n");
-               return -ENODEV;
+       r = devm_acpi_dev_add_driver_gpios(&dev->dev, acpi_st_nci_gpios);
+       if (r)
+               dev_dbg(&dev->dev, "Unable to add GPIO mapping table\n");
+
+       /* Get RESET GPIO */
+       phy->gpiod_reset = devm_gpiod_get(&dev->dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(phy->gpiod_reset)) {
+               nfc_err(&dev->dev, "Unable to get RESET GPIO\n");
+               return PTR_ERR(phy->gpiod_reset);
         }
  
+       phy->se_status.is_ese_present =
+                       device_property_read_bool(&dev->dev, "ese-present");
+       phy->se_status.is_uicc_present =
+                       device_property_read_bool(&dev->dev, "uicc-present");
+
         r = ndlc_probe(phy, &spi_phy_ops, &dev->dev,
                         ST_NCI_FRAME_HEADROOM, ST_NCI_FRAME_TAILROOM,
                         &phy->ndlc, &phy->se_status);
@@ -388,7 +276,7 @@ static int st_nci_spi_probe(struct spi_device *dev)
         phy->irq_active = true;
         r = devm_request_threaded_irq(&dev->dev, dev->irq, NULL,
                                 st_nci_irq_thread_fn,
-                               phy->irq_polarity | IRQF_ONESHOT,
+                               IRQF_ONESHOT,
                                 ST_NCI_SPI_DRIVER_NAME, phy);
         if (r < 0)
                 nfc_err(&dev->dev, "Unable to register IRQ handler\n");
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c

index 4bff76baa3417931fa0d3ec89de40dd6e4f670f4..cd1f7bfa75ebe3a75f5577d33cc162fdfacfa8c1 100644 (file)
--- a/drivers/nfc/st21nfca/i2c.c
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -61,8 +61,6 @@
  #define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci"
  #define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c"
  
-#define ST21NFCA_GPIO_NAME_EN "enable"
-
  struct st21nfca_i2c_phy {
         struct i2c_client *i2c_dev;
         struct nfc_hci_dev *hdev;
@@ -501,41 +499,17 @@ static struct nfc_phy_ops i2c_phy_ops = {
         .disable = st21nfca_hci_i2c_disable,
  };
  
-static int st21nfca_hci_i2c_acpi_request_resources(struct i2c_client *client)
-{
-       struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
-       struct device *dev = &client->dev;
-
-       /* Get EN GPIO from ACPI */
-       phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 1,
-                                             GPIOD_OUT_LOW);
-       if (IS_ERR(phy->gpiod_ena)) {
-               nfc_err(dev, "Unable to get ENABLE GPIO\n");
-               return PTR_ERR(phy->gpiod_ena);
-       }
-
-       return 0;
-}
-
-static int st21nfca_hci_i2c_of_request_resources(struct i2c_client *client)
-{
-       struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
-       struct device *dev = &client->dev;
-
-       /* Get GPIO from device tree */
-       phy->gpiod_ena = devm_gpiod_get_index(dev, ST21NFCA_GPIO_NAME_EN, 0,
-                                             GPIOD_OUT_HIGH);
-       if (IS_ERR(phy->gpiod_ena)) {
-               nfc_err(dev, "Failed to request enable pin\n");
-               return PTR_ERR(phy->gpiod_ena);
-       }
+static const struct acpi_gpio_params enable_gpios = { 1, 0, false };
  
-       return 0;
-}
+static const struct acpi_gpio_mapping acpi_st21nfca_gpios[] = {
+       { "enable-gpios", &enable_gpios, 1 },
+       {},
+};
  
  static int st21nfca_hci_i2c_probe(struct i2c_client *client,
                                   const struct i2c_device_id *id)
  {
+       struct device *dev = &client->dev;
         struct st21nfca_i2c_phy *phy;
         int r;
  
@@ -562,21 +536,15 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client,
         mutex_init(&phy->phy_lock);
         i2c_set_clientdata(client, phy);
  
-       if (client->dev.of_node) {
-               r = st21nfca_hci_i2c_of_request_resources(client);
-               if (r) {
-                       nfc_err(&client->dev, "No platform data\n");
-                       return r;
-               }
-       } else if (ACPI_HANDLE(&client->dev)) {
-               r = st21nfca_hci_i2c_acpi_request_resources(client);
-               if (r) {
-                       nfc_err(&client->dev, "Cannot get ACPI data\n");
-                       return r;
-               }
-       } else {
-               nfc_err(&client->dev, "st21nfca platform resources not available\n");
-               return -ENODEV;
+       r = devm_acpi_dev_add_driver_gpios(dev, acpi_st21nfca_gpios);
+       if (r)
+               dev_dbg(dev, "Unable to add GPIO mapping table\n");
+
+       /* Get EN GPIO from resource provider */
+       phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
+       if (IS_ERR(phy->gpiod_ena)) {
+               nfc_err(dev, "Unable to get ENABLE GPIO\n");
+               return PTR_ERR(phy->gpiod_ena);
         }
  
         phy->se_status.is_ese_present =
diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c

index 2d1c8ca6e67907002a05d284316925b951d178de..eee5cc1a9220453139eb0e16f8ff732555f64438 100644 (file)
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -20,9 +20,8 @@
  #include <linux/nfc.h>
  #include <linux/skbuff.h>
  #include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
  #include <linux/of.h>
-#include <linux/of_gpio.h>
  #include <linux/spi/spi.h>
  #include <linux/regulator/consumer.h>
  
@@ -123,11 +122,10 @@
                  NFC_PROTO_ISO14443_B_MASK | NFC_PROTO_FELICA_MASK | \
                  NFC_PROTO_ISO15693_MASK | NFC_PROTO_NFC_DEP_MASK)
  
-#define TRF7970A_AUTOSUSPEND_DELAY             30000 /* 30 seconds */
+#define TRF7970A_AUTOSUSPEND_DELAY             30000   /* 30 seconds */
  #define TRF7970A_13MHZ_CLOCK_FREQUENCY         13560000
  #define TRF7970A_27MHZ_CLOCK_FREQUENCY         27120000
  
-
  #define TRF7970A_RX_SKB_ALLOC_SIZE             256
  
  #define TRF7970A_FIFO_SIZE                     127
@@ -152,7 +150,6 @@
   */
  #define TRF7970A_QUIRK_IRQ_STATUS_READ         BIT(0)
  #define TRF7970A_QUIRK_EN2_MUST_STAY_LOW       BIT(1)
-#define TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE      BIT(2)
  
  /* Direct commands */
  #define TRF7970A_CMD_IDLE                      0x00
@@ -295,7 +292,7 @@
  #define TRF7970A_REG_IO_CTRL_AUTO_REG          BIT(7)
  
  /* IRQ Status Register Bits */
-#define TRF7970A_IRQ_STATUS_NORESP             BIT(0) /* ISO15693 only */
+#define TRF7970A_IRQ_STATUS_NORESP             BIT(0)  /* ISO15693 only */
  #define TRF7970A_IRQ_STATUS_NFC_COL_ERROR      BIT(0)
  #define TRF7970A_IRQ_STATUS_COL                        BIT(1)
  #define TRF7970A_IRQ_STATUS_FRAMING_EOF_ERROR  BIT(2)
@@ -451,16 +448,14 @@ struct trf7970a {
         u8                              md_rf_tech;
         u8                              tx_cmd;
         bool                            issue_eof;
-       bool                            adjust_resp_len;
-       int                             en2_gpio;
-       int                             en_gpio;
+       struct gpio_desc                *en_gpiod;
+       struct gpio_desc                *en2_gpiod;
         struct mutex                    lock;
         unsigned int                    timeout;
         bool                            ignore_timeout;
         struct delayed_work             timeout_work;
  };
  
-
  static int trf7970a_cmd(struct trf7970a *trf, u8 opcode)
  {
         u8 cmd = TRF7970A_CMD_BIT_CTRL | TRF7970A_CMD_BIT_OPCODE(opcode);
@@ -471,7 +466,7 @@ static int trf7970a_cmd(struct trf7970a *trf, u8 opcode)
         ret = spi_write(trf->spi, &cmd, 1);
         if (ret)
                 dev_err(trf->dev, "%s - cmd: 0x%x, ret: %d\n", __func__, cmd,
-                               ret);
+                       ret);
         return ret;
  }
  
@@ -483,14 +478,15 @@ static int trf7970a_read(struct trf7970a *trf, u8 reg, u8 *val)
         ret = spi_write_then_read(trf->spi, &addr, 1, val, 1);
         if (ret)
                 dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr,
-                               ret);
+                       ret);
  
         dev_dbg(trf->dev, "read(0x%x): 0x%x\n", addr, *val);
  
         return ret;
  }
  
-static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len)
+static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf,
+                             size_t len)
  {
         u8 addr = reg | TRF7970A_CMD_BIT_RW | TRF7970A_CMD_BIT_CONTINUOUS;
         struct spi_transfer t[2];
@@ -514,7 +510,7 @@ static int trf7970a_read_cont(struct trf7970a *trf, u8 reg, u8 *buf, size_t len)
         ret = spi_sync(trf->spi, &m);
         if (ret)
                 dev_err(trf->dev, "%s - addr: 0x%x, ret: %d\n", __func__, addr,
-                               ret);
+                       ret);
         return ret;
  }
  
@@ -528,7 +524,7 @@ static int trf7970a_write(struct trf7970a *trf, u8 reg, u8 val)
         ret = spi_write(trf->spi, buf, 2);
         if (ret)
                 dev_err(trf->dev, "%s - write: 0x%x 0x%x, ret: %d\n", __func__,
-                               buf[0], buf[1], ret);
+                       buf[0], buf[1], ret);
  
         return ret;
  }
@@ -550,7 +546,7 @@ static int trf7970a_read_irqstatus(struct trf7970a *trf, u8 *status)
  
         if (ret)
                 dev_err(trf->dev, "%s - irqstatus: Status read failed: %d\n",
-                               __func__, ret);
+                       __func__, ret);
         else
                 *status = buf[0];
  
@@ -564,12 +560,12 @@ static int trf7970a_read_target_proto(struct trf7970a *trf, u8 *target_proto)
         u8 addr;
  
         addr = TRF79070A_NFC_TARGET_PROTOCOL | TRF7970A_CMD_BIT_RW |
-               TRF7970A_CMD_BIT_CONTINUOUS;
+              TRF7970A_CMD_BIT_CONTINUOUS;
  
         ret = spi_write_then_read(trf->spi, &addr, 1, buf, 2);
         if (ret)
                 dev_err(trf->dev, "%s - target_proto: Read failed: %d\n",
-                               __func__, ret);
+                       __func__, ret);
         else
                 *target_proto = buf[0];
  
@@ -600,7 +596,7 @@ static int trf7970a_mode_detect(struct trf7970a *trf, u8 *rf_tech)
                 break;
         default:
                 dev_dbg(trf->dev, "%s - mode_detect: target_proto: 0x%x\n",
-                               __func__, target_proto);
+                       __func__, target_proto);
                 return -EIO;
         }
  
@@ -616,8 +612,8 @@ static void trf7970a_send_upstream(struct trf7970a *trf)
  
         if (trf->rx_skb && !IS_ERR(trf->rx_skb) && !trf->aborting)
                 print_hex_dump_debug("trf7970a rx data: ", DUMP_PREFIX_NONE,
-                               16, 1, trf->rx_skb->data, trf->rx_skb->len,
-                               false);
+                                    16, 1, trf->rx_skb->data, trf->rx_skb->len,
+                                    false);
  
         trf->state = TRF7970A_ST_IDLE;
  
@@ -632,13 +628,6 @@ static void trf7970a_send_upstream(struct trf7970a *trf)
                 trf->aborting = false;
         }
  
-       if (trf->adjust_resp_len) {
-               if (trf->rx_skb)
-                       skb_trim(trf->rx_skb, trf->rx_skb->len - 1);
-
-               trf->adjust_resp_len = false;
-       }
-
         trf->cb(trf->ddev, trf->cb_arg, trf->rx_skb);
  
         trf->rx_skb = NULL;
@@ -657,7 +646,8 @@ static void trf7970a_send_err_upstream(struct trf7970a *trf, int errno)
  }
  
  static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
-               unsigned int len, u8 *prefix, unsigned int prefix_len)
+                            unsigned int len, u8 *prefix,
+                            unsigned int prefix_len)
  {
         struct spi_transfer t[2];
         struct spi_message m;
@@ -665,7 +655,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
         int ret;
  
         print_hex_dump_debug("trf7970a tx data: ", DUMP_PREFIX_NONE,
-                       16, 1, skb->data, len, false);
+                            16, 1, skb->data, len, false);
  
         spi_message_init(&m);
  
@@ -682,7 +672,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
         ret = spi_sync(trf->spi, &m);
         if (ret) {
                 dev_err(trf->dev, "%s - Can't send tx data: %d\n", __func__,
-                               ret);
+                       ret);
                 return ret;
         }
  
@@ -706,7 +696,7 @@ static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
         }
  
         dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n", timeout,
-                       trf->state);
+               trf->state);
  
         schedule_delayed_work(&trf->timeout_work, msecs_to_jiffies(timeout));
  
@@ -774,9 +764,9 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
  
         if (fifo_bytes > skb_tailroom(skb)) {
                 skb = skb_copy_expand(skb, skb_headroom(skb),
-                               max_t(int, fifo_bytes,
-                                       TRF7970A_RX_SKB_ALLOC_SIZE),
-                               GFP_KERNEL);
+                                     max_t(int, fifo_bytes,
+                                           TRF7970A_RX_SKB_ALLOC_SIZE),
+                                     GFP_KERNEL);
                 if (!skb) {
                         trf7970a_send_err_upstream(trf, -ENOMEM);
                         return;
@@ -787,7 +777,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
         }
  
         ret = trf7970a_read_cont(trf, TRF7970A_FIFO_IO_REGISTER,
-                       skb_put(skb, fifo_bytes), fifo_bytes);
+                                skb_put(skb, fifo_bytes), fifo_bytes);
         if (ret) {
                 trf7970a_send_err_upstream(trf, ret);
                 return;
@@ -795,8 +785,7 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
  
         /* If received Type 2 ACK/NACK, shift right 4 bits and pass up */
         if ((trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T) && (skb->len == 1) &&
-                       (trf->special_fcn_reg1 ==
-                                TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) {
+           (trf->special_fcn_reg1 == TRF7970A_SPECIAL_FCN_REG1_4_BIT_RX)) {
                 skb->data[0] >>= 4;
                 status = TRF7970A_IRQ_STATUS_SRX;
         } else {
@@ -819,16 +808,16 @@ static void trf7970a_drain_fifo(struct trf7970a *trf, u8 status)
         }
  
  no_rx_data:
-       if (status == TRF7970A_IRQ_STATUS_SRX) { /* Receive complete */
+       if (status == TRF7970A_IRQ_STATUS_SRX) {        /* Receive complete */
                 trf7970a_send_upstream(trf);
                 return;
         }
  
         dev_dbg(trf->dev, "Setting timeout for %d ms\n",
-                       TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT);
+               TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT);
  
         schedule_delayed_work(&trf->timeout_work,
-                       msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT));
+                          msecs_to_jiffies(TRF7970A_WAIT_FOR_RX_DATA_TIMEOUT));
  }
  
  static irqreturn_t trf7970a_irq(int irq, void *dev_id)
@@ -851,7 +840,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
         }
  
         dev_dbg(trf->dev, "IRQ - state: %d, status: 0x%x\n", trf->state,
-                       status);
+               status);
  
         if (!status) {
                 mutex_unlock(&trf->lock);
@@ -876,7 +865,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
         case TRF7970A_ST_WAIT_FOR_TX_FIFO:
                 if (status & TRF7970A_IRQ_STATUS_TX) {
                         trf->ignore_timeout =
-                               !cancel_delayed_work(&trf->timeout_work);
+                           !cancel_delayed_work(&trf->timeout_work);
                         trf7970a_fill_fifo(trf);
                 } else {
                         trf7970a_send_err_upstream(trf, -EIO);
@@ -886,11 +875,11 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
         case TRF7970A_ST_WAIT_FOR_RX_DATA_CONT:
                 if (status & TRF7970A_IRQ_STATUS_SRX) {
                         trf->ignore_timeout =
-                               !cancel_delayed_work(&trf->timeout_work);
+                           !cancel_delayed_work(&trf->timeout_work);
                         trf7970a_drain_fifo(trf, status);
                 } else if (status & TRF7970A_IRQ_STATUS_FIFO) {
                         ret = trf7970a_read(trf, TRF7970A_FIFO_STATUS,
-                                       &fifo_bytes);
+                                           &fifo_bytes);
  
                         fifo_bytes &= ~TRF7970A_FIFO_STATUS_OVERFLOW;
  
@@ -899,14 +888,14 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
                         else if (!fifo_bytes)
                                 trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET);
                 } else if ((status == TRF7970A_IRQ_STATUS_TX) ||
-                               (!trf->is_initiator &&
-                                (status == (TRF7970A_IRQ_STATUS_TX |
-                                            TRF7970A_IRQ_STATUS_NFC_RF)))) {
+                          (!trf->is_initiator &&
+                           (status == (TRF7970A_IRQ_STATUS_TX |
+                                       TRF7970A_IRQ_STATUS_NFC_RF)))) {
                         trf7970a_cmd(trf, TRF7970A_CMD_FIFO_RESET);
  
                         if (!trf->timeout) {
-                               trf->ignore_timeout = !cancel_delayed_work(
-                                               &trf->timeout_work);
+                               trf->ignore_timeout =
+                                   !cancel_delayed_work(&trf->timeout_work);
                                 trf->rx_skb = ERR_PTR(0);
                                 trf7970a_send_upstream(trf);
                                 break;
@@ -930,13 +919,13 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
                                 break;
                         case NFC_DIGITAL_FRAMING_NFCA_ANTICOL_COMPLETE:
                                 ret = trf7970a_write(trf,
-                                       TRF7970A_SPECIAL_FCN_REG1,
-                                       TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL);
+                                        TRF7970A_SPECIAL_FCN_REG1,
+                                        TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL);
                                 if (ret)
                                         goto err_unlock_exit;
  
                                 trf->special_fcn_reg1 =
-                                       TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL;
+                                   TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL;
                                 break;
                         default:
                                 break;
@@ -944,7 +933,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
  
                         if (iso_ctrl != trf->iso_ctrl) {
                                 ret = trf7970a_write(trf, TRF7970A_ISO_CTRL,
-                                               iso_ctrl);
+                                                    iso_ctrl);
                                 if (ret)
                                         goto err_unlock_exit;
  
@@ -961,7 +950,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
         case TRF7970A_ST_LISTENING:
                 if (status & TRF7970A_IRQ_STATUS_SRX) {
                         trf->ignore_timeout =
-                               !cancel_delayed_work(&trf->timeout_work);
+                           !cancel_delayed_work(&trf->timeout_work);
                         trf7970a_drain_fifo(trf, status);
                 } else if (!(status & TRF7970A_IRQ_STATUS_NFC_RF)) {
                         trf7970a_send_err_upstream(trf, -EIO);
@@ -970,7 +959,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
         case TRF7970A_ST_LISTENING_MD:
                 if (status & TRF7970A_IRQ_STATUS_SRX) {
                         trf->ignore_timeout =
-                               !cancel_delayed_work(&trf->timeout_work);
+                           !cancel_delayed_work(&trf->timeout_work);
  
                         ret = trf7970a_mode_detect(trf, &trf->md_rf_tech);
                         if (ret) {
@@ -985,7 +974,7 @@ static irqreturn_t trf7970a_irq(int irq, void *dev_id)
                 break;
         default:
                 dev_err(trf->dev, "%s - Driver in invalid state: %d\n",
-                               __func__, trf->state);
+                       __func__, trf->state);
         }
  
  err_unlock_exit:
@@ -1010,19 +999,19 @@ static void trf7970a_issue_eof(struct trf7970a *trf)
         trf->state = TRF7970A_ST_WAIT_FOR_RX_DATA;
  
         dev_dbg(trf->dev, "Setting timeout for %d ms, state: %d\n",
-                       trf->timeout, trf->state);
+               trf->timeout, trf->state);
  
         schedule_delayed_work(&trf->timeout_work,
-                       msecs_to_jiffies(trf->timeout));
+                             msecs_to_jiffies(trf->timeout));
  }
  
  static void trf7970a_timeout_work_handler(struct work_struct *work)
  {
         struct trf7970a *trf = container_of(work, struct trf7970a,
-                       timeout_work.work);
+                                           timeout_work.work);
  
         dev_dbg(trf->dev, "Timeout - state: %d, ignore_timeout: %d\n",
-                       trf->state, trf->ignore_timeout);
+               trf->state, trf->ignore_timeout);
  
         mutex_lock(&trf->lock);
  
@@ -1053,7 +1042,7 @@ static int trf7970a_init(struct trf7970a *trf)
                 goto err_out;
  
         ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL,
-                       trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
+                            trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
         if (ret)
                 goto err_out;
  
@@ -1066,13 +1055,13 @@ static int trf7970a_init(struct trf7970a *trf)
         trf->chip_status_ctrl &= ~TRF7970A_CHIP_STATUS_RF_ON;
  
         ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
-                       trf->modulator_sys_clk_ctrl);
+                            trf->modulator_sys_clk_ctrl);
         if (ret)
                 goto err_out;
  
         ret = trf7970a_write(trf, TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS,
-                       TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 |
-                       TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32);
+                            TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLH_96 |
+                            TRF7970A_ADJUTABLE_FIFO_IRQ_LEVELS_WLL_32);
         if (ret)
                 goto err_out;
  
@@ -1093,7 +1082,7 @@ err_out:
  static void trf7970a_switch_rf_off(struct trf7970a *trf)
  {
         if ((trf->state == TRF7970A_ST_PWR_OFF) ||
-                       (trf->state == TRF7970A_ST_RF_OFF))
+           (trf->state == TRF7970A_ST_RF_OFF))
                 return;
  
         dev_dbg(trf->dev, "Switching rf off\n");
@@ -1117,9 +1106,9 @@ static int trf7970a_switch_rf_on(struct trf7970a *trf)
  
         pm_runtime_get_sync(trf->dev);
  
-       if (trf->state != TRF7970A_ST_RF_OFF) { /* Power on, RF off */
+       if (trf->state != TRF7970A_ST_RF_OFF) { /* Power on, RF off */
                 dev_err(trf->dev, "%s - Incorrect state: %d\n", __func__,
-                               trf->state);
+                       trf->state);
                 return -EINVAL;
         }
  
@@ -1154,7 +1143,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on)
                         break;
                 default:
                         dev_err(trf->dev, "%s - Invalid request: %d %d\n",
-                                       __func__, trf->state, on);
+                               __func__, trf->state, on);
                         trf7970a_switch_rf_off(trf);
                         ret = -EINVAL;
                 }
@@ -1165,7 +1154,7 @@ static int trf7970a_switch_rf(struct nfc_digital_dev *ddev, bool on)
                         break;
                 default:
                         dev_err(trf->dev, "%s - Invalid request: %d %d\n",
-                                       __func__, trf->state, on);
+                               __func__, trf->state, on);
                         ret = -EINVAL;
                         /* FALLTHROUGH */
                 case TRF7970A_ST_IDLE:
@@ -1190,36 +1179,36 @@ static int trf7970a_in_config_rf_tech(struct trf7970a *trf, int tech)
         case NFC_DIGITAL_RF_TECH_106A:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443A_106;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_OOK;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_OOK;
                 trf->guard_time = TRF7970A_GUARD_TIME_NFCA;
                 break;
         case NFC_DIGITAL_RF_TECH_106B:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_14443B_106;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_ASK10;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_ASK10;
                 trf->guard_time = TRF7970A_GUARD_TIME_NFCB;
                 break;
         case NFC_DIGITAL_RF_TECH_212F:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_212;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_ASK10;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_ASK10;
                 trf->guard_time = TRF7970A_GUARD_TIME_NFCF;
                 break;
         case NFC_DIGITAL_RF_TECH_424F:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_FELICA_424;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_ASK10;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_ASK10;
                 trf->guard_time = TRF7970A_GUARD_TIME_NFCF;
                 break;
         case NFC_DIGITAL_RF_TECH_ISO15693:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_OOK;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_OOK;
                 trf->guard_time = TRF7970A_GUARD_TIME_15693;
                 break;
         default:
@@ -1246,7 +1235,8 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field)
         u8 rssi;
  
         ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-                       trf->chip_status_ctrl | TRF7970A_CHIP_STATUS_REC_ON);
+                            trf->chip_status_ctrl |
+                            TRF7970A_CHIP_STATUS_REC_ON);
         if (ret)
                 return ret;
  
@@ -1261,7 +1251,7 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field)
                 return ret;
  
         ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-                       trf->chip_status_ctrl);
+                            trf->chip_status_ctrl);
         if (ret)
                 return ret;
  
@@ -1328,15 +1318,15 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing)
                 trf->iso_ctrl = iso_ctrl;
  
                 ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
-                               trf->modulator_sys_clk_ctrl);
+                                    trf->modulator_sys_clk_ctrl);
                 if (ret)
                         return ret;
         }
  
         if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) {
                 ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-                               trf->chip_status_ctrl |
-                                       TRF7970A_CHIP_STATUS_RF_ON);
+                                    trf->chip_status_ctrl |
+                                    TRF7970A_CHIP_STATUS_RF_ON);
                 if (ret)
                         return ret;
  
@@ -1349,7 +1339,7 @@ static int trf7970a_in_config_framing(struct trf7970a *trf, int framing)
  }
  
  static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type,
-               int param)
+                                   int param)
  {
         struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
         int ret;
@@ -1361,7 +1351,7 @@ static int trf7970a_in_configure_hw(struct nfc_digital_dev *ddev, int type,
         trf->is_initiator = true;
  
         if ((trf->state == TRF7970A_ST_PWR_OFF) ||
-                       (trf->state == TRF7970A_ST_RF_OFF)) {
+           (trf->state == TRF7970A_ST_RF_OFF)) {
                 ret = trf7970a_switch_rf_on(trf);
                 if (ret)
                         goto err_unlock;
@@ -1419,7 +1409,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
          * has to send an EOF in order to get a response.
          */
         if ((trf->technology == NFC_DIGITAL_RF_TECH_106A) &&
-                       (trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) {
+           (trf->framing == NFC_DIGITAL_FRAMING_NFCA_T2T)) {
                 if (req[0] == NFC_T2T_CMD_READ)
                         special_fcn_reg1 = 0;
                 else
@@ -1427,7 +1417,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
  
                 if (special_fcn_reg1 != trf->special_fcn_reg1) {
                         ret = trf7970a_write(trf, TRF7970A_SPECIAL_FCN_REG1,
-                                       special_fcn_reg1);
+                                            special_fcn_reg1);
                         if (ret)
                                 return ret;
  
@@ -1447,7 +1437,7 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
                         iso_ctrl |= TRF7970A_ISO_CTRL_15693_SGL_1OF4_2648;
                         break;
                 case (ISO15693_REQ_FLAG_SUB_CARRIER |
-                               ISO15693_REQ_FLAG_DATA_RATE):
+                     ISO15693_REQ_FLAG_DATA_RATE):
                         iso_ctrl |= TRF7970A_ISO_CTRL_15693_DBL_1OF4_2669;
                         break;
                 }
@@ -1460,23 +1450,18 @@ static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
                         trf->iso_ctrl = iso_ctrl;
                 }
  
-               if (trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) {
-                       if (trf7970a_is_iso15693_write_or_lock(req[1]) &&
-                                       (req[0] & ISO15693_REQ_FLAG_OPTION))
-                               trf->issue_eof = true;
-                       else if ((trf->quirks &
-                                       TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE) &&
-                                (req[1] == ISO15693_CMD_READ_MULTIPLE_BLOCK))
-                               trf->adjust_resp_len = true;
-               }
+               if ((trf->framing == NFC_DIGITAL_FRAMING_ISO15693_T5T) &&
+                   trf7970a_is_iso15693_write_or_lock(req[1]) &&
+                   (req[0] & ISO15693_REQ_FLAG_OPTION))
+                       trf->issue_eof = true;
         }
  
         return 0;
  }
  
  static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
-               struct sk_buff *skb, u16 timeout,
-               nfc_digital_cmd_complete_t cb, void *arg)
+                            struct sk_buff *skb, u16 timeout,
+                            nfc_digital_cmd_complete_t cb, void *arg)
  {
         struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
         u8 prefix[5];
@@ -1485,7 +1470,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
         u8 status;
  
         dev_dbg(trf->dev, "New request - state: %d, timeout: %d ms, len: %d\n",
-                       trf->state, timeout, skb->len);
+               trf->state, timeout, skb->len);
  
         if (skb->len > TRF7970A_TX_MAX)
                 return -EINVAL;
@@ -1493,9 +1478,9 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
         mutex_lock(&trf->lock);
  
         if ((trf->state != TRF7970A_ST_IDLE) &&
-                       (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
+           (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
                 dev_err(trf->dev, "%s - Bogus state: %d\n", __func__,
-                               trf->state);
+                       trf->state);
                 ret = -EIO;
                 goto out_err;
         }
@@ -1509,7 +1494,7 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
  
         if (timeout) {
                 trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE,
-                               GFP_KERNEL);
+                                                GFP_KERNEL);
                 if (!trf->rx_skb) {
                         dev_dbg(trf->dev, "Can't alloc rx_skb\n");
                         ret = -ENOMEM;
@@ -1546,14 +1531,14 @@ static int trf7970a_send_cmd(struct nfc_digital_dev *ddev,
          * That totals 5 bytes.
          */
         prefix[0] = TRF7970A_CMD_BIT_CTRL |
-                       TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET);
+           TRF7970A_CMD_BIT_OPCODE(TRF7970A_CMD_FIFO_RESET);
         prefix[1] = TRF7970A_CMD_BIT_CTRL |
-                       TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd);
+           TRF7970A_CMD_BIT_OPCODE(trf->tx_cmd);
         prefix[2] = TRF7970A_CMD_BIT_CONTINUOUS | TRF7970A_TX_LENGTH_BYTE1;
  
         if (trf->framing == NFC_DIGITAL_FRAMING_NFCA_SHORT) {
                 prefix[3] = 0x00;
-               prefix[4] = 0x0f; /* 7 bits */
+               prefix[4] = 0x0f;       /* 7 bits */
         } else {
                 prefix[3] = (len & 0xf00) >> 4;
                 prefix[3] |= ((len & 0xf0) >> 4);
@@ -1587,25 +1572,24 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech)
         switch (tech) {
         case NFC_DIGITAL_RF_TECH_106A:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
-                       TRF7970A_ISO_CTRL_NFC_CE |
-                       TRF7970A_ISO_CTRL_NFC_CE_14443A;
+                   TRF7970A_ISO_CTRL_NFC_CE | TRF7970A_ISO_CTRL_NFC_CE_14443A;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_OOK;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_OOK;
                 break;
         case NFC_DIGITAL_RF_TECH_212F:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
-                       TRF7970A_ISO_CTRL_NFC_NFCF_212;
+                   TRF7970A_ISO_CTRL_NFC_NFCF_212;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_ASK10;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_ASK10;
                 break;
         case NFC_DIGITAL_RF_TECH_424F:
                 trf->iso_ctrl_tech = TRF7970A_ISO_CTRL_NFC_NFC_CE_MODE |
-                       TRF7970A_ISO_CTRL_NFC_NFCF_424;
+                   TRF7970A_ISO_CTRL_NFC_NFCF_424;
                 trf->modulator_sys_clk_ctrl =
-                       (trf->modulator_sys_clk_ctrl & 0xf8) |
-                       TRF7970A_MODULATOR_DEPTH_ASK10;
+                   (trf->modulator_sys_clk_ctrl & 0xf8) |
+                   TRF7970A_MODULATOR_DEPTH_ASK10;
                 break;
         default:
                 dev_dbg(trf->dev, "Unsupported rf technology: %d\n", tech);
@@ -1622,9 +1606,9 @@ static int trf7970a_tg_config_rf_tech(struct trf7970a *trf, int tech)
          * here.
          */
         if ((trf->framing == NFC_DIGITAL_FRAMING_NFC_DEP_ACTIVATED) &&
-                       (trf->iso_ctrl_tech != trf->iso_ctrl)) {
+           (trf->iso_ctrl_tech != trf->iso_ctrl)) {
                 ret = trf7970a_write(trf, TRF7970A_ISO_CTRL,
-                               trf->iso_ctrl_tech);
+                                    trf->iso_ctrl_tech);
  
                 trf->iso_ctrl = trf->iso_ctrl_tech;
         }
@@ -1679,15 +1663,15 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing)
                 trf->iso_ctrl = iso_ctrl;
  
                 ret = trf7970a_write(trf, TRF7970A_MODULATOR_SYS_CLK_CTRL,
-                               trf->modulator_sys_clk_ctrl);
+                                    trf->modulator_sys_clk_ctrl);
                 if (ret)
                         return ret;
         }
  
         if (!(trf->chip_status_ctrl & TRF7970A_CHIP_STATUS_RF_ON)) {
                 ret = trf7970a_write(trf, TRF7970A_CHIP_STATUS_CTRL,
-                               trf->chip_status_ctrl |
-                                       TRF7970A_CHIP_STATUS_RF_ON);
+                                    trf->chip_status_ctrl |
+                                    TRF7970A_CHIP_STATUS_RF_ON);
                 if (ret)
                         return ret;
  
@@ -1698,7 +1682,7 @@ static int trf7970a_tg_config_framing(struct trf7970a *trf, int framing)
  }
  
  static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type,
-               int param)
+                                   int param)
  {
         struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
         int ret;
@@ -1710,7 +1694,7 @@ static int trf7970a_tg_configure_hw(struct nfc_digital_dev *ddev, int type,
         trf->is_initiator = false;
  
         if ((trf->state == TRF7970A_ST_PWR_OFF) ||
-                       (trf->state == TRF7970A_ST_RF_OFF)) {
+           (trf->state == TRF7970A_ST_RF_OFF)) {
                 ret = trf7970a_switch_rf_on(trf);
                 if (ret)
                         goto err_unlock;
@@ -1734,7 +1718,8 @@ err_unlock:
  }
  
  static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
-               nfc_digital_cmd_complete_t cb, void *arg, bool mode_detect)
+                              nfc_digital_cmd_complete_t cb, void *arg,
+                              bool mode_detect)
  {
         struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
         int ret;
@@ -1742,9 +1727,9 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
         mutex_lock(&trf->lock);
  
         if ((trf->state != TRF7970A_ST_IDLE) &&
-                       (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
+           (trf->state != TRF7970A_ST_IDLE_RX_BLOCKED)) {
                 dev_err(trf->dev, "%s - Bogus state: %d\n", __func__,
-                               trf->state);
+                       trf->state);
                 ret = -EIO;
                 goto out_err;
         }
@@ -1757,7 +1742,7 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
         }
  
         trf->rx_skb = nfc_alloc_recv_skb(TRF7970A_RX_SKB_ALLOC_SIZE,
-                       GFP_KERNEL);
+                                        GFP_KERNEL);
         if (!trf->rx_skb) {
                 dev_dbg(trf->dev, "Can't alloc rx_skb\n");
                 ret = -ENOMEM;
@@ -1765,25 +1750,25 @@ static int _trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
         }
  
         ret = trf7970a_write(trf, TRF7970A_RX_SPECIAL_SETTINGS,
-                       TRF7970A_RX_SPECIAL_SETTINGS_HBT |
-                       TRF7970A_RX_SPECIAL_SETTINGS_M848 |
-                       TRF7970A_RX_SPECIAL_SETTINGS_C424 |
-                       TRF7970A_RX_SPECIAL_SETTINGS_C212);
+                            TRF7970A_RX_SPECIAL_SETTINGS_HBT |
+                            TRF7970A_RX_SPECIAL_SETTINGS_M848 |
+                            TRF7970A_RX_SPECIAL_SETTINGS_C424 |
+                            TRF7970A_RX_SPECIAL_SETTINGS_C212);
         if (ret)
                 goto out_err;
  
         ret = trf7970a_write(trf, TRF7970A_REG_IO_CTRL,
-                       trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
+                            trf->io_ctrl | TRF7970A_REG_IO_CTRL_VRS(0x1));
         if (ret)
                 goto out_err;
  
         ret = trf7970a_write(trf, TRF7970A_NFC_LOW_FIELD_LEVEL,
-                       TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3));
+                            TRF7970A_NFC_LOW_FIELD_LEVEL_RFDET(0x3));
         if (ret)
                 goto out_err;
  
         ret = trf7970a_write(trf, TRF7970A_NFC_TARGET_LEVEL,
-                       TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7));
+                            TRF7970A_NFC_TARGET_LEVEL_RFDET(0x7));
         if (ret)
                 goto out_err;
  
@@ -1808,32 +1793,33 @@ out_err:
  }
  
  static int trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
-               nfc_digital_cmd_complete_t cb, void *arg)
+                             nfc_digital_cmd_complete_t cb, void *arg)
  {
         struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
  
         dev_dbg(trf->dev, "Listen - state: %d, timeout: %d ms\n",
-                       trf->state, timeout);
+               trf->state, timeout);
  
         return _trf7970a_tg_listen(ddev, timeout, cb, arg, false);
  }
  
  static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev,
-               u16 timeout, nfc_digital_cmd_complete_t cb, void *arg)
+                                u16 timeout, nfc_digital_cmd_complete_t cb,
+                                void *arg)
  {
         struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
         int ret;
  
         dev_dbg(trf->dev, "Listen MD - state: %d, timeout: %d ms\n",
-                       trf->state, timeout);
+               trf->state, timeout);
  
         ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_RF_TECH,
-                       NFC_DIGITAL_RF_TECH_106A);
+                                      NFC_DIGITAL_RF_TECH_106A);
         if (ret)
                 return ret;
  
         ret = trf7970a_tg_configure_hw(ddev, NFC_DIGITAL_CONFIG_FRAMING,
-                       NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
+                                      NFC_DIGITAL_FRAMING_NFCA_NFC_DEP);
         if (ret)
                 return ret;
  
@@ -1845,7 +1831,7 @@ static int trf7970a_tg_get_rf_tech(struct nfc_digital_dev *ddev, u8 *rf_tech)
         struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
  
         dev_dbg(trf->dev, "Get RF Tech - state: %d, rf_tech: %d\n",
-                       trf->state, trf->md_rf_tech);
+               trf->state, trf->md_rf_tech);
  
         *rf_tech = trf->md_rf_tech;
  
@@ -1908,14 +1894,13 @@ static int trf7970a_power_up(struct trf7970a *trf)
  
         usleep_range(5000, 6000);
  
-       if (!(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) {
-               if (gpio_is_valid(trf->en2_gpio)) {
-                       gpio_set_value(trf->en2_gpio, 1);
-                       usleep_range(1000, 2000);
-               }
+       if (trf->en2_gpiod &&
+           !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW)) {
+               gpiod_set_value_cansleep(trf->en2_gpiod, 1);
+               usleep_range(1000, 2000);
         }
  
-       gpio_set_value(trf->en_gpio, 1);
+       gpiod_set_value_cansleep(trf->en_gpiod, 1);
  
         usleep_range(20000, 21000);
  
@@ -1935,18 +1920,19 @@ static int trf7970a_power_down(struct trf7970a *trf)
  
         if (trf->state != TRF7970A_ST_RF_OFF) {
                 dev_dbg(trf->dev, "Can't power down - not RF_OFF state (%d)\n",
-                               trf->state);
+                       trf->state);
                 return -EBUSY;
         }
  
-       gpio_set_value(trf->en_gpio, 0);
-       if (gpio_is_valid(trf->en2_gpio))
-               gpio_set_value(trf->en2_gpio, 0);
+       gpiod_set_value_cansleep(trf->en_gpiod, 0);
+
+       if (trf->en2_gpiod && !(trf->quirks & TRF7970A_QUIRK_EN2_MUST_STAY_LOW))
+               gpiod_set_value_cansleep(trf->en2_gpiod, 0);
  
         ret = regulator_disable(trf->regulator);
         if (ret)
                 dev_err(trf->dev, "%s - Can't disable VIN: %d\n", __func__,
-                               ret);
+                       ret);
  
         trf->state = TRF7970A_ST_PWR_OFF;
  
@@ -2003,12 +1989,6 @@ static int trf7970a_get_autosuspend_delay(struct device_node *np)
         return autosuspend_delay;
  }
  
-static int trf7970a_get_vin_voltage_override(struct device_node *np,
-               u32 *vin_uvolts)
-{
-       return of_property_read_u32(np, "vin-voltage-override", vin_uvolts);
-}
-
  static int trf7970a_probe(struct spi_device *spi)
  {
         struct device_node *np = spi->dev.of_node;
@@ -2038,53 +2018,48 @@ static int trf7970a_probe(struct spi_device *spi)
                 return ret;
         }
  
-       if (of_property_read_bool(np, "t5t-rmb-extra-byte-quirk"))
-               trf->quirks |= TRF7970A_QUIRK_T5T_RMB_EXTRA_BYTE;
-
         if (of_property_read_bool(np, "irq-status-read-quirk"))
                 trf->quirks |= TRF7970A_QUIRK_IRQ_STATUS_READ;
  
-       /* There are two enable pins - both must be present */
-       trf->en_gpio = of_get_named_gpio(np, "ti,enable-gpios", 0);
-       if (!gpio_is_valid(trf->en_gpio)) {
+       /* There are two enable pins - only EN must be present in the DT */
+       trf->en_gpiod = devm_gpiod_get_index(trf->dev, "ti,enable", 0,
+                                            GPIOD_OUT_LOW);
+       if (IS_ERR(trf->en_gpiod)) {
                 dev_err(trf->dev, "No EN GPIO property\n");
-               return trf->en_gpio;
+               return PTR_ERR(trf->en_gpiod);
         }
  
-       ret = devm_gpio_request_one(trf->dev, trf->en_gpio,
-                       GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN");
-       if (ret) {
-               dev_err(trf->dev, "Can't request EN GPIO: %d\n", ret);
-               return ret;
-       }
-
-       trf->en2_gpio = of_get_named_gpio(np, "ti,enable-gpios", 1);
-       if (!gpio_is_valid(trf->en2_gpio)) {
+       trf->en2_gpiod = devm_gpiod_get_index_optional(trf->dev, "ti,enable", 1,
+                                                      GPIOD_OUT_LOW);
+       if (!trf->en2_gpiod) {
                 dev_info(trf->dev, "No EN2 GPIO property\n");
-       } else {
-               ret = devm_gpio_request_one(trf->dev, trf->en2_gpio,
-                               GPIOF_DIR_OUT | GPIOF_INIT_LOW, "trf7970a EN2");
-               if (ret) {
-                       dev_err(trf->dev, "Can't request EN2 GPIO: %d\n", ret);
-                       return ret;
-               }
+       } else if (IS_ERR(trf->en2_gpiod)) {
+               dev_err(trf->dev, "Error getting EN2 GPIO property: %ld\n",
+                       PTR_ERR(trf->en2_gpiod));
+               return PTR_ERR(trf->en2_gpiod);
+       } else if (of_property_read_bool(np, "en2-rf-quirk")) {
+               trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW;
         }
  
         of_property_read_u32(np, "clock-frequency", &clk_freq);
-       if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) ||
-               (clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) {
+       if ((clk_freq != TRF7970A_27MHZ_CLOCK_FREQUENCY) &&
+           (clk_freq != TRF7970A_13MHZ_CLOCK_FREQUENCY)) {
                 dev_err(trf->dev,
-                       "clock-frequency (%u Hz) unsupported\n",
-                       clk_freq);
+                       "clock-frequency (%u Hz) unsupported\n", clk_freq);
                 return -EINVAL;
         }
  
-       if (of_property_read_bool(np, "en2-rf-quirk"))
-               trf->quirks |= TRF7970A_QUIRK_EN2_MUST_STAY_LOW;
+       if (clk_freq == TRF7970A_27MHZ_CLOCK_FREQUENCY) {
+               trf->modulator_sys_clk_ctrl = TRF7970A_MODULATOR_27MHZ;
+               dev_dbg(trf->dev, "trf7970a configured for 27MHz crystal\n");
+       } else {
+               trf->modulator_sys_clk_ctrl = 0;
+       }
  
         ret = devm_request_threaded_irq(trf->dev, spi->irq, NULL,
-                       trf7970a_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT,
-                       "trf7970a", trf);
+                                       trf7970a_irq,
+                                       IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+                                       "trf7970a", trf);
         if (ret) {
                 dev_err(trf->dev, "Can't request IRQ#%d: %d\n", spi->irq, ret);
                 return ret;
@@ -2106,10 +2081,7 @@ static int trf7970a_probe(struct spi_device *spi)
                 goto err_destroy_lock;
         }
  
-       ret = trf7970a_get_vin_voltage_override(np, &uvolts);
-       if (ret)
-               uvolts = regulator_get_voltage(trf->regulator);
-
+       uvolts = regulator_get_voltage(trf->regulator);
         if (uvolts > 4000000)
                 trf->chip_status_ctrl = TRF7970A_CHIP_STATUS_VRS5_3;
  
@@ -2132,9 +2104,10 @@ static int trf7970a_probe(struct spi_device *spi)
         }
  
         trf->ddev = nfc_digital_allocate_device(&trf7970a_nfc_ops,
-                       TRF7970A_SUPPORTED_PROTOCOLS,
-                       NFC_DIGITAL_DRV_CAPS_IN_CRC |
-                               NFC_DIGITAL_DRV_CAPS_TG_CRC, 0, 0);
+                                               TRF7970A_SUPPORTED_PROTOCOLS,
+                                               NFC_DIGITAL_DRV_CAPS_IN_CRC |
+                                               NFC_DIGITAL_DRV_CAPS_TG_CRC, 0,
+                                               0);
         if (!trf->ddev) {
                 dev_err(trf->dev, "Can't allocate NFC digital device\n");
                 ret = -ENOMEM;
@@ -2157,7 +2130,7 @@ static int trf7970a_probe(struct spi_device *spi)
         ret = nfc_digital_register_device(trf->ddev);
         if (ret) {
                 dev_err(trf->dev, "Can't register NFC digital device: %d\n",
-                               ret);
+                       ret);
                 goto err_shutdown;
         }
  
@@ -2266,29 +2239,31 @@ static int trf7970a_pm_runtime_resume(struct device *dev)
  static const struct dev_pm_ops trf7970a_pm_ops = {
         SET_SYSTEM_SLEEP_PM_OPS(trf7970a_suspend, trf7970a_resume)
         SET_RUNTIME_PM_OPS(trf7970a_pm_runtime_suspend,
-                       trf7970a_pm_runtime_resume, NULL)
+                          trf7970a_pm_runtime_resume, NULL)
  };
  
  static const struct of_device_id trf7970a_of_match[] = {
-       { .compatible = "ti,trf7970a", },
-       { /* sentinel */ },
+       {.compatible = "ti,trf7970a",},
+       {},
  };
+
  MODULE_DEVICE_TABLE(of, trf7970a_of_match);
  
  static const struct spi_device_id trf7970a_id_table[] = {
-       { "trf7970a", 0 },
-       { }
+       {"trf7970a", 0},
+       {}
  };
+
  MODULE_DEVICE_TABLE(spi, trf7970a_id_table);
  
  static struct spi_driver trf7970a_spi_driver = {
         .probe          = trf7970a_probe,
         .remove         = trf7970a_remove,
         .id_table       = trf7970a_id_table,
-       .driver         = {
-               .name   = "trf7970a",
-               .of_match_table = of_match_ptr(trf7970a_of_match),
-               .pm     = &trf7970a_pm_ops,
+       .driver = {
+               .name           = "trf7970a",
+               .of_match_table = of_match_ptr(trf7970a_of_match),
+               .pm             = &trf7970a_pm_ops,
         },
  };
  
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index 951042a375d6b22dbd34988e38fef7114593c366..40c7581caeb00d30a60c7b9152ac67dd99a888d6 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1805,7 +1805,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
         if (pci_is_enabled(pdev)) {
                 u32 csts = readl(dev->bar + NVME_REG_CSTS);
  
-               if (dev->ctrl.state == NVME_CTRL_LIVE)
+               if (dev->ctrl.state == NVME_CTRL_LIVE ||
+                   dev->ctrl.state == NVME_CTRL_RESETTING)
                         nvme_start_freeze(&dev->ctrl);
                 dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
                         pdev->error_state  != pci_channel_io_normal);
diff --git a/drivers/s390/net/ctcm_fsms.c b/drivers/s390/net/ctcm_fsms.c

index e9847ce3860d81d51d6e8f26f1bb67220cd68783..570ae3b7adf678e524c0b29c80cc909825f69e8b 100644 (file)
--- a/drivers/s390/net/ctcm_fsms.c
+++ b/drivers/s390/net/ctcm_fsms.c
@@ -217,7 +217,7 @@ void ctcm_purge_skb_queue(struct sk_buff_head *q)
         CTCM_DBF_TEXT(TRACE, CTC_DBF_DEBUG, __func__);
  
         while ((skb = skb_dequeue(q))) {
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_any(skb);
         }
  }
@@ -271,7 +271,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg)
                         priv->stats.tx_bytes += 2;
                         first = 0;
                 }
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_irq(skb);
         }
         spin_lock(&ch->collect_lock);
@@ -297,7 +297,7 @@ static void chx_txdone(fsm_instance *fi, int event, void *arg)
                                 skb_put(ch->trans_skb, skb->len), skb->len);
                         priv->stats.tx_packets++;
                         priv->stats.tx_bytes += skb->len - LL_HEADER_LENGTH;
-                       atomic_dec(&skb->users);
+                       refcount_dec(&skb->users);
                         dev_kfree_skb_irq(skb);
                         i++;
                 }
@@ -1248,7 +1248,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg)
                         priv->stats.tx_bytes += 2;
                         first = 0;
                 }
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_irq(skb);
         }
         spin_lock(&ch->collect_lock);
@@ -1298,7 +1298,7 @@ static void ctcmpc_chx_txdone(fsm_instance *fi, int event, void *arg)
                 data_space -= skb->len;
                 priv->stats.tx_packets++;
                 priv->stats.tx_bytes += skb->len;
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_any(skb);
                 peekskb = skb_peek(&ch->collect_queue);
                 if (peekskb->len > data_space)
@@ -1795,7 +1795,7 @@ static void ctcmpc_chx_send_sweep(fsm_instance *fsm, int event, void *arg)
                 fsm_event(grp->fsm, MPCG_EVENT_INOP, dev);
                                 goto done;
         } else {
-               atomic_inc(&skb->users);
+               refcount_inc(&skb->users);
                 skb_queue_tail(&wch->io_queue, skb);
         }
  
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c

index 99121352c57beec4bceb976f7d06e4e262dd4585..e8782a8619f79276b6876870af80494b502ce142 100644 (file)
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -483,7 +483,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
                         spin_unlock_irqrestore(&ch->collect_lock, saveflags);
                         return -EBUSY;
                 } else {
-                       atomic_inc(&skb->users);
+                       refcount_inc(&skb->users);
                         header.length = l;
                         header.type = be16_to_cpu(skb->protocol);
                         header.unused = 0;
@@ -500,7 +500,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
          * Protect skb against beeing free'd by upper
          * layers.
          */
-       atomic_inc(&skb->users);
+       refcount_inc(&skb->users);
         ch->prof.txlen += skb->len;
         header.length = skb->len + LL_HEADER_LENGTH;
         header.type = be16_to_cpu(skb->protocol);
@@ -517,14 +517,14 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
         if (hi) {
                 nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
                 if (!nskb) {
-                       atomic_dec(&skb->users);
+                       refcount_dec(&skb->users);
                         skb_pull(skb, LL_HEADER_LENGTH + 2);
                         ctcm_clear_busy(ch->netdev);
                         return -ENOMEM;
                 } else {
                         skb_put_data(nskb, skb->data, skb->len);
-                       atomic_inc(&nskb->users);
-                       atomic_dec(&skb->users);
+                       refcount_inc(&nskb->users);
+                       refcount_dec(&skb->users);
                         dev_kfree_skb_irq(skb);
                         skb = nskb;
                 }
@@ -542,7 +542,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
                          * Remove our header. It gets added
                          * again on retransmit.
                          */
-                       atomic_dec(&skb->users);
+                       refcount_dec(&skb->users);
                         skb_pull(skb, LL_HEADER_LENGTH + 2);
                         ctcm_clear_busy(ch->netdev);
                         return -ENOMEM;
@@ -553,7 +553,7 @@ static int ctcm_transmit_skb(struct channel *ch, struct sk_buff *skb)
                 ch->ccw[1].count = skb->len;
                 skb_copy_from_linear_data(skb,
                                 skb_put(ch->trans_skb, skb->len), skb->len);
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_irq(skb);
                 ccw_idx = 0;
         } else {
@@ -679,7 +679,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
  
         if ((fsm_getstate(ch->fsm) != CTC_STATE_TXIDLE) || grp->in_sweep) {
                 spin_lock_irqsave(&ch->collect_lock, saveflags);
-               atomic_inc(&skb->users);
+               refcount_inc(&skb->users);
                 p_header = kmalloc(PDU_HEADER_LENGTH, gfp_type());
  
                 if (!p_header) {
@@ -716,7 +716,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
          * Protect skb against beeing free'd by upper
          * layers.
          */
-       atomic_inc(&skb->users);
+       refcount_inc(&skb->users);
  
         /*
          * IDAL support in CTCM is broken, so we have to
@@ -729,8 +729,8 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
                         goto nomem_exit;
                 } else {
                         skb_put_data(nskb, skb->data, skb->len);
-                       atomic_inc(&nskb->users);
-                       atomic_dec(&skb->users);
+                       refcount_inc(&nskb->users);
+                       refcount_dec(&skb->users);
                         dev_kfree_skb_irq(skb);
                         skb = nskb;
                 }
@@ -810,7 +810,7 @@ static int ctcmpc_transmit_skb(struct channel *ch, struct sk_buff *skb)
                 ch->trans_skb->len = 0;
                 ch->ccw[1].count = skb->len;
                 skb_put_data(ch->trans_skb, skb->data, skb->len);
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_irq(skb);
                 ccw_idx = 0;
                 CTCM_PR_DBGDATA("%s(%s): trans_skb len: %04x\n"
@@ -855,7 +855,7 @@ nomem_exit:
                         "%s(%s): MEMORY allocation ERROR\n",
                         CTCM_FUNTAIL, ch->id);
         rc = -ENOMEM;
-       atomic_dec(&skb->users);
+       refcount_dec(&skb->users);
         dev_kfree_skb_any(skb);
         fsm_event(priv->mpcg->fsm, MPCG_EVENT_INOP, dev);
  done:
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c

index 7db427c0a6a46bf8190ba3b9ccb0453422a1d337..1579695f4e640428296bfb88898b20553c56f2bd 100644 (file)
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -743,7 +743,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
         conn->prof.tx_pending--;
         if (single_flag) {
                 if ((skb = skb_dequeue(&conn->commit_queue))) {
-                       atomic_dec(&skb->users);
+                       refcount_dec(&skb->users);
                         if (privptr) {
                                 privptr->stats.tx_packets++;
                                 privptr->stats.tx_bytes +=
@@ -766,7 +766,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
                 txbytes += skb->len;
                 txpackets++;
                 stat_maxcq++;
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_any(skb);
         }
         if (conn->collect_len > conn->prof.maxmulti)
@@ -958,7 +958,7 @@ static void netiucv_purge_skb_queue(struct sk_buff_head *q)
         struct sk_buff *skb;
  
         while ((skb = skb_dequeue(q))) {
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_any(skb);
         }
  }
@@ -1176,7 +1176,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
                         IUCV_DBF_TEXT(data, 2,
                                       "EBUSY from netiucv_transmit_skb\n");
                 } else {
-                       atomic_inc(&skb->users);
+                       refcount_inc(&skb->users);
                         skb_queue_tail(&conn->collect_queue, skb);
                         conn->collect_len += l;
                         rc = 0;
@@ -1245,7 +1245,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
                 } else {
                         if (copied)
                                 dev_kfree_skb(skb);
-                       atomic_inc(&nskb->users);
+                       refcount_inc(&nskb->users);
                         skb_queue_tail(&conn->commit_queue, nskb);
                 }
         }
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c

index 3b657d5b7e491bc604366b210cd9f180ad4794da..aec06e10b96911b88402edcb87c7a867cbc3bc3a 100644 (file)
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1242,7 +1242,7 @@ static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf)
                                 iucv->sk_txnotify(skb, TX_NOTIFY_GENERALERROR);
                         }
                 }
-               atomic_dec(&skb->users);
+               refcount_dec(&skb->users);
                 dev_kfree_skb_any(skb);
                 skb = skb_dequeue(&buf->skb_list);
         }
@@ -3975,7 +3975,7 @@ static inline int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
         int flush_cnt = 0, hdr_len, large_send = 0;
  
         buffer = buf->buffer;
-       atomic_inc(&skb->users);
+       refcount_inc(&skb->users);
         skb_queue_tail(&buf->skb_list, skb);
  
         /*check first on TSO ....*/
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c

index 2ee92aa90fe93d74013d1a97eb3e59ba3389d79b..e937490d5d9747dd0a246169ee0af9936c5b728e 100644 (file)
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
                 QEDI_ERR(&qedi->dbg_ctx,
                          "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x, task=%p\n",
                          protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
-               WARN_ON(1);
         }
  }
  
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c

index f46880315ba89ab208a2f4169039773bcb521305..5f5a4ef2e52965647e1e3db5b4cdd7bdb8021af8 100644 (file)
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1499,11 +1499,9 @@ err_idx:
  
  void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
  {
-       if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+       if (!test_and_clear_bit(idx, qedi->task_idx_map))
                 QEDI_ERR(&qedi->dbg_ctx,
                          "FW task context, already cleared, tid=0x%x\n", idx);
-               WARN_ON(1);
-       }
  }
  
  void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c

index 0d8f81591bed076fa1f89f7cd27360776488f349..3fdca2cdd8da954b5a9c9d906c8b2d2b5e14f040 100644 (file)
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1279,6 +1279,18 @@ iscsit_get_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr,
          */
         if (dump_payload)
                 goto after_immediate_data;
+       /*
+        * Check for underflow case where both EDTL and immediate data payload
+        * exceeds what is presented by CDB's TRANSFER LENGTH, and what has
+        * already been set in target_cmd_size_check() as se_cmd->data_length.
+        *
+        * For this special case, fail the command and dump the immediate data
+        * payload.
+        */
+       if (cmd->first_burst_len > cmd->se_cmd.data_length) {
+               cmd->sense_reason = TCM_INVALID_CDB_FIELD;
+               goto after_immediate_data;
+       }
  
         immed_ret = iscsit_handle_immediate_data(cmd, hdr,
                                         cmd->first_burst_len);
@@ -4423,8 +4435,11 @@ static void iscsit_logout_post_handler_closesession(
          * always sleep waiting for RX/TX thread shutdown to complete
          * within iscsit_close_connection().
          */
-       if (!conn->conn_transport->rdma_shutdown)
+       if (!conn->conn_transport->rdma_shutdown) {
                 sleep = cmpxchg(&conn->tx_thread_active, true, false);
+               if (!sleep)
+                       return;
+       }
  
         atomic_set(&conn->conn_logout_remove, 0);
         complete(&conn->conn_logout_comp);
@@ -4440,8 +4455,11 @@ static void iscsit_logout_post_handler_samecid(
  {
         int sleep = 1;
  
-       if (!conn->conn_transport->rdma_shutdown)
+       if (!conn->conn_transport->rdma_shutdown) {
                 sleep = cmpxchg(&conn->tx_thread_active, true, false);
+               if (!sleep)
+                       return;
+       }
  
         atomic_set(&conn->conn_logout_remove, 0);
         complete(&conn->conn_logout_comp);
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h

index 9ab7090f7c839c6900cb30ddf7db1b8be4bc78cf..0912de7c0cf8f3ade048de694b4e75f77fe27acd 100644 (file)
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -136,7 +136,7 @@ int init_se_kmem_caches(void);
  void   release_se_kmem_caches(void);
  u32    scsi_get_new_index(scsi_index_t);
  void   transport_subsystem_check_init(void);
-void   transport_cmd_finish_abort(struct se_cmd *, int);
+int    transport_cmd_finish_abort(struct se_cmd *, int);
  unsigned char *transport_dump_cmd_direction(struct se_cmd *);
  void   transport_dump_dev_state(struct se_device *, char *, int *);
  void   transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c

index dce1e1b47316173329292f90276843d26d32407b..13f47bf4d16b1d790ab470b92127254835e76078 100644 (file)
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr)
         kfree(tmr);
  }
  
-static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
+static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
  {
         unsigned long flags;
         bool remove = true, send_tas;
@@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas)
                 transport_send_task_abort(cmd);
         }
  
-       transport_cmd_finish_abort(cmd, remove);
+       return transport_cmd_finish_abort(cmd, remove);
  }
  
  static int target_check_cdb_and_preempt(struct list_head *list,
@@ -184,8 +184,8 @@ void core_tmr_abort_task(
                 cancel_work_sync(&se_cmd->work);
                 transport_wait_for_tasks(se_cmd);
  
-               transport_cmd_finish_abort(se_cmd, true);
-               target_put_sess_cmd(se_cmd);
+               if (!transport_cmd_finish_abort(se_cmd, true))
+                       target_put_sess_cmd(se_cmd);
  
                 printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
                                 " ref_tag: %llu\n", ref_tag);
@@ -281,8 +281,8 @@ static void core_tmr_drain_tmr_list(
                 cancel_work_sync(&cmd->work);
                 transport_wait_for_tasks(cmd);
  
-               transport_cmd_finish_abort(cmd, 1);
-               target_put_sess_cmd(cmd);
+               if (!transport_cmd_finish_abort(cmd, 1))
+                       target_put_sess_cmd(cmd);
         }
  }
  
@@ -380,8 +380,8 @@ static void core_tmr_drain_state_list(
                 cancel_work_sync(&cmd->work);
                 transport_wait_for_tasks(cmd);
  
-               core_tmr_handle_tas_abort(cmd, tas);
-               target_put_sess_cmd(cmd);
+               if (!core_tmr_handle_tas_abort(cmd, tas))
+                       target_put_sess_cmd(cmd);
         }
  }
  
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c

index 6025935036c976edeeee0d7a91df79a66aa84a2b..f1b3a46bdcaffaf8a301569ef7e328ad2c47087f 100644 (file)
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd)
                 percpu_ref_put(&lun->lun_ref);
  }
  
-void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
+int transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
  {
         bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF);
+       int ret = 0;
  
         if (cmd->se_cmd_flags & SCF_SE_LUN_CMD)
                 transport_lun_remove_cmd(cmd);
@@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
                 cmd->se_tfo->aborted_task(cmd);
  
         if (transport_cmd_check_stop_to_fabric(cmd))
-               return;
+               return 1;
         if (remove && ack_kref)
-               transport_put_cmd(cmd);
+               ret = transport_put_cmd(cmd);
+
+       return ret;
  }
  
  static void target_complete_failure_work(struct work_struct *work)
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c

index 734cbf8d9676bd6f6f26561249504ccffd9f8360..dd9f1bebb5a3a980b55e5d0fb758c93e4f694722 100644 (file)
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -344,7 +344,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
         int status;
  
         token = (autofs_wqt_t) param->fail.token;
-       status = param->fail.status ? param->fail.status : -ENOENT;
+       status = param->fail.status < 0 ? param->fail.status : -ENOENT;
         return autofs4_wait_release(sbi, token, status);
  }
  
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 519599dddd3692ee373a9eb00d95d5757556ad42..0a7404ef9335bf7ea19926e2896c9b737951afe5 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -263,7 +263,10 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
                 kfree(vecs);
  
         if (unlikely(bio.bi_error))
-               return bio.bi_error;
+               ret = bio.bi_error;
+
+       bio_uninit(&bio);
+
         return ret;
  }
  
diff --git a/fs/cifs/file.c b/fs/cifs/file.c

index 0fd081bd2a2f5d3fb4ed18fdcb7a1371cf9f5627..fcef70602b278b48ffd74e97f10d2a57b6968cc3 100644 (file)
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3271,7 +3271,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
         if (!is_sync_kiocb(iocb))
                 ctx->iocb = iocb;
  
-       if (to->type & ITER_IOVEC)
+       if (to->type == ITER_IOVEC)
                 ctx->should_dirty = true;
  
         rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c

index b08531977daa4084f774c75de33204b2b6fa0902..3b147dc6af6344ee5c5e616466403f2cc211dbcb 100644 (file)
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -810,7 +810,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
  
         if (!pages) {
                 pages = vmalloc(max_pages * sizeof(struct page *));
-               if (!bv) {
+               if (!pages) {
                         kvfree(bv);
                         return -ENOMEM;
                 }
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c

index 27bc360c7ffd7e1081f907c5f080dc4ba439fbfc..a723df3e01978cdca30afbf107772898ce66f33a 100644 (file)
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
                      struct cifs_fid *fid, __u16 search_flags,
                      struct cifs_search_info *srch_inf)
  {
-       return CIFSFindFirst(xid, tcon, path, cifs_sb,
-                            &fid->netfid, search_flags, srch_inf, true);
+       int rc;
+
+       rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+                          &fid->netfid, search_flags, srch_inf, true);
+       if (rc)
+               cifs_dbg(FYI, "find first failed=%d\n", rc);
+       return rc;
  }
  
  static int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c

index c58691834eb2b74fa34f3fe2661ed3e211c4d22e..7e48561abd299012616428d28f256906a7c5381f 100644 (file)
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -982,7 +982,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
         rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
         kfree(utf16_path);
         if (rc) {
-               cifs_dbg(VFS, "open dir failed\n");
+               cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
                 return rc;
         }
  
@@ -992,7 +992,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
         rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
                                   fid->volatile_fid, 0, srch_inf);
         if (rc) {
-               cifs_dbg(VFS, "query directory failed\n");
+               cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
                 SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
         }
         return rc;
@@ -1809,7 +1809,8 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
  
         sg = init_sg(rqst, sign);
         if (!sg) {
-               cifs_dbg(VFS, "%s: Failed to init sg %d", __func__, rc);
+               cifs_dbg(VFS, "%s: Failed to init sg", __func__);
+               rc = -ENOMEM;
                 goto free_req;
         }
  
@@ -1817,6 +1818,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
         iv = kzalloc(iv_len, GFP_KERNEL);
         if (!iv) {
                 cifs_dbg(VFS, "%s: Failed to alloc IV", __func__);
+               rc = -ENOMEM;
                 goto free_sg;
         }
         iv[0] = 3;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c

index 3cb5c9e2d4e78f641549818fbbad7681b193854d..de50e749ff058d79c67f7462962614c8c835ecdb 100644 (file)
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -188,8 +188,6 @@ static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
         pcreatetime = (__u64 *)value;
         *pcreatetime = CIFS_I(inode)->createtime;
         return sizeof(__u64);
-
-       return rc;
  }
  
  
diff --git a/fs/dax.c b/fs/dax.c

index 2a6889b3585f068c73091d8895639b7e941d702a..9187f3b07f3e7f7b8546724d83dd06f4d16e7d8b 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -859,6 +859,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
                         if (ret < 0)
                                 goto out;
                 }
+               start_index = indices[pvec.nr - 1] + 1;
         }
  out:
         put_dax(dax_dev);
diff --git a/fs/exec.c b/fs/exec.c

index 72934df6847150ba50dfbadad78fe10e01d2eadd..904199086490d5fdf05d0eda850d04a3ce572fa5 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,8 +220,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
  
         if (write) {
                 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+               unsigned long ptr_size;
                 struct rlimit *rlim;
  
+               /*
+                * Since the stack will hold pointers to the strings, we
+                * must account for them as well.
+                *
+                * The size calculation is the entire vma while each arg page is
+                * built, so each time we get here it's calculating how far it
+                * is currently (rather than each call being just the newly
+                * added size from the arg page).  As a result, we need to
+                * always add the entire size of the pointers, so that on the
+                * last call to get_arg_page() we'll actually have the entire
+                * correct size.
+                */
+               ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+               if (ptr_size > ULONG_MAX - size)
+                       goto fail;
+               size += ptr_size;
+
                 acct_arg_size(bprm, size / PAGE_SIZE);
  
                 /*
@@ -239,13 +257,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                  *    to work from.
                  */
                 rlim = current->signal->rlim;
-               if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
-                       put_page(page);
-                       return NULL;
-               }
+               if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+                       goto fail;
         }
  
         return page;
+
+fail:
+       put_page(page);
+       return NULL;
  }
  
  static void put_arg_page(struct page *page)
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c

index c14758e08d738eec44bf08c79acee71366ce0e70..390ac9c39c5932ef93f3ae8d3a5615f2737848af 100644 (file)
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -753,7 +753,6 @@ static void nfs4_callback_free_slot(struct nfs4_session *session,
          * A single slot, so highest used slotid is either 0 or -1
          */
         nfs4_free_slot(tbl, slot);
-       nfs4_slot_tbl_drain_complete(tbl);
         spin_unlock(&tbl->slot_tbl_lock);
  }
  
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 32ccd7754f8a2875933d1f9c532b54c656971bfd..2ac00bf4ecf146815bff44755f4406161e569007 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1946,29 +1946,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  }
  EXPORT_SYMBOL_GPL(nfs_link);
  
-static void
-nfs_complete_rename(struct rpc_task *task, struct nfs_renamedata *data)
-{
-       struct dentry *old_dentry = data->old_dentry;
-       struct dentry *new_dentry = data->new_dentry;
-       struct inode *old_inode = d_inode(old_dentry);
-       struct inode *new_inode = d_inode(new_dentry);
-
-       nfs_mark_for_revalidate(old_inode);
-
-       switch (task->tk_status) {
-       case 0:
-               if (new_inode != NULL)
-                       nfs_drop_nlink(new_inode);
-               d_move(old_dentry, new_dentry);
-               nfs_set_verifier(new_dentry,
-                                       nfs_save_change_attribute(data->new_dir));
-               break;
-       case -ENOENT:
-               nfs_dentry_handle_enoent(old_dentry);
-       }
-}
-
  /*
   * RENAME
   * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -1999,7 +1976,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  {
         struct inode *old_inode = d_inode(old_dentry);
         struct inode *new_inode = d_inode(new_dentry);
-       struct dentry *dentry = NULL;
+       struct dentry *dentry = NULL, *rehash = NULL;
         struct rpc_task *task;
         int error = -EBUSY;
  
@@ -2022,8 +1999,10 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                  * To prevent any new references to the target during the
                  * rename, we unhash the dentry in advance.
                  */
-               if (!d_unhashed(new_dentry))
+               if (!d_unhashed(new_dentry)) {
                         d_drop(new_dentry);
+                       rehash = new_dentry;
+               }
  
                 if (d_count(new_dentry) > 2) {
                         int err;
@@ -2040,6 +2019,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                 goto out;
  
                         new_dentry = dentry;
+                       rehash = NULL;
                         new_inode = NULL;
                 }
         }
@@ -2048,8 +2028,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (new_inode != NULL)
                 NFS_PROTO(new_inode)->return_delegation(new_inode);
  
-       task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
-                                       nfs_complete_rename);
+       task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
         if (IS_ERR(task)) {
                 error = PTR_ERR(task);
                 goto out;
@@ -2059,9 +2038,27 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (error == 0)
                 error = task->tk_status;
         rpc_put_task(task);
+       nfs_mark_for_revalidate(old_inode);
  out:
+       if (rehash)
+               d_rehash(rehash);
         trace_nfs_rename_exit(old_dir, old_dentry,
                         new_dir, new_dentry, error);
+       if (!error) {
+               if (new_inode != NULL)
+                       nfs_drop_nlink(new_inode);
+               /*
+                * The d_move() should be here instead of in an async RPC completion
+                * handler because we need the proper locks to move the dentry.  If
+                * we're interrupted by a signal, the async RPC completion handler
+                * should mark the directories for revalidation.
+                */
+               d_move(old_dentry, new_dentry);
+               nfs_set_verifier(new_dentry,
+                                       nfs_save_change_attribute(new_dir));
+       } else if (error == -ENOENT)
+               nfs_dentry_handle_enoent(old_dentry);
+
         /* new dentry created? */
         if (dentry)
                 dput(dentry);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index c08c46a3b8cde00ef5aa40fae87ed2fce06faea1..dbfa18900e25a38a0998a2d429644a860c559ac2 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2589,7 +2589,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
  
         /* Except MODE, it seems harmless of setting twice. */
         if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
-               attrset[1] & FATTR4_WORD1_MODE)
+               (attrset[1] & FATTR4_WORD1_MODE ||
+                attrset[2] & FATTR4_WORD2_MODE_UMASK))
                 sattr->ia_valid &= ~ATTR_MODE;
  
         if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -8416,6 +8417,7 @@ static void nfs4_layoutget_release(void *calldata)
         size_t max_pages = max_response_pages(server);
  
         dprintk("--> %s\n", __func__);
+       nfs4_sequence_free_slot(&lgp->res.seq_res);
         nfs4_free_pages(lgp->args.layout.pages, max_pages);
         pnfs_put_layout_hdr(NFS_I(inode)->layout);
         put_nfs_open_context(lgp->args.ctx);
@@ -8490,7 +8492,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
         /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
         if (status == 0 && lgp->res.layoutp->len)
                 lseg = pnfs_layout_process(lgp);
-       nfs4_sequence_free_slot(&lgp->res.seq_res);
         rpc_put_task(task);
         dprintk("<-- %s status=%d\n", __func__, status);
         if (status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

index b34de036501bc90e48be043aec38485f7f755e55..cbf82b0d446759a4934fbf7329e6a767ad5530ce 100644 (file)
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2134,6 +2134,8 @@ again:
         put_rpccred(cred);
         switch (status) {
         case 0:
+       case -EINTR:
+       case -ERESTARTSYS:
                 break;
         case -ETIMEDOUT:
                 if (clnt->cl_softrtry)
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c

index 3b7c937a36b528e67511a23b136215ffaab6d8e4..4689940a953c2f7fc3b3a0e07a7d01c8c09e7d9a 100644 (file)
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2591,6 +2591,10 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
         struct ocfs2_lock_res *lockres;
  
         lockres = &OCFS2_I(inode)->ip_inode_lockres;
+       /* had_lock means that the current process already takes the cluster
+        * lock previously. If had_lock is 1, we have nothing to do here, and
+        * it will get unlocked where we got the lock.
+        */
         if (!had_lock) {
                 ocfs2_remove_holder(lockres, oh);
                 ocfs2_inode_unlock(inode, ex);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c

index 3c5384d9b3a549f319b114a782c7daab966a2c28..f70c3778d600c6be63996572bee6fd46ac03440c 100644 (file)
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1328,20 +1328,21 @@ static int ocfs2_xattr_get(struct inode *inode,
                            void *buffer,
                            size_t buffer_size)
  {
-       int ret;
+       int ret, had_lock;
         struct buffer_head *di_bh = NULL;
+       struct ocfs2_lock_holder oh;
  
-       ret = ocfs2_inode_lock(inode, &di_bh, 0);
-       if (ret < 0) {
-               mlog_errno(ret);
-               return ret;
+       had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 0, &oh);
+       if (had_lock < 0) {
+               mlog_errno(had_lock);
+               return had_lock;
         }
         down_read(&OCFS2_I(inode)->ip_xattr_sem);
         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
                                      name, buffer, buffer_size);
         up_read(&OCFS2_I(inode)->ip_xattr_sem);
  
-       ocfs2_inode_unlock(inode, 0);
+       ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
  
         brelse(di_bh);
  
@@ -3537,11 +3538,12 @@ int ocfs2_xattr_set(struct inode *inode,
  {
         struct buffer_head *di_bh = NULL;
         struct ocfs2_dinode *di;
-       int ret, credits, ref_meta = 0, ref_credits = 0;
+       int ret, credits, had_lock, ref_meta = 0, ref_credits = 0;
         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
         struct inode *tl_inode = osb->osb_tl_inode;
         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
         struct ocfs2_refcount_tree *ref_tree = NULL;
+       struct ocfs2_lock_holder oh;
  
         struct ocfs2_xattr_info xi = {
                 .xi_name_index = name_index,
@@ -3572,8 +3574,9 @@ int ocfs2_xattr_set(struct inode *inode,
                 return -ENOMEM;
         }
  
-       ret = ocfs2_inode_lock(inode, &di_bh, 1);
-       if (ret < 0) {
+       had_lock = ocfs2_inode_lock_tracker(inode, &di_bh, 1, &oh);
+       if (had_lock < 0) {
+               ret = had_lock;
                 mlog_errno(ret);
                 goto cleanup_nolock;
         }
@@ -3670,7 +3673,7 @@ cleanup:
                 if (ret)
                         mlog_errno(ret);
         }
-       ocfs2_inode_unlock(inode, 1);
+       ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
  cleanup_nolock:
         brelse(di_bh);
         brelse(xbs.xattr_bh);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index 09af0f7cd55e278312881999755d3d8d0793d5c8..3b91faacc1baeaff2ac762e2438e5fcbe48cc76b 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1316,9 +1316,12 @@ xfs_vm_bmap(
          * The swap code (ab-)uses ->bmap to get a block mapping and then
          * bypasses the file system for actual I/O.  We really can't allow
          * that on reflinks inodes, so we have to skip out here.  And yes,
-        * 0 is the magic code for a bmap error..
+        * 0 is the magic code for a bmap error.
+        *
+        * Since we don't pass back blockdev info, we can't return bmap
+        * information for rt files either.
          */
-       if (xfs_is_reflink_inode(ip))
+       if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
                 return 0;
  
         filemap_write_and_wait(mapping);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h

index 197f3fffc9a7151ed61d0b960f5e452f6beccb5c..408c7820e200f99e50d978c342dd67cc55166d0c 100644 (file)
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -210,7 +210,8 @@ struct acpi_device_flags {
         u32 of_compatible_ok:1;
         u32 coherent_dma:1;
         u32 cca_seen:1;
-       u32 reserved:20;
+       u32 spi_i2c_slave:1;
+       u32 reserved:19;
  };
  
  /* File System */
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h

index c1da539f5e28a965702c39d21375923e8271f67f..4d97a89da0660c3d91ce0da6af025e04e8e27601 100644 (file)
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -254,7 +254,7 @@ static inline void atm_return(struct atm_vcc *vcc,int truesize)
  
  static inline int atm_may_send(struct atm_vcc *vcc,unsigned int size)
  {
-       return (size + atomic_read(&sk_atm(vcc)->sk_wmem_alloc)) <
+       return (size + refcount_read(&sk_atm(vcc)->sk_wmem_alloc)) <
                sk_atm(vcc)->sk_sndbuf;
  }
  
diff --git a/include/linux/bio.h b/include/linux/bio.h

index d1b04b0e99cf8c293d4ded6eccb2b0aa2fce0d41..a7e29fa0981f148602dcb11dd53e07e1c46147d3 100644 (file)
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -426,6 +426,7 @@ extern void bio_advance(struct bio *, unsigned);
  
  extern void bio_init(struct bio *bio, struct bio_vec *table,
                      unsigned short max_vecs);
+extern void bio_uninit(struct bio *);
  extern void bio_reset(struct bio *);
  void bio_chain(struct bio *, struct bio *);
  
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index b74a3edcb3da82903568981a5b49fbbf1f4269cb..1ddd36bd2173b98e925eabdf083a796bfcabdd07 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,6 +391,8 @@ struct request_queue {
         int                     nr_rqs[2];      /* # allocated [a]sync rqs */
         int                     nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
  
+       atomic_t                shared_hctx_restart;
+
         struct blk_queue_stats  *stats;
         struct rq_wb            *rq_wb;
  
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h

index c970a25d2a49b34325aa6ae3e386b9ed93743c14..360c082e885c7777ef6d9509dec75bbb6ee3fff3 100644 (file)
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -7,6 +7,7 @@
  struct sock;
  struct cgroup;
  struct sk_buff;
+struct bpf_sock_ops_kern;
  
  #ifdef CONFIG_CGROUP_BPF
  
@@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
  int __cgroup_bpf_run_filter_sk(struct sock *sk,
                                enum bpf_attach_type type);
  
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+                                    struct bpf_sock_ops_kern *sock_ops,
+                                    enum bpf_attach_type type);
+
  /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
  #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)                            \
  ({                                                                           \
@@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
         __ret;                                                                 \
  })
  
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)                                \
+({                                                                            \
+       int __ret = 0;                                                         \
+       if (cgroup_bpf_enabled && (sock_ops)->sk) {            \
+               typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk);               \
+               if (sk_fullsock(__sk))                                         \
+                       __ret = __cgroup_bpf_run_filter_sock_ops(__sk,         \
+                                                                sock_ops,     \
+                                                        BPF_CGROUP_SOCK_OPS); \
+       }                                                                      \
+       __ret;                                                                 \
+})
  #else
  
  struct cgroup_bpf {};
@@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
  #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
  #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
  #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
  
  #endif /* CONFIG_CGROUP_BPF */
  
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index deca4e7f28451f95bf566a201271fd94633f42e3..b69e7a5869ffb33fcf70ba4486bfe9001d720fd8 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -36,6 +36,7 @@ struct bpf_map_ops {
                                 int fd);
         void (*map_fd_put_ptr)(void *ptr);
         u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
+       u32 (*map_fd_sys_lookup_elem)(void *ptr);
  };
  
  struct bpf_map {
@@ -155,9 +156,14 @@ struct bpf_prog;
  struct bpf_insn_access_aux {
         enum bpf_reg_type reg_type;
         int ctx_field_size;
-       int converted_op_size;
  };
  
+static inline void
+bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
+{
+       aux->ctx_field_size = size;
+}
+
  struct bpf_verifier_ops {
         /* return eBPF function prototype for verification */
         const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -172,7 +178,7 @@ struct bpf_verifier_ops {
         u32 (*convert_ctx_access)(enum bpf_access_type type,
                                   const struct bpf_insn *src,
                                   struct bpf_insn *dst,
-                                 struct bpf_prog *prog);
+                                 struct bpf_prog *prog, u32 *target_size);
         int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
                         union bpf_attr __user *uattr);
  };
@@ -288,9 +294,11 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
  
  int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
                                  void *key, void *value, u64 map_flags);
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
  void bpf_fd_array_map_clear(struct bpf_map *map);
  int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
                                 void *key, void *value, u64 map_flags);
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
  
  /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
   * forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h

index 03bf223f18be0001c80f4307ed4f521361984104..3d137c33d664a7d2ca3843c0ce8ab8f190e87ebb 100644 (file)
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops)
  BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
  BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
  BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
  #endif
  #ifdef CONFIG_BPF_EVENTS
  BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 1fa26dc562cef1345430cb3f0a6d71a10092e1a7..f1fc9baa35091f8e6523e8deb1554bcf600986a7 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -337,6 +337,22 @@ struct bpf_prog_aux;
         bpf_size;                                               \
  })
  
+#define bpf_size_to_bytes(bpf_size)                            \
+({                                                             \
+       int bytes = -EINVAL;                                    \
+                                                               \
+       if (bpf_size == BPF_B)                                  \
+               bytes = sizeof(u8);                             \
+       else if (bpf_size == BPF_H)                             \
+               bytes = sizeof(u16);                            \
+       else if (bpf_size == BPF_W)                             \
+               bytes = sizeof(u32);                            \
+       else if (bpf_size == BPF_DW)                            \
+               bytes = sizeof(u64);                            \
+                                                               \
+       bytes;                                                  \
+})
+
  #define BPF_SIZEOF(type)                                       \
         ({                                                      \
                 const int __size = bytes_to_bpf_size(sizeof(type)); \
@@ -351,6 +367,13 @@ struct bpf_prog_aux;
                 __size;                                         \
         })
  
+#define BPF_LDST_BYTES(insn)                                   \
+       ({                                                      \
+               const int __size = bpf_size_to_bytes(BPF_SIZE(insn->code)); \
+               WARN_ON(__size < 0);                            \
+               __size;                                         \
+       })
+
  #define __BPF_MAP_0(m, v, ...) v
  #define __BPF_MAP_1(m, v, t, a, ...) m(t, a)
  #define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__)
@@ -401,6 +424,18 @@ struct bpf_prog_aux;
  #define BPF_CALL_4(name, ...)  BPF_CALL_x(4, name, __VA_ARGS__)
  #define BPF_CALL_5(name, ...)  BPF_CALL_x(5, name, __VA_ARGS__)
  
+#define bpf_ctx_range(TYPE, MEMBER)                                            \
+       offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)                             \
+       offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+
+#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE)                           \
+       ({                                                                      \
+               BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE));             \
+               *(PTR_SIZE) = (SIZE);                                           \
+               offsetof(TYPE, MEMBER);                                         \
+       })
+
  #ifdef CONFIG_COMPAT
  /* A struct sock_filter is architecture independent. */
  struct compat_sock_fprog {
@@ -564,6 +599,18 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
         return prog->type == BPF_PROG_TYPE_UNSPEC;
  }
  
+static inline bool
+bpf_ctx_narrow_access_ok(u32 off, u32 size, const u32 size_default)
+{
+       bool off_ok;
+#ifdef __LITTLE_ENDIAN
+       off_ok = (off & (size_default - 1)) == 0;
+#else
+       off_ok = (off & (size_default - 1)) + size == size_default;
+#endif
+       return off_ok && size <= size_default && (size & (size - 1)) == 0;
+}
+
  #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
  
  #ifdef CONFIG_ARCH_HAS_SET_MEMORY
@@ -898,4 +945,13 @@ static inline int bpf_tell_extensions(void)
         return SKF_AD_MAX;
  }
  
+struct bpf_sock_ops_kern {
+       struct  sock *sk;
+       u32     op;
+       union {
+               u32 reply;
+               u32 replylong[4];
+       };
+};
+
  #endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/igmp.h b/include/linux/igmp.h

index 12f6fba6d21ad593115301e121897c44817680f1..97caf1821de8ceebe6016b134ae291d802fcb78d 100644 (file)
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -18,6 +18,7 @@
  #include <linux/skbuff.h>
  #include <linux/timer.h>
  #include <linux/in.h>
+#include <linux/refcount.h>
  #include <uapi/linux/igmp.h>
  
  static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
@@ -84,7 +85,7 @@ struct ip_mc_list {
         struct ip_mc_list __rcu *next_hash;
         struct timer_list       timer;
         int                     users;
-       atomic_t                refcnt;
+       refcount_t              refcnt;
         spinlock_t              lock;
         char                    tm_running;
         char                    reporter;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h

index e7c04c4e4bcd3e538b365b138a848df9ed96ede6..fb3f809e34e4796b406bf4da2323a940d28517fb 100644 (file)
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -11,6 +11,7 @@
  #include <linux/timer.h>
  #include <linux/sysctl.h>
  #include <linux/rtnetlink.h>
+#include <linux/refcount.h>
  
  struct ipv4_devconf {
         void    *sysctl;
@@ -22,7 +23,7 @@ struct ipv4_devconf {
  
  struct in_device {
         struct net_device       *dev;
-       atomic_t                refcnt;
+       refcount_t              refcnt;
         int                     dead;
         struct in_ifaddr        *ifa_list;      /* IP ifaddr chain              */
  
@@ -219,7 +220,7 @@ static inline struct in_device *in_dev_get(const struct net_device *dev)
         rcu_read_lock();
         in_dev = __in_dev_get_rcu(dev);
         if (in_dev)
-               atomic_inc(&in_dev->refcnt);
+               refcount_inc(&in_dev->refcnt);
         rcu_read_unlock();
         return in_dev;
  }
@@ -240,12 +241,12 @@ void in_dev_finish_destroy(struct in_device *idev);
  
  static inline void in_dev_put(struct in_device *idev)
  {
-       if (atomic_dec_and_test(&idev->refcnt))
+       if (refcount_dec_and_test(&idev->refcnt))
                 in_dev_finish_destroy(idev);
  }
  
-#define __in_dev_put(idev)  atomic_dec(&(idev)->refcnt)
-#define in_dev_hold(idev)   atomic_inc(&(idev)->refcnt)
+#define __in_dev_put(idev)  refcount_dec(&(idev)->refcnt)
+#define in_dev_hold(idev)   refcount_inc(&(idev)->refcnt)
  
  #endif /* __KERNEL__ */
  
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h

index 556e1c31b5d0852795b3b2873cd456263079cc96..f31a0b5377e1d469050163cf87537c759229696e 100644 (file)
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1103,6 +1103,9 @@ enum mlx5_mcam_feature_groups {
  #define MLX5_CAP_FPGA(mdev, cap) \
         MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
  
+#define MLX5_CAP64_FPGA(mdev, cap) \
+       MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+
  enum {
         MLX5_CMD_STAT_OK                        = 0x0,
         MLX5_CMD_STAT_INT_ERR                   = 0x1,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h

index 750701b3b863e20056819db8103e59d6f9d8e843..df6ce59a1f954257cdef95a7733736e42c8b9491 100644 (file)
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -44,6 +44,7 @@
  #include <linux/workqueue.h>
  #include <linux/mempool.h>
  #include <linux/interrupt.h>
+#include <linux/idr.h>
  
  #include <linux/mlx5/device.h>
  #include <linux/mlx5/doorbell.h>
@@ -110,6 +111,7 @@ enum {
         MLX5_REG_DCBX_APP        = 0x4021,
         MLX5_REG_FPGA_CAP        = 0x4022,
         MLX5_REG_FPGA_CTRL       = 0x4023,
+       MLX5_REG_FPGA_ACCESS_REG = 0x4024,
         MLX5_REG_PCAP            = 0x5001,
         MLX5_REG_PMTU            = 0x5003,
         MLX5_REG_PTYS            = 0x5004,
@@ -737,6 +739,14 @@ struct mlx5e_resources {
         struct mlx5_sq_bfreg       bfreg;
  };
  
+#define MLX5_MAX_RESERVED_GIDS 8
+
+struct mlx5_rsvd_gids {
+       unsigned int start;
+       unsigned int count;
+       struct ida ida;
+};
+
  struct mlx5_core_dev {
         struct pci_dev         *pdev;
         /* sync pci state */
@@ -766,6 +776,10 @@ struct mlx5_core_dev {
         atomic_t                num_qps;
         u32                     issi;
         struct mlx5e_resources  mlx5e_res;
+       struct {
+               struct mlx5_rsvd_gids   reserved_gids;
+               atomic_t                roce_en;
+       } roce;
  #ifdef CONFIG_MLX5_FPGA
         struct mlx5_fpga_device *fpga;
  #endif
@@ -932,6 +946,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev);
  void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
  void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
  void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
+void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
  int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
                         struct mlx5_buf *buf, int node);
  int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
@@ -1045,6 +1060,11 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
                      bool map_wc, bool fast_path);
  void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
  
+unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
+int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
+                          u8 roce_version, u8 roce_l3_type, const u8 *gid,
+                          const u8 *mac, bool vlan, u16 vlan_id);
+
  static inline int fw_initializing(struct mlx5_core_dev *dev)
  {
         return ioread32be(&dev->iseg->initializing) >> 31;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h

index d6b99d5d0f2418557f75de12d4d44ff00fe2bedf..87869c04849ad6681cb3604320c8c39a5745e995 100644 (file)
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -232,6 +232,11 @@ enum {
         MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
         MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
         MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+       MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
+       MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
+       MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
+       MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
+       MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
         MLX5_CMD_OP_MAX
  };
  
@@ -600,7 +605,10 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
         u8         tunnel_statless_gre[0x1];
         u8         tunnel_stateless_vxlan[0x1];
  
-       u8         reserved_at_20[0x20];
+       u8         swp[0x1];
+       u8         swp_csum[0x1];
+       u8         swp_lso[0x1];
+       u8         reserved_at_23[0x1d];
  
         u8         reserved_at_40[0x10];
         u8         lro_min_mss_size[0x10];
@@ -2433,7 +2441,8 @@ struct mlx5_ifc_sqc_bits {
         u8         min_wqe_inline_mode[0x3];
         u8         state[0x4];
         u8         reg_umr[0x1];
-       u8         reserved_at_d[0x13];
+       u8         allow_swp[0x1];
+       u8         reserved_at_e[0x12];
  
         u8         reserved_at_20[0x8];
         u8         user_index[0x18];
@@ -8304,6 +8313,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
         struct mlx5_ifc_sltp_reg_bits sltp_reg;
         struct mlx5_ifc_mtpps_reg_bits mtpps_reg;
         struct mlx5_ifc_mtppse_reg_bits mtppse_reg;
+       struct mlx5_ifc_fpga_access_reg_bits fpga_access_reg;
         struct mlx5_ifc_fpga_ctrl_bits fpga_ctrl_bits;
         struct mlx5_ifc_fpga_cap_bits fpga_cap_bits;
         struct mlx5_ifc_mcqi_reg_bits mcqi_reg;
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h

index 0032d10ac6cfc8d0dcf97c55930b20e73d892327..255a88d08078e8a33b5163d3d5747f7b6185bd78 100644 (file)
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -32,6 +32,14 @@
  #ifndef MLX5_IFC_FPGA_H
  #define MLX5_IFC_FPGA_H
  
+enum {
+       MLX5_FPGA_CAP_SANDBOX_VENDOR_ID_MLNX = 0x2c9,
+};
+
+enum {
+       MLX5_FPGA_CAP_SANDBOX_PRODUCT_ID_IPSEC    = 0x2,
+};
+
  struct mlx5_ifc_fpga_shell_caps_bits {
         u8         max_num_qps[0x10];
         u8         reserved_at_10[0x8];
@@ -108,6 +116,15 @@ struct mlx5_ifc_fpga_cap_bits {
         u8         reserved_at_500[0x300];
  };
  
+enum {
+       MLX5_FPGA_CTRL_OPERATION_LOAD                = 0x1,
+       MLX5_FPGA_CTRL_OPERATION_RESET               = 0x2,
+       MLX5_FPGA_CTRL_OPERATION_FLASH_SELECT        = 0x3,
+       MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON   = 0x4,
+       MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF  = 0x5,
+       MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX       = 0x6,
+};
+
  struct mlx5_ifc_fpga_ctrl_bits {
         u8         reserved_at_0[0x8];
         u8         operation[0x8];
@@ -141,4 +158,275 @@ struct mlx5_ifc_fpga_error_event_bits {
         u8         reserved_at_60[0x80];
  };
  
+#define MLX5_FPGA_ACCESS_REG_SIZE_MAX 64
+
+struct mlx5_ifc_fpga_access_reg_bits {
+       u8         reserved_at_0[0x20];
+
+       u8         reserved_at_20[0x10];
+       u8         size[0x10];
+
+       u8         address[0x40];
+
+       u8         data[0][0x8];
+};
+
+enum mlx5_ifc_fpga_qp_state {
+       MLX5_FPGA_QPC_STATE_INIT    = 0x0,
+       MLX5_FPGA_QPC_STATE_ACTIVE  = 0x1,
+       MLX5_FPGA_QPC_STATE_ERROR   = 0x2,
+};
+
+enum mlx5_ifc_fpga_qp_type {
+       MLX5_FPGA_QPC_QP_TYPE_SHELL_QP    = 0x0,
+       MLX5_FPGA_QPC_QP_TYPE_SANDBOX_QP  = 0x1,
+};
+
+enum mlx5_ifc_fpga_qp_service_type {
+       MLX5_FPGA_QPC_ST_RC  = 0x0,
+};
+
+struct mlx5_ifc_fpga_qpc_bits {
+       u8         state[0x4];
+       u8         reserved_at_4[0x1b];
+       u8         qp_type[0x1];
+
+       u8         reserved_at_20[0x4];
+       u8         st[0x4];
+       u8         reserved_at_28[0x10];
+       u8         traffic_class[0x8];
+
+       u8         ether_type[0x10];
+       u8         prio[0x3];
+       u8         dei[0x1];
+       u8         vid[0xc];
+
+       u8         reserved_at_60[0x20];
+
+       u8         reserved_at_80[0x8];
+       u8         next_rcv_psn[0x18];
+
+       u8         reserved_at_a0[0x8];
+       u8         next_send_psn[0x18];
+
+       u8         reserved_at_c0[0x10];
+       u8         pkey[0x10];
+
+       u8         reserved_at_e0[0x8];
+       u8         remote_qpn[0x18];
+
+       u8         reserved_at_100[0x15];
+       u8         rnr_retry[0x3];
+       u8         reserved_at_118[0x5];
+       u8         retry_count[0x3];
+
+       u8         reserved_at_120[0x20];
+
+       u8         reserved_at_140[0x10];
+       u8         remote_mac_47_32[0x10];
+
+       u8         remote_mac_31_0[0x20];
+
+       u8         remote_ip[16][0x8];
+
+       u8         reserved_at_200[0x40];
+
+       u8         reserved_at_240[0x10];
+       u8         fpga_mac_47_32[0x10];
+
+       u8         fpga_mac_31_0[0x20];
+
+       u8         fpga_ip[16][0x8];
+};
+
+struct mlx5_ifc_fpga_create_qp_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x40];
+
+       struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_create_qp_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x8];
+       u8         fpga_qpn[0x18];
+
+       u8         reserved_at_60[0x20];
+
+       struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x8];
+       u8         fpga_qpn[0x18];
+
+       u8         field_select[0x20];
+
+       struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_modify_qp_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_fpga_query_qp_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x8];
+       u8         fpga_qpn[0x18];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+
+       struct mlx5_ifc_fpga_qpc_bits fpga_qpc;
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         clear[0x1];
+       u8         reserved_at_41[0x7];
+       u8         fpga_qpn[0x18];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_query_qp_counters_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+
+       u8         rx_ack_packets[0x40];
+
+       u8         rx_send_packets[0x40];
+
+       u8         tx_ack_packets[0x40];
+
+       u8         tx_send_packets[0x40];
+
+       u8         rx_total_drop[0x40];
+
+       u8         reserved_at_1c0[0x1c0];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0x8];
+       u8         fpga_qpn[0x18];
+
+       u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_fpga_destroy_qp_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_ipsec_extended_cap_bits {
+       u8         encapsulation[0x20];
+
+       u8         reserved_0[0x15];
+       u8         ipv4_fragment[0x1];
+       u8         ipv6[0x1];
+       u8         esn[0x1];
+       u8         lso[0x1];
+       u8         transport_and_tunnel_mode[0x1];
+       u8         tunnel_mode[0x1];
+       u8         transport_mode[0x1];
+       u8         ah_esp[0x1];
+       u8         esp[0x1];
+       u8         ah[0x1];
+       u8         ipv4_options[0x1];
+
+       u8         auth_alg[0x20];
+
+       u8         enc_alg[0x20];
+
+       u8         sa_cap[0x20];
+
+       u8         reserved_1[0x10];
+       u8         number_of_ipsec_counters[0x10];
+
+       u8         ipsec_counters_addr_low[0x20];
+       u8         ipsec_counters_addr_high[0x20];
+};
+
+struct mlx5_ifc_ipsec_counters_bits {
+       u8         dec_in_packets[0x40];
+
+       u8         dec_out_packets[0x40];
+
+       u8         dec_bypass_packets[0x40];
+
+       u8         enc_in_packets[0x40];
+
+       u8         enc_out_packets[0x40];
+
+       u8         enc_bypass_packets[0x40];
+
+       u8         drop_dec_packets[0x40];
+
+       u8         failed_auth_dec_packets[0x40];
+
+       u8         drop_enc_packets[0x40];
+
+       u8         success_add_sa[0x40];
+
+       u8         fail_add_sa[0x40];
+
+       u8         success_delete_sa[0x40];
+
+       u8         fail_delete_sa[0x40];
+
+       u8         dropped_cmd[0x40];
+};
+
  #endif /* MLX5_IFC_FPGA_H */
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h

index 1f637f4d126556bbdb3c1362b32fb3e76ff077ea..6f41270d80c03128bdeb60e5c6fc1b6ca2b5fe54 100644 (file)
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -225,10 +225,20 @@ enum {
         MLX5_ETH_WQE_INSERT_VLAN        = 1 << 15,
  };
  
+enum {
+       MLX5_ETH_WQE_SWP_INNER_L3_IPV6  = 1 << 0,
+       MLX5_ETH_WQE_SWP_INNER_L4_UDP   = 1 << 1,
+       MLX5_ETH_WQE_SWP_OUTER_L3_IPV6  = 1 << 4,
+       MLX5_ETH_WQE_SWP_OUTER_L4_UDP   = 1 << 5,
+};
+
  struct mlx5_wqe_eth_seg {
-       u8              rsvd0[4];
+       u8              swp_outer_l4_offset;
+       u8              swp_outer_l3_offset;
+       u8              swp_inner_l4_offset;
+       u8              swp_inner_l3_offset;
         u8              cs_flags;
-       u8              rsvd1;
+       u8              swp_flags;
         __be16          mss;
         __be32          rsvd2;
         union {
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h

index 996711d8a7b4b536990c05d698243cdbb3413e44..41d04e9d088a8bee6410ee7c4fa2d150fd299b88 100644 (file)
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -1,7 +1,6 @@
  #ifndef _NFNETLINK_H
  #define _NFNETLINK_H
  
-
  #include <linux/netlink.h>
  #include <linux/capability.h>
  #include <net/netlink.h>
@@ -10,13 +9,16 @@
  struct nfnl_callback {
         int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb,
                     const struct nlmsghdr *nlh,
-                   const struct nlattr * const cda[]);
+                   const struct nlattr * const cda[],
+                   struct netlink_ext_ack *extack);
         int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb,
                         const struct nlmsghdr *nlh,
-                       const struct nlattr * const cda[]);
+                       const struct nlattr * const cda[],
+                       struct netlink_ext_ack *extack);
         int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
                           const struct nlmsghdr *nlh,
-                         const struct nlattr * const cda[]);
+                         const struct nlattr * const cda[],
+                         struct netlink_ext_ack *extack);
         const struct nla_policy *policy;        /* netlink attribute policy */
         const u_int16_t attr_count;             /* number of nlattr's */
  };
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h

index e0cbf17af780e1d3e4c2be6bba351c9a27cebf88..2c2a5514b0df98a0fd92294aad15b00855c9256a 100644 (file)
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -122,8 +122,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb,
  #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS))
  /* Clear the bit in the hook mask that tells if the rule is on a base chain */
  #define CLEAR_BASE_CHAIN_BIT (par->hook_mask &= ~(1 << NF_BR_NUMHOOKS))
-/* True if the target is not a standard target */
-#define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0)
  
  static inline bool ebt_invalid_target(int target)
  {
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h

index 1828900c94118ac959168873a91a6dcd4cb8d4cf..27c0aaa22cb0f146c87b4e0abe6191fc7f1bbc8f 100644 (file)
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -11,6 +11,7 @@
  #include <linux/interrupt.h>
  #include <linux/rcupdate.h>
  #include <linux/list.h>
+#include <linux/refcount.h>
  
  union inet_addr {
         __u32           all[4];
@@ -34,7 +35,7 @@ struct netpoll {
  };
  
  struct netpoll_info {
-       atomic_t refcnt;
+       refcount_t refcnt;
  
         struct semaphore dev_lock;
  
diff --git a/include/linux/phy.h b/include/linux/phy.h

index 1d8d70193782debe0ac689c8c1615a632dfb8c59..2a9567bb818636ddf979d8a2d6708991ee06d1a5 100644 (file)
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -372,6 +372,7 @@ struct phy_c45_device_ids {
   * has_fixups: Set to true if this phy has fixups/quirks.
   * suspended: Set to true if this phy has been suspended successfully.
   * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal.
+ * loopback_enabled: Set true if this phy has been loopbacked successfully.
   * state: state of the PHY for management purposes
   * dev_flags: Device-specific flags used by the PHY driver.
   * link_timeout: The number of timer firings to wait before the
@@ -409,6 +410,7 @@ struct phy_device {
         bool has_fixups;
         bool suspended;
         bool sysfs_links;
+       bool loopback_enabled;
  
         enum phy_state state;
  
@@ -648,6 +650,7 @@ struct phy_driver {
         int (*set_tunable)(struct phy_device *dev,
                             struct ethtool_tunable *tuna,
                             const void *data);
+       int (*set_loopback)(struct phy_device *dev, bool enable);
  };
  #define to_phy_driver(d) container_of(to_mdio_common_driver(d),                \
                                       struct phy_driver, mdiodrv)
@@ -793,6 +796,7 @@ void phy_device_remove(struct phy_device *phydev);
  int phy_init_hw(struct phy_device *phydev);
  int phy_suspend(struct phy_device *phydev);
  int phy_resume(struct phy_device *phydev);
+int phy_loopback(struct phy_device *phydev, bool enable);
  struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
                               phy_interface_t interface);
  struct phy_device *phy_find_first(struct mii_bus *bus);
@@ -847,6 +851,7 @@ int genphy_update_link(struct phy_device *phydev);
  int genphy_read_status(struct phy_device *phydev);
  int genphy_suspend(struct phy_device *phydev);
  int genphy_resume(struct phy_device *phydev);
+int genphy_loopback(struct phy_device *phydev, bool enable);
  int genphy_soft_reset(struct phy_device *phydev);
  static inline int genphy_no_soft_reset(struct phy_device *phydev)
  {
diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h

index a6f9d633f5bedf8ca1c3ca302de283edb18bfaf2..9e75ac8d19be3ee1fa619af941a418a23261e902 100644 (file)
--- a/include/linux/platform_data/nfcmrvl.h
+++ b/include/linux/platform_data/nfcmrvl.h
@@ -23,7 +23,7 @@ struct nfcmrvl_platform_data {
          */
  
         /* GPIO that is wired to RESET_N signal */
-       unsigned int reset_n_io;
+       int reset_n_io;
         /* Tell if transport is muxed in HCI one */
         unsigned int hci_muxed;
  
diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h

deleted file mode 100644 (file)

index f6494b3..0000000
--- a/include/linux/platform_data/st-nci.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Driver include for ST NCI NFC chip family.
- *
- * Copyright (C) 2014-2015  STMicroelectronics SAS. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ST_NCI_H_
-#define _ST_NCI_H_
-
-#define ST_NCI_DRIVER_NAME "st_nci"
-
-struct st_nci_nfc_platform_data {
-       unsigned int gpio_reset;
-       unsigned int irq_polarity;
-       bool is_ese_present;
-       bool is_uicc_present;
-};
-
-#endif /* _ST_NCI_H_ */
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h

index a567cbf8c5b465edf3aff52d0454886e3d0239c7..39e2a2ac247197d67e1489dfefe608409a821706 100644 (file)
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -38,6 +38,8 @@
  #include <linux/slab.h>
  
  /* dma_addr_t manip */
+#define PTR_LO(x)               ((u32)(((uintptr_t)(x)) & 0xffffffff))
+#define PTR_HI(x)               ((u32)((((uintptr_t)(x)) >> 16) >> 16))
  #define DMA_LO_LE(x)           cpu_to_le32(lower_32_bits(x))
  #define DMA_HI_LE(x)           cpu_to_le32(upper_32_bits(x))
  #define DMA_REGPAIR_LE(x, val) do { \
@@ -778,7 +780,7 @@ enum protocol_type {
         PROTOCOLID_ROCE,
         PROTOCOLID_CORE,
         PROTOCOLID_ETH,
-       PROTOCOLID_RESERVED4,
+       PROTOCOLID_IWARP,
         PROTOCOLID_RESERVED5,
         PROTOCOLID_PREROCE,
         PROTOCOLID_COMMON,
diff --git a/include/linux/qed/iwarp_common.h b/include/linux/qed/iwarp_common.h

new file mode 100644 (file)

index 0000000..b8b3e1c
--- /dev/null
+++ b/include/linux/qed/iwarp_common.h
@@ -0,0 +1,53 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __IWARP_COMMON__
+#define __IWARP_COMMON__
+#include <linux/qed/rdma_common.h>
+/************************/
+/* IWARP FW CONSTANTS  */
+/************************/
+
+#define IWARP_ACTIVE_MODE 0
+#define IWARP_PASSIVE_MODE 1
+
+#define IWARP_SHARED_QUEUE_PAGE_SIZE           (0x8000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_OFFSET   (0x4000)
+#define IWARP_SHARED_QUEUE_PAGE_RQ_PBL_MAX_SIZE (0x1000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_OFFSET   (0x5000)
+#define IWARP_SHARED_QUEUE_PAGE_SQ_PBL_MAX_SIZE (0x3000)
+
+#define IWARP_REQ_MAX_INLINE_DATA_SIZE          (128)
+#define IWARP_REQ_MAX_SINGLE_SQ_WQE_SIZE        (176)
+
+#define IWARP_MAX_QPS                           (64 * 1024)
+
+#endif /* __IWARP_COMMON__ */
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h

index 5958b45eb6996cedfee6466b201a0fd3c4a70046..dd7a3b86bb9e515f45a9a2f17dac407f46f6ced2 100644 (file)
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -47,9 +47,10 @@ enum qed_ll2_conn_type {
         QED_LL2_TYPE_FCOE,
         QED_LL2_TYPE_ISCSI,
         QED_LL2_TYPE_TEST,
-       QED_LL2_TYPE_ISCSI_OOO,
+       QED_LL2_TYPE_OOO,
         QED_LL2_TYPE_RESERVED2,
         QED_LL2_TYPE_ROCE,
+       QED_LL2_TYPE_IWARP,
         QED_LL2_TYPE_RESERVED3,
         MAX_QED_LL2_RX_CONN_TYPE
  };
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h

index ff9be01b5f53dfc7ba7338de6c951a3c44a7fc58..4dd72ba210f5c2035a5c66bb076b0c7010136a9f 100644 (file)
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -470,6 +470,101 @@ struct qed_rdma_counters_out_params {
  #define QED_ROCE_TX_HEAD_FAILURE        (1)
  #define QED_ROCE_TX_FRAG_FAILURE        (2)
  
+enum qed_iwarp_event_type {
+       QED_IWARP_EVENT_MPA_REQUEST,      /* Passive side request received */
+       QED_IWARP_EVENT_PASSIVE_COMPLETE, /* ack on mpa response */
+       QED_IWARP_EVENT_ACTIVE_COMPLETE,  /* Active side reply received */
+       QED_IWARP_EVENT_DISCONNECT,
+       QED_IWARP_EVENT_CLOSE,
+       QED_IWARP_EVENT_IRQ_FULL,
+       QED_IWARP_EVENT_RQ_EMPTY,
+       QED_IWARP_EVENT_LLP_TIMEOUT,
+       QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR,
+       QED_IWARP_EVENT_CQ_OVERFLOW,
+       QED_IWARP_EVENT_QP_CATASTROPHIC,
+       QED_IWARP_EVENT_ACTIVE_MPA_REPLY,
+       QED_IWARP_EVENT_LOCAL_ACCESS_ERROR,
+       QED_IWARP_EVENT_REMOTE_OPERATION_ERROR,
+       QED_IWARP_EVENT_TERMINATE_RECEIVED
+};
+
+enum qed_tcp_ip_version {
+       QED_TCP_IPV4,
+       QED_TCP_IPV6,
+};
+
+struct qed_iwarp_cm_info {
+       enum qed_tcp_ip_version ip_version;
+       u32 remote_ip[4];
+       u32 local_ip[4];
+       u16 remote_port;
+       u16 local_port;
+       u16 vlan;
+       u8 ord;
+       u8 ird;
+       u16 private_data_len;
+       const void *private_data;
+};
+
+struct qed_iwarp_cm_event_params {
+       enum qed_iwarp_event_type event;
+       const struct qed_iwarp_cm_info *cm_info;
+       void *ep_context;       /* To be passed to accept call */
+       int status;
+};
+
+typedef int (*iwarp_event_handler) (void *context,
+                                   struct qed_iwarp_cm_event_params *event);
+
+struct qed_iwarp_connect_in {
+       iwarp_event_handler event_cb;
+       void *cb_context;
+       struct qed_rdma_qp *qp;
+       struct qed_iwarp_cm_info cm_info;
+       u16 mss;
+       u8 remote_mac_addr[ETH_ALEN];
+       u8 local_mac_addr[ETH_ALEN];
+};
+
+struct qed_iwarp_connect_out {
+       void *ep_context;
+};
+
+struct qed_iwarp_listen_in {
+       iwarp_event_handler event_cb;
+       void *cb_context;       /* passed to event_cb */
+       u32 max_backlog;
+       enum qed_tcp_ip_version ip_version;
+       u32 ip_addr[4];
+       u16 port;
+       u16 vlan;
+};
+
+struct qed_iwarp_listen_out {
+       void *handle;
+};
+
+struct qed_iwarp_accept_in {
+       void *ep_context;
+       void *cb_context;
+       struct qed_rdma_qp *qp;
+       const void *private_data;
+       u16 private_data_len;
+       u8 ord;
+       u8 ird;
+};
+
+struct qed_iwarp_reject_in {
+       void *ep_context;
+       void *cb_context;
+       const void *private_data;
+       u16 private_data_len;
+};
+
+struct qed_iwarp_send_rtr_in {
+       void *ep_context;
+};
+
  struct qed_roce_ll2_header {
         void *vaddr;
         dma_addr_t baddr;
@@ -491,6 +586,7 @@ struct qed_roce_ll2_packet {
  
  enum qed_rdma_type {
         QED_RDMA_TYPE_ROCE,
+       QED_RDMA_TYPE_IWARP
  };
  
  struct qed_dev_rdma_info {
@@ -575,6 +671,24 @@ struct qed_rdma_ops {
         int (*ll2_set_mac_filter)(struct qed_dev *cdev,
                                   u8 *old_mac_address, u8 *new_mac_address);
  
+       int (*iwarp_connect)(void *rdma_cxt,
+                            struct qed_iwarp_connect_in *iparams,
+                            struct qed_iwarp_connect_out *oparams);
+
+       int (*iwarp_create_listen)(void *rdma_cxt,
+                                  struct qed_iwarp_listen_in *iparams,
+                                  struct qed_iwarp_listen_out *oparams);
+
+       int (*iwarp_accept)(void *rdma_cxt,
+                           struct qed_iwarp_accept_in *iparams);
+
+       int (*iwarp_reject)(void *rdma_cxt,
+                           struct qed_iwarp_reject_in *iparams);
+
+       int (*iwarp_destroy_listen)(void *rdma_cxt, void *handle);
+
+       int (*iwarp_send_rtr)(void *rdma_cxt,
+                             struct qed_iwarp_send_rtr_in *iparams);
  };
  
  const struct qed_rdma_ops *qed_get_rdma_ops(void);
diff --git a/include/linux/sctp.h b/include/linux/sctp.h

index 7a4804c4a59362f519e0e6429496c1307320f125..99e866487e2f5cea6311b1617afe115c94da00f5 100644 (file)
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -57,12 +57,12 @@
  #include <uapi/linux/sctp.h>
  
  /* Section 3.1.  SCTP Common Header Format */
-typedef struct sctphdr {
+struct sctphdr {
         __be16 source;
         __be16 dest;
         __be32 vtag;
         __le32 checksum;
-} sctp_sctphdr_t;
+};
  
  static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
  {
@@ -70,11 +70,11 @@ static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
  }
  
  /* Section 3.2.  Chunk Field Descriptions. */
-typedef struct sctp_chunkhdr {
+struct sctp_chunkhdr {
         __u8 type;
         __u8 flags;
         __be16 length;
-} sctp_chunkhdr_t;
+};
  
  
  /* Section 3.2.  Chunk Type Values.
@@ -82,7 +82,7 @@ typedef struct sctp_chunkhdr {
   * Value field. It takes a value from 0 to 254. The value of 255 is
   * reserved for future use as an extension field.
   */
-typedef enum {
+enum sctp_cid {
         SCTP_CID_DATA                   = 0,
          SCTP_CID_INIT                  = 1,
          SCTP_CID_INIT_ACK              = 2,
@@ -109,7 +109,7 @@ typedef enum {
         SCTP_CID_ASCONF                 = 0xC1,
         SCTP_CID_ASCONF_ACK             = 0x80,
         SCTP_CID_RECONF                 = 0x82,
-} sctp_cid_t; /* enum */
+}; /* enum */
  
  
  /* Section 3.2
@@ -117,12 +117,12 @@ typedef enum {
   *  the action that must be taken if the processing endpoint does not
   *  recognize the Chunk Type.
   */
-typedef enum {
+enum {
         SCTP_CID_ACTION_DISCARD     = 0x00,
         SCTP_CID_ACTION_DISCARD_ERR = 0x40,
         SCTP_CID_ACTION_SKIP        = 0x80,
         SCTP_CID_ACTION_SKIP_ERR    = 0xc0,
-} sctp_cid_action_t;
+};
  
  enum { SCTP_CID_ACTION_MASK = 0xc0, };
  
@@ -162,12 +162,12 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
   * Section 3.2.1 Optional/Variable-length Parmaeter Format.
   */
  
-typedef struct sctp_paramhdr {
+struct sctp_paramhdr {
         __be16 type;
         __be16 length;
-} sctp_paramhdr_t;
+};
  
-typedef enum {
+enum sctp_param {
  
         /* RFC 2960 Section 3.3.5 */
         SCTP_PARAM_HEARTBEAT_INFO               = cpu_to_be16(1),
@@ -207,7 +207,7 @@ typedef enum {
         SCTP_PARAM_RESET_RESPONSE               = cpu_to_be16(0x0010),
         SCTP_PARAM_RESET_ADD_OUT_STREAMS        = cpu_to_be16(0x0011),
         SCTP_PARAM_RESET_ADD_IN_STREAMS         = cpu_to_be16(0x0012),
-} sctp_param_t; /* enum */
+}; /* enum */
  
  
  /* RFC 2960 Section 3.2.1
@@ -216,29 +216,29 @@ typedef enum {
   *  not recognize the Parameter Type.
   *
   */
-typedef enum {
+enum {
         SCTP_PARAM_ACTION_DISCARD     = cpu_to_be16(0x0000),
         SCTP_PARAM_ACTION_DISCARD_ERR = cpu_to_be16(0x4000),
         SCTP_PARAM_ACTION_SKIP        = cpu_to_be16(0x8000),
         SCTP_PARAM_ACTION_SKIP_ERR    = cpu_to_be16(0xc000),
-} sctp_param_action_t;
+};
  
  enum { SCTP_PARAM_ACTION_MASK = cpu_to_be16(0xc000), };
  
  /* RFC 2960 Section 3.3.1 Payload Data (DATA) (0) */
  
-typedef struct sctp_datahdr {
+struct sctp_datahdr {
         __be32 tsn;
         __be16 stream;
         __be16 ssn;
         __be32 ppid;
         __u8  payload[0];
-} sctp_datahdr_t;
+};
  
-typedef struct sctp_data_chunk {
-        sctp_chunkhdr_t chunk_hdr;
-        sctp_datahdr_t  data_hdr;
-} sctp_data_chunk_t;
+struct sctp_data_chunk {
+       struct sctp_chunkhdr chunk_hdr;
+       struct sctp_datahdr data_hdr;
+};
  
  /* DATA Chuck Specific Flags */
  enum {
@@ -257,54 +257,54 @@ enum { SCTP_DATA_FRAG_MASK = 0x03, };
   *  This chunk is used to initiate a SCTP association between two
   *  endpoints.
   */
-typedef struct sctp_inithdr {
+struct sctp_inithdr {
         __be32 init_tag;
         __be32 a_rwnd;
         __be16 num_outbound_streams;
         __be16 num_inbound_streams;
         __be32 initial_tsn;
         __u8  params[0];
-} sctp_inithdr_t;
+};
  
-typedef struct sctp_init_chunk {
-       sctp_chunkhdr_t chunk_hdr;
-       sctp_inithdr_t init_hdr;
-} sctp_init_chunk_t;
+struct sctp_init_chunk {
+       struct sctp_chunkhdr chunk_hdr;
+       struct sctp_inithdr init_hdr;
+};
  
  
  /* Section 3.3.2.1. IPv4 Address Parameter (5) */
  typedef struct sctp_ipv4addr_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         struct in_addr  addr;
  } sctp_ipv4addr_param_t;
  
  /* Section 3.3.2.1. IPv6 Address Parameter (6) */
  typedef struct sctp_ipv6addr_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         struct in6_addr addr;
  } sctp_ipv6addr_param_t;
  
  /* Section 3.3.2.1 Cookie Preservative (9) */
  typedef struct sctp_cookie_preserve_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __be32          lifespan_increment;
  } sctp_cookie_preserve_param_t;
  
  /* Section 3.3.2.1 Host Name Address (11) */
  typedef struct sctp_hostname_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         uint8_t hostname[0];
  } sctp_hostname_param_t;
  
  /* Section 3.3.2.1 Supported Address Types (12) */
  typedef struct sctp_supported_addrs_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __be16 types[0];
  } sctp_supported_addrs_param_t;
  
  /* Appendix A. ECN Capable (32768) */
  typedef struct sctp_ecn_capable_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
  } sctp_ecn_capable_param_t;
  
  /* ADDIP Section 3.2.6 Adaptation Layer Indication */
@@ -321,19 +321,19 @@ typedef struct sctp_supported_ext_param {
  
  /* AUTH Section 3.1 Random */
  typedef struct sctp_random_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u8 random_val[0];
  } sctp_random_param_t;
  
  /* AUTH Section 3.2 Chunk List */
  typedef struct sctp_chunks_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u8 chunks[0];
  } sctp_chunks_param_t;
  
  /* AUTH Section 3.3 HMAC Algorithm */
  typedef struct sctp_hmac_algo_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __be16 hmac_ids[0];
  } sctp_hmac_algo_param_t;
  
@@ -341,18 +341,18 @@ typedef struct sctp_hmac_algo_param {
   *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
   *   association.
   */
-typedef sctp_init_chunk_t sctp_initack_chunk_t;
+typedef struct sctp_init_chunk sctp_initack_chunk_t;
  
  /* Section 3.3.3.1 State Cookie (7) */
  typedef struct sctp_cookie_param {
-       sctp_paramhdr_t p;
+       struct sctp_paramhdr p;
         __u8 body[0];
  } sctp_cookie_param_t;
  
  /* Section 3.3.3.1 Unrecognized Parameters (8) */
  typedef struct sctp_unrecognized_param {
-       sctp_paramhdr_t param_hdr;
-       sctp_paramhdr_t unrecognized;
+       struct sctp_paramhdr param_hdr;
+       struct sctp_paramhdr unrecognized;
  } sctp_unrecognized_param_t;
  
  
@@ -386,7 +386,7 @@ typedef struct sctp_sackhdr {
  } sctp_sackhdr_t;
  
  typedef struct sctp_sack_chunk {
-       sctp_chunkhdr_t chunk_hdr;
+       struct sctp_chunkhdr chunk_hdr;
         sctp_sackhdr_t sack_hdr;
  } sctp_sack_chunk_t;
  
@@ -399,11 +399,11 @@ typedef struct sctp_sack_chunk {
   */
  
  typedef struct sctp_heartbeathdr {
-       sctp_paramhdr_t info;
+       struct sctp_paramhdr info;
  } sctp_heartbeathdr_t;
  
  typedef struct sctp_heartbeat_chunk {
-       sctp_chunkhdr_t chunk_hdr;
+       struct sctp_chunkhdr chunk_hdr;
         sctp_heartbeathdr_t hb_hdr;
  } sctp_heartbeat_chunk_t;
  
@@ -413,7 +413,7 @@ typedef struct sctp_heartbeat_chunk {
   * chunk descriptor.
   */
  typedef struct sctp_abort_chunk {
-        sctp_chunkhdr_t uh;
+       struct sctp_chunkhdr uh;
  } sctp_abort_chunk_t;
  
  
@@ -425,8 +425,8 @@ typedef struct sctp_shutdownhdr {
  } sctp_shutdownhdr_t;
  
  struct sctp_shutdown_chunk_t {
-        sctp_chunkhdr_t    chunk_hdr;
-        sctp_shutdownhdr_t shutdown_hdr;
+       struct sctp_chunkhdr chunk_hdr;
+       sctp_shutdownhdr_t shutdown_hdr;
  };
  
  /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
@@ -438,8 +438,8 @@ typedef struct sctp_errhdr {
  } sctp_errhdr_t;
  
  typedef struct sctp_operr_chunk {
-        sctp_chunkhdr_t chunk_hdr;
-       sctp_errhdr_t   err_hdr;
+       struct sctp_chunkhdr chunk_hdr;
+       sctp_errhdr_t err_hdr;
  } sctp_operr_chunk_t;
  
  /* RFC 2960 3.3.10 - Operation Error
@@ -528,7 +528,7 @@ typedef struct sctp_ecnehdr {
  } sctp_ecnehdr_t;
  
  typedef struct sctp_ecne_chunk {
-       sctp_chunkhdr_t chunk_hdr;
+       struct sctp_chunkhdr chunk_hdr;
         sctp_ecnehdr_t ence_hdr;
  } sctp_ecne_chunk_t;
  
@@ -540,7 +540,7 @@ typedef struct sctp_cwrhdr {
  } sctp_cwrhdr_t;
  
  typedef struct sctp_cwr_chunk {
-       sctp_chunkhdr_t chunk_hdr;
+       struct sctp_chunkhdr chunk_hdr;
         sctp_cwrhdr_t cwr_hdr;
  } sctp_cwr_chunk_t;
  
@@ -639,7 +639,7 @@ struct sctp_fwdtsn_chunk {
   *     report status of ASCONF processing.
   */
  typedef struct sctp_addip_param {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr    param_hdr;
         __be32          crr_id;
  } sctp_addip_param_t;
  
@@ -649,7 +649,7 @@ typedef struct sctp_addiphdr {
  } sctp_addiphdr_t;
  
  typedef struct sctp_addip_chunk {
-       sctp_chunkhdr_t chunk_hdr;
+       struct sctp_chunkhdr chunk_hdr;
         sctp_addiphdr_t addip_hdr;
  } sctp_addip_chunk_t;
  
@@ -709,7 +709,7 @@ typedef struct sctp_authhdr {
  } sctp_authhdr_t;
  
  typedef struct sctp_auth_chunk {
-       sctp_chunkhdr_t chunk_hdr;
+       struct sctp_chunkhdr chunk_hdr;
         sctp_authhdr_t auth_hdr;
  } sctp_auth_chunk_t;
  
@@ -719,12 +719,12 @@ struct sctp_infox {
  };
  
  struct sctp_reconf_chunk {
-       sctp_chunkhdr_t chunk_hdr;
+       struct sctp_chunkhdr chunk_hdr;
         __u8 params[0];
  };
  
  struct sctp_strreset_outreq {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u32 request_seq;
         __u32 response_seq;
         __u32 send_reset_at_tsn;
@@ -732,18 +732,18 @@ struct sctp_strreset_outreq {
  };
  
  struct sctp_strreset_inreq {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u32 request_seq;
         __u16 list_of_streams[0];
  };
  
  struct sctp_strreset_tsnreq {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u32 request_seq;
  };
  
  struct sctp_strreset_addstrm {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u32 request_seq;
         __u16 number_of_streams;
         __u16 reserved;
@@ -760,13 +760,13 @@ enum {
  };
  
  struct sctp_strreset_resp {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u32 response_seq;
         __u32 result;
  };
  
  struct sctp_strreset_resptsn {
-       sctp_paramhdr_t param_hdr;
+       struct sctp_paramhdr param_hdr;
         __u32 response_seq;
         __u32 result;
         __u32 senders_next_tsn;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index a17e235639ae194a36387fbe2e4f6fad379d41e7..3d3ceaac13b18b49f5b459cffc027ac003530f1d 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -252,7 +252,7 @@ struct nf_conntrack {
  
  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  struct nf_bridge_info {
-       atomic_t                use;
+       refcount_t              use;
         enum {
                 BRNF_PROTO_UNCHANGED,
                 BRNF_PROTO_8021Q,
@@ -761,7 +761,7 @@ struct sk_buff {
         unsigned char           *head,
                                 *data;
         unsigned int            truesize;
-       atomic_t                users;
+       refcount_t              users;
  };
  
  #ifdef __KERNEL__
@@ -872,9 +872,9 @@ static inline bool skb_unref(struct sk_buff *skb)
  {
         if (unlikely(!skb))
                 return false;
-       if (likely(atomic_read(&skb->users) == 1))
+       if (likely(refcount_read(&skb->users) == 1))
                 smp_rmb();
-       else if (likely(!atomic_dec_and_test(&skb->users)))
+       else if (likely(!refcount_dec_and_test(&skb->users)))
                 return false;
  
         return true;
@@ -915,7 +915,7 @@ struct sk_buff_fclones {
  
         struct sk_buff  skb2;
  
-       atomic_t        fclone_ref;
+       refcount_t      fclone_ref;
  };
  
  /**
@@ -935,7 +935,7 @@ static inline bool skb_fclone_busy(const struct sock *sk,
         fclones = container_of(skb, struct sk_buff_fclones, skb1);
  
         return skb->fclone == SKB_FCLONE_ORIG &&
-              atomic_read(&fclones->fclone_ref) > 1 &&
+              refcount_read(&fclones->fclone_ref) > 1 &&
                fclones->skb2.sk == sk;
  }
  
@@ -1283,7 +1283,7 @@ static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
   */
  static inline struct sk_buff *skb_get(struct sk_buff *skb)
  {
-       atomic_inc(&skb->users);
+       refcount_inc(&skb->users);
         return skb;
  }
  
@@ -1384,7 +1384,7 @@ static inline void __skb_header_release(struct sk_buff *skb)
   */
  static inline int skb_shared(const struct sk_buff *skb)
  {
-       return atomic_read(&skb->users) != 1;
+       return refcount_read(&skb->users) != 1;
  }
  
  /**
@@ -2206,6 +2206,11 @@ static inline int skb_mac_offset(const struct sk_buff *skb)
         return skb_mac_header(skb) - skb->data;
  }
  
+static inline u32 skb_mac_header_len(const struct sk_buff *skb)
+{
+       return skb->network_header - skb->mac_header;
+}
+
  static inline int skb_mac_header_was_set(const struct sk_buff *skb)
  {
         return skb->mac_header != (typeof(skb->mac_header))~0U;
@@ -3589,13 +3594,13 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
  static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
  {
-       if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
+       if (nf_bridge && refcount_dec_and_test(&nf_bridge->use))
                 kfree(nf_bridge);
  }
  static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
  {
         if (nf_bridge)
-               atomic_inc(&nf_bridge->use);
+               refcount_inc(&nf_bridge->use);
  }
  #endif /* CONFIG_BRIDGE_NETFILTER */
  static inline void nf_reset(struct sk_buff *skb)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h

index 07ef550c662708035459293fe39fa895cca9fce5..93315d6b21a85fea729970574eecd66027f8f520 100644 (file)
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -84,6 +84,7 @@ struct kmem_cache {
         int red_left_pad;       /* Left redzone padding size */
  #ifdef CONFIG_SYSFS
         struct kobject kobj;    /* For sysfs */
+       struct work_struct kobj_remove_work;
  #endif
  #ifdef CONFIG_MEMCG
         struct memcg_cache_params memcg_params;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h

index 110f4532188c7b6c50cf1ce4dcd5dc525beae9a1..f7043ccca81cc65f15c252158e745dc4c2948155 100644 (file)
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
   */
  struct tk_read_base {
         struct clocksource      *clock;
-       u64                     (*read)(struct clocksource *cs);
         u64                     mask;
         u64                     cycle_last;
         u32                     mult;
@@ -58,7 +57,7 @@ struct tk_read_base {
   *                     interval.
   * @xtime_remainder:   Shifted nano seconds left over when rounding
   *                     @cycle_interval
- * @raw_interval:      Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:      Shifted raw nano seconds accumulated per NTP interval.
   * @ntp_error:         Difference between accumulated time and NTP time in ntp
   *                     shifted nano seconds.
   * @ntp_error_shift:   Shift conversion between clock shifted nano seconds and
@@ -100,7 +99,7 @@ struct timekeeper {
         u64                     cycle_interval;
         u64                     xtime_interval;
         s64                     xtime_remainder;
-       u32                     raw_interval;
+       u64                     raw_interval;
         /* The ntp_tick_length() value currently being used.
          * This cached copy ensures we consistently apply the tick
          * length for an entire tick, as ntp_tick_length may change
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h

index 00d232406f18dbd1b6c27d1d24ca4c417d200530..021f7a88f52c929ec804d856a8f6541e1c4c6490 100644 (file)
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -117,6 +117,9 @@ struct cdc_ncm_ctx {
         u32 tx_curr_frame_num;
         u32 rx_max;
         u32 tx_max;
+       u32 tx_curr_size;
+       u32 tx_low_mem_max_cnt;
+       u32 tx_low_mem_val;
         u32 max_datagram_size;
         u16 tx_max_datagrams;
         u16 tx_remainder;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h

index fd60eccb59a67969eba53416a376a3c912d62d81..3a385e4767f031fb93d3558d935c9806699b4c74 100644 (file)
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -4,6 +4,7 @@
  #include <linux/socket.h>
  #include <linux/un.h>
  #include <linux/mutex.h>
+#include <linux/refcount.h>
  #include <net/sock.h>
  
  void unix_inflight(struct user_struct *user, struct file *fp);
@@ -21,7 +22,7 @@ extern spinlock_t unix_table_lock;
  extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
  
  struct unix_address {
-       atomic_t        refcnt;
+       refcount_t      refcnt;
         int             len;
         unsigned int    hash;
         struct sockaddr_un name[0];
diff --git a/include/net/arp.h b/include/net/arp.h

index 65619a2de6f44178d037823c3ad30c0f83f12456..17d90e4e8dc5ca8303f78ade32f1621d8a5ca706 100644 (file)
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -28,7 +28,7 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
  
         rcu_read_lock_bh();
         n = __ipv4_neigh_lookup_noref(dev, key);
-       if (n && !atomic_inc_not_zero(&n->refcnt))
+       if (n && !refcount_inc_not_zero(&n->refcnt))
                 n = NULL;
         rcu_read_unlock_bh();
  
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h

index 76c7300626d675bd70374efe45e5608c65f44f84..c487bfa2f47907c8c2f059f3162e33c1d9f43b79 100644 (file)
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -5,6 +5,7 @@
  #include <linux/slab.h>
  #include <linux/netdevice.h>
  #include <linux/fib_rules.h>
+#include <linux/refcount.h>
  #include <net/flow.h>
  #include <net/rtnetlink.h>
  
@@ -29,7 +30,7 @@ struct fib_rule {
         struct fib_rule __rcu   *ctarget;
         struct net              *fr_net;
  
-       atomic_t                refcnt;
+       refcount_t              refcnt;
         u32                     pref;
         int                     suppress_ifgroup;
         int                     suppress_prefixlen;
@@ -103,12 +104,12 @@ struct fib_rules_ops {
  
  static inline void fib_rule_get(struct fib_rule *rule)
  {
-       atomic_inc(&rule->refcnt);
+       refcount_inc(&rule->refcnt);
  }
  
  static inline void fib_rule_put(struct fib_rule *rule)
  {
-       if (atomic_dec_and_test(&rule->refcnt))
+       if (refcount_dec_and_test(&rule->refcnt))
                 kfree_rcu(rule, rcu);
  }
  
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h

index 975779d0e7b086dd648429b76e3558d681e62a3d..440c1e9d062392ba10f1ea39ec7a1b9f4e46fb0f 100644 (file)
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -50,7 +50,7 @@ struct inet_frag_queue {
         spinlock_t              lock;
         struct timer_list       timer;
         struct hlist_node       list;
-       atomic_t                refcnt;
+       refcount_t              refcnt;
         struct sk_buff          *fragments;
         struct sk_buff          *fragments_tail;
         ktime_t                 stamp;
@@ -129,7 +129,7 @@ void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
  
  static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
  {
-       if (atomic_dec_and_test(&q->refcnt))
+       if (refcount_dec_and_test(&q->refcnt))
                 inet_frag_destroy(q, f);
  }
  
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h

index 1178931288cbfc4d32ef5868ec11d98b2ba8df7d..5026b1f08bb87bf7b9be9df84d70fedf2bd8707f 100644 (file)
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -32,7 +32,7 @@
  #include <net/tcp_states.h>
  #include <net/netns/hash.h>
  
-#include <linux/atomic.h>
+#include <linux/refcount.h>
  #include <asm/byteorder.h>
  
  /* This is for all connections with a full identity, no wildcards.
@@ -334,7 +334,7 @@ static inline struct sock *inet_lookup(struct net *net,
         sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
                            dport, dif, &refcounted);
  
-       if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+       if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
  }
@@ -359,7 +359,6 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
                              refcounted);
  }
  
-u32 sk_ehashfn(const struct sock *sk);
  u32 inet6_ehashfn(const struct net *net,
                   const struct in6_addr *laddr, const u16 lport,
                   const struct in6_addr *faddr, const __be16 fport);
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h

index 235c7811a86a1df748d838c6b4fd453cea0cbf40..f2a215fc78e4686f597c4041796663a7a463e73f 100644 (file)
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -46,7 +46,7 @@ struct inet_peer {
                 struct rcu_head     gc_rcu;
         };
         /*
-        * Once inet_peer is queued for deletion (refcnt == -1), following field
+        * Once inet_peer is queued for deletion (refcnt == 0), following field
          * is not available: rid
          * We can share memory with rcu_head to help keep inet_peer small.
          */
@@ -60,7 +60,7 @@ struct inet_peer {
  
         /* following fields might be frequently dirtied */
         __u32                   dtime;  /* the time of last use of not referenced entries */
-       atomic_t                refcnt;
+       refcount_t              refcnt;
  };
  
  struct inet_peer_base {
diff --git a/include/net/ndisc.h b/include/net/ndisc.h

index 1036c902d2c9904ed084fcd5a4d8dc70b7902cbe..31b1bb11ba3ff29f51d5023a271ee470564ebc3e 100644 (file)
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -384,7 +384,7 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
  
         rcu_read_lock_bh();
         n = __ipv6_neigh_lookup_noref(dev, pkey);
-       if (n && !atomic_inc_not_zero(&n->refcnt))
+       if (n && !refcount_inc_not_zero(&n->refcnt))
                 n = NULL;
         rcu_read_unlock_bh();
  
diff --git a/include/net/neighbour.h b/include/net/neighbour.h

index 639b67564a7d05df0ccf9aa503131543f155ef91..afc39e3a3f7c030d7f1d5d3d6ae39b0acd635c9b 100644 (file)
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -17,6 +17,7 @@
   */
  
  #include <linux/atomic.h>
+#include <linux/refcount.h>
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>
  #include <linux/rcupdate.h>
@@ -76,7 +77,7 @@ struct neigh_parms {
         void    *sysctl_table;
  
         int dead;
-       atomic_t refcnt;
+       refcount_t refcnt;
         struct rcu_head rcu_head;
  
         int     reachable_time;
@@ -137,7 +138,7 @@ struct neighbour {
         unsigned long           confirmed;
         unsigned long           updated;
         rwlock_t                lock;
-       atomic_t                refcnt;
+       refcount_t              refcnt;
         struct sk_buff_head     arp_queue;
         unsigned int            arp_queue_len_bytes;
         struct timer_list       timer;
@@ -395,12 +396,12 @@ void neigh_sysctl_unregister(struct neigh_parms *p);
  
  static inline void __neigh_parms_put(struct neigh_parms *parms)
  {
-       atomic_dec(&parms->refcnt);
+       refcount_dec(&parms->refcnt);
  }
  
  static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
  {
-       atomic_inc(&parms->refcnt);
+       refcount_inc(&parms->refcnt);
         return parms;
  }
  
@@ -410,18 +411,18 @@ static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
  
  static inline void neigh_release(struct neighbour *neigh)
  {
-       if (atomic_dec_and_test(&neigh->refcnt))
+       if (refcount_dec_and_test(&neigh->refcnt))
                 neigh_destroy(neigh);
  }
  
  static inline struct neighbour * neigh_clone(struct neighbour *neigh)
  {
         if (neigh)
-               atomic_inc(&neigh->refcnt);
+               refcount_inc(&neigh->refcnt);
         return neigh;
  }
  
-#define neigh_hold(n)  atomic_inc(&(n)->refcnt)
+#define neigh_hold(n)  refcount_inc(&(n)->refcnt)
  
  static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
  {
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h

index fe80bb48ab1f0c7b1665a10a9bf2388b32eb5013..31a2b51bef2c8f05a68f3d6ff3923d94a77592f8 100644 (file)
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -5,6 +5,7 @@
  #define __NET_NET_NAMESPACE_H
  
  #include <linux/atomic.h>
+#include <linux/refcount.h>
  #include <linux/workqueue.h>
  #include <linux/list.h>
  #include <linux/sysctl.h>
@@ -46,7 +47,7 @@ struct netns_ipvs;
  #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
  
  struct net {
-       atomic_t                passive;        /* To decided when the network
+       refcount_t              passive;        /* To decide when the network
                                                  * namespace should be freed.
                                                  */
         atomic_t                count;          /* To decided when the network
@@ -158,6 +159,7 @@ extern struct net init_net;
  struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
                         struct net *old_net);
  
+void net_ns_barrier(void);
  #else /* CONFIG_NET_NS */
  #include <linux/sched.h>
  #include <linux/nsproxy.h>
@@ -168,6 +170,8 @@ static inline struct net *copy_net_ns(unsigned long flags,
                 return ERR_PTR(-EINVAL);
         return old_net;
  }
+
+static inline void net_ns_barrier(void) {}
  #endif /* CONFIG_NET_NS */
  
  
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h

index 0b0c35c37125eb8c15ba42e9413ba8206447a2c6..925524ede6c8fed1a814d8112ec1c33f9e47d8b0 100644 (file)
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -8,7 +8,7 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
         skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
  
         if (likely(skb->nf_bridge))
-               atomic_set(&(skb->nf_bridge->use), 1);
+               refcount_set(&(skb->nf_bridge->use), 1);
  
         return skb->nf_bridge;
  }
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h

index 8ece3612d0cd6bf8fe4ae6c347c2cb30da2f553f..48407569585da8b44db8ee089e96fdf53c7469de 100644 (file)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -225,9 +225,13 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
                                u32 seq);
  
  /* Iterate over all conntracks: if iter returns true, it's deleted. */
-void nf_ct_iterate_cleanup(struct net *net,
-                          int (*iter)(struct nf_conn *i, void *data),
-                          void *data, u32 portid, int report);
+void nf_ct_iterate_cleanup_net(struct net *net,
+                              int (*iter)(struct nf_conn *i, void *data),
+                              void *data, u32 portid, int report);
+
+/* also set unconfirmed conntracks as dying. Only use in module exit path. */
+void nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data),
+                          void *data);
  
  struct nf_conntrack_zone;
  
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h

index e01559b4d7818a17fdf43d908a349597be343eba..6d14b36e3a49084a58333ceff5db0b1957aff387 100644 (file)
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -71,7 +71,7 @@ struct nf_conntrack_l3proto {
         struct module *me;
  };
  
-extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
  
  #ifdef CONFIG_SYSCTL
  /* Protocol pernet registration. */
@@ -100,7 +100,7 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
  static inline struct nf_conntrack_l3proto *
  __nf_ct_l3proto_find(u_int16_t l3proto)
  {
-       if (unlikely(l3proto >= AF_MAX))
+       if (unlikely(l3proto >= NFPROTO_NUMPROTO))
                 return &nf_conntrack_l3proto_generic;
         return rcu_dereference(nf_ct_l3protos[l3proto]);
  }
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h

index 8a8bab8d7b15a8e9c746a899dcf474740e9f6f25..bd5be0d691d51dfe7453db5a9fa6adf225b361dc 100644 (file)
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -281,6 +281,23 @@ struct nft_set_estimate {
         enum nft_set_class      space;
  };
  
+/**
+ *      struct nft_set_type - nf_tables set type
+ *
+ *      @select_ops: optional function to select the nft_set_ops to use
+ *      @ops: default ops, used when no select_ops function is present
+ *      @list: used internally
+ *      @owner: module reference
+ */
+struct nft_set_type {
+       const struct nft_set_ops        *(*select_ops)(const struct nft_ctx *,
+                                                      const struct nft_set_desc *desc,
+                                                      u32 flags);
+       const struct nft_set_ops        *ops;
+       struct list_head                list;
+       struct module                   *owner;
+};
+
  struct nft_set_ext;
  struct nft_expr;
  
@@ -297,8 +314,6 @@ struct nft_expr;
   *     @privsize: function to return size of set private data
   *     @init: initialize private data of new set instance
   *     @destroy: destroy private data of set instance
- *     @list: nf_tables_set_ops list node
- *     @owner: module reference
   *     @elemsize: element private size
   *     @features: features supported by the implementation
   */
@@ -336,7 +351,8 @@ struct nft_set_ops {
                                                 struct nft_set *set,
                                                 struct nft_set_iter *iter);
  
-       unsigned int                    (*privsize)(const struct nlattr * const nla[]);
+       unsigned int                    (*privsize)(const struct nlattr * const nla[],
+                                                   const struct nft_set_desc *desc);
         bool                            (*estimate)(const struct nft_set_desc *desc,
                                                     u32 features,
                                                     struct nft_set_estimate *est);
@@ -345,14 +361,13 @@ struct nft_set_ops {
                                                 const struct nlattr * const nla[]);
         void                            (*destroy)(const struct nft_set *set);
  
-       struct list_head                list;
-       struct module                   *owner;
         unsigned int                    elemsize;
         u32                             features;
+       const struct nft_set_type       *type;
  };
  
-int nft_register_set(struct nft_set_ops *ops);
-void nft_unregister_set(struct nft_set_ops *ops);
+int nft_register_set(struct nft_set_type *type);
+void nft_unregister_set(struct nft_set_type *type);
  
  /**
   *     struct nft_set - nf_tables set instance
diff --git a/include/net/netlabel.h b/include/net/netlabel.h

index efe98068880f5559b97477374cd25f11957b150e..72d6435fc16ca7b3bce37e5be23b5b2593f47d56 100644 (file)
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -37,7 +37,7 @@
  #include <linux/in6.h>
  #include <net/netlink.h>
  #include <net/request_sock.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
  
  struct cipso_v4_doi;
  struct calipso_doi;
@@ -136,7 +136,7 @@ struct netlbl_audit {
   *
   */
  struct netlbl_lsm_cache {
-       atomic_t refcount;
+       refcount_t refcount;
         void (*free) (const void *data);
         void *data;
  };
@@ -295,7 +295,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
  
         cache = kzalloc(sizeof(*cache), flags);
         if (cache)
-               atomic_set(&cache->refcount, 1);
+               refcount_set(&cache->refcount, 1);
         return cache;
  }
  
@@ -309,7 +309,7 @@ static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags)
   */
  static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache)
  {
-       if (!atomic_dec_and_test(&cache->refcount))
+       if (!refcount_dec_and_test(&cache->refcount))
                 return;
  
         if (cache->free)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h

index 53ced67c4ae9d85e83ee6df37f84158c9fd9c4d1..23e22054aa60d653a4b4db62cbea13d30adfd945 100644 (file)
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -19,6 +19,7 @@
  #include <linux/spinlock.h>
  #include <linux/types.h>
  #include <linux/bug.h>
+#include <linux/refcount.h>
  
  #include <net/sock.h>
  
@@ -89,7 +90,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
                 return NULL;
         req->rsk_listener = NULL;
         if (attach_listener) {
-               if (unlikely(!atomic_inc_not_zero(&sk_listener->sk_refcnt))) {
+               if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) {
                         kmem_cache_free(ops->slab, req);
                         return NULL;
                 }
@@ -100,7 +101,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
         sk_node_init(&req_to_sk(req)->sk_node);
         sk_tx_queue_clear(req_to_sk(req));
         req->saved_syn = NULL;
-       atomic_set(&req->rsk_refcnt, 0);
+       refcount_set(&req->rsk_refcnt, 0);
  
         return req;
  }
@@ -108,7 +109,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
  static inline void reqsk_free(struct request_sock *req)
  {
         /* temporary debugging */
-       WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
+       WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
  
         req->rsk_ops->destructor(req);
         if (req->rsk_listener)
@@ -119,7 +120,7 @@ static inline void reqsk_free(struct request_sock *req)
  
  static inline void reqsk_put(struct request_sock *req)
  {
-       if (atomic_dec_and_test(&req->rsk_refcnt))
+       if (refcount_dec_and_test(&req->rsk_refcnt))
                 reqsk_free(req);
  }
  
diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h

index 9b9fb122b31f6b78884a0392081a3afceb49337d..171244bd856f04fcd8d5aeda80824c2ccf7bd569 100644 (file)
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -97,8 +97,10 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
                                      struct sctp_hmac_algo_param *hmacs);
  int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
                                     __be16 hmac_id);
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc);
+int sctp_auth_send_cid(enum sctp_cid chunk,
+                      const struct sctp_association *asoc);
+int sctp_auth_recv_cid(enum sctp_cid chunk,
+                      const struct sctp_association *asoc);
  void sctp_auth_calculate_hmac(const struct sctp_association *asoc,
                             struct sk_buff *skb,
                             struct sctp_auth_chunk *auth, gfp_t gfp);
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h

index d4a20d00461cff61767c7e4d0aaae1b2c05bfa4e..d4679e7a5ed580d2735e90b1e5089e9ee1f3b1d1 100644 (file)
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -132,7 +132,7 @@ typedef union {
         struct sctp_association *asoc;
         struct sctp_transport *transport;
         struct sctp_bind_addr *bp;
-       sctp_init_chunk_t *init;
+       struct sctp_init_chunk *init;
         struct sctp_ulpevent *ulpevent;
         struct sctp_packet *packet;
         sctp_sackhdr_t *sackh;
@@ -173,7 +173,7 @@ SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk)
  SCTP_ARG_CONSTRUCTOR(ASOC,     struct sctp_association *, asoc)
  SCTP_ARG_CONSTRUCTOR(TRANSPORT,        struct sctp_transport *, transport)
  SCTP_ARG_CONSTRUCTOR(BA,       struct sctp_bind_addr *, bp)
-SCTP_ARG_CONSTRUCTOR(PEER_INIT,        sctp_init_chunk_t *, init)
+SCTP_ARG_CONSTRUCTOR(PEER_INIT,        struct sctp_init_chunk *, init)
  SCTP_ARG_CONSTRUCTOR(ULPEVENT,  struct sctp_ulpevent *, ulpevent)
  SCTP_ARG_CONSTRUCTOR(PACKET,   struct sctp_packet *, packet)
  SCTP_ARG_CONSTRUCTOR(SACKH,    sctp_sackhdr_t *, sackh)
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h

index b07a745ab69ff5f0a365937c8c5b1342165b6a44..9b18044c551eb6b81e21d7cb3f72068ad0cff67a 100644 (file)
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -130,7 +130,7 @@ typedef enum {
   */
  
  typedef union {
-       sctp_cid_t chunk;
+       enum sctp_cid chunk;
         sctp_event_timeout_t timeout;
         sctp_event_other_t other;
         sctp_event_primitive_t primitive;
@@ -141,7 +141,7 @@ static inline sctp_subtype_t        \
  SCTP_ST_## _name (_type _arg)          \
  { sctp_subtype_t _retval; _retval._elt = _arg; return _retval; }
  
-SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,                sctp_cid_t,             chunk)
+SCTP_SUBTYPE_CONSTRUCTOR(CHUNK,                enum sctp_cid,          chunk)
  SCTP_SUBTYPE_CONSTRUCTOR(TIMEOUT,      sctp_event_timeout_t,   timeout)
  SCTP_SUBTYPE_CONSTRUCTOR(OTHER,                sctp_event_other_t,     other)
  SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,    sctp_event_primitive_t, primitive)
@@ -152,7 +152,7 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive)
  /* Calculate the actual data size in a data chunk */
  #define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end)\
                                 - (unsigned long)(c->chunk_hdr)\
-                               - sizeof(sctp_data_chunk_t)))
+                               - sizeof(struct sctp_data_chunk)))
  
  /* Internal error codes */
  typedef enum {
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h

index 069582ee5d7fd5b0e92edea68cb2406fbbe6db00..a9519a06a23b2083b4eec970eb2d99d98216aa83 100644 (file)
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -470,7 +470,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
  #define _sctp_walk_params(pos, chunk, end, member)\
  for (pos.v = chunk->member;\
       pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
-     ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
+     ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\
       pos.v += SCTP_PAD4(ntohs(pos.p->length)))
  
  #define sctp_walk_errors(err, chunk_hdr)\
@@ -478,7 +478,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
  
  #define _sctp_walk_errors(err, chunk_hdr, end)\
  for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
-           sizeof(sctp_chunkhdr_t));\
+           sizeof(struct sctp_chunkhdr));\
       (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
       ntohs(err->length) >= sizeof(sctp_errhdr_t); \
       err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h

index 47113f2c4b0a2b6c596d2f28018a1e1941cb6ede..860f378333b577b5f26357c512e34893a837cfe3 100644 (file)
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -325,19 +325,17 @@ void sctp_generate_heartbeat_event(unsigned long peer);
  void sctp_generate_reconf_event(unsigned long peer);
  void sctp_generate_proto_unreach_event(unsigned long peer);
  
-void sctp_ootb_pkt_free(struct sctp_packet *);
+void sctp_ootb_pkt_free(struct sctp_packet *packet);
  
-struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *,
-                                      const struct sctp_association *,
-                                      struct sctp_chunk *,
+struct sctp_association *sctp_unpack_cookie(const struct sctp_endpoint *ep,
+                                      const struct sctp_association *asoc,
+                                      struct sctp_chunk *chunk,
                                        gfp_t gfp, int *err,
                                        struct sctp_chunk **err_chk_p);
-int sctp_addip_addr_config(struct sctp_association *, sctp_param_t,
-                          struct sockaddr_storage*, int);
  
  /* 3rd level prototypes */
-__u32 sctp_generate_tag(const struct sctp_endpoint *);
-__u32 sctp_generate_tsn(const struct sctp_endpoint *);
+__u32 sctp_generate_tag(const struct sctp_endpoint *ep);
+__u32 sctp_generate_tsn(const struct sctp_endpoint *ep);
  
  /* Extern declarations for major data structures.  */
  extern sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES];
@@ -349,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
         __u16 size;
  
         size = ntohs(chunk->chunk_hdr->length);
-       size -= sizeof(sctp_data_chunk_t);
+       size -= sizeof(struct sctp_data_chunk);
  
         return size;
  }
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h

index e26763bfabd62c73a16373728d968797c2cef58b..07c11fefa8c49fee9d9b5a668241b14a3fdf6058 100644 (file)
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -310,9 +310,10 @@ struct sctp_cookie {
  
         __u32 adaptation_ind;
  
-       __u8 auth_random[sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH];
+       __u8 auth_random[sizeof(struct sctp_paramhdr) +
+                        SCTP_AUTH_RANDOM_LENGTH];
         __u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2];
-       __u8 auth_chunks[sizeof(sctp_paramhdr_t) + SCTP_AUTH_MAX_CHUNKS];
+       __u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS];
  
         /* This is a shim for my peer's INIT packet, followed by
          * a copy of the raw address list of the association.
@@ -1297,11 +1298,11 @@ int sctp_has_association(struct net *net, const union sctp_addr *laddr,
  
  int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
                      const struct sctp_association *asoc,
-                    sctp_cid_t, sctp_init_chunk_t *peer_init,
+                    enum sctp_cid cid, struct sctp_init_chunk *peer_init,
                      struct sctp_chunk *chunk, struct sctp_chunk **err_chunk);
  int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk,
                       const union sctp_addr *peer,
-                     sctp_init_chunk_t *init, gfp_t gfp);
+                     struct sctp_init_chunk *init, gfp_t gfp);
  __u32 sctp_generate_tag(const struct sctp_endpoint *);
  __u32 sctp_generate_tsn(const struct sctp_endpoint *);
  
diff --git a/include/net/sock.h b/include/net/sock.h

index 00d09140e35474fb686a41e019d3f82f8920da47..60200f4f402895017d88fe32e97ff65766d5b843 100644 (file)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -66,6 +66,7 @@
  #include <linux/poll.h>
  
  #include <linux/atomic.h>
+#include <linux/refcount.h>
  #include <net/dst.h>
  #include <net/checksum.h>
  #include <net/tcp_states.h>
@@ -219,7 +220,7 @@ struct sock_common {
                 u32             skc_tw_rcv_nxt; /* struct tcp_timewait_sock  */
         };
  
-       atomic_t                skc_refcnt;
+       refcount_t              skc_refcnt;
         /* private: */
         int                     skc_dontcopy_end[0];
         union {
@@ -390,7 +391,7 @@ struct sock {
  
         /* ===== cache line for TX ===== */
         int                     sk_wmem_queued;
-       atomic_t                sk_wmem_alloc;
+       refcount_t              sk_wmem_alloc;
         unsigned long           sk_tsq_flags;
         struct sk_buff          *sk_send_head;
         struct sk_buff_head     sk_write_queue;
@@ -611,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk)
  
  static __always_inline void sock_hold(struct sock *sk)
  {
-       atomic_inc(&sk->sk_refcnt);
+       refcount_inc(&sk->sk_refcnt);
  }
  
  /* Ungrab socket in the context, which assumes that socket refcnt
@@ -619,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk)
   */
  static __always_inline void __sock_put(struct sock *sk)
  {
-       atomic_dec(&sk->sk_refcnt);
+       refcount_dec(&sk->sk_refcnt);
  }
  
  static inline bool sk_del_node_init(struct sock *sk)
@@ -628,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk)
  
         if (rc) {
                 /* paranoid for a while -acme */
-               WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+               WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
                 __sock_put(sk);
         }
         return rc;
@@ -650,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
  
         if (rc) {
                 /* paranoid for a while -acme */
-               WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+               WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
                 __sock_put(sk);
         }
         return rc;
@@ -1144,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
  
  static inline void sk_refcnt_debug_release(const struct sock *sk)
  {
-       if (atomic_read(&sk->sk_refcnt) != 1)
+       if (refcount_read(&sk->sk_refcnt) != 1)
                 printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
-                      sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
+                      sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt));
  }
  #else /* SOCK_REFCNT_DEBUG */
  #define sk_refcnt_debug_inc(sk) do { } while (0)
@@ -1636,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk);
  /* Ungrab socket and destroy it, if it was the last reference. */
  static inline void sock_put(struct sock *sk)
  {
-       if (atomic_dec_and_test(&sk->sk_refcnt))
+       if (refcount_dec_and_test(&sk->sk_refcnt))
                 sk_free(sk);
  }
  /* Generic version of sock_put(), dealing with all sockets
@@ -1911,7 +1912,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
   */
  static inline int sk_wmem_alloc_get(const struct sock *sk)
  {
-       return atomic_read(&sk->sk_wmem_alloc) - 1;
+       return refcount_read(&sk->sk_wmem_alloc) - 1;
  }
  
  /**
@@ -2055,7 +2056,7 @@ static inline unsigned long sock_wspace(struct sock *sk)
         int amt = 0;
  
         if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-               amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+               amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc);
                 if (amt < 0)
                         amt = 0;
         }
@@ -2136,7 +2137,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
   */
  static inline bool sock_writeable(const struct sock *sk)
  {
-       return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+       return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
  }
  
  static inline gfp_t gfp_any(void)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h

index c784a6ac6ef1b95fa6252e694427b895d42dd76c..8ae9e3b6392e3c74ce93a5cf089fea1749f81619 100644 (file)
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -217,6 +217,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
  
  bool switchdev_port_same_parent_id(struct net_device *a,
                                    struct net_device *b);
+
+#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops))
  #else
  
  static inline void switchdev_deferred_process(void)
@@ -322,6 +324,8 @@ static inline bool switchdev_port_same_parent_id(struct net_device *a,
         return false;
  }
  
+#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0)
+
  #endif
  
  #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/include/net/tcp.h b/include/net/tcp.h

index d0751b79d99cbd02258420adf707f755630f6bb0..70483296157f87acdf5acd5e96eaa910119ba220 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,6 +46,10 @@
  #include <linux/seq_file.h>
  #include <linux/memcontrol.h>
  
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/bpf-cgroup.h>
+
  extern struct inet_hashinfo tcp_hashinfo;
  
  extern struct percpu_counter tcp_orphan_count;
@@ -1000,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
  void tcp_get_available_congestion_control(char *buf, size_t len);
  void tcp_get_allowed_congestion_control(char *buf, size_t len);
  int tcp_set_allowed_congestion_control(char *allowed);
-int tcp_set_congestion_control(struct sock *sk, const char *name);
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
+void tcp_reinit_congestion_control(struct sock *sk,
+                                  const struct tcp_congestion_ops *ca);
  u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
  void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
  
@@ -2021,4 +2027,62 @@ int tcp_set_ulp(struct sock *sk, const char *name);
  void tcp_get_available_ulp(char *buf, size_t len);
  void tcp_cleanup_ulp(struct sock *sk);
  
+/* Run the BPF_SOCK_OPS program for @sk with operation @op.
+ * Returns sock_ops.reply when the run reports 0 and -1 otherwise (for
+ * example if the loaded BPF program does not support the chosen operation
+ * or there is no BPF program loaded); -EPERM when CONFIG_BPF is not set.
+ */
+#ifdef CONFIG_BPF
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+       struct bpf_sock_ops_kern sock_ops;
+
+       /* sock_owned_by_me() is only invoked for full sockets */
+       if (sk_fullsock(sk))
+               sock_owned_by_me(sk);
+
+       /* start from an all-zero request, then fill in op and socket */
+       memset(&sock_ops, 0, sizeof(sock_ops));
+       sock_ops.op = op;
+       sock_ops.sk = sk;
+
+       /* any non-zero result of the run is reported to callers as -1 */
+       if (BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops) != 0)
+               return -1;
+
+       return sock_ops.reply;
+}
+#else
+static inline int tcp_call_bpf(struct sock *sk, int op)
+{
+       return -EPERM;
+}
+#endif
+
+/* Initial timeout: the BPF reply when positive, else TCP_TIMEOUT_INIT. */
+static inline u32 tcp_timeout_init(struct sock *sk)
+{
+       int bpf_rto = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+
+       /* zero and negative replies both fall back to the default */
+       if (bpf_rto > 0)
+               return bpf_rto;
+       return TCP_TIMEOUT_INIT;
+}
+
+/* Initial receive window from BPF; negative replies are mapped to 0. */
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
+{
+       int bpf_rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+
+       /* clamp so the u32 return never carries a negative reply */
+       if (bpf_rwnd >= 0)
+               return bpf_rwnd;
+       return 0;
+}
+
+static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
+{
+       return tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1; /* 1 means yes */
+}
  #endif /* _TCP_H */
diff --git a/include/net/udp.h b/include/net/udp.h

index 1468dbd0f09ad5eeb559fffb545bda6f051f7301..972ce4baab6b2a4b0539624d1a671c632d77514c 100644 (file)
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -302,6 +302,67 @@ struct sock *__udp6_lib_lookup(struct net *net,
  struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
                                  __be16 sport, __be16 dport);
  
+/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
+ * possibly multiple cache misses on dequeue()
+ */
+#if BITS_PER_LONG == 64
+
+/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on
+ * cold cache lines at recvmsg time.
+ * skb->len can be stored on 16 bits since the udp header has already been
+ * validated and pulled.
+ */
+struct udp_dev_scratch {
+       u32 truesize;
+       u16 len;
+       bool is_linear;
+       bool csum_unnecessary;
+};
+
+static inline unsigned int udp_skb_len(struct sk_buff *skb)
+{
+       return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
+}
+
+static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+       return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
+}
+
+static inline bool udp_skb_is_linear(struct sk_buff *skb)
+{
+       return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
+}
+
+#else /* BITS_PER_LONG != 64: no scratch copy, read the skb directly */
+static inline unsigned int udp_skb_len(struct sk_buff *skb)
+{
+       return skb->len;
+}
+
+static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
+{
+       return skb_csum_unnecessary(skb);
+}
+
+static inline bool udp_skb_is_linear(struct sk_buff *skb)
+{
+       return !skb_is_nonlinear(skb);
+}
+#endif /* BITS_PER_LONG == 64 */
+
+/* Copy @len bytes of linear skb data, starting at @off; 0 or -EFAULT. */
+static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
+                                 struct iov_iter *to)
+{
+       int n = copy_to_iter(skb->data + off, len, to);
+
+       if (n == len)
+               return 0;
+       iov_iter_revert(to, n); /* short copy: put @to back where it was */
+       return -EFAULT;
+}
+
  /*
   *     SNMP statistics for UDP and UDP-Lite
   */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h

index b816a0a6686e2b93527d3bf0dac5b3dd56380150..326e8498b10e0efc5295b1eb5807fc94e09cbecb 100644 (file)
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -221,9 +221,17 @@ struct vxlan_config {
         bool                    no_share;
  };
  
+struct vxlan_dev_node {
+       struct hlist_node hlist;        /* vni hash table linkage */
+       struct vxlan_dev *vxlan;        /* presumably the owner - confirm */
+};
+
  /* Pseudo network device */
  struct vxlan_dev {
-       struct hlist_node hlist;        /* vni hash table */
+       struct vxlan_dev_node hlist4;   /* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+       struct vxlan_dev_node hlist6;   /* vni hash table for IPv6 socket */
+#endif
         struct list_head  next;         /* vxlan's per namespace list */
         struct vxlan_sock __rcu *vn4_sock;      /* listening socket for IPv4 */
  #if IS_ENABLED(CONFIG_IPV6)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h

index 01f5bc144ee54700d27d75e9835b8ddf656e6fb0..01fa357e9a3290b2ea8da69ef513524af5a75442 100644 (file)
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1854,8 +1854,9 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
  }
  #endif
  
-#ifdef CONFIG_XFRM_OFFLOAD
  void __net_init xfrm_dev_init(void);
+
+#ifdef CONFIG_XFRM_OFFLOAD
  int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features);
  int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
                        struct xfrm_user_offload *xuo);
@@ -1881,10 +1882,6 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
         }
  }
  #else
-static inline void __net_init xfrm_dev_init(void)
-{
-}
-
  static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
  {
         return 0;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index f94b48b168dcc82cbd67b916951a4699f4f675ef..e99e3e6f8b3741829404c5bdce8c92af70a110f6 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_LWT_IN,
         BPF_PROG_TYPE_LWT_OUT,
         BPF_PROG_TYPE_LWT_XMIT,
+       BPF_PROG_TYPE_SOCK_OPS,
  };
  
  enum bpf_attach_type {
         BPF_CGROUP_INET_INGRESS,
         BPF_CGROUP_INET_EGRESS,
         BPF_CGROUP_INET_SOCK_CREATE,
+       BPF_CGROUP_SOCK_OPS,
         __MAX_BPF_ATTACH_TYPE
  };
  
@@ -518,6 +520,25 @@ union bpf_attr {
   *     Set full skb->hash.
   *     @skb: pointer to skb
   *     @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ *     Calls setsockopt. Not all opts are available, only those with
+ *     integer optvals plus TCP_CONGESTION.
+ *     Supported levels: SOL_SOCKET and IPPROTO_TCP
+ *     @bpf_socket: pointer to bpf_socket
+ *     @level: SOL_SOCKET or IPPROTO_TCP
+ *     @optname: option name
+ *     @optval: pointer to option value
+ *     @optlen: length of optval in bytes
+ *     Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ *     Grow or shrink room in sk_buff.
+ *     @skb: pointer to skb
+ *     @len_diff: (signed) amount of room to grow/shrink
+ *     @mode: operation mode (enum bpf_adj_room_mode)
+ *     @flags: reserved for future use
+ *     Return: 0 on success or negative error code
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -568,7 +589,9 @@ union bpf_attr {
         FN(probe_read_str),             \
         FN(get_socket_cookie),          \
         FN(get_socket_uid),             \
-       FN(set_hash),
+       FN(set_hash),                   \
+       FN(setsockopt),                 \
+       FN(skb_adjust_room),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -618,6 +641,11 @@ enum bpf_func_id {
  /* BPF_FUNC_perf_event_output for sk_buff input context. */
  #define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
  
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+       BPF_ADJ_ROOM_NET,
+};
+
  /* user accessible mirror of in-kernel sk_buff.
   * new fields can only be added to the end of this structure
   */
@@ -720,4 +748,56 @@ struct bpf_map_info {
         __u32 map_flags;
  } __attribute__((aligned(8)));
  
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of these fields are in network (big-endian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+       __u32 op;               /* One of the BPF_SOCK_OPS_* operators */
+       union {
+               __u32 reply;
+               __u32 replylong[4];
+       };
+       __u32 family;
+       __u32 remote_ip4;       /* Stored in network byte order */
+       __u32 local_ip4;        /* Stored in network byte order */
+       __u32 remote_ip6[4];    /* Stored in network byte order */
+       __u32 local_ip6[4];     /* Stored in network byte order */
+       __u32 remote_port;      /* Stored in network byte order */
+       __u32 local_port;       /* Stored in host byte order */
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+       BPF_SOCK_OPS_VOID,
+       BPF_SOCK_OPS_TIMEOUT_INIT,      /* Should return SYN-RTO value to use or
+                                        * -1 if default value should be used
+                                        */
+       BPF_SOCK_OPS_RWND_INIT,         /* Should return initial advertized
+                                        * window (in packets) or -1 if default
+                                        * value should be used
+                                        */
+       BPF_SOCK_OPS_TCP_CONNECT_CB,    /* Calls BPF program right before an
+                                        * active connection is initialized
+                                        */
+       BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB,     /* Calls BPF program when an
+                                                * active connection is
+                                                * established
+                                                */
+       BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,    /* Calls BPF program when a
+                                                * passive connection is
+                                                * established
+                                                */
+       BPF_SOCK_OPS_NEEDS_ECN,         /* If connection's congestion control
+                                        * needs ECN
+                                        */
+};
+
+#define TCP_BPF_IW             1001    /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP  1002    /* Set sndcwnd_clamp */
+
  #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h

index ced9d8b974268ed270661c3e2da77165e3a24784..6217ff8500a1d818fd1002fbd6f81c0c11974665 100644 (file)
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -121,6 +121,7 @@ typedef __s32 sctp_assoc_t;
  #define SCTP_RESET_STREAMS     119
  #define SCTP_RESET_ASSOC       120
  #define SCTP_ADD_STREAMS       121
+#define SCTP_SOCKOPT_PEELOFF_FLAGS 122
  
  /* PR-SCTP policies */
  #define SCTP_PR_SCTP_NONE      0x0000
@@ -978,6 +979,11 @@ typedef struct {
         int sd;
  } sctp_peeloff_arg_t;
  
+typedef struct {
+       sctp_peeloff_arg_t p_arg;
+       unsigned flags;
+} sctp_peeloff_flags_arg_t;
+
  /*
   *  Peer Address Thresholds socket option
   */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c

index ecb43542246e4b89d0e10ab77de4da168789040e..d771a38725006379315cc22782efedb3775b4569 100644 (file)
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -334,6 +334,26 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
         return NULL;
  }
  
+/* only called from syscall */
+int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+       void **elem, *ptr;
+       int ret =  0;
+
+       if (!map->ops->map_fd_sys_lookup_elem)
+               return -ENOTSUPP;
+
+       rcu_read_lock();
+       elem = array_map_lookup_elem(map, key);
+       if (elem && (ptr = READ_ONCE(*elem)))
+               *value = map->ops->map_fd_sys_lookup_elem(ptr);
+       else
+               ret = -ENOENT;
+       rcu_read_unlock();
+
+       return ret;
+}
+
  /* only called from syscall */
  int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
                                  void *key, void *value, u64 map_flags)
@@ -400,6 +420,11 @@ static void prog_fd_array_put_ptr(void *ptr)
         bpf_prog_put(ptr);
  }
  
+static u32 prog_fd_array_sys_lookup_elem(void *ptr)
+{
+       return ((struct bpf_prog *)ptr)->aux->id;
+}
+
  /* decrement refcnt of all bpf_progs that are stored in this map */
  void bpf_fd_array_map_clear(struct bpf_map *map)
  {
@@ -418,6 +443,7 @@ const struct bpf_map_ops prog_array_map_ops = {
         .map_delete_elem = fd_array_map_delete_elem,
         .map_fd_get_ptr = prog_fd_array_get_ptr,
         .map_fd_put_ptr = prog_fd_array_put_ptr,
+       .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
  };
  
  static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -585,4 +611,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
         .map_delete_elem = fd_array_map_delete_elem,
         .map_fd_get_ptr = bpf_map_fd_get_ptr,
         .map_fd_put_ptr = bpf_map_fd_put_ptr,
+       .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
  };
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c

index ea6033cba94721fd8ca080354c825771d93620fb..546113430049d63ec178160d5a8b57e19e210505 100644 (file)
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
         return ret;
  }
  EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
+
+/**
+ * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
+ * @sk: socket to get cgroup from
+ * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
+ * sk with connection information (IP addresses, etc.) May not contain
+ * cgroup info if it is a req sock.
+ * @type: The type of program to be executed
+ *
+ * socket passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock_ops
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
+                                    struct bpf_sock_ops_kern *sock_ops,
+                                    enum bpf_attach_type type)
+{
+       struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+       struct bpf_prog *prog;
+       int ret = 0;
+
+
+       rcu_read_lock();
+
+       prog = rcu_dereference(cgrp->bpf.effective[type]);
+       if (prog)
+               ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
+
+       rcu_read_unlock();
+
+       return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index 774069ca18a739f95d9f4ca67bce0a56261e5d3d..ad5f55922a136f19af951992e5a973dfa5de9bb6 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1297,7 +1297,9 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
   */
  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
  {
-       fp->bpf_func = interpreters[round_down(fp->aux->stack_depth, 32) / 32];
+       u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+       fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
  
         /* eBPF JITs can rewrite the program in case constant
          * blinding is active. However, in case of error during
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c

index 004334ea13ba3f56f10b33e9a1a50a97906000e4..4fb463172aa88e81d9caaaee271d8d8c8a9db978 100644 (file)
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1243,6 +1243,26 @@ static void fd_htab_map_free(struct bpf_map *map)
         htab_map_free(map);
  }
  
+/* only called from syscall */
+int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+       void **ptr;
+       int ret = 0;
+
+       if (!map->ops->map_fd_sys_lookup_elem)
+               return -ENOTSUPP;
+
+       rcu_read_lock();
+       ptr = htab_map_lookup_elem(map, key);
+       if (ptr)
+               *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
+       else
+               ret = -ENOENT;
+       rcu_read_unlock();
+
+       return ret;
+}
+
  /* only called from syscall */
  int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
                                 void *key, void *value, u64 map_flags)
@@ -1305,4 +1325,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
         .map_delete_elem = htab_map_delete_elem,
         .map_fd_get_ptr = bpf_map_fd_get_ptr,
         .map_fd_put_ptr = bpf_map_fd_put_ptr,
+       .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
  };
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c

index 59bcdf821ae47477f2a058af3e6918f0ce222325..1da574612bea75dabfe60c6967dce9c55a5e662b 100644 (file)
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -95,3 +95,8 @@ void bpf_map_fd_put_ptr(void *ptr)
          */
         bpf_map_put(ptr);
  }
+
+u32 bpf_map_fd_sys_lookup_elem(void *ptr)
+{
+       return ((struct bpf_map *)ptr)->id;
+}
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h

index 177fadb689dca9075ff9c43055d392821908ba6d..6183db9ec08cce0ba6aaa656cffdcb710bb8c797 100644 (file)
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -19,5 +19,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
  void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
                          int ufd);
  void bpf_map_fd_put_ptr(void *ptr);
+u32 bpf_map_fd_sys_lookup_elem(void *ptr);
  
  #endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 8942c820d620a15ef38ff6904c0a2f647cdab7be..18980472f5b06d1cda703e56efacd67a37640d3c 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -24,6 +24,13 @@
  #include <linux/kernel.h>
  #include <linux/idr.h>
  
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
+                          (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+                          (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+                          (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+
  DEFINE_PER_CPU(int, bpf_prog_active);
  static DEFINE_IDR(prog_idr);
  static DEFINE_SPINLOCK(prog_idr_lock);
@@ -209,10 +216,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
         const struct bpf_map *map = filp->private_data;
         const struct bpf_array *array;
         u32 owner_prog_type = 0;
+       u32 owner_jited = 0;
  
         if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
                 array = container_of(map, struct bpf_array, map);
                 owner_prog_type = array->owner_prog_type;
+               owner_jited = array->owner_jited;
         }
  
         seq_printf(m,
@@ -229,9 +238,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
                    map->map_flags,
                    map->pages * 1ULL << PAGE_SHIFT);
  
-       if (owner_prog_type)
+       if (owner_prog_type) {
                 seq_printf(m, "owner_prog_type:\t%u\n",
                            owner_prog_type);
+               seq_printf(m, "owner_jited:\t%u\n",
+                          owner_jited);
+       }
  }
  #endif
  
@@ -411,6 +423,8 @@ static int map_lookup_elem(union bpf_attr *attr)
             map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
             map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                 value_size = round_up(map->value_size, 8) * num_possible_cpus();
+       else if (IS_FD_MAP(map))
+               value_size = sizeof(u32);
         else
                 value_size = map->value_size;
  
@@ -426,9 +440,10 @@ static int map_lookup_elem(union bpf_attr *attr)
                 err = bpf_percpu_array_copy(map, key, value);
         } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
                 err = bpf_stackmap_copy(map, key, value);
-       } else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
-                  map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-               err = -ENOTSUPP;
+       } else if (IS_FD_ARRAY(map)) {
+               err = bpf_fd_array_map_lookup_elem(map, key, value);
+       } else if (IS_FD_HASH(map)) {
+               err = bpf_fd_htab_map_lookup_elem(map, key, value);
         } else {
                 rcu_read_lock();
                 ptr = map->ops->map_lookup_elem(map, key);
@@ -1069,6 +1084,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
         case BPF_CGROUP_INET_SOCK_CREATE:
                 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
                 break;
+       case BPF_CGROUP_SOCK_OPS:
+               ptype = BPF_PROG_TYPE_SOCK_OPS;
+               break;
         default:
                 return -EINVAL;
         }
@@ -1109,6 +1127,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
         case BPF_CGROUP_INET_INGRESS:
         case BPF_CGROUP_INET_EGRESS:
         case BPF_CGROUP_INET_SOCK_CREATE:
+       case BPF_CGROUP_SOCK_OPS:
                 cgrp = cgroup_get_from_fd(attr->target_fd);
                 if (IS_ERR(cgrp))
                         return PTR_ERR(cgrp);
@@ -1123,6 +1142,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
  
         return ret;
  }
+
  #endif /* CONFIG_CGROUP_BPF */
  
  #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 74ea96ea391b38105750242ad7cb854174764afb..6a86723c5b64bad1534d6519f33e03e9ad4fad99 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -546,20 +546,6 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
         return 0;
  }
  
-static int bpf_size_to_bytes(int bpf_size)
-{
-       if (bpf_size == BPF_W)
-               return 4;
-       else if (bpf_size == BPF_H)
-               return 2;
-       else if (bpf_size == BPF_B)
-               return 1;
-       else if (bpf_size == BPF_DW)
-               return 8;
-       else
-               return -EINVAL;
-}
-
  static bool is_spillable_regtype(enum bpf_reg_type type)
  {
         switch (type) {
@@ -761,7 +747,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
  static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
                             enum bpf_access_type t, enum bpf_reg_type *reg_type)
  {
-       struct bpf_insn_access_aux info = { .reg_type = *reg_type };
+       struct bpf_insn_access_aux info = {
+               .reg_type = *reg_type,
+       };
  
         /* for analyzer ctx accesses are already validated and converted */
         if (env->analyzer_ops)
@@ -769,25 +757,14 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
  
         if (env->prog->aux->ops->is_valid_access &&
             env->prog->aux->ops->is_valid_access(off, size, t, &info)) {
-               /* a non zero info.ctx_field_size indicates:
-                * . For this field, the prog type specific ctx conversion algorithm
-                *   only supports whole field access.
-                * . This ctx access is a candiate for later verifier transformation
-                *   to load the whole field and then apply a mask to get correct result.
-                * a non zero info.converted_op_size indicates perceived actual converted
-                * value width in convert_ctx_access.
+               /* A non zero info.ctx_field_size indicates that this field is a
+                * candidate for later verifier transformation to load the whole
+                * field and then apply a mask when accessed with a narrower
+                * access than actual ctx access size. A zero info.ctx_field_size
+                * will only allow for whole field access and rejects any other
+                * type of narrower access.
                  */
-               if ((info.ctx_field_size && !info.converted_op_size) ||
-                   (!info.ctx_field_size &&  info.converted_op_size)) {
-                       verbose("verifier bug in is_valid_access prog type=%u off=%d size=%d\n",
-                               env->prog->type, off, size);
-                       return -EACCES;
-               }
-
-               if (info.ctx_field_size) {
-                       env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
-                       env->insn_aux_data[insn_idx].converted_op_size = info.converted_op_size;
-               }
+               env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
                 *reg_type = info.reg_type;
  
                 /* remember the offset of last byte accessed in ctx */
@@ -1016,6 +993,11 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
         if (err)
                 return err;
  
+       if (is_pointer_value(env, insn->src_reg)) {
+               verbose("R%d leaks addr into mem\n", insn->src_reg);
+               return -EACCES;
+       }
+
         /* check whether atomic_add can read the memory */
         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
                                BPF_SIZE(insn->code), BPF_READ, -1);
@@ -1675,6 +1657,65 @@ static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
         return 0;
  }
  
+static int evaluate_reg_imm_alu_unknown(struct bpf_verifier_env *env,
+                                       struct bpf_insn *insn)
+{
+       struct bpf_reg_state *regs = env->cur_state.regs;
+       struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+       struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+       u8 opcode = BPF_OP(insn->code);
+       s64 imm_log2 = __ilog2_u64((long long)dst_reg->imm);
+
+       /* BPF_X code with src_reg->type UNKNOWN_VALUE here. */
+       if (src_reg->imm > 0 && dst_reg->imm) {
+               switch (opcode) {
+               case BPF_ADD:
+                       /* dreg += sreg
+                        * where both have zero upper bits. Adding them
+                        * can only result making one more bit non-zero
+                        * in the larger value.
+                        * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
+                        *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
+                        */
+                       dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
+                       dst_reg->imm--;
+                       break;
+               case BPF_AND:
+                       /* dreg &= sreg
+                        * AND can not extend zero bits only shrink
+                        * Ex.  0x00..00ffffff
+                        *    & 0x0f..ffffffff
+                        *     ----------------
+                        *      0x00..00ffffff
+                        */
+                       dst_reg->imm = max(src_reg->imm, 63 - imm_log2);
+                       break;
+               case BPF_OR:
+                       /* dreg |= sreg
+                        * OR can only extend zero bits
+                        * Ex.  0x00..00ffffff
+                        *    | 0x0f..ffffffff
+                        *     ----------------
+                        *      0x0f..00ffffff
+                        */
+                       dst_reg->imm = min(src_reg->imm, 63 - imm_log2);
+                       break;
+               case BPF_SUB:
+               case BPF_MUL:
+               case BPF_RSH:
+               case BPF_LSH:
+                       /* These may be flushed out later */
+               default:
+                       mark_reg_unknown_value(regs, insn->dst_reg);
+               }
+       } else {
+               mark_reg_unknown_value(regs, insn->dst_reg);
+       }
+
+       dst_reg->type = UNKNOWN_VALUE;
+       return 0;
+}
+
  static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
                                 struct bpf_insn *insn)
  {
@@ -1684,6 +1725,9 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
         u8 opcode = BPF_OP(insn->code);
         u64 dst_imm = dst_reg->imm;
  
+       if (BPF_SRC(insn->code) == BPF_X && src_reg->type == UNKNOWN_VALUE)
+               return evaluate_reg_imm_alu_unknown(env, insn);
+
         /* dst_reg->type == CONST_IMM here. Simulate execution of insns
          * containing ALU ops. Don't care about overflow or negative
          * values, just add/sub/... them; registers are in u64.
@@ -3396,11 +3440,13 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
  static int convert_ctx_accesses(struct bpf_verifier_env *env)
  {
         const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+       int i, cnt, size, ctx_field_size, delta = 0;
         const int insn_cnt = env->prog->len;
         struct bpf_insn insn_buf[16], *insn;
         struct bpf_prog *new_prog;
         enum bpf_access_type type;
-       int i, cnt, off, size, ctx_field_size, converted_op_size, is_narrower_load, delta = 0;
+       bool is_narrower_load;
+       u32 target_size;
  
         if (ops->gen_prologue) {
                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
@@ -3440,39 +3486,50 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                 if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
                         continue;
  
-               off = insn->off;
-               size = bpf_size_to_bytes(BPF_SIZE(insn->code));
                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
-               converted_op_size = env->insn_aux_data[i + delta].converted_op_size;
-               is_narrower_load = type == BPF_READ && size < ctx_field_size;
+               size = BPF_LDST_BYTES(insn);
  
                 /* If the read access is a narrower load of the field,
                  * convert to a 4/8-byte load, to minimum program type specific
                  * convert_ctx_access changes. If conversion is successful,
                  * we will apply proper mask to the result.
                  */
+               is_narrower_load = size < ctx_field_size;
                 if (is_narrower_load) {
-                       int size_code = BPF_H;
+                       u32 off = insn->off;
+                       u8 size_code;
  
+                       if (type == BPF_WRITE) {
+                               verbose("bpf verifier narrow ctx access misconfigured\n");
+                               return -EINVAL;
+                       }
+
+                       size_code = BPF_H;
                         if (ctx_field_size == 4)
                                 size_code = BPF_W;
                         else if (ctx_field_size == 8)
                                 size_code = BPF_DW;
+
                         insn->off = off & ~(ctx_field_size - 1);
                         insn->code = BPF_LDX | BPF_MEM | size_code;
                 }
-               cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
-               if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+
+               target_size = 0;
+               cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
+                                             &target_size);
+               if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
+                   (ctx_field_size && !target_size)) {
                         verbose("bpf verifier is misconfigured\n");
                         return -EINVAL;
                 }
-               if (is_narrower_load && size < converted_op_size) {
+
+               if (is_narrower_load && size < target_size) {
                         if (ctx_field_size <= 4)
                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
-                                                       (1 << size * 8) - 1);
+                                                               (1 << size * 8) - 1);
                         else
                                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
-                                                       (1 << size * 8) - 1);
+                                                               (1 << size * 8) - 1);
                 }
  
                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c

index 2831480c63a28b8e9b8cee1c0b30968860b3fcd0..ee97196bb1510e4f95cfd6ddc039fa9700cdc828 100644 (file)
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -580,7 +580,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
         int ret = -ENOMEM, max_order = 0;
  
         if (!has_aux(event))
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
  
         if (event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) {
                 /*
diff --git a/kernel/signal.c b/kernel/signal.c

index ca92bcfeb322f3f836031ec8b3ab21867f39adf5..45b4c1ffe14ef4334a918d0fc9ed407d801cafe6 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -510,7 +510,8 @@ int unhandled_signal(struct task_struct *tsk, int sig)
         return !tsk->ptrace;
  }
  
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+                          bool *resched_timer)
  {
         struct sigqueue *q, *first = NULL;
  
@@ -532,6 +533,12 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
  still_pending:
                 list_del_init(&first->list);
                 copy_siginfo(info, &first->info);
+
+               *resched_timer =
+                       (first->flags & SIGQUEUE_PREALLOC) &&
+                       (info->si_code == SI_TIMER) &&
+                       (info->si_sys_private);
+
                 __sigqueue_free(first);
         } else {
                 /*
@@ -548,12 +555,12 @@ still_pending:
  }
  
  static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-                       siginfo_t *info)
+                       siginfo_t *info, bool *resched_timer)
  {
         int sig = next_signal(pending, mask);
  
         if (sig)
-               collect_signal(sig, pending, info);
+               collect_signal(sig, pending, info, resched_timer);
         return sig;
  }
  
@@ -565,15 +572,16 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
   */
  int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  {
+       bool resched_timer = false;
         int signr;
  
         /* We only dequeue private signals from ourselves, we don't let
          * signalfd steal them
          */
-       signr = __dequeue_signal(&tsk->pending, mask, info);
+       signr = __dequeue_signal(&tsk->pending, mask, info, &resched_timer);
         if (!signr) {
                 signr = __dequeue_signal(&tsk->signal->shared_pending,
-                                        mask, info);
+                                        mask, info, &resched_timer);
  #ifdef CONFIG_POSIX_TIMERS
                 /*
                  * itimer signal ?
@@ -621,7 +629,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
                 current->jobctl |= JOBCTL_STOP_DEQUEUED;
         }
  #ifdef CONFIG_POSIX_TIMERS
-       if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
+       if (resched_timer) {
                 /*
                  * Release the siglock to ensure proper locking order
                  * of timer locks outside of siglocks.  Note, we leave
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c

index 9652bc57fd09811fa4e3ffbabc81b9139e75f125..b602c48cb84123890dbdc40b9a3da94439ba3a9d 100644 (file)
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
         tk->offs_boot = ktime_add(tk->offs_boot, delta);
  }
  
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function.  This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       return clock->read(clock);
+}
+
  #ifdef CONFIG_DEBUG_TIMEKEEPING
  #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
  
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
          */
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-               now = tkr->read(tkr->clock);
+               now = tk_clock_read(tkr);
                 last = tkr->cycle_last;
                 mask = tkr->mask;
                 max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
         u64 cycle_now, delta;
  
         /* read clocksource */
-       cycle_now = tkr->read(tkr->clock);
+       cycle_now = tk_clock_read(tkr);
  
         /* calculate the delta since the last update_wall_time */
         delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         ++tk->cs_was_changed_seq;
         old_clock = tk->tkr_mono.clock;
         tk->tkr_mono.clock = clock;
-       tk->tkr_mono.read = clock->read;
         tk->tkr_mono.mask = clock->mask;
-       tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+       tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
  
         tk->tkr_raw.clock = clock;
-       tk->tkr_raw.read = clock->read;
         tk->tkr_raw.mask = clock->mask;
         tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
  
@@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         /* Go back from cycles -> shifted ns */
         tk->xtime_interval = interval * clock->mult;
         tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-       tk->raw_interval = (interval * clock->mult) >> clock->shift;
+       tk->raw_interval = interval * clock->mult;
  
          /* if changing clocks, convert xtime_nsec shift units */
         if (old_clock) {
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
  
                 now += timekeeping_delta_to_ns(tkr,
                                 clocksource_delta(
-                                       tkr->read(tkr->clock),
+                                       tk_clock_read(tkr),
                                         tkr->cycle_last,
                                         tkr->mask));
         } while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
         return cycles_at_suspend;
  }
  
+static struct clocksource dummy_clock = {
+       .read = dummy_clock_read,
+};
+
  /**
   * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
   * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
         struct tk_read_base *tkr = &tk->tkr_mono;
  
         memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-       cycles_at_suspend = tkr->read(tkr->clock);
-       tkr_dummy.read = dummy_clock_read;
+       cycles_at_suspend = tk_clock_read(tkr);
+       tkr_dummy.clock = &dummy_clock;
         update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
  
         tkr = &tk->tkr_raw;
         memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-       tkr_dummy.read = dummy_clock_read;
+       tkr_dummy.clock = &dummy_clock;
         update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
  }
  
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
   */
  static void timekeeping_forward_now(struct timekeeper *tk)
  {
-       struct clocksource *clock = tk->tkr_mono.clock;
         u64 cycle_now, delta;
         u64 nsec;
  
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
         delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
         tk->tkr_mono.cycle_last = cycle_now;
         tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                 systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
                 systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
                 base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
                  * Check whether the system counter value provided by the
                  * device driver is on the current timekeeping interval.
                  */
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                 interval_start = tk->tkr_mono.cycle_last;
                 if (!cycle_between(interval_start, cycles, now)) {
                         clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
          * The less preferred source will only be tried if there is no better
          * usable source. The rtc part is handled separately in rtc core code.
          */
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
         if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
                 cycle_now > tk->tkr_mono.cycle_last) {
                 u64 nsec, cyc_delta;
@@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
                                     u32 shift, unsigned int *clock_set)
  {
         u64 interval = tk->cycle_interval << shift;
-       u64 raw_nsecs;
+       u64 snsec_per_sec;
  
         /* If the offset is smaller than a shifted interval, do nothing */
         if (offset < interval)
@@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
         *clock_set |= accumulate_nsecs_to_secs(tk);
  
         /* Accumulate raw time */
-       raw_nsecs = (u64)tk->raw_interval << shift;
-       raw_nsecs += tk->raw_time.tv_nsec;
-       if (raw_nsecs >= NSEC_PER_SEC) {
-               u64 raw_secs = raw_nsecs;
-               raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-               tk->raw_time.tv_sec += raw_secs;
+       tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+       snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+       while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+               tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+               tk->raw_time.tv_sec++;
         }
-       tk->raw_time.tv_nsec = raw_nsecs;
+       tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
  
         /* Accumulate error between NTP and clock interval */
         tk->ntp_error += tk->ntp_tick << shift;
@@ -2030,7 +2051,7 @@ void update_wall_time(void)
  #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
         offset = real_tk->cycle_interval;
  #else
-       offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+       offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
                                    tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
  #endif
  
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c

index 97c46b440cd6431b1a968dff366ce3cfddfa8ef3..37385193a6084ed1b8fdd794eb0938c2021fcfa4 100644 (file)
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -122,8 +122,8 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
  }
  
  /*
- * limited trace_printk()
- * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
+ * Only limited trace_printk() conversion specifiers allowed:
+ * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
   */
  BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
            u64, arg2, u64, arg3)
@@ -198,7 +198,8 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
                         i++;
                 }
  
-               if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
+               if (fmt[i] != 'i' && fmt[i] != 'd' &&
+                   fmt[i] != 'u' && fmt[i] != 'x')
                         return -EINVAL;
                 fmt_cnt++;
         }
@@ -583,7 +584,8 @@ const struct bpf_verifier_ops tracepoint_prog_ops = {
  static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
                                     struct bpf_insn_access_aux *info)
  {
-       int sample_period_off;
+       const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
+                                        sample_period);
  
         if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
                 return false;
@@ -592,43 +594,35 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
         if (off % size != 0)
                 return false;
  
-       /* permit 1, 2, 4 byte narrower and 8 normal read access to sample_period */
-       sample_period_off = offsetof(struct bpf_perf_event_data, sample_period);
-       if (off >= sample_period_off && off < sample_period_off + sizeof(__u64)) {
-               int allowed;
-
-#ifdef __LITTLE_ENDIAN
-               allowed = (off & 0x7) == 0 && size <= 8 && (size & (size - 1)) == 0;
-#else
-               allowed = ((off & 0x7) + size) == 8 && size <= 8 && (size & (size - 1)) == 0;
-#endif
-               if (!allowed)
+       switch (off) {
+       case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
+               bpf_ctx_record_field_size(info, size_sp);
+               if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
                         return false;
-               info->ctx_field_size = 8;
-               info->converted_op_size = 8;
-       } else {
+               break;
+       default:
                 if (size != sizeof(long))
                         return false;
         }
+
         return true;
  }
  
  static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
                                       const struct bpf_insn *si,
                                       struct bpf_insn *insn_buf,
-                                     struct bpf_prog *prog)
+                                     struct bpf_prog *prog, u32 *target_size)
  {
         struct bpf_insn *insn = insn_buf;
  
         switch (si->off) {
         case offsetof(struct bpf_perf_event_data, sample_period):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
-
                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
                                                        data), si->dst_reg, si->src_reg,
                                       offsetof(struct bpf_perf_event_data_kern, data));
                 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
-                                     offsetof(struct perf_sample_data, period));
+                                     bpf_target_off(struct perf_sample_data, period, 8,
+                                                    target_size));
                 break;
         default:
                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
diff --git a/lib/cmdline.c b/lib/cmdline.c

index 3c6432df7e63466a24d41dead807c7ef14c0ab86..4c0888c4a68d9621717f9012d3ca4cc0b72df20b 100644 (file)
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -23,14 +23,14 @@
   *     the values[M, M+1, ..., N] into the ints array in get_options.
   */
  
-static int get_range(char **str, int *pint)
+static int get_range(char **str, int *pint, int n)
  {
         int x, inc_counter, upper_range;
  
         (*str)++;
         upper_range = simple_strtol((*str), NULL, 0);
         inc_counter = upper_range - *pint;
+       /*
+        * Store at most n values: n is the room left in the caller's array
+        * (get_options() passes nints - i), so an oversized range cannot
+        * write past its end.
+        *
+        * NOTE(review): inc_counter was computed above from the full range
+        * and is returned unchanged even when the loop stops early because
+        * n ran out -- confirm the caller copes with a count larger than
+        * the number of values actually stored.
+        */
-       for (x = *pint; x < upper_range; x++)
+       for (x = *pint; n && x < upper_range; x++, n--)
                 *pint++ = x;
         return inc_counter;
  }
@@ -97,7 +97,7 @@ char *get_options(const char *str, int nints, int *ints)
                         break;
                 if (res == 3) {
                         int range_nums;
-                       range_nums = get_range((char **)&str, ints + i);
+                       range_nums = get_range((char **)&str, ints + i, nints - i);
                         if (range_nums < 0)
                                 break;
                         /*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c

index 945fd1ca49b5af0bc3b87dbfe8098f0f602775a2..df4ebdb2b10a373723330dc0124957cd2cb1c021 100644 (file)
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -652,7 +652,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
                         spin_unlock(ptl);
                         free_page_and_swap_cache(src_page);
                 }
-               cond_resched();
         }
  }
  
diff --git a/mm/slub.c b/mm/slub.c

index 7449593fca724147cef5b8f7a46752333e5e0585..8addc535bcdc58794fe40e72a729e4589d44d2b6 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5625,6 +5625,28 @@ static char *create_unique_id(struct kmem_cache *s)
         return name;
  }
  
+/*
+ * Deferred half of sysfs_slab_remove(): runs from the work item that
+ * sysfs_slab_remove() schedules and tears down the cache's sysfs entries.
+ *
+ * sysfs_slab_remove() takes a kobject reference before scheduling this
+ * work, so every invocation must drop exactly one reference -- including
+ * the "already removed" case below -- otherwise the kobject is leaked.
+ */
+static void sysfs_slab_remove_workfn(struct work_struct *work)
+{
+       struct kmem_cache *s =
+               container_of(work, struct kmem_cache, kobj_remove_work);
+
+       if (!s->kobj.state_in_sysfs)
+               /*
+                * For a memcg cache, this may be called during
+                * deactivation and again on shutdown.  Remove only once.
+                * A cache is never shut down before deactivation is
+                * complete, so no need to worry about synchronization.
+                */
+               goto out;
+
+#ifdef CONFIG_MEMCG
+       kset_unregister(s->memcg_kset);
+#endif
+       kobject_uevent(&s->kobj, KOBJ_REMOVE);
+       kobject_del(&s->kobj);
+out:
+       /* Pairs with the kobject_get() in sysfs_slab_remove(). */
+       kobject_put(&s->kobj);
+}
+
  static int sysfs_slab_add(struct kmem_cache *s)
  {
         int err;
@@ -5632,6 +5654,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
         struct kset *kset = cache_kset(s);
         int unmergeable = slab_unmergeable(s);
  
+       INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
+
         if (!kset) {
                 kobject_init(&s->kobj, &slab_ktype);
                 return 0;
@@ -5695,20 +5719,8 @@ static void sysfs_slab_remove(struct kmem_cache *s)
                  */
                 return;
  
-       if (!s->kobj.state_in_sysfs)
-               /*
-                * For a memcg cache, this may be called during
-                * deactivation and again on shutdown.  Remove only once.
-                * A cache is never shut down before deactivation is
-                * complete, so no need to worry about synchronization.
-                */
-               return;
-
-#ifdef CONFIG_MEMCG
-       kset_unregister(s->memcg_kset);
-#endif
-       kobject_uevent(&s->kobj, KOBJ_REMOVE);
-       kobject_del(&s->kobj);
+       kobject_get(&s->kobj);
+       schedule_work(&s->kobj_remove_work);
  }
  
  void sysfs_slab_release(struct kmem_cache *s)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c

index 34a1c3e46ed72594b499e7f61e8aacdd4c5fe818..ecc97f74ab182fe9aeb7d7eda5166dfdb5b03095 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -287,10 +287,21 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
         if (p4d_none(*p4d))
                 return NULL;
         pud = pud_offset(p4d, addr);
-       if (pud_none(*pud))
+
+       /*
+        * Don't dereference bad PUD or PMD (below) entries. This will also
+        * identify huge mappings, which we may encounter on architectures
+        * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
+        * identified as vmalloc addresses by is_vmalloc_addr(), but are
+        * not [unambiguously] associated with a struct page, so there is
+        * no correct value to return for them.
+        */
+       WARN_ON_ONCE(pud_bad(*pud));
+       if (pud_none(*pud) || pud_bad(*pud))
                 return NULL;
         pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       WARN_ON_ONCE(pmd_bad(*pmd));
+       if (pmd_none(*pmd) || pmd_bad(*pmd))
                 return NULL;
  
         ptep = pte_offset_map(pmd, addr);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c

index fca84e111c8959ace7d6ec9a10dd0081c2944cd7..4e111196f90216488de317efc8a5a792e75603a0 100644 (file)
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -252,7 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
  
         ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
         pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
-       atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
         ATM_SKB(skb)->atm_options = atmvcc->atm_options;
         dev->stats.tx_packets++;
         dev->stats.tx_bytes += skb->len;
diff --git a/net/atm/clip.c b/net/atm/clip.c

index a7e4018370b4e6842da1639b5e5d39e051ad3eee..f271a7bcf5b2e63702ba159cb44cfef2b730dba6 100644 (file)
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -137,11 +137,11 @@ static int neigh_check_cb(struct neighbour *n)
         if (entry->vccs || time_before(jiffies, entry->expires))
                 return 0;
  
-       if (atomic_read(&n->refcnt) > 1) {
+       if (refcount_read(&n->refcnt) > 1) {
                 struct sk_buff *skb;
  
                 pr_debug("destruction postponed with ref %d\n",
-                        atomic_read(&n->refcnt));
+                        refcount_read(&n->refcnt));
  
                 while ((skb = skb_dequeue(&n->arp_queue)) != NULL)
                         dev_kfree_skb(skb);
@@ -381,7 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
                 memcpy(here, llc_oui, sizeof(llc_oui));
                 ((__be16 *) here)[3] = skb->protocol;
         }
-       atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
         ATM_SKB(skb)->atm_options = vcc->atm_options;
         entry->vccs->last_use = jiffies;
         pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
@@ -767,7 +767,7 @@ static void atmarp_info(struct seq_file *seq, struct neighbour *n,
                         seq_printf(seq, "(resolving)\n");
                 else
                         seq_printf(seq, "(expired, ref %d)\n",
-                                  atomic_read(&entry->neigh->refcnt));
+                                  refcount_read(&entry->neigh->refcnt));
         } else if (!svc) {
                 seq_printf(seq, "%d.%d.%d\n",
                            clip_vcc->vcc->dev->number,
diff --git a/net/atm/common.c b/net/atm/common.c

index f06422f4108d209fde356457c453164f2f4d7289..8a4f99114cd2b5c2a80fa964b00f52a10160b36f 100644 (file)
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -80,9 +80,9 @@ static void vcc_sock_destruct(struct sock *sk)
                 printk(KERN_DEBUG "%s: rmem leakage (%d bytes) detected.\n",
                        __func__, atomic_read(&sk->sk_rmem_alloc));
  
-       if (atomic_read(&sk->sk_wmem_alloc))
+       if (refcount_read(&sk->sk_wmem_alloc))
                 printk(KERN_DEBUG "%s: wmem leakage (%d bytes) detected.\n",
-                      __func__, atomic_read(&sk->sk_wmem_alloc));
+                      __func__, refcount_read(&sk->sk_wmem_alloc));
  }
  
  static void vcc_def_wakeup(struct sock *sk)
@@ -101,7 +101,7 @@ static inline int vcc_writable(struct sock *sk)
         struct atm_vcc *vcc = atm_sk(sk);
  
         return (vcc->qos.txtp.max_sdu +
-               atomic_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
+               refcount_read(&sk->sk_wmem_alloc)) <= sk->sk_sndbuf;
  }
  
  static void vcc_write_space(struct sock *sk)
@@ -156,7 +156,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family, i
         memset(&vcc->local, 0, sizeof(struct sockaddr_atmsvc));
         memset(&vcc->remote, 0, sizeof(struct sockaddr_atmsvc));
         vcc->qos.txtp.max_sdu = 1 << 16; /* for meta VCs */
-       atomic_set(&sk->sk_wmem_alloc, 1);
+       refcount_set(&sk->sk_wmem_alloc, 1);
         atomic_set(&sk->sk_rmem_alloc, 0);
         vcc->push = NULL;
         vcc->pop = NULL;
@@ -630,7 +630,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
                 goto out;
         }
         pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
-       atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk->sk_wmem_alloc);
  
         skb->dev = NULL; /* for paths shared with net_device interfaces */
         ATM_SKB(skb)->atm_options = vcc->atm_options;
diff --git a/net/atm/lec.c b/net/atm/lec.c

index 09cfe87f0a44d64dbbdb9209d8eb358efa68f887..75545717fa46a9b4113a586ff1c9247687711d82 100644 (file)
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -181,7 +181,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
         ATM_SKB(skb)->vcc = vcc;
         ATM_SKB(skb)->atm_options = vcc->atm_options;
  
-       atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
         if (vcc->send(vcc, skb) < 0) {
                 dev->stats.tx_dropped++;
                 return;
@@ -345,7 +345,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
         int i;
         char *tmp;              /* FIXME */
  
-       atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+       WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
         mesg = (struct atmlec_msg *)skb->data;
         tmp = skb->data;
         tmp += sizeof(struct atmlec_msg);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c

index a190800572bdfe3473d9b49f19ac20354f6f3539..680a4b9095a14dcf7eabb235300a19082746f7b6 100644 (file)
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -555,7 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
                                         sizeof(struct llc_snap_hdr));
         }
  
-       atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
         ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
         entry->shortcut->send(entry->shortcut, skb);
         entry->packets_fwded++;
@@ -911,7 +911,7 @@ static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb)
  
         struct mpoa_client *mpc = find_mpc_by_vcc(vcc);
         struct k_message *mesg = (struct k_message *)skb->data;
-       atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+       WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
  
         if (mpc == NULL) {
                 pr_info("no mpc found\n");
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c

index c4e09846d1dea258b7ca8e02a4d9510b9d8d0ba1..21d9d341a6199255a017437954e4b688f1ba5bfd 100644 (file)
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -350,7 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
                 return 1;
         }
  
-       atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
         ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
         pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
                  skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
diff --git a/net/atm/proc.c b/net/atm/proc.c

index bbb6461a4b7fbe9a9d2b2d957bca7974331d42aa..27c9c01c537d7b998c32b304d931feca940c2953 100644 (file)
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -211,7 +211,7 @@ static void vcc_info(struct seq_file *seq, struct atm_vcc *vcc)
                    vcc->flags, sk->sk_err,
                    sk_wmem_alloc_get(sk), sk->sk_sndbuf,
                    sk_rmem_alloc_get(sk), sk->sk_rcvbuf,
-                  atomic_read(&sk->sk_refcnt));
+                  refcount_read(&sk->sk_refcnt));
  }
  
  static void svc_info(struct seq_file *seq, struct atm_vcc *vcc)
diff --git a/net/atm/raw.c b/net/atm/raw.c

index 2e17e97a7a8b02e9570a385e1d99297fa50f7348..821c0797553daf870d3b9656dccb820506dc1ebc 100644 (file)
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -35,7 +35,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
  
         pr_debug("(%d) %d -= %d\n",
                  vcc->vci, sk_wmem_alloc_get(sk), skb->truesize);
-       atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+       WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
         dev_kfree_skb_any(skb);
         sk->sk_write_space(sk);
  }
diff --git a/net/atm/signaling.c b/net/atm/signaling.c

index f640a99e14b83ae989248276342415989aaf7626..983c3a21a13316a9eeafa29fc3ba53dfffe97c81 100644 (file)
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -67,7 +67,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
         struct sock *sk;
  
         msg = (struct atmsvc_msg *) skb->data;
-       atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+       WARN_ON(refcount_sub_and_test(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc));
         vcc = *(struct atm_vcc **) &msg->vcc;
         pr_debug("%d (0x%lx)\n", (int)msg->type, (unsigned long)vcc);
         sk = sk_atm(vcc);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c

index 8a8f77a247e60ad569b157c21ca6740cd847b3e1..91e3ba28070647bc93960c1d729aa200b666b249 100644 (file)
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -657,7 +657,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
                 seq_printf(seq,
                            "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu",
                            sk,
-                          atomic_read(&sk->sk_refcnt),
+                          refcount_read(&sk->sk_refcnt),
                            sk_rmem_alloc_get(sk),
                            sk_wmem_alloc_get(sk),
                            from_kuid(seq_user_ns(seq), sock_i_uid(sk)),
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c

index 9a40013da9151c0ec70704ddbc92760810a5028b..7b3965861013c4ed85f818efaff54336a3dc84fe 100644 (file)
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -481,16 +481,16 @@ static int bnep_session(void *arg)
         struct net_device *dev = s->dev;
         struct sock *sk = s->sock->sk;
         struct sk_buff *skb;
-       wait_queue_t wait;
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
  
         BT_DBG("");
  
         set_user_nice(current, -15);
  
-       init_waitqueue_entry(&wait, current);
         add_wait_queue(sk_sleep(sk), &wait);
         while (1) {
-               set_current_state(TASK_INTERRUPTIBLE);
+               /* Ensure session->terminate is updated */
+               smp_mb__before_atomic();
  
                 if (atomic_read(&s->terminate))
                         break;
@@ -512,9 +512,8 @@ static int bnep_session(void *arg)
                                 break;
                 netif_wake_queue(dev);
  
-               schedule();
+               wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
         }
-       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
  
         /* Cleanup session */
@@ -663,7 +662,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
         s = __bnep_get_session(req->dst);
         if (s) {
                 atomic_inc(&s->terminate);
-               wake_up_process(s->task);
+               wake_up_interruptible(sk_sleep(s->sock->sk));
         } else
                 err = -ENOENT;
  
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c

index f4c64ef01c24b8c24a44c0c3fde579f0cc5e8904..7f26a5a19ff6d8fb87ef5404e32eb5e2ce0b5b53 100644 (file)
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,16 +280,16 @@ static int cmtp_session(void *arg)
         struct cmtp_session *session = arg;
         struct sock *sk = session->sock->sk;
         struct sk_buff *skb;
-       wait_queue_t wait;
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
  
         BT_DBG("session %p", session);
  
         set_user_nice(current, -15);
  
-       init_waitqueue_entry(&wait, current);
         add_wait_queue(sk_sleep(sk), &wait);
         while (1) {
-               set_current_state(TASK_INTERRUPTIBLE);
+               /* Ensure session->terminate is updated */
+               smp_mb__before_atomic();
  
                 if (atomic_read(&session->terminate))
                         break;
@@ -306,9 +306,8 @@ static int cmtp_session(void *arg)
  
                 cmtp_process_transmit(session);
  
-               schedule();
+               wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
         }
-       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
  
         down_write(&cmtp_session_sem);
@@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
                 err = cmtp_attach_device(session);
                 if (err < 0) {
                         atomic_inc(&session->terminate);
-                       wake_up_process(session->task);
+                       wake_up_interruptible(sk_sleep(session->sock->sk));
                         up_write(&cmtp_session_sem);
                         return err;
                 }
@@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
  
                 /* Stop session thread */
                 atomic_inc(&session->terminate);
-               wake_up_process(session->task);
+
+               /* Ensure session->terminate is updated */
+               smp_mb__after_atomic();
+
+               wake_up_interruptible(sk_sleep(session->sock->sk));
         } else
                 err = -ENOENT;
  
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c

index d860e3cc23cf151583e5d455edce572328e2168f..6bc679cd3481836f191bd0189d54c03d5a057c0b 100644 (file)
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3096,15 +3096,14 @@ int hci_register_dev(struct hci_dev *hdev)
  
         BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
  
-       hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
-                                         WQ_MEM_RECLAIM, 1, hdev->name);
+       hdev->workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name);
         if (!hdev->workqueue) {
                 error = -ENOMEM;
                 goto err;
         }
  
-       hdev->req_workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
-                                             WQ_MEM_RECLAIM, 1, hdev->name);
+       hdev->req_workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI,
+                                                     hdev->name);
         if (!hdev->req_workqueue) {
                 destroy_workqueue(hdev->workqueue);
                 error = -ENOMEM;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c

index 961f7f53e178206a3c793c5db42e3b24ad240582..472b3907b1b0f271d5abc3e13fcd0d4d6c59d15a 100644 (file)
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -36,6 +36,7 @@
  #define VERSION "1.2"
  
  static DECLARE_RWSEM(hidp_session_sem);
+static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq);
  static LIST_HEAD(hidp_session_list);
  
  static unsigned char hidp_keycode[256] = {
@@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session *session)
   * Wake up session thread and notify it to stop. This is asynchronous and
   * returns immediately. Call this whenever a runtime error occurs and you want
   * the session to stop.
- * Note: wake_up_process() performs any necessary memory-barriers for us.
+ * Note: wake_up_interruptible() performs any necessary memory-barriers for us.
   */
  static void hidp_session_terminate(struct hidp_session *session)
  {
         atomic_inc(&session->terminate);
-       wake_up_process(session->task);
+       wake_up_interruptible(&hidp_session_wq);
  }
  
  /*
@@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session)
         struct sock *ctrl_sk = session->ctrl_sock->sk;
         struct sock *intr_sk = session->intr_sock->sk;
         struct sk_buff *skb;
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
  
+       add_wait_queue(&hidp_session_wq, &wait);
         for (;;) {
                 /*
                  * This thread can be woken up two ways:
@@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session *session)
                  *    session->terminate flag and wakes this thread up.
                  *  - Via modifying the socket state of ctrl/intr_sock. This
                  *    thread is woken up by ->sk_state_changed().
-                *
-                * Note: set_current_state() performs any necessary
-                * memory-barriers for us.
                  */
-               set_current_state(TASK_INTERRUPTIBLE);
  
+               /* Ensure session->terminate is updated */
+               smp_mb__before_atomic();
                 if (atomic_read(&session->terminate))
                         break;
  
@@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session *session)
                 hidp_process_transmit(session, &session->ctrl_transmit,
                                       session->ctrl_sock);
  
-               schedule();
+               wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
         }
+       remove_wait_queue(&hidp_session_wq, &wait);
  
         atomic_inc(&session->terminate);
-       set_current_state(TASK_RUNNING);
+
+       /* Ensure session->terminate is updated */
+       smp_mb__after_atomic();
+}
+
+/*
+ * Wake callback for the wait-queue entries that hidp_session_thread()
+ * hooks onto the ctrl and intr socket wait queues. It ignores its
+ * arguments and simply forwards the event to hidp_session_wq, the queue
+ * that hidp_session_run() waits on.
+ */
+static int hidp_session_wake_function(wait_queue_t *wait,
+                                     unsigned int mode,
+                                     int sync, void *key)
+{
+       wake_up_interruptible(&hidp_session_wq);
+       return false;
  }
  
  /*
@@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session)
  static int hidp_session_thread(void *arg)
  {
         struct hidp_session *session = arg;
-       wait_queue_t ctrl_wait, intr_wait;
+       DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function);
+       DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function);
  
         BT_DBG("session %p", session);
  
@@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg)
         set_user_nice(current, -15);
         hidp_set_timer(session);
  
-       init_waitqueue_entry(&ctrl_wait, current);
-       init_waitqueue_entry(&intr_wait, current);
         add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
         add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
         /* This memory barrier is paired with wq_has_sleeper(). See
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c

index 507b80d59dec4fd3b0eb3c50ed1cd95a78adfcb7..67a8642f57ea7c8ceadc1d379019ea6135877068 100644 (file)
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -87,7 +87,8 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
  
         BT_DBG("sk %p", sk);
  
-       if (!addr || addr->sa_family != AF_BLUETOOTH)
+       if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+           addr->sa_family != AF_BLUETOOTH)
                 return -EINVAL;
  
         memset(&la, 0, sizeof(la));
@@ -181,7 +182,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
  
         BT_DBG("sk %p", sk);
  
-       if (!addr || alen < sizeof(addr->sa_family) ||
+       if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
             addr->sa_family != AF_BLUETOOTH)
                 return -EINVAL;
  
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c

index ac3c650cb234f9985ddf0b54924db9000c4586c3..1aaccf63747937bb1f335c1490bae29c9ff898e7 100644 (file)
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -197,7 +197,7 @@ static void rfcomm_sock_kill(struct sock *sk)
         if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
                 return;
  
-       BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+       BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
  
         /* Kill poor orphan */
         bt_sock_unlink(&rfcomm_sk_list, sk);
@@ -339,7 +339,8 @@ static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr
         struct sock *sk = sock->sk;
         int len, err = 0;
  
-       if (!addr || addr->sa_family != AF_BLUETOOTH)
+       if (!addr || addr_len < offsetofend(struct sockaddr, sa_family) ||
+           addr->sa_family != AF_BLUETOOTH)
                 return -EINVAL;
  
         memset(&sa, 0, sizeof(sa));
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c

index 728e0c8dc8e74ccb134b8ed1d493ea8ee49bf49b..795e920a3281939f8f84e76f9f3fc6161146a558 100644 (file)
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -524,10 +524,8 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr,
  
         BT_DBG("sk %p %pMR", sk, &sa->sco_bdaddr);
  
-       if (!addr || addr->sa_family != AF_BLUETOOTH)
-               return -EINVAL;
-
-       if (addr_len < sizeof(struct sockaddr_sco))
+       if (!addr || addr_len < sizeof(struct sockaddr_sco) ||
+           addr->sa_family != AF_BLUETOOTH)
                 return -EINVAL;
  
         lock_sock(sk);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c

index 067cf03134492a33f10982755105e103666cd1ef..2261e5194c82cf72cfce944de61eb263b4375157 100644 (file)
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -149,12 +149,12 @@ static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
  {
         struct nf_bridge_info *nf_bridge = skb->nf_bridge;
  
-       if (atomic_read(&nf_bridge->use) > 1) {
+       if (refcount_read(&nf_bridge->use) > 1) {
                 struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
  
                 if (tmp) {
                         memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
-                       atomic_set(&tmp->use, 1);
+                       refcount_set(&tmp->use, 1);
                 }
                 nf_bridge_put(nf_bridge);
                 nf_bridge = tmp;
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c

index 0b5dd607444c71fd2e7c5200d219977aa526c190..723f25eed8ea0de05cb554ec105d6048585ea0be 100644 (file)
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -865,7 +865,7 @@ static struct attribute *bridge_attrs[] = {
         NULL
  };
  
-static struct attribute_group bridge_group = {
+static const struct attribute_group bridge_group = {
         .name = SYSFS_BRIDGE_ATTR,
         .attrs = bridge_attrs,
  };
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c

index e0bb624c3845eff5d756830504709eeb3d5bf960..dfc86a0199dab9683ff7cd8e7346b7ea19c8ce13 100644 (file)
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -61,7 +61,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
             (strcmp(par->table, "broute") != 0 ||
             hook_mask & ~(1 << NF_BR_BROUTING)))
                 return -EINVAL;
-       if (INVALID_TARGET)
+       if (ebt_invalid_target(info->target))
                 return -EINVAL;
         return 0;
  }
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c

index 66697cbd0a8b82c9e132aebda93b96ee09e48fe8..19f0f9592d32e906f9d42ab8608c8f042031e820 100644 (file)
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -44,7 +44,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
         tmp = info->target | ~EBT_VERDICT_BITS;
         if (BASE_CHAIN && tmp == EBT_RETURN)
                 return -EINVAL;
-       if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+       if (ebt_invalid_target(tmp))
                 return -EINVAL;
         tmp = info->target & ~EBT_VERDICT_BITS;
         if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c

index 8d2a85e0594ee3c5c91c3c4b4c05c71fc6f40601..a7223eaf490b74b92b8725702033b859070ccdcb 100644 (file)
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -47,7 +47,7 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
             (strcmp(par->table, "broute") != 0 ||
             hook_mask & ~(1 << NF_BR_BROUTING)))
                 return -EINVAL;
-       if (INVALID_TARGET)
+       if (ebt_invalid_target(info->target))
                 return -EINVAL;
         return 0;
  }
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c

index e56ccd060d2680da042322cea43177a27744b7ca..11cf9e9e9222e241cf803a54d376fc9274279447 100644 (file)
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -51,7 +51,7 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
         if (BASE_CHAIN && tmp == EBT_RETURN)
                 return -EINVAL;
  
-       if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+       if (ebt_invalid_target(tmp))
                 return -EINVAL;
         tmp = info->target | EBT_VERDICT_BITS;
         if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c

index 7506b853a84d65a30e2b746c2b23e9ff5bcb6b76..632d5a416d973306aa01552acc0a8bf80b2177aa 100644 (file)
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1013,7 +1013,7 @@ static const struct proto_ops caif_stream_ops = {
  static void caif_sock_destructor(struct sock *sk)
  {
         struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-       caif_assert(!atomic_read(&sk->sk_wmem_alloc));
+       caif_assert(!refcount_read(&sk->sk_wmem_alloc));
         caif_assert(sk_unhashed(sk));
         caif_assert(!sk->sk_socket);
         if (!sock_flag(sk, SOCK_DEAD)) {
diff --git a/net/core/datagram.c b/net/core/datagram.c

index e5311a7c70daf1d6c60d67fc6cf383e85cacc82e..454ec89233331ed0676acaaac11c41000a4d0d23 100644 (file)
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
                                 }
                         }
                         *peeked = 1;
-                       atomic_inc(&skb->users);
+                       refcount_inc(&skb->users);
                 } else {
                         __skb_unlink(skb, queue);
                         if (destructor)
@@ -358,7 +358,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
                 spin_lock_bh(&sk_queue->lock);
                 if (skb == skb_peek(sk_queue)) {
                         __skb_unlink(skb, sk_queue);
-                       atomic_dec(&skb->users);
+                       refcount_dec(&skb->users);
                         if (destructor)
                                 destructor(sk, skb);
                         err = 0;
@@ -614,7 +614,7 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
                 skb->data_len += copied;
                 skb->len += copied;
                 skb->truesize += truesize;
-               atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+               refcount_add(truesize, &skb->sk->sk_wmem_alloc);
                 while (copied) {
                         int size = min_t(int, copied, PAGE_SIZE - start);
                         skb_fill_page_desc(skb, frag++, pages[n], start, size);
diff --git a/net/core/dev.c b/net/core/dev.c

index a91572aa73d58c9a11febb1810f6bec0158b63df..7098fba52be1aae749e198b3c2ef999efbd986cd 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1862,7 +1862,7 @@ static inline int deliver_skb(struct sk_buff *skb,
  {
         if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
                 return -ENOMEM;
-       atomic_inc(&skb->users);
+       refcount_inc(&skb->users);
         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
  }
  
@@ -2484,10 +2484,10 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
         if (unlikely(!skb))
                 return;
  
-       if (likely(atomic_read(&skb->users) == 1)) {
+       if (likely(refcount_read(&skb->users) == 1)) {
                 smp_rmb();
-               atomic_set(&skb->users, 0);
-       } else if (likely(!atomic_dec_and_test(&skb->users))) {
+               refcount_set(&skb->users, 0);
+       } else if (likely(!refcount_dec_and_test(&skb->users))) {
                 return;
         }
         get_kfree_skb_cb(skb)->reason = reason;
@@ -3955,7 +3955,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
  
                         clist = clist->next;
  
-                       WARN_ON(atomic_read(&skb->users));
+                       WARN_ON(refcount_read(&skb->users));
                         if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
                                 trace_consume_skb(skb);
                         else
@@ -4844,6 +4844,13 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
  }
  EXPORT_SYMBOL(gro_find_complete_by_type);
  
+/* Dispose of an skb flagged NAPI_GRO_FREE_STOLEN_HEAD: call skb_dst_drop()
+ * and secpath_reset() on it, then hand the sk_buff itself back to
+ * skbuff_head_cache.
+ */
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+       skb_dst_drop(skb);
+       secpath_reset(skb);
+       kmem_cache_free(skbuff_head_cache, skb);
+}
+
  static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
  {
         switch (ret) {
@@ -4857,13 +4864,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
                 break;
  
         case GRO_MERGED_FREE:
-               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
-                       skb_dst_drop(skb);
-                       secpath_reset(skb);
-                       kmem_cache_free(skbuff_head_cache, skb);
-               } else {
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+                       napi_skb_free_stolen_head(skb);
+               else
                         __kfree_skb(skb);
-               }
                 break;
  
         case GRO_HELD:
@@ -4935,10 +4939,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
                 break;
  
         case GRO_DROP:
-       case GRO_MERGED_FREE:
                 napi_reuse_skb(napi, skb);
                 break;
  
+       case GRO_MERGED_FREE:
+               if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+                       napi_skb_free_stolen_head(skb);
+               else
+                       napi_reuse_skb(napi, skb);
+               break;
+
         case GRO_MERGED:
         case GRO_CONSUMED:
                 break;
@@ -7825,7 +7835,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
  {
  #if BITS_PER_LONG == 64
         BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
-       memcpy(stats64, netdev_stats, sizeof(*stats64));
+       memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
         /* zero out counters that only exist in rtnl_link_stats64 */
         memset((char *)stats64 + sizeof(*netdev_stats), 0,
                sizeof(*stats64) - sizeof(*netdev_stats));
@@ -7867,9 +7877,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
         } else {
                 netdev_stats_to_stats64(storage, &dev->stats);
         }
-       storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
-       storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
-       storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+       storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+       storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+       storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
         return storage;
  }
  EXPORT_SYMBOL(dev_get_stats);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c

index 3bba291c6c32e4359a6d626fbd492f7d07fd3e4c..a0093e1b0235355db66b980580243dd6619c9aa6 100644 (file)
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -46,7 +46,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
         if (r == NULL)
                 return -ENOMEM;
  
-       atomic_set(&r->refcnt, 1);
+       refcount_set(&r->refcnt, 1);
         r->action = FR_ACT_TO_TBL;
         r->pref = pref;
         r->table = table;
@@ -283,7 +283,7 @@ jumped:
  
                 if (err != -EAGAIN) {
                         if ((arg->flags & FIB_LOOKUP_NOREF) ||
-                           likely(atomic_inc_not_zero(&rule->refcnt))) {
+                           likely(refcount_inc_not_zero(&rule->refcnt))) {
                                 arg->rule = rule;
                                 goto out;
                         }
@@ -517,7 +517,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
                 last = r;
         }
  
-       fib_rule_get(rule);
+       refcount_set(&rule->refcnt, 1);
  
         if (last)
                 list_add_rcu(&rule->list, &last->list);
diff --git a/net/core/filter.c b/net/core/filter.c

index b39c869d22e3b12a53ab9a4065aa674899902696..94169572d00283d6521ca59b4088e57f38dbd415 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -54,6 +54,7 @@
  #include <net/dst.h>
  #include <net/sock_reuseport.h>
  #include <net/busy_poll.h>
+#include <net/tcp.h>
  
  /**
   *     sk_filter_trim_cap - run a packet through a socket filter
@@ -2011,7 +2012,7 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
  static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
  {
         const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
-       u32 off = skb->network_header - skb->mac_header;
+       u32 off = skb_mac_header_len(skb);
         int ret;
  
         ret = skb_cow(skb, len_diff);
@@ -2047,7 +2048,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
  static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
  {
         const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
-       u32 off = skb->network_header - skb->mac_header;
+       u32 off = skb_mac_header_len(skb);
         int ret;
  
         ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2153,6 +2154,124 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = {
         .arg2_type      = ARG_ANYTHING,
  };
  
+/* Size of the fixed network header selected by skb->protocol:
+ * sizeof(struct iphdr) for ETH_P_IP, sizeof(struct ipv6hdr) for ETH_P_IPV6,
+ * and ~0U for every other protocol.
+ */
+static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
+{
+       const __be16 proto = skb->protocol;
+
+       if (proto == htons(ETH_P_IP))
+               return sizeof(struct iphdr);
+       if (proto == htons(ETH_P_IPV6))
+               return sizeof(struct ipv6hdr);
+
+       return ~0U;
+}
+
+/* Insert len_diff bytes at offset (MAC header length + base network header
+ * length) of @skb.
+ *
+ * Returns 0 on success, or the negative error reported by skb_cow() or
+ * bpf_skb_net_hdr_push().
+ */
+static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
+{
+       /* Insertion offset, taken before the skb is modified. */
+       const u32 insert_at = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+       struct skb_shared_info *shinfo;
+       int err;
+
+       err = skb_cow(skb, len_diff);
+       if (unlikely(err < 0))
+               return err;
+
+       err = bpf_skb_net_hdr_push(skb, insert_at, len_diff);
+       if (unlikely(err < 0))
+               return err;
+
+       if (!skb_is_gso(skb))
+               return 0;
+
+       shinfo = skb_shinfo(skb);
+       /* Headers got longer, so the MSS must go down by the same amount. */
+       shinfo->gso_size -= len_diff;
+       /* Header must be re-checked and gso_segs recomputed. */
+       shinfo->gso_type |= SKB_GSO_DODGY;
+       shinfo->gso_segs = 0;
+
+       return 0;
+}
+
+/* Remove len_diff bytes at offset (MAC header length + base network header
+ * length) of @skb.
+ *
+ * Returns 0 on success, or the negative error reported by skb_unclone() or
+ * bpf_skb_net_hdr_pop().
+ */
+static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
+{
+       /* Removal offset, taken before the skb is modified. */
+       const u32 remove_at = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
+       struct skb_shared_info *shinfo;
+       int err;
+
+       err = skb_unclone(skb, GFP_ATOMIC);
+       if (unlikely(err < 0))
+               return err;
+
+       err = bpf_skb_net_hdr_pop(skb, remove_at, len_diff);
+       if (unlikely(err < 0))
+               return err;
+
+       if (!skb_is_gso(skb))
+               return 0;
+
+       shinfo = skb_shinfo(skb);
+       /* Headers got shorter, so the MSS may go up by the same amount. */
+       shinfo->gso_size += len_diff;
+       /* Header must be re-checked and gso_segs recomputed. */
+       shinfo->gso_type |= SKB_GSO_DODGY;
+       shinfo->gso_segs = 0;
+
+       return 0;
+}
+
+/* Largest skb length accepted when a BPF helper grows a packet: the MTU of
+ * skb->dev plus that device's hard_header_len.
+ */
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+       return skb->dev->mtu + skb->dev->hard_header_len;
+}
+
+/* Grow (len_diff > 0) or shrink (len_diff < 0) the room directly behind the
+ * base network header of an IPv4/IPv6 skb.
+ *
+ * Returns:
+ *   -EFAULT    if |len_diff| exceeds 0xfff,
+ *   -ENOTSUPP  if the protocol is neither IPv4 nor IPv6, if a shrink would
+ *              leave less than the base header, or if a grow would exceed
+ *              the device limit on a non-GSO skb,
+ *   otherwise the result of bpf_skb_net_shrink() / bpf_skb_net_grow()
+ *   (0 on success, negative errno on failure).
+ */
+static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
+{
+       bool trans_same = skb->transport_header == skb->network_header;
+       u32 len_cur, len_diff_abs = abs(len_diff);
+       u32 len_min = bpf_skb_net_base_len(skb);
+       u32 len_max = __bpf_skb_max_len(skb);
+       __be16 proto = skb->protocol;
+       bool shrink = len_diff < 0;
+       int ret;
+
+       if (unlikely(len_diff_abs > 0xfffU))
+               return -EFAULT;
+       if (unlikely(proto != htons(ETH_P_IP) &&
+                    proto != htons(ETH_P_IPV6)))
+               return -ENOTSUPP;
+
+       /* Bytes available from the network header onwards; narrowed to the
+        * network header length when a distinct transport header is set.
+        */
+       len_cur = skb->len - skb_network_offset(skb);
+       if (skb_transport_header_was_set(skb) && !trans_same)
+               len_cur = skb_network_header_len(skb);
+       if ((shrink && (len_diff_abs >= len_cur ||
+                       len_cur - len_diff_abs < len_min)) ||
+           (!shrink && (skb->len + len_diff_abs > len_max &&
+                        !skb_is_gso(skb))))
+               return -ENOTSUPP;
+
+       ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
+                      bpf_skb_net_grow(skb, len_diff_abs);
+
+       /* Refresh the cached data_end whether or not the resize succeeded. */
+       bpf_compute_data_end(skb);
+       /* Propagate the helper's error instead of always reporting success. */
+       return ret;
+}
+
+/* BPF helper entry point: adjust room in @skb by @len_diff bytes.
+ * @flags must be zero (-EINVAL otherwise) and @mode must be
+ * BPF_ADJ_ROOM_NET (-ENOTSUPP otherwise).
+ */
+BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
+          u32, mode, u64, flags)
+{
+       if (unlikely(flags))
+               return -EINVAL;
+       if (unlikely(mode != BPF_ADJ_ROOM_NET))
+               return -ENOTSUPP;
+
+       return bpf_skb_adjust_net(skb, len_diff);
+}
+
+/* Descriptor for bpf_skb_adjust_room(): not GPL-only, integer return,
+ * first argument is the program context, the remaining three are
+ * ARG_ANYTHING.
+ */
+static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
+       .func           = bpf_skb_adjust_room,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_ANYTHING,
+};
+
  static u32 __bpf_skb_min_len(const struct sk_buff *skb)
  {
         u32 min_len = skb_network_offset(skb);
@@ -2165,11 +2284,6 @@ static u32 __bpf_skb_min_len(const struct sk_buff *skb)
         return min_len;
  }
  
-static u32 __bpf_skb_max_len(const struct sk_buff *skb)
-{
-       return skb->dev->mtu + skb->dev->hard_header_len;
-}
-
  static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
  {
         unsigned int old_len = skb->len;
@@ -2306,6 +2420,7 @@ bool bpf_helper_changes_pkt_data(void *func)
             func == bpf_skb_change_proto ||
             func == bpf_skb_change_head ||
             func == bpf_skb_change_tail ||
+           func == bpf_skb_adjust_room ||
             func == bpf_skb_pull_data ||
             func == bpf_clone_redirect ||
             func == bpf_l3_csum_replace ||
@@ -2672,6 +2787,109 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
         .arg1_type      = ARG_PTR_TO_CTX,
  };
  
+BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+          int, level, int, optname, char *, optval, int, optlen)
+{
+       struct sock *sk = bpf_sock->sk;
+       int ret = 0;
+       int val;
+
+       if (!sk_fullsock(sk))
+               return -EINVAL;
+
+       if (level == SOL_SOCKET) {
+               if (optlen != sizeof(int))
+                       return -EINVAL;
+               val = *((int *)optval);
+
+               /* Only some socketops are supported */
+               switch (optname) {
+               case SO_RCVBUF:
+                       sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+                       sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+                       break;
+               case SO_SNDBUF:
+                       sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+                       sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+                       break;
+               case SO_MAX_PACING_RATE:
+                       sk->sk_max_pacing_rate = val;
+                       sk->sk_pacing_rate = min(sk->sk_pacing_rate,
+                                                sk->sk_max_pacing_rate);
+                       break;
+               case SO_PRIORITY:
+                       sk->sk_priority = val;
+                       break;
+               case SO_RCVLOWAT:
+                       if (val < 0)
+                               val = INT_MAX;
+                       sk->sk_rcvlowat = val ? : 1;
+                       break;
+               case SO_MARK:
+                       sk->sk_mark = val;
+                       break;
+               default:
+                       ret = -EINVAL;
+               }
+#ifdef CONFIG_INET
+       } else if (level == SOL_TCP &&
+                  sk->sk_prot->setsockopt == tcp_setsockopt) {
+               if (optname == TCP_CONGESTION) {
+                       char name[TCP_CA_NAME_MAX];
+
+                       strncpy(name, optval, min_t(long, optlen,
+                                                   TCP_CA_NAME_MAX-1));
+                       name[TCP_CA_NAME_MAX-1] = 0;
+                       ret = tcp_set_congestion_control(sk, name, false);
+                       if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
+                               /* replacing an existing ca */
+                               tcp_reinit_congestion_control(sk,
+                                       inet_csk(sk)->icsk_ca_ops);
+               } else {
+                       struct tcp_sock *tp = tcp_sk(sk);
+
+                       if (optlen != sizeof(int))
+                               return -EINVAL;
+
+                       val = *((int *)optval);
+                       /* Only some options are supported */
+                       switch (optname) {
+                       case TCP_BPF_IW:
+                               if (val <= 0 || tp->data_segs_out > 0)
+                                       ret = -EINVAL;
+                               else
+                                       tp->snd_cwnd = val;
+                               break;
+                       case TCP_BPF_SNDCWND_CLAMP:
+                               if (val <= 0) {
+                                       ret = -EINVAL;
+                               } else {
+                                       tp->snd_cwnd_clamp = val;
+                                       tp->snd_ssthresh = val;
+                               }
+                       default:
+                               ret = -EINVAL;
+                       }
+               }
+               ret = -EINVAL;
+#endif
+       } else {
+               ret = -EINVAL;
+       }
+       return ret;
+}
+
+/* Descriptor for bpf_setsockopt(): GPL-only, integer return. Arguments are
+ * the program context, two ARG_ANYTHING values (level and optname in the
+ * helper's signature), a memory pointer (optval) and its constant size
+ * (optlen).
+ */
+static const struct bpf_func_proto bpf_setsockopt_proto = {
+       .func           = bpf_setsockopt,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
  static const struct bpf_func_proto *
  bpf_base_func_proto(enum bpf_func_id func_id)
  {
@@ -2745,6 +2963,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
                 return &bpf_skb_change_proto_proto;
         case BPF_FUNC_skb_change_type:
                 return &bpf_skb_change_type_proto;
+       case BPF_FUNC_skb_adjust_room:
+               return &bpf_skb_adjust_room_proto;
         case BPF_FUNC_skb_change_tail:
                 return &bpf_skb_change_tail_proto;
         case BPF_FUNC_skb_get_tunnel_key:
@@ -2822,6 +3042,17 @@ lwt_inout_func_proto(enum bpf_func_id func_id)
         }
  }
  
+/* Resolve a helper id for sock_ops programs: BPF_FUNC_setsockopt maps to
+ * bpf_setsockopt_proto, everything else is delegated to
+ * bpf_base_func_proto().
+ */
+static const struct bpf_func_proto *
+sock_ops_func_proto(enum bpf_func_id func_id)
+{
+       if (func_id == BPF_FUNC_setsockopt)
+               return &bpf_setsockopt_proto;
+
+       return bpf_base_func_proto(func_id);
+}
+
  static const struct bpf_func_proto *
  lwt_xmit_func_proto(enum bpf_func_id func_id)
  {
@@ -2857,38 +3088,11 @@ lwt_xmit_func_proto(enum bpf_func_id func_id)
         }
  }
  
-static void __set_access_aux_info(int off, struct bpf_insn_access_aux *info)
+static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
+                                   struct bpf_insn_access_aux *info)
  {
-       info->ctx_field_size = 4;
-       switch (off) {
-       case offsetof(struct __sk_buff, pkt_type) ...
-            offsetof(struct __sk_buff, pkt_type) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, vlan_present) ...
-            offsetof(struct __sk_buff, vlan_present) + sizeof(__u32) - 1:
-               info->converted_op_size = 1;
-               break;
-       case offsetof(struct __sk_buff, queue_mapping) ...
-            offsetof(struct __sk_buff, queue_mapping) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, protocol) ...
-            offsetof(struct __sk_buff, protocol) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, vlan_tci) ...
-            offsetof(struct __sk_buff, vlan_tci) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, vlan_proto) ...
-            offsetof(struct __sk_buff, vlan_proto) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, tc_index) ...
-            offsetof(struct __sk_buff, tc_index) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, tc_classid) ...
-            offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
-               info->converted_op_size = 2;
-               break;
-       default:
-               info->converted_op_size = 4;
-       }
-}
+       const int size_default = sizeof(__u32);
  
-static bool __is_valid_access(int off, int size, enum bpf_access_type type,
-                             struct bpf_insn_access_aux *info)
-{
         if (off < 0 || off >= sizeof(struct __sk_buff))
                 return false;
  
@@ -2897,40 +3101,24 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type,
                 return false;
  
         switch (off) {
-       case offsetof(struct __sk_buff, cb[0]) ...
-            offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
-               if (off + size >
-                   offsetof(struct __sk_buff, cb[4]) + sizeof(__u32))
+       case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+               if (off + size > offsetofend(struct __sk_buff, cb[4]))
                         return false;
                 break;
-       case offsetof(struct __sk_buff, data) ...
-            offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
-               if (size != sizeof(__u32))
+       case bpf_ctx_range(struct __sk_buff, data):
+       case bpf_ctx_range(struct __sk_buff, data_end):
+               if (size != size_default)
                         return false;
-               info->reg_type = PTR_TO_PACKET;
-               break;
-       case offsetof(struct __sk_buff, data_end) ...
-            offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
-               if (size != sizeof(__u32))
-                       return false;
-               info->reg_type = PTR_TO_PACKET_END;
                 break;
         default:
+               /* Only narrow read access allowed for now. */
                 if (type == BPF_WRITE) {
-                       if (size != sizeof(__u32))
+                       if (size != size_default)
                                 return false;
                 } else {
-                       int allowed;
-
-                       /* permit narrower load for not cb/data/data_end fields */
-#ifdef __LITTLE_ENDIAN
-                       allowed = (off & 0x3) == 0 && size <= 4 && (size & (size - 1)) == 0;
-#else
-                       allowed = (off & 0x3) + size == 4 && size <= 4 && (size & (size - 1)) == 0;
-#endif
-                       if (!allowed)
+                       bpf_ctx_record_field_size(info, size_default);
+                       if (!bpf_ctx_narrow_access_ok(off, size, size_default))
                                 return false;
-                       __set_access_aux_info(off, info);
                 }
         }
  
@@ -2942,26 +3130,22 @@ static bool sk_filter_is_valid_access(int off, int size,
                                       struct bpf_insn_access_aux *info)
  {
         switch (off) {
-       case offsetof(struct __sk_buff, tc_classid) ...
-            offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, data) ...
-            offsetof(struct __sk_buff, data) + sizeof(__u32) - 1:
-       case offsetof(struct __sk_buff, data_end) ...
-            offsetof(struct __sk_buff, data_end) + sizeof(__u32) - 1:
+       case bpf_ctx_range(struct __sk_buff, tc_classid):
+       case bpf_ctx_range(struct __sk_buff, data):
+       case bpf_ctx_range(struct __sk_buff, data_end):
                 return false;
         }
  
         if (type == BPF_WRITE) {
                 switch (off) {
-               case offsetof(struct __sk_buff, cb[0]) ...
-                    offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+               case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
                         break;
                 default:
                         return false;
                 }
         }
  
-       return __is_valid_access(off, size, type, info);
+       return bpf_skb_is_valid_access(off, size, type, info);
  }
  
  static bool lwt_is_valid_access(int off, int size,
@@ -2969,24 +3153,31 @@ static bool lwt_is_valid_access(int off, int size,
                                 struct bpf_insn_access_aux *info)
  {
         switch (off) {
-       case offsetof(struct __sk_buff, tc_classid) ...
-            offsetof(struct __sk_buff, tc_classid) + sizeof(__u32) - 1:
+       case bpf_ctx_range(struct __sk_buff, tc_classid):
                 return false;
         }
  
         if (type == BPF_WRITE) {
                 switch (off) {
-               case offsetof(struct __sk_buff, mark):
-               case offsetof(struct __sk_buff, priority):
-               case offsetof(struct __sk_buff, cb[0]) ...
-                    offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+               case bpf_ctx_range(struct __sk_buff, mark):
+               case bpf_ctx_range(struct __sk_buff, priority):
+               case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
                         break;
                 default:
                         return false;
                 }
         }
  
-       return __is_valid_access(off, size, type, info);
+       switch (off) {
+       case bpf_ctx_range(struct __sk_buff, data):
+               info->reg_type = PTR_TO_PACKET;
+               break;
+       case bpf_ctx_range(struct __sk_buff, data_end):
+               info->reg_type = PTR_TO_PACKET_END;
+               break;
+       }
+
+       return bpf_skb_is_valid_access(off, size, type, info);
  }
  
  static bool sock_filter_is_valid_access(int off, int size,
@@ -3058,19 +3249,27 @@ static bool tc_cls_act_is_valid_access(int off, int size,
  {
         if (type == BPF_WRITE) {
                 switch (off) {
-               case offsetof(struct __sk_buff, mark):
-               case offsetof(struct __sk_buff, tc_index):
-               case offsetof(struct __sk_buff, priority):
-               case offsetof(struct __sk_buff, cb[0]) ...
-                    offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
-               case offsetof(struct __sk_buff, tc_classid):
+               case bpf_ctx_range(struct __sk_buff, mark):
+               case bpf_ctx_range(struct __sk_buff, tc_index):
+               case bpf_ctx_range(struct __sk_buff, priority):
+               case bpf_ctx_range(struct __sk_buff, tc_classid):
+               case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
                         break;
                 default:
                         return false;
                 }
         }
  
-       return __is_valid_access(off, size, type, info);
+       switch (off) {
+       case bpf_ctx_range(struct __sk_buff, data):
+               info->reg_type = PTR_TO_PACKET;
+               break;
+       case bpf_ctx_range(struct __sk_buff, data_end):
+               info->reg_type = PTR_TO_PACKET_END;
+               break;
+       }
+
+       return bpf_skb_is_valid_access(off, size, type, info);
  }
  
  static bool __is_valid_xdp_access(int off, int size)
@@ -3110,101 +3309,141 @@ void bpf_warn_invalid_xdp_action(u32 act)
  }
  EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
  
+static bool __is_valid_sock_ops_access(int off, int size)
+{
+       if (off < 0 || off >= sizeof(struct bpf_sock_ops))
+               return false;
+       /* The verifier guarantees that size > 0. */
+       if (off % size != 0)
+               return false;
+       if (size != sizeof(__u32))
+               return false;
+
+       return true;
+}
+
+static bool sock_ops_is_valid_access(int off, int size,
+                                    enum bpf_access_type type,
+                                    struct bpf_insn_access_aux *info)
+{
+       if (type == BPF_WRITE) {
+               switch (off) {
+               case offsetof(struct bpf_sock_ops, op) ...
+                    offsetof(struct bpf_sock_ops, replylong[3]):
+                       break;
+               default:
+                       return false;
+               }
+       }
+
+       return __is_valid_sock_ops_access(off, size);
+}
+
  static u32 bpf_convert_ctx_access(enum bpf_access_type type,
                                   const struct bpf_insn *si,
                                   struct bpf_insn *insn_buf,
-                                 struct bpf_prog *prog)
+                                 struct bpf_prog *prog, u32 *target_size)
  {
         struct bpf_insn *insn = insn_buf;
         int off;
  
         switch (si->off) {
         case offsetof(struct __sk_buff, len):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-
                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                     offsetof(struct sk_buff, len));
+                                     bpf_target_off(struct sk_buff, len, 4,
+                                                    target_size));
                 break;
  
         case offsetof(struct __sk_buff, protocol):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-
                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-                                     offsetof(struct sk_buff, protocol));
+                                     bpf_target_off(struct sk_buff, protocol, 2,
+                                                    target_size));
                 break;
  
         case offsetof(struct __sk_buff, vlan_proto):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
-
                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-                                     offsetof(struct sk_buff, vlan_proto));
+                                     bpf_target_off(struct sk_buff, vlan_proto, 2,
+                                                    target_size));
                 break;
  
         case offsetof(struct __sk_buff, priority):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
-
                 if (type == BPF_WRITE)
                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                             offsetof(struct sk_buff, priority));
+                                             bpf_target_off(struct sk_buff, priority, 4,
+                                                            target_size));
                 else
                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                             offsetof(struct sk_buff, priority));
+                                             bpf_target_off(struct sk_buff, priority, 4,
+                                                            target_size));
                 break;
  
         case offsetof(struct __sk_buff, ingress_ifindex):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
-
                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                     offsetof(struct sk_buff, skb_iif));
+                                     bpf_target_off(struct sk_buff, skb_iif, 4,
+                                                    target_size));
                 break;
  
         case offsetof(struct __sk_buff, ifindex):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
                                       si->dst_reg, si->src_reg,
                                       offsetof(struct sk_buff, dev));
                 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
-                                     offsetof(struct net_device, ifindex));
+                                     bpf_target_off(struct net_device, ifindex, 4,
+                                                    target_size));
                 break;
  
         case offsetof(struct __sk_buff, hash):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-
                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                     offsetof(struct sk_buff, hash));
+                                     bpf_target_off(struct sk_buff, hash, 4,
+                                                    target_size));
                 break;
  
         case offsetof(struct __sk_buff, mark):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
                 if (type == BPF_WRITE)
                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                             offsetof(struct sk_buff, mark));
+                                             bpf_target_off(struct sk_buff, mark, 4,
+                                                            target_size));
                 else
                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                             offsetof(struct sk_buff, mark));
+                                             bpf_target_off(struct sk_buff, mark, 4,
+                                                            target_size));
                 break;
  
         case offsetof(struct __sk_buff, pkt_type):
-               return convert_skb_access(SKF_AD_PKTTYPE, si->dst_reg,
-                                         si->src_reg, insn);
+               *target_size = 1;
+               *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+                                     PKT_TYPE_OFFSET());
+               *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+               *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
+#endif
+               break;
  
         case offsetof(struct __sk_buff, queue_mapping):
-               return convert_skb_access(SKF_AD_QUEUE, si->dst_reg,
-                                         si->src_reg, insn);
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct sk_buff, queue_mapping, 2,
+                                                    target_size));
+               break;
  
         case offsetof(struct __sk_buff, vlan_present):
-               return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
-                                         si->dst_reg, si->src_reg, insn);
-
         case offsetof(struct __sk_buff, vlan_tci):
-               return convert_skb_access(SKF_AD_VLAN_TAG,
-                                         si->dst_reg, si->src_reg, insn);
+               BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
+                                     bpf_target_off(struct sk_buff, vlan_tci, 2,
+                                                    target_size));
+               if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
+                       *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
+                                               ~VLAN_TAG_PRESENT);
+               } else {
+                       *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
+                       *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
+               }
+               break;
  
         case offsetof(struct __sk_buff, cb[0]) ...
-            offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+            offsetofend(struct __sk_buff, cb[4]) - 1:
                 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
                 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
                               offsetof(struct qdisc_skb_cb, data)) %
@@ -3230,6 +3469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
                 off -= offsetof(struct __sk_buff, tc_classid);
                 off += offsetof(struct sk_buff, cb);
                 off += offsetof(struct qdisc_skb_cb, tc_classid);
+               *target_size = 2;
                 if (type == BPF_WRITE)
                         *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
                                               si->src_reg, off);
@@ -3255,14 +3495,14 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
  
         case offsetof(struct __sk_buff, tc_index):
  #ifdef CONFIG_NET_SCHED
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
-
                 if (type == BPF_WRITE)
                         *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
-                                             offsetof(struct sk_buff, tc_index));
+                                             bpf_target_off(struct sk_buff, tc_index, 2,
+                                                            target_size));
                 else
                         *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-                                             offsetof(struct sk_buff, tc_index));
+                                             bpf_target_off(struct sk_buff, tc_index, 2,
+                                                            target_size));
  #else
                 if (type == BPF_WRITE)
                         *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
@@ -3273,10 +3513,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
  
         case offsetof(struct __sk_buff, napi_id):
  #if defined(CONFIG_NET_RX_BUSY_POLL)
-               BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, napi_id) != 4);
-
                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-                                     offsetof(struct sk_buff, napi_id));
+                                     bpf_target_off(struct sk_buff, napi_id, 4,
+                                                    target_size));
                 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
                 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
  #else
@@ -3291,7 +3530,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
  static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
                                           const struct bpf_insn *si,
                                           struct bpf_insn *insn_buf,
-                                         struct bpf_prog *prog)
+                                         struct bpf_prog *prog, u32 *target_size)
  {
         struct bpf_insn *insn = insn_buf;
  
@@ -3335,22 +3574,22 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
  static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
                                          const struct bpf_insn *si,
                                          struct bpf_insn *insn_buf,
-                                        struct bpf_prog *prog)
+                                        struct bpf_prog *prog, u32 *target_size)
  {
         struct bpf_insn *insn = insn_buf;
  
         switch (si->off) {
         case offsetof(struct __sk_buff, ifindex):
-               BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-
                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
                                       si->dst_reg, si->src_reg,
                                       offsetof(struct sk_buff, dev));
                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
-                                     offsetof(struct net_device, ifindex));
+                                     bpf_target_off(struct net_device, ifindex, 4,
+                                                    target_size));
                 break;
         default:
-               return bpf_convert_ctx_access(type, si, insn_buf, prog);
+               return bpf_convert_ctx_access(type, si, insn_buf, prog,
+                                             target_size);
         }
  
         return insn - insn_buf;
@@ -3359,7 +3598,7 @@ static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
  static u32 xdp_convert_ctx_access(enum bpf_access_type type,
                                   const struct bpf_insn *si,
                                   struct bpf_insn *insn_buf,
-                                 struct bpf_prog *prog)
+                                 struct bpf_prog *prog, u32 *target_size)
  {
         struct bpf_insn *insn = insn_buf;
  
@@ -3379,6 +3618,139 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
         return insn - insn_buf;
  }
  
+static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
+                                      const struct bpf_insn *si,
+                                      struct bpf_insn *insn_buf,
+                                      struct bpf_prog *prog,
+                                      u32 *target_size)
+{
+       struct bpf_insn *insn = insn_buf;
+       int off;
+
+       switch (si->off) {
+       case offsetof(struct bpf_sock_ops, op) ...
+            offsetof(struct bpf_sock_ops, replylong[3]):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
+                            FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
+               BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
+                            FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
+               BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
+                            FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
+               off = si->off;
+               off -= offsetof(struct bpf_sock_ops, op);
+               off += offsetof(struct bpf_sock_ops_kern, op);
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
+                                             off);
+               else
+                       *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+                                             off);
+               break;
+
+       case offsetof(struct bpf_sock_ops, family):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+                                             struct bpf_sock_ops_kern, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sock_ops_kern, sk));
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+                                     offsetof(struct sock_common, skc_family));
+               break;
+
+       case offsetof(struct bpf_sock_ops, remote_ip4):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+                                               struct bpf_sock_ops_kern, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sock_ops_kern, sk));
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+                                     offsetof(struct sock_common, skc_daddr));
+               break;
+
+       case offsetof(struct bpf_sock_ops, local_ip4):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
+
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+                                             struct bpf_sock_ops_kern, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sock_ops_kern, sk));
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+                                     offsetof(struct sock_common,
+                                              skc_rcv_saddr));
+               break;
+
+       case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
+            offsetof(struct bpf_sock_ops, remote_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+                                         skc_v6_daddr.s6_addr32[0]) != 4);
+
+               off = si->off;
+               off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+                                               struct bpf_sock_ops_kern, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sock_ops_kern, sk));
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+                                     offsetof(struct sock_common,
+                                              skc_v6_daddr.s6_addr32[0]) +
+                                     off);
+#else
+               *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+               break;
+
+       case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
+            offsetof(struct bpf_sock_ops, local_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+                                         skc_v6_rcv_saddr.s6_addr32[0]) != 4);
+
+               off = si->off;
+               off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+                                               struct bpf_sock_ops_kern, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sock_ops_kern, sk));
+               *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+                                     offsetof(struct sock_common,
+                                              skc_v6_rcv_saddr.s6_addr32[0]) +
+                                     off);
+#else
+               *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+#endif
+               break;
+
+       case offsetof(struct bpf_sock_ops, remote_port):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+                                               struct bpf_sock_ops_kern, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sock_ops_kern, sk));
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+                                     offsetof(struct sock_common, skc_dport));
+#ifndef __BIG_ENDIAN_BITFIELD
+               *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
+#endif
+               break;
+
+       case offsetof(struct bpf_sock_ops, local_port):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+
+               *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+                                               struct bpf_sock_ops_kern, sk),
+                                     si->dst_reg, si->src_reg,
+                                     offsetof(struct bpf_sock_ops_kern, sk));
+               *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+                                     offsetof(struct sock_common, skc_num));
+               break;
+       }
+       return insn - insn_buf;
+}
+
  const struct bpf_verifier_ops sk_filter_prog_ops = {
         .get_func_proto         = sk_filter_func_proto,
         .is_valid_access        = sk_filter_is_valid_access,
@@ -3428,6 +3800,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = {
         .convert_ctx_access     = sock_filter_convert_ctx_access,
  };
  
+const struct bpf_verifier_ops sock_ops_prog_ops = {
+       .get_func_proto         = sock_ops_func_proto,
+       .is_valid_access        = sock_ops_is_valid_access,
+       .convert_ctx_access     = sock_ops_convert_ctx_access,
+};
+
  int sk_detach_filter(struct sock *sk)
  {
         int ret = -ENOENT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c

index dadb5eef91c39eb192b2babdac6f1ce26df69d3b..e31fc11a80001503a6c6224f8289378e4246d310 100644 (file)
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -124,7 +124,7 @@ static bool neigh_del(struct neighbour *n, __u8 state,
         bool retval = false;
  
         write_lock(&n->lock);
-       if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
+       if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
                 struct neighbour *neigh;
  
                 neigh = rcu_dereference_protected(n->next,
@@ -254,7 +254,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
                         neigh_del_timer(n);
                         n->dead = 1;
  
-                       if (atomic_read(&n->refcnt) != 1) {
+                       if (refcount_read(&n->refcnt) != 1) {
                                 /* The most unpleasant situation.
                                    We must destroy neighbour entry,
                                    but someone still uses it.
@@ -335,7 +335,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
  
         NEIGH_CACHE_STAT_INC(tbl, allocs);
         n->tbl            = tbl;
-       atomic_set(&n->refcnt, 1);
+       refcount_set(&n->refcnt, 1);
         n->dead           = 1;
  out:
         return n;
@@ -444,7 +444,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
         rcu_read_lock_bh();
         n = __neigh_lookup_noref(tbl, pkey, dev);
         if (n) {
-               if (!atomic_inc_not_zero(&n->refcnt))
+               if (!refcount_inc_not_zero(&n->refcnt))
                         n = NULL;
                 NEIGH_CACHE_STAT_INC(tbl, hits);
         }
@@ -473,7 +473,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
              n = rcu_dereference_bh(n->next)) {
                 if (!memcmp(n->primary_key, pkey, key_len) &&
                     net_eq(dev_net(n->dev), net)) {
-                       if (!atomic_inc_not_zero(&n->refcnt))
+                       if (!refcount_inc_not_zero(&n->refcnt))
                                 n = NULL;
                         NEIGH_CACHE_STAT_INC(tbl, hits);
                         break;
@@ -709,7 +709,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms);
  
  static inline void neigh_parms_put(struct neigh_parms *parms)
  {
-       if (atomic_dec_and_test(&parms->refcnt))
+       if (refcount_dec_and_test(&parms->refcnt))
                 neigh_parms_destroy(parms);
  }
  
@@ -821,7 +821,7 @@ static void neigh_periodic_work(struct work_struct *work)
                         if (time_before(n->used, n->confirmed))
                                 n->used = n->confirmed;
  
-                       if (atomic_read(&n->refcnt) == 1 &&
+                       if (refcount_read(&n->refcnt) == 1 &&
                             (state == NUD_FAILED ||
                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
                                 *np = n->next;
@@ -1479,7 +1479,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
         if (p) {
                 p->tbl            = tbl;
-               atomic_set(&p->refcnt, 1);
+               refcount_set(&p->refcnt, 1);
                 p->reachable_time =
                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
                 dev_hold(dev);
@@ -1542,7 +1542,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
         INIT_LIST_HEAD(&tbl->parms_list);
         list_add(&tbl->parms.list, &tbl->parms_list);
         write_pnet(&tbl->parms.net, &init_net);
-       atomic_set(&tbl->parms.refcnt, 1);
+       refcount_set(&tbl->parms.refcnt, 1);
         tbl->parms.reachable_time =
                           neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
  
@@ -1796,7 +1796,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
  
         if ((parms->dev &&
              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
-           nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
+           nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
             /* approximative value for deprecated QUEUE_LEN (in packets) */
@@ -2234,7 +2234,7 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
-       ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
+       ci.ndm_refcnt    = refcount_read(&neigh->refcnt) - 1;
         read_unlock_bh(&neigh->lock);
  
         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c

index 58e6cc70500d7c68245a6bf7c3c345b59c57bbad..b4f9922b6f233da3d4a189b80b12034f6f7fddeb 100644 (file)
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -626,7 +626,7 @@ static struct attribute *netstat_attrs[] = {
  };
  
  
-static struct attribute_group netstat_group = {
+static const struct attribute_group netstat_group = {
         .name  = "statistics",
         .attrs  = netstat_attrs,
  };
@@ -636,7 +636,7 @@ static struct attribute *wireless_attrs[] = {
         NULL
  };
  
-static struct attribute_group wireless_group = {
+static const struct attribute_group wireless_group = {
         .name = "wireless",
         .attrs = wireless_attrs,
  };
@@ -1204,7 +1204,7 @@ static struct attribute *dql_attrs[] = {
         NULL
  };
  
-static struct attribute_group dql_group = {
+static const struct attribute_group dql_group = {
         .name  = "byte_queue_limits",
         .attrs  = dql_attrs,
  };
@@ -1448,7 +1448,7 @@ static void *net_grab_current_ns(void)
         struct net *ns = current->nsproxy->net_ns;
  #ifdef CONFIG_NET_NS
         if (ns)
-               atomic_inc(&ns->passive);
+               refcount_inc(&ns->passive);
  #endif
         return ns;
  }
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c

index 2178db8e47cd332a3ca0de850cd574a5acaadb10..8726d051f31dd2ddce96a8cd7b1ccf50435f0d92 100644 (file)
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -284,7 +284,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
         LIST_HEAD(net_exit_list);
  
         atomic_set(&net->count, 1);
-       atomic_set(&net->passive, 1);
+       refcount_set(&net->passive, 1);
         net->dev_base_seq = 1;
         net->user_ns = user_ns;
         idr_init(&net->netns_ids);
@@ -380,7 +380,7 @@ static void net_free(struct net *net)
  void net_drop_ns(void *p)
  {
         struct net *ns = p;
-       if (ns && atomic_dec_and_test(&ns->passive))
+       if (ns && refcount_dec_and_test(&ns->passive))
                 net_free(ns);
  }
  
@@ -501,6 +501,23 @@ static void cleanup_net(struct work_struct *work)
                 net_drop_ns(net);
         }
  }
+
+/**
+ * net_ns_barrier - wait until concurrent net_cleanup_work is done
+ *
+ * cleanup_net runs from work queue and will first remove namespaces
+ * from the global list, then run net exit functions.
+ *
+ * Call this in module exit path to make sure that all netns
+ * ->exit ops have been invoked before the function is removed.
+ */
+void net_ns_barrier(void)
+{
+       mutex_lock(&net_mutex);
+       mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL(net_ns_barrier);
+
  static DECLARE_WORK(net_cleanup_work, cleanup_net);
  
  void __put_net(struct net *net)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c

index 37c1e34ddd8551d5b91b54d1f8161b6520b2623e..d3408a69316622a59de0ce19d5197be56d5a87a4 100644 (file)
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -277,7 +277,7 @@ static void zap_completion_queue(void)
                         struct sk_buff *skb = clist;
                         clist = clist->next;
                         if (!skb_irq_freeable(skb)) {
-                               atomic_inc(&skb->users);
+                               refcount_inc(&skb->users);
                                 dev_kfree_skb_any(skb); /* put this one back */
                         } else {
                                 __kfree_skb(skb);
@@ -309,7 +309,7 @@ repeat:
                 return NULL;
         }
  
-       atomic_set(&skb->users, 1);
+       refcount_set(&skb->users, 1);
         skb_reserve(skb, reserve);
         return skb;
  }
@@ -632,7 +632,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
                 skb_queue_head_init(&npinfo->txq);
                 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
  
-               atomic_set(&npinfo->refcnt, 1);
+               refcount_set(&npinfo->refcnt, 1);
  
                 ops = np->dev->netdev_ops;
                 if (ops->ndo_netpoll_setup) {
@@ -642,7 +642,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
                 }
         } else {
                 npinfo = rtnl_dereference(ndev->npinfo);
-               atomic_inc(&npinfo->refcnt);
+               refcount_inc(&npinfo->refcnt);
         }
  
         npinfo->netpoll = np;
@@ -821,7 +821,7 @@ void __netpoll_cleanup(struct netpoll *np)
  
         synchronize_srcu(&netpoll_srcu);
  
-       if (atomic_dec_and_test(&npinfo->refcnt)) {
+       if (refcount_dec_and_test(&npinfo->refcnt)) {
                 const struct net_device_ops *ops;
  
                 ops = np->dev->netdev_ops;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c

index 2dd42c5b0366f2c0206402d58e95d79b9ff50469..6e1e10ff433a5f4097d1d4b33848ab13d4e005c6 100644 (file)
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3363,7 +3363,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
  {
         ktime_t idle_start = ktime_get();
  
-       while (atomic_read(&(pkt_dev->skb->users)) != 1) {
+       while (refcount_read(&(pkt_dev->skb->users)) != 1) {
                 if (signal_pending(current))
                         break;
  
@@ -3420,7 +3420,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
         if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
                 skb = pkt_dev->skb;
                 skb->protocol = eth_type_trans(skb, skb->dev);
-               atomic_add(burst, &skb->users);
+               refcount_add(burst, &skb->users);
                 local_bh_disable();
                 do {
                         ret = netif_receive_skb(skb);
@@ -3428,11 +3428,11 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
                                 pkt_dev->errors++;
                         pkt_dev->sofar++;
                         pkt_dev->seq_num++;
-                       if (atomic_read(&skb->users) != burst) {
+                       if (refcount_read(&skb->users) != burst) {
                                 /* skb was queued by rps/rfs or taps,
                                  * so cannot reuse this skb
                                  */
-                               atomic_sub(burst - 1, &skb->users);
+                               WARN_ON(refcount_sub_and_test(burst - 1, &skb->users));
                                 /* get out of the loop and wait
                                  * until skb is consumed
                                  */
@@ -3446,7 +3446,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
                 goto out; /* Skips xmit_mode M_START_XMIT */
         } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {
                 local_bh_disable();
-               atomic_inc(&pkt_dev->skb->users);
+               refcount_inc(&pkt_dev->skb->users);
  
                 ret = dev_queue_xmit(pkt_dev->skb);
                 switch (ret) {
@@ -3487,7 +3487,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
                 pkt_dev->last_ok = 0;
                 goto unlock;
         }
-       atomic_add(burst, &pkt_dev->skb->users);
+       refcount_add(burst, &pkt_dev->skb->users);
  
  xmit_more:
         ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0);
@@ -3513,11 +3513,11 @@ xmit_more:
                 /* fallthru */
         case NETDEV_TX_BUSY:
                 /* Retry it next time */
-               atomic_dec(&(pkt_dev->skb->users));
+               refcount_dec(&(pkt_dev->skb->users));
                 pkt_dev->last_ok = 0;
         }
         if (unlikely(burst))
-               atomic_sub(burst, &pkt_dev->skb->users);
+               WARN_ON(refcount_sub_and_test(burst, &pkt_dev->skb->users));
  unlock:
         HARD_TX_UNLOCK(odev, txq);
  
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c

index ed51de525a8869a16f9a5aa01c60bb130dd6e1c9..d1ba90980be1325e86836795a354214cb96a4079 100644 (file)
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -649,7 +649,7 @@ int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int g
  
         NETLINK_CB(skb).dst_group = group;
         if (echo)
-               atomic_inc(&skb->users);
+               refcount_inc(&skb->users);
         netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
         if (echo)
                 err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c

index f75897a33fa4a181e145a2f6473375eaaeceb9bc..8b11341ed69ad97d34dd3e9b73c8c44ef7c452ff 100644 (file)
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -176,7 +176,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
         memset(skb, 0, offsetof(struct sk_buff, tail));
         skb->head = NULL;
         skb->truesize = sizeof(struct sk_buff);
-       atomic_set(&skb->users, 1);
+       refcount_set(&skb->users, 1);
  
         skb->mac_header = (typeof(skb->mac_header))~0U;
  out:
@@ -247,7 +247,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         /* Account for allocated memory : skb + skb->head */
         skb->truesize = SKB_TRUESIZE(size);
         skb->pfmemalloc = pfmemalloc;
-       atomic_set(&skb->users, 1);
+       refcount_set(&skb->users, 1);
         skb->head = data;
         skb->data = data;
         skb_reset_tail_pointer(skb);
@@ -268,7 +268,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
  
                 kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
                 skb->fclone = SKB_FCLONE_ORIG;
-               atomic_set(&fclones->fclone_ref, 1);
+               refcount_set(&fclones->fclone_ref, 1);
  
                 fclones->skb2.fclone = SKB_FCLONE_CLONE;
         }
@@ -314,7 +314,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
  
         memset(skb, 0, offsetof(struct sk_buff, tail));
         skb->truesize = SKB_TRUESIZE(size);
-       atomic_set(&skb->users, 1);
+       refcount_set(&skb->users, 1);
         skb->head = data;
         skb->data = data;
         skb_reset_tail_pointer(skb);
@@ -629,7 +629,7 @@ static void kfree_skbmem(struct sk_buff *skb)
                  * This test would have no chance to be true for the clone,
                  * while here, branch prediction will be good.
                  */
-               if (atomic_read(&fclones->fclone_ref) == 1)
+               if (refcount_read(&fclones->fclone_ref) == 1)
                         goto fastpath;
                 break;
  
@@ -637,7 +637,7 @@ static void kfree_skbmem(struct sk_buff *skb)
                 fclones = container_of(skb, struct sk_buff_fclones, skb2);
                 break;
         }
-       if (!atomic_dec_and_test(&fclones->fclone_ref))
+       if (!refcount_dec_and_test(&fclones->fclone_ref))
                 return;
  fastpath:
         kmem_cache_free(skbuff_fclone_cache, fclones);
@@ -915,7 +915,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
         C(head_frag);
         C(data);
         C(truesize);
-       atomic_set(&n->users, 1);
+       refcount_set(&n->users, 1);
  
         atomic_inc(&(skb_shinfo(skb)->dataref));
         skb->cloned = 1;
@@ -1027,9 +1027,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
                 return NULL;
  
         if (skb->fclone == SKB_FCLONE_ORIG &&
-           atomic_read(&fclones->fclone_ref) == 1) {
+           refcount_read(&fclones->fclone_ref) == 1) {
                 n = &fclones->skb2;
-               atomic_set(&fclones->fclone_ref, 2);
+               refcount_set(&fclones->fclone_ref, 2);
         } else {
                 if (skb_pfmemalloc(skb))
                         gfp_mask |= __GFP_MEMALLOC;
@@ -3024,7 +3024,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
                 get_page(pfrag->page);
  
                 skb->truesize += copy;
-               atomic_add(copy, &sk->sk_wmem_alloc);
+               refcount_add(copy, &sk->sk_wmem_alloc);
                 skb->len += copy;
                 skb->data_len += copy;
                 offset += copy;
@@ -3844,7 +3844,7 @@ struct sk_buff *skb_clone_sk(struct sk_buff *skb)
         struct sock *sk = skb->sk;
         struct sk_buff *clone;
  
-       if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt))
+       if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
                 return NULL;
  
         clone = skb_clone(skb, GFP_ATOMIC);
@@ -3915,7 +3915,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
         /* Take a reference to prevent skb_orphan() from freeing the socket,
          * but only if the socket refcount is not zero.
          */
-       if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+       if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
                 *skb_hwtstamps(skb) = *hwtstamps;
                 __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
                 sock_put(sk);
@@ -3997,7 +3997,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
         /* Take a reference to prevent skb_orphan() from freeing the socket,
          * but only if the socket refcount is not zero.
          */
-       if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+       if (likely(refcount_inc_not_zero(&sk->sk_refcnt))) {
                 err = sock_queue_err_skb(sk, skb);
                 sock_put(sk);
         }
diff --git a/net/core/sock.c b/net/core/sock.c

index 6f4b090241c1d1b4f792874491e23aa1506dd8f1..ba0ef6a7dbaf20d415119ef690401bb20ab64eb1 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1528,7 +1528,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                 if (likely(sk->sk_net_refcnt))
                         get_net(net);
                 sock_net_set(sk, net);
-               atomic_set(&sk->sk_wmem_alloc, 1);
+               refcount_set(&sk->sk_wmem_alloc, 1);
  
                 mem_cgroup_sk_alloc(sk);
                 cgroup_sk_alloc(&sk->sk_cgrp_data);
@@ -1552,7 +1552,7 @@ static void __sk_destruct(struct rcu_head *head)
                 sk->sk_destruct(sk);
  
         filter = rcu_dereference_check(sk->sk_filter,
-                                      atomic_read(&sk->sk_wmem_alloc) == 0);
+                                      refcount_read(&sk->sk_wmem_alloc) == 0);
         if (filter) {
                 sk_filter_uncharge(sk, filter);
                 RCU_INIT_POINTER(sk->sk_filter, NULL);
@@ -1602,7 +1602,7 @@ void sk_free(struct sock *sk)
          * some packets are still in some tx queue.
          * If not null, sock_wfree() will call __sk_free(sk) later
          */
-       if (atomic_dec_and_test(&sk->sk_wmem_alloc))
+       if (refcount_dec_and_test(&sk->sk_wmem_alloc))
                 __sk_free(sk);
  }
  EXPORT_SYMBOL(sk_free);
@@ -1659,7 +1659,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                 /*
                  * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
                  */
-               atomic_set(&newsk->sk_wmem_alloc, 1);
+               refcount_set(&newsk->sk_wmem_alloc, 1);
                 atomic_set(&newsk->sk_omem_alloc, 0);
                 sk_init_common(newsk);
  
@@ -1708,7 +1708,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                  * (Documentation/RCU/rculist_nulls.txt for details)
                  */
                 smp_wmb();
-               atomic_set(&newsk->sk_refcnt, 2);
+               refcount_set(&newsk->sk_refcnt, 2);
  
                 /*
                  * Increment the counter in the same struct proto as the master
@@ -1787,7 +1787,7 @@ void sock_wfree(struct sk_buff *skb)
                  * Keep a reference on sk_wmem_alloc, this will be released
                  * after sk_write_space() call
                  */
-               atomic_sub(len - 1, &sk->sk_wmem_alloc);
+               WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
                 sk->sk_write_space(sk);
                 len = 1;
         }
@@ -1795,7 +1795,7 @@ void sock_wfree(struct sk_buff *skb)
          * if sk_wmem_alloc reaches 0, we must finish what sk_free()
          * could not do because of in-flight packets
          */
-       if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
+       if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
                 __sk_free(sk);
  }
  EXPORT_SYMBOL(sock_wfree);
@@ -1807,7 +1807,7 @@ void __sock_wfree(struct sk_buff *skb)
  {
         struct sock *sk = skb->sk;
  
-       if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
+       if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
                 __sk_free(sk);
  }
  
@@ -1829,7 +1829,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
          * is enough to guarantee sk_free() wont free this sock until
          * all in-flight packets are completed
          */
-       atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk->sk_wmem_alloc);
  }
  EXPORT_SYMBOL(skb_set_owner_w);
  
@@ -1851,8 +1851,8 @@ void skb_orphan_partial(struct sk_buff *skb)
                 ) {
                 struct sock *sk = skb->sk;
  
-               if (atomic_inc_not_zero(&sk->sk_refcnt)) {
-                       atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+               if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+                       WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
                         skb->destructor = sock_efree;
                 }
         } else {
@@ -1912,7 +1912,7 @@ EXPORT_SYMBOL(sock_i_ino);
  struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                              gfp_t priority)
  {
-       if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+       if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                 struct sk_buff *skb = alloc_skb(size, priority);
                 if (skb) {
                         skb_set_owner_w(skb, sk);
@@ -1987,7 +1987,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                         break;
                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-               if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+               if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                         break;
                 if (sk->sk_shutdown & SEND_SHUTDOWN)
                         break;
@@ -2310,7 +2310,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
                 if (sk->sk_type == SOCK_STREAM) {
                         if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
                                 return 1;
-               } else if (atomic_read(&sk->sk_wmem_alloc) <
+               } else if (refcount_read(&sk->sk_wmem_alloc) <
                            prot->sysctl_wmem[0])
                                 return 1;
         }
@@ -2577,7 +2577,7 @@ static void sock_def_write_space(struct sock *sk)
         /* Do not wake up a writer until he can make "significant"
          * progress.  --DaveM
          */
-       if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+       if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
                 wq = rcu_dereference(sk->sk_wq);
                 if (skwq_has_sleeper(wq))
                         wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
@@ -2687,7 +2687,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
          * (Documentation/RCU/rculist_nulls.txt for details)
          */
         smp_wmb();
-       atomic_set(&sk->sk_refcnt, 1);
+       refcount_set(&sk->sk_refcnt, 1);
         atomic_set(&sk->sk_drops, 0);
  }
  EXPORT_SYMBOL(sock_init_data);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c

index 4fccc0c37fbdb4406d1370802d57a8f0aa9d376f..c376af5bfdfb34774d82de6858ec0e4b6e3380bb 100644 (file)
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -353,7 +353,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
         if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
             np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
             np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-               atomic_inc(&skb->users);
+               refcount_inc(&skb->users);
                 ireq->pktopts = skb;
         }
         ireq->ir_iif = sk->sk_bound_dev_if;
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c

index eeb5fc561f800f042023f95e88f0fe5fd64d3f52..21dedf6fd0f76dec22b2b3685beb89cfefea7ded 100644 (file)
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -559,7 +559,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq,
                    (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
                    (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
                    dn->n.nud_state,
-                  atomic_read(&dn->n.refcnt),
+                  refcount_read(&dn->n.refcnt),
                    dn->blksize,
                    (dn->n.dev) ? dn->n.dev->name : "?");
         read_unlock(&n->lock);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index 58925b6597de83e7d643fb9b1c7e992c9748ae1c..76c2077c3f5b697bf8e0d4b030b70dde8fc70345 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
         }
  
         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-       WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+       WARN_ON(refcount_read(&sk->sk_wmem_alloc));
         WARN_ON(sk->sk_wmem_queued);
         WARN_ON(sk->sk_forward_alloc);
  
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c

index ae206163c273381ba6e8bd8a24fa050619a4a6ae..c2044775ae7daae04dd272e4206d49d5b5861779 100644 (file)
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -265,7 +265,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
                     entry->key_len == key_len &&
                     memcmp(entry->key, key, key_len) == 0) {
                         entry->activity += 1;
-                       atomic_inc(&entry->lsm_data->refcount);
+                       refcount_inc(&entry->lsm_data->refcount);
                         secattr->cache = entry->lsm_data;
                         secattr->flags |= NETLBL_SECATTR_CACHE;
                         secattr->type = NETLBL_NLTYPE_CIPSOV4;
@@ -332,7 +332,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
         }
         entry->key_len = cipso_ptr_len;
         entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len);
-       atomic_inc(&secattr->cache->refcount);
+       refcount_inc(&secattr->cache->refcount);
         entry->lsm_data = secattr->cache;
  
         bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c

index a7dd088d5fc96da259813cae48c374527c9e251e..38d9af9b917c08685deb3288baab84189ec9c9a0 100644 (file)
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -252,7 +252,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
         /* Reference in_dev->dev */
         dev_hold(dev);
         /* Account for reference dev->ip_ptr (below) */
-       in_dev_hold(in_dev);
+       refcount_set(&in_dev->refcnt, 1);
  
         err = devinet_sysctl_register(in_dev);
         if (err) {
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c

index 1f18b46502539617f9bd33ea7d584c4df7f818dd..0cbee0a666ffd2a1b7451b0b07513b1cf1cebfc0 100644 (file)
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
                         skb->data_len += tailen;
                         skb->truesize += tailen;
                         if (sk)
-                               atomic_add(tailen, &sk->sk_wmem_alloc);
+                               refcount_add(tailen, &sk->sk_wmem_alloc);
  
                         goto out;
                 }
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c

index c4032302d7cd40402decc817a3dbf4f2708b863b..28f14afd0dd3a392da3b84c5e791fffaf46ad254 100644 (file)
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -173,7 +173,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
  
  static void ip_ma_put(struct ip_mc_list *im)
  {
-       if (atomic_dec_and_test(&im->refcnt)) {
+       if (refcount_dec_and_test(&im->refcnt)) {
                 in_dev_put(im->interface);
                 kfree_rcu(im, rcu);
         }
@@ -199,7 +199,7 @@ static void igmp_stop_timer(struct ip_mc_list *im)
  {
         spin_lock_bh(&im->lock);
         if (del_timer(&im->timer))
-               atomic_dec(&im->refcnt);
+               refcount_dec(&im->refcnt);
         im->tm_running = 0;
         im->reporter = 0;
         im->unsolicit_count = 0;
@@ -213,7 +213,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
  
         im->tm_running = 1;
         if (!mod_timer(&im->timer, jiffies+tv+2))
-               atomic_inc(&im->refcnt);
+               refcount_inc(&im->refcnt);
  }
  
  static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -249,7 +249,7 @@ static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
                         spin_unlock_bh(&im->lock);
                         return;
                 }
-               atomic_dec(&im->refcnt);
+               refcount_dec(&im->refcnt);
         }
         igmp_start_timer(im, max_delay);
         spin_unlock_bh(&im->lock);
@@ -1374,7 +1374,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
         /* initial mode is (EX, empty) */
         im->sfmode = MCAST_EXCLUDE;
         im->sfcount[MCAST_EXCLUDE] = 1;
-       atomic_set(&im->refcnt, 1);
+       refcount_set(&im->refcnt, 1);
         spin_lock_init(&im->lock);
  #ifdef CONFIG_IP_MULTICAST
         setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c

index a3fa1a5b6d98eb1502ada67bf5d63b12f199c640..4089c013cb03b12e31ddffbb7ae903542c012ae0 100644 (file)
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -756,7 +756,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
          * are committed to memory and refcnt initialized.
          */
         smp_wmb();
-       atomic_set(&req->rsk_refcnt, 2 + 1);
+       refcount_set(&req->rsk_refcnt, 2 + 1);
  }
  
  void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c

index b5e9317eaf9eea802089dc0c947d88a932ee857a..96e95e83cc61625c2e36ecf48d02292411bef25f 100644 (file)
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -276,11 +276,11 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
  void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
  {
         if (del_timer(&fq->timer))
-               atomic_dec(&fq->refcnt);
+               refcount_dec(&fq->refcnt);
  
         if (!(fq->flags & INET_FRAG_COMPLETE)) {
                 fq_unlink(fq, f);
-               atomic_dec(&fq->refcnt);
+               refcount_dec(&fq->refcnt);
         }
  }
  EXPORT_SYMBOL(inet_frag_kill);
@@ -329,7 +329,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
          */
         hlist_for_each_entry(qp, &hb->chain, list) {
                 if (qp->net == nf && f->match(qp, arg)) {
-                       atomic_inc(&qp->refcnt);
+                       refcount_inc(&qp->refcnt);
                         spin_unlock(&hb->chain_lock);
                         qp_in->flags |= INET_FRAG_COMPLETE;
                         inet_frag_put(qp_in, f);
@@ -339,9 +339,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
  #endif
         qp = qp_in;
         if (!mod_timer(&qp->timer, jiffies + nf->timeout))
-               atomic_inc(&qp->refcnt);
+               refcount_inc(&qp->refcnt);
  
-       atomic_inc(&qp->refcnt);
+       refcount_inc(&qp->refcnt);
         hlist_add_head(&qp->list, &hb->chain);
  
         spin_unlock(&hb->chain_lock);
@@ -370,7 +370,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
  
         setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
         spin_lock_init(&q->lock);
-       atomic_set(&q->refcnt, 1);
+       refcount_set(&q->refcnt, 1);
  
         return q;
  }
@@ -405,7 +405,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
         spin_lock(&hb->chain_lock);
         hlist_for_each_entry(q, &hb->chain, list) {
                 if (q->net == nf && f->match(q, key)) {
-                       atomic_inc(&q->refcnt);
+                       refcount_inc(&q->refcnt);
                         spin_unlock(&hb->chain_lock);
                         return q;
                 }
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c

index e9a59d2d91d4061f299065173d9212eefe72ef89..2e3389d614d1689856c3a8a9929dba8f7e7e1a37 100644 (file)
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -43,7 +43,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
  /* This function handles inet_sock, but also timewait and request sockets
   * for IPv4/IPv6.
   */
-u32 sk_ehashfn(const struct sock *sk)
+static u32 sk_ehashfn(const struct sock *sk)
  {
  #if IS_ENABLED(CONFIG_IPV6)
         if (sk->sk_family == AF_INET6 &&
@@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(__inet_lookup_listener);
  /* All sockets share common refcount, but have different destructors */
  void sock_gen_put(struct sock *sk)
  {
-       if (!atomic_dec_and_test(&sk->sk_refcnt))
+       if (!refcount_dec_and_test(&sk->sk_refcnt))
                 return;
  
         if (sk->sk_state == TCP_TIME_WAIT)
@@ -287,7 +287,7 @@ begin:
                         continue;
                 if (likely(INET_MATCH(sk, net, acookie,
                                       saddr, daddr, ports, dif))) {
-                       if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+                       if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
                                 goto out;
                         if (unlikely(!INET_MATCH(sk, net, acookie,
                                                  saddr, daddr, ports, dif))) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c

index f8aff2c71cdee55ebb4ac6001e71a874e9eaf6bb..5b039159e67a60c13bc2399ae140c90d31ae3dc5 100644 (file)
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -76,7 +76,7 @@ void inet_twsk_free(struct inet_timewait_sock *tw)
  
  void inet_twsk_put(struct inet_timewait_sock *tw)
  {
-       if (atomic_dec_and_test(&tw->tw_refcnt))
+       if (refcount_dec_and_test(&tw->tw_refcnt))
                 inet_twsk_free(tw);
  }
  EXPORT_SYMBOL_GPL(inet_twsk_put);
@@ -131,7 +131,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
          * We can use atomic_set() because prior spin_lock()/spin_unlock()
          * committed into memory all tw fields.
          */
-       atomic_set(&tw->tw_refcnt, 4);
+       refcount_set(&tw->tw_refcnt, 4);
         inet_twsk_add_node_rcu(tw, &ehead->chain);
  
         /* Step 3: Remove SK from hash chain */
@@ -195,7 +195,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
                  * to a non null value before everything is setup for this
                  * timewait socket.
                  */
-               atomic_set(&tw->tw_refcnt, 0);
+               refcount_set(&tw->tw_refcnt, 0);
  
                 __module_get(tw->tw_prot->owner);
         }
@@ -278,7 +278,7 @@ restart:
                                 atomic_read(&twsk_net(tw)->count))
                                 continue;
  
-                       if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
+                       if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
                                 continue;
  
                         if (unlikely((tw->tw_family != family) ||
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c

index 86fa4580954060b1798c7f3382d77cbec41e6ff7..c5a117cc66198ca0fe9d49e1c15f3c3110a0d634 100644 (file)
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -115,7 +115,7 @@ static void inetpeer_gc_worker(struct work_struct *work)
  
                 n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
  
-               if (!atomic_read(&p->refcnt)) {
+               if (refcount_read(&p->refcnt) == 1) {
                         list_del(&p->gc_list);
                         kmem_cache_free(peer_cachep, p);
                 }
@@ -202,10 +202,11 @@ static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr,
                 int cmp = inetpeer_addr_cmp(daddr, &u->daddr);
                 if (cmp == 0) {
                         /* Before taking a reference, check if this entry was
-                        * deleted (refcnt=-1)
+                        * deleted (refcnt=0)
                          */
-                       if (!atomic_add_unless(&u->refcnt, 1, -1))
+                       if (!refcount_inc_not_zero(&u->refcnt)) {
                                 u = NULL;
+                       }
                         return u;
                 }
                 if (cmp == -1)
@@ -382,11 +383,10 @@ static int inet_peer_gc(struct inet_peer_base *base,
         while (stackptr > stack) {
                 stackptr--;
                 p = rcu_deref_locked(**stackptr, base);
-               if (atomic_read(&p->refcnt) == 0) {
+               if (refcount_read(&p->refcnt) == 1) {
                         smp_rmb();
                         delta = (__u32)jiffies - p->dtime;
-                       if (delta >= ttl &&
-                           atomic_cmpxchg(&p->refcnt, 0, -1) == 0) {
+                       if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) {
                                 p->gc_next = gchead;
                                 gchead = p;
                         }
@@ -432,7 +432,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
  relookup:
         p = lookup(daddr, stack, base);
         if (p != peer_avl_empty) {
-               atomic_inc(&p->refcnt);
+               refcount_inc(&p->refcnt);
                 write_sequnlock_bh(&base->lock);
                 return p;
         }
@@ -444,7 +444,7 @@ relookup:
         p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
         if (p) {
                 p->daddr = *daddr;
-               atomic_set(&p->refcnt, 1);
+               refcount_set(&p->refcnt, 2);
                 atomic_set(&p->rid, 0);
                 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
                 p->rate_tokens = 0;
@@ -468,7 +468,7 @@ void inet_putpeer(struct inet_peer *p)
  {
         p->dtime = (__u32)jiffies;
         smp_mb__before_atomic();
-       atomic_dec(&p->refcnt);
+       refcount_dec(&p->refcnt);
  }
  EXPORT_SYMBOL_GPL(inet_putpeer);
  
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c

index b3cdeec85f1f2c612c362590e828f50596a5c247..9a8cfac503dc8c83e24a7b168dbd4da781a30284 100644 (file)
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -312,7 +312,7 @@ static int ip_frag_reinit(struct ipq *qp)
         unsigned int sum_truesize = 0;
  
         if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
-               atomic_inc(&qp->q.refcnt);
+               refcount_inc(&qp->q.refcnt);
                 return -ETIMEDOUT;
         }
  
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 7a3fd25e8913a99d0fcbb256bc9001f6f1d4dd6f..2e61e2af251ae7b34c2ce19190119fe7a60e0e02 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -964,7 +964,8 @@ static int __ip_append_data(struct sock *sk,
                 csummode = CHECKSUM_PARTIAL;
  
         cork->length += length;
-       if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
+       if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
+            (skb && skb_is_gso(skb))) &&
             (sk->sk_protocol == IPPROTO_UDP) &&
             (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
             (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
@@ -1036,7 +1037,7 @@ alloc_new_skb:
                                                 (flags & MSG_DONTWAIT), &err);
                         } else {
                                 skb = NULL;
-                               if (atomic_read(&sk->sk_wmem_alloc) <=
+                               if (refcount_read(&sk->sk_wmem_alloc) <=
                                     2 * sk->sk_sndbuf)
                                         skb = sock_wmalloc(sk,
                                                            alloclen + hh_len + 15, 1,
@@ -1144,7 +1145,7 @@ alloc_new_skb:
                         skb->len += copy;
                         skb->data_len += copy;
                         skb->truesize += copy;
-                       atomic_add(copy, &sk->sk_wmem_alloc);
+                       refcount_add(copy, &sk->sk_wmem_alloc);
                 }
                 offset += copy;
                 length -= copy;
@@ -1368,7 +1369,7 @@ ssize_t   ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
                 skb->len += len;
                 skb->data_len += len;
                 skb->truesize += len;
-               atomic_add(len, &sk->sk_wmem_alloc);
+               refcount_add(len, &sk->sk_wmem_alloc);
                 offset += len;
                 size -= len;
         }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c

index a1d521be612bd6d2bd92c64dbb32fefd80881470..bb909f1d7537a9d56fc960b7932f5cc881ec6a16 100644 (file)
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2406,6 +2406,67 @@ errout:
         rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
  }
  
+/* RTM_GETROUTE request handler for RTNL_FAMILY_IPMR: look up the cache
+ * entry for (RTA_SRC, RTA_DST) in the table selected by RTA_TABLE (absent
+ * or zero means RT_TABLE_DEFAULT; absent addresses are treated as 0) and
+ * unicast it back to the requesting port as an RTM_NEWROUTE message.
+ * Returns 0 on success or a negative errno.
+ */
+static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+                            struct netlink_ext_ack *extack)
+{
+       struct net *net = sock_net(in_skb->sk);
+       struct nlattr *tb[RTA_MAX + 1];
+       struct sk_buff *skb = NULL;
+       struct mfc_cache *cache;
+       struct mr_table *mrt;
+       __be32 src, grp;
+       u32 tableid;
+       int err;
+
+       err = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
+                         rtm_ipv4_policy, extack);
+       if (err < 0)
+               return err;
+
+       src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
+       grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
+       tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+
+       /* ipmr_get_table() returns NULL, not an ERR_PTR(), on a miss */
+       mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
+       if (!mrt) {
+               err = -ENOENT;
+               goto errout_free;
+       }
+
+       /* entries are added/deleted only under RTNL */
+       rcu_read_lock();
+       cache = ipmr_cache_find(mrt, src, grp);
+       rcu_read_unlock();
+       if (!cache) {
+               err = -ENOENT;
+               goto errout_free;
+       }
+
+       skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
+       if (!skb) {
+               err = -ENOBUFS;
+               goto errout_free;
+       }
+
+       err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
+                              nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0);
+       if (err < 0)
+               goto errout_free;
+
+       return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+
+errout_free:
+       kfree_skb(skb);
+       return err;
+}
+
  static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
  {
         struct net *net = sock_net(skb->sk);
@@ -3053,7 +3114,7 @@ int __init ip_mr_init(void)
         }
  #endif
         rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
-                     NULL, ipmr_rtm_dumproute, NULL);
+                     ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL);
         rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
                       ipmr_rtm_route, NULL, NULL);
         rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c

index 038f293c23766de6cd77f361269743498c2dd18e..7d72decb80f9f9c4150bd2a42c4b802ba1fd7f17 100644 (file)
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -47,7 +47,7 @@ struct clusterip_config {
  
         __be32 clusterip;                       /* the IP address */
         u_int8_t clustermac[ETH_ALEN];          /* the MAC address */
-       struct net_device *dev;                 /* device */
+       int ifindex;                            /* device ifindex */
         u_int16_t num_total_nodes;              /* total number of nodes */
         unsigned long local_nodes;              /* node number array */
  
@@ -57,6 +57,9 @@ struct clusterip_config {
         enum clusterip_hashmode hash_mode;      /* which hashing mode */
         u_int32_t hash_initval;                 /* hash initialization */
         struct rcu_head rcu;
+
+       char ifname[IFNAMSIZ];                  /* device ifname */
+       struct notifier_block notifier;         /* refresh c->ifindex in it */
  };
  
  #ifdef CONFIG_PROC_FS
@@ -98,9 +101,8 @@ clusterip_config_put(struct clusterip_config *c)
   * entry(rule) is removed, remove the config from lists, but don't free it
   * yet, since proc-files could still be holding references */
  static inline void
-clusterip_config_entry_put(struct clusterip_config *c)
+clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
  {
-       struct net *net = dev_net(c->dev);
         struct clusterip_net *cn = net_generic(net, clusterip_net_id);
  
         local_bh_disable();
@@ -109,8 +111,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
                 spin_unlock(&cn->lock);
                 local_bh_enable();
  
-               dev_mc_del(c->dev, c->clustermac);
-               dev_put(c->dev);
+               unregister_netdevice_notifier(&c->notifier);
  
                 /* In case anyone still accesses the file, the open/close
                  * functions are also incrementing the refcount on their own,
@@ -170,19 +171,55 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
                 set_bit(i->local_nodes[n] - 1, &c->local_nodes);
  }
  
-static struct clusterip_config *
-clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
-                     struct net_device *dev)
+static int
+clusterip_netdev_event(struct notifier_block *this, unsigned long event,
+                      void *ptr)
  {
-       struct net *net = dev_net(dev);
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
         struct clusterip_config *c;
+
+       c = container_of(this, struct clusterip_config, notifier);
+       switch (event) {
+       case NETDEV_REGISTER:
+               if (!strcmp(dev->name, c->ifname)) {
+                       c->ifindex = dev->ifindex;
+                       dev_mc_add(dev, c->clustermac);
+               }
+               break;
+       case NETDEV_UNREGISTER:
+               if (dev->ifindex == c->ifindex) {
+                       dev_mc_del(dev, c->clustermac);
+                       c->ifindex = -1;
+               }
+               break;
+       case NETDEV_CHANGENAME:
+               if (!strcmp(dev->name, c->ifname)) {
+                       c->ifindex = dev->ifindex;
+                       dev_mc_add(dev, c->clustermac);
+               } else if (dev->ifindex == c->ifindex) {
+                       dev_mc_del(dev, c->clustermac);
+                       c->ifindex = -1;
+               }
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static struct clusterip_config *
+clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
+                     __be32 ip, const char *iniface)
+{
         struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+       struct clusterip_config *c;
+       int err;
  
         c = kzalloc(sizeof(*c), GFP_ATOMIC);
         if (!c)
                 return ERR_PTR(-ENOMEM);
  
-       c->dev = dev;
+       strcpy(c->ifname, iniface);
+       c->ifindex = -1;
         c->clusterip = ip;
         memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
         c->num_total_nodes = i->num_total_nodes;
@@ -213,17 +250,27 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
                                           cn->procdir,
                                           &clusterip_proc_fops, c);
                 if (!c->pde) {
-                       spin_lock_bh(&cn->lock);
-                       list_del_rcu(&c->list);
-                       spin_unlock_bh(&cn->lock);
-                       kfree(c);
-
-                       return ERR_PTR(-ENOMEM);
+                       err = -ENOMEM;
+                       goto err;
                 }
         }
  #endif
  
-       return c;
+       c->notifier.notifier_call = clusterip_netdev_event;
+       err = register_netdevice_notifier(&c->notifier);
+       if (!err)
+               return c;
+
+#ifdef CONFIG_PROC_FS
+       proc_remove(c->pde);
+err:
+#endif
+       spin_lock_bh(&cn->lock);
+       list_del_rcu(&c->list);
+       spin_unlock_bh(&cn->lock);
+       kfree(c);
+
+       return ERR_PTR(err);
  }
  
  #ifdef CONFIG_PROC_FS
@@ -425,14 +472,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
                                         e->ip.iniface);
                                 return -ENOENT;
                         }
+                       dev_put(dev);
  
-                       config = clusterip_config_init(cipinfo,
-                                                       e->ip.dst.s_addr, dev);
-                       if (IS_ERR(config)) {
-                               dev_put(dev);
+                       config = clusterip_config_init(par->net, cipinfo,
+                                                      e->ip.dst.s_addr,
+                                                      e->ip.iniface);
+                       if (IS_ERR(config))
                                 return PTR_ERR(config);
-                       }
-                       dev_mc_add(config->dev, config->clustermac);
                 }
         }
         cipinfo->config = config;
@@ -458,7 +504,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
  
         /* if no more entries are referencing the config, remove it
          * from the list and destroy the proc entry */
-       clusterip_config_entry_put(cipinfo->config);
+       clusterip_config_entry_put(par->net, cipinfo->config);
  
         clusterip_config_put(cipinfo->config);
  
@@ -558,10 +604,9 @@ arp_mangle(void *priv,
          * addresses on different interfacs.  However, in the CLUSTERIP case
          * this wouldn't work, since we didn't subscribe the mcast group on
          * other interfaces */
-       if (c->dev != state->out) {
-               pr_debug("not mangling arp reply on different "
-                        "interface: cip'%s'-skb'%s'\n",
-                        c->dev->name, state->out->name);
+       if (c->ifindex != state->out->ifindex) {
+               pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
+                        c->ifindex, state->out->ifindex);
                 clusterip_config_put(c);
                 return NF_ACCEPT;
         }
@@ -743,14 +788,20 @@ static const struct file_operations clusterip_proc_fops = {
  static int clusterip_net_init(struct net *net)
  {
         struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+       int ret;
  
         INIT_LIST_HEAD(&cn->configs);
  
         spin_lock_init(&cn->lock);
  
+       ret = nf_register_net_hook(net, &cip_arp_ops);
+       if (ret < 0)
+               return ret;
+
  #ifdef CONFIG_PROC_FS
         cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
         if (!cn->procdir) {
+               nf_unregister_net_hook(net, &cip_arp_ops);
                 pr_err("Unable to proc dir entry\n");
                 return -ENOMEM;
         }
@@ -765,6 +816,7 @@ static void clusterip_net_exit(struct net *net)
         struct clusterip_net *cn = net_generic(net, clusterip_net_id);
         proc_remove(cn->procdir);
  #endif
+       nf_unregister_net_hook(net, &cip_arp_ops);
  }
  
  static struct pernet_operations clusterip_net_ops = {
@@ -786,17 +838,11 @@ static int __init clusterip_tg_init(void)
         if (ret < 0)
                 goto cleanup_subsys;
  
-       ret = nf_register_hook(&cip_arp_ops);
-       if (ret < 0)
-               goto cleanup_target;
-
         pr_info("ClusterIP Version %s loaded successfully\n",
                 CLUSTERIP_VERSION);
  
         return 0;
  
-cleanup_target:
-       xt_unregister_target(&clusterip_tg_reg);
  cleanup_subsys:
         unregister_pernet_subsys(&clusterip_net_ops);
         return ret;
@@ -806,7 +852,6 @@ static void __exit clusterip_tg_exit(void)
  {
         pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
  
-       nf_unregister_hook(&cip_arp_ops);
         xt_unregister_target(&clusterip_tg_reg);
         unregister_pernet_subsys(&clusterip_net_ops);
  
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c

index dc1dea15c1b4fb0d7ad3ad02779cde147e52c2cf..f39037fca923566c9d75c7b88d881cd3a50a6992 100644 (file)
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -98,8 +98,8 @@ static int masq_device_event(struct notifier_block *this,
                  */
                 NF_CT_ASSERT(dev->ifindex != 0);
  
-               nf_ct_iterate_cleanup(net, device_cmp,
-                                     (void *)(long)dev->ifindex, 0, 0);
+               nf_ct_iterate_cleanup_net(net, device_cmp,
+                                         (void *)(long)dev->ifindex, 0, 0);
         }
  
         return NOTIFY_DONE;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c

index ccfbce13a6333a65dab64e4847dd510dfafb1b43..b8f0db54b1978c800c1f606e0d06a015499a2972 100644 (file)
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -290,7 +290,7 @@ void ping_close(struct sock *sk, long timeout)
  {
         pr_debug("ping_close(sk=%p,sk->num=%u)\n",
                  inet_sk(sk), inet_sk(sk)->inet_num);
-       pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
+       pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt));
  
         sk_common_release(sk);
  }
@@ -1127,7 +1127,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
                 0, 0L, 0,
                 from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
                 0, sock_i_ino(sp),
-               atomic_read(&sp->sk_refcnt), sp,
+               refcount_read(&sp->sk_refcnt), sp,
                 atomic_read(&sp->sk_drops));
  }
  
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c

index bdffad875691ce7240040cfaf188eb4cf3ec7307..b0bb5d0a30bd50b84f0d6a3bccd39b996b4678b3 100644 (file)
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1063,7 +1063,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
                 0, 0L, 0,
                 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
                 0, sock_i_ino(sp),
-               atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
+               refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
  }
  
  static int raw_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c

index 7835bb4a1fab2b335c65001cc3c9233ffb4fd5cc..0905cf04c2a4e41e06a047ab52de6ab95a5afb61 100644 (file)
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -213,7 +213,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
         child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
                                                  NULL, &own_req);
         if (child) {
-               atomic_set(&req->rsk_refcnt, 1);
+               refcount_set(&req->rsk_refcnt, 1);
                 tcp_sk(child)->tsoffset = tsoff;
                 sock_rps_save_rxhash(child, skb);
                 inet_csk_reqsk_queue_add(sk, req, child);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index 058f509ca98ebeb950c286d135f95373c1536dbc..71ce33decd971feee72f225b2bf7ccaf2f5f456f 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -664,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
         return skb->len < size_goal &&
                sysctl_tcp_autocorking &&
                skb != tcp_write_queue_head(sk) &&
-              atomic_read(&sk->sk_wmem_alloc) > skb->truesize;
+              refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
  }
  
  static void tcp_push(struct sock *sk, int flags, int mss_now,
@@ -692,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
                 /* It is possible TX completion already happened
                  * before we set TSQ_THROTTLED.
                  */
-               if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize)
+               if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize)
                         return;
         }
  
@@ -2350,6 +2350,8 @@ int tcp_disconnect(struct sock *sk, int flags)
         tcp_init_send_head(sk);
         memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
         __sk_dst_reset(sk);
+       dst_release(sk->sk_rx_dst);
+       sk->sk_rx_dst = NULL;
         tcp_saved_syn_free(tp);
  
         /* Clean up fastopen related fields */
@@ -2479,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                 name[val] = 0;
  
                 lock_sock(sk);
-               err = tcp_set_congestion_control(sk, name);
+               err = tcp_set_congestion_control(sk, name, true);
                 release_sock(sk);
                 return err;
         }
@@ -3062,6 +3064,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                 if (get_user(len, optlen))
                         return -EFAULT;
                 len = min_t(unsigned int, len, TCP_ULP_NAME_MAX);
+               if (!icsk->icsk_ulp_ops) {
+                       if (put_user(0, optlen))
+                               return -EFAULT;
+                       return 0;
+               }
                 if (put_user(len, optlen))
                         return -EFAULT;
                 if (copy_to_user(optval, icsk->icsk_ulp_ops->name, len))
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c

index 324c9bcc5456b499b59cef40838b4e9829119e13..fde983f6376be98247b9f2ff3d0307c2502e6392 100644 (file)
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
                 INET_ECN_dontxmit(sk);
  }
  
-static void tcp_reinit_congestion_control(struct sock *sk,
-                                         const struct tcp_congestion_ops *ca)
+void tcp_reinit_congestion_control(struct sock *sk,
+                                  const struct tcp_congestion_ops *ca)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
  
@@ -333,8 +333,12 @@ out:
         return ret;
  }
  
-/* Change congestion control for socket */
-int tcp_set_congestion_control(struct sock *sk, const char *name)
+/* Change congestion control for socket. If load is false, then it is the
+ * responsibility of the caller to call tcp_init_congestion_control or
+ * tcp_reinit_congestion_control (if the current congestion control was
+ * already initialized).
+ */
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         const struct tcp_congestion_ops *ca;
@@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
                 return -EPERM;
  
         rcu_read_lock();
-       ca = __tcp_ca_find_autoload(name);
+       if (!load)
+               ca = tcp_ca_find(name);
+       else
+               ca = __tcp_ca_find_autoload(name);
         /* No change asking for existing value */
         if (ca == icsk->icsk_ca_ops) {
                 icsk->icsk_ca_setsockopt = 1;
                 goto out;
         }
-       if (!ca)
+       if (!ca) {
                 err = -ENOENT;
-       else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
-                  ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
+       } else if (!load) {
+               icsk->icsk_ca_ops = ca;
+               if (!try_module_get(ca->owner))
+                       err = -EBUSY;
+       } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
+                    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
                 err = -EPERM;
-       else if (!try_module_get(ca->owner))
+       } else if (!try_module_get(ca->owner)) {
                 err = -EBUSY;
-       else
+       } else {
                 tcp_reinit_congestion_control(sk, ca);
+       }
   out:
         rcu_read_unlock();
         return err;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c

index 4af82b914dd4bbdc47e37cf1cf70f206bd186db5..ce9c7fef200f3a493de69e87464e7a1e0ba9d889 100644 (file)
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -214,13 +214,14 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
         inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
                                   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
  
-       atomic_set(&req->rsk_refcnt, 2);
+       refcount_set(&req->rsk_refcnt, 2);
  
         /* Now finish processing the fastopen child socket. */
         inet_csk(child)->icsk_af_ops->rebuild_header(child);
         tcp_init_congestion_control(child);
         tcp_mtup_init(child);
         tcp_init_metrics(child);
+       tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
         tcp_init_buffer_space(child);
  
         tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 2ab7e2fa9bb9727a6d22552f851d6254ea074481..2920e0cb09f8d3e743eb4f49c16060ba1af48ed4 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5571,7 +5571,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
         icsk->icsk_af_ops->rebuild_header(sk);
  
         tcp_init_metrics(sk);
-
+       tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
         tcp_init_congestion_control(sk);
  
         /* Prevent spurious tcp_cwnd_restart() on first data
@@ -5977,6 +5977,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                 } else {
                         /* Make sure socket is routed, for correct metrics. */
                         icsk->icsk_af_ops->rebuild_header(sk);
+                       tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
                         tcp_init_congestion_control(sk);
  
                         tcp_mtup_init(sk);
@@ -6190,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
         ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
  
         if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
-           (ecn_ok_dst & DST_FEATURE_ECN_CA))
+           (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
+           tcp_bpf_ca_needs_ecn((struct sock *)req))
                 inet_rsk(req)->ecn_ok = 1;
  }
  
@@ -6406,7 +6408,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
         } else {
                 tcp_rsk(req)->tfo_listener = false;
                 if (!want_cookie)
-                       inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+                       inet_csk_reqsk_queue_hash_add(sk, req,
+                               tcp_timeout_init((struct sock *)req));
                 af_ops->send_synack(sk, dst, &fl, req, &foc,
                                     !want_cookie ? TCP_SYNACK_NORMAL :
                                                    TCP_SYNACK_COOKIE);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index d774bcd9a54bce80ae679c8700a0bc8eaec1a088..6ec6900eb300364bc908de8707b2a175abf04b77 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2323,7 +2323,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
                 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
                 icsk->icsk_probes_out,
                 sock_i_ino(sk),
-               atomic_read(&sk->sk_refcnt), sk,
+               refcount_read(&sk->sk_refcnt), sk,
                 jiffies_to_clock_t(icsk->icsk_rto),
                 jiffies_to_clock_t(icsk->icsk_ack.ato),
                 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
@@ -2349,7 +2349,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
                 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
                 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
-               atomic_read(&tw->tw_refcnt), tw);
+               refcount_read(&tw->tw_refcnt), tw);
  }
  
  #define TMPSZ 150
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c

index d30ee31e94ebd4d76a0f2bd910f213bb994770f3..0ff83c1637d894b7e653bcbc7be35099d036ea07 100644 (file)
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
         int full_space = tcp_full_space(sk_listener);
         u32 window_clamp;
         __u8 rcv_wscale;
+       u32 rcv_wnd;
         int mss;
  
         mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
@@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
             (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
                 req->rsk_window_clamp = full_space;
  
+       rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req);
+       if (rcv_wnd == 0)
+               rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+       else if (full_space < rcv_wnd * mss)
+               full_space = rcv_wnd * mss;
+
         /* tcp_full_space because it is guaranteed to be the first packet */
         tcp_select_initial_window(full_space,
                 mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
                 &req->rsk_window_clamp,
                 ireq->wscale_ok,
                 &rcv_wscale,
-               dst_metric(dst, RTAX_INITRWND));
+               rcv_wnd);
         ireq->rcv_wscale = rcv_wscale;
  }
  EXPORT_SYMBOL(tcp_openreq_init_rwin);
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c

index bc68da38ea869ccf8100fd130b33f9d10f185255..11f69bbf93072b7b4dbc3a0485c9f9e0b9ba30b3 100644 (file)
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
                 swap(gso_skb->sk, skb->sk);
                 swap(gso_skb->destructor, skb->destructor);
                 sum_truesize += skb->truesize;
-               atomic_add(sum_truesize - gso_skb->truesize,
+               refcount_add(sum_truesize - gso_skb->truesize,
                            &skb->sk->sk_wmem_alloc);
         }
  
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 9a9c395b62352513a42b3353434be8134ac9cdd7..4d36f0b093e61162a81aa52987e3a9d7719517c0 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
         TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
         if (!(tp->ecn_flags & TCP_ECN_OK))
                 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
-       else if (tcp_ca_needs_ecn(sk))
+       else if (tcp_ca_needs_ecn(sk) ||
+                tcp_bpf_ca_needs_ecn(sk))
                 INET_ECN_xmit(sk);
  }
  
@@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
  static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
  {
         struct tcp_sock *tp = tcp_sk(sk);
+       bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
         bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
-                      tcp_ca_needs_ecn(sk);
+               tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
  
         if (!use_ecn) {
                 const struct dst_entry *dst = __sk_dst_get(sk);
@@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
         if (use_ecn) {
                 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
                 tp->ecn_flags = TCP_ECN_OK;
-               if (tcp_ca_needs_ecn(sk))
+               if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
                         INET_ECN_xmit(sk);
         }
  }
@@ -861,12 +863,11 @@ void tcp_wfree(struct sk_buff *skb)
         struct sock *sk = skb->sk;
         struct tcp_sock *tp = tcp_sk(sk);
         unsigned long flags, nval, oval;
-       int wmem;
  
         /* Keep one reference on sk_wmem_alloc.
          * Will be released by sk_free() from here or tcp_tasklet_func()
          */
-       wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+       WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc));
  
         /* If this softirq is serviced by ksoftirqd, we are likely under stress.
          * Wait until our queues (qdisc + devices) are drained.
@@ -875,7 +876,7 @@ void tcp_wfree(struct sk_buff *skb)
          * - chance for incoming ACK (processed by another cpu maybe)
          *   to migrate this flow (skb->ooo_okay will be eventually set)
          */
-       if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+       if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
                 goto out;
  
         for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
@@ -925,7 +926,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
                 if (nval != oval)
                         continue;
  
-               if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+               if (!refcount_inc_not_zero(&sk->sk_wmem_alloc))
                         break;
                 /* queue this socket to tasklet queue */
                 tsq = this_cpu_ptr(&tsq_tasklet);
@@ -1045,7 +1046,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
         skb->sk = sk;
         skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
         skb_set_hash_from_sk(skb, sk);
-       atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+       refcount_add(skb->truesize, &sk->sk_wmem_alloc);
  
         skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
  
@@ -2176,7 +2177,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
         limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
         limit <<= factor;
  
-       if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+       if (refcount_read(&sk->sk_wmem_alloc) > limit) {
                 /* Always send the 1st or 2nd skb in write queue.
                  * No need to wait for TX completion to call us back,
                  * after softirq/tasklet schedule.
@@ -2192,7 +2193,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
                  * test again the condition.
                  */
                 smp_mb__after_atomic();
-               if (atomic_read(&sk->sk_wmem_alloc) > limit)
+               if (refcount_read(&sk->sk_wmem_alloc) > limit)
                         return true;
         }
         return false;
@@ -2812,7 +2813,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
         /* Do not sent more than we queued. 1/4 is reserved for possible
          * copying overhead: fragmentation, tunneling, mangling etc.
          */
-       if (atomic_read(&sk->sk_wmem_alloc) >
+       if (refcount_read(&sk->sk_wmem_alloc) >
             min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
                   sk->sk_sndbuf))
                 return -EAGAIN;
@@ -3267,6 +3268,7 @@ static void tcp_connect_init(struct sock *sk)
         const struct dst_entry *dst = __sk_dst_get(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         __u8 rcv_wscale;
+       u32 rcv_wnd;
  
         /* We'll fix this up when we get a response from the other end.
          * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -3300,13 +3302,17 @@ static void tcp_connect_init(struct sock *sk)
             (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
                 tp->window_clamp = tcp_full_space(sk);
  
+       rcv_wnd = tcp_rwnd_init_bpf(sk);
+       if (rcv_wnd == 0)
+               rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+
         tcp_select_initial_window(tcp_full_space(sk),
                                   tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
                                   &tp->rcv_wnd,
                                   &tp->window_clamp,
                                   sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
                                   &rcv_wscale,
-                                 dst_metric(dst, RTAX_INITRWND));
+                                 rcv_wnd);
  
         tp->rx_opt.rcv_wscale = rcv_wscale;
         tp->rcv_ssthresh = tp->rcv_wnd;
@@ -3327,7 +3333,7 @@ static void tcp_connect_init(struct sock *sk)
         tp->rcv_wup = tp->rcv_nxt;
         tp->copied_seq = tp->rcv_nxt;
  
-       inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
+       inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
         inet_csk(sk)->icsk_retransmits = 0;
         tcp_clear_retrans(tp);
  }
@@ -3440,6 +3446,7 @@ int tcp_connect(struct sock *sk)
         struct sk_buff *buff;
         int err;
  
+       tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
         tcp_connect_init(sk);
  
         if (unlikely(tp->repair)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

index 47c7aa0501af70e53332cc9e4ae79151f548d6f9..25294d43e1470757e4631a8ac2af7fda7e810008 100644 (file)
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -577,7 +577,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
  
         sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
                                dif, &udp_table, NULL);
-       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+       if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
  }
@@ -1163,24 +1163,7 @@ out:
         return ret;
  }
  
-/* Copy as much information as possible into skb->dev_scratch to avoid
- * possibly multiple cache miss on dequeue();
- */
  #if BITS_PER_LONG == 64
-
-/* we can store multiple info here: truesize, len and the bit needed to
- * compute skb_csum_unnecessary will be on cold cache lines at recvmsg
- * time.
- * skb->len can be stored on 16 bits since the udp header has been already
- * validated and pulled.
- */
-struct udp_dev_scratch {
-       u32 truesize;
-       u16 len;
-       bool is_linear;
-       bool csum_unnecessary;
-};
-
  static void udp_set_dev_scratch(struct sk_buff *skb)
  {
         struct udp_dev_scratch *scratch;
@@ -1197,22 +1180,6 @@ static int udp_skb_truesize(struct sk_buff *skb)
  {
         return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
  }
-
-static unsigned int udp_skb_len(struct sk_buff *skb)
-{
-       return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
-}
-
-static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
-{
-       return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
-}
-
-static bool udp_skb_is_linear(struct sk_buff *skb)
-{
-       return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
-}
-
  #else
  static void udp_set_dev_scratch(struct sk_buff *skb)
  {
@@ -1223,21 +1190,6 @@ static int udp_skb_truesize(struct sk_buff *skb)
  {
         return skb->dev_scratch;
  }
-
-static unsigned int udp_skb_len(struct sk_buff *skb)
-{
-       return skb->len;
-}
-
-static bool udp_skb_csum_unnecessary(struct sk_buff *skb)
-{
-       return skb_csum_unnecessary(skb);
-}
-
-static bool udp_skb_is_linear(struct sk_buff *skb)
-{
-       return !skb_is_nonlinear(skb);
-}
  #endif
  
  /* fully reclaim rmem/fwd memory allocated for skb */
@@ -1598,18 +1550,6 @@ busy_check:
  }
  EXPORT_SYMBOL_GPL(__skb_recv_udp);
  
-static int copy_linear_skb(struct sk_buff *skb, int len, int off,
-                          struct iov_iter *to)
-{
-       int n, copy = len - off;
-
-       n = copy_to_iter(skb->data + off, copy, to);
-       if (n == copy)
-               return 0;
-
-       return -EFAULT;
-}
-
  /*
   *     This should be easy, if there is something there we
   *     return it, otherwise we block.
@@ -2302,7 +2242,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
                                              uh->source, iph->saddr, dif);
         }
  
-       if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+       if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
                 return;
  
         skb->sk = sk;
@@ -2751,7 +2691,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
                 0, 0L, 0,
                 from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
                 0, sock_i_ino(sp),
-               atomic_read(&sp->sk_refcnt), sp,
+               refcount_read(&sp->sk_refcnt), sp,
                 atomic_read(&sp->sk_drops));
  }
  
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c

index 9a89c10a55f0c0c5919638d34a2d065f86a80a98..4515836d2a3ac309c9305a4ee1ce95fa0b6e26d4 100644 (file)
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -55,7 +55,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
                                 req->id.idiag_dport,
                                 req->id.idiag_if, tbl, NULL);
  #endif
-       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+       if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         rcu_read_unlock();
         err = -ENOENT;
@@ -206,7 +206,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb,
                 return -EINVAL;
         }
  
-       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+       if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
  
         rcu_read_unlock();
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c

index a885ffcf097317659e5aabaca9036eadc9e5287e..114fb64cf176c2c148d53c67c6dafa7891ae84aa 100644 (file)
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1927,15 +1927,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
         if (dad_failed)
                 ifp->flags |= IFA_F_DADFAILED;
  
-       if (ifp->flags&IFA_F_PERMANENT) {
-               spin_lock_bh(&ifp->lock);
-               addrconf_del_dad_work(ifp);
-               ifp->flags |= IFA_F_TENTATIVE;
-               spin_unlock_bh(&ifp->lock);
-               if (dad_failed)
-                       ipv6_ifa_notify(0, ifp);
-               in6_ifa_put(ifp);
-       } else if (ifp->flags&IFA_F_TEMPORARY) {
+       if (ifp->flags&IFA_F_TEMPORARY) {
                 struct inet6_ifaddr *ifpub;
                 spin_lock_bh(&ifp->lock);
                 ifpub = ifp->ifpub;
@@ -1948,6 +1940,14 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
                         spin_unlock_bh(&ifp->lock);
                 }
                 ipv6_del_addr(ifp);
+       } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) {
+               spin_lock_bh(&ifp->lock);
+               addrconf_del_dad_work(ifp);
+               ifp->flags |= IFA_F_TENTATIVE;
+               spin_unlock_bh(&ifp->lock);
+               if (dad_failed)
+                       ipv6_ifa_notify(0, ifp);
+               in6_ifa_put(ifp);
         } else {
                 ipv6_del_addr(ifp);
         }
@@ -3384,6 +3384,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
         struct netdev_notifier_changeupper_info *info;
         struct inet6_dev *idev = __in6_dev_get(dev);
+       struct net *net = dev_net(dev);
         int run_pending = 0;
         int err;
  
@@ -3399,7 +3400,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
         case NETDEV_CHANGEMTU:
                 /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
                 if (dev->mtu < IPV6_MIN_MTU) {
-                       addrconf_ifdown(dev, 1);
+                       addrconf_ifdown(dev, dev != net->loopback_dev);
                         break;
                 }
  
@@ -3515,7 +3516,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
                          * IPV6_MIN_MTU stop IPv6 on this interface.
                          */
                         if (dev->mtu < IPV6_MIN_MTU)
-                               addrconf_ifdown(dev, 1);
+                               addrconf_ifdown(dev, dev != net->loopback_dev);
                 }
                 break;
  
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c

index 8d772fea1ddecd427a66c18f34d50f969186f02a..44067521e7cd56e84870fb1a8209b68349ac22f4 100644 (file)
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -227,7 +227,7 @@ static int calipso_cache_check(const unsigned char *key,
                     entry->key_len == key_len &&
                     memcmp(entry->key, key, key_len) == 0) {
                         entry->activity += 1;
-                       atomic_inc(&entry->lsm_data->refcount);
+                       refcount_inc(&entry->lsm_data->refcount);
                         secattr->cache = entry->lsm_data;
                         secattr->flags |= NETLBL_SECATTR_CACHE;
                         secattr->type = NETLBL_NLTYPE_CALIPSO;
@@ -296,7 +296,7 @@ static int calipso_cache_add(const unsigned char *calipso_ptr,
         }
         entry->key_len = calipso_ptr_len;
         entry->hash = calipso_map_cache_hash(calipso_ptr, calipso_ptr_len);
-       atomic_inc(&secattr->cache->refcount);
+       refcount_inc(&secattr->cache->refcount);
         entry->lsm_data = secattr->cache;
  
         bkt = entry->hash & (CALIPSO_CACHE_BUCKETS - 1);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c

index e011122ebd43c190aec3812099345ec852444284..a1f9187130067dba6d485dbaade21d62080fcbf6 100644 (file)
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -250,8 +250,14 @@ ipv4_connected:
          */
  
         err = ip6_datagram_dst_update(sk, true);
-       if (err)
+       if (err) {
+               /* Reset daddr and dport so that udp_v6_early_demux()
+                * fails to find this socket
+                */
+               memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr));
+               inet->inet_dport = 0;
                 goto out;
+       }
  
         sk->sk_state = TCP_ESTABLISHED;
         sk_set_txhash(sk);
@@ -1035,6 +1041,6 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
                    0,
                    sock_i_ino(sp),
-                  atomic_read(&sp->sk_refcnt), sp,
+                  refcount_read(&sp->sk_refcnt), sp,
                    atomic_read(&sp->sk_drops));
  }
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c

index 71faffdd55d95724ad6066138de234070c972006..9ed35473dcb53bd6ae52f8f86e1558410bbcd7b6 100644 (file)
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
                         skb->data_len += tailen;
                         skb->truesize += tailen;
                         if (sk)
-                               atomic_add(tailen, &sk->sk_wmem_alloc);
+                               refcount_add(tailen, &sk->sk_wmem_alloc);
  
                         goto out;
                 }
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c

index d950d43ba255442cf3079546a46b95693029f10c..f02f131f6435a967de395b9a7069051c93a039d7 100644 (file)
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -30,6 +30,25 @@
  #include <net/ipv6.h>
  #include <linux/icmpv6.h>
  
+static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
+{
+       int off = sizeof(struct ipv6hdr);
+       struct ipv6_opt_hdr *exthdr;
+
+       if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+               return offsetof(struct ipv6hdr, nexthdr);
+
+       while (off < nhlen) {
+               exthdr = (void *)ipv6_hdr + off;
+               if (exthdr->nexthdr == NEXTHDR_ESP)
+                       return off;
+
+               off += ipv6_optlen(exthdr);
+       }
+
+       return 0;
+}
+
  static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
                                          struct sk_buff *skb)
  {
@@ -38,6 +57,7 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
         struct xfrm_state *x;
         __be32 seq;
         __be32 spi;
+       int nhoff;
         int err;
  
         skb_pull(skb, offset);
@@ -72,6 +92,11 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
  
         xo->flags |= XFRM_GRO;
  
+       nhoff = esp6_nexthdr_esp_offset(ipv6_hdr(skb), offset);
+       if (!nhoff)
+               goto out;
+
+       IP6CB(skb)->nhoff = nhoff;
         XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
         XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
         XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c

index d0900918a19e5e5cec30831b64c764057892162d..b13b8f93079dae620e9e9fe58233baef01ecf84f 100644 (file)
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -75,7 +75,7 @@ begin:
                         continue;
                 if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
                         continue;
-               if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+               if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
                         goto out;
  
                 if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
@@ -172,7 +172,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
  
         sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr,
                             ntohs(dport), dif, &refcounted);
-       if (sk && !refcounted && !atomic_inc_not_zero(&sk->sk_refcnt))
+       if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
  }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c

index 5baa6fab4b9745d28f1dc3295dfe6684dfbd7df6..1422d6c083773549d667dc88ffe07d447d5c8e97 100644 (file)
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1385,7 +1385,7 @@ emsgsize:
          */
  
         cork->length += length;
-       if ((((length + fragheaderlen) > mtu) ||
+       if ((((length + (skb ? skb->len : headersize)) > mtu) ||
              (skb && skb_is_gso(skb))) &&
             (sk->sk_protocol == IPPROTO_UDP) &&
             (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
@@ -1472,7 +1472,7 @@ alloc_new_skb:
                                                 (flags & MSG_DONTWAIT), &err);
                         } else {
                                 skb = NULL;
-                               if (atomic_read(&sk->sk_wmem_alloc) <=
+                               if (refcount_read(&sk->sk_wmem_alloc) <=
                                     2 * sk->sk_sndbuf)
                                         skb = sock_wmalloc(sk,
                                                            alloclen + hh_len, 1,
@@ -1581,7 +1581,7 @@ alloc_new_skb:
                         skb->len += copy;
                         skb->data_len += copy;
                         skb->truesize += copy;
-                       atomic_add(copy, &sk->sk_wmem_alloc);
+                       refcount_add(copy, &sk->sk_wmem_alloc);
                 }
                 offset += copy;
                 length -= copy;
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c

index 2297c9f073bac63c90ed0578b474fd53b556a6e2..d7b679037baee5c1c79a477e9774a023ea9550e3 100644 (file)
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -75,8 +75,8 @@ static int masq_device_event(struct notifier_block *this,
         struct net *net = dev_net(dev);
  
         if (event == NETDEV_DOWN)
-               nf_ct_iterate_cleanup(net, device_cmp,
-                                     (void *)(long)dev->ifindex, 0, 0);
+               nf_ct_iterate_cleanup_net(net, device_cmp,
+                                         (void *)(long)dev->ifindex, 0, 0);
  
         return NOTIFY_DONE;
  }
@@ -99,7 +99,7 @@ static void iterate_cleanup_work(struct work_struct *work)
         w = container_of(work, struct masq_dev_work, work);
  
         index = w->ifindex;
-       nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
+       nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0);
  
         put_net(w->net);
         kfree(w);
@@ -110,12 +110,12 @@ static void iterate_cleanup_work(struct work_struct *work)
  /* ipv6 inet notifier is an atomic notifier, i.e. we cannot
   * schedule.
   *
- * Unfortunately, nf_ct_iterate_cleanup can run for a long
+ * Unfortunately, nf_ct_iterate_cleanup_net can run for a long
   * time if there are lots of conntracks and the system
   * handles high softirq load, so it frequently calls cond_resched
   * while iterating the conntrack table.
   *
- * So we defer nf_ct_iterate_cleanup walk to the system workqueue.
+ * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue.
   *
   * As we can have 'a lot' of inet_events (depending on amount
   * of ipv6 addresses being deleted), we also need to add an upper
diff --git a/net/ipv6/route.c b/net/ipv6/route.c

index 2e44900760611f062f62a10b7f09378ef1ef02a2..0488a24c2a4425657f2e851aa3cd78fcd1dcea98 100644 (file)
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3730,7 +3730,11 @@ static int ip6_route_dev_notify(struct notifier_block *this,
                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
  #endif
-        } else if (event == NETDEV_UNREGISTER) {
+        } else if (event == NETDEV_UNREGISTER &&
+                   dev->reg_state != NETREG_UNREGISTERED) {
+               /* NETDEV_UNREGISTER could be fired for multiple times by
+                * netdev_wait_allrefs(). Make sure we only call this once.
+                */
                 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
                 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c

index e9958b1398cb9edb54078c88bac2c921c79b3edd..ac912bb217471c048df3b76aa3d7b82886221dc1 100644 (file)
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -305,7 +305,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
          * we try harder to allocate.
          */
         kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
-               kcalloc(cmax, sizeof(*kp), GFP_KERNEL) :
+               kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
                 NULL;
  
         rcu_read_lock();
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c

index 2f7e99af67dbfd2324d39086881b9475045d7e1f..7b75b062073087b7b715629de839dcc107144402 100644 (file)
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -194,7 +194,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
         if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
             np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
-               atomic_inc(&skb->users);
+               refcount_inc(&skb->users);
                 ireq->pktopts = skb;
         }
  
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index f85cbfc183d61e4fa9f0cfebbf16cac6675e3fe3..2521690d62d6e591af594c3629f71f004240ed68 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -734,7 +734,7 @@ static void tcp_v6_init_req(struct request_sock *req,
              np->rxopt.bits.rxinfo ||
              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
              np->rxopt.bits.rxohlim || np->repflow)) {
-               atomic_inc(&skb->users);
+               refcount_inc(&skb->users);
                 ireq->pktopts = skb;
         }
  }
@@ -1809,7 +1809,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
                    icsk->icsk_probes_out,
                    sock_i_ino(sp),
-                  atomic_read(&sp->sk_refcnt), sp,
+                  refcount_read(&sp->sk_refcnt), sp,
                    jiffies_to_clock_t(icsk->icsk_rto),
                    jiffies_to_clock_t(icsk->icsk_ack.ato),
                    (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
@@ -1842,7 +1842,7 @@ static void get_timewait6_sock(struct seq_file *seq,
                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
                    tw->tw_substate, 0, 0,
                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
-                  atomic_read(&tw->tw_refcnt), tw);
+                  refcount_read(&tw->tw_refcnt), tw);
  }
  
  static int tcp6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index d1d72880572938ad99a4779f4216ccb4f14a8f92..4a3e65626e8baddf5d7d1c246e6e5fded2b08b8a 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -325,7 +325,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
  
         sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
                                 dif, &udp_table, NULL);
-       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+       if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                 sk = NULL;
         return sk;
  }
@@ -362,7 +362,7 @@ try_again:
         if (!skb)
                 return err;
  
-       ulen = skb->len;
+       ulen = udp_skb_len(skb);
         copied = len;
         if (copied > ulen - off)
                 copied = ulen - off;
@@ -379,14 +379,18 @@ try_again:
  
         if (copied < ulen || peeking ||
             (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
-               checksum_valid = !udp_lib_checksum_complete(skb);
+               checksum_valid = udp_skb_csum_unnecessary(skb) ||
+                               !__udp_lib_checksum_complete(skb);
                 if (!checksum_valid)
                         goto csum_copy_err;
         }
  
-       if (checksum_valid || skb_csum_unnecessary(skb))
-               err = skb_copy_datagram_msg(skb, off, msg, copied);
-       else {
+       if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
+               if (udp_skb_is_linear(skb))
+                       err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
+               else
+                       err = skb_copy_datagram_msg(skb, off, msg, copied);
+       } else {
                 err = skb_copy_and_csum_datagram_msg(skb, off, msg);
                 if (err == -EINVAL)
                         goto csum_copy_err;
@@ -881,7 +885,8 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
         struct sock *sk;
  
         udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-               if (INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
+               if (sk->sk_state == TCP_ESTABLISHED &&
+                   INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif))
                         return sk;
                 /* Only check first socket in chain */
                 break;
@@ -911,7 +916,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
         else
                 return;
  
-       if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2))
+       if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
                 return;
  
         skb->sk = sk;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c

index 08a807b29298f5a2c14c30eaedcba87f3057431d..3ef5d913e7a3b5a7407bb926d5ff354ae778ccb8 100644 (file)
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,8 +43,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
                 return 1;
  #endif
  
-       ipv6_hdr(skb)->payload_len = htons(skb->len);
         __skb_push(skb, skb->data - skb_network_header(skb));
+       ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
  
         if (xo && (xo->flags & XFRM_GRO)) {
                 skb_mac_header_rebuild(skb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c

index ac033e413bc5c98b53ecd31394dd80d4679717e1..148533169b1ddb6d20aea50523e6311188e80f13 100644 (file)
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -402,7 +402,7 @@ static void iucv_sock_destruct(struct sock *sk)
         }
  
         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-       WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+       WARN_ON(refcount_read(&sk->sk_wmem_alloc));
         WARN_ON(sk->sk_wmem_queued);
         WARN_ON(sk->sk_forward_alloc);
  }
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c

index bf75c9231ccacfe18cf7ebcc30d7c9a45a811aab..c343ac60bf502eca0577f75c06d72f1418559079 100644 (file)
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -162,7 +162,7 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
                    psock->sk->sk_receive_queue.qlen,
                    atomic_read(&psock->sk->sk_rmem_alloc),
                    psock->sk->sk_write_queue.qlen,
-                  atomic_read(&psock->sk->sk_wmem_alloc));
+                  refcount_read(&psock->sk->sk_wmem_alloc));
  
         if (psock->done)
                 seq_puts(seq, "Done ");
diff --git a/net/key/af_key.c b/net/key/af_key.c

index ce9b8565d82516ae8a440ff8ed45c184e76d2e14..edcf1d0f82c80bab61fbf93fb962c52c9360baad 100644 (file)
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -109,7 +109,7 @@ static void pfkey_sock_destruct(struct sock *sk)
         }
  
         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-       WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+       WARN_ON(refcount_read(&sk->sk_wmem_alloc));
  
         atomic_dec(&net_pfkey->socks_nr);
  }
@@ -203,11 +203,11 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
  
         sock_hold(sk);
         if (*skb2 == NULL) {
-               if (atomic_read(&skb->users) != 1) {
+               if (refcount_read(&skb->users) != 1) {
                         *skb2 = skb_clone(skb, allocation);
                 } else {
                         *skb2 = skb;
-                       atomic_inc(&skb->users);
+                       refcount_inc(&skb->users);
                 }
         }
         if (*skb2 != NULL) {
@@ -1150,6 +1150,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                         goto out;
         }
  
+       err = -ENOBUFS;
         key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
         if (sa->sadb_sa_auth) {
                 int keysize = 0;
@@ -1161,8 +1162,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                 if (key)
                         keysize = (key->sadb_key_bits + 7) / 8;
                 x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
-               if (!x->aalg)
+               if (!x->aalg) {
+                       err = -ENOMEM;
                         goto out;
+               }
                 strcpy(x->aalg->alg_name, a->name);
                 x->aalg->alg_key_len = 0;
                 if (key) {
@@ -1181,8 +1184,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                                 goto out;
                         }
                         x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
-                       if (!x->calg)
+                       if (!x->calg) {
+                               err = -ENOMEM;
                                 goto out;
+                       }
                         strcpy(x->calg->alg_name, a->name);
                         x->props.calgo = sa->sadb_sa_encrypt;
                 } else {
@@ -1196,8 +1201,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                         if (key)
                                 keysize = (key->sadb_key_bits + 7) / 8;
                         x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
-                       if (!x->ealg)
+                       if (!x->ealg) {
+                               err = -ENOMEM;
                                 goto out;
+                       }
                         strcpy(x->ealg->alg_name, a->name);
                         x->ealg->alg_key_len = 0;
                         if (key) {
@@ -1242,8 +1249,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
                 struct xfrm_encap_tmpl *natt;
  
                 x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
-               if (!x->encap)
+               if (!x->encap) {
+                       err = -ENOMEM;
                         goto out;
+               }
  
                 natt = x->encap;
                 n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
@@ -2742,6 +2751,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
         int err, err2;
  
         err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
+       if (!err)
+               xfrm_garbage_collect(net);
         err2 = unicast_flush_resp(sk, hdr);
         if (err || err2) {
                 if (err == -ESRCH) /* empty table - old silent behavior */
@@ -3728,7 +3739,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
         else
                 seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n",
                                s,
-                              atomic_read(&s->sk_refcnt),
+                              refcount_read(&s->sk_refcnt),
                                sk_rmem_alloc_get(s),
                                sk_wmem_alloc_get(s),
                                from_kuid_munged(seq_user_ns(f), sock_i_uid(s)),
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c

index d100aed3d06fb63b8851a00c55350f1728b18599..98a005d0d04a50208a9a67ad233d7b1301c323c9 100644 (file)
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -144,9 +144,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
                    tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" :
                    "");
         seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count,
-                  tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
+                  tunnel->sock ? refcount_read(&tunnel->sock->sk_refcnt) : 0,
                    atomic_read(&tunnel->ref_count));
-
         seq_printf(m, " %08x rx %ld/%ld/%ld rx %ld/%ld/%ld\n",
                    tunnel->debug,
                    atomic_long_read(&tunnel->stats.tx_packets),
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c

index 9b02c13d258b005bb10029b3baf8ec4db71f18b4..5e91b47f0d2ac73a6e5b6e34ba19e058ada70f74 100644 (file)
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -507,7 +507,7 @@ again:
         sk_nulls_for_each_rcu(rc, node, laddr_hb) {
                 if (llc_estab_match(sap, daddr, laddr, rc)) {
                         /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
-                       if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+                       if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
                                 goto again;
                         if (unlikely(llc_sk(rc)->sap != sap ||
                                      !llc_estab_match(sap, daddr, laddr, rc))) {
@@ -566,7 +566,7 @@ again:
         sk_nulls_for_each_rcu(rc, node, laddr_hb) {
                 if (llc_listener_match(sap, laddr, rc)) {
                         /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
-                       if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+                       if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
                                 goto again;
                         if (unlikely(llc_sk(rc)->sap != sap ||
                                      !llc_listener_match(sap, laddr, rc))) {
@@ -973,9 +973,9 @@ void llc_sk_free(struct sock *sk)
         skb_queue_purge(&sk->sk_write_queue);
         skb_queue_purge(&llc->pdu_unack_q);
  #ifdef LLC_REFCNT_DEBUG
-       if (atomic_read(&sk->sk_refcnt) != 1) {
+       if (refcount_read(&sk->sk_refcnt) != 1) {
                 printk(KERN_DEBUG "Destruction of LLC sock %p delayed in %s, cnt=%d\n",
-                       sk, __func__, atomic_read(&sk->sk_refcnt));
+                       sk, __func__, refcount_read(&sk->sk_refcnt));
                 printk(KERN_DEBUG "%d LLC sockets are still alive\n",
                         atomic_read(&llc_sock_nr));
         } else {
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c

index 63b6ab0563705f4b15c6bc8e3d9c7ad85a2af381..d90928f50226051a9af674f7a1ed95babbdd69fb 100644 (file)
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -329,7 +329,7 @@ again:
         sk_nulls_for_each_rcu(rc, node, laddr_hb) {
                 if (llc_dgram_match(sap, laddr, rc)) {
                         /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
-                       if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt)))
+                       if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
                                 goto again;
                         if (unlikely(llc_sk(rc)->sap != sap ||
                                      !llc_dgram_match(sap, laddr, rc))) {
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile

index c9b78e7b342f97328bdba267d8cedb2ab4c19834..91338091930141c0707239b04d0b8a42f05027b6 100644 (file)
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,10 +70,9 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
  obj-$(CONFIG_NF_DUP_NETDEV)    += nf_dup_netdev.o
  
  # nf_tables
-nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o
-nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
-nf_tables-objs += nft_lookup.o nft_dynset.o
+nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \
+                 nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \
+                 nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o
  
  obj-$(CONFIG_NF_TABLES)                += nf_tables.o
  obj-$(CONFIG_NF_TABLES_INET)   += nf_tables_inet.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c

index ba6a5516dc7c724b172ee13c0456b461c47f7281..e495b5e484b11c03c26674d999e3dc31422efab9 100644 (file)
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -841,14 +841,16 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
  
  static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                        const struct nlmsghdr *nlh,
-                      const struct nlattr * const attr[])
+                      const struct nlattr * const attr[],
+                      struct netlink_ext_ack *extack)
  {
         return -EOPNOTSUPP;
  }
  
  static int ip_set_create(struct net *net, struct sock *ctnl,
                          struct sk_buff *skb, const struct nlmsghdr *nlh,
-                        const struct nlattr * const attr[])
+                        const struct nlattr * const attr[],
+                        struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *set, *clash = NULL;
@@ -989,7 +991,8 @@ ip_set_destroy_set(struct ip_set *set)
  
  static int ip_set_destroy(struct net *net, struct sock *ctnl,
                           struct sk_buff *skb, const struct nlmsghdr *nlh,
-                         const struct nlattr * const attr[])
+                         const struct nlattr * const attr[],
+                         struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *s;
@@ -1067,7 +1070,8 @@ ip_set_flush_set(struct ip_set *set)
  
  static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                         const struct nlmsghdr *nlh,
-                       const struct nlattr * const attr[])
+                       const struct nlattr * const attr[],
+                       struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *s;
@@ -1106,7 +1110,8 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
  
  static int ip_set_rename(struct net *net, struct sock *ctnl,
                          struct sk_buff *skb, const struct nlmsghdr *nlh,
-                        const struct nlattr * const attr[])
+                        const struct nlattr * const attr[],
+                        struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *set, *s;
@@ -1155,7 +1160,8 @@ out:
  
  static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                        const struct nlmsghdr *nlh,
-                      const struct nlattr * const attr[])
+                      const struct nlattr * const attr[],
+                      struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *from, *to;
@@ -1428,7 +1434,8 @@ out:
  
  static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                        const struct nlmsghdr *nlh,
-                      const struct nlattr * const attr[])
+                      const struct nlattr * const attr[],
+                      struct netlink_ext_ack *extack)
  {
         if (unlikely(protocol_failed(attr)))
                 return -IPSET_ERR_PROTOCOL;
@@ -1513,7 +1520,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
  
  static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                        const struct nlmsghdr *nlh,
-                      const struct nlattr * const attr[])
+                      const struct nlattr * const attr[],
+                      struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *set;
@@ -1567,7 +1575,8 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
  
  static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                        const struct nlmsghdr *nlh,
-                      const struct nlattr * const attr[])
+                      const struct nlattr * const attr[],
+                      struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *set;
@@ -1621,7 +1630,8 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
  
  static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                         const struct nlmsghdr *nlh,
-                       const struct nlattr * const attr[])
+                       const struct nlattr * const attr[],
+                       struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         struct ip_set *set;
@@ -1656,7 +1666,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
  
  static int ip_set_header(struct net *net, struct sock *ctnl,
                          struct sk_buff *skb, const struct nlmsghdr *nlh,
-                        const struct nlattr * const attr[])
+                        const struct nlattr * const attr[],
+                        struct netlink_ext_ack *extack)
  {
         struct ip_set_net *inst = ip_set_pernet(net);
         const struct ip_set *set;
@@ -1712,7 +1723,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
  
  static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
                        const struct nlmsghdr *nlh,
-                      const struct nlattr * const attr[])
+                      const struct nlattr * const attr[],
+                      struct netlink_ext_ack *extack)
  {
         struct sk_buff *skb2;
         struct nlmsghdr *nlh2;
@@ -1770,7 +1782,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
  
  static int ip_set_protocol(struct net *net, struct sock *ctnl,
                            struct sk_buff *skb, const struct nlmsghdr *nlh,
-                          const struct nlattr * const attr[])
+                          const struct nlattr * const attr[],
+                          struct netlink_ext_ack *extack)
  {
         struct sk_buff *skb2;
         struct nlmsghdr *nlh2;
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c

index 42c3e3ba1b9460eb3680f843424458e4513b8cef..3f09cdb4256266b3eab30f4672853ee81e438ed3 100644 (file)
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -38,8 +38,8 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
                 break;
         }
         case IPPROTO_SCTP: {
-               sctp_sctphdr_t _sh;
-               const sctp_sctphdr_t *sh;
+               struct sctphdr _sh;
+               const struct sctphdr *sh;
  
                 sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
                 if (!sh)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c

index ad99c1ceea6f42bf3e52500a4452f7f74e730be5..e31956b58abaf85404b60f1e3fa5a404d7ecc2c3 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1037,9 +1037,9 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
   */
  static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
  {
-       sctp_chunkhdr_t *sch, schunk;
-       sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
-                       sizeof(schunk), &schunk);
+       struct sctp_chunkhdr *sch, schunk;
+       sch = skb_header_pointer(skb, nh_len + sizeof(struct sctphdr),
+                                sizeof(schunk), &schunk);
         if (sch == NULL)
                 return 0;
         if (sch->type == SCTP_CID_ABORT)
@@ -1070,9 +1070,9 @@ static inline bool is_new_conn(const struct sk_buff *skb,
                 return th->syn;
         }
         case IPPROTO_SCTP: {
-               sctp_chunkhdr_t *sch, schunk;
+               struct sctp_chunkhdr *sch, schunk;
  
-               sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
+               sch = skb_header_pointer(skb, iph->len + sizeof(struct sctphdr),
                                          sizeof(schunk), &schunk);
                 if (sch == NULL)
                         return false;
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c

index 56f8e4b204ffcc4840a1042097a0f7d0f004df18..3ffad4adaddf97fb77946bdb3558b0d2fcecd88e 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -15,16 +15,15 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
                    struct ip_vs_iphdr *iph)
  {
         struct ip_vs_service *svc;
-       sctp_chunkhdr_t _schunkh, *sch;
-       sctp_sctphdr_t *sh, _sctph;
+       struct sctp_chunkhdr _schunkh, *sch;
+       struct sctphdr *sh, _sctph;
         __be16 _ports[2], *ports = NULL;
  
         if (likely(!ip_vs_iph_icmp(iph))) {
                 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
                 if (sh) {
-                       sch = skb_header_pointer(
-                               skb, iph->len + sizeof(sctp_sctphdr_t),
-                               sizeof(_schunkh), &_schunkh);
+                       sch = skb_header_pointer(skb, iph->len + sizeof(_sctph),
+                                                sizeof(_schunkh), &_schunkh);
                         if (sch && (sch->type == SCTP_CID_INIT ||
                                     sysctl_sloppy_sctp(ipvs)))
                                 ports = &sh->source;
@@ -77,7 +76,7 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
         return 1;
  }
  
-static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
+static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph,
                           unsigned int sctphoff)
  {
         sctph->checksum = sctp_compute_cksum(skb, sctphoff);
@@ -88,7 +87,7 @@ static int
  sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
  {
-       sctp_sctphdr_t *sctph;
+       struct sctphdr *sctph;
         unsigned int sctphoff = iph->len;
         bool payload_csum = false;
  
@@ -135,7 +134,7 @@ static int
  sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
                   struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
  {
-       sctp_sctphdr_t *sctph;
+       struct sctphdr *sctph;
         unsigned int sctphoff = iph->len;
         bool payload_csum = false;
  
@@ -378,7 +377,7 @@ static inline void
  set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                 int direction, const struct sk_buff *skb)
  {
-       sctp_chunkhdr_t _sctpch, *sch;
+       struct sctp_chunkhdr _sctpch, *sch;
         unsigned char chunk_type;
         int event, next_state;
         int ihl, cofs;
@@ -389,7 +388,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
         ihl = ip_hdrlen(skb);
  #endif
  
-       cofs = ihl + sizeof(sctp_sctphdr_t);
+       cofs = ihl + sizeof(struct sctphdr);
         sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
         if (sch == NULL)
                 return;
@@ -410,7 +409,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
             (sch->type == SCTP_CID_COOKIE_ACK)) {
                 int clen = ntohs(sch->length);
  
-               if (clen >= sizeof(sctp_chunkhdr_t)) {
+               if (clen >= sizeof(_sctpch)) {
                         sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
                                                  sizeof(_sctpch), &_sctpch);
                         if (sch && sch->type == SCTP_CID_ABORT)
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c

index 03d2ccffa9fa3c1eecfbc07dcadc420a0ab5e315..20edd589fe0635c2cb0b989d39e1cb8801a85a60 100644 (file)
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -197,8 +197,8 @@ static void __exit nf_conntrack_amanda_fini(void)
  {
         int i;
  
-       nf_conntrack_helper_unregister(&amanda_helper[0]);
-       nf_conntrack_helper_unregister(&amanda_helper[1]);
+       nf_conntrack_helpers_unregister(amanda_helper,
+                                       ARRAY_SIZE(amanda_helper));
         for (i = 0; i < ARRAY_SIZE(search); i++)
                 textsearch_destroy(search[i].ts);
  }
@@ -218,16 +218,12 @@ static int __init nf_conntrack_amanda_init(void)
                         goto err1;
                 }
         }
-       ret = nf_conntrack_helper_register(&amanda_helper[0]);
+       ret = nf_conntrack_helpers_register(amanda_helper,
+                                           ARRAY_SIZE(amanda_helper));
         if (ret < 0)
                 goto err1;
-       ret = nf_conntrack_helper_register(&amanda_helper[1]);
-       if (ret < 0)
-               goto err2;
         return 0;
  
-err2:
-       nf_conntrack_helper_unregister(&amanda_helper[0]);
  err1:
         while (--i >= 0)
                 textsearch_destroy(search[i].ts);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index e847dbaa0c6b3aefc3d417421a2b529a10735e38..9979f46c81dce32bc2288cfd4561c571f5bea4c5 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1586,13 +1586,12 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
  
  /* Bring out ya dead! */
  static struct nf_conn *
-get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
+get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
                 void *data, unsigned int *bucket)
  {
         struct nf_conntrack_tuple_hash *h;
         struct nf_conn *ct;
         struct hlist_nulls_node *n;
-       int cpu;
         spinlock_t *lockp;
  
         for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
@@ -1604,8 +1603,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
                                 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
                                         continue;
                                 ct = nf_ct_tuplehash_to_ctrack(h);
-                               if (net_eq(nf_ct_net(ct), net) &&
-                                   iter(ct, data))
+                               if (iter(ct, data))
                                         goto found;
                         }
                 }
@@ -1614,51 +1612,150 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
                 cond_resched();
         }
  
+       return NULL;
+found:
+       atomic_inc(&ct->ct_general.use);
+       spin_unlock(lockp);
+       local_bh_enable();
+       return ct;
+}
+
+static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
+                                 void *data, u32 portid, int report)
+{
+       unsigned int bucket = 0, sequence;
+       struct nf_conn *ct;
+
+       might_sleep();
+
+       for (;;) {
+               sequence = read_seqcount_begin(&nf_conntrack_generation);
+
+               while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+                       /* Time to push up daises... */
+
+                       nf_ct_delete(ct, portid, report);
+                       nf_ct_put(ct);
+                       cond_resched();
+               }
+
+               if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
+                       break;
+               bucket = 0;
+       }
+}
+
+struct iter_data {
+       int (*iter)(struct nf_conn *i, void *data);
+       void *data;
+       struct net *net;
+};
+
+static int iter_net_only(struct nf_conn *i, void *data)
+{
+       struct iter_data *d = data;
+
+       if (!net_eq(d->net, nf_ct_net(i)))
+               return 0;
+
+       return d->iter(i, d->data);
+}
+
+static void
+__nf_ct_unconfirmed_destroy(struct net *net)
+{
+       int cpu;
+
         for_each_possible_cpu(cpu) {
-               struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+               struct nf_conntrack_tuple_hash *h;
+               struct hlist_nulls_node *n;
+               struct ct_pcpu *pcpu;
+
+               pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
  
                 spin_lock_bh(&pcpu->lock);
                 hlist_nulls_for_each_entry(h, n, &pcpu->unconfirmed, hnnode) {
+                       struct nf_conn *ct;
+
                         ct = nf_ct_tuplehash_to_ctrack(h);
-                       if (iter(ct, data))
-                               set_bit(IPS_DYING_BIT, &ct->status);
+
+                       /* We cannot call iter() on the unconfirmed list: the
+                        * owning cpu can reallocate ct->ext at any time.
+                        */
+                       set_bit(IPS_DYING_BIT, &ct->status);
                 }
                 spin_unlock_bh(&pcpu->lock);
                 cond_resched();
         }
-       return NULL;
-found:
-       atomic_inc(&ct->ct_general.use);
-       spin_unlock(lockp);
-       local_bh_enable();
-       return ct;
  }
  
-void nf_ct_iterate_cleanup(struct net *net,
-                          int (*iter)(struct nf_conn *i, void *data),
-                          void *data, u32 portid, int report)
+void nf_ct_iterate_cleanup_net(struct net *net,
+                              int (*iter)(struct nf_conn *i, void *data),
+                              void *data, u32 portid, int report)
  {
-       struct nf_conn *ct;
-       unsigned int bucket = 0;
+       struct iter_data d;
  
         might_sleep();
  
         if (atomic_read(&net->ct.count) == 0)
                 return;
  
-       while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
-               /* Time to push up daises... */
+       __nf_ct_unconfirmed_destroy(net);
  
-               nf_ct_delete(ct, portid, report);
-               nf_ct_put(ct);
-               cond_resched();
+       d.iter = iter;
+       d.data = data;
+       d.net = net;
+
+       synchronize_net();
+
+       nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
+}
+EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
+
+/**
+ * nf_ct_iterate_destroy - destroy unconfirmed conntracks and iterate table
+ * @iter: callback to invoke for each conntrack
+ * @data: data to pass to @iter
+ *
+ * Like nf_ct_iterate_cleanup, but first marks conntracks on the
+ * unconfirmed list as dying (so they will not be inserted into
+ * main table).
+ *
+ * Can only be called in module exit path.
+ */
+void
+nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
+{
+       struct net *net;
+
+       rtnl_lock();
+       for_each_net(net) {
+               if (atomic_read(&net->ct.count) == 0)
+                       continue;
+               __nf_ct_unconfirmed_destroy(net);
         }
+       rtnl_unlock();
+
+       /* Need to wait for netns cleanup worker to finish, if it's
+        * running -- it might have deleted a net namespace from
+        * the global list, so our __nf_ct_unconfirmed_destroy() might
+        * not have affected all namespaces.
+        */
+       net_ns_barrier();
+
+       /* a conntrack could have been unlinked from unconfirmed list
+        * before we grabbed pcpu lock in __nf_ct_unconfirmed_destroy().
+        * This makes sure it's inserted into the conntrack table.
+        */
+       synchronize_net();
+
+       nf_ct_iterate_cleanup(iter, data, 0, 0);
  }
-EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
+EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
  
  static int kill_all(struct nf_conn *i, void *data)
  {
-       return 1;
+       return net_eq(nf_ct_net(i), data);
  }
  
  void nf_ct_free_hashtable(void *hash, unsigned int size)
@@ -1723,7 +1820,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
  i_see_dead_people:
         busy = 0;
         list_for_each_entry(net, net_exit_list, exit_list) {
-               nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
+               nf_ct_iterate_cleanup(kill_all, net, 0, 0);
                 if (atomic_read(&net->ct.count) != 0)
                         busy = 1;
         }
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c

index 3bcdc718484e4c5352ab2de17c5a8aaf227af2f3..f71f0d2558fdb155e688d29d88ae27fdd0a29e31 100644 (file)
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1815,14 +1815,44 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
         },
  };
  
+static int __init h323_helper_init(void)
+{
+       int ret;
+
+       ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+       if (ret < 0)
+               return ret;
+       ret = nf_conntrack_helpers_register(nf_conntrack_helper_q931,
+                                       ARRAY_SIZE(nf_conntrack_helper_q931));
+       if (ret < 0)
+               goto err1;
+       ret = nf_conntrack_helpers_register(nf_conntrack_helper_ras,
+                                       ARRAY_SIZE(nf_conntrack_helper_ras));
+       if (ret < 0)
+               goto err2;
+
+       return 0;
+err2:
+       nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+                                       ARRAY_SIZE(nf_conntrack_helper_q931));
+err1:
+       nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+       return ret;
+}
+
+static void __exit h323_helper_exit(void)
+{
+       nf_conntrack_helpers_unregister(nf_conntrack_helper_ras,
+                                       ARRAY_SIZE(nf_conntrack_helper_ras));
+       nf_conntrack_helpers_unregister(nf_conntrack_helper_q931,
+                                       ARRAY_SIZE(nf_conntrack_helper_q931));
+       nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+}
+
  /****************************************************************************/
  static void __exit nf_conntrack_h323_fini(void)
  {
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[1]);
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
+       h323_helper_exit();
         kfree(h323_buffer);
         pr_debug("nf_ct_h323: fini\n");
  }
@@ -1837,32 +1867,11 @@ static int __init nf_conntrack_h323_init(void)
         h323_buffer = kmalloc(65536, GFP_KERNEL);
         if (!h323_buffer)
                 return -ENOMEM;
-       ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245);
+       ret = h323_helper_init();
         if (ret < 0)
                 goto err1;
-       ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]);
-       if (ret < 0)
-               goto err2;
-       ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]);
-       if (ret < 0)
-               goto err3;
-       ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]);
-       if (ret < 0)
-               goto err4;
-       ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]);
-       if (ret < 0)
-               goto err5;
         pr_debug("nf_ct_h323: init success\n");
         return 0;
-
-err5:
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]);
-err4:
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]);
-err3:
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]);
-err2:
-       nf_conntrack_helper_unregister(&nf_conntrack_helper_h245);
  err1:
         kfree(h323_buffer);
         return ret;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c

index 7f6100ca63be6dd4f37c24852fd16b9a71b8a823..9129bb3b51535a76f3aeef83c8f16fb13ee6f656 100644 (file)
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -285,16 +285,16 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
  EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
  
  /* appropriate ct lock protecting must be taken by caller */
-static inline int unhelp(struct nf_conntrack_tuple_hash *i,
-                        const struct nf_conntrack_helper *me)
+static int unhelp(struct nf_conn *ct, void *me)
  {
-       struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
         struct nf_conn_help *help = nfct_help(ct);
  
         if (help && rcu_dereference_raw(help->helper) == me) {
                 nf_conntrack_event(IPCT_HELPER, ct);
                 RCU_INIT_POINTER(help->helper, NULL);
         }
+
+       /* We do not intend to delete this conntrack. */
         return 0;
  }
  
@@ -437,33 +437,10 @@ out:
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_helper_register);
  
-static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
-                                            struct net *net)
-{
-       struct nf_conntrack_tuple_hash *h;
-       const struct hlist_nulls_node *nn;
-       int cpu;
-
-       /* Get rid of expecteds, set helpers to NULL. */
-       for_each_possible_cpu(cpu) {
-               struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
-               spin_lock_bh(&pcpu->lock);
-               hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
-                       unhelp(h, me);
-               spin_unlock_bh(&pcpu->lock);
-       }
-}
-
  void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
  {
-       struct nf_conntrack_tuple_hash *h;
         struct nf_conntrack_expect *exp;
         const struct hlist_node *next;
-       const struct hlist_nulls_node *nn;
-       unsigned int last_hsize;
-       spinlock_t *lock;
-       struct net *net;
         unsigned int i;
  
         mutex_lock(&nf_ct_helper_mutex);
@@ -491,26 +468,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
         }
         spin_unlock_bh(&nf_conntrack_expect_lock);
  
-       rtnl_lock();
-       for_each_net(net)
-               __nf_conntrack_helper_unregister(me, net);
-       rtnl_unlock();
-
-       local_bh_disable();
-restart:
-       last_hsize = nf_conntrack_htable_size;
-       for (i = 0; i < last_hsize; i++) {
-               lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
-               nf_conntrack_lock(lock);
-               if (last_hsize != nf_conntrack_htable_size) {
-                       spin_unlock(lock);
-                       goto restart;
-               }
-               hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
-                       unhelp(h, me);
-               spin_unlock(lock);
-       }
-       local_bh_enable();
+       nf_ct_iterate_destroy(unhelp, me);
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
  
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c

index a8be9b72e6cd2ca34166bba49a532f4f92e86e9e..7999e70c3bfbe69f1374087cf9bd8c16d7e813cd 100644 (file)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -636,11 +636,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
         if (events & (1 << IPCT_DESTROY)) {
                 type = IPCTNL_MSG_CT_DELETE;
                 group = NFNLGRP_CONNTRACK_DESTROY;
-       } else  if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
+       } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
                 type = IPCTNL_MSG_CT_NEW;
                 flags = NLM_F_CREATE|NLM_F_EXCL;
                 group = NFNLGRP_CONNTRACK_NEW;
-       } else  if (events) {
+       } else if (events) {
                 type = IPCTNL_MSG_CT_NEW;
                 group = NFNLGRP_CONNTRACK_UPDATE;
         } else
@@ -1122,8 +1122,8 @@ static int ctnetlink_flush_conntrack(struct net *net,
                         return PTR_ERR(filter);
         }
  
-       nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter,
-                             portid, report);
+       nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+                                 portid, report);
         kfree(filter);
  
         return 0;
@@ -1132,7 +1132,8 @@ static int ctnetlink_flush_conntrack(struct net *net,
  static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
                                    struct sk_buff *skb,
                                    const struct nlmsghdr *nlh,
-                                  const struct nlattr * const cda[])
+                                  const struct nlattr * const cda[],
+                                  struct netlink_ext_ack *extack)
  {
         struct nf_conntrack_tuple_hash *h;
         struct nf_conntrack_tuple tuple;
@@ -1184,7 +1185,8 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
  static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
                                    struct sk_buff *skb,
                                    const struct nlmsghdr *nlh,
-                                  const struct nlattr * const cda[])
+                                  const struct nlattr * const cda[],
+                                  struct netlink_ext_ack *extack)
  {
         struct nf_conntrack_tuple_hash *h;
         struct nf_conntrack_tuple tuple;
@@ -1345,7 +1347,8 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
  static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
                                   struct sk_buff *skb,
                                   const struct nlmsghdr *nlh,
-                                 const struct nlattr * const cda[])
+                                 const struct nlattr * const cda[],
+                                 struct netlink_ext_ack *extack)
  {
         if (nlh->nlmsg_flags & NLM_F_DUMP) {
                 struct netlink_dump_control c = {
@@ -1367,7 +1370,8 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
  static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
                                         struct sk_buff *skb,
                                         const struct nlmsghdr *nlh,
-                                       const struct nlattr * const cda[])
+                                       const struct nlattr * const cda[],
+                                       struct netlink_ext_ack *extack)
  {
         if (nlh->nlmsg_flags & NLM_F_DUMP) {
                 struct netlink_dump_control c = {
@@ -1906,7 +1910,8 @@ err1:
  static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
                                    struct sk_buff *skb,
                                    const struct nlmsghdr *nlh,
-                                  const struct nlattr * const cda[])
+                                  const struct nlattr * const cda[],
+                                  struct netlink_ext_ack *extack)
  {
         struct nf_conntrack_tuple otuple, rtuple;
         struct nf_conntrack_tuple_hash *h = NULL;
@@ -2071,7 +2076,8 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
  static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
                                  struct sk_buff *skb,
                                  const struct nlmsghdr *nlh,
-                                const struct nlattr * const cda[])
+                                const struct nlattr * const cda[],
+                                struct netlink_ext_ack *extack)
  {
         if (nlh->nlmsg_flags & NLM_F_DUMP) {
                 struct netlink_dump_control c = {
@@ -2116,7 +2122,8 @@ nlmsg_failure:
  
  static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const cda[])
+                            const struct nlattr * const cda[],
+                            struct netlink_ext_ack *extack)
  {
         struct sk_buff *skb2;
         int err;
@@ -2778,7 +2785,8 @@ out:
  static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
                                  struct sk_buff *skb,
                                  const struct nlmsghdr *nlh,
-                                const struct nlattr * const cda[])
+                                const struct nlattr * const cda[],
+                                struct netlink_ext_ack *extack)
  {
         int err;
         struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2822,7 +2830,8 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
  
  static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
                                 struct sk_buff *skb, const struct nlmsghdr *nlh,
-                               const struct nlattr * const cda[])
+                               const struct nlattr * const cda[],
+                               struct netlink_ext_ack *extack)
  {
         struct nf_conntrack_tuple tuple;
         struct nf_conntrack_expect *exp;
@@ -2834,7 +2843,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
  
         if (nlh->nlmsg_flags & NLM_F_DUMP) {
                 if (cda[CTA_EXPECT_MASTER])
-                       return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda);
+                       return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda,
+                                                    extack);
                 else {
                         struct netlink_dump_control c = {
                                 .dump = ctnetlink_exp_dump_table,
@@ -2902,7 +2912,8 @@ out:
  
  static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
                                 struct sk_buff *skb, const struct nlmsghdr *nlh,
-                               const struct nlattr * const cda[])
+                               const struct nlattr * const cda[],
+                               struct netlink_ext_ack *extack)
  {
         struct nf_conntrack_expect *exp;
         struct nf_conntrack_tuple tuple;
@@ -3190,7 +3201,8 @@ err_ct:
  
  static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
                                 struct sk_buff *skb, const struct nlmsghdr *nlh,
-                               const struct nlattr * const cda[])
+                               const struct nlattr * const cda[],
+                               struct netlink_ext_ack *extack)
  {
         struct nf_conntrack_tuple tuple;
         struct nf_conntrack_expect *exp;
@@ -3296,7 +3308,8 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
  static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
                                   struct sk_buff *skb,
                                   const struct nlmsghdr *nlh,
-                                 const struct nlattr * const cda[])
+                                 const struct nlattr * const cda[],
+                                 struct netlink_ext_ack *extack)
  {
         if (nlh->nlmsg_flags & NLM_F_DUMP) {
                 struct netlink_dump_control c = {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c

index 2de6c1fe326149c5ab46f06b9ca7a3db992c4e7f..1dcad229c3cc7290c9e89bea109ebf6098e79b21 100644 (file)
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,8 +28,8 @@
  #include <net/netfilter/nf_conntrack_l4proto.h>
  #include <net/netfilter/nf_conntrack_core.h>
  
-static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
-struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
+struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
  EXPORT_SYMBOL_GPL(nf_ct_l3protos);
  
  static DEFINE_MUTEX(nf_ct_proto_mutex);
@@ -68,7 +68,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header,
  struct nf_conntrack_l4proto *
  __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
  {
-       if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
+       if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL))
                 return &nf_conntrack_l4proto_generic;
  
         return rcu_dereference(nf_ct_protos[l3proto][l4proto]);
@@ -212,7 +212,7 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
         int ret = 0;
         struct nf_conntrack_l3proto *old;
  
-       if (proto->l3proto >= AF_MAX)
+       if (proto->l3proto >= NFPROTO_NUMPROTO)
                 return -EBUSY;
  
         if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
  
  void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
  {
-       BUG_ON(proto->l3proto >= AF_MAX);
+       BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
  
         mutex_lock(&nf_ct_proto_mutex);
         BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
@@ -265,6 +265,8 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto)
         mutex_unlock(&nf_ct_proto_mutex);
  
         synchronize_rcu();
+       /* Remove all conntrack entries for this protocol */
+       nf_ct_iterate_destroy(kill_l3proto, proto);
  }
  EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
  
@@ -280,9 +282,6 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
          */
         if (proto->net_ns_put)
                 proto->net_ns_put(net);
-
-       /* Remove all contrack entries for this protocol */
-       nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
  }
  EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
  
@@ -342,7 +341,7 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
  {
         int ret = 0;
  
-       if (l4proto->l3proto >= PF_MAX)
+       if (l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos))
                 return -EBUSY;
  
         if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
@@ -421,17 +420,23 @@ out:
  }
  EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
  
-void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+
  {
-       BUG_ON(l4proto->l3proto >= PF_MAX);
+       BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos));
  
-       mutex_lock(&nf_ct_proto_mutex);
         BUG_ON(rcu_dereference_protected(
                         nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
                         lockdep_is_held(&nf_ct_proto_mutex)
                         ) != l4proto);
         rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
                            &nf_conntrack_l4proto_generic);
+}
+
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
+{
+       mutex_lock(&nf_ct_proto_mutex);
+       __nf_ct_l4proto_unregister_one(l4proto);
         mutex_unlock(&nf_ct_proto_mutex);
  
         synchronize_rcu();
@@ -448,9 +453,6 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
  
         pn->users--;
         nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
-
-       /* Remove all contrack entries for this protocol */
-       nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
  }
  EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
  
@@ -500,8 +502,26 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
+/*
+ * Unregister every l4 tracker in l4proto[0..num_proto-1], then drop the
+ * conntrack entries that still belong to each of them.
+ */
  void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
                               unsigned int num_proto)
  {
+       unsigned int i;
+
+       /* The __ helper leaves taking nf_ct_proto_mutex to its caller. */
+       mutex_lock(&nf_ct_proto_mutex);
-       while (num_proto-- != 0)
-               nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+       for (i = 0; i < num_proto; i++)
+               __nf_ct_l4proto_unregister_one(l4proto[i]);
+       mutex_unlock(&nf_ct_proto_mutex);
+
+       synchronize_net();
+
+       /*
+        * kill_l4proto() matches against a single tracker, so hand it each
+        * array element in turn rather than the array pointer itself.
+        */
+       for (i = 0; i < num_proto; i++)
+               nf_ct_iterate_destroy(kill_l4proto, l4proto[i]);
  }
  EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
  
@@ -548,7 +556,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
  int nf_conntrack_proto_init(void)
  {
         unsigned int i;
-       for (i = 0; i < AF_MAX; i++)
+       for (i = 0; i < NFPROTO_NUMPROTO; i++)
                 rcu_assign_pointer(nf_ct_l3protos[i],
                                    &nf_conntrack_l3proto_generic);
         return 0;
@@ -558,6 +566,6 @@ void nf_conntrack_proto_fini(void)
  {
         unsigned int i;
         /* free l3proto protocol tables */
-       for (i = 0; i < PF_MAX; i++)
+       for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
                 kfree(nf_ct_protos[i]);
  }
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c

index 1c5b14a6cab369591bd13e22b284a0737ad75c2e..31c6c8ee9d5d40c50b1e0451001051a3dc06d1bf 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -190,7 +190,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
  }
  
  #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count)    \
-for ((offset) = (dataoff) + sizeof(sctp_sctphdr_t), (count) = 0;       \
+for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0;       \
         (offset) < (skb)->len &&                                        \
         ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch)));   \
         (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++)
@@ -202,7 +202,7 @@ static int do_basic_checks(struct nf_conn *ct,
                            unsigned long *map)
  {
         u_int32_t offset, count;
-       sctp_chunkhdr_t _sch, *sch;
+       struct sctp_chunkhdr _sch, *sch;
         int flag;
  
         flag = 0;
@@ -395,9 +395,9 @@ static int sctp_packet(struct nf_conn *ct,
                 /* If it is an INIT or an INIT ACK note down the vtag */
                 if (sch->type == SCTP_CID_INIT ||
                     sch->type == SCTP_CID_INIT_ACK) {
-                       sctp_inithdr_t _inithdr, *ih;
+                       struct sctp_inithdr _inithdr, *ih;
  
-                       ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
+                       ih = skb_header_pointer(skb, offset + sizeof(_sch),
                                                 sizeof(_inithdr), &_inithdr);
                         if (ih == NULL)
                                 goto out_unlock;
@@ -471,23 +471,20 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
  
                 /* Copy the vtag into the state info */
                 if (sch->type == SCTP_CID_INIT) {
-                       if (sh->vtag == 0) {
-                               sctp_inithdr_t _inithdr, *ih;
+                       struct sctp_inithdr _inithdr, *ih;
+                       /* Sec 8.5.1 (A) */
+                       if (sh->vtag)
+                               return false;
  
-                               ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
-                                                       sizeof(_inithdr), &_inithdr);
-                               if (ih == NULL)
-                                       return false;
+                       ih = skb_header_pointer(skb, offset + sizeof(_sch),
+                                               sizeof(_inithdr), &_inithdr);
+                       if (!ih)
+                               return false;
  
-                               pr_debug("Setting vtag %x for new conn\n",
-                                        ih->init_tag);
+                       pr_debug("Setting vtag %x for new conn\n",
+                                ih->init_tag);
  
-                               ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
-                                                               ih->init_tag;
-                       } else {
-                               /* Sec 8.5.1 (A) */
-                               return false;
-                       }
+                       ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
                 } else if (sch->type == SCTP_CID_HEARTBEAT) {
                         pr_debug("Setting vtag %x for secondary conntrack\n",
                                  sh->vtag);
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c

index c9d7f95768ab35e26bc88c4c2350d792e78da9ba..f4a566e672135d420246471be6cd35a9e77bfbf1 100644 (file)
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -13,6 +13,7 @@
  #include <linux/netfilter.h>
  #include <linux/netfilter/nf_tables.h>
  #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_dup_netdev.h>
  
  static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
  {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c

index 6c72922d20caee83f498cb02cdce0c46899a1c22..832c5a08d9a58f216f61008ebdb6636e2384f5e3 100644 (file)
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -582,12 +582,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
                 .l3proto = l3proto,
                 .l4proto = l4proto,
         };
-       struct net *net;
  
-       rtnl_lock();
-       for_each_net(net)
-               nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
-       rtnl_unlock();
+       nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
  }
  
  static void nf_nat_l3proto_clean(u8 l3proto)
@@ -595,13 +591,8 @@ static void nf_nat_l3proto_clean(u8 l3proto)
         struct nf_nat_proto_clean clean = {
                 .l3proto = l3proto,
         };
-       struct net *net;
  
-       rtnl_lock();
-
-       for_each_net(net)
-               nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
-       rtnl_unlock();
+       nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
  }
  
  /* Protocol registration. */
@@ -822,17 +813,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
  }
  #endif
  
-static void __net_exit nf_nat_net_exit(struct net *net)
-{
-       struct nf_nat_proto_clean clean = {};
-
-       nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
-}
-
-static struct pernet_operations nf_nat_net_ops = {
-       .exit = nf_nat_net_exit,
-};
-
  static struct nf_ct_helper_expectfn follow_master_nat = {
         .name           = "nat-follow-master",
         .expectfn       = nf_nat_follow_master,
@@ -853,10 +833,6 @@ static int __init nf_nat_init(void)
                 return ret;
         }
  
-       ret = register_pernet_subsys(&nf_nat_net_ops);
-       if (ret < 0)
-               goto cleanup_extend;
-
         nf_ct_helper_expectfn_register(&follow_master_nat);
  
         BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -867,18 +843,15 @@ static int __init nf_nat_init(void)
         RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
  #endif
         return 0;
-
- cleanup_extend:
-       rhltable_destroy(&nf_nat_bysource_table);
-       nf_ct_extend_unregister(&nat_extend);
-       return ret;
  }
  
  static void __exit nf_nat_cleanup(void)
  {
+       struct nf_nat_proto_clean clean = {};
         unsigned int i;
  
-       unregister_pernet_subsys(&nf_nat_net_ops);
+       nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
+
         nf_ct_extend_unregister(&nat_extend);
         nf_ct_helper_expectfn_unregister(&follow_master_nat);
         RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c

index 804e8a0ab36ef56b120ea89be1994b39eca5bc36..c57ee3240b1d5e848077c61f37c72a40ed0d1afd 100644 (file)
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -32,7 +32,7 @@ sctp_manip_pkt(struct sk_buff *skb,
                const struct nf_conntrack_tuple *tuple,
                enum nf_nat_manip_type maniptype)
  {
-       sctp_sctphdr_t *hdr;
+       struct sctphdr *hdr;
         int hdrsize = 8;
  
         /* This could be an inner header returned in imcp packet; in such
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index da314be0c048720172bbd153cd2f730b486603ce..7843efa33c598f9d1785bc64e99246941213d571 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -13,6 +13,7 @@
  #include <linux/list.h>
  #include <linux/skbuff.h>
  #include <linux/netlink.h>
+#include <linux/vmalloc.h>
  #include <linux/netfilter.h>
  #include <linux/netfilter/nfnetlink.h>
  #include <linux/netfilter/nf_tables.h>
@@ -386,7 +387,7 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table)
         return ++table->hgenerator;
  }
  
-static const struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
+static const struct nf_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
  
  static const struct nf_chain_type *
  __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
@@ -534,7 +535,8 @@ done:
  
  static int nf_tables_gettable(struct net *net, struct sock *nlsk,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nla[])
+                             const struct nlattr * const nla[],
+                             struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_cur(net);
@@ -677,7 +679,8 @@ err:
  
  static int nf_tables_newtable(struct net *net, struct sock *nlsk,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nla[])
+                             const struct nlattr * const nla[],
+                             struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -830,7 +833,8 @@ out:
  
  static int nf_tables_deltable(struct net *net, struct sock *nlsk,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nla[])
+                             const struct nlattr * const nla[],
+                             struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -869,6 +873,9 @@ int nft_register_chain_type(const struct nf_chain_type *ctype)
  {
         int err = 0;
  
+       if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO))
+               return -EINVAL;
+
         nfnl_lock(NFNL_SUBSYS_NFTABLES);
         if (chain_type[ctype->family][ctype->type] != NULL) {
                 err = -EBUSY;
@@ -1123,7 +1130,8 @@ done:
  
  static int nf_tables_getchain(struct net *net, struct sock *nlsk,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nla[])
+                             const struct nlattr * const nla[],
+                             struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_cur(net);
@@ -1319,7 +1327,8 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
  
  static int nf_tables_newchain(struct net *net, struct sock *nlsk,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nla[])
+                             const struct nlattr * const nla[],
+                             struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         const struct nlattr * uninitialized_var(name);
@@ -1557,7 +1566,8 @@ err1:
  
  static int nf_tables_delchain(struct net *net, struct sock *nlsk,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nla[])
+                             const struct nlattr * const nla[],
+                             struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -2038,7 +2048,8 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
  
  static int nf_tables_getrule(struct net *net, struct sock *nlsk,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const nla[])
+                            const struct nlattr * const nla[],
+                            struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_cur(net);
@@ -2131,7 +2142,8 @@ static struct nft_expr_info *info;
  
  static int nf_tables_newrule(struct net *net, struct sock *nlsk,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const nla[])
+                            const struct nlattr * const nla[],
+                            struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -2313,7 +2325,8 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
  
  static int nf_tables_delrule(struct net *net, struct sock *nlsk,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const nla[])
+                            const struct nlattr * const nla[],
+                            struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -2377,64 +2390,77 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
   * Sets
   */
  
-static LIST_HEAD(nf_tables_set_ops);
+static LIST_HEAD(nf_tables_set_types);
  
-int nft_register_set(struct nft_set_ops *ops)
+int nft_register_set(struct nft_set_type *type)
  {
         nfnl_lock(NFNL_SUBSYS_NFTABLES);
-       list_add_tail_rcu(&ops->list, &nf_tables_set_ops);
+       list_add_tail_rcu(&type->list, &nf_tables_set_types);
         nfnl_unlock(NFNL_SUBSYS_NFTABLES);
         return 0;
  }
  EXPORT_SYMBOL_GPL(nft_register_set);
  
-void nft_unregister_set(struct nft_set_ops *ops)
+void nft_unregister_set(struct nft_set_type *type)
  {
         nfnl_lock(NFNL_SUBSYS_NFTABLES);
-       list_del_rcu(&ops->list);
+       list_del_rcu(&type->list);
         nfnl_unlock(NFNL_SUBSYS_NFTABLES);
  }
  EXPORT_SYMBOL_GPL(nft_unregister_set);
  
+#define NFT_SET_FEATURES       (NFT_SET_INTERVAL | NFT_SET_MAP | \
+                                NFT_SET_TIMEOUT | NFT_SET_OBJECT)
+
+static bool nft_set_ops_candidate(const struct nft_set_ops *ops, u32 flags)
+{
+       return (flags & ops->features) == (flags & NFT_SET_FEATURES);
+}
+
  /*
   * Select a set implementation based on the data characteristics and the
   * given policy. The total memory use might not be known if no size is
   * given, in that case the amount of memory per element is used.
   */
  static const struct nft_set_ops *
-nft_select_set_ops(const struct nlattr * const nla[],
+nft_select_set_ops(const struct nft_ctx *ctx,
+                  const struct nlattr * const nla[],
                    const struct nft_set_desc *desc,
                    enum nft_set_policies policy)
  {
         const struct nft_set_ops *ops, *bops;
         struct nft_set_estimate est, best;
-       u32 features;
+       const struct nft_set_type *type;
+       u32 flags = 0;
  
  #ifdef CONFIG_MODULES
-       if (list_empty(&nf_tables_set_ops)) {
+       if (list_empty(&nf_tables_set_types)) {
                 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
                 request_module("nft-set");
                 nfnl_lock(NFNL_SUBSYS_NFTABLES);
-               if (!list_empty(&nf_tables_set_ops))
+               if (!list_empty(&nf_tables_set_types))
                         return ERR_PTR(-EAGAIN);
         }
  #endif
-       features = 0;
-       if (nla[NFTA_SET_FLAGS] != NULL) {
-               features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
-               features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT |
-                           NFT_SET_OBJECT;
-       }
+       if (nla[NFTA_SET_FLAGS] != NULL)
+               flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
  
         bops        = NULL;
         best.size   = ~0;
         best.lookup = ~0;
         best.space  = ~0;
  
-       list_for_each_entry(ops, &nf_tables_set_ops, list) {
-               if ((ops->features & features) != features)
+       list_for_each_entry(type, &nf_tables_set_types, list) {
+               if (!type->select_ops)
+                       ops = type->ops;
+               else
+                       ops = type->select_ops(ctx, desc, flags);
+               if (!ops)
+                       continue;
+
+               if (!nft_set_ops_candidate(ops, flags))
                         continue;
-               if (!ops->estimate(desc, features, &est))
+               if (!ops->estimate(desc, flags, &est))
                         continue;
  
                 switch (policy) {
@@ -2465,10 +2491,10 @@ nft_select_set_ops(const struct nlattr * const nla[],
                         break;
                 }
  
-               if (!try_module_get(ops->owner))
+               if (!try_module_get(type->owner))
                         continue;
                 if (bops != NULL)
-                       module_put(bops->owner);
+                       module_put(bops->type->owner);
  
                 bops = ops;
                 best = est;
@@ -2816,7 +2842,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
  
  static int nf_tables_getset(struct net *net, struct sock *nlsk,
                             struct sk_buff *skb, const struct nlmsghdr *nlh,
-                           const struct nlattr * const nla[])
+                           const struct nlattr * const nla[],
+                           struct netlink_ext_ack *extack)
  {
         u8 genmask = nft_genmask_cur(net);
         const struct nft_set *set;
@@ -2892,7 +2919,8 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
  
  static int nf_tables_newset(struct net *net, struct sock *nlsk,
                             struct sk_buff *skb, const struct nlmsghdr *nlh,
-                           const struct nlattr * const nla[])
+                           const struct nlattr * const nla[],
+                           struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -3029,7 +3057,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
         if (!(nlh->nlmsg_flags & NLM_F_CREATE))
                 return -ENOENT;
  
-       ops = nft_select_set_ops(nla, &desc, policy);
+       ops = nft_select_set_ops(&ctx, nla, &desc, policy);
         if (IS_ERR(ops))
                 return PTR_ERR(ops);
  
@@ -3039,12 +3067,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
  
         size = 0;
         if (ops->privsize != NULL)
-               size = ops->privsize(nla);
+               size = ops->privsize(nla, &desc);
  
-       err = -ENOMEM;
-       set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
-       if (set == NULL)
+       set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
+       if (!set) {
+               err = -ENOMEM;
                 goto err1;
+       }
  
         nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
         err = nf_tables_set_alloc_name(&ctx, set, name);
@@ -3087,17 +3116,17 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
  err3:
         ops->destroy(set);
  err2:
-       kfree(set);
+       kvfree(set);
  err1:
-       module_put(ops->owner);
+       module_put(ops->type->owner);
         return err;
  }
  
  static void nft_set_destroy(struct nft_set *set)
  {
         set->ops->destroy(set);
-       module_put(set->ops->owner);
-       kfree(set);
+       module_put(set->ops->type->owner);
+       kvfree(set);
  }
  
  static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
@@ -3109,7 +3138,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
  
  static int nf_tables_delset(struct net *net, struct sock *nlsk,
                             struct sk_buff *skb, const struct nlmsghdr *nlh,
-                           const struct nlattr * const nla[])
+                           const struct nlattr * const nla[],
+                           struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -3469,7 +3499,8 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb)
  
  static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
                                 struct sk_buff *skb, const struct nlmsghdr *nlh,
-                               const struct nlattr * const nla[])
+                               const struct nlattr * const nla[],
+                               struct netlink_ext_ack *extack)
  {
         u8 genmask = nft_genmask_cur(net);
         const struct nft_set *set;
@@ -3870,7 +3901,8 @@ err1:
  
  static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
                                 struct sk_buff *skb, const struct nlmsghdr *nlh,
-                               const struct nlattr * const nla[])
+                               const struct nlattr * const nla[],
+                               struct netlink_ext_ack *extack)
  {
         u8 genmask = nft_genmask_next(net);
         const struct nlattr *attr;
@@ -4067,7 +4099,8 @@ err1:
  
  static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
                                 struct sk_buff *skb, const struct nlmsghdr *nlh,
-                               const struct nlattr * const nla[])
+                               const struct nlattr * const nla[],
+                               struct netlink_ext_ack *extack)
  {
         u8 genmask = nft_genmask_next(net);
         const struct nlattr *attr;
@@ -4277,7 +4310,8 @@ static const struct nft_object_type *nft_obj_type_get(u32 objtype)
  
  static int nf_tables_newobj(struct net *net, struct sock *nlsk,
                             struct sk_buff *skb, const struct nlmsghdr *nlh,
-                           const struct nlattr * const nla[])
+                           const struct nlattr * const nla[],
+                           struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         const struct nft_object_type *type;
@@ -4471,7 +4505,8 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
  
  static int nf_tables_getobj(struct net *net, struct sock *nlsk,
                             struct sk_buff *skb, const struct nlmsghdr *nlh,
-                           const struct nlattr * const nla[])
+                           const struct nlattr * const nla[],
+                           struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_cur(net);
@@ -4549,8 +4584,9 @@ static void nft_obj_destroy(struct nft_object *obj)
  }
  
  static int nf_tables_delobj(struct net *net, struct sock *nlsk,
-                             struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nla[])
+                           struct sk_buff *skb, const struct nlmsghdr *nlh,
+                           const struct nlattr * const nla[],
+                           struct netlink_ext_ack *extack)
  {
         const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u8 genmask = nft_genmask_next(net);
@@ -4680,7 +4716,8 @@ err:
  
  static int nf_tables_getgen(struct net *net, struct sock *nlsk,
                             struct sk_buff *skb, const struct nlmsghdr *nlh,
-                           const struct nlattr * const nla[])
+                           const struct nlattr * const nla[],
+                           struct netlink_ext_ack *extack)
  {
         struct sk_buff *skb2;
         int err;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c

index 80f5ecf2c3d7096f579361663da5d2e651eebd3e..92b05e188fd1f8adac7e363f59998ce8310ed9ed 100644 (file)
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -201,7 +201,8 @@ replay:
  
                 if (nc->call_rcu) {
                         err = nc->call_rcu(net, net->nfnl, skb, nlh,
-                                          (const struct nlattr **)cda);
+                                          (const struct nlattr **)cda,
+                                          extack);
                         rcu_read_unlock();
                 } else {
                         rcu_read_unlock();
@@ -211,7 +212,8 @@ replay:
                                 err = -EAGAIN;
                         else if (nc->call)
                                 err = nc->call(net, net->nfnl, skb, nlh,
-                                              (const struct nlattr **)cda);
+                                              (const struct nlattr **)cda,
+                                              extack);
                         else
                                 err = -EINVAL;
                         nfnl_unlock(subsys_id);
@@ -226,9 +228,11 @@ struct nfnl_err {
         struct list_head        head;
         struct nlmsghdr         *nlh;
         int                     err;
+       struct netlink_ext_ack  extack;
  };
  
-static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err,
+                       const struct netlink_ext_ack *extack)
  {
         struct nfnl_err *nfnl_err;
  
@@ -238,6 +242,7 @@ static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
  
         nfnl_err->nlh = nlh;
         nfnl_err->err = err;
+       nfnl_err->extack = *extack;
         list_add_tail(&nfnl_err->head, list);
  
         return 0;
@@ -262,7 +267,8 @@ static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
         struct nfnl_err *nfnl_err, *next;
  
         list_for_each_entry_safe(nfnl_err, next, err_list, head) {
-               netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, NULL);
+               netlink_ack(skb, nfnl_err->nlh, nfnl_err->err,
+                           &nfnl_err->extack);
                 nfnl_err_del(nfnl_err);
         }
  }
@@ -280,6 +286,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
         struct net *net = sock_net(skb->sk);
         const struct nfnetlink_subsystem *ss;
         const struct nfnl_callback *nc;
+       struct netlink_ext_ack extack;
         LIST_HEAD(err_list);
         u32 status;
         int err;
@@ -325,6 +332,7 @@ replay:
         while (skb->len >= nlmsg_total_size(0)) {
                 int msglen, type;
  
+               memset(&extack, 0, sizeof(extack));
                 nlh = nlmsg_hdr(skb);
                 err = 0;
  
@@ -384,7 +392,8 @@ replay:
  
                         if (nc->call_batch) {
                                 err = nc->call_batch(net, net->nfnl, skb, nlh,
-                                                    (const struct nlattr **)cda);
+                                                    (const struct nlattr **)cda,
+                                                    &extack);
                         }
  
                         /* The lock was released to autoload some module, we
@@ -402,7 +411,7 @@ ack:
                          * processed, this avoids that the same error is
                          * reported several times when replaying the batch.
                          */
-                       if (nfnl_err_add(&err_list, nlh, err) < 0) {
+                       if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
                                 /* We failed to enqueue an error, reset the
                                  * list of errors and send OOM to userspace
                                  * pointing to the batch header.
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c

index 9898fb4d0512ce80d0bf970340e5da43db25ce15..c45e6d4358abe912d5a49efca4b183c6fbe21127 100644 (file)
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -49,7 +49,8 @@ struct nfacct_filter {
  
  static int nfnl_acct_new(struct net *net, struct sock *nfnl,
                          struct sk_buff *skb, const struct nlmsghdr *nlh,
-                        const struct nlattr * const tb[])
+                        const struct nlattr * const tb[],
+                        struct netlink_ext_ack *extack)
  {
         struct nf_acct *nfacct, *matching = NULL;
         char *acct_name;
@@ -264,7 +265,8 @@ nfacct_filter_alloc(const struct nlattr * const attr)
  
  static int nfnl_acct_get(struct net *net, struct sock *nfnl,
                          struct sk_buff *skb, const struct nlmsghdr *nlh,
-                        const struct nlattr * const tb[])
+                        const struct nlattr * const tb[],
+                        struct netlink_ext_ack *extack)
  {
         int ret = -ENOENT;
         struct nf_acct *cur;
@@ -343,7 +345,8 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
  
  static int nfnl_acct_del(struct net *net, struct sock *nfnl,
                          struct sk_buff *skb, const struct nlmsghdr *nlh,
-                        const struct nlattr * const tb[])
+                        const struct nlattr * const tb[],
+                        struct netlink_ext_ack *extack)
  {
         struct nf_acct *cur, *tmp;
         int ret = -ENOENT;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c

index be678a323598c3237a2cae09e4e3ed4bdea46614..41628b3936731b77885717c27cf4dfa8e62b3c0f 100644 (file)
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -398,7 +398,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
  
  static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const tb[])
+                            const struct nlattr * const tb[],
+                            struct netlink_ext_ack *extack)
  {
         const char *helper_name;
         struct nf_conntrack_helper *cur, *helper = NULL;
@@ -599,7 +600,8 @@ out:
  
  static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const tb[])
+                            const struct nlattr * const tb[],
+                            struct netlink_ext_ack *extack)
  {
         int ret = -ENOENT;
         struct nf_conntrack_helper *cur;
@@ -666,7 +668,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
  
  static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const tb[])
+                            const struct nlattr * const tb[],
+                            struct netlink_ext_ack *extack)
  {
         char *helper_name = NULL;
         struct nf_conntrack_helper *cur;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c

index a3e7bb54d96acf2e7ac570077d57502d5b23625b..400e9ae971533439ed1711c63ce222a5833ee105 100644 (file)
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -69,7 +69,8 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
  static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
                                  struct sk_buff *skb,
                                  const struct nlmsghdr *nlh,
-                                const struct nlattr * const cda[])
+                                const struct nlattr * const cda[],
+                                struct netlink_ext_ack *extack)
  {
         __u16 l3num;
         __u8 l4num;
@@ -239,7 +240,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
  static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
                                  struct sk_buff *skb,
                                  const struct nlmsghdr *nlh,
-                                const struct nlattr * const cda[])
+                                const struct nlattr * const cda[],
+                                struct netlink_ext_ack *extack)
  {
         int ret = -ENOENT;
         char *name;
@@ -287,49 +289,20 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
         return ret;
  }
  
-static void untimeout(struct nf_conntrack_tuple_hash *i,
-                     struct ctnl_timeout *timeout)
+static int untimeout(struct nf_conn *ct, void *timeout)
  {
-       struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
         struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
  
         if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
                 RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+       /* We are not intended to delete this conntrack. */
+       return 0;
  }
  
  static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
  {
-       struct nf_conntrack_tuple_hash *h;
-       const struct hlist_nulls_node *nn;
-       unsigned int last_hsize;
-       spinlock_t *lock;
-       int i, cpu;
-
-       for_each_possible_cpu(cpu) {
-               struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
-
-               spin_lock_bh(&pcpu->lock);
-               hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
-                       untimeout(h, timeout);
-               spin_unlock_bh(&pcpu->lock);
-       }
-
-       local_bh_disable();
-restart:
-       last_hsize = nf_conntrack_htable_size;
-       for (i = 0; i < last_hsize; i++) {
-               lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
-               nf_conntrack_lock(lock);
-               if (last_hsize != nf_conntrack_htable_size) {
-                       spin_unlock(lock);
-                       goto restart;
-               }
-
-               hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
-                       untimeout(h, timeout);
-               spin_unlock(lock);
-       }
-       local_bh_enable();
+       nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
  }
  
  /* try to delete object, fail if it is still in use. */
@@ -355,7 +328,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
  static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
                                  struct sk_buff *skb,
                                  const struct nlmsghdr *nlh,
-                                const struct nlattr * const cda[])
+                                const struct nlattr * const cda[],
+                                struct netlink_ext_ack *extack)
  {
         struct ctnl_timeout *cur, *tmp;
         int ret = -ENOENT;
@@ -386,7 +360,8 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
  static int cttimeout_default_set(struct net *net, struct sock *ctnl,
                                  struct sk_buff *skb,
                                  const struct nlmsghdr *nlh,
-                                const struct nlattr * const cda[])
+                                const struct nlattr * const cda[],
+                                struct netlink_ext_ack *extack)
  {
         __u16 l3num;
         __u8 l4num;
@@ -475,7 +450,8 @@ nla_put_failure:
  static int cttimeout_default_get(struct net *net, struct sock *ctnl,
                                  struct sk_buff *skb,
                                  const struct nlmsghdr *nlh,
-                                const struct nlattr * const cda[])
+                                const struct nlattr * const cda[],
+                                struct netlink_ext_ack *extack)
  {
         __u16 l3num;
         __u8 l4num;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c

index 94ec0d0765a85a6b2150e166256a603cecc38d72..c684ba95dbb49447b0ddb2a7a1f27b54536a5a03 100644 (file)
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -795,7 +795,8 @@ static struct notifier_block nfulnl_rtnl_notifier = {
  
  static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nfqa[])
+                             const struct nlattr * const nfqa[],
+                             struct netlink_ext_ack *extack)
  {
         return -ENOTSUPP;
  }
@@ -818,7 +819,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
  
  static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
                               struct sk_buff *skb, const struct nlmsghdr *nlh,
-                             const struct nlattr * const nfula[])
+                             const struct nlattr * const nfula[],
+                             struct netlink_ext_ack *extack)
  {
         struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u_int16_t group_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 1b17a1b445a377286dacba1ce91dcb4af875513a..16fa04086880c5390ed35c8d1e1bc90c923d45de 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1032,7 +1032,8 @@ static int nfq_id_after(unsigned int id, unsigned int max)
  static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
                                     struct sk_buff *skb,
                                     const struct nlmsghdr *nlh,
-                                   const struct nlattr * const nfqa[])
+                                   const struct nlattr * const nfqa[],
+                                   struct netlink_ext_ack *extack)
  {
         struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         struct nf_queue_entry *entry, *tmp;
@@ -1136,7 +1137,8 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
  static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
                               struct sk_buff *skb,
                               const struct nlmsghdr *nlh,
-                             const struct nlattr * const nfqa[])
+                             const struct nlattr * const nfqa[],
+                             struct netlink_ext_ack *extack)
  {
         struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -1200,7 +1202,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
  
  static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const nfqa[])
+                            const struct nlattr * const nfqa[],
+                            struct netlink_ext_ack *extack)
  {
         return -ENOTSUPP;
  }
@@ -1217,7 +1220,8 @@ static const struct nf_queue_handler nfqh = {
  
  static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
                              struct sk_buff *skb, const struct nlmsghdr *nlh,
-                            const struct nlattr * const nfqa[])
+                            const struct nlattr * const nfqa[],
+                            struct netlink_ext_ack *extack)
  {
         struct nfgenmsg *nfmsg = nlmsg_data(nlh);
         u_int16_t queue_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c

index f753ec69f7902b80c9f448f764ecc1e48f6a2680..f5a7cb68694e76db73dec897ad51305f29bb8981 100644 (file)
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -530,7 +530,8 @@ nla_put_failure:
  
  static int nfnl_compat_get(struct net *net, struct sock *nfnl,
                            struct sk_buff *skb, const struct nlmsghdr *nlh,
-                          const struct nlattr * const tb[])
+                          const struct nlattr * const tb[],
+                          struct netlink_ext_ack *extack)
  {
         int ret = 0, target;
         struct nfgenmsg *nfmsg;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c

index d3eb640bc78498f84ff706f2c27042b54feb3bfc..c7383d8f88d0b2fa90b9bc9d94df20e6198dd2de 100644 (file)
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -23,9 +23,9 @@ struct nft_rt {
         enum nft_registers      dreg:8;
  };
  
-void nft_rt_get_eval(const struct nft_expr *expr,
-                    struct nft_regs *regs,
-                    const struct nft_pktinfo *pkt)
+static void nft_rt_get_eval(const struct nft_expr *expr,
+                           struct nft_regs *regs,
+                           const struct nft_pktinfo *pkt)
  {
         const struct nft_rt *priv = nft_expr_priv(expr);
         const struct sk_buff *skb = pkt->skb;
@@ -72,9 +72,9 @@ const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
         [NFTA_RT_KEY]           = { .type = NLA_U32 },
  };
  
-int nft_rt_get_init(const struct nft_ctx *ctx,
-                   const struct nft_expr *expr,
-                   const struct nlattr * const tb[])
+static int nft_rt_get_init(const struct nft_ctx *ctx,
+                          const struct nft_expr *expr,
+                          const struct nlattr * const tb[])
  {
         struct nft_rt *priv = nft_expr_priv(expr);
         unsigned int len;
@@ -103,8 +103,8 @@ int nft_rt_get_init(const struct nft_ctx *ctx,
                                            NFT_DATA_VALUE, len);
  }
  
-int nft_rt_get_dump(struct sk_buff *skb,
-                   const struct nft_expr *expr)
+static int nft_rt_get_dump(struct sk_buff *skb,
+                          const struct nft_expr *expr)
  {
         const struct nft_rt *priv = nft_expr_priv(expr);
  
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c

index b988162b5b15b9442b496abf2571a9cf7dbc66f3..734989c40579ef4ac6a36f770517a89202fa6b8b 100644 (file)
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -236,7 +236,8 @@ static inline u32 nft_bitmap_total_size(u32 klen)
         return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
  }
  
-static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[])
+static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[],
+                                       const struct nft_set_desc *desc)
  {
         u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
  
@@ -278,7 +279,9 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
         return true;
  }
  
+static struct nft_set_type nft_bitmap_type;
  static struct nft_set_ops nft_bitmap_ops __read_mostly = {
+       .type           = &nft_bitmap_type,
         .privsize       = nft_bitmap_privsize,
         .elemsize       = offsetof(struct nft_bitmap_elem, ext),
         .estimate       = nft_bitmap_estimate,
@@ -291,17 +294,21 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = {
         .activate       = nft_bitmap_activate,
         .lookup         = nft_bitmap_lookup,
         .walk           = nft_bitmap_walk,
+};
+
+static struct nft_set_type nft_bitmap_type __read_mostly = {
+       .ops            = &nft_bitmap_ops,
         .owner          = THIS_MODULE,
  };
  
  static int __init nft_bitmap_module_init(void)
  {
-       return nft_register_set(&nft_bitmap_ops);
+       return nft_register_set(&nft_bitmap_type);
  }
  
  static void __exit nft_bitmap_module_exit(void)
  {
-       nft_unregister_set(&nft_bitmap_ops);
+       nft_unregister_set(&nft_bitmap_type);
  }
  
  module_init(nft_bitmap_module_init);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c

index 3d3a6df4ce70ea0950a4f07cab75b3c54680e09a..0fa01d772c5e2c10c76094bdda80193fca155581 100644 (file)
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -22,45 +22,43 @@
  #include <net/netfilter/nf_tables.h>
  
  /* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#define NFT_RHASH_ELEMENT_HINT 3
  
-struct nft_hash {
+struct nft_rhash {
         struct rhashtable               ht;
         struct delayed_work             gc_work;
  };
  
-struct nft_hash_elem {
+struct nft_rhash_elem {
         struct rhash_head               node;
         struct nft_set_ext              ext;
  };
  
-struct nft_hash_cmp_arg {
+struct nft_rhash_cmp_arg {
         const struct nft_set            *set;
         const u32                       *key;
         u8                              genmask;
  };
  
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
  {
-       const struct nft_hash_cmp_arg *arg = data;
+       const struct nft_rhash_cmp_arg *arg = data;
  
         return jhash(arg->key, len, seed);
  }
  
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed)
  {
-       const struct nft_hash_elem *he = data;
+       const struct nft_rhash_elem *he = data;
  
         return jhash(nft_set_ext_key(&he->ext), len, seed);
  }
  
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
-                              const void *ptr)
+static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
+                               const void *ptr)
  {
-       const struct nft_hash_cmp_arg *x = arg->key;
-       const struct nft_hash_elem *he = ptr;
+       const struct nft_rhash_cmp_arg *x = arg->key;
+       const struct nft_rhash_elem *he = ptr;
  
         if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
                 return 1;
@@ -71,41 +69,49 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
         return 0;
  }
  
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
-                           const u32 *key, const struct nft_set_ext **ext)
+static const struct rhashtable_params nft_rhash_params = {
+       .head_offset            = offsetof(struct nft_rhash_elem, node),
+       .hashfn                 = nft_rhash_key,
+       .obj_hashfn             = nft_rhash_obj,
+       .obj_cmpfn              = nft_rhash_cmp,
+       .automatic_shrinking    = true,
+};
+
+static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+                            const u32 *key, const struct nft_set_ext **ext)
  {
-       struct nft_hash *priv = nft_set_priv(set);
-       const struct nft_hash_elem *he;
-       struct nft_hash_cmp_arg arg = {
+       struct nft_rhash *priv = nft_set_priv(set);
+       const struct nft_rhash_elem *he;
+       struct nft_rhash_cmp_arg arg = {
                 .genmask = nft_genmask_cur(net),
                 .set     = set,
                 .key     = key,
         };
  
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
         if (he != NULL)
                 *ext = &he->ext;
  
         return !!he;
  }
  
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
-                           void *(*new)(struct nft_set *,
-                                        const struct nft_expr *,
-                                        struct nft_regs *regs),
-                           const struct nft_expr *expr,
-                           struct nft_regs *regs,
-                           const struct nft_set_ext **ext)
+static bool nft_rhash_update(struct nft_set *set, const u32 *key,
+                            void *(*new)(struct nft_set *,
+                                         const struct nft_expr *,
+                                         struct nft_regs *regs),
+                            const struct nft_expr *expr,
+                            struct nft_regs *regs,
+                            const struct nft_set_ext **ext)
  {
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he, *prev;
-       struct nft_hash_cmp_arg arg = {
+       struct nft_rhash *priv = nft_set_priv(set);
+       struct nft_rhash_elem *he, *prev;
+       struct nft_rhash_cmp_arg arg = {
                 .genmask = NFT_GENMASK_ANY,
                 .set     = set,
                 .key     = key,
         };
  
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
         if (he != NULL)
                 goto out;
  
@@ -114,7 +120,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
                 goto err1;
  
         prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
-                                               nft_hash_params);
+                                               nft_rhash_params);
         if (IS_ERR(prev))
                 goto err2;
  
@@ -134,21 +140,21 @@ err1:
         return false;
  }
  
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
-                          const struct nft_set_elem *elem,
-                          struct nft_set_ext **ext)
+static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
+                           const struct nft_set_elem *elem,
+                           struct nft_set_ext **ext)
  {
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he = elem->priv;
-       struct nft_hash_cmp_arg arg = {
+       struct nft_rhash *priv = nft_set_priv(set);
+       struct nft_rhash_elem *he = elem->priv;
+       struct nft_rhash_cmp_arg arg = {
                 .genmask = nft_genmask_next(net),
                 .set     = set,
                 .key     = elem->key.val.data,
         };
-       struct nft_hash_elem *prev;
+       struct nft_rhash_elem *prev;
  
         prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
-                                              nft_hash_params);
+                                               nft_rhash_params);
         if (IS_ERR(prev))
                 return PTR_ERR(prev);
         if (prev) {
@@ -158,19 +164,19 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
         return 0;
  }
  
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
-                             const struct nft_set_elem *elem)
+static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
+                              const struct nft_set_elem *elem)
  {
-       struct nft_hash_elem *he = elem->priv;
+       struct nft_rhash_elem *he = elem->priv;
  
         nft_set_elem_change_active(net, set, &he->ext);
         nft_set_elem_clear_busy(&he->ext);
  }
  
-static bool nft_hash_flush(const struct net *net,
-                          const struct nft_set *set, void *priv)
+static bool nft_rhash_flush(const struct net *net,
+                           const struct nft_set *set, void *priv)
  {
-       struct nft_hash_elem *he = priv;
+       struct nft_rhash_elem *he = priv;
  
         if (!nft_set_elem_mark_busy(&he->ext) ||
             !nft_is_active(net, &he->ext)) {
@@ -180,22 +186,22 @@ static bool nft_hash_flush(const struct net *net,
         return false;
  }
  
-static void *nft_hash_deactivate(const struct net *net,
-                                const struct nft_set *set,
-                                const struct nft_set_elem *elem)
+static void *nft_rhash_deactivate(const struct net *net,
+                                 const struct nft_set *set,
+                                 const struct nft_set_elem *elem)
  {
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he;
-       struct nft_hash_cmp_arg arg = {
+       struct nft_rhash *priv = nft_set_priv(set);
+       struct nft_rhash_elem *he;
+       struct nft_rhash_cmp_arg arg = {
                 .genmask = nft_genmask_next(net),
                 .set     = set,
                 .key     = elem->key.val.data,
         };
  
         rcu_read_lock();
-       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+       he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params);
         if (he != NULL &&
-           !nft_hash_flush(net, set, he))
+           !nft_rhash_flush(net, set, he))
                 he = NULL;
  
         rcu_read_unlock();
@@ -203,21 +209,21 @@ static void *nft_hash_deactivate(const struct net *net,
         return he;
  }
  
-static void nft_hash_remove(const struct net *net,
-                           const struct nft_set *set,
-                           const struct nft_set_elem *elem)
+static void nft_rhash_remove(const struct net *net,
+                            const struct nft_set *set,
+                            const struct nft_set_elem *elem)
  {
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he = elem->priv;
+       struct nft_rhash *priv = nft_set_priv(set);
+       struct nft_rhash_elem *he = elem->priv;
  
-       rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+       rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
  }
  
-static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
-                         struct nft_set_iter *iter)
+static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+                          struct nft_set_iter *iter)
  {
-       struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he;
+       struct nft_rhash *priv = nft_set_priv(set);
+       struct nft_rhash_elem *he;
         struct rhashtable_iter hti;
         struct nft_set_elem elem;
         int err;
@@ -266,16 +272,16 @@ out:
         rhashtable_walk_exit(&hti);
  }
  
-static void nft_hash_gc(struct work_struct *work)
+static void nft_rhash_gc(struct work_struct *work)
  {
         struct nft_set *set;
-       struct nft_hash_elem *he;
-       struct nft_hash *priv;
+       struct nft_rhash_elem *he;
+       struct nft_rhash *priv;
         struct nft_set_gc_batch *gcb = NULL;
         struct rhashtable_iter hti;
         int err;
  
-       priv = container_of(work, struct nft_hash, gc_work.work);
+       priv = container_of(work, struct nft_rhash, gc_work.work);
         set  = nft_set_container_of(priv);
  
         err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
@@ -301,7 +307,7 @@ static void nft_hash_gc(struct work_struct *work)
                 gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
                 if (gcb == NULL)
                         goto out;
-               rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+               rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
                 atomic_dec(&set->nelems);
                 nft_set_gc_batch_add(gcb, he);
         }
@@ -315,82 +321,290 @@ schedule:
                            nft_set_gc_interval(set));
  }
  
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
+                                      const struct nft_set_desc *desc)
  {
-       return sizeof(struct nft_hash);
+       return sizeof(struct nft_rhash);
  }
  
-static const struct rhashtable_params nft_hash_params = {
-       .head_offset            = offsetof(struct nft_hash_elem, node),
-       .hashfn                 = nft_hash_key,
-       .obj_hashfn             = nft_hash_obj,
-       .obj_cmpfn              = nft_hash_cmp,
-       .automatic_shrinking    = true,
-};
-
-static int nft_hash_init(const struct nft_set *set,
-                        const struct nft_set_desc *desc,
-                        const struct nlattr * const tb[])
+static int nft_rhash_init(const struct nft_set *set,
+                         const struct nft_set_desc *desc,
+                         const struct nlattr * const tb[])
  {
-       struct nft_hash *priv = nft_set_priv(set);
-       struct rhashtable_params params = nft_hash_params;
+       struct nft_rhash *priv = nft_set_priv(set);
+       struct rhashtable_params params = nft_rhash_params;
         int err;
  
-       params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+       params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
         params.key_len    = set->klen;
  
         err = rhashtable_init(&priv->ht, &params);
         if (err < 0)
                 return err;
  
-       INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+       INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
         if (set->flags & NFT_SET_TIMEOUT)
                 queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                                    nft_set_gc_interval(set));
         return 0;
  }
  
-static void nft_hash_elem_destroy(void *ptr, void *arg)
+static void nft_rhash_elem_destroy(void *ptr, void *arg)
  {
         nft_set_elem_destroy(arg, ptr, true);
  }
  
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_set *set)
  {
-       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_rhash *priv = nft_set_priv(set);
  
         cancel_delayed_work_sync(&priv->gc_work);
-       rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+       rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
                                     (void *)set);
  }
  
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
-                             struct nft_set_estimate *est)
+static u32 nft_hash_buckets(u32 size)
  {
-       unsigned int esize;
+       return roundup_pow_of_two(size * 4 / 3);
+}
  
-       esize = sizeof(struct nft_hash_elem);
-       if (desc->size) {
-               est->size = sizeof(struct nft_hash) +
-                           roundup_pow_of_two(desc->size * 4 / 3) *
-                           sizeof(struct nft_hash_elem *) +
-                           desc->size * esize;
-       } else {
-               /* Resizing happens when the load drops below 30% or goes
-                * above 75%. The average of 52.5% load (approximated by 50%)
-                * is used for the size estimation of the hash buckets,
-                * meaning we calculate two buckets per element.
-                */
-               est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+/* Fill in the estimate for the rhashtable backend: an all-ones size together
+ * with the O(1) lookup class and the O(n) space class.  Neither @desc nor
+ * @features is consulted.  Always returns true.
+ */
+static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
+                              struct nft_set_estimate *est)
+{
+       est->space  = NFT_SET_CLASS_O_N;
+       est->lookup = NFT_SET_CLASS_O_1;
+       est->size   = ~0;
+
+       return true;
+}
+
+struct nft_hash {
+       u32                             seed;           /* jhash seed, filled by get_random_bytes() in nft_hash_init() */
+       u32                             buckets;        /* number of heads in table[], from nft_hash_buckets(desc->size) */
+       struct hlist_head               table[];        /* bucket heads; storage is accounted for by nft_hash_privsize() */
+};
+
+struct nft_hash_elem {
+       struct hlist_node               node;   /* links the element into one struct nft_hash table[] bucket */
+       struct nft_set_ext              ext;    /* extension area; the key is read through nft_set_ext_key() */
+};
+
+/* Look up @key in the fixed-size hash table.
+ *
+ * The bucket is chosen by jhash() over set->klen bytes of @key, scaled to
+ * priv->buckets.  For the first entry whose key matches and which is active
+ * under the current generation mask, *@ext is pointed at the entry's
+ * extension area and true is returned.  Otherwise false is returned and
+ * *@ext is not written.
+ */
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+                           const u32 *key, const struct nft_set_ext **ext)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       u8 genmask = nft_genmask_cur(net);
+       const struct nft_hash_elem *entry;
+       u32 bucket;
+
+       bucket = reciprocal_scale(jhash(key, set->klen, priv->seed),
+                                 priv->buckets);
+       hlist_for_each_entry_rcu(entry, &priv->table[bucket], node) {
+               if (memcmp(nft_set_ext_key(&entry->ext), key, set->klen) != 0)
+                       continue;
+               if (!nft_set_elem_active(&entry->ext, genmask))
+                       continue;
+
+               *ext = &entry->ext;
+               return true;
+       }
+       return false;
+}
+
+/* Load a 2- or 4-byte key as a u32.
+ *
+ * A @klen of 4 reads the whole word at @key; any other @klen reads a u16 from
+ * the same address.  nft_hash_select_ops() only hands out the fast lookup ops
+ * for key sizes of 2 or 4 bytes.
+ */
+static inline u32 nft_hash_key(const u32 *key, u32 klen)
+{
+       if (klen == 4)
+               return *key;
+
+       /* Keep the const qualifier of @key when narrowing the access. */
+       return *(const u16 *)key;
+}
+
+/* Lookup variant for 2- and 4-byte keys that compares keys as integers
+ * instead of calling memcmp().
+ *
+ * The bucket index must be derived exactly the way nft_hash_insert() derives
+ * it, i.e. jhash() over set->klen bytes of the key.  jhash_1word() on the
+ * loaded key value is not interchangeable with that: jhash() mixes the key
+ * length into its initial state and consumes the key byte by byte, so the two
+ * pick different buckets for 2-byte keys (and for 4-byte keys on big-endian
+ * hosts), which makes inserted elements unreachable from this path.
+ *
+ * Returns true and sets *@ext when an entry with an equal key that is active
+ * under the current generation mask is found, false otherwise.
+ */
+static bool nft_hash_lookup_fast(const struct net *net,
+                                const struct nft_set *set,
+                                const u32 *key, const struct nft_set_ext **ext)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       u8 genmask = nft_genmask_cur(net);
+       const struct nft_hash_elem *he;
+       u32 hash, k1, k2;
+
+       k1 = nft_hash_key(key, set->klen);
+       /* Same hash function and input length as the insertion path. */
+       hash = jhash(key, set->klen, priv->seed);
+       hash = reciprocal_scale(hash, priv->buckets);
+       hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
+               k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen);
+               if (k1 == k2 &&
+                   nft_set_elem_active(&he->ext, genmask)) {
+                       *ext = &he->ext;
+                       return true;
+               }
+       }
+       return false;
+}
+
+/* Insert the element carried in @elem->priv into its hash bucket.
+ *
+ * If the bucket already holds an entry with the same key that is active under
+ * the next generation mask, *@ext is set to that entry's extension area and
+ * -EEXIST is returned.  Otherwise the new element is added at the head of the
+ * bucket and 0 is returned.
+ */
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+                          const struct nft_set_elem *elem,
+                          struct nft_set_ext **ext)
+{
+       struct nft_hash_elem *this = elem->priv, *cur;
+       struct nft_hash *priv = nft_set_priv(set);
+       const void *new_key = nft_set_ext_key(&this->ext);
+       u8 genmask = nft_genmask_next(net);
+       u32 bucket;
+
+       bucket = reciprocal_scale(jhash(new_key, set->klen, priv->seed),
+                                 priv->buckets);
+       hlist_for_each_entry(cur, &priv->table[bucket], node) {
+               if (memcmp(new_key, nft_set_ext_key(&cur->ext), set->klen))
+                       continue;
+               if (!nft_set_elem_active(&cur->ext, genmask))
+                       continue;
+
+               *ext = &cur->ext;
+               return -EEXIST;
+       }
+       hlist_add_head_rcu(&this->node, &priv->table[bucket]);
+       return 0;
+}
+
+/* Hand the extension area of the element in @elem->priv to
+ * nft_set_elem_change_active().
+ */
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+                             const struct nft_set_elem *elem)
+{
+       struct nft_hash_elem *he;
+
+       he = elem->priv;
+       nft_set_elem_change_active(net, set, &he->ext);
+}
+
+/* Flush callback: @priv is the element itself.  Its extension area is passed
+ * to nft_set_elem_change_active() and true is returned unconditionally.
+ */
+static bool nft_hash_flush(const struct net *net,
+                          const struct nft_set *set, void *priv)
+{
+       struct nft_hash_elem *he;
+
+       he = priv;
+       nft_set_elem_change_active(net, set, &he->ext);
+
+       return true;
+}
+
+/* Find the element matching @elem's key and deactivate it.
+ *
+ * The bucket is selected from the key stored in @elem->priv, hashed the same
+ * way nft_hash_insert() hashes it.  Only an entry whose own key equals
+ * @elem->key.val and which is active under the next generation mask
+ * qualifies; it is passed to nft_set_elem_change_active() and returned.
+ * Returns NULL when the bucket holds no such entry.
+ */
+static void *nft_hash_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                const struct nft_set_elem *elem)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *this = elem->priv, *he;
+       u8 genmask = nft_genmask_next(net);
+       u32 hash;
+
+       hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
+       hash = reciprocal_scale(hash, priv->buckets);
+       hlist_for_each_entry(he, &priv->table[hash], node) {
+               /* The key of the bucket entry itself must be compared, and
+                * both conditions must hold; otherwise an unrelated or
+                * already inactive entry sharing the bucket gets picked.
+                */
+               if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val,
+                           set->klen) &&
+                   nft_set_elem_active(&he->ext, genmask)) {
+                       nft_set_elem_change_active(net, set, &he->ext);
+                       return he;
+               }
         }
+       return NULL;
+}
+
+/* Unlink the element in @elem->priv from its bucket with hlist_del_rcu().
+ * The element itself is not freed here.
+ */
+static void nft_hash_remove(const struct net *net,
+                           const struct nft_set *set,
+                           const struct nft_set_elem *elem)
+{
+       struct nft_hash_elem *victim;
+
+       victim = elem->priv;
+       hlist_del_rcu(&victim->node);
+}
+
+/* Visit every bucket in order and feed elements to iter->fn().
+ *
+ * The callback is invoked only once iter->count has reached iter->skip, and
+ * only for elements active under iter->genmask.  Its result is stored in
+ * iter->err; a negative result ends the walk immediately.  iter->count is
+ * incremented after every element that did not end the walk.
+ */
+static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
+                         struct nft_set_iter *iter)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *cur;
+       struct nft_set_elem elem;
+       int bucket;
+
+       for (bucket = 0; bucket < priv->buckets; bucket++) {
+               hlist_for_each_entry_rcu(cur, &priv->table[bucket], node) {
+                       if (iter->count >= iter->skip &&
+                           nft_set_elem_active(&cur->ext, iter->genmask)) {
+                               elem.priv = cur;
+
+                               iter->err = iter->fn(ctx, set, iter, &elem);
+                               if (iter->err < 0)
+                                       return;
+                       }
+                       iter->count++;
+               }
+       }
+}
+
+/* Private area size: the struct nft_hash header followed by one hlist_head
+ * per bucket, with the bucket count taken from nft_hash_buckets(desc->size).
+ * @nla is not used.
+ */
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[],
+                                     const struct nft_set_desc *desc)
+{
+       const size_t table_bytes = nft_hash_buckets(desc->size) *
+                                  sizeof(struct hlist_head);
+
+       return sizeof(struct nft_hash) + table_bytes;
+}
  
+/* Initialise the private area of a fixed-size hash set: draw a random hash
+ * seed and record the bucket count derived from desc->size.  @tb is not
+ * used.  Always returns 0.
+ */
+static int nft_hash_init(const struct nft_set *set,
+                        const struct nft_set_desc *desc,
+                        const struct nlattr * const tb[])
+{
+       struct nft_hash *priv;
+
+       priv = nft_set_priv(set);
+       get_random_bytes(&priv->seed, sizeof(priv->seed));
+       priv->buckets = nft_hash_buckets(desc->size);
+
+       return 0;
+}
+
+/* Empty every bucket: each element is unlinked with hlist_del_rcu() and then
+ * handed to nft_set_elem_destroy().  The _safe iterator is needed because
+ * the current element is unlinked inside the loop body.
+ */
+static void nft_hash_destroy(const struct nft_set *set)
+{
+       struct nft_hash *priv = nft_set_priv(set);
+       struct nft_hash_elem *cur;
+       struct hlist_node *tmp;
+       int bucket;
+
+       for (bucket = 0; bucket < priv->buckets; bucket++) {
+               hlist_for_each_entry_safe(cur, tmp, &priv->table[bucket], node) {
+                       hlist_del_rcu(&cur->node);
+                       nft_set_elem_destroy(set, cur, true);
+               }
+       }
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+                             struct nft_set_estimate *est)
+{
+       est->size   = sizeof(struct nft_hash) + /* header ... */
+                     nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + /* ... plus bucket heads ... */
+                     desc->size * sizeof(struct nft_hash_elem); /* ... plus one element per desc->size entry */
         est->lookup = NFT_SET_CLASS_O_1;
         est->space  = NFT_SET_CLASS_O_N;
  
         return true; /* @features is not consulted */
  }
  
+static struct nft_set_type nft_hash_type;
+static struct nft_set_ops nft_rhash_ops __read_mostly = { /* rhashtable backend; returned by nft_hash_select_ops() when desc->size is 0 */
+       .type           = &nft_hash_type,
+       .privsize       = nft_rhash_privsize,
+       .elemsize       = offsetof(struct nft_rhash_elem, ext),
+       .estimate       = nft_rhash_estimate,
+       .init           = nft_rhash_init,
+       .destroy        = nft_rhash_destroy,
+       .insert         = nft_rhash_insert,
+       .activate       = nft_rhash_activate,
+       .deactivate     = nft_rhash_deactivate,
+       .flush          = nft_rhash_flush,
+       .remove         = nft_rhash_remove,
+       .lookup         = nft_rhash_lookup,
+       .update         = nft_rhash_update,
+       .walk           = nft_rhash_walk,
+       .features       = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, /* nft_hash_ops and nft_hash_fast_ops do not set NFT_SET_TIMEOUT */
+};
+
  static struct nft_set_ops nft_hash_ops __read_mostly = {
+       .type           = &nft_hash_type,
         .privsize       = nft_hash_privsize,
         .elemsize       = offsetof(struct nft_hash_elem, ext),
         .estimate       = nft_hash_estimate,
@@ -402,20 +616,57 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
         .flush          = nft_hash_flush,
         .remove         = nft_hash_remove,
         .lookup         = nft_hash_lookup,
-       .update         = nft_hash_update,
         .walk           = nft_hash_walk,
-       .features       = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
+       .features       = NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+static struct nft_set_ops nft_hash_fast_ops __read_mostly = { /* fixed-size table; returned by nft_hash_select_ops() for 2- and 4-byte keys */
+       .type           = &nft_hash_type,
+       .privsize       = nft_hash_privsize,
+       .elemsize       = offsetof(struct nft_hash_elem, ext),
+       .estimate       = nft_hash_estimate,
+       .init           = nft_hash_init,
+       .destroy        = nft_hash_destroy,
+       .insert         = nft_hash_insert,
+       .activate       = nft_hash_activate,
+       .deactivate     = nft_hash_deactivate,
+       .flush          = nft_hash_flush,
+       .remove         = nft_hash_remove,
+       .lookup         = nft_hash_lookup_fast, /* nft_hash_ops uses nft_hash_lookup() here */
+       .walk           = nft_hash_walk,
+       .features       = NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+/* Choose the ops table for a new hash set.
+ *
+ * Without a size in @desc the rhashtable ops are returned.  With a size, the
+ * fixed-size table is used: the fast-lookup ops for key lengths of 2 or 4
+ * bytes, the generic ops for every other key length.  @ctx and @flags are
+ * not consulted.
+ */
+static const struct nft_set_ops *
+nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
+                   u32 flags)
+{
+       if (!desc->size)
+               return &nft_rhash_ops;
+
+       if (desc->klen == 2 || desc->klen == 4)
+               return &nft_hash_fast_ops;
+
+       return &nft_hash_ops;
+}
+
+static struct nft_set_type nft_hash_type __read_mostly = {
+       .select_ops     = nft_hash_select_ops,
         .owner          = THIS_MODULE,
  };
  
  static int __init nft_hash_module_init(void)
  {
-       return nft_register_set(&nft_hash_ops);
+       return nft_register_set(&nft_hash_type);
  }
  
  static void __exit nft_hash_module_exit(void)
  {
-       nft_unregister_set(&nft_hash_ops);
+       nft_unregister_set(&nft_hash_type);
  }
  
  module_init(nft_hash_module_init);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c

index fbdbaa00dd5fd751f6ab8f428de987017b5bdf79..bce5382f1d49dfdff6432714d480b75b95c48945 100644 (file)
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -251,7 +251,8 @@ cont:
         read_unlock_bh(&priv->lock);
  }
  
-static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
+                                       const struct nft_set_desc *desc)
  {
         return sizeof(struct nft_rbtree);
  }
@@ -283,13 +284,11 @@ static void nft_rbtree_destroy(const struct nft_set *set)
  static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
                                 struct nft_set_estimate *est)
  {
-       unsigned int nsize;
-
-       nsize = sizeof(struct nft_rbtree_elem);
         if (desc->size)
-               est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+               est->size = sizeof(struct nft_rbtree) +
+                           desc->size * sizeof(struct nft_rbtree_elem);
         else
-               est->size = nsize;
+               est->size = ~0;
  
         est->lookup = NFT_SET_CLASS_O_LOG_N;
         est->space  = NFT_SET_CLASS_O_N;
@@ -297,7 +296,9 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
         return true;
  }
  
+static struct nft_set_type nft_rbtree_type;
  static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+       .type           = &nft_rbtree_type,
         .privsize       = nft_rbtree_privsize,
         .elemsize       = offsetof(struct nft_rbtree_elem, ext),
         .estimate       = nft_rbtree_estimate,
@@ -311,17 +312,21 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = {
         .lookup         = nft_rbtree_lookup,
         .walk           = nft_rbtree_walk,
         .features       = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
+};
+
+static struct nft_set_type nft_rbtree_type __read_mostly = {
+       .ops            = &nft_rbtree_ops,
         .owner          = THIS_MODULE,
  };
  
  static int __init nft_rbtree_module_init(void)
  {
-       return nft_register_set(&nft_rbtree_ops);
+       return nft_register_set(&nft_rbtree_type);
  }
  
  static void __exit nft_rbtree_module_exit(void)
  {
-       nft_unregister_set(&nft_rbtree_ops);
+       nft_unregister_set(&nft_rbtree_type);
  }
  
  module_init(nft_rbtree_module_init);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c

index df7f1df0033090c0cd76f3afda43e5159a509791..d767e35fff6bd8a56373ba1f6622a534bf36253d 100644 (file)
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -127,7 +127,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
                                                     daddr, dport,
                                                     in->ifindex);
  
-                       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+                       if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                                 sk = NULL;
                         /* NOTE: we return listeners even if bound to
                          * 0.0.0.0, those are filtered out in
@@ -197,7 +197,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
                                                    daddr, ntohs(dport),
                                                    in->ifindex);
  
-                       if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+                       if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
                                 sk = NULL;
                         /* NOTE: we return listeners even if bound to
                          * 0.0.0.0, those are filtered out in
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c

index c05fefcec238c0648fe2b2710dde7e60e1e37279..71cfa9551d083fc543960b51c7b089d76f1842f0 100644 (file)
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -63,7 +63,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
  
  static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
                                struct sk_buff *skb, const struct nlmsghdr *nlh,
-                              const struct nlattr * const osf_attrs[])
+                              const struct nlattr * const osf_attrs[],
+                              struct netlink_ext_ack *extack)
  {
         struct xt_osf_user_finger *f;
         struct xt_osf_finger *kf = NULL, *sf;
@@ -107,7 +108,8 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
  static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
                                   struct sk_buff *skb,
                                   const struct nlmsghdr *nlh,
-                                 const struct nlattr * const osf_attrs[])
+                                 const struct nlattr * const osf_attrs[],
+                                 struct netlink_ext_ack *extack)
  {
         struct xt_osf_user_finger *f;
         struct xt_osf_finger *sf;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c

index 4dedb96d1a067de6c9766017642e16f9b148d616..2d2fa1d53ea6b3487a474d360a7043e60438e131 100644 (file)
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -42,8 +42,8 @@ match_packet(const struct sk_buff *skb,
              bool *hotdrop)
  {
         u_int32_t chunkmapcopy[256 / sizeof (u_int32_t)];
-       const sctp_chunkhdr_t *sch;
-       sctp_chunkhdr_t _sch;
+       const struct sctp_chunkhdr *sch;
+       struct sctp_chunkhdr _sch;
         int chunk_match_type = info->chunk_match_type;
         const struct xt_sctp_flag_info *flag_info = info->flag_info;
         int flag_count = info->flag_count;
@@ -118,8 +118,8 @@ static bool
  sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
  {
         const struct xt_sctp_info *info = par->matchinfo;
-       const sctp_sctphdr_t *sh;
-       sctp_sctphdr_t _sh;
+       const struct sctphdr *sh;
+       struct sctphdr _sh;
  
         if (par->fragoff != 0) {
                 pr_debug("Dropping non-first fragment.. FIXME\n");
@@ -136,13 +136,13 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
  
         return  SCCHECK(ntohs(sh->source) >= info->spts[0]
                         && ntohs(sh->source) <= info->spts[1],
-                       XT_SCTP_SRC_PORTS, info->flags, info->invflags)
-               && SCCHECK(ntohs(sh->dest) >= info->dpts[0]
+                       XT_SCTP_SRC_PORTS, info->flags, info->invflags) &&
+               SCCHECK(ntohs(sh->dest) >= info->dpts[0]
                         && ntohs(sh->dest) <= info->dpts[1],
-                       XT_SCTP_DEST_PORTS, info->flags, info->invflags)
-               && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
-                                       info, &par->hotdrop),
-                          XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
+                       XT_SCTP_DEST_PORTS, info->flags, info->invflags) &&
+               SCCHECK(match_packet(skb, par->thoff + sizeof(_sh),
+                                    info, &par->hotdrop),
+                       XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
  }
  
  static int sctp_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c

index a88745e4b7df79264a80b3ee4150298bed4aab9f..5acee49db90b508cb99660dc0677a44da691640e 100644 (file)
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -372,7 +372,7 @@ static void netlink_sock_destruct(struct sock *sk)
         }
  
         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-       WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+       WARN_ON(refcount_read(&sk->sk_wmem_alloc));
         WARN_ON(nlk_sk(sk)->groups);
  }
  
@@ -575,7 +575,7 @@ static void netlink_remove(struct sock *sk)
         table = &nl_table[sk->sk_protocol];
         if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
                                     netlink_rhashtable_params)) {
-               WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+               WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
                 __sock_put(sk);
         }
  
@@ -691,7 +691,7 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
         struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
         struct sock *sk = &nlk->sk;
  
-       if (!atomic_dec_and_test(&sk->sk_refcnt))
+       if (!refcount_dec_and_test(&sk->sk_refcnt))
                 return;
  
         if (nlk->cb_running && nlk->cb.done) {
@@ -1848,7 +1848,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
         }
  
         if (dst_group) {
-               atomic_inc(&skb->users);
+               refcount_inc(&skb->users);
                 netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
         }
         err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
@@ -2226,7 +2226,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
         struct netlink_sock *nlk;
         int ret;
  
-       atomic_inc(&skb->users);
+       refcount_inc(&skb->users);
  
         sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
         if (sk == NULL) {
@@ -2431,7 +2431,7 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
                 int exclude_portid = 0;
  
                 if (report) {
-                       atomic_inc(&skb->users);
+                       refcount_inc(&skb->users);
                         exclude_portid = portid;
                 }
  
@@ -2568,7 +2568,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
                            sk_rmem_alloc_get(s),
                            sk_wmem_alloc_get(s),
                            nlk->cb_running,
-                          atomic_read(&s->sk_refcnt),
+                          refcount_read(&s->sk_refcnt),
                            atomic_read(&s->sk_drops),
                            sock_i_ino(s)
                         );
diff --git a/net/nfc/core.c b/net/nfc/core.c

index 122bb81da9181ff92a964f381ea44113c5bd5025..5cf33df888c3d181d07753570746a566011a22d8 100644 (file)
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -982,6 +982,8 @@ static void nfc_release(struct device *d)
                         kfree(se);
         }
  
+       ida_simple_remove(&nfc_index_ida, dev->idx);
+
         kfree(dev);
  }
  
@@ -1056,6 +1058,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
                                     int tx_headroom, int tx_tailroom)
  {
         struct nfc_dev *dev;
+       int rc;
  
         if (!ops->start_poll || !ops->stop_poll || !ops->activate_target ||
             !ops->deactivate_target || !ops->im_transceive)
@@ -1068,6 +1071,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
         if (!dev)
                 return NULL;
  
+       rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
+       if (rc < 0)
+               goto err_free_dev;
+       dev->idx = rc;
+
+       dev->dev.class = &nfc_class;
+       dev_set_name(&dev->dev, "nfc%d", dev->idx);
+       device_initialize(&dev->dev);
+
         dev->ops = ops;
         dev->supported_protocols = supported_protocols;
         dev->tx_headroom = tx_headroom;
@@ -1090,6 +1102,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
         }
  
         return dev;
+
+err_free_dev:
+       kfree(dev);
+
+       return ERR_PTR(rc);
  }
  EXPORT_SYMBOL(nfc_allocate_device);
  
@@ -1104,14 +1121,6 @@ int nfc_register_device(struct nfc_dev *dev)
  
         pr_debug("dev_name=%s\n", dev_name(&dev->dev));
  
-       dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
-       if (dev->idx < 0)
-               return dev->idx;
-
-       dev->dev.class = &nfc_class;
-       dev_set_name(&dev->dev, "nfc%d", dev->idx);
-       device_initialize(&dev->dev);
-
         mutex_lock(&nfc_devlist_mutex);
         nfc_devlist_generation++;
         rc = device_add(&dev->dev);
@@ -1149,12 +1158,10 @@ EXPORT_SYMBOL(nfc_register_device);
   */
  void nfc_unregister_device(struct nfc_dev *dev)
  {
-       int rc, id;
+       int rc;
  
         pr_debug("dev_name=%s\n", dev_name(&dev->dev));
  
-       id = dev->idx;
-
         if (dev->rfkill) {
                 rfkill_unregister(dev->rfkill);
                 rfkill_destroy(dev->rfkill);
@@ -1179,8 +1186,6 @@ void nfc_unregister_device(struct nfc_dev *dev)
         nfc_devlist_generation++;
         device_del(&dev->dev);
         mutex_unlock(&nfc_devlist_mutex);
-
-       ida_simple_remove(&nfc_index_ida, id);
  }
  EXPORT_SYMBOL(nfc_unregister_device);
  
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c

index ebeace7a8278861939be6eacee410e76b3339430..de6dd37d04c7065b9eb9f5186f735136048be335 100644 (file)
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -240,7 +240,7 @@ int digital_send_cmd(struct nfc_digital_dev *ddev, u8 cmd_type,
  {
         struct digital_cmd *cmd;
  
-       cmd = kzalloc(sizeof(struct digital_cmd), GFP_KERNEL);
+       cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
         if (!cmd)
                 return -ENOMEM;
  
@@ -287,7 +287,7 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
  {
         struct digital_tg_mdaa_params *params;
  
-       params = kzalloc(sizeof(struct digital_tg_mdaa_params), GFP_KERNEL);
+       params = kzalloc(sizeof(*params), GFP_KERNEL);
         if (!params)
                 return -ENOMEM;
  
@@ -706,11 +706,9 @@ static int digital_in_send(struct nfc_dev *nfc_dev, struct nfc_target *target,
         struct digital_data_exch *data_exch;
         int rc;
  
-       data_exch = kzalloc(sizeof(struct digital_data_exch), GFP_KERNEL);
-       if (!data_exch) {
-               pr_err("Failed to allocate data_exch struct\n");
+       data_exch = kzalloc(sizeof(*data_exch), GFP_KERNEL);
+       if (!data_exch)
                 return -ENOMEM;
-       }
  
         data_exch->cb = cb;
         data_exch->cb_context = cb_context;
@@ -764,7 +762,7 @@ struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
             !ops->switch_rf || (ops->tg_listen_md && !ops->tg_get_rf_tech))
                 return NULL;
  
-       ddev = kzalloc(sizeof(struct nfc_digital_dev), GFP_KERNEL);
+       ddev = kzalloc(sizeof(*ddev), GFP_KERNEL);
         if (!ddev)
                 return NULL;
  
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c

index 74ccc2dd79d0943f2f5a24e2adb171908bfac7c8..4f9a973988b2b13fc5426525b487a23cbabb84f3 100644 (file)
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -151,7 +151,7 @@ static const u8 digital_payload_bits_map[4] = {
   *  0 <= wt <= 14 (given by the target by the TO field of ATR_RES response)
   */
  #define DIGITAL_NFC_DEP_IN_MAX_WT 14
-#define DIGITAL_NFC_DEP_TG_MAX_WT 8
+#define DIGITAL_NFC_DEP_TG_MAX_WT 14
  static const u16 digital_rwt_map[DIGITAL_NFC_DEP_IN_MAX_WT + 1] = {
         100,  101,  101,  102,  105,
         110,  119,  139,  177,  255,
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c

index 3cc3448da524c4b99bb8e6068bc077c56bac6e6b..2021d1d58a75e8bca71625c3d34e1a05a9cce89d 100644 (file)
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -27,6 +27,7 @@
  
  #define DIGITAL_SDD_RES_CT  0x88
  #define DIGITAL_SDD_RES_LEN 5
+#define DIGITAL_SEL_RES_LEN 1
  
  #define DIGITAL_SEL_RES_NFCID1_COMPLETE(sel_res) (!((sel_res) & 0x04))
  #define DIGITAL_SEL_RES_IS_T2T(sel_res) (!((sel_res) & 0x60))
@@ -299,7 +300,7 @@ static void digital_in_recv_sel_res(struct nfc_digital_dev *ddev, void *arg,
                 }
         }
  
-       if (!resp->len) {
+       if (resp->len != DIGITAL_SEL_RES_LEN) {
                 rc = -EIO;
                 goto exit;
         }
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c

index 2ffb18e73df6c03072fffeb68b660fb2f884eb45..fb7afcaa30047dbb3c1dd6287317c4ad7d3b186e 100644 (file)
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -77,7 +77,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
         struct sockaddr_nfc_llcp llcp_addr;
         int len, ret = 0;
  
-       if (!addr || addr->sa_family != AF_NFC)
+       if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+           addr->sa_family != AF_NFC)
                 return -EINVAL;
  
         pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -151,7 +152,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr,
         struct sockaddr_nfc_llcp llcp_addr;
         int len, ret = 0;
  
-       if (!addr || addr->sa_family != AF_NFC)
+       if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+           addr->sa_family != AF_NFC)
                 return -EINVAL;
  
         pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -662,8 +664,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
  
         pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags);
  
-       if (!addr || len < sizeof(struct sockaddr_nfc) ||
-           addr->sa_family != AF_NFC)
+       if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC)
                 return -EINVAL;
  
         if (addr->service_name_len == 0 && addr->dsap == 0)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c

index a3dac34cf790b423001549c89091c87739547083..c25e9b4179c34b571c6adbb65f3cff9f60dad06d 100644 (file)
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -73,11 +73,10 @@ int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
                 if (conn_info->dest_type == dest_type) {
                         if (!params)
                                 return conn_info->conn_id;
-                       if (conn_info) {
-                               if (params->id == conn_info->dest_params->id &&
-                                   params->protocol == conn_info->dest_params->protocol)
-                                       return conn_info->conn_id;
-                       }
+
+                       if (params->id == conn_info->dest_params->id &&
+                           params->protocol == conn_info->dest_params->protocol)
+                               return conn_info->conn_id;
                 }
         }
  
@@ -1173,8 +1172,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
         return ndev;
  
  free_nfc:
-       kfree(ndev->nfc_dev);
-
+       nfc_free_device(ndev->nfc_dev);
  free_nci:
         kfree(ndev);
         return NULL;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c

index 6b0850e63e09cf747a498f913f7eabe11e59c380..b251fb936a27a9632c55d5aa91c1dbd2fa543ffa 100644 (file)
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -907,7 +907,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
         u32 device_idx, target_idx, protocol;
         int rc;
  
-       if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+       if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+           !info->attrs[NFC_ATTR_TARGET_INDEX] ||
+           !info->attrs[NFC_ATTR_PROTOCOLS])
                 return -EINVAL;
  
         device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c

index d772e9a4b4f8faabd9403c2361a1ce0fc9a252c4..45fe8c8a884df36100bc0cf26d279cb92e51bdf8 100644 (file)
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1090,6 +1090,58 @@ static struct sw_flow_actions *get_flow_actions(struct net *net,
         return acts;
  }
  
+/* Factor out match-init and action-copy to avoid the
+ * "-Wframe-larger-than=1024" warning. Because the mask is only
+ * used to get the actions, a separate function is used to save
+ * some stack space.
+ *
+ * If there are neither key nor action attrs, we return 0
+ * directly. In that case, as before, the caller will not use
+ * the match either. If there is an action attr, we try to get
+ * the actions and save them to *acts. Before returning from
+ * the function, we reset the match->mask pointer, because
+ * we should not return a match object with a dangling reference
+ * to the mask.
+ */
+static int ovs_nla_init_match_and_action(struct net *net,
+                                        struct sw_flow_match *match,
+                                        struct sw_flow_key *key,
+                                        struct nlattr **a,
+                                        struct sw_flow_actions **acts,
+                                        bool log)
+{
+       struct sw_flow_mask mask;
+       int error = 0;
+
+       if (a[OVS_FLOW_ATTR_KEY]) {
+               ovs_match_init(match, key, true, &mask);
+               error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
+                                         a[OVS_FLOW_ATTR_MASK], log);
+               if (error)
+                       goto error;
+       }
+
+       if (a[OVS_FLOW_ATTR_ACTIONS]) {
+               if (!a[OVS_FLOW_ATTR_KEY]) {
+                       OVS_NLERR(log,
+                                 "Flow key attribute not present in set flow.");
+                       return -EINVAL;
+               }
+
+               *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
+                                        &mask, log);
+               if (IS_ERR(*acts)) {
+                       error = PTR_ERR(*acts);
+                       goto error;
+               }
+       }
+
+       /* On success, error is 0. */
+error:
+       match->mask = NULL;
+       return error;
+}
+
  static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
  {
         struct net *net = sock_net(skb->sk);
@@ -1097,7 +1149,6 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
         struct ovs_header *ovs_header = info->userhdr;
         struct sw_flow_key key;
         struct sw_flow *flow;
-       struct sw_flow_mask mask;
         struct sk_buff *reply = NULL;
         struct datapath *dp;
         struct sw_flow_actions *old_acts = NULL, *acts = NULL;
@@ -1109,34 +1160,18 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
         bool ufid_present;
  
         ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
-       if (a[OVS_FLOW_ATTR_KEY]) {
-               ovs_match_init(&match, &key, true, &mask);
-               error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
-                                         a[OVS_FLOW_ATTR_MASK], log);
-       } else if (!ufid_present) {
+       if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
                 OVS_NLERR(log,
                           "Flow set message rejected, Key attribute missing.");
-               error = -EINVAL;
+               return -EINVAL;
         }
+
+       error = ovs_nla_init_match_and_action(net, &match, &key, a,
+                                             &acts, log);
         if (error)
                 goto error;
  
-       /* Validate actions. */
-       if (a[OVS_FLOW_ATTR_ACTIONS]) {
-               if (!a[OVS_FLOW_ATTR_KEY]) {
-                       OVS_NLERR(log,
-                                 "Flow key attribute not present in set flow.");
-                       error = -EINVAL;
-                       goto error;
-               }
-
-               acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
-                                       &mask, log);
-               if (IS_ERR(acts)) {
-                       error = PTR_ERR(acts);
-                       goto error;
-               }
-
+       if (acts) {
                 /* Can allocate before locking if have acts. */
                 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                 ufid_flags);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c

index f9349a495caf0aed6a1c5106d32551dd8e11ee8a..e3beb28203ebe06a4192ff80d4d0eb3b3741cf41 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1317,7 +1317,7 @@ static void packet_sock_destruct(struct sock *sk)
         skb_queue_purge(&sk->sk_error_queue);
  
         WARN_ON(atomic_read(&sk->sk_rmem_alloc));
-       WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+       WARN_ON(refcount_read(&sk->sk_wmem_alloc));
  
         if (!sock_flag(sk, SOCK_DEAD)) {
                 pr_err("Attempt to release alive packet socket: %p\n", sk);
@@ -1739,7 +1739,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
                 match->flags = flags;
                 INIT_LIST_HEAD(&match->list);
                 spin_lock_init(&match->lock);
-               atomic_set(&match->sk_ref, 0);
+               refcount_set(&match->sk_ref, 0);
                 fanout_init_data(match);
                 match->prot_hook.type = po->prot_hook.type;
                 match->prot_hook.dev = po->prot_hook.dev;
@@ -1753,10 +1753,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
             match->prot_hook.type == po->prot_hook.type &&
             match->prot_hook.dev == po->prot_hook.dev) {
                 err = -ENOSPC;
-               if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+               if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
                         __dev_remove_pack(&po->prot_hook);
                         po->fanout = match;
-                       atomic_inc(&match->sk_ref);
+                       refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
                         __fanout_link(sk, po);
                         err = 0;
                 }
@@ -1785,7 +1785,7 @@ static struct packet_fanout *fanout_release(struct sock *sk)
         if (f) {
                 po->fanout = NULL;
  
-               if (atomic_dec_and_test(&f->sk_ref))
+               if (refcount_dec_and_test(&f->sk_ref))
                         list_del(&f->list);
                 else
                         f = NULL;
@@ -2523,7 +2523,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
         skb->data_len = to_write;
         skb->len += to_write;
         skb->truesize += to_write;
-       atomic_add(to_write, &po->sk.sk_wmem_alloc);
+       refcount_add(to_write, &po->sk.sk_wmem_alloc);
  
         while (likely(to_write)) {
                 nr_frags = skb_shinfo(skb)->nr_frags;
@@ -4495,7 +4495,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
                 seq_printf(seq,
                            "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
                            s,
-                          atomic_read(&s->sk_refcnt),
+                          refcount_read(&s->sk_refcnt),
                            s->sk_type,
                            ntohs(po->num),
                            po->ifindex,
diff --git a/net/packet/internal.h b/net/packet/internal.h

index 9ee46314b7d76df47d683c252a92ce97398d592b..94d1d405a11667ad95e61e49d4b66bdf31a6488a 100644 (file)
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -1,6 +1,8 @@
  #ifndef __PACKET_INTERNAL_H__
  #define __PACKET_INTERNAL_H__
  
+#include <linux/refcount.h>
+
  struct packet_mclist {
         struct packet_mclist    *next;
         int                     ifindex;
@@ -86,7 +88,7 @@ struct packet_fanout {
         struct list_head        list;
         struct sock             *arr[PACKET_FANOUT_MAX];
         spinlock_t              lock;
-       atomic_t                sk_ref;
+       refcount_t              sk_ref;
         struct packet_type      prot_hook ____cacheline_aligned_in_smp;
  };
  
diff --git a/net/phonet/socket.c b/net/phonet/socket.c

index 64634e3ec2fc78ebb84ad8873f6e446d06844493..1b050dd17393c1289d3b04628b440d9011e5eb37 100644 (file)
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -360,7 +360,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
                 return POLLHUP;
  
         if (sk->sk_state == TCP_ESTABLISHED &&
-               atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
+               refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
                 atomic_read(&pn->tx_credits))
                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
  
@@ -614,7 +614,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
                         sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
                         from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
                         sock_i_ino(sk),
-                       atomic_read(&sk->sk_refcnt), sk,
+                       refcount_read(&sk->sk_refcnt), sk,
                         atomic_read(&sk->sk_drops));
         }
         seq_pad(seq, '\n');
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c

index 52d11d7725c839820f76410c9837b762f2b2205e..0d8616aa5bad00201cbda687fd836cfc59512116 100644 (file)
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,7 +202,7 @@ void rds_tcp_write_space(struct sock *sk)
         tc->t_last_seen_una = rds_tcp_snd_una(tc);
         rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
  
-       if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+       if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
                 queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
  
  out:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c

index 58ae0db52ea13380582804cfa5431f25141cf7ef..a2ad4482376f30f7284329ee04904e4dcd114989 100644 (file)
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -53,7 +53,7 @@ static void rxrpc_sock_destructor(struct sock *);
   */
  static inline int rxrpc_writable(struct sock *sk)
  {
-       return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+       return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
  }
  
  /*
@@ -730,7 +730,7 @@ static void rxrpc_sock_destructor(struct sock *sk)
  
         rxrpc_purge_queue(&sk->sk_receive_queue);
  
-       WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+       WARN_ON(refcount_read(&sk->sk_wmem_alloc));
         WARN_ON(!sk_unhashed(sk));
         WARN_ON(sk->sk_socket);
  
@@ -747,7 +747,7 @@ static int rxrpc_release_sock(struct sock *sk)
  {
         struct rxrpc_sock *rx = rxrpc_sk(sk);
  
-       _enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+       _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
  
         /* declare the socket closed for business */
         sock_orphan(sk);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c

index 67b02c45271ba80be022c77c9c84abe4de2b1c47..b8985d01876a27168fbdc411208aa2984535557b 100644 (file)
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -27,7 +27,7 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
  {
         const void *here = __builtin_return_address(0);
         int n = atomic_inc_return(select_skb_count(op));
-       trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+       trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
  }
  
  /*
@@ -38,7 +38,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
         const void *here = __builtin_return_address(0);
         if (skb) {
                 int n = atomic_read(select_skb_count(op));
-               trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+               trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
         }
  }
  
@@ -49,7 +49,7 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
  {
         const void *here = __builtin_return_address(0);
         int n = atomic_inc_return(select_skb_count(op));
-       trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+       trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
         skb_get(skb);
  }
  
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
                 int n;
                 CHECK_SLAB_OKAY(&skb->users);
                 n = atomic_dec_return(select_skb_count(op));
-               trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+               trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
                 kfree_skb(skb);
         }
  }
@@ -78,7 +78,7 @@ void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
                 int n;
                 CHECK_SLAB_OKAY(&skb->users);
                 n = atomic_dec_return(select_skb_count(op));
-               trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+               trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
                 kfree_skb(skb);
         }
  }
@@ -93,7 +93,7 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
         while ((skb = skb_dequeue((list))) != NULL) {
                 int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
                 trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
-                               atomic_read(&skb->users), n, here);
+                               refcount_read(&skb->users), n, here);
                 kfree_skb(skb);
         }
  }
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c

index eb0e9bab54c175d706b6f89f354f2dd5b8793a50..d6e97115500bcda961014071178886b120b65952 100644 (file)
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -340,7 +340,7 @@ META_COLLECTOR(int_sk_refcnt)
                 *err = -1;
                 return;
         }
-       dst->value = atomic_read(&skb->sk->sk_refcnt);
+       dst->value = refcount_read(&skb->sk->sk_refcnt);
  }
  
  META_COLLECTOR(int_sk_rcvbuf)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c

index 5d95401bbc021343cef1d8b094062ec8ddfcd85c..43b94c7b69bdb0d25d10a79c05e789a2388888da 100644 (file)
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1019,7 +1019,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
                 return sch;
         }
         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
-       ops->destroy(sch);
+       if (ops->destroy)
+               ops->destroy(sch);
  err_out3:
         dev_put(dev);
         kfree((char *) sch - sch->padded);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c

index de162592eee0505339f25103611e6bf305dcbce1..572fe2584e48c81dbf58d90ce9d6a4ae68d2a385 100644 (file)
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -498,7 +498,7 @@ static void sch_atm_dequeue(unsigned long data)
                         ATM_SKB(skb)->vcc = flow->vcc;
                         memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
                                flow->hdr_len);
-                       atomic_add(skb->truesize,
+                       refcount_add(skb->truesize,
                                    &sk_atm(flow->vcc)->sk_wmem_alloc);
                         /* atm.atm_options are already set by atm_tc_enqueue */
                         flow->vcc->send(flow->vcc, skb);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c

index 757be416f7780716dd4590ef2b44120e27598e69..fa4f530ab7e1435b645eaf3fc45fcb42f7e51e08 100644 (file)
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
  {
         struct net *net = sock_net(sk);
         struct sctp_sock *sp;
-       sctp_paramhdr_t *p;
+       struct sctp_paramhdr *p;
         int i;
  
         /* Retrieve the SCTP per socket area.  */
@@ -284,9 +284,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
                         ntohs(ep->auth_chunk_list->param_hdr.length));
  
         /* Get the AUTH random number for this association */
-       p = (sctp_paramhdr_t *)asoc->c.auth_random;
+       p = (struct sctp_paramhdr *)asoc->c.auth_random;
         p->type = SCTP_PARAM_RANDOM;
-       p->length = htons(sizeof(sctp_paramhdr_t) + SCTP_AUTH_RANDOM_LENGTH);
+       p->length = htons(sizeof(*p) + SCTP_AUTH_RANDOM_LENGTH);
         get_random_bytes(p+1, SCTP_AUTH_RANDOM_LENGTH);
  
         return asoc;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c

index f99d4855d3de34e0f0b1bf027a87dde3ad91a522..8ffa5985cd6e4190882b057f26130b99aa5475fc 100644 (file)
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -538,7 +538,8 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
         if (!hmacs)
                 return NULL;
  
-       n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+       n_elt = (ntohs(hmacs->param_hdr.length) -
+                sizeof(struct sctp_paramhdr)) >> 1;
         for (i = 0; i < n_elt; i++) {
                 id = ntohs(hmacs->hmac_ids[i]);
  
@@ -589,7 +590,8 @@ int sctp_auth_asoc_verify_hmac_id(const struct sctp_association *asoc,
                 return 0;
  
         hmacs = (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs;
-       n_elt = (ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t)) >> 1;
+       n_elt = (ntohs(hmacs->param_hdr.length) -
+                sizeof(struct sctp_paramhdr)) >> 1;
  
         return __sctp_auth_find_hmacid(hmacs->hmac_ids, n_elt, hmac_id);
  }
@@ -612,8 +614,8 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
         if (asoc->default_hmac_id)
                 return;
  
-       n_params = (ntohs(hmacs->param_hdr.length)
-                               - sizeof(sctp_paramhdr_t)) >> 1;
+       n_params = (ntohs(hmacs->param_hdr.length) -
+                   sizeof(struct sctp_paramhdr)) >> 1;
         ep = asoc->ep;
         for (i = 0; i < n_params; i++) {
                 id = ntohs(hmacs->hmac_ids[i]);
@@ -632,7 +634,7 @@ void sctp_auth_asoc_set_default_hmac(struct sctp_association *asoc,
  
  
  /* Check to see if the given chunk is supposed to be authenticated */
-static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
+static int __sctp_auth_cid(enum sctp_cid chunk, struct sctp_chunks_param *param)
  {
         unsigned short len;
         int found = 0;
@@ -641,7 +643,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
         if (!param || param->param_hdr.length == 0)
                 return 0;
  
-       len = ntohs(param->param_hdr.length) - sizeof(sctp_paramhdr_t);
+       len = ntohs(param->param_hdr.length) - sizeof(struct sctp_paramhdr);
  
         /* SCTP-AUTH, Section 3.2
          *    The chunk types for INIT, INIT-ACK, SHUTDOWN-COMPLETE and AUTH
@@ -668,7 +670,7 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
  }
  
  /* Check if peer requested that this chunk is authenticated */
-int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
  {
         if (!asoc)
                 return 0;
@@ -680,7 +682,7 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
  }
  
  /* Check if we requested that peer authenticate this chunk. */
-int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
+int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc)
  {
         if (!asoc)
                 return 0;
@@ -775,7 +777,7 @@ int sctp_auth_ep_add_chunkid(struct sctp_endpoint *ep, __u8 chunk_id)
  
         /* Check if we can add this chunk to the array */
         param_len = ntohs(p->param_hdr.length);
-       nchunks = param_len - sizeof(sctp_paramhdr_t);
+       nchunks = param_len - sizeof(struct sctp_paramhdr);
         if (nchunks == SCTP_NUM_CHUNK_TYPES)
                 return -EINVAL;
  
@@ -812,9 +814,11 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
                 return -EINVAL;
  
         for (i = 0; i < hmacs->shmac_num_idents; i++)
-               ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
-       ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
-                               hmacs->shmac_num_idents * sizeof(__u16));
+               ep->auth_hmacs_list->hmac_ids[i] =
+                               htons(hmacs->shmac_idents[i]);
+       ep->auth_hmacs_list->param_hdr.length =
+                       htons(sizeof(struct sctp_paramhdr) +
+                       hmacs->shmac_num_idents * sizeof(__u16));
         return 0;
  }
  
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c

index 3dcd0ecf3d99f74ec8ed4aad149bd32950ab23ef..efbc31877804e3d64d3c46ddd58916d16b2d3572 100644 (file)
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -90,12 +90,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
                  */
                 auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
                 auth_hmacs->param_hdr.length =
-                                       htons(sizeof(sctp_paramhdr_t) + 2);
+                                       htons(sizeof(struct sctp_paramhdr) + 2);
                 auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
  
                 /* Initialize the CHUNKS parameter */
                 auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
-               auth_chunks->param_hdr.length = htons(sizeof(sctp_paramhdr_t));
+               auth_chunks->param_hdr.length =
+                                       htons(sizeof(struct sctp_paramhdr));
  
                 /* If the Add-IP functionality is enabled, we must
                  * authenticate, ASCONF and ASCONF-ACK chunks
@@ -104,7 +105,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
                         auth_chunks->chunks[0] = SCTP_CID_ASCONF;
                         auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
                         auth_chunks->param_hdr.length =
-                                       htons(sizeof(sctp_paramhdr_t) + 2);
+                                       htons(sizeof(struct sctp_paramhdr) + 2);
                 }
         }
  
@@ -268,16 +269,14 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
  
         memset(ep->secret_key, 0, sizeof(ep->secret_key));
  
-       /* Give up our hold on the sock. */
         sk = ep->base.sk;
-       if (sk != NULL) {
-               /* Remove and free the port */
-               if (sctp_sk(sk)->bind_hash)
-                       sctp_put_port(sk);
+       /* Remove and free the port */
+       if (sctp_sk(sk)->bind_hash)
+               sctp_put_port(sk);
  
-               sctp_sk(sk)->ep = NULL;
-               sock_put(sk);
-       }
+       sctp_sk(sk)->ep = NULL;
+       /* Give up our hold on the sock */
+       sock_put(sk);
  
         kfree(ep);
         SCTP_DBG_OBJCNT_DEC(ep);
diff --git a/net/sctp/input.c b/net/sctp/input.c

index ba9ad32fc44740b9ec45d95e265d3d895148d7a7..41eb2ec104601eb0fc9ae37851248752884992b2 100644 (file)
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -663,19 +663,19 @@ out_unlock:
   */
  static int sctp_rcv_ootb(struct sk_buff *skb)
  {
-       sctp_chunkhdr_t *ch, _ch;
+       struct sctp_chunkhdr *ch, _ch;
         int ch_end, offset = 0;
  
         /* Scan through all the chunks in the packet.  */
         do {
                 /* Make sure we have at least the header there */
-               if (offset + sizeof(sctp_chunkhdr_t) > skb->len)
+               if (offset + sizeof(_ch) > skb->len)
                         break;
  
                 ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
  
                 /* Break out if chunk length is less then minimal. */
-               if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+               if (ntohs(ch->length) < sizeof(_ch))
                         break;
  
                 ch_end = offset + SCTP_PAD4(ntohs(ch->length));
@@ -1051,7 +1051,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
         union sctp_addr *paddr = &addr;
         struct sctphdr *sh = sctp_hdr(skb);
         union sctp_params params;
-       sctp_init_chunk_t *init;
+       struct sctp_init_chunk *init;
         struct sctp_af *af;
  
         /*
@@ -1070,7 +1070,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
         /* Find the start of the TLVs and the end of the chunk.  This is
          * the region we search for address parameters.
          */
-       init = (sctp_init_chunk_t *)skb->data;
+       init = (struct sctp_init_chunk *)skb->data;
  
         /* Walk the parameters looking for embedded addresses. */
         sctp_walk_params(params, init, init_hdr.params) {
@@ -1106,7 +1106,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
   */
  static struct sctp_association *__sctp_rcv_asconf_lookup(
                                         struct net *net,
-                                       sctp_chunkhdr_t *ch,
+                                       struct sctp_chunkhdr *ch,
                                         const union sctp_addr *laddr,
                                         __be16 peer_port,
                                         struct sctp_transport **transportp)
@@ -1144,7 +1144,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
                                       struct sctp_transport **transportp)
  {
         struct sctp_association *asoc = NULL;
-       sctp_chunkhdr_t *ch;
+       struct sctp_chunkhdr *ch;
         int have_auth = 0;
         unsigned int chunk_num = 1;
         __u8 *ch_end;
@@ -1152,10 +1152,10 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
         /* Walk through the chunks looking for AUTH or ASCONF chunks
          * to help us find the association.
          */
-       ch = (sctp_chunkhdr_t *) skb->data;
+       ch = (struct sctp_chunkhdr *)skb->data;
         do {
                 /* Break out if chunk length is less then minimal. */
-               if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+               if (ntohs(ch->length) < sizeof(*ch))
                         break;
  
                 ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
@@ -1192,7 +1192,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
                 if (asoc)
                         break;
  
-               ch = (sctp_chunkhdr_t *) ch_end;
+               ch = (struct sctp_chunkhdr *)ch_end;
                 chunk_num++;
         } while (ch_end < skb_tail_pointer(skb));
  
@@ -1210,7 +1210,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
                                       const union sctp_addr *laddr,
                                       struct sctp_transport **transportp)
  {
-       sctp_chunkhdr_t *ch;
+       struct sctp_chunkhdr *ch;
  
         /* We do not allow GSO frames here as we need to linearize and
          * then cannot guarantee frame boundaries. This shouldn't be an
@@ -1220,7 +1220,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
         if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
                 return NULL;
  
-       ch = (sctp_chunkhdr_t *) skb->data;
+       ch = (struct sctp_chunkhdr *)skb->data;
  
         /* The code below will attempt to walk the chunk and extract
          * parameter information.  Before we do that, we need to verify
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c

index f731de3e8428c951583c3217586da0609bb25d3a..48392552ee7c1ea75a134375b55ffb0ebf59064e 100644 (file)
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -99,7 +99,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk)
  struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
  {
         struct sctp_chunk *chunk;
-       sctp_chunkhdr_t *ch = NULL;
+       struct sctp_chunkhdr *ch = NULL;
  
         chunk = queue->in_progress;
         /* If there is no more chunks in this packet, say so */
@@ -108,7 +108,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
             chunk->pdiscard)
                     return NULL;
  
-       ch = (sctp_chunkhdr_t *)chunk->chunk_end;
+       ch = (struct sctp_chunkhdr *)chunk->chunk_end;
  
         return ch;
  }
@@ -122,7 +122,7 @@ struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *queue)
  struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
  {
         struct sctp_chunk *chunk;
-       sctp_chunkhdr_t *ch = NULL;
+       struct sctp_chunkhdr *ch = NULL;
  
         /* The assumption is that we are safe to process the chunks
          * at this time.
@@ -151,7 +151,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
                         chunk = queue->in_progress = NULL;
                 } else {
                         /* Nothing to do. Next chunk in the packet, please. */
-                       ch = (sctp_chunkhdr_t *) chunk->chunk_end;
+                       ch = (struct sctp_chunkhdr *)chunk->chunk_end;
                         /* Force chunk->skb->data to chunk->chunk_end.  */
                         skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data);
                         /* We are guaranteed to pull a SCTP header. */
@@ -195,7 +195,7 @@ next_chunk:
  
  new_skb:
                 /* This is the first chunk in the packet.  */
-               ch = (sctp_chunkhdr_t *) chunk->skb->data;
+               ch = (struct sctp_chunkhdr *)chunk->skb->data;
                 chunk->singleton = 1;
                 chunk->data_accepted = 0;
                 chunk->pdiscard = 0;
@@ -214,11 +214,10 @@ new_skb:
  
         chunk->chunk_hdr = ch;
         chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
-       skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+       skb_pull(chunk->skb, sizeof(*ch));
         chunk->subh.v = NULL; /* Subheader is no longer valid.  */
  
-       if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) <
-           skb_tail_pointer(chunk->skb)) {
+       if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) {
                 /* This is not a singleton */
                 chunk->singleton = 0;
         } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
diff --git a/net/sctp/output.c b/net/sctp/output.c

index 89cee1482d35592cf4e2d3a97e4b00cf47ed07de..9d8504985744f8153f985695e7123f48994d3494 100644 (file)
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -402,7 +402,7 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
          * therefore only reserve a single byte to keep socket around until
          * the packet has been transmitted.
          */
-       atomic_inc(&sk->sk_wmem_alloc);
+       refcount_inc(&sk->sk_wmem_alloc);
  }
  
  static int sctp_packet_pack(struct sctp_packet *packet,
@@ -723,8 +723,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
         /* Check whether this chunk and all the rest of pending data will fit
          * or delay in hopes of bundling a full sized packet.
          */
-       if (chunk->skb->len + q->out_qlen >
-               transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4)
+       if (chunk->skb->len + q->out_qlen > transport->pathmtu -
+               packet->overhead - sizeof(struct sctp_data_chunk) - 4)
                 /* Enough data queued to fill a packet */
                 return SCTP_XMIT_OK;
  
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c

index 20299df163b98ededf0d8f87ccdeb8d08ca648f0..e8762702a3138cae4fd7192ecc490407b9b19a06 100644 (file)
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1102,7 +1102,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                                  sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
                                  "illegal chunk", ntohl(chunk->subh.data_hdr->tsn),
                                  chunk->skb ? chunk->skb->head : NULL, chunk->skb ?
-                                atomic_read(&chunk->skb->users) : -1);
+                                refcount_read(&chunk->skb->users) : -1);
  
                         /* Add the chunk to the packet.  */
                         status = sctp_packet_transmit_chunk(packet, chunk, 0, gfp);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c

index 8e34db56bc1db2a118722b5d88ab5e3f757f2fca..26b4be6b41720a23040fcf31983199d331f026e4 100644 (file)
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -363,7 +363,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
                 assoc->stream.outcnt, assoc->max_retrans,
                 assoc->init_retries, assoc->shutdown_retries,
                 assoc->rtx_data_chunks,
-               atomic_read(&sk->sk_wmem_alloc),
+               refcount_read(&sk->sk_wmem_alloc),
                 sk->sk_wmem_queued,
                 sk->sk_sndbuf,
                 sk->sk_rcvbuf);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c

index 4b1967997c1650a94d49bdca5c4eaa279e9220bf..3af4dd024ec00abd095e3b9a0367aeb61aa13e9e 100644 (file)
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -217,7 +217,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
  {
         struct net *net = sock_net(asoc->base.sk);
         struct sctp_endpoint *ep = asoc->ep;
-       sctp_inithdr_t init;
+       struct sctp_inithdr init;
         union sctp_params addrs;
         size_t chunksize;
         struct sctp_chunk *retval = NULL;
@@ -229,7 +229,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
         sctp_supported_ext_param_t ext_param;
         int num_ext = 0;
         __u8 extensions[3];
-       sctp_paramhdr_t *auth_chunks = NULL,
+       struct sctp_paramhdr *auth_chunks = NULL,
                         *auth_hmacs = NULL;
  
         /* RFC 2960 3.3.2 Initiation (INIT) (1)
@@ -286,14 +286,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
                 chunksize += sizeof(asoc->c.auth_random);
  
                 /* Add HMACS parameter length if any were defined */
-               auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+               auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
                 if (auth_hmacs->length)
                         chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
                 else
                         auth_hmacs = NULL;
  
                 /* Add CHUNKS parameter length */
-               auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+               auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
                 if (auth_chunks->length)
                         chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
                 else
@@ -385,7 +385,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
                                  const struct sctp_chunk *chunk,
                                  gfp_t gfp, int unkparam_len)
  {
-       sctp_inithdr_t initack;
+       struct sctp_inithdr initack;
         struct sctp_chunk *retval;
         union sctp_params addrs;
         struct sctp_sock *sp;
@@ -397,7 +397,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
         sctp_supported_ext_param_t ext_param;
         int num_ext = 0;
         __u8 extensions[3];
-       sctp_paramhdr_t *auth_chunks = NULL,
+       struct sctp_paramhdr *auth_chunks = NULL,
                         *auth_hmacs = NULL,
                         *auth_random = NULL;
  
@@ -448,16 +448,16 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
                 chunksize += sizeof(aiparam);
  
         if (asoc->peer.auth_capable) {
-               auth_random = (sctp_paramhdr_t *)asoc->c.auth_random;
+               auth_random = (struct sctp_paramhdr *)asoc->c.auth_random;
                 chunksize += ntohs(auth_random->length);
  
-               auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
+               auth_hmacs = (struct sctp_paramhdr *)asoc->c.auth_hmacs;
                 if (auth_hmacs->length)
                         chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
                 else
                         auth_hmacs = NULL;
  
-               auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
+               auth_chunks = (struct sctp_paramhdr *)asoc->c.auth_chunks;
                 if (auth_chunks->length)
                         chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
                 else
@@ -1085,18 +1085,18 @@ struct sctp_chunk *sctp_make_abort_violation(
         struct sctp_chunk  *retval;
         struct sctp_paramhdr phdr;
  
-       retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen
-                                       + sizeof(sctp_paramhdr_t));
+       retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen +
+                                             sizeof(phdr));
         if (!retval)
                 goto end;
  
-       sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen
-                                       + sizeof(sctp_paramhdr_t));
+       sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, paylen +
+                                                           sizeof(phdr));
  
         phdr.type = htons(chunk->chunk_hdr->type);
         phdr.length = chunk->chunk_hdr->length;
         sctp_addto_chunk(retval, paylen, payload);
-       sctp_addto_param(retval, sizeof(sctp_paramhdr_t), &phdr);
+       sctp_addto_param(retval, sizeof(phdr), &phdr);
  
  end:
         return retval;
@@ -1110,16 +1110,16 @@ struct sctp_chunk *sctp_make_violation_paramlen(
         struct sctp_chunk *retval;
         static const char error[] = "The following parameter had invalid length:";
         size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) +
-                               sizeof(sctp_paramhdr_t);
+                            sizeof(*param);
  
         retval = sctp_make_abort(asoc, chunk, payload_len);
         if (!retval)
                 goto nodata;
  
         sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION,
-                       sizeof(error) + sizeof(sctp_paramhdr_t));
+                       sizeof(error) + sizeof(*param));
         sctp_addto_chunk(retval, sizeof(error), error);
-       sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param);
+       sctp_addto_param(retval, sizeof(*param), param);
  
  nodata:
         return retval;
@@ -1379,20 +1379,20 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
                                             gfp_t gfp)
  {
         struct sctp_chunk *retval;
-       sctp_chunkhdr_t *chunk_hdr;
+       struct sctp_chunkhdr *chunk_hdr;
         struct sk_buff *skb;
         struct sock *sk;
  
         /* No need to allocate LL here, as this is only a chunk. */
-       skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+       skb = alloc_skb(SCTP_PAD4(sizeof(*chunk_hdr) + paylen), gfp);
         if (!skb)
                 goto nodata;
  
         /* Make room for the chunk header.  */
-       chunk_hdr = skb_put(skb, sizeof(sctp_chunkhdr_t));
+       chunk_hdr = (struct sctp_chunkhdr *)skb_put(skb, sizeof(*chunk_hdr));
         chunk_hdr->type   = type;
         chunk_hdr->flags  = flags;
-       chunk_hdr->length = htons(sizeof(sctp_chunkhdr_t));
+       chunk_hdr->length = htons(sizeof(*chunk_hdr));
  
         sk = asoc ? asoc->base.sk : NULL;
         retval = sctp_chunkify(skb, asoc, sk, gfp);
@@ -1402,7 +1402,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
         }
  
         retval->chunk_hdr = chunk_hdr;
-       retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(struct sctp_chunkhdr);
+       retval->chunk_end = ((__u8 *)chunk_hdr) + sizeof(*chunk_hdr);
  
         /* Determine if the chunk needs to be authenticated */
         if (sctp_auth_send_cid(type, asoc))
@@ -1614,7 +1614,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
         /* Header size is static data prior to the actual cookie, including
          * any padding.
          */
-       headersize = sizeof(sctp_paramhdr_t) +
+       headersize = sizeof(struct sctp_paramhdr) +
                      (sizeof(struct sctp_signed_cookie) -
                       sizeof(struct sctp_cookie));
         bodysize = sizeof(struct sctp_cookie)
@@ -1710,7 +1710,7 @@ struct sctp_association *sctp_unpack_cookie(
         /* Header size is static data prior to the actual cookie, including
          * any padding.
          */
-       headersize = sizeof(sctp_chunkhdr_t) +
+       headersize = sizeof(struct sctp_chunkhdr) +
                      (sizeof(struct sctp_signed_cookie) -
                       sizeof(struct sctp_cookie));
         bodysize = ntohs(chunk->chunk_hdr->length) - headersize;
@@ -1882,7 +1882,7 @@ struct __sctp_missing {
   * Report a missing mandatory parameter.
   */
  static int sctp_process_missing_param(const struct sctp_association *asoc,
-                                     sctp_param_t paramtype,
+                                     enum sctp_param paramtype,
                                       struct sctp_chunk *chunk,
                                       struct sctp_chunk **errp)
  {
@@ -1975,7 +1975,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
  
  static int sctp_verify_ext_param(struct net *net, union sctp_params param)
  {
-       __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+       __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
         int have_auth = 0;
         int have_asconf = 0;
         int i;
@@ -2010,7 +2010,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
                                     union sctp_params param)
  {
         struct net *net = sock_net(asoc->base.sk);
-       __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+       __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
         int i;
  
         for (i = 0; i < num_ext; i++) {
@@ -2123,7 +2123,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
                                         const struct sctp_endpoint *ep,
                                         const struct sctp_association *asoc,
                                         union sctp_params param,
-                                       sctp_cid_t cid,
+                                       enum sctp_cid cid,
                                         struct sctp_chunk *chunk,
                                         struct sctp_chunk **err_chunk)
  {
@@ -2180,7 +2180,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
                  * cause 'Protocol Violation'.
                  */
                 if (SCTP_AUTH_RANDOM_LENGTH !=
-                       ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) {
+                       ntohs(param.p->length) - sizeof(struct sctp_paramhdr)) {
                         sctp_process_inv_paramlength(asoc, param.p,
                                                         chunk, err_chunk);
                         retval = SCTP_IERROR_ABORT;
@@ -2208,7 +2208,8 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
                         goto fallthrough;
  
                 hmacs = (struct sctp_hmac_algo_param *)param.p;
-               n_elt = (ntohs(param.p->length) - sizeof(sctp_paramhdr_t)) >> 1;
+               n_elt = (ntohs(param.p->length) -
+                        sizeof(struct sctp_paramhdr)) >> 1;
  
                 /* SCTP-AUTH: Section 6.1
                  * The HMAC algorithm based on SHA-1 MUST be supported and
@@ -2240,9 +2241,9 @@ fallthrough:
  
  /* Verify the INIT packet before we process it.  */
  int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
-                    const struct sctp_association *asoc, sctp_cid_t cid,
-                    sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
-                    struct sctp_chunk **errp)
+                    const struct sctp_association *asoc, enum sctp_cid cid,
+                    struct sctp_init_chunk *peer_init,
+                    struct sctp_chunk *chunk, struct sctp_chunk **errp)
  {
         union sctp_params param;
         bool has_cookie = false;
@@ -2306,7 +2307,7 @@ int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
   */
  int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
                       const union sctp_addr *peer_addr,
-                     sctp_init_chunk_t *peer_init, gfp_t gfp)
+                     struct sctp_init_chunk *peer_init, gfp_t gfp)
  {
         struct net *net = sock_net(asoc->base.sk);
         union sctp_params param;
@@ -2565,7 +2566,7 @@ do_addr_param:
                         asoc->peer.ipv4_address = 1;
  
                 /* Cycle through address types; avoid divide by 0. */
-               sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+               sat = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
                 if (sat)
                         sat /= sizeof(__u16);
  
@@ -2592,7 +2593,7 @@ do_addr_param:
  
         case SCTP_PARAM_STATE_COOKIE:
                 asoc->peer.cookie_len =
-                       ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
+                       ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
                 asoc->peer.cookie = param.cookie->body;
                 break;
  
@@ -3176,7 +3177,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
                                 return false;
                         length = ntohs(param.addip->param_hdr.length);
                         if (length < sizeof(sctp_addip_param_t) +
-                                    sizeof(sctp_paramhdr_t))
+                                    sizeof(**errp))
                                 return false;
                         break;
                 case SCTP_PARAM_SUCCESS_REPORT:
@@ -3218,7 +3219,8 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
         int     chunk_len;
         __u32   serial;
  
-       chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+       chunk_len = ntohs(asconf->chunk_hdr->length) -
+                   sizeof(struct sctp_chunkhdr);
         hdr = (sctp_addiphdr_t *)asconf->skb->data;
         serial = ntohl(hdr->serial);
  
@@ -3364,7 +3366,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack,
                 err_code = SCTP_ERROR_REQ_REFUSED;
  
         asconf_ack_len = ntohs(asconf_ack->chunk_hdr->length) -
-                            sizeof(sctp_chunkhdr_t);
+                        sizeof(struct sctp_chunkhdr);
  
         /* Skip the addiphdr from the asconf_ack chunk and store a pointer to
          * the first asconf_ack parameter.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c

index dfe1fcb520ba3f6536400379c27767756c1c8d49..d6e5e9e0fd6d429640e411f55b7d5ed94f6f3781 100644 (file)
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -647,7 +647,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
  static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
                                  struct sctp_association *asoc,
                                  struct sctp_chunk *chunk,
-                                sctp_init_chunk_t *peer_init,
+                                struct sctp_init_chunk *peer_init,
                                  gfp_t gfp)
  {
         int error;
@@ -955,9 +955,10 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
                 switch (err_hdr->cause) {
                 case SCTP_ERROR_UNKNOWN_CHUNK:
                 {
-                       sctp_chunkhdr_t *unk_chunk_hdr;
+                       struct sctp_chunkhdr *unk_chunk_hdr;
  
-                       unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable;
+                       unk_chunk_hdr = (struct sctp_chunkhdr *)
+                                                       err_hdr->variable;
                         switch (unk_chunk_hdr->type) {
                         /* ADDIP 4.1 A9) If the peer responds to an ASCONF with
                          * an ERROR chunk reporting that it did not recognized
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c

index 8feff96a5bef262d5b2871e0931c50bb132322a7..b2a74c3823eea7e82a11d2812f0e235748274e7f 100644 (file)
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -235,7 +235,7 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net,
                 return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
  
         /* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -345,7 +345,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
          * error, but since we don't have an association, we'll
          * just discard the packet.
          */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
                 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
  
         /* If the INIT is coming toward a closing socket, we'll send back
@@ -360,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
         /* Verify the INIT chunk before processing it. */
         err_chunk = NULL;
         if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-                             (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+                             (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
                               &err_chunk)) {
                 /* This chunk contains fatal error. It is to be discarded.
                  * Send an ABORT, with causes if there is any.
@@ -368,9 +368,9 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
                 if (err_chunk) {
                         packet = sctp_abort_pkt_new(net, ep, asoc, arg,
                                         (__u8 *)(err_chunk->chunk_hdr) +
-                                       sizeof(sctp_chunkhdr_t),
+                                       sizeof(struct sctp_chunkhdr),
                                         ntohs(err_chunk->chunk_hdr->length) -
-                                       sizeof(sctp_chunkhdr_t));
+                                       sizeof(struct sctp_chunkhdr));
  
                         sctp_chunk_free(err_chunk);
  
@@ -389,10 +389,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
         }
  
         /* Grab the INIT header.  */
-       chunk->subh.init_hdr = (sctp_inithdr_t *)chunk->skb->data;
+       chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
  
         /* Tag the variable length parameters.  */
-       chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+       chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
  
         new_asoc = sctp_make_temp_asoc(ep, chunk, GFP_ATOMIC);
         if (!new_asoc)
@@ -405,7 +405,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
  
         /* The call, sctp_process_init(), can fail on memory allocation.  */
         if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
-                              (sctp_init_chunk_t *)chunk->chunk_hdr,
+                              (struct sctp_init_chunk *)chunk->chunk_hdr,
                                GFP_ATOMIC))
                 goto nomem_init;
  
@@ -417,7 +417,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
         len = 0;
         if (err_chunk)
                 len = ntohs(err_chunk->chunk_hdr->length) -
-                       sizeof(sctp_chunkhdr_t);
+                     sizeof(struct sctp_chunkhdr);
  
         repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
         if (!repl)
@@ -437,7 +437,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
                  */
                 unk_param = (sctp_unrecognized_param_t *)
                             ((__u8 *)(err_chunk->chunk_hdr) +
-                           sizeof(sctp_chunkhdr_t));
+                           sizeof(struct sctp_chunkhdr));
                 /* Replace the cause code with the "Unrecognized parameter"
                  * parameter type.
                  */
@@ -503,7 +503,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
                                        sctp_cmd_seq_t *commands)
  {
         struct sctp_chunk *chunk = arg;
-       sctp_init_chunk_t *initchunk;
+       struct sctp_init_chunk *initchunk;
         struct sctp_chunk *err_chunk;
         struct sctp_packet *packet;
  
@@ -522,12 +522,12 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
         /* Grab the INIT header.  */
-       chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+       chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
  
         /* Verify the INIT chunk before processing it. */
         err_chunk = NULL;
         if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-                             (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+                             (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
                               &err_chunk)) {
  
                 sctp_error_t error = SCTP_ERROR_NO_RESOURCE;
@@ -540,9 +540,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
                 if (err_chunk) {
                         packet = sctp_abort_pkt_new(net, ep, asoc, arg,
                                         (__u8 *)(err_chunk->chunk_hdr) +
-                                       sizeof(sctp_chunkhdr_t),
+                                       sizeof(struct sctp_chunkhdr),
                                         ntohs(err_chunk->chunk_hdr->length) -
-                                       sizeof(sctp_chunkhdr_t));
+                                       sizeof(struct sctp_chunkhdr));
  
                         sctp_chunk_free(err_chunk);
  
@@ -576,9 +576,9 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
         /* Tag the variable length parameters.  Note that we never
          * convert the parameters in an INIT chunk.
          */
-       chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+       chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
  
-       initchunk = (sctp_init_chunk_t *) chunk->chunk_hdr;
+       initchunk = (struct sctp_init_chunk *)chunk->chunk_hdr;
  
         sctp_add_cmd_sf(commands, SCTP_CMD_PEER_INIT,
                         SCTP_PEER_INIT(initchunk));
@@ -653,7 +653,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
  {
         struct sctp_chunk *chunk = arg;
         struct sctp_association *new_asoc;
-       sctp_init_chunk_t *peer_init;
+       struct sctp_init_chunk *peer_init;
         struct sctp_chunk *repl;
         struct sctp_ulpevent *ev, *ai_ev = NULL;
         int error = 0;
@@ -673,7 +673,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
          * chunk header.  More detailed verification is done
          * in sctp_unpack_cookie().
          */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
  
         /* If the endpoint is not listening or if the number of associations
@@ -691,7 +691,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
         chunk->subh.cookie_hdr =
                 (struct sctp_signed_cookie *)chunk->skb->data;
         if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-                                        sizeof(sctp_chunkhdr_t)))
+                                        sizeof(struct sctp_chunkhdr)))
                 goto nomem;
  
         /* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint
@@ -770,9 +770,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
                 auth.skb = chunk->auth_chunk;
                 auth.asoc = chunk->asoc;
                 auth.sctp_hdr = chunk->sctp_hdr;
-               auth.chunk_hdr = skb_push(chunk->auth_chunk,
-                                         sizeof(sctp_chunkhdr_t));
-               skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
+               auth.chunk_hdr = (struct sctp_chunkhdr *)
+                                       skb_push(chunk->auth_chunk,
+                                                sizeof(struct sctp_chunkhdr));
+               skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr));
                 auth.transport = chunk->transport;
  
                 ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
@@ -886,7 +887,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
         /* Verify that the chunk length for the COOKIE-ACK is OK.
          * If we don't do this, any bundled chunks may be junked.
          */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -1080,7 +1081,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
                                     void *arg,
                                     sctp_cmd_seq_t *commands)
  {
-       sctp_paramhdr_t *param_hdr;
+       struct sctp_paramhdr *param_hdr;
         struct sctp_chunk *chunk = arg;
         struct sctp_chunk *reply;
         size_t paylen = 0;
@@ -1097,9 +1098,9 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
          * respond with a HEARTBEAT ACK that contains the Heartbeat
          * Information field copied from the received HEARTBEAT chunk.
          */
-       chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
-       param_hdr = (sctp_paramhdr_t *) chunk->subh.hb_hdr;
-       paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
+       chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data;
+       param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr;
+       paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr);
  
         if (ntohs(param_hdr->length) > paylen)
                 return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
@@ -1164,7 +1165,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
                 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
  
         /* Make sure that the HEARTBEAT-ACK chunk has a valid length.  */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) +
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) +
                                             sizeof(sctp_sender_hb_info_t)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
@@ -1449,19 +1450,19 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
          * In this case, we generate a protocol violation since we have
          * an association established.
          */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
         /* Grab the INIT header.  */
-       chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
+       chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
  
         /* Tag the variable length parameters.  */
-       chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(sctp_inithdr_t));
+       chunk->param_hdr.v = skb_pull(chunk->skb, sizeof(struct sctp_inithdr));
  
         /* Verify the INIT chunk before processing it. */
         err_chunk = NULL;
         if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type,
-                             (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
+                             (struct sctp_init_chunk *)chunk->chunk_hdr, chunk,
                               &err_chunk)) {
                 /* This chunk contains fatal error. It is to be discarded.
                  * Send an ABORT, with causes if there is any.
@@ -1469,9 +1470,9 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
                 if (err_chunk) {
                         packet = sctp_abort_pkt_new(net, ep, asoc, arg,
                                         (__u8 *)(err_chunk->chunk_hdr) +
-                                       sizeof(sctp_chunkhdr_t),
+                                       sizeof(struct sctp_chunkhdr),
                                         ntohs(err_chunk->chunk_hdr->length) -
-                                       sizeof(sctp_chunkhdr_t));
+                                       sizeof(struct sctp_chunkhdr));
  
                         if (packet) {
                                 sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
@@ -1508,7 +1509,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
          * place (local tie-tag and per tie-tag) within the state cookie.
          */
         if (!sctp_process_init(new_asoc, chunk, sctp_source(chunk),
-                              (sctp_init_chunk_t *)chunk->chunk_hdr,
+                              (struct sctp_init_chunk *)chunk->chunk_hdr,
                                GFP_ATOMIC))
                 goto nomem;
  
@@ -1535,7 +1536,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
         len = 0;
         if (err_chunk) {
                 len = ntohs(err_chunk->chunk_hdr->length) -
-                       sizeof(sctp_chunkhdr_t);
+                     sizeof(struct sctp_chunkhdr);
         }
  
         repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len);
@@ -1556,7 +1557,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
                  */
                 unk_param = (sctp_unrecognized_param_t *)
                             ((__u8 *)(err_chunk->chunk_hdr) +
-                           sizeof(sctp_chunkhdr_t));
+                           sizeof(struct sctp_chunkhdr));
                 /* Replace the cause code with the "Unrecognized parameter"
                  * parameter type.
                  */
@@ -1729,7 +1730,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
                                         sctp_cmd_seq_t *commands,
                                         struct sctp_association *new_asoc)
  {
-       sctp_init_chunk_t *peer_init;
+       struct sctp_init_chunk *peer_init;
         struct sctp_ulpevent *ev;
         struct sctp_chunk *repl;
         struct sctp_chunk *err;
@@ -1844,7 +1845,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
                                         sctp_cmd_seq_t *commands,
                                         struct sctp_association *new_asoc)
  {
-       sctp_init_chunk_t *peer_init;
+       struct sctp_init_chunk *peer_init;
         struct sctp_chunk *repl;
  
         /* new_asoc is a brand-new association, so these are not yet
@@ -2044,7 +2045,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
          * enough for the chunk header.  Cookie length verification is
          * done later.
          */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -2053,7 +2054,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
          */
         chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data;
         if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-                                       sizeof(sctp_chunkhdr_t)))
+                                       sizeof(struct sctp_chunkhdr)))
                 goto nomem;
  
         /* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie
@@ -2806,7 +2807,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
         struct sctp_chunk *reply;
  
         /* Make sure that the chunk has a valid length */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -2989,7 +2990,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
                 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
         }
  
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -3009,7 +3010,8 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
                 return SCTP_DISPOSITION_ABORT;
         case SCTP_IERROR_PROTO_VIOLATION:
                 return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
-                       (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+                                              (u8 *)chunk->subh.data_hdr,
+                                              sizeof(struct sctp_datahdr));
         default:
                 BUG();
         }
@@ -3107,7 +3109,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
                 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
         }
  
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_data_chunk)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -3123,7 +3125,8 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
                 return SCTP_DISPOSITION_ABORT;
         case SCTP_IERROR_PROTO_VIOLATION:
                 return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
-                       (u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
+                                              (u8 *)chunk->subh.data_hdr,
+                                              sizeof(struct sctp_datahdr));
         default:
                 BUG();
         }
@@ -3358,7 +3361,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
                 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
  
         /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
         /* 10.2 H) SHUTDOWN COMPLETE notification
@@ -3435,7 +3438,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
  {
         struct sctp_chunk *chunk = arg;
         struct sk_buff *skb = chunk->skb;
-       sctp_chunkhdr_t *ch;
+       struct sctp_chunkhdr *ch;
         sctp_errhdr_t *err;
         __u8 *ch_end;
         int ootb_shut_ack = 0;
@@ -3443,10 +3446,10 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
  
         SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
  
-       ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
+       ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
         do {
                 /* Report violation if the chunk is less then minimal */
-               if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
+               if (ntohs(ch->length) < sizeof(*ch))
                         return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -3487,7 +3490,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
                         }
                 }
  
-               ch = (sctp_chunkhdr_t *) ch_end;
+               ch = (struct sctp_chunkhdr *)ch_end;
         } while (ch_end < skb_tail_pointer(skb));
  
         if (ootb_shut_ack)
@@ -3560,7 +3563,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
         /* If the chunk length is invalid, we don't want to process
          * the reset of the packet.
          */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
  
         /* We need to discard the rest of the packet to prevent
@@ -3591,7 +3594,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
         struct sctp_chunk *chunk = arg;
  
         /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -4256,7 +4259,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
  {
         struct sctp_chunk *unk_chunk = arg;
         struct sctp_chunk *err_chunk;
-       sctp_chunkhdr_t *hdr;
+       struct sctp_chunkhdr *hdr;
  
         pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk);
  
@@ -4267,7 +4270,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
          * Since we don't know the chunk type, we use a general
          * chunkhdr structure to make a comparison.
          */
-       if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(unk_chunk, sizeof(*hdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -4340,7 +4343,7 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
          * Since we don't know the chunk type, we use a general
          * chunkhdr structure to make a comparison.
          */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -4405,7 +4408,7 @@ sctp_disposition_t sctp_sf_violation(struct net *net,
         struct sctp_chunk *chunk = arg;
  
         /* Make sure that the chunk has a valid length. */
-       if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
+       if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
                 return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                   commands);
  
@@ -6121,9 +6124,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
                 switch (chunk->chunk_hdr->type) {
                 case SCTP_CID_INIT:
                 {
-                       sctp_init_chunk_t *init;
+                       struct sctp_init_chunk *init;
  
-                       init = (sctp_init_chunk_t *)chunk->chunk_hdr;
+                       init = (struct sctp_init_chunk *)chunk->chunk_hdr;
                         vtag = ntohl(init->init_hdr.init_tag);
                         break;
                 }
@@ -6196,7 +6199,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
                          struct sctp_chunk *chunk,
                          sctp_cmd_seq_t *commands)
  {
-       sctp_datahdr_t *data_hdr;
+       struct sctp_datahdr *data_hdr;
         struct sctp_chunk *err;
         size_t datalen;
         sctp_verb_t deliver;
@@ -6209,8 +6212,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
         u16 sid;
         u8 ordered = 0;
  
-       data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
-       skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
+       data_hdr = (struct sctp_datahdr *)chunk->skb->data;
+       chunk->subh.data_hdr = data_hdr;
+       skb_pull(chunk->skb, sizeof(*data_hdr));
  
         tsn = ntohl(data_hdr->tsn);
         pr_debug("%s: TSN 0x%x\n", __func__, tsn);
@@ -6258,7 +6262,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
          * Actually, allow a little bit of overflow (up to a MTU).
          */
         datalen = ntohs(chunk->chunk_hdr->length);
-       datalen -= sizeof(sctp_data_chunk_t);
+       datalen -= sizeof(struct sctp_data_chunk);
  
         deliver = SCTP_CMD_CHUNK_ULP;
  
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c

index 419b18ebb056512f060d762e2b479ef417009b2d..3e958c1c4b95dad376b70d225050168849605103 100644 (file)
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -53,7 +53,7 @@ static const sctp_sm_table_entry_t
  timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
  
  static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-                                                           sctp_cid_t cid,
+                                                           enum sctp_cid cid,
                                                             sctp_state_t state);
  
  
@@ -968,7 +968,7 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
  };
  
  static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
-                                                           sctp_cid_t cid,
+                                                           enum sctp_cid cid,
                                                             sctp_state_t state)
  {
         if (state > SCTP_STATE_MAX)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c

index 7b6e20eb9451a070340ae461b6dc5162588da408..1db478e345203f75733044d843a763cc3a3966e1 100644 (file)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -164,7 +164,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
                                 sizeof(struct sk_buff) +
                                 sizeof(struct sctp_chunk);
  
-       atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+       refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
         sk->sk_wmem_queued += chunk->skb->truesize;
         sk_mem_charge(sk, chunk->skb->truesize);
  }
@@ -4933,11 +4933,47 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
  }
  EXPORT_SYMBOL(sctp_do_peeloff);
  
+static int sctp_getsockopt_peeloff_common(struct sock *sk, sctp_peeloff_arg_t *peeloff,
+                                         struct file **newfile, unsigned flags)
+{
+       struct socket *newsock;
+       int retval;
+
+       retval = sctp_do_peeloff(sk, peeloff->associd, &newsock);
+       if (retval < 0)
+               goto out;
+
+       /* Map the socket to an unused fd that can be returned to the user.  */
+       retval = get_unused_fd_flags(flags & SOCK_CLOEXEC);
+       if (retval < 0) {
+               sock_release(newsock);
+               goto out;
+       }
+
+       *newfile = sock_alloc_file(newsock, 0, NULL);
+       if (IS_ERR(*newfile)) {
+               put_unused_fd(retval);
+               sock_release(newsock);
+               retval = PTR_ERR(*newfile);
+               *newfile = NULL;
+               return retval;
+       }
+
+       pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk,
+                retval);
+
+       peeloff->sd = retval;
+
+       if (flags & SOCK_NONBLOCK)
+               (*newfile)->f_flags |= O_NONBLOCK;
+out:
+       return retval;
+}
+
  static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval, int __user *optlen)
  {
         sctp_peeloff_arg_t peeloff;
-       struct socket *newsock;
-       struct file *newfile;
+       struct file *newfile = NULL;
         int retval = 0;
  
         if (len < sizeof(sctp_peeloff_arg_t))
@@ -4946,26 +4982,44 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
         if (copy_from_user(&peeloff, optval, len))
                 return -EFAULT;
  
-       retval = sctp_do_peeloff(sk, peeloff.associd, &newsock);
+       retval = sctp_getsockopt_peeloff_common(sk, &peeloff, &newfile, 0);
         if (retval < 0)
                 goto out;
  
-       /* Map the socket to an unused fd that can be returned to the user.  */
-       retval = get_unused_fd_flags(0);
-       if (retval < 0) {
-               sock_release(newsock);
-               goto out;
+       /* Return the fd mapped to the new socket.  */
+       if (put_user(len, optlen)) {
+               fput(newfile);
+               put_unused_fd(retval);
+               return -EFAULT;
         }
  
-       newfile = sock_alloc_file(newsock, 0, NULL);
-       if (IS_ERR(newfile)) {
+       if (copy_to_user(optval, &peeloff, len)) {
+               fput(newfile);
                 put_unused_fd(retval);
-               sock_release(newsock);
-               return PTR_ERR(newfile);
+               return -EFAULT;
         }
+       fd_install(retval, newfile);
+out:
+       return retval;
+}
  
-       pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk,
-                retval);
+static int sctp_getsockopt_peeloff_flags(struct sock *sk, int len,
+                                        char __user *optval, int __user *optlen)
+{
+       sctp_peeloff_flags_arg_t peeloff;
+       struct file *newfile = NULL;
+       int retval = 0;
+
+       if (len < sizeof(sctp_peeloff_flags_arg_t))
+               return -EINVAL;
+       len = sizeof(sctp_peeloff_flags_arg_t);
+       if (copy_from_user(&peeloff, optval, len))
+               return -EFAULT;
+
+       retval = sctp_getsockopt_peeloff_common(sk, &peeloff.p_arg,
+                                               &newfile, peeloff.flags);
+       if (retval < 0)
+               goto out;
  
         /* Return the fd mapped to the new socket.  */
         if (put_user(len, optlen)) {
@@ -4973,7 +5027,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
                 put_unused_fd(retval);
                 return -EFAULT;
         }
-       peeloff.sd = retval;
+
         if (copy_to_user(optval, &peeloff, len)) {
                 fput(newfile);
                 put_unused_fd(retval);
@@ -6033,7 +6087,8 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
                 return -EACCES;
  
         hmacs = ep->auth_hmacs_list;
-       data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t);
+       data_len = ntohs(hmacs->param_hdr.length) -
+                  sizeof(struct sctp_paramhdr);
  
         if (len < sizeof(struct sctp_hmacalgo) + data_len)
                 return -EINVAL;
@@ -6117,7 +6172,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
                 goto num;
  
         /* See if the user provided enough room for all the data */
-       num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+       num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
         if (len < num_chunks)
                 return -EINVAL;
  
@@ -6165,7 +6220,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
         if (!ch)
                 goto num;
  
-       num_chunks = ntohs(ch->param_hdr.length) - sizeof(sctp_paramhdr_t);
+       num_chunks = ntohs(ch->param_hdr.length) - sizeof(struct sctp_paramhdr);
         if (len < sizeof(struct sctp_authchunks) + num_chunks)
                 return -EINVAL;
  
@@ -6758,6 +6813,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
         case SCTP_SOCKOPT_PEELOFF:
                 retval = sctp_getsockopt_peeloff(sk, len, optval, optlen);
                 break;
+       case SCTP_SOCKOPT_PEELOFF_FLAGS:
+               retval = sctp_getsockopt_peeloff_flags(sk, len, optval, optlen);
+               break;
         case SCTP_PEER_ADDR_PARAMS:
                 retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
                                                           optlen);
@@ -7563,7 +7621,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
                 if (flags & MSG_PEEK) {
                         skb = skb_peek(&sk->sk_receive_queue);
                         if (skb)
-                               atomic_inc(&skb->users);
+                               refcount_inc(&skb->users);
                 } else {
                         skb = __skb_dequeue(&sk->sk_receive_queue);
                 }
@@ -7684,7 +7742,7 @@ static void sctp_wfree(struct sk_buff *skb)
                                 sizeof(struct sk_buff) +
                                 sizeof(struct sctp_chunk);
  
-       atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+       WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc));
  
         /*
          * This undoes what is done via sctp_set_owner_w and sk_mem_charge
diff --git a/net/sctp/stream.c b/net/sctp/stream.c

index 82e6d40052a8dd9ad2049ec506d132ab1305a64f..63ea1550371493ec8863627c7a43f46a22f4a4c9 100644 (file)
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -304,7 +304,7 @@ out:
         return retval;
  }
  
-static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
+static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
                         struct sctp_association *asoc, __u32 resp_seq,
                         __be16 type)
  {
@@ -749,7 +749,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
         struct sctp_strreset_resp *resp = param.v;
         struct sctp_transport *t;
         __u16 i, nums, flags = 0;
-       sctp_paramhdr_t *req;
+       struct sctp_paramhdr *req;
         __u32 result;
  
         req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c

index 17854fb0e512058f24a433f31657d18cd712fbcf..5f86c5062a987be610bdb3fb64878be2d6c42944 100644 (file)
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -158,7 +158,7 @@ struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
                 /* Trim the buffer to the right length.  */
                 skb_trim(skb, sizeof(struct sctp_assoc_change) +
                          ntohs(chunk->chunk_hdr->length) -
-                        sizeof(sctp_chunkhdr_t));
+                        sizeof(struct sctp_chunkhdr));
         } else {
                 event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
                                   MSG_NOTIFICATION, gfp);
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c

index 25f7e4140566254cd277b708a7c4835477a0ff5f..0225d62a869f8deff10565c4625df0a10464ce87 100644 (file)
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1090,7 +1090,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
  
         if (chunk) {
                 needed = ntohs(chunk->chunk_hdr->length);
-               needed -= sizeof(sctp_data_chunk_t);
+               needed -= sizeof(struct sctp_data_chunk);
         } else
                 needed = SCTP_DEFAULT_MAXWINDOW;
  
diff --git a/net/tipc/socket.c b/net/tipc/socket.c

index 1b92b72e812f942fc9826d8542f29bf1bd7c26c5..101e3597338f7c1e182c9090af0d14caf5be1b8b 100644 (file)
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2313,7 +2313,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk)
         struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
  
         if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) {
-               WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+               WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
                 __sock_put(sk);
         }
  }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

index 1a0c961f4ffeef40abc5b980e004b01120e3c536..b9ee766054f6c906529c9f052bef7824e998617b 100644 (file)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(unix_peer_get);
  
  static inline void unix_release_addr(struct unix_address *addr)
  {
-       if (atomic_dec_and_test(&addr->refcnt))
+       if (refcount_dec_and_test(&addr->refcnt))
                 kfree(addr);
  }
  
@@ -442,7 +442,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
  static int unix_writable(const struct sock *sk)
  {
         return sk->sk_state != TCP_LISTEN &&
-              (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+              (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
  }
  
  static void unix_write_space(struct sock *sk)
@@ -487,7 +487,7 @@ static void unix_sock_destructor(struct sock *sk)
  
         skb_queue_purge(&sk->sk_receive_queue);
  
-       WARN_ON(atomic_read(&sk->sk_wmem_alloc));
+       WARN_ON(refcount_read(&sk->sk_wmem_alloc));
         WARN_ON(!sk_unhashed(sk));
         WARN_ON(sk->sk_socket);
         if (!sock_flag(sk, SOCK_DEAD)) {
@@ -864,7 +864,7 @@ static int unix_autobind(struct socket *sock)
                 goto out;
  
         addr->name->sun_family = AF_UNIX;
-       atomic_set(&addr->refcnt, 1);
+       refcount_set(&addr->refcnt, 1);
  
  retry:
         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
@@ -1040,7 +1040,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         memcpy(addr->name, sunaddr, addr_len);
         addr->len = addr_len;
         addr->hash = hash ^ sk->sk_type;
-       atomic_set(&addr->refcnt, 1);
+       refcount_set(&addr->refcnt, 1);
  
         if (sun_path[0]) {
                 addr->hash = UNIX_HASH_SIZE;
@@ -1335,7 +1335,7 @@ restart:
  
         /* copy address information from listening to new sock*/
         if (otheru->addr) {
-               atomic_inc(&otheru->addr->refcnt);
+               refcount_inc(&otheru->addr->refcnt);
                 newu->addr = otheru->addr;
         }
         if (otheru->path.dentry) {
@@ -2033,7 +2033,7 @@ alloc_skb:
         skb->len += size;
         skb->data_len += size;
         skb->truesize += size;
-       atomic_add(size, &sk->sk_wmem_alloc);
+       refcount_add(size, &sk->sk_wmem_alloc);
  
         if (newskb) {
                 err = unix_scm_to_skb(&scm, skb, false);
@@ -2847,7 +2847,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
  
                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
                         s,
-                       atomic_read(&s->sk_refcnt),
+                       refcount_read(&s->sk_refcnt),
                         0,
                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
                         s->sk_type,
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile

index abf81b329dc1f3276e1cf5631ebae4ec31a229de..55b2ac3009955aacd09db22c7fc4d082807c3ed9 100644 (file)
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -4,8 +4,7 @@
  
  obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
                       xfrm_input.o xfrm_output.o \
-                     xfrm_sysctl.o xfrm_replay.o
-obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o
+                     xfrm_sysctl.o xfrm_replay.o xfrm_device.o
  obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
  obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
  obj-$(CONFIG_XFRM_USER) += xfrm_user.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c

index 6d4a60d1bf19f84716c0fe4f9857468dac26cf74..5f7e8bfa0c2dca6d811616aa7002b768d1bb51ae 100644 (file)
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -22,6 +22,7 @@
  #include <net/xfrm.h>
  #include <linux/notifier.h>
  
+#ifdef CONFIG_XFRM_OFFLOAD
  int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features)
  {
         int err;
@@ -137,6 +138,7 @@ ok:
         return true;
  }
  EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
+#endif
  
  static int xfrm_dev_register(struct net_device *dev)
  {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c

index a3dc7ab0b7ede1f74f6361b2da26224ef539b250..4706df61217052d63868416072d15e12344acc03 100644 (file)
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1006,10 +1006,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
                 err = -ESRCH;
  out:
         spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
-
-       if (cnt)
-               xfrm_garbage_collect(net);
-
         return err;
  }
  EXPORT_SYMBOL(xfrm_policy_flush);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c

index 6197c7231bc75ead01cfe12d3573a909fffe3058..2be4c6af008a7917ae5cc1e3306c5466cae387e4 100644 (file)
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2027,6 +2027,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                         return 0;
                 return err;
         }
+       xfrm_garbage_collect(net);
  
         c.data.type = type;
         c.event = nlh->nlmsg_type;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile

index e7ec9b8539a508f8fb2d451cfea05647699c06a5..9c650589e80f315f0425c20d7bd205c0d89770e2 100644 (file)
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -36,6 +36,7 @@ hostprogs-y += lwt_len_hist
  hostprogs-y += xdp_tx_iptunnel
  hostprogs-y += test_map_in_map
  hostprogs-y += per_socket_stats_example
+hostprogs-y += load_sock_ops
  
  # Libbpf dependencies
  LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -52,6 +53,7 @@ tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o
  tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o
  tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o
  tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
+load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
  test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
  trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
  lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
@@ -111,6 +113,12 @@ always += lwt_len_hist_kern.o
  always += xdp_tx_iptunnel_kern.o
  always += test_map_in_map_kern.o
  always += cookie_uid_helper_example.o
+always += tcp_synrto_kern.o
+always += tcp_rwnd_kern.o
+always += tcp_bufs_kern.o
+always += tcp_cong_kern.o
+always += tcp_iw_kern.o
+always += tcp_clamp_kern.o
  
  HOSTCFLAGS += -I$(objtree)/usr/include
  HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -130,6 +138,7 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt
  HOSTLOADLIBES_tracex5 += -lelf
  HOSTLOADLIBES_tracex6 += -lelf
  HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
+HOSTLOADLIBES_load_sock_ops += -lelf
  HOSTLOADLIBES_test_probe_write_user += -lelf
  HOSTLOADLIBES_trace_output += -lelf -lrt
  HOSTLOADLIBES_lathist += -lelf
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h

index f4840b8bb8f90af32a5af01a489fbe076452afc9..d50ac342dc92762eba4dcbafb27dd24ab27b4729 100644 (file)
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -60,6 +60,9 @@ static unsigned long long (*bpf_get_prandom_u32)(void) =
         (void *) BPF_FUNC_get_prandom_u32;
  static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
         (void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+                            int optlen) =
+       (void *) BPF_FUNC_setsockopt;
  
  /* llvm builtin functions that eBPF C program may use to
   * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c

index a91c57dd8571e01f93ad95986a8c80103a0a406e..a4be7cfa65190b2bab1a1d654dea931ece8501dc 100644 (file)
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
         bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
         bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
         bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
+       bool is_sockops = strncmp(event, "sockops", 7) == 0;
         size_t insns_cnt = size / sizeof(struct bpf_insn);
         enum bpf_prog_type prog_type;
         char buf[256];
@@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                 prog_type = BPF_PROG_TYPE_CGROUP_SKB;
         } else if (is_cgroup_sk) {
                 prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+       } else if (is_sockops) {
+               prog_type = BPF_PROG_TYPE_SOCK_OPS;
         } else {
                 printf("Unknown event '%s'\n", event);
                 return -1;
@@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
         if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
                 return 0;
  
-       if (is_socket) {
-               event += 6;
+       if (is_socket || is_sockops) {
+               if (is_socket)
+                       event += 6;
+               else
+                       event += 7;
                 if (*event != '/')
                         return 0;
                 event++;
@@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
                     memcmp(shname, "xdp", 3) == 0 ||
                     memcmp(shname, "perf_event", 10) == 0 ||
                     memcmp(shname, "socket", 6) == 0 ||
-                   memcmp(shname, "cgroup/", 7) == 0)
+                   memcmp(shname, "cgroup/", 7) == 0 ||
+                   memcmp(shname, "sockops", 7) == 0)
                         load_and_attach(shname, data->d_buf, data->d_size);
         }
  
diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c

new file mode 100644 (file)

index 0000000..e5da6cf
--- /dev/null
+++ b/samples/bpf/load_sock_ops.c
@@ -0,0 +1,97 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+
+static void usage(char *pname)
+{
+       printf("USAGE:\n  %s [-l] <cg-path> <prog filename>\n", pname);
+       printf("\tLoad and attach a sock_ops program to the specified "
+              "cgroup\n");
+       printf("\tIf \"-l\" is used, the program will continue to run\n");
+       printf("\tprinting the BPF log buffer\n");
+       printf("\tIf the specified filename does not end in \".o\", it\n");
+       printf("\tappends \"_kern.o\" to the name\n");
+       printf("\n");
+       printf("  %s -r <cg-path>\n", pname);
+       printf("\tDetaches the currently attached sock_ops program\n");
+       printf("\tfrom the specified cgroup\n");
+       printf("\n");
+       exit(1);
+}
+
+int main(int argc, char **argv)
+{
+       int logFlag = 0;
+       int error = 0;
+       char *cg_path;
+       char fn[500];
+       char *prog;
+       int cg_fd;
+
+       if (argc < 3)
+               usage(argv[0]);
+
+       if (!strcmp(argv[1], "-r")) {
+               cg_path = argv[2];
+               cg_fd = open(cg_path, O_DIRECTORY | O_RDONLY);
+               error = bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+               if (error) {
+                       printf("ERROR: bpf_prog_detach: %d (%s)\n",
+                              error, strerror(errno));
+                       return 2;
+               }
+               return 0;
+       } else if (!strcmp(argv[1], "-h")) {
+               usage(argv[0]);
+       } else if (!strcmp(argv[1], "-l")) {
+               logFlag = 1;
+               if (argc < 4)
+                       usage(argv[0]);
+       }
+
+       prog = argv[argc - 1];
+       cg_path = argv[argc - 2];
+       if (strlen(prog) > 480) {
+               fprintf(stderr, "ERROR: program name too long (> 480 chars)\n");
+               return 3;
+       }
+       cg_fd = open(cg_path, O_DIRECTORY | O_RDONLY);
+
+       if (strlen(prog) >= 2 && !strcmp(prog + strlen(prog) - 2, ".o"))
+               snprintf(fn, sizeof(fn), "%s", prog);
+       else
+               snprintf(fn, sizeof(fn), "%s_kern.o", prog);
+       if (logFlag)
+               printf("loading bpf file:%s\n", fn);
+       if (load_bpf_file(fn)) {
+               printf("ERROR: load_bpf_file failed for: %s\n", fn);
+               printf("%s", bpf_log_buf);
+               return 4;
+       }
+       if (logFlag)
+               printf("TCP BPF Loaded %s\n", fn);
+
+       error = bpf_prog_attach(prog_fd[0], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+       if (error) {
+               printf("ERROR: bpf_prog_attach: %d (%s)\n",
+                      error, strerror(errno));
+               return 5;
+       } else if (logFlag) {
+               read_trace_pipe();
+       }
+
+       return error;
+}
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c

index b5524d417eb57acd808aa07f4e876191df734166..877ecf8fc5acd956d165c1fff93ba631e73172cd 100644 (file)
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -8,6 +8,10 @@
  #include <arpa/inet.h>
  #include <sys/resource.h>
  
+#define PARSE_IP 3
+#define PARSE_IP_PROG_FD (prog_fd[0])
+#define PROG_ARRAY_FD (map_fd[0])
+
  struct bpf_flow_keys {
         __be32 src;
         __be32 dst;
@@ -28,7 +32,9 @@ int main(int argc, char **argv)
         struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
         char filename[256];
         FILE *f;
-       int i, sock;
+       int i, sock, err, id, key = PARSE_IP;
+       struct bpf_prog_info info = {};
+       uint32_t info_len = sizeof(info);
  
         snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
         setrlimit(RLIMIT_MEMLOCK, &r);
@@ -38,6 +44,13 @@ int main(int argc, char **argv)
                 return 1;
         }
  
+       /* Test fd array lookup which returns the id of the bpf_prog */
+       err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
+       assert(!err);
+       err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
+       assert(!err);
+       assert(id == info.id);
+
         sock = open_raw_sock("lo");
  
         assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
diff --git a/samples/bpf/tcp_bufs_kern.c b/samples/bpf/tcp_bufs_kern.c

new file mode 100644 (file)

index 0000000..ee83bba
--- /dev/null
+++ b/samples/bpf/tcp_bufs_kern.c
@@ -0,0 +1,86 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets and send
+ * and receive buffers to 1.5MB. This would usually be done after
+ * doing appropriate checks that indicate the hosts are far enough
+ * away (i.e. large RTT).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+SEC("sockops")
+int bpf_bufs(struct bpf_sock_ops *skops)
+{
+       int bufsize = 1500000;
+       int rwnd_init = 40;
+       int rv = 0;
+       int op;
+
+       /* For testing purposes, only execute rest of BPF program
+        * if either port number is 55601
+        */
+       if (bpf_ntohl(skops->remote_port) != 55601 &&
+           skops->local_port != 55601)
+               return -1;
+
+       op = (int) skops->op;
+
+#ifdef DEBUG
+       bpf_printk("BPF command: %d\n", op);
+#endif
+
+       /* Usually there would be a check to ensure the hosts are far
+        * from each other so it makes sense to increase buffer sizes
+        */
+       switch (op) {
+       case BPF_SOCK_OPS_RWND_INIT:
+               rv = rwnd_init;
+               break;
+       case BPF_SOCK_OPS_TCP_CONNECT_CB:
+               /* Set sndbuf and rcvbuf of active connections */
+               rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+                                   sizeof(bufsize));
+               rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+                                            &bufsize, sizeof(bufsize));
+               break;
+       case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+               /* Nothing to do */
+               break;
+       case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+               /* Set sndbuf and rcvbuf of passive connections */
+               rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+                                   sizeof(bufsize));
+               rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+                                            &bufsize, sizeof(bufsize));
+               break;
+       default:
+               rv = -1;
+       }
+#ifdef DEBUG
+       bpf_printk("Returning %d\n", rv);
+#endif
+       skops->reply = rv;
+       return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_clamp_kern.c b/samples/bpf/tcp_clamp_kern.c

new file mode 100644 (file)

index 0000000..d68eadd
--- /dev/null
+++ b/samples/bpf/tcp_clamp_kern.c
@@ -0,0 +1,102 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Sample BPF program to set send and receive buffers to 150KB, sndcwnd clamp
+ * to 100 packets and SYN and SYN_ACK RTOs to 10ms when both hosts are within
+ * the same datacenter. For this example, we assume they are within the same
+ * datacenter when the first 5.5 bytes of their IPv6 addresses are the same.
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+SEC("sockops")
+int bpf_clamp(struct bpf_sock_ops *skops)
+{
+       int bufsize = 150000;
+       int to_init = 10;
+       int clamp = 100;
+       int rv = 0;
+       int op;
+
+       /* For testing purposes, only execute rest of BPF program
+        * if either port number is 55601
+        */
+       if (bpf_ntohl(skops->remote_port) != 55601 && skops->local_port != 55601)
+               return -1;
+
+       op = (int) skops->op;
+
+#ifdef DEBUG
+       bpf_printk("BPF command: %d\n", op);
+#endif
+
+       /* Check that both hosts are within same datacenter. For this example
+        * it is the case when the first 5.5 bytes of their IPv6 addresses are
+        * the same.
+        */
+       if (skops->family == AF_INET6 &&
+           skops->local_ip6[0] == skops->remote_ip6[0] &&
+           (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+           (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+               switch (op) {
+               case BPF_SOCK_OPS_TIMEOUT_INIT:
+                       rv = to_init;
+                       break;
+               case BPF_SOCK_OPS_TCP_CONNECT_CB:
+                       /* Set sndbuf and rcvbuf of active connections */
+                       rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
+                                           &bufsize, sizeof(bufsize));
+                       rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+                                                     SO_RCVBUF, &bufsize,
+                                                     sizeof(bufsize));
+                       break;
+               case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+                       rv = bpf_setsockopt(skops, SOL_TCP,
+                                           TCP_BPF_SNDCWND_CLAMP,
+                                           &clamp, sizeof(clamp));
+                       break;
+               case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+                       /* Set sndbuf and rcvbuf of passive connections */
+                       rv = bpf_setsockopt(skops, SOL_TCP,
+                                           TCP_BPF_SNDCWND_CLAMP,
+                                           &clamp, sizeof(clamp));
+                       rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+                                                     SO_SNDBUF, &bufsize,
+                                                     sizeof(bufsize));
+                       rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET,
+                                                     SO_RCVBUF, &bufsize,
+                                                     sizeof(bufsize));
+                       break;
+               default:
+                       rv = -1;
+               }
+       } else {
+               rv = -1;
+       }
+#ifdef DEBUG
+       bpf_printk("Returning %d\n", rv);
+#endif
+       skops->reply = rv;
+       return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_cong_kern.c b/samples/bpf/tcp_cong_kern.c

new file mode 100644 (file)

index 0000000..dac15bc
--- /dev/null
+++ b/samples/bpf/tcp_cong_kern.c
@@ -0,0 +1,83 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set congestion control to dctcp when both hosts are
+ * in the same datacenter (as determined by IPv6 prefix).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+SEC("sockops")
+int bpf_cong(struct bpf_sock_ops *skops)
+{
+       char cong[] = "dctcp";
+       int rv = 0;
+       int op;
+
+       /* For testing purposes, only execute rest of BPF program
+        * if either port number is 55601
+        */
+       if (bpf_ntohl(skops->remote_port) != 55601 &&
+           skops->local_port != 55601)
+               return -1;
+
+       op = (int) skops->op;
+
+#ifdef DEBUG
+       bpf_printk("BPF command: %d\n", op);
+#endif
+
+       /* Check if both hosts are in the same datacenter. For this
+        * example they are if the 1st 5.5 bytes in the IPv6 address
+        * are the same.
+        */
+       if (skops->family == AF_INET6 &&
+           skops->local_ip6[0] == skops->remote_ip6[0] &&
+           (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+           (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+               switch (op) {
+               case BPF_SOCK_OPS_NEEDS_ECN:
+                       rv = 1;
+                       break;
+               case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+                       rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
+                                           cong, sizeof(cong));
+                       break;
+               case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+                       rv = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
+                                           cong, sizeof(cong));
+                       break;
+               default:
+                       rv = -1;
+               }
+       } else {
+               rv = -1;
+       }
+#ifdef DEBUG
+       bpf_printk("Returning %d\n", rv);
+#endif
+       skops->reply = rv;
+       return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_iw_kern.c b/samples/bpf/tcp_iw_kern.c

new file mode 100644 (file)

index 0000000..23c5122
--- /dev/null
+++ b/samples/bpf/tcp_iw_kern.c
@@ -0,0 +1,88 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial congestion window and initial receive
+ * window to 40 packets and send and receive buffers to 1.5MB. This
+ * would usually be done after doing appropriate checks that indicate
+ * the hosts are far enough away (i.e. large RTT).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+SEC("sockops")
+int bpf_iw(struct bpf_sock_ops *skops)
+{
+       int bufsize = 1500000;
+       int rwnd_init = 40;
+       int iw = 40;
+       int rv = 0;
+       int op;
+
+       /* For testing purposes, only execute rest of BPF program
+        * if either port number is 55601
+        */
+       if (bpf_ntohl(skops->remote_port) != 55601 &&
+           skops->local_port != 55601)
+               return -1;
+
+       op = (int) skops->op;
+
+#ifdef DEBUG
+       bpf_printk("BPF command: %d\n", op);
+#endif
+
+       /* Usually there would be a check to ensure the hosts are far
+        * from each other so it makes sense to increase buffer sizes
+        */
+       switch (op) {
+       case BPF_SOCK_OPS_RWND_INIT:
+               rv = rwnd_init;
+               break;
+       case BPF_SOCK_OPS_TCP_CONNECT_CB:
+               /* Set sndbuf and rcvbuf of active connections */
+               rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+                                   sizeof(bufsize));
+               rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+                                            &bufsize, sizeof(bufsize));
+               break;
+       case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+               rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_IW, &iw,
+                                   sizeof(iw));
+               break;
+       case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+               /* Set sndbuf and rcvbuf of passive connections */
+               rv = bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF, &bufsize,
+                                   sizeof(bufsize));
+               rv = rv*100 + bpf_setsockopt(skops, SOL_SOCKET, SO_RCVBUF,
+                                            &bufsize, sizeof(bufsize));
+               break;
+       default:
+               rv = -1;
+       }
+#ifdef DEBUG
+       bpf_printk("Returning %d\n", rv);
+#endif
+       skops->reply = rv;
+       return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_rwnd_kern.c b/samples/bpf/tcp_rwnd_kern.c

new file mode 100644 (file)

index 0000000..3f2a228
--- /dev/null
+++ b/samples/bpf/tcp_rwnd_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set initial receive window to 40 packets when using IPv6
+ * and the first 5.5 bytes of the IPv6 addresses are not the same (in this
+ * example that means both hosts are not the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+SEC("sockops")
+int bpf_rwnd(struct bpf_sock_ops *skops)
+{
+       int rv = -1;
+       int op;
+
+       /* For testing purposes, only execute rest of BPF program
+        * if either port number is 55601
+        */
+       if (bpf_ntohl(skops->remote_port) != 55601 &&
+           skops->local_port != 55601)
+               return -1;
+
+       op = (int) skops->op;
+
+#ifdef DEBUG
+       bpf_printk("BPF command: %d\n", op);
+#endif
+
+       /* Check for RWND_INIT operation and IPv6 addresses */
+       if (op == BPF_SOCK_OPS_RWND_INIT &&
+               skops->family == AF_INET6) {
+
+               /* If the first 5.5 bytes of the IPv6 address are not the same
+                * then both hosts are not in the same datacenter
+                * so use a larger initial advertised window (40 packets)
+                */
+               if (skops->local_ip6[0] != skops->remote_ip6[0] ||
+                   (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) !=
+                   (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000))
+                       rv = 40;
+       }
+#ifdef DEBUG
+       bpf_printk("Returning %d\n", rv);
+#endif
+       skops->reply = rv;
+       return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_synrto_kern.c b/samples/bpf/tcp_synrto_kern.c

new file mode 100644 (file)

index 0000000..3c3fc83
--- /dev/null
+++ b/samples/bpf/tcp_synrto_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set SYN and SYN-ACK RTOs to 10ms when using IPv6 addresses
+ * and the first 5.5 bytes of the IPv6 addresses are the same (in this example
+ * that means both hosts are in the same datacenter).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+SEC("sockops")
+int bpf_synrto(struct bpf_sock_ops *skops)
+{
+       int rv = -1;
+       int op;
+
+       /* For testing purposes, only execute rest of BPF program
+        * if either port number is 55601
+        */
+       if (bpf_ntohl(skops->remote_port) != 55601 &&
+           skops->local_port != 55601)
+               return -1;
+
+       op = (int) skops->op;
+
+#ifdef DEBUG
+       bpf_printk("BPF command: %d\n", op);
+#endif
+
+       /* Check for TIMEOUT_INIT operation and IPv6 addresses */
+       if (op == BPF_SOCK_OPS_TIMEOUT_INIT &&
+               skops->family == AF_INET6) {
+
+               /* If the first 5.5 bytes of the IPv6 address are the same
+                * then both hosts are in the same datacenter
+                * so use an RTO of 10ms
+                */
+               if (skops->local_ip6[0] == skops->remote_ip6[0] &&
+                   (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+                   (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000))
+                       rv = 10;
+       }
+#ifdef DEBUG
+       bpf_printk("Returning %d\n", rv);
+#endif
+       skops->reply = rv;
+       return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c

index f62fdc2bd4281005f959cdb85cf1aafe2dc08193..1aca18539d8dd01742909e9837bf8394185ab38e 100644 (file)
--- a/samples/bpf/test_map_in_map_user.c
+++ b/samples/bpf/test_map_in_map_user.c
@@ -32,6 +32,20 @@ static const char * const test_names[] = {
  
  #define NR_TESTS (sizeof(test_names) / sizeof(*test_names))
  
+static void check_map_id(int inner_map_fd, int map_in_map_fd, uint32_t key)
+{
+       struct bpf_map_info info = {};
+       uint32_t info_len = sizeof(info);
+       int ret, id;
+
+       ret = bpf_obj_get_info_by_fd(inner_map_fd, &info, &info_len);
+       assert(!ret);
+
+       ret = bpf_map_lookup_elem(map_in_map_fd, &key, &id);
+       assert(!ret);
+       assert(id == info.id);
+}
+
  static void populate_map(uint32_t port_key, int magic_result)
  {
         int ret;
@@ -45,12 +59,15 @@ static void populate_map(uint32_t port_key, int magic_result)
  
         ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY);
         assert(!ret);
+       check_map_id(PORT_A, A_OF_PORT_A, port_key);
  
         ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST);
         assert(!ret);
+       check_map_id(PORT_A, H_OF_PORT_A, port_key);
  
         ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST);
         assert(!ret);
+       check_map_id(PORT_H, H_OF_PORT_H, port_key);
  }
  
  static void test_map_in_map(void)
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst

index ce753a408c56823dbd1c4b5d4fb5cfe88e7054c4..c583a1e1bd3c16356cf67a772a3e680adec9681a 100644 (file)
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -14,7 +14,15 @@ __headers:
  include scripts/Kbuild.include
  
  srcdir        := $(srctree)/$(obj)
-subdirs       := $(patsubst $(srcdir)/%/.,%,$(wildcard $(srcdir)/*/.))
+
+# When make is run under a fakechroot environment, the function
+# $(wildcard $(srcdir)/*/.) doesn't only return directories, but also regular
+# files. So, we are using a combination of sort/dir/wildcard which works
+# with fakechroot.
+subdirs       := $(patsubst $(srcdir)/%/,%,\
+                $(filter-out $(srcdir)/,\
+                $(sort $(dir $(wildcard $(srcdir)/*/)))))
+
  # caller may set destination dir (when installing to asm/)
  _dst          := $(if $(dst),$(dst),$(obj))
  
diff --git a/scripts/genksyms/genksyms.h b/scripts/genksyms/genksyms.h

index 3bffdcaaa274e82271a98c65fa0bc43cef53ffe2..b724a0290c75e45cbc89134f2b9ae03166ff27a5 100644 (file)
--- a/scripts/genksyms/genksyms.h
+++ b/scripts/genksyms/genksyms.h
@@ -75,7 +75,7 @@ struct string_list *copy_list_range(struct string_list *start,
  int yylex(void);
  int yyparse(void);
  
-void error_with_pos(const char *, ...);
+void error_with_pos(const char *, ...) __attribute__ ((format(printf, 1, 2)));
  
  /*----------------------------------------------------------------------*/
  #define xmalloc(size) ({ void *__ptr = malloc(size);           \
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile

index 90a091b6ae4de74e6c070d77255b9f4b5655629a..eb8144643b78355cea07f301531333001cd1d608 100644 (file)
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -196,7 +196,7 @@ clean-files     += config.pot linux.pot
  
  # Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
  PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/,$(lxdialog)): $(obj)/dochecklxdialog
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
  $(obj)/dochecklxdialog:
         $(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
  
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c

index a9bc5334a478d6774d1409a837665b4f143d8597..0031147798153bdd06aa5c1f3d6f2015298b5b5d 100644 (file)
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -271,7 +271,7 @@ static struct mitem k_menu_items[MAX_MENU_ITEMS];
  static int items_num;
  static int global_exit;
  /* the currently selected button */
-const char *current_instructions = menu_instructions;
+static const char *current_instructions = menu_instructions;
  
  static char *dialog_input_result;
  static int dialog_input_result_len;
@@ -305,7 +305,7 @@ struct function_keys {
  };
  
  static const int function_keys_num = 9;
-struct function_keys function_keys[] = {
+static struct function_keys function_keys[] = {
         {
                 .key_str = "F1",
                 .func = "Help",
@@ -508,7 +508,7 @@ static int get_mext_match(const char *match_str, match_f flag)
         index = (index + items_num) % items_num;
         while (true) {
                 char *str = k_menu_items[index].str;
-               if (strcasestr(str, match_str) != 0)
+               if (strcasestr(str, match_str) != NULL)
                         return index;
                 if (flag == FIND_NEXT_MATCH_UP ||
                     flag == MATCH_TINKER_PATTERN_UP)
@@ -1067,7 +1067,7 @@ static int do_match(int key, struct match_state *state, int *ans)
  
  static void conf(struct menu *menu)
  {
-       struct menu *submenu = 0;
+       struct menu *submenu = NULL;
         const char *prompt = menu_get_prompt(menu);
         struct symbol *sym;
         int res;
@@ -1234,7 +1234,7 @@ static void show_help(struct menu *menu)
  static void conf_choice(struct menu *menu)
  {
         const char *prompt = _(menu_get_prompt(menu));
-       struct menu *child = 0;
+       struct menu *child = NULL;
         struct symbol *active;
         int selected_index = 0;
         int last_top_row = 0;
@@ -1456,7 +1456,7 @@ static void conf_save(void)
         }
  }
  
-void setup_windows(void)
+static void setup_windows(void)
  {
         int lines, columns;
  
diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c

index 4b2f44c20caf8941d150f261074b40d589a10376..a64b1c31253e13b918fefe509e1cccf5b4ac248d 100644 (file)
--- a/scripts/kconfig/nconf.gui.c
+++ b/scripts/kconfig/nconf.gui.c
@@ -129,7 +129,7 @@ static void no_colors_theme(void)
         mkattrn(FUNCTION_TEXT, A_REVERSE);
  }
  
-void set_colors()
+void set_colors(void)
  {
         start_color();
         use_default_colors();
@@ -192,7 +192,7 @@ const char *get_line(const char *text, int line_no)
         int lines = 0;
  
         if (!text)
-               return 0;
+               return NULL;
  
         for (i = 0; text[i] != '\0' && lines < line_no; i++)
                 if (text[i] == '\n')
diff --git a/scripts/tags.sh b/scripts/tags.sh

index d661f2f3ef614c41e22f2346ee9b41b238266715..d23dcbf17457c2c16cf56c73a1ffb3d07e03a257 100755 (executable)
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -106,6 +106,7 @@ all_compiled_sources()
                 case "$i" in
                         *.[cS])
                                 j=${i/\.[cS]/\.o}
+                               j="${j#$tree}"
                                 if [ -e $j ]; then
                                         echo $i
                                 fi
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c

index 5088d4b8db2222e28a71baaa4db7c70a30997e48..009e6c98754e484e489ebcdde361e200ee0d6205 100644 (file)
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -2492,7 +2492,7 @@ static int pcm_chmap_ctl_get(struct snd_kcontrol *kcontrol,
         struct snd_pcm_substream *substream;
         const struct snd_pcm_chmap_elem *map;
  
-       if (snd_BUG_ON(!info->chmap))
+       if (!info->chmap)
                 return -EINVAL;
         substream = snd_pcm_chmap_substream(info, idx);
         if (!substream)
@@ -2524,7 +2524,7 @@ static int pcm_chmap_ctl_tlv(struct snd_kcontrol *kcontrol, int op_flag,
         unsigned int __user *dst;
         int c, count = 0;
  
-       if (snd_BUG_ON(!info->chmap))
+       if (!info->chmap)
                 return -EINVAL;
         if (size < 8)
                 return -ENOMEM;
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c

index 9e6f54f8c45d2330ee339a7f4c7d1ac86c8e4774..1e26854b3425e23bd9d2942194f3665fa37e06cf 100644 (file)
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -682,7 +682,9 @@ static void out_stream_callback(struct fw_iso_context *context, u32 tstamp,
                 cycle = increment_cycle_count(cycle, 1);
                 if (s->handle_packet(s, 0, cycle, i) < 0) {
                         s->packet_index = -1;
-                       amdtp_stream_pcm_abort(s);
+                       if (in_interrupt())
+                               amdtp_stream_pcm_abort(s);
+                       WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
                         return;
                 }
         }
@@ -734,7 +736,9 @@ static void in_stream_callback(struct fw_iso_context *context, u32 tstamp,
         /* Queueing error or detecting invalid payload. */
         if (i < packets) {
                 s->packet_index = -1;
-               amdtp_stream_pcm_abort(s);
+               if (in_interrupt())
+                       amdtp_stream_pcm_abort(s);
+               WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
                 return;
         }
  
diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h

index 7e88317228212a63ad38e6e9880655f6ea567c85..ea1a91e99875e1f06933142d607a8b9880d210b1 100644 (file)
--- a/sound/firewire/amdtp-stream.h
+++ b/sound/firewire/amdtp-stream.h
@@ -135,7 +135,7 @@ struct amdtp_stream {
         /* For a PCM substream processing. */
         struct snd_pcm_substream *pcm;
         struct tasklet_struct period_tasklet;
-       unsigned int pcm_buffer_pointer;
+       snd_pcm_uframes_t pcm_buffer_pointer;
         unsigned int pcm_period_pointer;
  
         /* To wait for first packet. */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c

index 1770f085c2a694a398e2b30312cb79d6c3fd617a..01eb1dc7b5b3b5cf3c070a7656b81452890781e0 100644 (file)
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -370,10 +370,12 @@ enum {
  #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71)
  #define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0)
  #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_BXT_T(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x1a98)
  #define IS_GLK(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x3198)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \
-                       IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci)  || \
-                       IS_GLK(pci)
+#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci) || \
+                         IS_BXT_T(pci) || IS_KBL(pci) || IS_KBL_LP(pci) || \
+                         IS_KBL_H(pci) || IS_GLK(pci) || IS_CFL(pci))
  
  static char *driver_short_names[] = {
         [AZX_DRIVER_ICH] = "HDA Intel",
@@ -2378,6 +2380,9 @@ static const struct pci_device_id azx_ids[] = {
         /* Kabylake-H */
         { PCI_DEVICE(0x8086, 0xa2f0),
           .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+       /* Coffeelake */
+       { PCI_DEVICE(0x8086, 0xa348),
+         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE},
         /* Broxton-P(Apollolake) */
         { PCI_DEVICE(0x8086, 0x5a98),
           .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index f94b48b168dcc82cbd67b916951a4699f4f675ef..ce2988be4f0eac843de17f563c360a795943f35e 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_LWT_IN,
         BPF_PROG_TYPE_LWT_OUT,
         BPF_PROG_TYPE_LWT_XMIT,
+       BPF_PROG_TYPE_SOCK_OPS,
  };
  
  enum bpf_attach_type {
         BPF_CGROUP_INET_INGRESS,
         BPF_CGROUP_INET_EGRESS,
         BPF_CGROUP_INET_SOCK_CREATE,
+       BPF_CGROUP_SOCK_OPS,
         __MAX_BPF_ATTACH_TYPE
  };
  
@@ -518,6 +520,25 @@ union bpf_attr {
   *     Set full skb->hash.
   *     @skb: pointer to skb
   *     @hash: hash to set
+ *
+ * int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
+ *     Calls setsockopt. Not all opts are available, only those with
+ *     integer optvals plus TCP_CONGESTION.
+ *     Supported levels: SOL_SOCKET and IPPROTO_TCP
+ *     @bpf_socket: pointer to bpf_socket
+ *     @level: SOL_SOCKET or IPPROTO_TCP
+ *     @optname: option name
+ *     @optval: pointer to option value
+ *     @optlen: length of optval in bytes
+ *     Return: 0 or negative error
+ *
+ * int bpf_skb_adjust_room(skb, len_diff, mode, flags)
+ *     Grow or shrink room in sk_buff.
+ *     @skb: pointer to skb
+ *     @len_diff: (signed) amount of room to grow/shrink
+ *     @mode: operation mode (enum bpf_adj_room_mode)
+ *     @flags: reserved for future use
+ *     Return: 0 on success or negative error code
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -568,7 +589,9 @@ union bpf_attr {
         FN(probe_read_str),             \
         FN(get_socket_cookie),          \
         FN(get_socket_uid),             \
-       FN(set_hash),
+       FN(set_hash),                   \
+       FN(setsockopt),                 \
+       FN(skb_adjust_room),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -618,6 +641,11 @@ enum bpf_func_id {
  /* BPF_FUNC_perf_event_output for sk_buff input context. */
  #define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
  
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+       BPF_ADJ_ROOM_NET_OPTS,
+};
+
  /* user accessible mirror of in-kernel sk_buff.
   * new fields can only be added to the end of this structure
   */
@@ -720,4 +748,54 @@ struct bpf_map_info {
         __u32 map_flags;
  } __attribute__((aligned(8)));
  
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+       __u32 op;
+       union {
+               __u32 reply;
+               __u32 replylong[4];
+       };
+       __u32 family;
+       __u32 remote_ip4;
+       __u32 local_ip4;
+       __u32 remote_ip6[4];
+       __u32 local_ip6[4];
+       __u32 remote_port;
+       __u32 local_port;
+};
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+       BPF_SOCK_OPS_VOID,
+       BPF_SOCK_OPS_TIMEOUT_INIT,      /* Should return SYN-RTO value to use or
+                                        * -1 if default value should be used
+                                        */
+       BPF_SOCK_OPS_RWND_INIT,         /* Should return initial advertized
+                                        * window (in packets) or -1 if default
+                                        * value should be used
+                                        */
+       BPF_SOCK_OPS_TCP_CONNECT_CB,    /* Calls BPF program right before an
+                                        * active connection is initialized
+                                        */
+       BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB,     /* Calls BPF program when an
+                                                * active connection is
+                                                * established
+                                                */
+       BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB,    /* Calls BPF program when a
+                                                * passive connection is
+                                                * established
+                                                */
+       BPF_SOCK_OPS_NEEDS_ECN,         /* If connection's congestion control
+                                        * needs ECN
+                                        */
+};
+
+#define TCP_BPF_IW             1001    /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP  1002    /* Set sndcwnd_clamp */
+
  #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c

index 84e7e698411e6a80a39050514227cbb90556ca13..a2670e9d652dfa09eebb7e53a8ed2b63a7cea152 100644 (file)
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -619,7 +619,7 @@ static int post_process_probe_trace_point(struct probe_trace_point *tp,
                                            struct map *map, unsigned long offs)
  {
         struct symbol *sym;
-       u64 addr = tp->address + tp->offset - offs;
+       u64 addr = tp->address - offs;
  
         sym = map__find_symbol(map, addr);
         if (!sym)
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c

index c0af0195432f5a3ee52df7f8c739c5499b3d687a..404aec5208128101812f6c9189ed046d1ec6eb24 100644 (file)
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2657,6 +2657,171 @@ static struct bpf_test tests[] = {
                 .prog_type = BPF_PROG_TYPE_SCHED_CLS,
                 .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
         },
+       {
+               "direct packet access: test18 (imm += pkt_ptr, 1)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_IMM(BPF_REG_0, 8),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+                       BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "direct packet access: test19 (imm += pkt_ptr, 2)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+                       BPF_MOV64_IMM(BPF_REG_4, 4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+                       BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "direct packet access: test20 (x += pkt_ptr, 1)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+                       BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
+       {
+               "direct packet access: test21 (x += pkt_ptr, 2)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+                       BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+                       BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
+       {
+               "direct packet access: test22 (x += pkt_ptr, 3)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+                       BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
+       {
+               "direct packet access: test23 (x += pkt_ptr, 4)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+                       BPF_MOV64_IMM(BPF_REG_0, 31),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = REJECT,
+               .errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet",
+       },
+       {
+               "direct packet access: test24 (x += pkt_ptr, 5)",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
+                       BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+                       BPF_MOV64_IMM(BPF_REG_0, 64),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+                       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
         {
                 "helper access to packet: test1, valid packet_ptr range",
                 .insns = {
@@ -3766,6 +3931,72 @@ static struct bpf_test tests[] = {
                 .result = REJECT,
                 .errstr = "invalid bpf_context access",
         },
+       {
+               "leak pointer into ctx 1",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+                                   offsetof(struct __sk_buff, cb[0])),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+                                     offsetof(struct __sk_buff, cb[0])),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 2 },
+               .errstr_unpriv = "R2 leaks addr into mem",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
+       {
+               "leak pointer into ctx 2",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+                                   offsetof(struct __sk_buff, cb[0])),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+                                     offsetof(struct __sk_buff, cb[0])),
+                       BPF_EXIT_INSN(),
+               },
+               .errstr_unpriv = "R10 leaks addr into mem",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
+       {
+               "leak pointer into ctx 3",
+               .insns = {
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_LD_MAP_FD(BPF_REG_2, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+                                     offsetof(struct __sk_buff, cb[0])),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 1 },
+               .errstr_unpriv = "R2 leaks addr into ctx",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
+       {
+               "leak pointer into map val",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+                       BPF_MOV64_IMM(BPF_REG_3, 0),
+                       BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+                       BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr_unpriv = "R6 leaks addr into mem",
+               .result_unpriv = REJECT,
+               .result = ACCEPT,
+       },
         {
                 "helper access to map: full range",
                 .insns = {
author	David S. Miller <davem@davemloft.net>
	Mon, 3 Jul 2017 12:51:45 +0000 (05:51 -0700)
committer	David S. Miller <davem@davemloft.net>
	Mon, 3 Jul 2017 12:51:45 +0000 (05:51 -0700)