Merge branch 'x86/mm' into x86/asm, to unify the two branches for simplicity

author Ingo Molnar <mingo@kernel.org>

Thu, 8 Sep 2016 06:41:52 +0000 (08:41 +0200)

committer Ingo Molnar <mingo@kernel.org>

Thu, 8 Sep 2016 06:41:52 +0000 (08:41 +0200)
author Ingo Molnar <mingo@kernel.org>
Thu, 8 Sep 2016 06:41:52 +0000 (08:41 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 8 Sep 2016 06:41:52 +0000 (08:41 +0200)
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt

index d515d58962b9df66b39bdcf0fc6f981090ed4630..2a3904030dea5d287f6e1dc2f8f93461707ae018 100644 (file)
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -14,6 +14,12 @@ add_random (RW)
  This file allows to turn off the disk entropy contribution. Default
  value of this file is '1'(on).
  
+dax (RO)
+--------
+This file indicates whether the device supports Direct Access (DAX),
+used by CPU-addressable storage to bypass the pagecache.  It shows '1'
+if true, '0' if not.
+
  discard_granularity (RO)
  -----------------------
  This shows the size of internal allocation of the device in bytes, if
@@ -46,6 +52,12 @@ hw_sector_size (RO)
  -------------------
  This is the hardware sector size of the device, in bytes.
  
+io_poll (RW)
+------------
+When read, this file shows the total number of block IO polls and how
+many returned success.  Writing '0' to this file will disable polling
+for this device.  Writing any non-zero value will enable this feature.
+
  iostats (RW)
  -------------
  This file is used to control (on/off) the iostats accounting of the
@@ -151,5 +163,11 @@ device state. This means that it might not be safe to toggle the
  setting from "write back" to "write through", since that will also
  eliminate cache flushes issued by the kernel.
  
+write_same_max_bytes (RO)
+-------------------------
+This is the number of bytes the device can write in a single write-same
+command.  A value of '0' means write-same is not supported by this
+device.
+
  
  Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/conf.py b/Documentation/conf.py

index 96b7aa66c89ca68c5f891d6fb3f2744dd80c6ca7..106ae9c740b99307abcffee492e0eace15c4f295 100644 (file)
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -131,7 +131,7 @@ pygments_style = 'sphinx'
  todo_include_todos = False
  
  primary_domain = 'C'
-highlight_language = 'C'
+highlight_language = 'guess'
  
  # -- Options for HTML output ----------------------------------------------
  
diff --git a/Documentation/hwmon/ftsteutates b/Documentation/hwmon/ftsteutates

index 2a1bf69c6a26fc3c416755f192f3f8778023ceee..8c10a916de20d064a34fb43b121309b190eab98c 100644 (file)
--- a/Documentation/hwmon/ftsteutates
+++ b/Documentation/hwmon/ftsteutates
@@ -19,5 +19,5 @@ enhancements. It can monitor up to 4 voltages, 16 temperatures and
  implemented in this driver.
  
  Specification of the chip can be found here:
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
diff --git a/Documentation/kernel-documentation.rst b/Documentation/kernel-documentation.rst

index c4eb5049da390767753032acfc583ac282bd0f0b..391decc66a18fd3da282cc009d6f239441de4a8d 100644 (file)
--- a/Documentation/kernel-documentation.rst
+++ b/Documentation/kernel-documentation.rst
@@ -366,8 +366,6 @@ Domain`_ references.
  Cross-referencing from reStructuredText
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  
-.. highlight:: none
-
  To cross-reference the functions and types defined in the kernel-doc comments
  from reStructuredText documents, please use the `Sphinx C Domain`_
  references. For example::
@@ -390,8 +388,6 @@ For further details, please refer to the `Sphinx C Domain`_ documentation.
  Function documentation
  ----------------------
  
-.. highlight:: c
-
  The general format of a function and function-like macro kernel-doc comment is::
  
    /**
@@ -572,8 +568,6 @@ DocBook XML [DEPRECATED]
  Converting DocBook to Sphinx
  ----------------------------
  
-.. highlight:: none
-
  Over time, we expect all of the documents under ``Documentation/DocBook`` to be
  converted to Sphinx and reStructuredText. For most DocBook XML documents, a good
  enough solution is to use the simple ``Documentation/sphinx/tmplcvt`` script,
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt

index 16a924c486bf3adb856efc2a2bec72b90311f2ff..70c926ae212d397c8543cbc9da56ad815fe62da5 100644 (file)
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -790,13 +790,12 @@ The kernel interface functions are as follows:
       Data messages can have their contents extracted with the usual bunch of
       socket buffer manipulation functions.  A data message can be determined to
       be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
-     data message has been used up, rxrpc_kernel_data_delivered() should be
-     called on it..
+     data message has been used up, rxrpc_kernel_data_consumed() should be
+     called on it.
  
-     Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
-     of.  It is possible to get extra refs on all types of message for later
-     freeing, but this may pin the state of a call until the message is finally
-     freed.
+     Messages should be handed to rxrpc_kernel_free_skb() to dispose of.  It
+     is possible to get extra refs on all types of message for later freeing,
+     but this may pin the state of a call until the message is finally freed.
  
   (*) Accept an incoming call.
  
@@ -821,12 +820,14 @@ The kernel interface functions are as follows:
       Other errors may be returned if the call had been aborted (-ECONNABORTED)
       or had timed out (-ETIME).
  
- (*) Record the delivery of a data message and free it.
+ (*) Record the delivery of a data message.
  
-       void rxrpc_kernel_data_delivered(struct sk_buff *skb);
+       void rxrpc_kernel_data_consumed(struct rxrpc_call *call,
+                                       struct sk_buff *skb);
  
-     This is used to record a data message as having been delivered and to
-     update the ACK state for the call.  The socket buffer will be freed.
+     This is used to record a data message as having been consumed and to
+     update the ACK state for the call.  The message must still be passed to
+     rxrpc_kernel_free_skb() for disposal by the caller.
  
   (*) Free a message.
  
diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt

index b96098ccfe69208cf7491c1c4885563310398bf5..708f87f78a756aaa923aedcb11103a96bf5611e7 100644 (file)
--- a/Documentation/power/basic-pm-debugging.txt
+++ b/Documentation/power/basic-pm-debugging.txt
@@ -164,7 +164,32 @@ load n/2 modules more and try again.
  Again, if you find the offending module(s), it(they) must be unloaded every time
  before hibernation, and please report the problem with it(them).
  
-c) Advanced debugging
+c) Using the "test_resume" hibernation option
+
+/sys/power/disk generally tells the kernel what to do after creating a
+hibernation image.  One of the available options is "test_resume" which
+causes the just created image to be used for immediate restoration.  Namely,
+after doing:
+
+# echo test_resume > /sys/power/disk
+# echo disk > /sys/power/state
+
+a hibernation image will be created and a resume from it will be triggered
+immediately without involving the platform firmware in any way.
+
+That test can be used to check if failures to resume from hibernation are
+related to bad interactions with the platform firmware.  That is, if the above
+works every time, but resume from actual hibernation does not work or is
+unreliable, the platform firmware may be responsible for the failures.
+
+On architectures and platforms that support using different kernels to restore
+hibernation images (that is, the kernel used to read the image from storage and
+load it into memory is different from the one included in the image) or support
+kernel address space randomization, it also can be used to check if failures
+to resume may be related to the differences between the restore and image
+kernels.
+
+d) Advanced debugging
  
  In case that hibernation does not work on your system even in the minimal
  configuration and compiling more drivers as modules is not practical or some
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt

index f1f0f59a7c47d594a9753207d713d0834a0e99b1..974916ff6608e7b55f390d69e64a8fbe7396a9e9 100644 (file)
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -1,75 +1,76 @@
-Power Management Interface
-
-
-The power management subsystem provides a unified sysfs interface to 
-userspace, regardless of what architecture or platform one is
-running. The interface exists in /sys/power/ directory (assuming sysfs
-is mounted at /sys). 
-
-/sys/power/state controls system power state. Reading from this file
-returns what states are supported, which is hard-coded to 'freeze',
-'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk'
-(Suspend-to-Disk). 
-
-Writing to this file one of those strings causes the system to
-transition into that state. Please see the file
-Documentation/power/states.txt for a description of each of those
-states.
-
-
-/sys/power/disk controls the operating mode of the suspend-to-disk
-mechanism. Suspend-to-disk can be handled in several ways. We have a
-few options for putting the system to sleep - using the platform driver
-(e.g. ACPI or other suspend_ops), powering off the system or rebooting the
-system (for testing).
-
-Additionally, /sys/power/disk can be used to turn on one of the two testing
-modes of the suspend-to-disk mechanism: 'testproc' or 'test'.  If the
-suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to
-/sys/power/state will cause the kernel to disable nonboot CPUs and freeze
-tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs.  If it is
-in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel
-to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait
-for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs.  Then,
-we are able to look in the log messages and work out, for example, which code
-is being slow and which device drivers are misbehaving.
-
-Reading from this file will display all supported modes and the currently
-selected one in brackets, for example
-
-       [shutdown] reboot test testproc
-
-Writing to this file will accept one of
-
-       'platform' (only if the platform supports it)
-       'shutdown'
-       'reboot'
-       'testproc'
-       'test'
-
-/sys/power/image_size controls the size of the image created by
-the suspend-to-disk mechanism.  It can be written a string
-representing a non-negative integer that will be used as an upper
-limit of the image size, in bytes.  The suspend-to-disk mechanism will
-do its best to ensure the image size will not exceed that number.  However,
-if this turns out to be impossible, it will try to suspend anyway using the
-smallest image possible.  In particular, if "0" is written to this file, the
-suspend image will be as small as possible.
-
-Reading from this file will display the current image size limit, which
-is set to 2/5 of available RAM by default.
-
-/sys/power/pm_trace controls the code which saves the last PM event point in
-the RTC across reboots, so that you can debug a machine that just hangs
-during suspend (or more commonly, during resume).  Namely, the RTC is only
-used to save the last PM event point if this file contains '1'.  Initially it
-contains '0' which may be changed to '1' by writing a string representing a
-nonzero integer into it.
-
-To use this debugging feature you should attempt to suspend the machine, then
-reboot it and run
-
-       dmesg -s 1000000 | grep 'hash matches'
-
-CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
-set to a random invalid time after a resume.
+Power Management Interface for System Sleep
+
+Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+The power management subsystem provides userspace with a unified sysfs interface
+for system sleep regardless of the underlying system architecture or platform.
+The interface is located in the /sys/power/ directory (assuming that sysfs is
+mounted at /sys).
+
+/sys/power/state is the system sleep state control file.
+
+Reading from it returns a list of supported sleep states, encoded as:
+
+'freeze' (Suspend-to-Idle)
+'standby' (Power-On Suspend)
+'mem' (Suspend-to-RAM)
+'disk' (Suspend-to-Disk)
+
+Suspend-to-Idle is always supported.  Suspend-to-Disk is always supported
+too as long as the kernel has been configured to support hibernation at all
+(i.e. CONFIG_HIBERNATION is set in the kernel configuration file).  Support
+for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the
+platform.
+
+If one of the strings listed in /sys/power/state is written to it, the system
+will attempt to transition into the corresponding sleep state.  Refer to
+Documentation/power/states.txt for a description of each of those states.
+
+/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk).
+Specifically, it tells the kernel what to do after creating a hibernation image.
+
+Reading from it returns a list of supported options encoded as:
+
+'platform' (put the system into sleep using a platform-provided method)
+'shutdown' (shut the system down)
+'reboot' (reboot the system)
+'suspend' (trigger a Suspend-to-RAM transition)
+'test_resume' (resume-after-hibernation test mode)
+
+The currently selected option is printed in square brackets.
+
+The 'platform' option is only available if the platform provides a special
+mechanism to put the system to sleep after creating a hibernation image (ACPI
+does that, for example).  The 'suspend' option is available if Suspend-to-RAM
+is supported.  Refer to Documentation/power/basic-pm-debugging.txt for the
+description of the 'test_resume' option.
+
+To select an option, write the string representing it to /sys/power/disk.
+
+/sys/power/image_size controls the size of hibernation images.
+
+It can be written a string representing a non-negative integer that will be
+used as a best-effort upper limit of the image size, in bytes.  The hibernation
+core will do its best to ensure that the image size will not exceed that number.
+However, if that turns out to be impossible to achieve, a hibernation image will
+still be created and its size will be as small as possible.  In particular,
+writing '0' to this file will enforce hibernation images to be as small as
+possible.
+
+Reading from this file returns the current image size limit, which is set to
+around 2/5 of available RAM by default.
+
+/sys/power/pm_trace controls the PM trace mechanism saving the last suspend
+or resume event point in the RTC across reboots.
+
+It helps to debug hard lockups or reboots due to device driver failures that
+occur during system suspend or resume (which is more common) more effectively.
+
+If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume
+event point in turn will be stored in the RTC memory (overwriting the actual
+RTC information), so it will survive a system crash if one occurs right after
+storing it and it can be used later to identify the driver that caused the crash
+to happen (see Documentation/power/s2ram.txt for more information).
+
+Initially it contains '0' which may be changed to '1' by writing a string
+representing a nonzero integer into it.
diff --git a/Documentation/sphinx-static/theme_overrides.css b/Documentation/sphinx-static/theme_overrides.css

index 3a2ac4bcfd789bc704987b650da6694089a91dca..e88461c4c1e69dd186a7b420215246154f9a9f09 100644 (file)
--- a/Documentation/sphinx-static/theme_overrides.css
+++ b/Documentation/sphinx-static/theme_overrides.css
@@ -42,11 +42,12 @@
      caption a.headerlink { opacity: 0; }
      caption a.headerlink:hover { opacity: 1; }
  
-    /* inline literal: drop the borderbox and red color */
+    /* inline literal: drop the borderbox, padding and red color */
  
      code, .rst-content tt, .rst-content code {
          color: inherit;
          border: none;
+        padding: unset;
          background: inherit;
          font-size: 85%;
      }
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt

index dd5f916b351d1cdd7f11162cb497530973c53940..a273dd0bbaaa52a86691c84ae64e787a30d0c9b0 100644 (file)
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -203,6 +203,17 @@ along to ftrace_push_return_trace() instead of a stub value of 0.
  
  Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
  
+HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+--------------------------------
+
+An arch may pass in a pointer to the return address on the stack.  This
+prevents potential stack unwinding issues where the unwinder gets out of
+sync with ret_stack and the wrong addresses are reported by
+ftrace_graph_ret_addr().
+
+Adding support for it is easy: just define the macro in asm/ftrace.h and
+pass the return address pointer as the 'retp' argument to
+ftrace_push_return_trace().
  
  HAVE_FTRACE_NMI_ENTER
  ---------------------
diff --git a/MAINTAINERS b/MAINTAINERS

index 20bb1d00098c70dacad7a9c778087f9319b0c5c6..0bbe4b105c346893db530be25031ab8ce0350b56 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1004,6 +1004,7 @@ N:        meson
  ARM/Annapurna Labs ALPINE ARCHITECTURE
  M:     Tsahee Zidenberg <tsahee@annapurnalabs.com>
  M:     Antoine Tenart <antoine.tenart@free-electrons.com>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Maintained
  F:     arch/arm/mach-alpine/
  F:     arch/arm/boot/dts/alpine*
@@ -4524,6 +4525,12 @@ L:       linux-edac@vger.kernel.org
  S:     Maintained
  F:     drivers/edac/sb_edac.c
  
+EDAC-SKYLAKE
+M:     Tony Luck <tony.luck@intel.com>
+L:     linux-edac@vger.kernel.org
+S:     Maintained
+F:     drivers/edac/skx_edac.c
+
  EDAC-XGENE
  APPLIED MICRO (APM) X-GENE SOC EDAC
  M:     Loc Ho <lho@apm.com>
diff --git a/Makefile b/Makefile

index 8c504f3241544620a149d7b946a62e7535b3eef6..3537aa23905fa54234b0526e41bb265723981b8e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
  VERSION = 4
  PATCHLEVEL = 8
  SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc3
  NAME = Psychotic Stoned Sheep
  
  # *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig

index e9c9334507ddd57f2fd787f2faa3dac71edc18a7..9ecf9f6f9e15b2093560311da73351958dc444a6 100644 (file)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -707,4 +707,38 @@ config ARCH_NO_COHERENT_DMA_MMAP
  config CPU_NO_EFFICIENT_FFS
         def_bool n
  
+config HAVE_ARCH_VMAP_STACK
+       def_bool n
+       help
+         An arch should select this symbol if it can support kernel stacks
+         in vmalloc space.  This means:
+
+         - vmalloc space must be large enough to hold many kernel stacks.
+           This may rule out many 32-bit architectures.
+
+         - Stacks in vmalloc space need to work reliably.  For example, if
+           vmap page tables are created on demand, either this mechanism
+           needs to work while the stack points to a virtual address with
+           unpopulated page tables or arch code (switch_to() and switch_mm(),
+           most likely) needs to ensure that the stack's page table entries
+           are populated before running on a possibly unpopulated stack.
+
+         - If the stack overflows into a guard page, something reasonable
+           should happen.  The definition of "reasonable" is flexible, but
+           instantly rebooting without logging anything would be unfriendly.
+
+config VMAP_STACK
+       default y
+       bool "Use a virtually-mapped stack"
+       depends on HAVE_ARCH_VMAP_STACK && !KASAN
+       ---help---
+         Enable this if you want to use virtually-mapped kernel stacks
+         with guard pages.  This causes kernel stack overflows to be
+         caught immediately rather than causing difficult-to-diagnose
+         corruption.
+
+         This is presently incompatible with KASAN because KASAN expects
+         the stack to map directly to the KASAN shadow map using a formula
+         that is incorrect if the stack is in vmalloc space.
+
  source "kernel/gcov/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile

index 56ea5c60b31883bdf52ca01c4748061c4ed39871..61f6ccc19cfa94364e777cc68d10ce5a24093c0f 100644 (file)
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -260,12 +260,14 @@ machdirs := $(patsubst %,arch/arm/mach-%/,$(machine-y))
  platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y)))
  
  ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y)
+ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y)
  ifeq ($(KBUILD_SRC),)
  KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs))
  else
  KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
  endif
  endif
+endif
  
  export TEXT_OFFSET GZFLAGS MMUEXT
  
diff --git a/arch/arm/boot/dts/arm-realview-pbx-a9.dts b/arch/arm/boot/dts/arm-realview-pbx-a9.dts

index db808f92dd79c975b1f059d824e263080208d20b..90d00b407f851dfce2706d8e85bb436ac39a3921 100644 (file)
--- a/arch/arm/boot/dts/arm-realview-pbx-a9.dts
+++ b/arch/arm/boot/dts/arm-realview-pbx-a9.dts
@@ -70,13 +70,12 @@
                  * associativity as these may be erroneously set
                  * up by boot loader(s).
                  */
-               cache-size = <1048576>; // 1MB
-               cache-sets = <4096>;
+               cache-size = <131072>; // 128KB
+               cache-sets = <512>;
                 cache-line-size = <32>;
                 arm,parity-disable;
-               arm,tag-latency = <1>;
-               arm,data-latency = <1 1>;
-               arm,dirty-latency = <1>;
+               arm,tag-latency = <1 1 1>;
+               arm,data-latency = <1 1 1>;
         };
  
         scu: scu@1f000000 {
diff --git a/arch/arm/boot/dts/integratorap.dts b/arch/arm/boot/dts/integratorap.dts

index cf06e32ee108a221c330c8ff521e0a82d8b78e4d..4b34b54e09a193ebd93ef82566d5c012274241d5 100644 (file)
--- a/arch/arm/boot/dts/integratorap.dts
+++ b/arch/arm/boot/dts/integratorap.dts
@@ -42,7 +42,7 @@
         };
  
         syscon {
-               compatible = "arm,integrator-ap-syscon";
+               compatible = "arm,integrator-ap-syscon", "syscon";
                 reg = <0x11000000 0x100>;
                 interrupt-parent = <&pic>;
                 /* These are the logical module IRQs */
diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts

index d43f15b4f79a242d2437f6ea45626556380c010c..79430fbfec3bd17ec311625e67695aabee3e0726 100644 (file)
--- a/arch/arm/boot/dts/integratorcp.dts
+++ b/arch/arm/boot/dts/integratorcp.dts
@@ -94,7 +94,7 @@
         };
  
         syscon {
-               compatible = "arm,integrator-cp-syscon";
+               compatible = "arm,integrator-cp-syscon", "syscon";
                 reg = <0xcb000000 0x100>;
         };
  
diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi

index 00cb314d5e4db81fcc035b2a68101a557cf751a4..e23f46d15c806566abc2ec88828bc8d053ffd6e5 100644 (file)
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -70,14 +70,6 @@
                 cpu_on          = <0x84000003>;
         };
  
-       psci {
-               compatible      = "arm,psci";
-               method          = "smc";
-               cpu_suspend     = <0x84000001>;
-               cpu_off         = <0x84000002>;
-               cpu_on          = <0x84000003>;
-       };
-
         soc {
                 #address-cells = <1>;
                 #size-cells = <1>;
diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts

index e52b82449a79528bc362d96fabe7b2d688abd78e..6403e0de540e842b952b3bb02b5673bbbfb3e683 100644 (file)
--- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts
+++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts
@@ -1382,7 +1382,7 @@
          *   Pin 41: BR_UART1_TXD
          *   Pin 44: BR_UART1_RXD
          */
-       serial@70006000 {
+       serial@0,70006000 {
                 compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                 status = "okay";
         };
@@ -1394,7 +1394,7 @@
          *   Pin 71: UART2_CTS_L
          *   Pin 74: UART2_RTS_L
          */
-       serial@70006040 {
+       serial@0,70006040 {
                 compatible = "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart";
                 status = "okay";
         };
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig

index b6e54ee9bdbd8e54a4c740d0d2308dd26f682b42..ca39c04fec6b7af28847b78b6b3ff36a75811b31 100644 (file)
--- a/arch/arm/configs/aspeed_g4_defconfig
+++ b/arch/arm/configs/aspeed_g4_defconfig
@@ -58,7 +58,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
  # CONFIG_IOMMU_SUPPORT is not set
  CONFIG_FIRMWARE_MEMMAP=y
  CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
  CONFIG_DYNAMIC_DEBUG=y
  CONFIG_STRIP_ASM_SYMS=y
  CONFIG_PAGE_POISONING=y
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig

index 89260516735720460b4002e462ae5d3e5811125b..4f366b0370e939a27056f0230b49140ddee85503 100644 (file)
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -59,7 +59,7 @@ CONFIG_SERIAL_OF_PLATFORM=y
  # CONFIG_IOMMU_SUPPORT is not set
  CONFIG_FIRMWARE_MEMMAP=y
  CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
  CONFIG_DYNAMIC_DEBUG=y
  CONFIG_STRIP_ASM_SYMS=y
  CONFIG_PAGE_POISONING=y
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S

index bc5f50799d75627fb3eb22b9afe6d849373e77d4..9f157e7c51e75cc468de327bd9c5403353885849 100644 (file)
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -295,6 +295,7 @@ __und_svc_fault:
         bl      __und_fault
  
  __und_svc_finish:
+       get_thread_info tsk
         ldr     r5, [sp, #S_PSR]                @ Get SVC cpsr
         svc_exit r5                             @ return from exception
   UNWIND(.fnend         )
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c

index 709ee1d6d4df4813c82ad1bb23dac7868e00f999..3f1759411d51bec32175992a2332ab07ca598a8a 100644 (file)
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -218,7 +218,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
         }
  
         err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-                                      frame_pointer);
+                                      frame_pointer, NULL);
         if (err == -EBUSY) {
                 *parent = old;
                 return;
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c

index 087acb569b63a4bd90982e0c9b15fc2313636c53..5f221acd21aebb3ca1c2ee560fb68241bc1e02c9 100644 (file)
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
         mm_segment_t fs;
         long ret, err, i;
  
-       if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+       if (maxevents <= 0 ||
+                       maxevents > (INT_MAX/sizeof(*kbuf)) ||
+                       maxevents > (INT_MAX/sizeof(*events)))
                 return -EINVAL;
+       if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+               return -EFAULT;
         kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
         if (!kbuf)
                 return -ENOMEM;
@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
  
         if (nsops < 1 || nsops > SEMOPM)
                 return -EINVAL;
+       if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+               return -EFAULT;
         sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
         if (!sops)
                 return -ENOMEM;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c

index d94bb9093ead7d10641aecfaed668d0c3ee3fa39..75f130ef650413036625c59f34ab864c1649052e 100644 (file)
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1009,9 +1009,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
  
         switch (ioctl) {
         case KVM_CREATE_IRQCHIP: {
+               int ret;
                 if (!vgic_present)
                         return -ENXIO;
-               return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+               mutex_lock(&kvm->lock);
+               ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+               mutex_unlock(&kvm->lock);
+               return ret;
         }
         case KVM_ARM_SET_DEVICE_ADDR: {
                 struct kvm_arm_device_addr dev_addr;
diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig

index dc7c6edeab39a89d24f8211d4d11ace5494bafee..61284b9389cf5e41de92215b3fa45ee9c2bc79df 100644 (file)
--- a/arch/arm/mach-clps711x/Kconfig
+++ b/arch/arm/mach-clps711x/Kconfig
@@ -1,13 +1,13 @@
  menuconfig ARCH_CLPS711X
         bool "Cirrus Logic EP721x/EP731x-based"
         depends on ARCH_MULTI_V4T
-       select ARCH_REQUIRE_GPIOLIB
         select AUTO_ZRELADDR
         select CLKSRC_OF
         select CLPS711X_TIMER
         select COMMON_CLK
         select CPU_ARM720T
         select GENERIC_CLOCKEVENTS
+       select GPIOLIB
         select MFD_SYSCON
         select OF_IRQ
         select USE_OF
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c

index fd87205324710ffcfca1871c01ea23a7da8856b4..0df062d8b2c942f84a31a923e0a4f221c6c9366d 100644 (file)
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -271,6 +271,12 @@ static int __init imx_gpc_init(struct device_node *node,
         for (i = 0; i < IMR_NUM; i++)
                 writel_relaxed(~0, gpc_base + GPC_IMR1 + i * 4);
  
+       /*
+        * Clear the OF_POPULATED flag set in of_irq_init so that
+        * later the GPC power domain driver will not be skipped.
+        */
+       of_node_clear_flag(node, OF_POPULATED);
+
         return 0;
  }
  IRQCHIP_DECLARE(imx_gpc, "fsl,imx6q-gpc", imx_gpc_init);
diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile

index e53c6cfcab51cd12c798fd11d663686c77761b02..6c6497e80a7b13433d833923d8c5003c52039d9a 100644 (file)
--- a/arch/arm/mach-mvebu/Makefile
+++ b/arch/arm/mach-mvebu/Makefile
@@ -1,5 +1,4 @@
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-       -I$(srctree)/arch/arm/plat-orion/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include
  
  AFLAGS_coherency_ll.o          := -Wa,-march=armv7-a
  CFLAGS_pmsu.o                  := -march=armv7-a
diff --git a/arch/arm/mach-oxnas/Kconfig b/arch/arm/mach-oxnas/Kconfig

index 567496bd250a2fc5c8037ed203dc71d203521be0..29100beb2e7f201403ce9bc84c78dab1aea818eb 100644 (file)
--- a/arch/arm/mach-oxnas/Kconfig
+++ b/arch/arm/mach-oxnas/Kconfig
@@ -11,11 +11,13 @@ if ARCH_OXNAS
  
  config MACH_OX810SE
         bool "Support OX810SE Based Products"
+       select ARCH_HAS_RESET_CONTROLLER
         select COMMON_CLK_OXNAS
         select CPU_ARM926T
         select MFD_SYSCON
         select OXNAS_RPS_TIMER
         select PINCTRL_OXNAS
+       select RESET_CONTROLLER
         select RESET_OXNAS
         select VERSATILE_FPGA_IRQ
         help
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c

index dc109dc3a622834bcca135322672e754cc1db488..10bfdb169366b0a7b4e6403cd3fd5242a940f3e8 100644 (file)
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -13,6 +13,7 @@
   */
  
  #include <linux/kernel.h>
+#include <linux/module.h>      /* symbol_get ; symbol_put */
  #include <linux/init.h>
  #include <linux/platform_device.h>
  #include <linux/major.h>
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c

index 1080580b1343d1bdf164230bff82d6fed2e8b0d5..2c150bfc0cd5128dcc252e63ca485076e9469ba9 100644 (file)
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -13,6 +13,7 @@
   */
  
  #include <linux/kernel.h>
+#include <linux/module.h>      /* symbol_get ; symbol_put */
  #include <linux/platform_device.h>
  #include <linux/delay.h>
  #include <linux/gpio_keys.h>
diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile

index dae8d86ef4ccc75c4bb3dfbb98aba9e7e12eec9f..4048821309566281d61e5dd478f2db7c921b9491 100644 (file)
--- a/arch/arm/mach-realview/Makefile
+++ b/arch/arm/mach-realview/Makefile
@@ -1,8 +1,7 @@
  #
  # Makefile for the linux kernel.
  #
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-       -I$(srctree)/arch/arm/plat-versatile/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include
  
  obj-y                                  := core.o
  obj-$(CONFIG_REALVIEW_DT)              += realview-dt.o
diff --git a/arch/arm/mach-s5pv210/Makefile b/arch/arm/mach-s5pv210/Makefile

index 72b9e96715070f2c47b13b7f7aade101abb9586e..fa7fb716e388a7ef4f4da1b89f58090e60805245 100644 (file)
--- a/arch/arm/mach-s5pv210/Makefile
+++ b/arch/arm/mach-s5pv210/Makefile
@@ -5,7 +5,7 @@
  #
  # Licensed under GPLv2
  
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include
  
  # Core
  
diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c

index f3dba6f356e29446c0960af2d37e51d2eacc8302..02e21bceb0856bc5ac5c769af3362afab2927c99 100644 (file)
--- a/arch/arm/mach-shmobile/platsmp.c
+++ b/arch/arm/mach-shmobile/platsmp.c
@@ -40,5 +40,8 @@ bool shmobile_smp_cpu_can_disable(unsigned int cpu)
  bool __init shmobile_smp_init_fallback_ops(void)
  {
         /* fallback on PSCI/smp_ops if no other DT based method is detected */
+       if (!IS_ENABLED(CONFIG_SMP))
+               return false;
+
         return platform_can_secondary_boot() ? true : false;
  }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c

index 62f4d01941f718bf02a6e24c19042d6c65b961d9..6344913f0804a20ccbad6539febbbfc981e921a5 100644 (file)
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -728,7 +728,8 @@ static void *__init late_alloc(unsigned long sz)
  {
         void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
  
-       BUG_ON(!ptr);
+       if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+               BUG();
         return ptr;
  }
  
@@ -1155,10 +1156,19 @@ void __init sanity_check_meminfo(void)
  {
         phys_addr_t memblock_limit = 0;
         int highmem = 0;
-       phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
+       u64 vmalloc_limit;
         struct memblock_region *reg;
         bool should_use_highmem = false;
  
+       /*
+        * Let's use our own (unoptimized) equivalent of __pa() that is
+        * not affected by wrap-arounds when sizeof(phys_addr_t) == 4.
+        * The result is used as the upper bound on physical memory address
+        * and may itself be outside the valid range for which phys_addr_t
+        * and therefore __pa() is defined.
+        */
+       vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
+
         for_each_memblock(memory, reg) {
                 phys_addr_t block_start = reg->base;
                 phys_addr_t block_end = reg->base + reg->size;
@@ -1183,10 +1193,11 @@ void __init sanity_check_meminfo(void)
                         if (reg->size > size_limit) {
                                 phys_addr_t overlap_size = reg->size - size_limit;
  
-                               pr_notice("Truncating RAM at %pa-%pa to -%pa",
-                                         &block_start, &block_end, &vmalloc_limit);
-                               memblock_remove(vmalloc_limit, overlap_size);
+                               pr_notice("Truncating RAM at %pa-%pa",
+                                         &block_start, &block_end);
                                 block_end = vmalloc_limit;
+                               pr_cont(" to -%pa", &block_end);
+                               memblock_remove(vmalloc_limit, overlap_size);
                                 should_use_highmem = true;
                         }
                 }
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms

index bb2616b161576b4a664536ccae5c761544e93167..be5d824ebdba2dab24840bb7808abcc40da2053e 100644 (file)
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -8,7 +8,7 @@ config ARCH_SUNXI
  
  config ARCH_ALPINE
         bool "Annapurna Labs Alpine platform"
-       select ALPINE_MSI
+       select ALPINE_MSI if PCI
         help
           This enables support for the Annapurna Labs Alpine
           Soc family.
@@ -66,7 +66,7 @@ config ARCH_LG1K
  config ARCH_HISI
         bool "Hisilicon SoC Family"
         select ARM_TIMER_SP804
-       select HISILICON_IRQ_MBIGEN
+       select HISILICON_IRQ_MBIGEN if PCI
         help
           This enables support for Hisilicon ARMv8 SoC family
  
diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts

index 299f3ce969ab8517a602ff7addda417ecd5aa5f3..c528dd52ba2d39b30547ab964eda219b1068a043 100644 (file)
--- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
+++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
@@ -12,6 +12,7 @@
  /dts-v1/;
  #include "exynos7.dtsi"
  #include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/samsung,s2mps11.h>
  
  / {
         model = "Samsung Exynos7 Espresso board based on EXYNOS7";
@@ -43,6 +44,8 @@
  
  &rtc {
         status = "okay";
+       clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>;
+       clock-names = "rtc", "rtc_src";
  };
  
  &watchdog {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig

index 0555b7caaf2c2960a1e4f02eca120ddd5d3d01b1..eadf4855ad2d995072022254000a83cd0c476a0d 100644 (file)
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,4 +1,3 @@
-# CONFIG_LOCALVERSION_AUTO is not set
  CONFIG_SYSVIPC=y
  CONFIG_POSIX_MQUEUE=y
  CONFIG_AUDIT=y
@@ -15,10 +14,14 @@ CONFIG_IKCONFIG_PROC=y
  CONFIG_LOG_BUF_SHIFT=14
  CONFIG_MEMCG=y
  CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
  CONFIG_CGROUP_HUGETLB=y
-# CONFIG_UTS_NS is not set
-# CONFIG_IPC_NS is not set
-# CONFIG_NET_NS is not set
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_USER_NS=y
  CONFIG_SCHED_AUTOGROUP=y
  CONFIG_BLK_DEV_INITRD=y
  CONFIG_KALLSYMS_ALL=y
@@ -71,6 +74,7 @@ CONFIG_PREEMPT=y
  CONFIG_KSM=y
  CONFIG_TRANSPARENT_HUGEPAGE=y
  CONFIG_CMA=y
+CONFIG_SECCOMP=y
  CONFIG_XEN=y
  CONFIG_KEXEC=y
  # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -84,10 +88,37 @@ CONFIG_NET=y
  CONFIG_PACKET=y
  CONFIG_UNIX=y
  CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
  CONFIG_IP_PNP=y
  CONFIG_IP_PNP_DHCP=y
  CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IPV6 is not set
+CONFIG_IPV6=m
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_VLAN_8021Q_MVRP=y
  CONFIG_BPF_JIT=y
  CONFIG_CFG80211=m
  CONFIG_MAC80211=m
@@ -103,6 +134,7 @@ CONFIG_MTD=y
  CONFIG_MTD_M25P80=y
  CONFIG_MTD_SPI_NOR=y
  CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
  CONFIG_VIRTIO_BLK=y
  CONFIG_SRAM=y
  # CONFIG_SCSI_PROC_FS is not set
@@ -120,7 +152,10 @@ CONFIG_SATA_SIL24=y
  CONFIG_PATA_PLATFORM=y
  CONFIG_PATA_OF_PLATFORM=y
  CONFIG_NETDEVICES=y
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
  CONFIG_TUN=y
+CONFIG_VETH=m
  CONFIG_VIRTIO_NET=y
  CONFIG_AMD_XGBE=y
  CONFIG_NET_XGENE=y
@@ -350,12 +385,16 @@ CONFIG_EXYNOS_ADC=y
  CONFIG_PWM_SAMSUNG=y
  CONFIG_EXT2_FS=y
  CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
  CONFIG_FANOTIFY=y
  CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
  CONFIG_QUOTA=y
  CONFIG_AUTOFS4_FS=y
-CONFIG_FUSE_FS=y
-CONFIG_CUSE=y
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
  CONFIG_VFAT_FS=y
  CONFIG_TMPFS=y
  CONFIG_HUGETLBFS=y
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h

index 61b49150dfa3046cb521c6fc54232d718e840460..1737aecfcc5e462c78e6c47ab06ffe2e2fc12c60 100644 (file)
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -22,7 +22,6 @@
  
  #define __ARCH_WANT_KPROBES_INSN_SLOT
  #define MAX_INSN_SIZE                  1
-#define MAX_STACK_SIZE                 128
  
  #define flush_insn_slot(p)             do { } while (0)
  #define kretprobe_blacklist_size       0
@@ -47,7 +46,6 @@ struct kprobe_ctlblk {
         struct prev_kprobe prev_kprobe;
         struct kprobe_step_ctx ss_ctx;
         struct pt_regs jprobe_saved_regs;
-       char jprobes_stack[MAX_STACK_SIZE];
  };
  
  void arch_remove_kprobe(struct kprobe *);
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S

index 0f03a8fe23144e777b3ead0a6ea18e038b5d1066..aef02d2af3b50db44476e887291b09c4056bf1c2 100644 (file)
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -219,7 +219,7 @@ ENDPROC(ftrace_graph_caller)
   *
   * Run ftrace_return_to_handler() before going back to parent.
   * @fp is checked against the value passed by ftrace_graph_caller()
- * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
   */
  ENTRY(return_to_handler)
         save_return_regs
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

index 96e4a2b64cc1221420540df2d7255dd2aca1b298..441420ca7d084af131bf7e7373c7bfc22c8ae26f 100644 (file)
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -353,6 +353,8 @@ el1_sync:
         lsr     x24, x1, #ESR_ELx_EC_SHIFT      // exception class
         cmp     x24, #ESR_ELx_EC_DABT_CUR       // data abort in EL1
         b.eq    el1_da
+       cmp     x24, #ESR_ELx_EC_IABT_CUR       // instruction abort in EL1
+       b.eq    el1_ia
         cmp     x24, #ESR_ELx_EC_SYS64          // configurable trap
         b.eq    el1_undef
         cmp     x24, #ESR_ELx_EC_SP_ALIGN       // stack alignment exception
@@ -364,6 +366,11 @@ el1_sync:
         cmp     x24, #ESR_ELx_EC_BREAKPT_CUR    // debug exception in EL1
         b.ge    el1_dbg
         b       el1_inv
+
+el1_ia:
+       /*
+        * Fall through to the Data abort case
+        */
  el1_da:
         /*
          * Data abort handling
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c

index ebecf9aa33d12da8a564ea0314f59a71b89e64a0..40ad08ac569af76b1f58fa775101ef264c480039 100644 (file)
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -138,7 +138,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
                 return;
  
         err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-                                      frame_pointer);
+                                      frame_pointer, NULL);
         if (err == -EBUSY)
                 return;
         else
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c

index 21ab5df9fa76ddea54d58abf77812593ddbb0f6d..65d81f965e7491daba3e05be40052776a6d1f64d 100644 (file)
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -35,6 +35,7 @@
  #include <asm/sections.h>
  #include <asm/smp.h>
  #include <asm/suspend.h>
+#include <asm/sysreg.h>
  #include <asm/virt.h>
  
  /*
@@ -217,12 +218,22 @@ static int create_safe_exec_page(void *src_start, size_t length,
         set_pte(pte, __pte(virt_to_phys((void *)dst) |
                          pgprot_val(PAGE_KERNEL_EXEC)));
  
-       /* Load our new page tables */
-       asm volatile("msr       ttbr0_el1, %0;"
-                    "isb;"
-                    "tlbi      vmalle1is;"
-                    "dsb       ish;"
-                    "isb" : : "r"(virt_to_phys(pgd)));
+       /*
+        * Load our new page tables. A strict BBM approach requires that we
+        * ensure that TLBs are free of any entries that may overlap with the
+        * global mappings we are about to install.
+        *
+        * For a real hibernate/resume cycle TTBR0 currently points to a zero
+        * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
+        * runtime services), while for a userspace-driven test_resume cycle it
+        * points to userspace page tables (and we must point it at a zero page
+        * ourselves). Elsewhere we only (un)install the idmap with preemption
+        * disabled, so T0SZ should be as required regardless.
+        */
+       cpu_set_reserved_ttbr0();
+       local_flush_tlb_all();
+       write_sysreg(virt_to_phys(pgd), ttbr0_el1);
+       isb();
  
         *phys_dst_addr = virt_to_phys((void *)dst);
  
@@ -393,6 +404,38 @@ int swsusp_arch_resume(void)
         void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
                                           void *, phys_addr_t, phys_addr_t);
  
+       /*
+        * Restoring the memory image will overwrite the ttbr1 page tables.
+        * Create a second copy of just the linear map, and use this when
+        * restoring.
+        */
+       tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+       if (!tmp_pg_dir) {
+               pr_err("Failed to allocate memory for temporary page tables.");
+               rc = -ENOMEM;
+               goto out;
+       }
+       rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+       if (rc)
+               goto out;
+
+       /*
+        * Since we only copied the linear map, we need to find restore_pblist's
+        * linear map address.
+        */
+       lm_restore_pblist = LMADDR(restore_pblist);
+
+       /*
+        * We need a zero page that is zero before & after resume in order to
+        * break before make on the ttbr1 page tables.
+        */
+       zero_page = (void *)get_safe_page(GFP_ATOMIC);
+       if (!zero_page) {
+               pr_err("Failed to allocate zero page.");
+               rc = -ENOMEM;
+               goto out;
+       }
+
         /*
          * Locate the exit code in the bottom-but-one page, so that *NULL
          * still has disastrous affects.
@@ -418,27 +461,6 @@ int swsusp_arch_resume(void)
          */
         __flush_dcache_area(hibernate_exit, exit_size);
  
-       /*
-        * Restoring the memory image will overwrite the ttbr1 page tables.
-        * Create a second copy of just the linear map, and use this when
-        * restoring.
-        */
-       tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
-       if (!tmp_pg_dir) {
-               pr_err("Failed to allocate memory for temporary page tables.");
-               rc = -ENOMEM;
-               goto out;
-       }
-       rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
-       if (rc)
-               goto out;
-
-       /*
-        * Since we only copied the linear map, we need to find restore_pblist's
-        * linear map address.
-        */
-       lm_restore_pblist = LMADDR(restore_pblist);
-
         /*
          * KASLR will cause the el2 vectors to be in a different location in
          * the resumed kernel. Load hibernate's temporary copy into el2.
@@ -453,12 +475,6 @@ int swsusp_arch_resume(void)
                 __hyp_set_vectors(el2_vectors);
         }
  
-       /*
-        * We need a zero page that is zero before & after resume in order to
-        * to break before make on the ttbr1 page tables.
-        */
-       zero_page = (void *)get_safe_page(GFP_ATOMIC);
-
         hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
                        resume_hdr.reenter_kernel, lm_restore_pblist,
                        resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c

index bf97685882883e8970e472edeb0b457cbc2b76e6..c6b0f40620d868d1bf031db8e0cb0dfff3b134dc 100644 (file)
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -41,18 +41,6 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
  static void __kprobes
  post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
  
-static inline unsigned long min_stack_size(unsigned long addr)
-{
-       unsigned long size;
-
-       if (on_irq_stack(addr, raw_smp_processor_id()))
-               size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr;
-       else
-               size = (unsigned long)current_thread_info() + THREAD_START_SP - addr;
-
-       return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack));
-}
-
  static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
  {
         /* prepare insn slot */
@@ -489,20 +477,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
  {
         struct jprobe *jp = container_of(p, struct jprobe, kp);
         struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-       long stack_ptr = kernel_stack_pointer(regs);
  
         kcb->jprobe_saved_regs = *regs;
         /*
-        * As Linus pointed out, gcc assumes that the callee
-        * owns the argument space and could overwrite it, e.g.
-        * tailcall optimization. So, to be absolutely safe
-        * we also save and restore enough stack bytes to cover
-        * the argument area.
+        * Since we can't be sure where in the stack frame "stacked"
+        * pass-by-value arguments are stored we just don't try to
+        * duplicate any of the stack. Do not use jprobes on functions that
+        * use more than 64 bytes (after padding each to an 8 byte boundary)
+        * of arguments, or pass individual arguments larger than 16 bytes.
          */
-       kasan_disable_current();
-       memcpy(kcb->jprobes_stack, (void *)stack_ptr,
-              min_stack_size(stack_ptr));
-       kasan_enable_current();
  
         instruction_pointer_set(regs, (unsigned long) jp->entry);
         preempt_disable();
@@ -554,10 +537,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
         }
         unpause_graph_tracing();
         *regs = kcb->jprobe_saved_regs;
-       kasan_disable_current();
-       memcpy((void *)stack_addr, kcb->jprobes_stack,
-              min_stack_size(stack_addr));
-       kasan_enable_current();
         preempt_enable_no_resched();
         return 1;
  }
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S

index 9a3aec97ac091bd8503e539544b3a62bec0bd143..ccf79d849e0a89e8a0694d04c005f8508d590de3 100644 (file)
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -101,12 +101,20 @@ ENTRY(cpu_resume)
         bl      el2_setup               // if in EL2 drop to EL1 cleanly
         /* enable the MMU early - so we can access sleep_save_stash by va */
         adr_l   lr, __enable_mmu        /* __cpu_setup will return here */
-       ldr     x27, =_cpu_resume       /* __enable_mmu will branch here */
+       adr_l   x27, _resume_switched   /* __enable_mmu will branch here */
         adrp    x25, idmap_pg_dir
         adrp    x26, swapper_pg_dir
         b       __cpu_setup
  ENDPROC(cpu_resume)
  
+       .pushsection    ".idmap.text", "ax"
+_resume_switched:
+       ldr     x8, =_cpu_resume
+       br      x8
+ENDPROC(_resume_switched)
+       .ltorg
+       .popsection
+
  ENTRY(_cpu_resume)
         mrs     x1, mpidr_el1
         adrp    x8, mpidr_hash
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c

index 76a6d9263908faf4800cb0807952d17c6f8276d9..d93d433525047e41c20760d0ec24e10524333804 100644 (file)
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -661,9 +661,9 @@ void __init smp_init_cpus(void)
                 acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
                                       acpi_parse_gic_cpu_interface, 0);
  
-       if (cpu_count > NR_CPUS)
-               pr_warn("no. of cores (%d) greater than configured maximum of %d - clipping\n",
-                       cpu_count, NR_CPUS);
+       if (cpu_count > nr_cpu_ids)
+               pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n",
+                       cpu_count, nr_cpu_ids);
  
         if (!bootcpu_valid) {
                 pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
@@ -677,7 +677,7 @@ void __init smp_init_cpus(void)
          * with entries in cpu_logical_map while initializing the cpus.
          * If the cpu set-up fails, invalidate the cpu_logical_map entry.
          */
-       for (i = 1; i < NR_CPUS; i++) {
+       for (i = 1; i < nr_cpu_ids; i++) {
                 if (cpu_logical_map(i) != INVALID_HWID) {
                         if (smp_cpu_setup(i))
                                 cpu_logical_map(i) = INVALID_HWID;
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c

index f94b80eb295dc48cd26a8d8ba01094abc9d669e8..9c3e75df21804bdefec8587550e53c8b3fd8920e 100644 (file)
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
  
  static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
  {
-       pte_t *pte = pte_offset_kernel(pmd, 0);
+       pte_t *pte = pte_offset_kernel(pmd, 0UL);
         unsigned long addr;
         unsigned i;
  
@@ -254,7 +254,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
  
  static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
  {
-       pmd_t *pmd = pmd_offset(pud, 0);
+       pmd_t *pmd = pmd_offset(pud, 0UL);
         unsigned long addr;
         unsigned i;
  
@@ -271,7 +271,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
  
  static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
  {
-       pud_t *pud = pud_offset(pgd, 0);
+       pud_t *pud = pud_offset(pgd, 0UL);
         unsigned long addr;
         unsigned i;
  
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c

index c8beaa0da7df4e60e3939c962b268cf666337dde..05d2bd776c69b932397ccec82306d8fbb46272bf 100644 (file)
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -153,6 +153,11 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
  }
  #endif
  
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+       return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
  /*
   * The kernel tried to access some page that wasn't present.
   */
@@ -161,8 +166,9 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
  {
         /*
          * Are we prepared to handle this kernel fault?
+        * We are almost certainly not prepared to handle instruction faults.
          */
-       if (fixup_exception(regs))
+       if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
                 return;
  
         /*
@@ -267,7 +273,8 @@ static inline bool is_permission_fault(unsigned int esr)
         unsigned int ec       = ESR_ELx_EC(esr);
         unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
  
-       return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+       return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
+              (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
  }
  
  static bool is_el0_instruction_abort(unsigned int esr)
@@ -312,6 +319,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                 if (regs->orig_addr_limit == KERNEL_DS)
                         die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
  
+               if (is_el1_instruction_abort(esr))
+                       die("Attempting to execute userspace memory", regs, esr);
+
                 if (!search_exception_tables(regs->pc))
                         die("Accessing user space memory outside uaccess.h routines", regs, esr);
         }
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c

index c7fe3ec70774a6c81cb0940940eba389ffa6acb7..5bb15eab6f00e42bee6142a489746dc961298e87 100644 (file)
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -23,6 +23,8 @@
  #include <linux/module.h>
  #include <linux/of.h>
  
+#include <asm/acpi.h>
+
  struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
  EXPORT_SYMBOL(node_data);
  nodemask_t numa_nodes_parsed __initdata;
diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S

index 28d059540424f5ac07ffae53e556d5d2c6f1393f..3b8bdcbb7da3e3235590ee86d681bd106a1ea8e9 100644 (file)
--- a/arch/blackfin/kernel/ftrace-entry.S
+++ b/arch/blackfin/kernel/ftrace-entry.S
@@ -169,7 +169,7 @@ ENTRY(_ftrace_graph_caller)
         r0 = sp;        /* unsigned long *parent */
         r1 = [sp];      /* unsigned long self_addr */
  # endif
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
         r2 = fp;        /* unsigned long frame_pointer */
  # endif
         r0 += 16;       /* skip the 4 local regs on stack */
@@ -190,7 +190,7 @@ ENTRY(_return_to_handler)
         [--sp] = r1;
  
         /* get original return address */
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
         r0 = fp;        /* Blackfin is sane, so omit this */
  # endif
         call _ftrace_return_to_handler;
diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c

index 095de0fa044d0eae0a698e8f02fae14329b4cfb0..8dad7589b8436a7a63acacd36bcc51f9b94d91d4 100644 (file)
--- a/arch/blackfin/kernel/ftrace.c
+++ b/arch/blackfin/kernel/ftrace.c
@@ -107,7 +107,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
                 return;
  
         if (ftrace_push_return_trace(*parent, self_addr, &trace.depth,
-                                    frame_pointer) == -EBUSY)
+                                    frame_pointer, NULL) == -EBUSY)
                 return;
  
         trace.func = self_addr;
diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h

index 2e221c5f0203b22d3bda055862548afb3803bb2d..f86918aed9e1181e3dc2546a1b0520f814138164 100644 (file)
--- a/arch/h8300/include/asm/io.h
+++ b/arch/h8300/include/asm/io.h
@@ -3,6 +3,8 @@
  
  #ifdef __KERNEL__
  
+#include <linux/types.h>
+
  /* H8/300 internal I/O functions */
  
  #define __raw_readb __raw_readb
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h

index 29bd59790d6c087939e28f484d303ae30378d21c..c7026429816be5d319462f2e9bdd1e373925f828 100644 (file)
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -56,7 +56,7 @@ struct thread_info {
  #define alloc_thread_stack_node(tsk, node)     ((unsigned long *) 0)
  #define task_thread_info(tsk)  ((struct thread_info *) 0)
  #endif
-#define free_thread_stack(ti)  /* nothing */
+#define free_thread_stack(tsk) /* nothing */
  #define task_stack_page(tsk)   ((void *)(tsk))
  
  #define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c

index 2dcee3a88867536b2186f01252f1bf1d842ec279..9202f82dfce60e919d884e37c89924ade5a33569 100644 (file)
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -213,7 +213,6 @@ static inline int frame_extra_sizes(int f)
  
  static inline void adjustformat(struct pt_regs *regs)
  {
-       ((struct switch_stack *)regs - 1)->a5 = current->mm->start_data;
         /*
          * set format byte to make stack appear modulo 4, which it will
          * be when doing the rte
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c

index 11fa51c89617deb1a303c6bfdbf82b2a32a1e4db..c0ec116b3993a3a61b852c9daf14a34ab0962e15 100644 (file)
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -390,7 +390,6 @@ void __init mem_init(void)
  
         free_all_bootmem();
         mem_init_print_info(NULL);
-       show_mem(0);
  }
  
  void free_initmem(void)
diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c

index fc7b48a52cd554c74be87e2185f7594c68c20beb..d57563c58a26be43672098d9895d9107c945e8d8 100644 (file)
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -63,7 +63,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
                 return;
         }
  
-       err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+       err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
         if (err == -EBUSY) {
                 *parent = old;
                 return;
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c

index 937c54bc8ccc499504c561f39c026b2c58417b50..30a3b75e88eb6a3310808d8c743739efa3f98fb7 100644 (file)
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -382,8 +382,8 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
         if (unlikely(faulted))
                 goto out;
  
-       if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
-           == -EBUSY) {
+       if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
+                                    NULL) == -EBUSY) {
                 *parent_ra_addr = old_parent_ra;
                 return;
         }
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c

index 6eb52b9c98183b95134710c22187b1a6f0225ef9..e788515f766b46cefb2a36dfc95ab6bfcec6e8e3 100644 (file)
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1642,8 +1642,14 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
  
         preempt_disable();
         if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
-               if (kvm_mips_host_tlb_lookup(vcpu, va) < 0)
-                       kvm_mips_handle_kseg0_tlb_fault(va, vcpu);
+               if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
+                   kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
+                       kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
+                               __func__, va, vcpu, read_c0_entryhi());
+                       er = EMULATE_FAIL;
+                       preempt_enable();
+                       goto done;
+               }
         } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
                    KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
                 int index;
@@ -1680,12 +1686,18 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
                                                                 run, vcpu);
                                 preempt_enable();
                                 goto dont_update_pc;
-                       } else {
-                               /*
-                                * We fault an entry from the guest tlb to the
-                                * shadow host TLB
-                                */
-                               kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+                       }
+                       /*
+                        * We fault an entry from the guest tlb to the
+                        * shadow host TLB
+                        */
+                       if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+                               kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+                                       __func__, va, index, vcpu,
+                                       read_c0_entryhi());
+                               er = EMULATE_FAIL;
+                               preempt_enable();
+                               goto done;
                         }
                 }
         } else {
@@ -2659,7 +2671,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause,
                          * OK we have a Guest TLB entry, now inject it into the
                          * shadow host TLB
                          */
-                       kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+                       if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+                               kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+                                       __func__, va, index, vcpu,
+                                       read_c0_entryhi());
+                               er = EMULATE_FAIL;
+                       }
                 }
         }
  
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c

index 57319ee57c4fdd1aada66e1648adc3981edf9dc3..6cfdcf55572d6aaa747998683f06bbf7f5a3e5ab 100644 (file)
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -99,7 +99,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
         }
  
         gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
-       if (gfn >= kvm->arch.guest_pmap_npages) {
+       if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
                 kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
                         gfn, badvaddr);
                 kvm_mips_dump_host_tlbs();
@@ -138,35 +138,49 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
         unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
         struct kvm *kvm = vcpu->kvm;
         kvm_pfn_t pfn0, pfn1;
+       gfn_t gfn0, gfn1;
+       long tlb_lo[2];
         int ret;
  
-       if ((tlb->tlb_hi & VPN2_MASK) == 0) {
-               pfn0 = 0;
-               pfn1 = 0;
-       } else {
-               if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0])
-                                          >> PAGE_SHIFT) < 0)
-                       return -1;
-
-               if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1])
-                                          >> PAGE_SHIFT) < 0)
-                       return -1;
-
-               pfn0 = kvm->arch.guest_pmap[
-                       mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT];
-               pfn1 = kvm->arch.guest_pmap[
-                       mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT];
+       tlb_lo[0] = tlb->tlb_lo[0];
+       tlb_lo[1] = tlb->tlb_lo[1];
+
+       /*
+        * The commpage address must not be mapped to anything else if the guest
+        * TLB contains entries nearby, or commpage accesses will break.
+        */
+       if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
+                       VPN2_MASK & (PAGE_MASK << 1)))
+               tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;
+
+       gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
+       gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
+       if (gfn0 >= kvm->arch.guest_pmap_npages ||
+           gfn1 >= kvm->arch.guest_pmap_npages) {
+               kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
+                       __func__, gfn0, gfn1, tlb->tlb_hi);
+               kvm_mips_dump_guest_tlbs(vcpu);
+               return -1;
         }
  
+       if (kvm_mips_map_page(kvm, gfn0) < 0)
+               return -1;
+
+       if (kvm_mips_map_page(kvm, gfn1) < 0)
+               return -1;
+
+       pfn0 = kvm->arch.guest_pmap[gfn0];
+       pfn1 = kvm->arch.guest_pmap[gfn1];
+
         /* Get attributes from the Guest TLB */
         entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
                 ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
-               (tlb->tlb_lo[0] & ENTRYLO_D) |
-               (tlb->tlb_lo[0] & ENTRYLO_V);
+               (tlb_lo[0] & ENTRYLO_D) |
+               (tlb_lo[0] & ENTRYLO_V);
         entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
                 ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
-               (tlb->tlb_lo[1] & ENTRYLO_D) |
-               (tlb->tlb_lo[1] & ENTRYLO_V);
+               (tlb_lo[1] & ENTRYLO_D) |
+               (tlb_lo[1] & ENTRYLO_V);
  
         kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
                   tlb->tlb_lo[0], tlb->tlb_lo[1]);
@@ -354,9 +368,15 @@ u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu)
                                 local_irq_restore(flags);
                                 return KVM_INVALID_INST;
                         }
-                       kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
-                                                            &vcpu->arch.
-                                                            guest_tlb[index]);
+                       if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
+                                               &vcpu->arch.guest_tlb[index])) {
+                               kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
+                                       __func__, opc, index, vcpu,
+                                       read_c0_entryhi());
+                               kvm_mips_dump_guest_tlbs(vcpu);
+                               local_irq_restore(flags);
+                               return KVM_INVALID_INST;
+                       }
                         inst = *(opc);
                 }
                 local_irq_restore(flags);
diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h

index c0ae62520d1575526c83b014d194adc0f6a4428a..274d5bc6ecce4aba9e1d19b1cd0e1d6f42ba0eb3 100644 (file)
--- a/arch/parisc/include/uapi/asm/errno.h
+++ b/arch/parisc/include/uapi/asm/errno.h
@@ -97,10 +97,10 @@
  #define        ENOTCONN        235     /* Transport endpoint is not connected */
  #define        ESHUTDOWN       236     /* Cannot send after transport endpoint shutdown */
  #define        ETOOMANYREFS    237     /* Too many references: cannot splice */
-#define EREFUSED       ECONNREFUSED    /* for HP's NFS apparently */
  #define        ETIMEDOUT       238     /* Connection timed out */
  #define        ECONNREFUSED    239     /* Connection refused */
-#define EREMOTERELEASE 240     /* Remote peer released connection */
+#define        EREFUSED        ECONNREFUSED    /* for HP's NFS apparently */
+#define        EREMOTERELEASE  240     /* Remote peer released connection */
  #define        EHOSTDOWN       241     /* Host is down */
  #define        EHOSTUNREACH    242     /* No route to host */
  
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c

index a828a0adf52c0b19d14eb12219bf718c3cf37704..5a5506a35395c411f91a61a59ebd50ed3ced38b6 100644 (file)
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -48,7 +48,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
                 return;
  
          if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-                       0 ) == -EBUSY)
+                                    0, NULL) == -EBUSY)
                  return;
  
         /* activate parisc_return_to_handler() as return point */
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c

index 5adc339eb7c8dab935fbbc933a64e811f6800ccc..0c2a94a0f7518b8082ecda3f0307ead9534ea972 100644 (file)
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -51,8 +51,6 @@ EXPORT_SYMBOL(_parisc_requires_coherency);
  
  DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data);
  
-extern int update_cr16_clocksource(void);      /* from time.c */
-
  /*
  **     PARISC CPU driver - claim "device" and initialize CPU data structures.
  **
@@ -228,12 +226,6 @@ static int processor_probe(struct parisc_device *dev)
         }
  #endif
  
-       /* If we've registered more than one cpu,
-        * we'll use the jiffies clocksource since cr16
-        * is not synchronized between CPUs.
-        */
-       update_cr16_clocksource();
-
         return 0;
  }
  
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c

index 505cf1ac5af24ecef4731f845fd6de516ae1f306..4b0b963d52a757e83674149f4162faf3eafb7456 100644 (file)
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -221,18 +221,6 @@ static struct clocksource clocksource_cr16 = {
         .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
  };
  
-int update_cr16_clocksource(void)
-{
-       /* since the cr16 cycle counters are not synchronized across CPUs,
-          we'll check if we should switch to a safe clocksource: */
-       if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) {
-               clocksource_change_rating(&clocksource_cr16, 0);
-               return 1;
-       }
-
-       return 0;
-}
-
  void __init start_cpu_itimer(void)
  {
         unsigned int cpu = smp_processor_id();
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile

index ca254546cd05a1a4fe09e87e63945d7b38b8873f..1934707bf321ecf47a835bc7a0c4cd88f092ed07 100644 (file)
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -66,29 +66,28 @@ endif
  UTS_MACHINE := $(OLDARCH)
  
  ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC    += -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC    += -mno-strict-align
-endif
-override AS    += -mlittle-endian
  override LD    += -EL
-override CROSS32CC += -mlittle-endian
  override CROSS32AS += -mlittle-endian
  LDEMULATION    := lppc
  GNUTARGET      := powerpcle
  MULTIPLEWORD   := -mno-multiple
  KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
  else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC    += -mbig-endian
-override AS    += -mbig-endian
-endif
  override LD    += -EB
  LDEMULATION    := ppc
  GNUTARGET      := powerpc
  MULTIPLEWORD   := -mmultiple
  endif
  
+cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mlittle-endian
+ifneq ($(cc-name),clang)
+  cflags-$(CONFIG_CPU_LITTLE_ENDIAN)   += -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mlittle-endian
+
  ifeq ($(HAS_BIARCH),y)
  override AS    += -a$(CONFIG_WORD_SIZE)
  override LD    += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
@@ -232,6 +231,9 @@ cpu-as-$(CONFIG_E200)               += -Wa,-me200
  KBUILD_AFLAGS += $(cpu-as-y)
  KBUILD_CFLAGS += $(cpu-as-y)
  
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
  head-y                         := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
  head-$(CONFIG_8xx)             := arch/powerpc/kernel/head_8xx.o
  head-$(CONFIG_40x)             := arch/powerpc/kernel/head_40x.o
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c

index bfe3d37a24ef3a24c07e5a9f7ed8df3a13b4bf59..9fa046d56ebadd6ad25e62b5a29a853b123cd30a 100644 (file)
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -4,6 +4,7 @@
  #include <linux/module.h>
  #include <linux/string.h>
  #include <linux/kernel.h>
+#include <linux/cpufeature.h>
  #include <asm/switch_to.h>
  
  #define CHKSUM_BLOCK_SIZE      1
@@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void)
         crypto_unregister_shash(&alg);
  }
  
-module_init(crc32c_vpmsum_mod_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
  module_exit(crc32c_vpmsum_mod_fini);
  
  MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h

index 3d7fc06532a16a7b620a58342d70e41fd69a33da..01b8a13f022467be64ccd46f248344bdf96e9a41 100644 (file)
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state;
  
  #endif
  
+/* Idle state entry routines */
+#ifdef CONFIG_PPC_P7_NAP
+#define        IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
+       /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
+       std     r0,0(r1);                                       \
+       ptesync;                                                \
+       ld      r0,0(r1);                                       \
+1:     cmp     cr0,r0,r0;                                      \
+       bne     1b;                                             \
+       IDLE_INST;                                              \
+       b       .
+#endif /* CONFIG_PPC_P7_NAP */
+
  #endif
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h

index 57fec8ac7b924cdeabb4a21b65d46785e9fbf745..ddf54f5bbdd1c05efbd286ec305beac3c459d8d6 100644 (file)
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -186,6 +186,7 @@ label##3:                                           \
  
  #ifndef __ASSEMBLY__
  void apply_feature_fixups(void);
+void setup_feature_keys(void);
  #endif
  
  #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h

index 0a74ebe934e1cbcb61105b63d54a959614bca33d..17c8380673a60637c61fec5772162bf0ae5523cb 100644 (file)
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void)
  static inline void __giveup_spe(struct task_struct *t) { }
  #endif
  
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void flush_tmregs_to_thread(struct task_struct *);
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *t)
-{
-}
-#endif
-
  static inline void clear_task_ebb(struct task_struct *t)
  {
  #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h

index f5f729c115781a8d6c9c1850f89558b06383c761..f0b238516e9b44b5afabc52a8460daaa6e81ca97 100644 (file)
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void);
  extern void xics_kexec_teardown_cpu(int secondary);
  extern void xics_migrate_irqs_away(void);
  extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
  #ifdef CONFIG_SMP
  extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
                                unsigned int strict_check);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c

index c9bc78e9c6101b2ae5016efec858c5c38b0f4158..7429556eb8df7e468b447a1b6d4c541253c06295 100644 (file)
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -168,10 +168,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
         int n = 0, l = 0;
         char buffer[128];
  
-       n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+       n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
                        edev->phb->global_number, pdn->busno,
                        PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
-       pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+       pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
                 edev->phb->global_number, pdn->busno,
                 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
  
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S

index 41091fdf9bd88fbe68d9ef15aeb04c1b59b18e77..df6d45eb41150185e7cc17baa82feead23fa27f3 100644 (file)
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -144,29 +144,14 @@ machine_check_pSeries_1:
          * vector
          */
         SET_SCRATCH0(r13)               /* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
-       /* Running native on arch 2.06 or later, check if we are
-        * waking up from nap. We only handle no state loss and
-        * supervisor state loss. We do -not- handle hypervisor
-        * state loss at this time.
+       /*
+        * Running native on arch 2.06 or later, we may wake up from winkle
+        * inside machine check. If yes, then last bit of HSPRG0 would be set
+        * to 1. Hence clear it unconditionally.
          */
-       mfspr   r13,SPRN_SRR1
-       rlwinm. r13,r13,47-31,30,31
-       OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-       beq     9f
-
-       mfspr   r13,SPRN_SRR1
-       rlwinm. r13,r13,47-31,30,31
-       /* waking up from powersave (nap) state */
-       cmpwi   cr1,r13,2
-       /* Total loss of HV state is fatal. let's just stay stuck here */
-       OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-       bgt     cr1,.
-9:
-       OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
+       GET_PACA(r13)
+       clrrdi  r13,r13,1
+       SET_PACA(r13)
         EXCEPTION_PROLOG_0(PACA_EXMC)
  BEGIN_FTR_SECTION
         b       machine_check_powernv_early
@@ -1273,25 +1258,51 @@ machine_check_handle_early:
          * Check if thread was in power saving mode. We come here when any
          * of the following is true:
          * a. thread wasn't in power saving mode
-        * b. thread was in power saving mode with no state loss or
-        *    supervisor state loss
+        * b. thread was in power saving mode with no state loss,
+        *    supervisor state loss or hypervisor state loss.
          *
-        * Go back to nap again if (b) is true.
+        * Go back to nap/sleep/winkle mode again if (b) is true.
          */
         rlwinm. r11,r12,47-31,30,31     /* Was it in power saving mode? */
         beq     4f                      /* No, it wasn;t */
         /* Thread was in power saving mode. Go back to nap again. */
         cmpwi   r11,2
-       bne     3f
-       /* Supervisor state loss */
+       blt     3f
+       /* Supervisor/Hypervisor state loss */
         li      r0,1
         stb     r0,PACA_NAPSTATELOST(r13)
  3:     bl      machine_check_queue_event
         MACHINE_CHECK_HANDLER_WINDUP
         GET_PACA(r13)
         ld      r1,PACAR1(r13)
-       li      r3,PNV_THREAD_NAP
-       b       pnv_enter_arch207_idle_mode
+       /*
+        * Check what idle state this CPU was in and go back to same mode
+        * again.
+        */
+       lbz     r3,PACA_THREAD_IDLE_STATE(r13)
+       cmpwi   r3,PNV_THREAD_NAP
+       bgt     10f
+       IDLE_STATE_ENTER_SEQ(PPC_NAP)
+       /* No return */
+10:
+       cmpwi   r3,PNV_THREAD_SLEEP
+       bgt     2f
+       IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+       /* No return */
+
+2:
+       /*
+        * Go back to winkle. Please note that this thread was woken up in
+        * machine check from winkle and has not restored the per-subcore
+        * state. Hence before going back to winkle, set last bit of HSPRG0
+        * to 1. This will make sure that if this thread gets woken up
+        * again at reset vector 0x100 then it will get a chance to restore
+        * the subcore state.
+        */
+       ori     r13,r13,1
+       SET_PACA(r13)
+       IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+       /* No return */
  4:
  #endif
         /*
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c

index cc52d9795f88d5d6989f89adbf876030429402f1..a95639b8d4ac5d72a7be17d2b1e20e3f5fa874a1 100644 (file)
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -593,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
         if (!ftrace_graph_entry(&trace))
                 goto out;
  
-       if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+       if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+                                    NULL) == -EBUSY)
                 goto out;
  
         parent = return_hooker;
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S

index ba79d15f4ddd7c0d8ce946e15098d977a00338fa..2265c6398a17ec4af7fc5983df9dad7443ac2fce 100644 (file)
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -44,18 +44,6 @@
                                 PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
                                 PSSCR_MTL_MASK
  
-/* Idle state entry routines */
-
-#define        IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
-       /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
-       std     r0,0(r1);                                       \
-       ptesync;                                                \
-       ld      r0,0(r1);                                       \
-1:     cmp     cr0,r0,r0;                                      \
-       bne     1b;                                             \
-       IDLE_INST;                                              \
-       b       .
-
         .text
  
  /*
@@ -363,8 +351,8 @@ _GLOBAL(power9_idle_stop)
   * cr3 - set to gt if waking up with partial/complete hypervisor state loss
   */
  _GLOBAL(pnv_restore_hyp_resource)
-       ld      r2,PACATOC(r13);
  BEGIN_FTR_SECTION
+       ld      r2,PACATOC(r13);
         /*
          * POWER ISA 3. Use PSSCR to determine if we
          * are waking up from deep idle state
@@ -395,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
          */
         clrldi  r5,r13,63
         clrrdi  r13,r13,1
+
+       /* Now that we are sure r13 is corrected, load TOC */
+       ld      r2,PACATOC(r13);
         cmpwi   cr4,r5,1
         mtspr   SPRN_HSPRG0,r13
  
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c

index ef267fd9dd225a3c7f3dfbcacc057a0bf28dca8e..5e7ece0fda9f5b802eb561ce51774cfb625032a6 100644 (file)
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
         mce->in_use = 1;
  
         mce->initiator = MCE_INITIATOR_CPU;
-       if (handled)
+       /* Mark it recovered if we have handled it and MSR(RI=1). */
+       if (handled && (regs->msr & MSR_RI))
                 mce->disposition = MCE_DISPOSITION_RECOVERED;
         else
                 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c

index a5c0153ede37f21d6dd8f47beac764587789fbc5..7fdf324d5b51f4ebb8716dfadda4ed40fb34d23d 100644 (file)
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops);
  static int get_phb_number(struct device_node *dn)
  {
         int ret, phb_id = -1;
+       u32 prop_32;
         u64 prop;
  
         /*
@@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn)
          * reading "ibm,opal-phbid", only present in OPAL environment.
          */
         ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
-       if (ret)
-               ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
+       if (ret) {
+               ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+               prop = prop_32;
+       }
  
         if (!ret)
                 phb_id = (int)(prop & (MAX_PHBS - 1));
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c

index 58ccf86415b46cd5c2db593424ecde772cd0d959..9ee2623e0f674977ee8e8f07bbfb1297a2f05dc8 100644 (file)
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1074,26 +1074,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
  #endif
  }
  
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void flush_tmregs_to_thread(struct task_struct *tsk)
-{
-       /*
-        * Process self tracing is not yet supported through
-        * ptrace interface. Ptrace generic code should have
-        * prevented this from happening in the first place.
-        * Warn once here with the message, if some how it
-        * is attempted.
-        */
-       WARN_ONCE(tsk == current,
-               "Not expecting ptrace on self: TM regs may be incorrect\n");
-
-       /*
-        * If task is not current, it should have been flushed
-        * already to it's thread_struct during __switch_to().
-        */
-}
-#endif
-
  struct task_struct *__switch_to(struct task_struct *prev,
         struct task_struct *new)
  {
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c

index 6ee4b72cda4201840cf2b85f38661831c5e37233..4e74fc588a3f6497177fa82e1d148b32383ffcb9 100644 (file)
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2940,7 +2940,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
  
         /* Don't print anything after quiesce under OPAL, it crashes OFW */
         if (of_platform != PLATFORM_OPAL) {
-               prom_printf("Booting Linux via __start() ...\n");
+               prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
                 prom_debug("->dt_header_start=0x%x\n", hdr);
         }
  
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c

index 4f3c5756cc09898f984de4cc6cf6fc7c1ba830ac..bf91658a8a406b051e2072e2ce948317abfefe29 100644 (file)
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -38,6 +38,7 @@
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include <asm/switch_to.h>
+#include <asm/tm.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/syscalls.h>
@@ -118,6 +119,24 @@ static const struct pt_regs_offset regoffset_table[] = {
         REG_OFFSET_END,
  };
  
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+       /*
+        * If task is not current, it will have been flushed already to
+        * its thread_struct during __switch_to().
+        *
+        * A reclaim flushes ALL the state.
+        */
+
+       if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
+               tm_reclaim_current(TM_CAUSE_SIGNAL);
+
+}
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
  /**
   * regs_query_register_offset() - query register offset from its name
   * @name:      the name of a register
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c

index c3e861df4b203ce5be8e0e9f0fb8f4e236d41cd5..24ec3ea4b3a2eeeeae2e0f713226acd252bab806 100644 (file)
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -93,15 +93,16 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
   * and we are running with enough of the MMU enabled to have our
   * proper kernel virtual addresses
   *
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
+ * We do the initial parsing of the flat device-tree and prepare
+ * for the MMU to be fully initialized.
   */
  extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
  
  notrace void __init machine_init(u64 dt_ptr)
  {
+       /* Configure static keys first, now that we're relocated. */
+       setup_feature_keys();
+
         /* Enable early debugging if any specified (see udbg.h) */
         udbg_early_init();
  
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c

index eafb9a79e0116b600624a16212c2a02bfb46e363..7ac8e6eaab5ba24566f1f6fe06829e22727e86ea 100644 (file)
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
  
         /* Apply all the dynamic patching */
         apply_feature_fixups();
+       setup_feature_keys();
  
         /* Initialize the hash table or TLB handling */
         early_init_mmu();
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c

index 6767605ea8da2eddb54152dbf92538467fe2dc3e..4111d30badfad30fa1eb7dd29abb6a26d8a338de 100644 (file)
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -22,6 +22,7 @@
  #include <linux/security.h>
  #include <linux/memblock.h>
  
+#include <asm/cpu_has_feature.h>
  #include <asm/pgtable.h>
  #include <asm/processor.h>
  #include <asm/mmu.h>
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile

index cbabd143acae8e9db3b3883be876527b68b83574..78a7449bf489d49a400ae45703ce77bdc7206f4b 100644 (file)
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc
  $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
  
  # link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
         $(call if_changed,vdso32ld)
  
  # strip rule for the .so file
@@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
         $(call if_changed,objcopy)
  
  # assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
         $(call if_changed_dep,vdso32as)
  
  # actual build commands
  quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
  quiet_cmd_vdso32as = VDSO32A $@
        cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
  
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile

index c710802b8fb685a7cb5815d86387f7debb356d9e..366ae09b14c1e3a5179987d312ec4d1fedf86313 100644 (file)
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
  $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
  
  # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
         $(call if_changed,vdso64ld)
  
  # strip rule for the .so file
@@ -32,12 +32,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
         $(call if_changed,objcopy)
  
  # assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
         $(call if_changed_dep,vdso64as)
  
  # actual build commands
  quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
  quiet_cmd_vdso64as = VDSO64A $@
        cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
  
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c

index a75ba38a2d81415dfcbe497c27b1329fdd6c8736..05aa11399a7867c1a4148d0f9eb4bf80bca3ee9d 100644 (file)
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1329,20 +1329,16 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
         xics->kvm = kvm;
  
         /* Already there ? */
-       mutex_lock(&kvm->lock);
         if (kvm->arch.xics)
                 ret = -EEXIST;
         else
                 kvm->arch.xics = xics;
-       mutex_unlock(&kvm->lock);
  
         if (ret) {
                 kfree(xics);
                 return ret;
         }
  
-       xics_debugfs_init(xics);
-
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
         if (cpu_has_feature(CPU_FTR_ARCH_206)) {
                 /* Enable real mode support */
@@ -1354,9 +1350,17 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
         return 0;
  }
  
+static void kvmppc_xics_init(struct kvm_device *dev)
+{
+       struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private;
+
+       xics_debugfs_init(xics);
+}
+
  struct kvm_device_ops kvm_xics_ops = {
         .name = "kvm-xics",
         .create = kvmppc_xics_create,
+       .init = kvmppc_xics_init,
         .destroy = kvmppc_xics_free,
         .set_attr = xics_set_attr,
         .get_attr = xics_get_attr,
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S

index d90870a66b60b4b0820a3044466f70716e1ed610..0a57fe6d49ccf43a07224c4f7da1f597e36cc10c 100644 (file)
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -127,8 +127,9 @@ _GLOBAL(csum_partial_copy_generic)
         stw     r7,12(r1)
         stw     r8,8(r1)
  
-       andi.   r0,r4,1                 /* is destination address even ? */
-       cmplwi  cr7,r0,0
+       rlwinm  r0,r4,3,0x8
+       rlwnm   r6,r6,r0,0,31   /* odd destination address: rotate one byte */
+       cmplwi  cr7,r0,0        /* is destination address even ? */
         addic   r12,r6,0
         addi    r6,r4,-4
         neg     r0,r4
@@ -237,7 +238,7 @@ _GLOBAL(csum_partial_copy_generic)
  66:    addze   r3,r12
         addi    r1,r1,16
         beqlr+  cr7
-       rlwinm  r3,r3,8,0,31    /* swap bytes for odd destination */
+       rlwinm  r3,r3,8,0,31    /* odd destination address: rotate one byte */
         blr
  
  /* read fault */
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c

index 74145f02ad417b496ceba07b0f114a2bbd77bc75..043415f0bdb1646fa85f7bb26d04f0241c68ff63 100644 (file)
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -188,7 +188,10 @@ void __init apply_feature_fixups(void)
                           &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
  #endif
         do_final_fixups();
+}
  
+void __init setup_feature_keys(void)
+{
         /*
          * Initialise jump label. This causes all the cpu/mmu_has_feature()
          * checks to take on their correct polarity based on the current set of
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c

index 5be15cff758df193bd935baa50867a5dc9e3b063..2975754c65ea9514e4d7d10284a2beb42d6dd3cb 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
         gang = alloc_spu_gang();
         SPUFS_I(inode)->i_ctx = NULL;
         SPUFS_I(inode)->i_gang = gang;
-       if (!gang)
+       if (!gang) {
+               ret = -ENOMEM;
                 goto out_iput;
+       }
  
         inode->i_op = &simple_dir_inode_operations;
         inode->i_fop = &simple_dir_operations;
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c

index 309d9ccccd509c83097386dca03621f4db3b7ca3..c61667e8bb06c51385fbe21c18b288aeb889f2da 100644 (file)
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
         if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
             !firmware_has_feature(FW_FEATURE_LPAR)) {
                 dev->dev.archdata.dma_ops = &dma_direct_ops;
+               /*
+                * Set the coherent DMA mask to prevent the iommu
+                * being used unnecessarily
+                */
+               dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
                 return;
         }
  #endif
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c

index e505223b4ec5ed2d82bcb08b2e718a6b64601f18..ed8bba68a162120d282bfc2286a674b049e44d5c 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -228,7 +228,8 @@ int __init opal_event_init(void)
                 }
  
                 /* Install interrupt handler */
-               rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+               rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+                                "opal", NULL);
                 if (rc) {
                         irq_dispose_mapping(virq);
                         pr_warn("Error %d requesting irq %d (0x%x)\n",
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c

index 8b4fc68cebcb2f4c02a716074cb1b1c1045d0d1f..6c9a65b52e63b589edbe809a4b65851ca2ea2f79 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs,
  
         if (!(regs->msr & MSR_RI)) {
                 /* If MSR_RI isn't set, we cannot recover */
+               pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                 recovered = 0;
         } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                 /* Platform corrected itself */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c

index 6b9528307f620e639be196d3b691e6ac8d159d28..fd9444f9fb0c24e0ed02dcb722c0e824ddc218e2 100644 (file)
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -111,10 +111,17 @@ static int __init iommu_setup(char *str)
  }
  early_param("iommu", iommu_setup);
  
-static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
  {
-       return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
-               (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+       /*
+        * WARNING: We cannot rely on the resource flags. The Linux PCI
+        * allocation code sometimes decides to put a 64-bit prefetchable
+        * BAR in the 32-bit window, so we have to compare the addresses.
+        *
+        * For simplicity we only test resource start.
+        */
+       return (r->start >= phb->ioda.m64_base &&
+               r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
  }
  
  static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
@@ -229,7 +236,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
         sgsz = phb->ioda.m64_segsize;
         for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
                 r = &pdev->resource[i];
-               if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+               if (!r->parent || !pnv_pci_is_m64(phb, r))
                         continue;
  
                 start = _ALIGN_DOWN(r->start - base, sgsz);
@@ -1877,7 +1884,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm,
                                         unsigned shift, unsigned long index,
                                         unsigned long npages)
  {
-       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+       __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
         unsigned long start, end, inc;
  
         /* We'll invalidate DMA address in PE scope */
@@ -2863,7 +2870,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
                 res = &pdev->resource[i + PCI_IOV_RESOURCES];
                 if (!res->flags || res->parent)
                         continue;
-               if (!pnv_pci_is_mem_pref_64(res->flags)) {
+               if (!pnv_pci_is_m64(phb, res)) {
                         dev_warn(&pdev->dev, "Don't support SR-IOV with"
                                         " non M64 VF BAR%d: %pR. \n",
                                  i, res);
@@ -2958,7 +2965,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
                         index++;
                 }
         } else if ((res->flags & IORESOURCE_MEM) &&
-                  !pnv_pci_is_mem_pref_64(res->flags)) {
+                  !pnv_pci_is_m64(phb, res)) {
                 region.start = res->start -
                                phb->hose->mem_offset[0] -
                                phb->ioda.m32_pci_base;
@@ -3083,9 +3090,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
                 bridge = bridge->bus->self;
         }
  
-       /* We fail back to M32 if M64 isn't supported */
-       if (phb->ioda.m64_segsize &&
-           pnv_pci_is_mem_pref_64(type))
+       /*
+        * We fall back to M32 if M64 isn't supported. We enforce the M64
+        * alignment for any 64-bit resource, PCIe doesn't care and
+        * bridges only do 64-bit prefetchable anyway.
+        */
+       if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
                 return phb->ioda.m64_segsize;
         if (type & IORESOURCE_MEM)
                 return phb->ioda.m32_segsize;
@@ -3125,7 +3135,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
                 w = NULL;
                 if (r->flags & type & IORESOURCE_IO)
                         w = &hose->io_resource;
-               else if (pnv_pci_is_mem_pref_64(r->flags) &&
+               else if (pnv_pci_is_m64(phb, r) &&
                          (type & IORESOURCE_PREFETCH) &&
                          phb->ioda.m64_segsize)
                         w = &hose->mem_resources[1];
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c

index 43f7beb2902d0b5d5b1001cff100fdf67c650fb4..76ec104e88beea0e89e3473d988e23fbdb7312c1 100644 (file)
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb)
         return dlpar_update_device_tree_lmb(lmb);
  }
  
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
-{
-       unsigned long section_nr;
-       struct mem_section *mem_sect;
-       struct memory_block *mem_block;
-
-       section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
-       mem_sect = __nr_to_section(section_nr);
-
-       mem_block = find_memory_block(mem_sect);
-       return mem_block;
-}
-
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
  {
@@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
  
  static int dlpar_add_lmb(struct of_drconf_cell *);
  
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+       unsigned long section_nr;
+       struct mem_section *mem_sect;
+       struct memory_block *mem_block;
+
+       section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+       mem_sect = __nr_to_section(section_nr);
+
+       mem_block = find_memory_block(mem_sect);
+       return mem_block;
+}
+
  static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
  {
         struct memory_block *mem_block;
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig

index 0031eda320c3de7d94e3d9198b0cbc9087a7aec4..385e7aa9e2731cabf13cfccef35a2934719cdbc3 100644 (file)
--- a/arch/powerpc/sysdev/xics/Kconfig
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -1,6 +1,7 @@
  config PPC_XICS
         def_bool n
         select PPC_SMP_MUXED_IPI
+       select HARDIRQS_SW_RESEND
  
  config PPC_ICP_NATIVE
         def_bool n
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c

index 27c936c080a66ffb5523931ac74d6c4c3a01849d..1c6bf4b66f56854d0717a644c75dea70e3fabde7 100644 (file)
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = {
         .irq_mask = ics_opal_mask_irq,
         .irq_unmask = ics_opal_unmask_irq,
         .irq_eoi = NULL, /* Patched at init time */
-       .irq_set_affinity = ics_opal_set_affinity
+       .irq_set_affinity = ics_opal_set_affinity,
+       .irq_set_type = xics_set_irq_type,
+       .irq_retrigger = xics_retrigger,
  };
  
  static int ics_opal_map(struct ics *ics, unsigned int virq);
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c

index 3854dd41558d2697e73f9d72f9dffb27327952a8..78ee5c778ef8c7650ccea536aff7fb5cefc0bca3 100644 (file)
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = {
         .irq_mask = ics_rtas_mask_irq,
         .irq_unmask = ics_rtas_unmask_irq,
         .irq_eoi = NULL, /* Patched at init time */
-       .irq_set_affinity = ics_rtas_set_affinity
+       .irq_set_affinity = ics_rtas_set_affinity,
+       .irq_set_type = xics_set_irq_type,
+       .irq_retrigger = xics_retrigger,
  };
  
  static int ics_rtas_map(struct ics *ics, unsigned int virq)
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c

index a795a5f0301c482ec8edeb3047464ea709f7326b..9d530f47958857621ad19655ac12f975f17e090d 100644 (file)
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq,
  
         pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
  
-       /* They aren't all level sensitive but we just don't really know */
-       irq_set_status_flags(virq, IRQ_LEVEL);
+       /*
+        * Mark interrupts as edge sensitive by default so that resend
+        * actually works. The device-tree parsing will turn the LSIs
+        * back to level.
+        */
+       irq_clear_status_flags(virq, IRQ_LEVEL);
  
         /* Don't call into ICS for IPIs */
         if (hw == XICS_IPI) {
@@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct,
                            irq_hw_number_t *out_hwirq, unsigned int *out_flags)
  
  {
-       /* Current xics implementation translates everything
-        * to level. It is not technically right for MSIs but this
-        * is irrelevant at this point. We might get smarter in the future
-        */
         *out_hwirq = intspec[0];
-       *out_flags = IRQ_TYPE_LEVEL_LOW;
  
+       /*
+        * If intsize is at least 2, we look for the type in the second cell
+        * and assume its LSB indicates a level interrupt.
+        */
+       if (intsize > 1) {
+               if (intspec[1] & 1)
+                       *out_flags = IRQ_TYPE_LEVEL_LOW;
+               else
+                       *out_flags = IRQ_TYPE_EDGE_RISING;
+       } else
+               *out_flags = IRQ_TYPE_LEVEL_LOW;
+
+       return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+       /*
+        * We only support these. This has really no effect other than setting
+        * the corresponding descriptor bits mind you but those will in turn
+        * affect the resend function when re-enabling an edge interrupt.
+        *
+        * We set the default to edge as explained in map().
+        */
+       if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+               flow_type = IRQ_TYPE_EDGE_RISING;
+
+       if (flow_type != IRQ_TYPE_EDGE_RISING &&
+           flow_type != IRQ_TYPE_LEVEL_LOW)
+               return -EINVAL;
+
+       irqd_set_trigger_type(d, flow_type);
+
+       return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+       /*
+        * We need to push a dummy CPPR when retriggering, since the subsequent
+        * EOI will try to pop it. Passing 0 works, as the function hard codes
+        * the priority value anyway.
+        */
+       xics_push_cppr(0);
+
+       /* Tell the core to do a soft retrigger */
         return 0;
  }
  
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig

index 0e348781327b2dcc9667b38344fb98e8c4936dcc..e751fe25d6ab670428f5c97b8464d9102baddbe6 100644 (file)
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -872,4 +872,17 @@ config S390_GUEST
           Select this option if you want to run the kernel as a guest under
           the KVM hypervisor.
  
+config S390_GUEST_OLD_TRANSPORT
+       def_bool y
+       prompt "Guest support for old s390 virtio transport (DEPRECATED)"
+       depends on S390_GUEST
+       help
+         Enable this option to add support for the old s390-virtio
+         transport (i.e. virtio devices NOT based on virtio-ccw). This
+         type of virtio device is only available on the experimental
+         kuli userspace or with old (< 2.6) qemu. If you are running
+         with a modern version of qemu (which supports virtio-ccw since
+         1.4 and uses it by default since version 2.4), you probably won't
+         need this.
+
  endmenu
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S

index f86a4eef28a929ebe235576264122c97fb9eb08e..28c4f96a2d9cef6d6ce97abc84890cc0c1f65615 100644 (file)
--- a/arch/s390/boot/compressed/head.S
+++ b/arch/s390/boot/compressed/head.S
@@ -21,16 +21,21 @@ ENTRY(startup_continue)
         lg      %r15,.Lstack-.LPG1(%r13)
         aghi    %r15,-160
         brasl   %r14,decompress_kernel
-       # setup registers for memory mover & branch to target
+       # Set up registers for memory mover. We move the decompressed image to
+       # 0x11000, starting at offset 0x11000 in the decompressed image so
+       # that code living at 0x11000 in the image will end up at 0x11000 in
+       # memory.
         lgr     %r4,%r2
         lg      %r2,.Loffset-.LPG1(%r13)
         la      %r4,0(%r2,%r4)
         lg      %r3,.Lmvsize-.LPG1(%r13)
         lgr     %r5,%r3
-       # move the memory mover someplace safe
+       # Move the memory mover someplace safe so it doesn't overwrite itself.
         la      %r1,0x200
         mvc     0(mover_end-mover,%r1),mover-.LPG1(%r13)
-       # decompress image is started at 0x11000
+       # When the memory mover is done we pass control to
+       # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in
+       # the decompressed image.
         lgr     %r6,%r2
         br      %r1
  mover:
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig

index 889ea345021092ac2bd9fe4b72aa51f6f56aad00..26e0c7f0881417349438c44a0155a378ab638131 100644 (file)
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -678,7 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m
  CONFIG_CRYPTO_DES_S390=m
  CONFIG_CRYPTO_AES_S390=m
  CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
  CONFIG_ASYMMETRIC_KEY_TYPE=y
  CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
  CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig

index 1bcfd764910a706fd44b9fe813f04f07a0d48e7a..24879dab47bc1e1b4a71c3f22232c83d1028ef10 100644 (file)
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -616,7 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m
  CONFIG_CRYPTO_DES_S390=m
  CONFIG_CRYPTO_AES_S390=m
  CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
  CONFIG_ASYMMETRIC_KEY_TYPE=y
  CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
  CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig

index 13ff090139c86a897270ba70f37a5ae1a471e43d..a5c1e5f2a0cab667501e83825d7c6aaeb4447206 100644 (file)
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -615,7 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m
  CONFIG_CRYPTO_DES_S390=m
  CONFIG_CRYPTO_AES_S390=m
  CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
  CONFIG_ASYMMETRIC_KEY_TYPE=y
  CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
  CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c

index 577ae1d4ae894e58c5e0c37cec32f77341e18735..2bad9d837029924b3e1955bce4da9c170192c0f0 100644 (file)
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -51,6 +51,9 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
                 struct kernel_fpu vxstate;                                  \
                 unsigned long prealign, aligned, remaining;                 \
                                                                             \
+               if (datalen < VX_MIN_LEN + VX_ALIGN_MASK)                   \
+                       return ___crc32_sw(crc, data, datalen);             \
+                                                                           \
                 if ((unsigned long)data & VX_ALIGN_MASK) {                  \
                         prealign = VX_ALIGNMENT -                           \
                                   ((unsigned long)data & VX_ALIGN_MASK);    \
@@ -59,9 +62,6 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
                         data = (void *)((unsigned long)data + prealign);    \
                 }                                                           \
                                                                             \
-               if (datalen < VX_MIN_LEN)                                   \
-                       return ___crc32_sw(crc, data, datalen);             \
-                                                                           \
                 aligned = datalen & ~VX_ALIGN_MASK;                         \
                 remaining = datalen & VX_ALIGN_MASK;                        \
                                                                             \
diff --git a/arch/s390/defconfig b/arch/s390/defconfig

index ccccebeeaaf67d7d058a1e0544fe8a50eba9e19a..73610f2e3b4fbbdbd9c030a8f689ecd71aaa08b5 100644 (file)
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -234,7 +234,7 @@ CONFIG_CRYPTO_SHA256_S390=m
  CONFIG_CRYPTO_SHA512_S390=m
  CONFIG_CRYPTO_DES_S390=m
  CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
  CONFIG_CRC7=m
  # CONFIG_XZ_DEC_X86 is not set
  # CONFIG_XZ_DEC_POWERPC is not set
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c

index 0f7bfeba6da6a3632f2705ebbaccc065ffd356a2..60a8a4e207edbc080a15b809b3efd97ea245942a 100644 (file)
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -209,7 +209,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
         /* Only trace if the calling function expects to. */
         if (!ftrace_graph_entry(&trace))
                 goto out;
-       if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+       if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+                                    NULL) == -EBUSY)
                 goto out;
         parent = (unsigned long) return_to_handler;
  out:
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S

index 56e4d8234ef2ea1c2970ebcd6d8d5d2d15b5a037..4431905f8cfa2cbac81ada709929201371669d99 100644 (file)
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -309,7 +309,9 @@ ENTRY(startup_kdump)
         l       %r15,.Lstack-.LPG0(%r13)
         ahi     %r15,-STACK_FRAME_OVERHEAD
         brasl   %r14,verify_facilities
-       /* Continue with startup code in head64.S */
+# For uncompressed images, continue in
+# arch/s390/kernel/head64.S. For compressed images, continue in
+# arch/s390/boot/compressed/head.S.
         jg      startup_continue
  
  .Lstack:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c

index 3f3ae4865d579e8a9420cc22c6c2b39f37e70a9f..f142215ed30dd1dc4ae7b14d57d72732a04b82b7 100644 (file)
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1672,6 +1672,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                                     KVM_SYNC_CRS |
                                     KVM_SYNC_ARCH0 |
                                     KVM_SYNC_PFAULT;
+       kvm_s390_set_prefix(vcpu, 0);
         if (test_kvm_facility(vcpu->kvm, 64))
                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
         /* fprs can be synchronized via vrs, even if the guest has no vx. With
@@ -2361,8 +2362,10 @@ retry:
                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
                                           kvm_s390_get_prefix(vcpu),
                                           PAGE_SIZE * 2, PROT_WRITE);
-               if (rc)
+               if (rc) {
+                       kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                         return rc;
+               }
                 goto retry;
         }
  
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c

index e390bbb16443db59d8a3551b2bdb633ae25c8d83..48352bffbc929cdc43cec471e92cc49bb23996fc 100644 (file)
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -237,11 +237,10 @@ char * strrchr(const char * s, int c)
  EXPORT_SYMBOL(strrchr);
  
  static inline int clcle(const char *s1, unsigned long l1,
-                       const char *s2, unsigned long l2,
-                       int *diff)
+                       const char *s2, unsigned long l2)
  {
         register unsigned long r2 asm("2") = (unsigned long) s1;
-       register unsigned long r3 asm("3") = (unsigned long) l2;
+       register unsigned long r3 asm("3") = (unsigned long) l1;
         register unsigned long r4 asm("4") = (unsigned long) s2;
         register unsigned long r5 asm("5") = (unsigned long) l2;
         int cc;
@@ -252,7 +251,6 @@ static inline int clcle(const char *s1, unsigned long l1,
                       "   srl   %0,28"
                       : "=&d" (cc), "+a" (r2), "+a" (r3),
                         "+a" (r4), "+a" (r5) : : "cc");
-       *diff = *(char *)r2 - *(char *)r4;
         return cc;
  }
  
@@ -270,9 +268,9 @@ char * strstr(const char * s1,const char * s2)
                 return (char *) s1;
         l1 = __strend(s1) - s1;
         while (l1-- >= l2) {
-               int cc, dummy;
+               int cc;
  
-               cc = clcle(s1, l1, s2, l2, &dummy);
+               cc = clcle(s1, l2, s2, l2);
                 if (!cc)
                         return (char *) s1;
                 s1++;
@@ -313,11 +311,11 @@ EXPORT_SYMBOL(memchr);
   */
  int memcmp(const void *cs, const void *ct, size_t n)
  {
-       int ret, diff;
+       int ret;
  
-       ret = clcle(cs, n, ct, n, &diff);
+       ret = clcle(cs, n, ct, n);
         if (ret)
-               ret = diff;
+               ret = ret == 1 ? -1 : 1;
         return ret;
  }
  EXPORT_SYMBOL(memcmp);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c

index 7104ffb5a67f8fb311184d3878e4a54f7d72d0cf..af7cf28cf97edcc71551917cf66cfb0214f56946 100644 (file)
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -252,6 +252,8 @@ static int change_page_attr(unsigned long addr, unsigned long end,
         int rc = -EINVAL;
         pgd_t *pgdp;
  
+       if (addr == end)
+               return 0;
         if (end >= MODULES_END)
                 return -EINVAL;
         mutex_lock(&cpa_mutex);
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c

index 38993e09ef03ef371932707058c57bd15dd2d875..95eccd49672f40ca7f5e10f998c3226581221c29 100644 (file)
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -382,7 +382,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
                 return;
         }
  
-       err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+       err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
         if (err == -EBUSY) {
                 __raw_writel(old, parent);
                 return;
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig

index 59b09600dd326b8d0e24853d720d18d73837cd69..f5d60f14a0bcf50ed87ba2c5a104c6e15f9f90be 100644 (file)
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -56,7 +56,6 @@ config SPARC64
         def_bool 64BIT
         select HAVE_FUNCTION_TRACER
         select HAVE_FUNCTION_GRAPH_TRACER
-       select HAVE_FUNCTION_GRAPH_FP_TEST
         select HAVE_KRETPROBES
         select HAVE_KPROBES
         select HAVE_RCU_TABLE_FREE if SMP
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h

index 3192a8e42fd62c6f244a2b5ba5887da395847abd..62755a339a5932b96ddc42fd2a0eb8c3affb149b 100644 (file)
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -9,6 +9,10 @@
  void _mcount(void);
  #endif
  
+#endif /* CONFIG_MCOUNT */
+
+#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY)
+#define HAVE_FUNCTION_GRAPH_FP_TEST
  #endif
  
  #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c

index 0a2d2ddff543fd325709fc46eaf48d438d06b88b..6bcff698069bf1fa57c1d84df180c1cb67f18a65 100644 (file)
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -131,7 +131,7 @@ unsigned long prepare_ftrace_return(unsigned long parent,
                 return parent + 8UL;
  
         if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
-                                    frame_pointer) == -EBUSY)
+                                    frame_pointer, NULL) == -EBUSY)
                 return parent + 8UL;
  
         trace.func = self_addr;
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c

index 4a572088b270bec26a0b10cce0453ab2d15e6c20..b827a418b155021104d27de2d821cf81e59babce 100644 (file)
--- a/arch/tile/kernel/ftrace.c
+++ b/arch/tile/kernel/ftrace.c
@@ -184,7 +184,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
         *parent = return_hooker;
  
         err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-                                      frame_pointer);
+                                      frame_pointer, NULL);
         if (err == -EBUSY) {
                 *parent = old;
                 return;
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h

index e35632ef23c759a43e4673d222aac430172cdad7..62dfc644c908ab5ffb173b493d5b97b93e8f7ca6 100644 (file)
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -98,7 +98,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
  }
  
  static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-               bool write, bool foreign)
+               bool write, bool execute, bool foreign)
  {
         /* by default, allow everything */
         return true;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index c580d8c33562ec5eba4dbfc273ae3ed7b6b67072..ce8860cccc34f52d4802c388853d65503438595c 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -94,6 +94,7 @@ config X86
         select HAVE_ARCH_TRANSPARENT_HUGEPAGE
         select HAVE_ARCH_WITHIN_STACK_FRAMES
         select HAVE_EBPF_JIT                    if X86_64
+       select HAVE_ARCH_VMAP_STACK             if X86_64
         select HAVE_CC_STACKPROTECTOR
         select HAVE_CMPXCHG_DOUBLE
         select HAVE_CMPXCHG_LOCAL
@@ -110,7 +111,6 @@ config X86
         select HAVE_EXIT_THREAD
         select HAVE_FENTRY                      if X86_64
         select HAVE_FTRACE_MCOUNT_RECORD
-       select HAVE_FUNCTION_GRAPH_FP_TEST
         select HAVE_FUNCTION_GRAPH_TRACER
         select HAVE_FUNCTION_TRACER
         select HAVE_GCC_PLUGINS
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile

index fe91c25092da2662b67413a154e446843e7d5f7a..77f28ce9c6464e71a942f767082391502c8fa80d 100644 (file)
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,6 +5,8 @@
  OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
  OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
  
+CFLAGS_syscall_64.o            += -Wno-override-init
+CFLAGS_syscall_32.o            += -Wno-override-init
  obj-y                          := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
  obj-y                          += common.o
  
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S

index 0b56666e6039b9d090cc01c014ccba2b2662be88..b75a8bcd2d23cced23df9bade46e6304499d926b 100644 (file)
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,34 +204,70 @@
         POP_GS_EX
  .endm
  
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+       /*
+        * Save callee-saved registers
+        * This must match the order in struct inactive_task_frame
+        */
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %edi
+       pushl   %esi
+
+       /* switch stack */
+       movl    %esp, TASK_threadsp(%eax)
+       movl    TASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+       movl    TASK_stack_canary(%edx), %ebx
+       movl    %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+       /* restore callee-saved registers */
+       popl    %esi
+       popl    %edi
+       popl    %ebx
+       popl    %ebp
+
+       jmp     __switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ * ebx: kernel thread func (NULL for user thread)
+ * edi: kernel thread arg
+ */
  ENTRY(ret_from_fork)
         pushl   %eax
         call    schedule_tail
         popl    %eax
  
+       testl   %ebx, %ebx
+       jnz     1f              /* kernel threads are uncommon */
+
+2:
         /* When we fork, we trace the syscall return in the child, too. */
         movl    %esp, %eax
         call    syscall_return_slowpath
         jmp     restore_all
-END(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
-       pushl   %eax
-       call    schedule_tail
-       popl    %eax
-       movl    PT_EBP(%esp), %eax
-       call    *PT_EBX(%esp)
-       movl    $0, PT_EAX(%esp)
  
+       /* kernel thread */
+1:     movl    %edi, %eax
+       call    *%ebx
         /*
-        * Kernel threads return to userspace as if returning from a syscall.
-        * We should check whether anything actually uses this path and, if so,
-        * consider switching it over to ret_from_fork.
+        * A kernel thread is allowed to return here after successfully
+        * calling do_execve().  Exit to userspace to complete the execve()
+        * syscall.
          */
-       movl    %esp, %eax
-       call    syscall_return_slowpath
-       jmp     restore_all
-ENDPROC(ret_from_kernel_thread)
+       movl    $0, PT_EAX(%esp)
+       jmp     2b
+END(ret_from_fork)
  
  /*
   * Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S

index 9f85827db24eff8088d497db6871b33ed4ebf796..c0373d6676744dc8c6121b4319e112ddf141d528 100644 (file)
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -288,11 +288,15 @@ return_from_SYSCALL_64:
         jne     opportunistic_sysret_failed
  
         /*
-        * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-        * restoring TF results in a trap from userspace immediately after
-        * SYSRET.  This would cause an infinite loop whenever #DB happens
-        * with register state that satisfies the opportunistic SYSRET
-        * conditions.  For example, single-stepping this user code:
+        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+        * restore RF properly. If the slowpath sets it for whatever reason, we
+        * need to restore it correctly.
+        *
+        * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+        * trap from userspace immediately after SYSRET.  This would cause an
+        * infinite loop whenever #DB happens with register state that satisfies
+        * the opportunistic SYSRET conditions.  For example, single-stepping
+        * this user code:
          *
          *           movq       $stuck_here, %rcx
          *           pushfq
@@ -347,8 +351,7 @@ ENTRY(stub_ptregs_64)
         jmp     entry_SYSCALL64_slow_path
  
  1:
-       /* Called from C */
-       jmp     *%rax                           /* called from C */
+       jmp     *%rax                           /* Called from C */
  END(stub_ptregs_64)
  
  .macro ptregs_stub func
@@ -364,42 +367,74 @@ END(ptregs_\func)
  #define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
  #include <asm/syscalls_64.h>
  
+/*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+       /*
+        * Save callee-saved registers
+        * This must match the order in inactive_task_frame
+        */
+       pushq   %rbp
+       pushq   %rbx
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+
+       /* switch stack */
+       movq    %rsp, TASK_threadsp(%rdi)
+       movq    TASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+       movq    TASK_stack_canary(%rsi), %rbx
+       movq    %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+       /* restore callee-saved registers */
+       popq    %r15
+       popq    %r14
+       popq    %r13
+       popq    %r12
+       popq    %rbx
+       popq    %rbp
+
+       jmp     __switch_to
+END(__switch_to_asm)
+
  /*
   * A newly forked process directly context switches into this address.
   *
- * rdi: prev task we switched from
+ * rax: prev task we switched from
+ * rbx: kernel thread func (NULL for user thread)
+ * r12: kernel thread arg
   */
  ENTRY(ret_from_fork)
-       LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+       movq    %rax, %rdi
         call    schedule_tail                   /* rdi: 'prev' task parameter */
  
-       testb   $3, CS(%rsp)                    /* from kernel_thread? */
-       jnz     1f
+       testq   %rbx, %rbx                      /* from kernel_thread? */
+       jnz     1f                              /* kernel threads are uncommon */
  
-       /*
-        * We came from kernel_thread.  This code path is quite twisted, and
-        * someone should clean it up.
-        *
-        * copy_thread_tls stashes the function pointer in RBX and the
-        * parameter to be passed in RBP.  The called function is permitted
-        * to call do_execve and thereby jump to user mode.
-        */
-       movq    RBP(%rsp), %rdi
-       call    *RBX(%rsp)
-       movl    $0, RAX(%rsp)
-
-       /*
-        * Fall through as though we're exiting a syscall.  This makes a
-        * twisted sort of sense if we just called do_execve.
-        */
-
-1:
+2:
         movq    %rsp, %rdi
         call    syscall_return_slowpath /* returns with IRQs disabled */
         TRACE_IRQS_ON                   /* user mode is traced as IRQS on */
         SWAPGS
         jmp     restore_regs_and_iret
+
+1:
+       /* kernel thread */
+       movq    %r12, %rdi
+       call    *%rbx
+       /*
+        * A kernel thread is allowed to return here after successfully
+        * calling do_execve().  Exit to userspace to complete the execve()
+        * syscall.
+        */
+       movq    $0, RAX(%rsp)
+       jmp     2b
  END(ret_from_fork)
  
  /*
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c

index 97a69dbba649b6bb7c6dd936440f572c5d02ed6f..9d35ec0cb8fc916ba3b4b63f5bdb1b6ebda5de55 100644 (file)
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -100,6 +100,12 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
         }
  }
  
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+               SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
  static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
  {
         if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@ static struct attribute_group snb_uncore_format_group = {
  
  static struct intel_uncore_ops snb_uncore_msr_ops = {
         .init_box       = snb_uncore_msr_init_box,
+       .enable_box     = snb_uncore_msr_enable_box,
         .exit_box       = snb_uncore_msr_exit_box,
         .disable_event  = snb_uncore_msr_disable_event,
         .enable_event   = snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
         }
  }
  
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+       wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+               SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
  static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
  {
         if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@ static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
  
  static struct intel_uncore_ops skl_uncore_msr_ops = {
         .init_box       = skl_uncore_msr_init_box,
+       .enable_box     = skl_uncore_msr_enable_box,
         .exit_box       = skl_uncore_msr_exit_box,
         .disable_event  = snb_uncore_msr_disable_event,
         .enable_event   = snb_uncore_msr_enable_event,
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c

index 824e54086e071456b170380c52e561dbbac62cf7..8aee83bcf71f2dc5a380009957d0858a7b4a2507 100644 (file)
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2626,7 +2626,7 @@ void hswep_uncore_cpu_init(void)
  
  static struct intel_uncore_type hswep_uncore_ha = {
         .name           = "ha",
-       .num_counters   = 5,
+       .num_counters   = 4,
         .num_boxes      = 2,
         .perf_ctr_bits  = 48,
         SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2645,7 @@ static struct uncore_event_desc hswep_uncore_imc_events[] = {
  
  static struct intel_uncore_type hswep_uncore_imc = {
         .name           = "imc",
-       .num_counters   = 5,
+       .num_counters   = 4,
         .num_boxes      = 8,
         .perf_ctr_bits  = 48,
         .fixed_ctr_bits = 48,
@@ -2691,7 +2691,7 @@ static struct intel_uncore_type hswep_uncore_irp = {
  
  static struct intel_uncore_type hswep_uncore_qpi = {
         .name                   = "qpi",
-       .num_counters           = 5,
+       .num_counters           = 4,
         .num_boxes              = 3,
         .perf_ctr_bits          = 48,
         .perf_ctr               = SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2773,7 @@ static struct event_constraint hswep_uncore_r3qpi_constraints[] = {
  
  static struct intel_uncore_type hswep_uncore_r3qpi = {
         .name           = "r3qpi",
-       .num_counters   = 4,
+       .num_counters   = 3,
         .num_boxes      = 3,
         .perf_ctr_bits  = 44,
         .constraints    = hswep_uncore_r3qpi_constraints,
@@ -2972,7 +2972,7 @@ static struct intel_uncore_type bdx_uncore_ha = {
  
  static struct intel_uncore_type bdx_uncore_imc = {
         .name           = "imc",
-       .num_counters   = 5,
+       .num_counters   = 4,
         .num_boxes      = 8,
         .perf_ctr_bits  = 48,
         .fixed_ctr_bits = 48,
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h

index f5befd4945f2a84d4123ce47d2c68d5bab1a0a88..124357773ffade4a78766e3cd021d928fa4f5866 100644 (file)
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -135,6 +135,7 @@ extern void init_apic_mappings(void);
  void register_lapic_address(unsigned long address);
  extern void setup_boot_APIC_clock(void);
  extern void setup_secondary_APIC_clock(void);
+extern void lapic_update_tsc_freq(void);
  extern int APIC_init_uniprocessor(void);
  
  #ifdef CONFIG_X86_64
@@ -170,6 +171,7 @@ static inline void init_apic_mappings(void) { }
  static inline void disable_local_APIC(void) { }
  # define setup_boot_APIC_clock x86_init_noop
  # define setup_secondary_APIC_clock x86_init_noop
+static inline void lapic_update_tsc_freq(void) { }
  #endif /* !CONFIG_X86_LOCAL_APIC */
  
  #ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h

index a4820d4df617daede213516eea42becd5b874640..eccd0ac6bc38857904565619857c68a7f8a494e6 100644 (file)
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -6,6 +6,7 @@
  # define MCOUNT_ADDR           ((unsigned long)(__fentry__))
  #else
  # define MCOUNT_ADDR           ((unsigned long)(mcount))
+# define HAVE_FUNCTION_GRAPH_FP_TEST
  #endif
  #define MCOUNT_INSN_SIZE       5 /* sizeof mcount call */
  
@@ -13,6 +14,8 @@
  #define ARCH_SUPPORTS_FTRACE_OPS 1
  #endif
  
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
  #ifndef __ASSEMBLY__
  extern void mcount(void);
  extern atomic_t modifying_ftrace_code;
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h

index 7178043b0e1dd69d20a6ff5ddaa37ee6c32841f8..59405a248fc2488c66259e5669c1f857b13954aa 100644 (file)
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,10 +22,6 @@ typedef struct {
  #ifdef CONFIG_SMP
         unsigned int irq_resched_count;
         unsigned int irq_call_count;
-       /*
-        * irq_tlb_count is double-counted in irq_call_count, so it must be
-        * subtracted from irq_call_count when displaying irq_call_count
-        */
         unsigned int irq_tlb_count;
  #endif
  #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h

index 223042086f4e9aa29498d3f159aaa2b757fa766a..737da62bfeb095e875912b18e5732b7ce04cb18e 100644 (file)
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -5,10 +5,10 @@ struct x86_mapping_info {
         void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
         void *context;                   /* context for alloc_pgt_page */
         unsigned long pmd_flag;          /* page flag for PMD entry */
-       bool kernel_mapping;             /* kernel mapping or ident mapping */
+       unsigned long offset;            /* ident mapping offset */
  };
  
  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-                               unsigned long addr, unsigned long end);
+                               unsigned long pstart, unsigned long pend);
  
  #endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h

index 1ef9d581b5d9829365160082acbb98d1f549d421..d318811884318aa8691cac64a7e34384446ec17e 100644 (file)
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,8 +24,6 @@ enum die_val {
  extern void printk_address(unsigned long address);
  extern void die(const char *, struct pt_regs *,long);
  extern int __must_check __die(const char *, struct pt_regs *, long);
-extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-                      unsigned long *sp, unsigned long bp);
  extern void show_stack_regs(struct pt_regs *regs);
  extern void __show_regs(struct pt_regs *regs, int all);
  extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h

index 7e8ec7ae10faff67a1c6fd26314ed53be3ed598f..1cc82ece9ac1819b92ec82aca72805c0e966af97 100644 (file)
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
   *
   * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
   * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
   *
   * G (8) is aliased and used as a PROT_NONE indicator for
   * !present ptes.  We need to start storing swap entries above
@@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
  #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
  #define SWP_TYPE_BITS 5
  /* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
  
  #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
  
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h

index 63def9537a2d249f5814cf73fac20f7d8873e232..b22fb5a4ff3cf1a70796bbac6a367e7a6fda55f5 100644 (file)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -389,9 +389,6 @@ struct thread_struct {
         unsigned short          fsindex;
         unsigned short          gsindex;
  #endif
-#ifdef CONFIG_X86_32
-       unsigned long           ip;
-#endif
  #ifdef CONFIG_X86_64
         unsigned long           fsbase;
         unsigned long           gsbase;
@@ -724,8 +721,6 @@ static inline void spin_lock_prefetch(const void *x)
         .addr_limit             = KERNEL_DS,                              \
  }
  
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
  /*
   * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
   * This is necessary to guarantee that the entire "struct pt_regs"
@@ -776,17 +771,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
         .addr_limit             = KERNEL_DS,                    \
  }
  
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t)     READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
-
  #define task_pt_regs(tsk)      ((struct pt_regs *)(tsk)->thread.sp0 - 1)
  extern unsigned long KSTK_ESP(struct task_struct *task);
  
  #endif /* CONFIG_X86_64 */
  
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
  extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
                                                unsigned long new_sp);
  
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h

index 9c6b890d5e7a0733ed7e92f1d3f1b1aa81bd349f..230e1903acf07faa831c8d2496a4457aaae9b5ea 100644 (file)
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -44,9 +44,9 @@ struct trampoline_header {
  extern struct real_mode_header *real_mode_header;
  extern unsigned char real_mode_blob_end[];
  
-extern unsigned long init_rsp;
  extern unsigned long initial_code;
  extern unsigned long initial_gs;
+extern unsigned long initial_stack;
  
  extern unsigned char real_mode_blob[];
  extern unsigned char real_mode_relocs[];
@@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
  extern unsigned char secondary_startup_64[];
  #endif
  
+static inline size_t real_mode_size_needed(void)
+{
+       if (real_mode_header)
+               return 0;       /* already allocated. */
+
+       return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
+}
+
+void set_real_mode_mem(phys_addr_t mem, size_t size);
  void reserve_real_mode(void);
-void setup_real_mode(void);
  
  #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h

index ebd0c164cd4e9033ebb42ff119f8cfe05c901f65..19980b36f394b18e6816629390130fa3eb789115 100644 (file)
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -39,9 +39,6 @@ DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
  DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
  #endif
  
-/* Static state in head.S used to set up a CPU */
-extern unsigned long stack_start; /* Initial stack pointer address */
-
  struct task_struct;
  
  struct smp_ops {
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h

index 0944218af9e279d631af9f03506a9672c4443c6d..7646fb2772f801a0950ec7c9ecbbcf4122e9ebcd 100644 (file)
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -8,6 +8,7 @@
  
  #include <linux/uaccess.h>
  #include <linux/ptrace.h>
+#include <asm/switch_to.h>
  
  extern int kstack_depth_to_print;
  
@@ -70,8 +71,7 @@ stack_frame(struct task_struct *task, struct pt_regs *regs)
                 return bp;
         }
  
-       /* bp is the last reg pushed by switch_to */
-       return *(unsigned long *)task->thread.sp;
+       return ((struct inactive_task_frame *)task->thread.sp)->bp;
  }
  #else
  static inline unsigned long
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h

index 8f321a1b03a1aaa0e87c4c1182d2b2f282efa1e4..5cb436acd46315b75ba40aacc780373ea2cc4f7c 100644 (file)
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,130 +2,66 @@
  #define _ASM_X86_SWITCH_TO_H
  
  struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+                                   struct task_struct *next);
+
  __visible struct task_struct *__switch_to(struct task_struct *prev,
-                                          struct task_struct *next);
+                                         struct task_struct *next);
  struct tss_struct;
  void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                       struct tss_struct *tss);
  
-#ifdef CONFIG_X86_32
+/* This runs on the previous thread's stack. */
+static inline void prepare_switch_to(struct task_struct *prev,
+                                    struct task_struct *next)
+{
+#ifdef CONFIG_VMAP_STACK
+       /*
+        * If we switch to a stack that has a top-level paging entry
+        * that is not present in the current mm, the resulting #PF will
+        * will be promoted to a double-fault and we'll panic.  Probe
+        * the new stack now so that vmalloc_fault can fix up the page
+        * tables if needed.  This can only happen if we use a stack
+        * in vmap space.
+        *
+        * We assume that the stack is aligned so that it never spans
+        * more than one top-level paging entry.
+        *
+        * To minimize cache pollution, just follow the stack pointer.
+        */
+       READ_ONCE(*(unsigned char *)next->thread.sp);
+#endif
+}
+
+asmlinkage void ret_from_fork(void);
+
+/* data that is pointed to by thread.sp */
+struct inactive_task_frame {
+#ifdef CONFIG_X86_64
+       unsigned long r15;
+       unsigned long r14;
+       unsigned long r13;
+       unsigned long r12;
+#else
+       unsigned long si;
+       unsigned long di;
+#endif
+       unsigned long bx;
+       unsigned long bp;
+       unsigned long ret_addr;
+};
  
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary                                                        \
-       "movl %P[task_canary](%[next]), %%ebx\n\t"                      \
-       "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam                                         \
-       , [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam                                         \
-       , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else  /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
+struct fork_frame {
+       struct inactive_task_frame frame;
+       struct pt_regs regs;
+};
  
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
  #define switch_to(prev, next, last)                                    \
  do {                                                                   \
-       /*                                                              \
-        * Context-switching clobbers all registers, so we clobber      \
-        * them explicitly, via unused output variables.                \
-        * (EAX and EBP is not listed because EBP is saved/restored     \
-        * explicitly for wchan access and EAX is the return value of   \
-        * __switch_to())                                               \
-        */                                                             \
-       unsigned long ebx, ecx, edx, esi, edi;                          \
-                                                                       \
-       asm volatile("pushl %%ebp\n\t"          /* save    EBP   */     \
-                    "movl %%esp,%[prev_sp]\n\t"        /* save    ESP   */ \
-                    "movl %[next_sp],%%esp\n\t"        /* restore ESP   */ \
-                    "movl $1f,%[prev_ip]\n\t"  /* save    EIP   */     \
-                    "pushl %[next_ip]\n\t"     /* restore EIP   */     \
-                    __switch_canary                                    \
-                    "jmp __switch_to\n"        /* regparm call  */     \
-                    "1:\t"                                             \
-                    "popl %%ebp\n\t"           /* restore EBP   */     \
-                                                                       \
-                    /* output parameters */                            \
-                    : [prev_sp] "=m" (prev->thread.sp),                \
-                      [prev_ip] "=m" (prev->thread.ip),                \
-                      "=a" (last),                                     \
-                                                                       \
-                      /* clobbered output registers: */                \
-                      "=b" (ebx), "=c" (ecx), "=d" (edx),              \
-                      "=S" (esi), "=D" (edi)                           \
-                                                                       \
-                      __switch_canary_oparam                           \
-                                                                       \
-                      /* input parameters: */                          \
-                    : [next_sp]  "m" (next->thread.sp),                \
-                      [next_ip]  "m" (next->thread.ip),                \
-                                                                       \
-                      /* regparm parameters for __switch_to(): */      \
-                      [prev]     "a" (prev),                           \
-                      [next]     "d" (next)                            \
+       prepare_switch_to(prev, next);                                  \
                                                                         \
-                      __switch_canary_iparam                           \
-                                                                       \
-                    : /* reloaded segment registers */                 \
-                       "memory");                                      \
+       ((last) = __switch_to_asm((prev), (next)));                     \
  } while (0)
  
-#else /* CONFIG_X86_32 */
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
-
-#define __EXTRA_CLOBBER  \
-       , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-         "r12", "r13", "r14", "r15", "flags"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary                                                          \
-       "movq %P[task_canary](%%rsi),%%r8\n\t"                            \
-       "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam                                           \
-       , [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam                                           \
-       , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else  /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
-
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
- * has no effect.
- */
-#define switch_to(prev, next, last) \
-       asm volatile(SAVE_CONTEXT                                         \
-            "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */       \
-            "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */    \
-            "call __switch_to\n\t"                                       \
-            "movq "__percpu_arg([current_task])",%%rsi\n\t"              \
-            __switch_canary                                              \
-            "movq %P[thread_info](%%rsi),%%r8\n\t"                       \
-            "movq %%rax,%%rdi\n\t"                                       \
-            "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"                 \
-            "jnz   ret_from_fork\n\t"                                    \
-            RESTORE_CONTEXT                                              \
-            : "=a" (last)                                                \
-              __switch_canary_oparam                                     \
-            : [next] "S" (next), [prev] "D" (prev),                      \
-              [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
-              [ti_flags] "i" (offsetof(struct thread_info, flags)),      \
-              [_tif_fork] "i" (_TIF_FORK),                               \
-              [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-              [current_task] "m" (current_task)                          \
-              __switch_canary_iparam                                     \
-            : "memory", "cc" __EXTRA_CLOBBER)
-
-#endif /* CONFIG_X86_32 */
-
  #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h

index 8b7c8d8e0852cf50d5a03e24f89ae9c693a1891c..494c4b5ada347552922878cd9f102de2f54d4a3b 100644 (file)
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -95,7 +95,6 @@ struct thread_info {
  #define TIF_UPROBE             12      /* breakpointed or singlestepping */
  #define TIF_NOTSC              16      /* TSC is not accessible in userland */
  #define TIF_IA32               17      /* IA32 compatibility process */
-#define TIF_FORK               18      /* ret_from_fork */
  #define TIF_NOHZ               19      /* in adaptive nohz mode */
  #define TIF_MEMDIE             20      /* is terminating due to OOM killer */
  #define TIF_POLLING_NRFLAG     21      /* idle is polling for TIF_NEED_RESCHED */
@@ -119,7 +118,6 @@ struct thread_info {
  #define _TIF_UPROBE            (1 << TIF_UPROBE)
  #define _TIF_NOTSC             (1 << TIF_NOTSC)
  #define _TIF_IA32              (1 << TIF_IA32)
-#define _TIF_FORK              (1 << TIF_FORK)
  #define _TIF_NOHZ              (1 << TIF_NOHZ)
  #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
  #define _TIF_IO_BITMAP         (1 << TIF_IO_BITMAP)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h

index 4e5be94e079a6c64353bd327c9fe4ef9796e16b1..6fa85944af83d8ddbbad3a344a31a7920e64e6d0 100644 (file)
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
  
  static inline void __native_flush_tlb(void)
  {
+       /*
+        * If current->mm == NULL then we borrow a mm which may change during a
+        * task switch and therefore we must not be preempted while we write CR3
+        * back:
+        */
+       preempt_disable();
         native_write_cr3(native_read_cr3());
+       preempt_enable();
  }
  
  static inline void __native_flush_tlb_global_irq_disabled(void)
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h

index c852590254d5f4191609f92fce8ee488896efe8b..e652a7cc61863667fad8ff2baa0a7242485600df 100644 (file)
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -79,7 +79,7 @@ struct uv_gam_range_entry {
         u16     nasid;          /* HNasid */
         u16     sockid;         /* Socket ID, high bits of APIC ID */
         u16     pnode;          /* Index to MMR and GRU spaces */
-       u32     pxm;            /* ACPI proximity domain number */
+       u32     unused2;
         u32     limit;          /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
  };
  
@@ -88,7 +88,8 @@ struct uv_gam_range_entry {
  #define        UV_SYSTAB_VERSION_UV4           0x400   /* UV4 BIOS base version */
  #define        UV_SYSTAB_VERSION_UV4_1         0x401   /* + gpa_shift */
  #define        UV_SYSTAB_VERSION_UV4_2         0x402   /* + TYPE_NVRAM/WINDOW/MBOX */
-#define        UV_SYSTAB_VERSION_UV4_LATEST    UV_SYSTAB_VERSION_UV4_2
+#define        UV_SYSTAB_VERSION_UV4_3         0x403   /* - GAM Range PXM Value */
+#define        UV_SYSTAB_VERSION_UV4_LATEST    UV_SYSTAB_VERSION_UV4_3
  
  #define        UV_SYSTAB_TYPE_UNUSED           0       /* End of table (offset == 0) */
  #define        UV_SYSTAB_TYPE_GAM_PARAMS       1       /* GAM PARAM conversions */
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c

index adb3eaf8fe2a5e038c2444bd8f75b55716faf607..48587335ede8e2296b80ff991d1bf4e8e155ad46 100644 (file)
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void)
         saved_magic = 0x12345678;
  #else /* CONFIG_64BIT */
  #ifdef CONFIG_SMP
-       stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
+       initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
         early_gdt_descr.address =
                         (unsigned long)get_cpu_gdt_table(smp_processor_id());
         initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c

index 20abd912f0e4a6bbaa364dd97b388d0dc3da136a..cea4fc19e8447d14fd6dcba5df94aa91f7a2332d 100644 (file)
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -313,7 +313,7 @@ int lapic_get_maxlvt(void)
  
  /* Clock divisor */
  #define APIC_DIVISOR 16
-#define TSC_DIVISOR  32
+#define TSC_DIVISOR  8
  
  /*
   * This function sets up the local APIC timer, with a timeout of
@@ -565,12 +565,36 @@ static void setup_APIC_timer(void)
                                     CLOCK_EVT_FEAT_DUMMY);
                 levt->set_next_event = lapic_next_deadline;
                 clockevents_config_and_register(levt,
-                                               (tsc_khz / TSC_DIVISOR) * 1000,
+                                               tsc_khz * (1000 / TSC_DIVISOR),
                                                 0xF, ~0UL);
         } else
                 clockevents_register_device(levt);
  }
  
+/*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+       struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+       if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+               return;
+
+       clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+       /*
+        * The clockevent device's ->mult and ->shift can both be
+        * changed. In order to avoid races, schedule the frequency
+        * update code on each CPU.
+        */
+       on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
+}
+
  /*
   * In this functions we calibrate APIC bus clocks to the external timer.
   *
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c

index 766bdef1e1d727a5a714ca971ae4f7667555f162..200af5ae96626e1610c7c946bb0a33c5360f9bcb 100644 (file)
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
  /*
   * At CPU state changes, update the x2apic cluster sibling info.
   */
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
  {
         if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
                 return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
         return 0;
  }
  
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
  {
         int cpu;
  
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
  static int x2apic_cluster_probe(void)
  {
         int cpu = smp_processor_id();
+       int ret;
  
         if (!x2apic_mode)
                 return 0;
  
+       ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+                               x2apic_prepare_cpu, x2apic_dead_cpu);
+       if (ret < 0) {
+               pr_err("Failed to register X2APIC_PREPARE\n");
+               return 0;
+       }
         cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-       cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
-                         x2apic_prepare_cpu, x2apic_dead_cpu);
         return 1;
  }
  
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c

index ed887dedd35e9ae978a2978c56ccf7b469ec8925..b9f6157d42717010821b80b7ee53947208525a7f 100644 (file)
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
         if (strncmp(oem_id, "SGI", 3) != 0)
                 return 0;
  
+       if (numa_off) {
+               pr_err("UV: NUMA is off, disabling UV support\n");
+               return 0;
+       }
+
         /* Setup early hub type field in uv_hub_info for Node 0 */
         uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
  
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
         struct uv_gam_range_entry *gre = uv_gre_table;
         struct uv_gam_range_s *grt;
         unsigned long last_limit = 0, ram_limit = 0;
-       int bytes, i, sid, lsid = -1;
+       int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
  
         if (!gre)
                 return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
                 }
                 sid = gre->sockid - _min_socket;
                 if (lsid < sid) {               /* new range */
-                       grt = &_gr_table[sid];
-                       grt->base = lsid;
+                       grt = &_gr_table[indx];
+                       grt->base = lindx;
                         grt->nasid = gre->nasid;
                         grt->limit = last_limit = gre->limit;
                         lsid = sid;
+                       lindx = indx++;
                         continue;
                 }
                 if (lsid == sid && !ram_limit) {        /* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
                 }
                 if (!ram_limit) {               /* non-contiguous ram range */
                         grt++;
-                       grt->base = sid - 1;
+                       grt->base = lindx;
                         grt->nasid = gre->nasid;
                         grt->limit = last_limit = gre->limit;
                         continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
         for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
                 if (!index) {
                         pr_info("UV: GAM Range Table...\n");
-                       pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+                       pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n",
                                 "Range", "", "Size", "Type", "NASID",
-                               "SID", "PN", "PXM");
+                               "SID", "PN");
                 }
                 pr_info(
-               "UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x %3d\n",
+               "UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
                         index++,
                         (unsigned long)lgre << UV_GAM_RANGE_SHFT,
                         (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
                         ((unsigned long)(gre->limit - lgre)) >>
                                 (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
-                       gre->type, gre->nasid, gre->sockid,
-                       gre->pnode, gre->pxm);
+                       gre->type, gre->nasid, gre->sockid, gre->pnode);
  
                 lgre = gre->limit;
                 if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
                 _pnode_to_socket[i] = SOCK_EMPTY;
  
         /* fill in pnode/node/addr conversion list values */
-       pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+       pr_info("UV: GAM Building socket/pnode conversion tables\n");
         for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
                 if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
                         continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
                 if (_socket_to_pnode[i] != SOCK_EMPTY)
                         continue;       /* duplicate */
                 _socket_to_pnode[i] = gre->pnode;
-               _socket_to_node[i] = gre->pxm;
  
                 i = gre->pnode - minpnode;
                 _pnode_to_socket[i] = gre->sockid;
  
                 pr_info(
-               "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+               "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
                         gre->sockid, gre->type, gre->nasid,
                         _socket_to_pnode[gre->sockid - minsock],
-                       _socket_to_node[gre->sockid - minsock],
                         _pnode_to_socket[gre->pnode - minpnode]);
         }
  
-       /* check socket -> node values */
+       /* Set socket -> node values */
         lnid = -1;
         for_each_present_cpu(cpu) {
                 int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
                 lnid = nid;
                 apicid = per_cpu(x86_cpu_to_apicid, cpu);
                 sockid = apicid >> uv_cpuid.socketid_shift;
-               i = sockid - minsock;
-
-               if (nid != _socket_to_node[i]) {
-                       pr_warn(
-                       "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
-                               i, sockid, gre->type, _socket_to_node[i], nid);
-                       _socket_to_node[i] = nid;
-               }
+               _socket_to_node[sockid - minsock] = nid;
+               pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+                       sockid, apicid, nid);
         }
  
         /* Setup physical blade to pnode translation from GAM Range Table */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c

index 2bd5c6ff7ee7c3634af74640f993c0104c405b48..db3a0af9b9ec7d7eef938ba78050c5272a8dca5e 100644 (file)
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -28,6 +28,12 @@
  #endif
  
  void common(void) {
+       BLANK();
+       OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+       OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
+
         BLANK();
         OFFSET(TI_flags, thread_info, flags);
         OFFSET(TI_status, thread_info, status);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c

index ecdc1d217dc0f50a7b760c25cec8bee586643336..880aa093268df7a0d7db23abde984647880c47cc 100644 (file)
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
         /* Size of SYSENTER_stack */
         DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
  
+#ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+       OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
  #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
         BLANK();
         OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c

index d875f97d4e0ba0477c648e1244ff238bfd48be03..210927ee2e74ad8db99ecc691dc67fec5c4dc38e 100644 (file)
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
         OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
         BLANK();
  
+#ifdef CONFIG_CC_STACKPROTECTOR
+       DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+       BLANK();
+#endif
+
         DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
         DEFINE(NR_syscalls, sizeof(syscalls_64));
  
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c

index d3b91be4873bf6707b2ae76318a20681ffd478a4..06919427d4518b2eaa9b777063512b838b420a25 100644 (file)
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1286,7 +1286,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
  EXPORT_PER_CPU_SYMBOL(current_task);
  
  DEFINE_PER_CPU(char *, irq_stack_ptr) =
-       init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+       init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
  
  DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
  
@@ -1310,11 +1310,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
  /* May not be marked __init: used by software suspend */
  void syscall_init(void)
  {
-       /*
-        * LSTAR and STAR live in a bit strange symbiosis.
-        * They both write to the same internal register. STAR allows to
-        * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
-        */
         wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
         wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
  
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c

index 27a0228c9cae0dd32d2d5aff092a5b39162d6593..b816971f5da4cd45d1461831283aafb39578f296 100644 (file)
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -355,6 +355,7 @@ void load_ucode_amd_ap(void)
         unsigned int cpu = smp_processor_id();
         struct equiv_cpu_entry *eq;
         struct microcode_amd *mc;
+       u8 *cont = container;
         u32 rev, eax;
         u16 eq_id;
  
@@ -371,8 +372,11 @@ void load_ucode_amd_ap(void)
         if (check_current_patch_level(&rev, false))
                 return;
  
+       /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+       cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
         eax = cpuid_eax(0x00000001);
-       eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+       eq  = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
  
         eq_id = find_equiv_id(eq, eax);
         if (!eq_id)
@@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void)
         else
                 container = cont_va;
  
+       /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+       container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
         eax   = cpuid_eax(0x00000001);
         eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
  
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c

index 92e8f0a7159cadbafba6763081c2b33293be420a..01072e9e165e36ff076990402bfbc7c5606aea33 100644 (file)
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -38,38 +38,6 @@ void printk_address(unsigned long address)
         pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
  }
  
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-                       const struct stacktrace_ops *ops,
-                       struct task_struct *task, int *graph)
-{
-       unsigned long ret_addr;
-       int index;
-
-       if (addr != (unsigned long)return_to_handler)
-               return;
-
-       index = task->curr_ret_stack;
-
-       if (!task->ret_stack || index < *graph)
-               return;
-
-       index -= *graph;
-       ret_addr = task->ret_stack[index].ret;
-
-       ops->address(data, ret_addr, 1);
-
-       (*graph)++;
-}
-#else
-static inline void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-                       const struct stacktrace_ops *ops,
-                       struct task_struct *task, int *graph)
-{ }
-#endif
-
  /*
   * x86-64 can have up to three kernel stacks:
   * process stack
@@ -107,18 +75,33 @@ print_context_stack(struct task_struct *task,
                 stack = (unsigned long *)task_stack_page(task);
  
         while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
-               unsigned long addr;
+               unsigned long addr = *stack;
  
-               addr = *stack;
                 if (__kernel_text_address(addr)) {
+                       unsigned long real_addr;
+                       int reliable = 0;
+
                         if ((unsigned long) stack == bp + sizeof(long)) {
-                               ops->address(data, addr, 1);
+                               reliable = 1;
                                 frame = frame->next_frame;
                                 bp = (unsigned long) frame;
-                       } else {
-                               ops->address(data, addr, 0);
                         }
-                       print_ftrace_graph_addr(addr, data, ops, task, graph);
+
+                       /*
+                        * When function graph tracing is enabled for a
+                        * function, its return address on the stack is
+                        * replaced with the address of an ftrace handler
+                        * (return_to_handler).  In that case, before printing
+                        * the "real" address, we want to print the handler
+                        * address as an "unreliable" hint that function graph
+                        * tracing was involved.
+                        */
+                       real_addr = ftrace_graph_ret_addr(task, graph, addr,
+                                                         stack);
+                       if (real_addr != addr)
+                               ops->address(data, addr, 0);
+
+                       ops->address(data, real_addr, reliable);
                 }
                 stack++;
         }
@@ -133,19 +116,21 @@ print_context_stack_bp(struct task_struct *task,
                        unsigned long *end, int *graph)
  {
         struct stack_frame *frame = (struct stack_frame *)bp;
-       unsigned long *ret_addr = &frame->return_address;
+       unsigned long *retp = &frame->return_address;
  
-       while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
-               unsigned long addr = *ret_addr;
+       while (valid_stack_ptr(task, retp, sizeof(*retp), end)) {
+               unsigned long addr = *retp;
+               unsigned long real_addr;
  
                 if (!__kernel_text_address(addr))
                         break;
  
-               if (ops->address(data, addr, 1))
+               real_addr = ftrace_graph_ret_addr(task, graph, addr, retp);
+               if (ops->address(data, real_addr, 1))
                         break;
+
                 frame = frame->next_frame;
-               ret_addr = &frame->return_address;
-               print_ftrace_graph_addr(addr, data, ops, task, graph);
+               retp = &frame->return_address;
         }
  
         return (unsigned long)frame;
@@ -182,12 +167,6 @@ show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
         dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
  }
  
-void show_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp)
-{
-       show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
  void show_stack(struct task_struct *task, unsigned long *sp)
  {
         unsigned long bp = 0;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c

index 9ee4520ce83c8fe0dd127a50e7022010f86414ca..066eb5c77fd66e8eeeecfc22d20a65c1f82ddecd 100644 (file)
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -103,9 +103,6 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
         return (stack >= irq_stack && stack < irq_stack_end);
  }
  
-static const unsigned long irq_stack_size =
-       (IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
-
  enum stack_type {
         STACK_IS_UNKNOWN,
         STACK_IS_NORMAL,
@@ -133,7 +130,7 @@ analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
                 return STACK_IS_NORMAL;
  
         *stack_end = irq_stack;
-       irq_stack = irq_stack - irq_stack_size;
+       irq_stack -= (IRQ_STACK_SIZE / sizeof(long));
  
         if (in_irq_stack(stack, irq_stack, *stack_end))
                 return STACK_IS_IRQ;
@@ -202,7 +199,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
  
                         bp = ops->walk_stack(task, stack, bp, ops,
                                              data, stack_end, &graph);
-                       ops->stack(data, "<EOE>");
+                       ops->stack(data, "EOE");
                         /*
                          * We link to the next stack via the
                          * second-to-last pointer (index -2 to end) in the
@@ -256,8 +253,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
         preempt_disable();
         cpu = smp_processor_id();
  
-       irq_stack_end   = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
-       irq_stack       = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
+       irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
+       irq_stack     = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
  
         /*
          * Debugging aid: "show_stack(NULL, NULL);" prints the
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c

index 680049aa4593ca773d9860a2b8af77eab3839f31..01567aa87503f021cfb3b4104520842b1ec87179 100644 (file)
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
         return get_xsave_addr(&fpu->state.xsave, xsave_state);
  }
  
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state".  This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
-               int xstate_feature_mask)
-{
-       xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- *     @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- *     XFEATURE_MASK_SSE, etc...)
- *     @xsave_state_ptr: a pointer to a copy of the state that you would
- *     like written in to the current task's FPU xsave state.  This pointer
- *     must not be located in the current tasks's xsave area.
- * Output:
- *     address of the state in the xsave area or NULL if the state
- *     is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
-               void *xstate_feature_src, size_t len)
-{
-       struct xregs_state *xsave = &current->thread.fpu.state.xsave;
-       struct fpu *fpu = &current->thread.fpu;
-       void *dst;
-
-       if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
-               WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
-               return;
-       }
-
-       /*
-        * Tell the FPU code that we need the FPU state to be in
-        * 'fpu' (not in the registers), and that we need it to
-        * be stable while we write to it.
-        */
-       fpu__current_fpstate_write_begin();
-
-       /*
-        * This method *WILL* *NOT* work for compact-format
-        * buffers.  If the 'xstate_feature_mask' is unset in
-        * xcomp_bv then we may need to move other feature state
-        * "up" in the buffer.
-        */
-       if (xsave->header.xcomp_bv & xstate_feature_mask) {
-               WARN_ON_ONCE(1);
-               goto out;
-       }
-
-       /* find the location in the xsave buffer of the desired state */
-       dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
-       /*
-        * Make sure that the pointer being passed in did not
-        * come from the xsave buffer itself.
-        */
-       WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
-       /* put the caller-provided data in the location */
-       memcpy(dst, xstate_feature_src, len);
-
-       /*
-        * Mark the xfeature so that the CPU knows there is state
-        * in the buffer now.
-        */
-       fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
-       /*
-        * We are done writing to the 'fpu'.  Reenable preeption
-        * and (possibly) move the fpstate back in to the fpregs.
-        */
-       fpu__current_fpstate_write_end();
-}
-
  #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
  #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
  
  /*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory.  We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify the PKRU register to set the access
+ * rights for @pkey to @init_val.
   */
  int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
                 unsigned long init_val)
  {
-       struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
-       struct pkru_state *old_pkru_state;
-       struct pkru_state new_pkru_state;
+       u32 old_pkru;
         int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
         u32 new_pkru_bits = 0;
  
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
          */
         if (!boot_cpu_has(X86_FEATURE_OSPKE))
                 return -EINVAL;
+       /*
+        * For most XSAVE components, this would be an arduous task:
+        * bringing fpstate up to date with fpregs, updating fpstate,
+        * then re-populating fpregs.  But, for components that are
+        * never lazily managed, we can just access the fpregs
+        * directly.  PKRU is never managed lazily, so we can just
+        * manipulate it directly.  Make sure it stays that way.
+        */
+       WARN_ON_ONCE(!use_eager_fpu());
  
         /* Set the bits we need in PKRU:  */
         if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
         /* Shift the bits in to the correct place in PKRU for pkey: */
         new_pkru_bits <<= pkey_shift;
  
-       /* Locate old copy of the state in the xsave buffer: */
-       old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
-
-       /*
-        * When state is not in the buffer, it is in the init
-        * state, set it manually.  Otherwise, copy out the old
-        * state.
-        */
-       if (!old_pkru_state)
-               new_pkru_state.pkru = 0;
-       else
-               new_pkru_state.pkru = old_pkru_state->pkru;
-
-       /* Mask off any old bits in place: */
-       new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
-       /* Set the newly-requested bits: */
-       new_pkru_state.pkru |= new_pkru_bits;
-
-       /*
-        * We could theoretically live without zeroing pkru.pad.
-        * The current XSAVE feature state definition says that
-        * only bytes 0->3 are used.  But we do not want to
-        * chance leaking kernel stack out to userspace in case a
-        * memcpy() of the whole xsave buffer was done.
-        *
-        * They're in the same cacheline anyway.
-        */
-       new_pkru_state.pad = 0;
+       /* Get old PKRU and mask off any old bits in place: */
+       old_pkru = read_pkru();
+       old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
  
-       fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+       /* Write old part along with new part: */
+       write_pkru(old_pkru | new_pkru_bits);
  
         return 0;
  }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c

index d036cfb4495db30015db71eee64cf7f633603a2b..8639bb2ae05868ab65d88e44683f44c8651121f3 100644 (file)
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
         }
  
         if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-                   frame_pointer) == -EBUSY) {
+                                    frame_pointer, parent) == -EBUSY) {
                 *parent = old;
                 return;
         }
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c

index 2dda0bc4576ebf7a6940056e621d0bdb4850aa79..f16c55bfc0907bc4a3b1f35174ca5f5cbc53ce99 100644 (file)
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
         /* Initialize 32bit specific setup functions */
         x86_init.resources.reserve_resources = i386_reserve_resources;
         x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
-       reserve_bios_regions();
  }
  
  asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c

index 99d48e7d2974c64fe7b3fe9c1dd59e7f3b189cbb..54a2372f5dbb1eb0598788e944ad28708b638671 100644 (file)
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
                 copy_bootdata(__va(real_mode_data));
  
         x86_early_init_platform_quirks();
-       reserve_bios_regions();
  
         switch (boot_params.hdr.hardware_subarch) {
         case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S

index 6f8902b0d1514bd9f5b2b3ea86f55f96fa6d8618..5f401262f12d08c50d6411d80de056ef87b6c6e9 100644 (file)
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
   */
  __HEAD
  ENTRY(startup_32)
-       movl pa(stack_start),%ecx
+       movl pa(initial_stack),%ecx
         
         /* test KEEP_SEGMENTS flag to see if the bootloader is asking
                 us to not reload segments */
@@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4
   * start_secondary().
   */
  ENTRY(start_cpu0)
-       movl stack_start, %ecx
+       movl initial_stack, %ecx
         movl %ecx, %esp
         jmp  *(initial_code)
  ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@ ENTRY(startup_32_smp)
         movl %eax,%es
         movl %eax,%fs
         movl %eax,%gs
-       movl pa(stack_start),%ecx
+       movl pa(initial_stack),%ecx
         movl %eax,%ss
         leal -__PAGE_OFFSET(%ecx),%esp
  
@@ -703,7 +703,7 @@ ENTRY(initial_page_table)
  
  .data
  .balign 4
-ENTRY(stack_start)
+ENTRY(initial_stack)
         .long init_thread_union+THREAD_SIZE
  
  __INITRODATA
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S

index 9f8efc9f00756a4e19c4b12e834483b6cf7150c4..c98a559c346ed0c2b5092181130b2a3a2f7b1dcf 100644 (file)
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,7 +66,7 @@ startup_64:
          */
  
         /*
-        * Setup stack for verify_cpu(). "-8" because stack_start is defined
+        * Setup stack for verify_cpu(). "-8" because initial_stack is defined
          * this way, see below. Our best guess is a NULL ptr for stack
          * termination heuristics and we don't want to break anything which
          * might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@ ENTRY(secondary_startup_64)
         movq    %rax, %cr0
  
         /* Setup a boot time stack */
-       movq stack_start(%rip), %rsp
+       movq initial_stack(%rip), %rsp
  
         /* zero EFLAGS after setting rsp */
         pushq $0
@@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64)
   * start_secondary().
   */
  ENTRY(start_cpu0)
-       movq stack_start(%rip),%rsp
+       movq initial_stack(%rip),%rsp
         movq    initial_code(%rip),%rax
         pushq   $0              # fake return address to stop unwinder
         pushq   $__KERNEL_CS    # set correct cs
@@ -319,17 +319,15 @@ ENTRY(start_cpu0)
  ENDPROC(start_cpu0)
  #endif
  
-       /* SMP bootup changes these two */
+       /* Both SMP bootup and ACPI suspend change these variables */
         __REFDATA
         .balign 8
         GLOBAL(initial_code)
         .quad   x86_64_start_kernel
         GLOBAL(initial_gs)
         .quad   INIT_PER_CPU_VAR(irq_stack_union)
-
-       GLOBAL(stack_start)
+       GLOBAL(initial_stack)
         .quad  init_thread_union+THREAD_SIZE-8
-       .word  0
         __FINITDATA
  
  bad_address:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c

index ed16e58658a4201184fd0b1d8c470f5eb3e32c1e..c6dfd801df973039e8c00f1d48ad4058713c914d 100644 (file)
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1242,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
         memset(&curr_time, 0, sizeof(struct rtc_time));
  
         if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
-               mc146818_set_time(&curr_time);
+               mc146818_get_time(&curr_time);
  
         if (hpet_rtc_flags & RTC_UIE &&
             curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c

index 61521dc19c102114e177cbc21a4f5da9d94c20cd..9f669fdd20106cbb53b404e7085d31b4244bd7c7 100644 (file)
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
         seq_puts(p, "  Rescheduling interrupts\n");
         seq_printf(p, "%*s: ", prec, "CAL");
         for_each_online_cpu(j)
-               seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
-                                       irq_stats(j)->irq_tlb_count);
+               seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
         seq_puts(p, "  Function call interrupts\n");
         seq_printf(p, "%*s: ", prec, "TLB");
         for_each_online_cpu(j)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c

index 04cde527d72849be75ccb65e0d7ed650a1ef3a82..8e36f249646e25d20bc2bcc04b7a0ccc292e498c 100644 (file)
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
  #include <asm/apicdef.h>
  #include <asm/apic.h>
  #include <asm/nmi.h>
+#include <asm/switch_to.h>
  
  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
  {
@@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
         gdb_regs[GDB_DX]        = 0;
         gdb_regs[GDB_SI]        = 0;
         gdb_regs[GDB_DI]        = 0;
-       gdb_regs[GDB_BP]        = *(unsigned long *)p->thread.sp;
+       gdb_regs[GDB_BP]        = ((struct inactive_task_frame *)p->thread.sp)->bp;
  #ifdef CONFIG_X86_32
         gdb_regs[GDB_DS]        = __KERNEL_DS;
         gdb_regs[GDB_ES]        = __KERNEL_DS;
         gdb_regs[GDB_PS]        = 0;
         gdb_regs[GDB_CS]        = __KERNEL_CS;
-       gdb_regs[GDB_PC]        = p->thread.ip;
         gdb_regs[GDB_SS]        = __KERNEL_DS;
         gdb_regs[GDB_FS]        = 0xFFFF;
         gdb_regs[GDB_GS]        = 0xFFFF;
  #else
-       gdb_regs32[GDB_PS]      = *(unsigned long *)(p->thread.sp + 8);
+       gdb_regs32[GDB_PS]      = 0;
         gdb_regs32[GDB_CS]      = __KERNEL_CS;
         gdb_regs32[GDB_SS]      = __KERNEL_DS;
-       gdb_regs[GDB_PC]        = 0;
         gdb_regs[GDB_R8]        = 0;
         gdb_regs[GDB_R9]        = 0;
         gdb_regs[GDB_R10]       = 0;
@@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
         gdb_regs[GDB_R14]       = 0;
         gdb_regs[GDB_R15]       = 0;
  #endif
+       gdb_regs[GDB_PC]        = 0;
         gdb_regs[GDB_SP]        = p->thread.sp;
  }
  
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c

index 62c0b0ea2ce4483cb2ab29dc10492f877b2f93e7..c1fa790c81cd51a0cb5f5b1650a749c3b86300ee 100644 (file)
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
  #include <asm/tlbflush.h>
  #include <asm/mce.h>
  #include <asm/vm86.h>
+#include <asm/switch_to.h>
  
  /*
   * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -512,6 +513,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
         return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
  }
  
+/*
+ * Return saved PC of a blocked thread.
+ * What is this good for? It will always be the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+       struct inactive_task_frame *frame =
+               (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+       return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
  /*
   * Called from fs/proc with a reference on @p to find the function
   * which called into schedule(). This needs to be done carefully
@@ -556,7 +568,7 @@ unsigned long get_wchan(struct task_struct *p)
         if (sp < bottom || sp > top)
                 return 0;
  
-       fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+       fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
         do {
                 if (fp < bottom || fp > top)
                         return 0;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c

index d86be29c38c73c8bb8e128aa3fdeae53ad39b1c1..404efdfa083b45c9bf719181ab434d15121ed4c0 100644 (file)
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,17 +55,6 @@
  #include <asm/switch_to.h>
  #include <asm/vm86.h>
  
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
-
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-       return ((unsigned long *)tsk->thread.sp)[3];
-}
-
  void __show_regs(struct pt_regs *regs, int all)
  {
         unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
         unsigned long arg, struct task_struct *p, unsigned long tls)
  {
         struct pt_regs *childregs = task_pt_regs(p);
+       struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
+       struct inactive_task_frame *frame = &fork_frame->frame;
         struct task_struct *tsk;
         int err;
  
-       p->thread.sp = (unsigned long) childregs;
+       frame->bp = 0;
+       frame->ret_addr = (unsigned long) ret_from_fork;
+       p->thread.sp = (unsigned long) fork_frame;
         p->thread.sp0 = (unsigned long) (childregs+1);
         memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
  
         if (unlikely(p->flags & PF_KTHREAD)) {
                 /* kernel thread */
                 memset(childregs, 0, sizeof(struct pt_regs));
-               p->thread.ip = (unsigned long) ret_from_kernel_thread;
-               task_user_gs(p) = __KERNEL_STACK_CANARY;
-               childregs->ds = __USER_DS;
-               childregs->es = __USER_DS;
-               childregs->fs = __KERNEL_PERCPU;
-               childregs->bx = sp;     /* function */
-               childregs->bp = arg;
-               childregs->orig_ax = -1;
-               childregs->cs = __KERNEL_CS | get_kernel_rpl();
-               childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+               frame->bx = sp;         /* function */
+               frame->di = arg;
                 p->thread.io_bitmap_ptr = NULL;
                 return 0;
         }
+       frame->bx = 0;
         *childregs = *current_pt_regs();
         childregs->ax = 0;
         if (sp)
                 childregs->sp = sp;
  
-       p->thread.ip = (unsigned long) ret_from_fork;
         task_user_gs(p) = get_user_gs(current_pt_regs());
  
         p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c

index 63236d8f84bf50c3291a9aa4591d3de24a500412..b812cd0d7889c6491f0ae8c6c75f633b51142324 100644 (file)
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -50,8 +50,6 @@
  #include <asm/switch_to.h>
  #include <asm/xen/hypervisor.h>
  
-asmlinkage extern void ret_from_fork(void);
-
  __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
  
  /* Prints also some state that isn't saved in the pt_regs */
@@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
  {
         int err;
         struct pt_regs *childregs;
+       struct fork_frame *fork_frame;
+       struct inactive_task_frame *frame;
         struct task_struct *me = current;
  
         p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
         childregs = task_pt_regs(p);
-       p->thread.sp = (unsigned long) childregs;
-       set_tsk_thread_flag(p, TIF_FORK);
+       fork_frame = container_of(childregs, struct fork_frame, regs);
+       frame = &fork_frame->frame;
+       frame->bp = 0;
+       frame->ret_addr = (unsigned long) ret_from_fork;
+       p->thread.sp = (unsigned long) fork_frame;
         p->thread.io_bitmap_ptr = NULL;
  
         savesegment(gs, p->thread.gsindex);
@@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
         if (unlikely(p->flags & PF_KTHREAD)) {
                 /* kernel thread */
                 memset(childregs, 0, sizeof(struct pt_regs));
-               childregs->sp = (unsigned long)childregs;
-               childregs->ss = __KERNEL_DS;
-               childregs->bx = sp; /* function */
-               childregs->bp = arg;
-               childregs->orig_ax = -1;
-               childregs->cs = __KERNEL_CS | get_kernel_rpl();
-               childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+               frame->bx = sp;         /* function */
+               frame->r12 = arg;
                 return 0;
         }
+       frame->bx = 0;
         *childregs = *current_pt_regs();
  
         childregs->ax = 0;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c

index 2537cfba4d894a258255f72e6598668bcf123051..5b88a1b26fc747cf0cdb533384a014c0ac9f3e93 100644 (file)
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
                 return sp;
  
         prev_esp = (u32 *)(context);
-       if (prev_esp)
-               return (unsigned long)prev_esp;
+       if (*prev_esp)
+               return (unsigned long)*prev_esp;
  
         return (unsigned long)regs;
  }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c

index 2d98798d395e6c8cabcaee9473a824051ade8947..44c868ebc31411a55300f560aae8ab0f54a10331 100644 (file)
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1101,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
                 efi_find_mirror();
         }
  
+       reserve_bios_regions();
+
         /*
          * The EFI specification says that boot service code won't be called
          * after ExitBootServices(). This is, in fact, a lie.
@@ -1129,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
  
         early_trap_pf_init();
  
-       setup_real_mode();
+       /*
+        * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+        * with the current CR4 value.  This may not be necessary, but
+        * auditing all the early-boot CR4 manipulation would be needed to
+        * rule it out.
+        */
+       if (boot_cpu_data.cpuid_level >= 0)
+               /* A CPU has %cr4 if and only if it has CPUID. */
+               mmu_cr4_features = __read_cr4();
  
         memblock_set_current_limit(get_max_mapped());
  
@@ -1178,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
  
         kasan_init();
  
-       if (boot_cpu_data.cpuid_level >= 0) {
-               /* A CPU has %cr4 if and only if it has CPUID */
-               mmu_cr4_features = __read_cr4();
-               if (trampoline_cr4_features)
-                       *trampoline_cr4_features = mmu_cr4_features;
-       }
-
  #ifdef CONFIG_X86_32
         /* sync back kernel address range */
         clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c

index 1d5c79473639a55680f1e9af2d0e560e6f9f96bf..2bbd27f8980217bc443ac4d202d6658328061a81 100644 (file)
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void)
  #ifdef CONFIG_X86_64
                 per_cpu(irq_stack_ptr, cpu) =
                         per_cpu(irq_stack_union.irq_stack, cpu) +
-                       IRQ_STACK_SIZE - 64;
+                       IRQ_STACK_SIZE;
  #endif
  #ifdef CONFIG_NUMA
                 per_cpu(x86_cpu_to_node_map, cpu) =
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c

index 2a6e84a30a546f022331c653ec0725bd73086bc5..7e52f83d3a4bfd1218fe7b88092c93aed4759253 100644 (file)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
  /* Logical package management. We might want to allocate that dynamically */
  static int *physical_to_logical_pkg __read_mostly;
  static unsigned long *physical_package_map __read_mostly;;
-static unsigned long *logical_package_map  __read_mostly;
  static unsigned int max_physical_pkg_id __read_mostly;
  unsigned int __max_logical_packages __read_mostly;
  EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
  
  /* Maximum number of SMT threads on any online core */
  int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
         if (test_and_set_bit(pkg, physical_package_map))
                 goto found;
  
-       new = find_first_zero_bit(logical_package_map, __max_logical_packages);
-       if (new >= __max_logical_packages) {
+       if (logical_packages_frozen) {
                 physical_to_logical_pkg[pkg] = -1;
-               pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+               pr_warn("APIC(%x) Package %u exceeds logical package max\n",
                         apicid, pkg);
                 return -ENOSPC;
         }
-       set_bit(new, logical_package_map);
+
+       new = logical_packages++;
         pr_info("APIC(%x) Converting physical %u to logical package %u\n",
                 apicid, pkg, new);
         physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
         }
  
         __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+       logical_packages = 0;
  
         /*
          * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
         memset(physical_to_logical_pkg, 0xff, size);
         size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
         physical_package_map = kzalloc(size, GFP_KERNEL);
-       size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
-       logical_package_map = kzalloc(size, GFP_KERNEL);
-
-       pr_info("Max logical packages: %u\n", __max_logical_packages);
  
         for_each_present_cpu(cpu) {
                 unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
                 set_cpu_possible(cpu, false);
                 set_cpu_present(cpu, false);
         }
+
+       if (logical_packages > __max_logical_packages) {
+               pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
+                       logical_packages, __max_logical_packages);
+               logical_packages_frozen = true;
+               __max_logical_packages  = logical_packages;
+       }
+
+       pr_info("Max logical packages: %u\n", __max_logical_packages);
  }
  
  void __init smp_store_boot_cpu_info(void)
@@ -935,7 +942,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
         per_cpu(cpu_current_top_of_stack, cpu) =
                 (unsigned long)task_stack_page(idle) + THREAD_SIZE;
  #else
-       clear_tsk_thread_flag(idle, TIF_FORK);
         initial_gs = per_cpu_offset(cpu);
  #endif
  }
@@ -962,7 +968,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
  
         early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
         initial_code = (unsigned long)start_secondary;
-       stack_start  = idle->thread.sp;
+       initial_stack  = idle->thread.sp;
  
         /*
          * Enable the espfix hack for this CPU
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c

index b70ca12dd389c57fd32222fa1279f0d46c39a6ac..907b4e4aeb5eabda5d6f9c028f17a081e41f2cd8 100644 (file)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present", segment_not_present)
  DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",            stack_segment)
  DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",          alignment_check)
  
+#ifdef CONFIG_VMAP_STACK
+static void __noreturn handle_stack_overflow(const char *message,
+                                            struct pt_regs *regs,
+                                            unsigned long fault_address)
+{
+       printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
+                (void *)fault_address, current->stack,
+                (char *)current->stack + THREAD_SIZE - 1);
+       die(message, regs, 0);
+
+       /* Be absolutely certain we don't return. */
+       panic(message);
+}
+#endif
+
  #ifdef CONFIG_X86_64
  /* Runs on IST stack */
  dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
  {
         static const char str[] = "double fault";
         struct task_struct *tsk = current;
+#ifdef CONFIG_VMAP_STACK
+       unsigned long cr2;
+#endif
  
  #ifdef CONFIG_X86_ESPFIX64
         extern unsigned char native_irq_return_iret[];
@@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
         tsk->thread.error_code = error_code;
         tsk->thread.trap_nr = X86_TRAP_DF;
  
+#ifdef CONFIG_VMAP_STACK
+       /*
+        * If we overflow the stack into a guard page, the CPU will fail
+        * to deliver #PF and will send #DF instead.  Similarly, if we
+        * take any non-IST exception while too close to the bottom of
+        * the stack, the processor will get a page fault while
+        * delivering the exception and will generate a double fault.
+        *
+        * According to the SDM (footnote in 6.15 under "Interrupt 14 -
+        * Page-Fault Exception (#PF)"):
+        *
+        *   Processors update CR2 whenever a page fault is detected. If a
+        *   second page fault occurs while an earlier page fault is being
+        *   delivered, the faulting linear address of the second fault will
+        *   overwrite the contents of CR2 (replacing the previous
+        *   address). These updates to CR2 occur even if the page fault
+        *   results in a double fault or occurs during the delivery of a
+        *   double fault.
+        *
+        * The logic below has a small possibility of incorrectly diagnosing
+        * some errors as stack overflows.  For example, if the IDT or GDT
+        * gets corrupted such that #GP delivery fails due to a bad descriptor
+        * causing #GP and we hit this condition while CR2 coincidentally
+        * points to the stack guard page, we'll think we overflowed the
+        * stack.  Given that we're going to panic one way or another
+        * if this happens, this isn't necessarily worth fixing.
+        *
+        * If necessary, we could improve the test by only diagnosing
+        * a stack overflow if the saved RSP points within 47 bytes of
+        * the bottom of the stack: if RSP == tsk_stack + 48 and we
+        * take an exception, the stack is already aligned and there
+        * will be enough room for SS, RSP, RFLAGS, CS, RIP, and a
+        * possible error code, so a stack overflow would *not* double
+        * fault.  With any less space left, exception delivery could
+        * fail, and, as a practical matter, we've overflowed the
+        * stack even if the actual trigger for the double fault was
+        * something else.
+        */
+       cr2 = read_cr2();
+       if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
+               handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
+#endif
+
  #ifdef CONFIG_DOUBLEFAULT
         df_debug(regs, error_code);
  #endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c

index 1ef87e887051e950071aa746f68ca9736f3c7e4a..78b9cb5a26af31559625f3a4e40e62fdc7d53512 100644 (file)
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,7 @@
  #include <asm/nmi.h>
  #include <asm/x86_init.h>
  #include <asm/geode.h>
+#include <asm/apic.h>
  
  unsigned int __read_mostly cpu_khz;    /* TSC clocks / usec, not used here */
  EXPORT_SYMBOL(cpu_khz);
@@ -1249,6 +1250,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
                 (unsigned long)tsc_khz / 1000,
                 (unsigned long)tsc_khz % 1000);
  
+       /* Inform the TSC deadline clockevent devices about the recalibration */
+       lapic_update_tsc_freq();
+
  out:
         if (boot_cpu_has(X86_FEATURE_ART))
                 art_related_clocksource = &clocksource_tsc;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c

index 6c1ff31d99ffeb0d0a28c5ee472bb1865ff23df3..495c776de4b470f8eb53236a0ddeb2ca8f043b6b 100644 (file)
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
                 *cursor &= 0xfe;
         }
         /*
-        * Similar treatment for VEX3 prefix.
-        * TODO: add XOP/EVEX treatment when insn decoder supports them
+        * Similar treatment for VEX3/EVEX prefix.
+        * TODO: add XOP treatment when insn decoder supports it
          */
-       if (insn->vex_prefix.nbytes == 3) {
+       if (insn->vex_prefix.nbytes >= 3) {
                 /*
                  * vex2:     c5    rvvvvLpp   (has no b bit)
                  * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
                  * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
-                *   (evex will need setting of both b and x since
-                *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
-                * Setting VEX3.b (setting because it has inverted meaning):
+                * Setting VEX3.b (setting because it has inverted meaning).
+                * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+                * is the 4th bit of MODRM.rm, and needs the same treatment.
+                * For VEX3-encoded insns, VEX3.x value has no effect in
+                * non-SIB encoding, the change is superfluous but harmless.
                  */
                 cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
-               *cursor |= 0x20;
+               *cursor |= 0x60;
         }
  
         /*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
  
         reg = MODRM_REG(insn);  /* Fetch modrm.reg */
         reg2 = 0xff;            /* Fetch vex.vvvv */
-       if (insn->vex_prefix.nbytes == 2)
-               reg2 = insn->vex_prefix.bytes[1];
-       else if (insn->vex_prefix.nbytes == 3)
+       if (insn->vex_prefix.nbytes)
                 reg2 = insn->vex_prefix.bytes[2];
         /*
-        * TODO: add XOP, EXEV vvvv reading.
+        * TODO: add XOP vvvv reading.
          *
          * vex.vvvv field is in bits 6-3, bits are inverted.
          * But in 32-bit mode, high-order bit may be ignored.
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c

index f7dfeda83e5c444c85ec0527e2dba9fa86dc207a..121f59c6ee54e0159aab44889274f3bdf0b1041a 100644 (file)
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -19,7 +19,7 @@
  #include <asm/cpufeature.h>
  #include <asm/setup.h>
  
-#define debug_putstr(v) early_printk(v)
+#define debug_putstr(v) early_printk("%s", v)
  #define has_cpuflag(f) boot_cpu_has(f)
  #define get_boot_seed() kaslr_offset()
  #endif
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c

index ec21796ac5fd5a95f5e386af57a67d2e4ba51f61..4473cb4f8b906dcae083a4b29c38b72d9b7d56d3 100644 (file)
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -3,15 +3,17 @@
   * included by both the compressed kernel and the regular kernel.
   */
  
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
                            unsigned long addr, unsigned long end)
  {
         addr &= PMD_MASK;
         for (; addr < end; addr += PMD_SIZE) {
                 pmd_t *pmd = pmd_page + pmd_index(addr);
  
-               if (!pmd_present(*pmd))
-                       set_pmd(pmd, __pmd(addr | pmd_flag));
+               if (pmd_present(*pmd))
+                       continue;
+
+               set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
         }
  }
  
@@ -30,13 +32,13 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
  
                 if (pud_present(*pud)) {
                         pmd = pmd_offset(pud, 0);
-                       ident_pmd_init(info->pmd_flag, pmd, addr, next);
+                       ident_pmd_init(info, pmd, addr, next);
                         continue;
                 }
                 pmd = (pmd_t *)info->alloc_pgt_page(info->context);
                 if (!pmd)
                         return -ENOMEM;
-               ident_pmd_init(info->pmd_flag, pmd, addr, next);
+               ident_pmd_init(info, pmd, addr, next);
                 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
         }
  
@@ -44,14 +46,15 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
  }
  
  int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-                             unsigned long addr, unsigned long end)
+                             unsigned long pstart, unsigned long pend)
  {
+       unsigned long addr = pstart + info->offset;
+       unsigned long end = pend + info->offset;
         unsigned long next;
         int result;
-       int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
  
         for (; addr < end; addr = next) {
-               pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+               pgd_t *pgd = pgd_page + pgd_index(addr);
                 pud_t *pud;
  
                 next = (addr & PGDIR_MASK) + PGDIR_SIZE;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c

index 4dbe65622810208182bfe363efdd477a84abb040..a7655f6caf7dbd641dfea7c3e60581e01df594cb 100644 (file)
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         unsigned cpu = smp_processor_id();
  
         if (likely(prev != next)) {
+               if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+                       /*
+                        * If our current stack is in vmalloc space and isn't
+                        * mapped in the new pgd, we'll double-fault.  Forcibly
+                        * map it.
+                        */
+                       unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+
+                       pgd_t *pgd = next->pgd + stack_pgd_index;
+
+                       if (unlikely(pgd_none(*pgd)))
+                               set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+               }
+
  #ifdef CONFIG_SMP
                 this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
                 this_cpu_write(cpu_tlbstate.active_mm, next);
  #endif
+
                 cpumask_set_cpu(cpu, mm_cpumask(next));
  
                 /*
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c

index 4480c06cade78d663f18db8ef09ff3db723dc1fe..89d1146f5a6f76424a0045266862eee566c2b930 100644 (file)
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -254,6 +254,7 @@ void __init efi_free_boot_services(void)
         for_each_efi_memory_desc(md) {
                 unsigned long long start = md->phys_addr;
                 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+               size_t rm_size;
  
                 if (md->type != EFI_BOOT_SERVICES_CODE &&
                     md->type != EFI_BOOT_SERVICES_DATA)
@@ -263,6 +264,26 @@ void __init efi_free_boot_services(void)
                 if (md->attribute & EFI_MEMORY_RUNTIME)
                         continue;
  
+               /*
+                * Nasty quirk: if all sub-1MB memory is used for boot
+                * services, we can get here without having allocated the
+                * real mode trampoline.  It's too late to hand boot services
+                * memory back to the memblock allocator, so instead
+                * try to manually allocate the trampoline if needed.
+                *
+                * I've seen this on a Dell XPS 13 9350 with firmware
+                * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+                * grub2-efi on a hard disk.  (And no, I don't know why
+                * this happened, but Linux should still try to boot rather
+                * than panicking early.)
+                */
+               rm_size = real_mode_size_needed();
+               if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+                       set_real_mode_mem(start, rm_size);
+                       start += rm_size;
+                       size -= rm_size;
+               }
+
                 free_bootmem_late(start, size);
         }
  
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c

index 66b2166ea4a1c715a0362ed99cbeb3692a031476..23f2f3e41c7f48a60d3bd35fef9ef7025ca3e389 100644 (file)
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -187,7 +187,8 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
  void uv_bios_init(void)
  {
         uv_systab = NULL;
-       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+           !efi.uv_systab || efi_runtime_disabled()) {
                 pr_crit("UV: UVsystab: missing\n");
                 return;
         }
@@ -199,12 +200,14 @@ void uv_bios_init(void)
                 return;
         }
  
+       /* Starting with UV4 the UV systab size is variable */
         if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+               int size = uv_systab->size;
+
                 iounmap(uv_systab);
-               uv_systab = ioremap(efi.uv_systab, uv_systab->size);
+               uv_systab = ioremap(efi.uv_systab, size);
                 if (!uv_systab) {
-                       pr_err("UV: UVsystab: ioremap(%d) failed!\n",
-                               uv_systab->size);
+                       pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
                         return;
                 }
         }
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c

index f0b5f2d402afb15f639be87f9581c44c7298bdef..9634557a544478fdde15eff06d8fa120ffce894d 100644 (file)
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -87,7 +87,7 @@ static int set_up_temporary_mappings(void)
         struct x86_mapping_info info = {
                 .alloc_pgt_page = alloc_pgt_page,
                 .pmd_flag       = __PAGE_KERNEL_LARGE_EXEC,
-               .kernel_mapping = true,
+               .offset         = __PAGE_OFFSET,
         };
         unsigned long mstart, mend;
         pgd_t *pgd;
@@ -113,7 +113,7 @@ static int set_up_temporary_mappings(void)
                         return result;
         }
  
-       temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
+       temp_level4_pgt = __pa(pgd);
         return 0;
  }
  
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c

index 705e3fffb4a1a3296ac5745603681cc434c44d92..5db706f14111c7c12a55fc0731a86641fe2c4741 100644 (file)
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,9 +1,11 @@
  #include <linux/io.h>
+#include <linux/slab.h>
  #include <linux/memblock.h>
  
  #include <asm/cacheflush.h>
  #include <asm/pgtable.h>
  #include <asm/realmode.h>
+#include <asm/tlbflush.h>
  
  struct real_mode_header *real_mode_header;
  u32 *trampoline_cr4_features;
@@ -11,25 +13,37 @@ u32 *trampoline_cr4_features;
  /* Hold the pgd entry used on booting additional CPUs */
  pgd_t trampoline_pgd_entry;
  
+void __init set_real_mode_mem(phys_addr_t mem, size_t size)
+{
+       void *base = __va(mem);
+
+       real_mode_header = (struct real_mode_header *) base;
+       printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
+              base, (unsigned long long)mem, size);
+}
+
  void __init reserve_real_mode(void)
  {
         phys_addr_t mem;
-       unsigned char *base;
-       size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+       size_t size = real_mode_size_needed();
+
+       if (!size)
+               return;
+
+       WARN_ON(slab_is_available());
  
         /* Has to be under 1M so we can execute real-mode AP code. */
         mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
-       if (!mem)
-               panic("Cannot allocate trampoline\n");
+       if (!mem) {
+               pr_info("No sub-1M memory is available for the trampoline\n");
+               return;
+       }
  
-       base = __va(mem);
         memblock_reserve(mem, size);
-       real_mode_header = (struct real_mode_header *) base;
-       printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
-              base, (unsigned long long)mem, size);
+       set_real_mode_mem(mem, size);
  }
  
-void __init setup_real_mode(void)
+static void __init setup_real_mode(void)
  {
         u16 real_mode_seg;
         const u32 *rel;
@@ -84,7 +98,7 @@ void __init setup_real_mode(void)
  
         trampoline_header->start = (u64) secondary_startup_64;
         trampoline_cr4_features = &trampoline_header->cr4;
-       *trampoline_cr4_features = __read_cr4();
+       *trampoline_cr4_features = mmu_cr4_features;
  
         trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
         trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@@ -100,7 +114,7 @@ void __init setup_real_mode(void)
   * need to mark it executable at do_pre_smp_initcalls() at least,
   * thus run it as a early_initcall().
   */
-static int __init set_real_mode_permissions(void)
+static void __init set_real_mode_permissions(void)
  {
         unsigned char *base = (unsigned char *) real_mode_header;
         size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@@ -119,7 +133,16 @@ static int __init set_real_mode_permissions(void)
         set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
         set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
         set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+}
+
+static int __init init_real_mode(void)
+{
+       if (!real_mode_header)
+               panic("Real mode trampoline was not allocated");
+
+       setup_real_mode();
+       set_real_mode_permissions();
  
         return 0;
  }
-early_initcall(set_real_mode_permissions);
+early_initcall(init_real_mode);
diff --git a/crypto/Kconfig b/crypto/Kconfig

index a9377bef25e3ed0b3d3b2bf82b1cc456ab371920..84d71482bf080288d2c379b03dbb02894ba3b4c5 100644 (file)
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -439,7 +439,7 @@ config CRYPTO_CRC32C_INTEL
  
  config CRYPT_CRC32C_VPMSUM
         tristate "CRC32c CRC algorithm (powerpc64)"
-       depends on PPC64
+       depends on PPC64 && ALTIVEC
         select CRYPTO_HASH
         select CRC32
         help
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c

index 62264397a2d2863680d7cec283aeec59dda02448..7e8ed96236cefa794ca39684c9c2592ca49f7976 100644 (file)
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -24,14 +24,14 @@
  #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
  
  static const u64 keccakf_rndc[24] = {
-       0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-       0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-       0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-       0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-       0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-       0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
-       0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-       0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+       0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
+       0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
+       0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL,
+       0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL,
+       0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL,
+       0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
+       0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL,
+       0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
  };
  
  static const int keccakf_rotc[24] = {
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c

index 8c234dd9b8bc595c21d9d5abaa7327e02b9cc572..80cc7c089a15e908ae070d95ad4879e5b6309555 100644 (file)
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1527,11 +1527,12 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
  {
         struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
         u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+       const u32 STATUS_MASK = 0x80000037;
  
         if (mmio->num_lines)
                 offset = to_interleave_offset(offset, mmio);
  
-       return readl(mmio->addr.base + offset);
+       return readl(mmio->addr.base + offset) & STATUS_MASK;
  }
  
  static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c

index 1a04af6d24212cdd16e5c8091d01f16e2b409384..6c6519f6492a4198c78cae1eaad5e33e03efd2d9 100644 (file)
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3950,6 +3950,7 @@ static void rbd_dev_release(struct device *dev)
         bool need_put = !!rbd_dev->opts;
  
         ceph_oid_destroy(&rbd_dev->header_oid);
+       ceph_oloc_destroy(&rbd_dev->header_oloc);
  
         rbd_put_client(rbd_dev->rbd_client);
         rbd_spec_put(rbd_dev->spec);
@@ -5336,15 +5337,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
         }
         spec->pool_id = (u64)rc;
  
-       /* The ceph file layout needs to fit pool id in 32 bits */
-
-       if (spec->pool_id > (u64)U32_MAX) {
-               rbd_warn(NULL, "pool id too large (%llu > %u)",
-                               (unsigned long long)spec->pool_id, U32_MAX);
-               rc = -EIO;
-               goto err_out_client;
-       }
-
         rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
         if (!rbd_dev) {
                 rc = -ENOMEM;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c

index 1523e05c46fc95b29c47af3b51ebdc9f93af9029..93b1aaa5ba3be26d5de4d0a7b461ecc2fe7beb61 100644 (file)
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -391,22 +391,16 @@ static int init_vq(struct virtio_blk *vblk)
                 num_vqs = 1;
  
         vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
-       if (!vblk->vqs) {
-               err = -ENOMEM;
-               goto out;
-       }
+       if (!vblk->vqs)
+               return -ENOMEM;
  
         names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
-       if (!names)
-               goto err_names;
-
         callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
-       if (!callbacks)
-               goto err_callbacks;
-
         vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
-       if (!vqs)
-               goto err_vqs;
+       if (!names || !callbacks || !vqs) {
+               err = -ENOMEM;
+               goto out;
+       }
  
         for (i = 0; i < num_vqs; i++) {
                 callbacks[i] = virtblk_done;
@@ -417,7 +411,7 @@ static int init_vq(struct virtio_blk *vblk)
         /* Discover virtqueues and write information to configuration.  */
         err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
         if (err)
-               goto err_find_vqs;
+               goto out;
  
         for (i = 0; i < num_vqs; i++) {
                 spin_lock_init(&vblk->vqs[i].lock);
@@ -425,16 +419,12 @@ static int init_vq(struct virtio_blk *vblk)
         }
         vblk->num_vqs = num_vqs;
  
- err_find_vqs:
+out:
         kfree(vqs);
- err_vqs:
         kfree(callbacks);
- err_callbacks:
         kfree(names);
- err_names:
         if (err)
                 kfree(vblk->vqs);
- out:
         return err;
  }
  
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c

index 28bce3f4f81d6aa56840657169c0842461f9f515..57700541f95129e6f8f194ede23afeeda35842da 100644 (file)
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -8,6 +8,9 @@
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
+
+#define pr_fmt(fmt)    "arm_arch_timer: " fmt
+
  #include <linux/init.h>
  #include <linux/kernel.h>
  #include <linux/device.h>
@@ -370,16 +373,33 @@ static bool arch_timer_has_nonsecure_ppi(void)
                 arch_timer_ppi[PHYS_NONSECURE_PPI]);
  }
  
+static u32 check_ppi_trigger(int irq)
+{
+       u32 flags = irq_get_trigger_type(irq);
+
+       if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) {
+               pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq);
+               pr_warn("WARNING: Please fix your firmware\n");
+               flags = IRQF_TRIGGER_LOW;
+       }
+
+       return flags;
+}
+
  static int arch_timer_starting_cpu(unsigned int cpu)
  {
         struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
+       u32 flags;
  
         __arch_timer_setup(ARCH_CP15_TIMER, clk);
  
-       enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0);
+       flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
+       enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
  
-       if (arch_timer_has_nonsecure_ppi())
-               enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
+       if (arch_timer_has_nonsecure_ppi()) {
+               flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]);
+               enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags);
+       }
  
         arch_counter_set_user_access();
         if (evtstrm_enable)
diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c

index 7e3fd375a6278f17d26fc7b584bb98fa70459a02..92f6e4deee74a00499f0c51ede613818086adca7 100644 (file)
--- a/drivers/clocksource/bcm_kona_timer.c
+++ b/drivers/clocksource/bcm_kona_timer.c
@@ -66,10 +66,10 @@ static void kona_timer_disable_and_clear(void __iomem *base)
  
  }
  
-static void
+static int
  kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
  {
-       int loop_limit = 4;
+       int loop_limit = 3;
  
         /*
          * Read 64-bit free running counter
@@ -83,18 +83,19 @@ kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
          *      if new hi-word is equal to previously read hi-word then stop.
          */
  
-       while (--loop_limit) {
+       do {
                 *msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET);
                 *lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET);
                 if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET))
                         break;
-       }
+       } while (--loop_limit);
         if (!loop_limit) {
                 pr_err("bcm_kona_timer: getting counter failed.\n");
                 pr_err(" Timer will be impacted\n");
+               return -ETIMEDOUT;
         }
  
-       return;
+       return 0;
  }
  
  static int kona_timer_set_next_event(unsigned long clc,
@@ -112,8 +113,11 @@ static int kona_timer_set_next_event(unsigned long clc,
  
         uint32_t lsw, msw;
         uint32_t reg;
+       int ret;
  
-       kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+       ret = kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+       if (ret)
+               return ret;
  
         /* Load the "next" event tick value */
         writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET);
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c

index d91e8725917c615c8e3390d356dd9e3d133a666e..b4b3ab5a11ad006cd0bf2bd4db50627057ea27c8 100644 (file)
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -164,7 +164,7 @@ void __init gic_clocksource_init(unsigned int frequency)
         gic_start_count();
  }
  
-static void __init gic_clocksource_of_init(struct device_node *node)
+static int __init gic_clocksource_of_init(struct device_node *node)
  {
         struct clk *clk;
         int ret;
diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c

index 719b478d136e7dbc8be409847c786a17df4cd971..3c39e6f459714e5b725cfdd10663fb04dd62063b 100644 (file)
--- a/drivers/clocksource/time-armada-370-xp.c
+++ b/drivers/clocksource/time-armada-370-xp.c
@@ -338,7 +338,6 @@ static int __init armada_xp_timer_init(struct device_node *np)
         struct clk *clk = of_clk_get_by_name(np, "fixed");
         int ret;
  
-       clk = of_clk_get(np, 0);
         if (IS_ERR(clk)) {
                 pr_err("Failed to get clock");
                 return PTR_ERR(clk);
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c

index 87796e0864e945e41725be06106234ca8a32a133..d3ffde8066298ff48d64c990eefe1dec0bf34cb6 100644 (file)
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -145,11 +145,30 @@ static struct powernv_pstate_info {
  /* Use following macros for conversions between pstate_id and index */
  static inline int idx_to_pstate(unsigned int i)
  {
+       if (unlikely(i >= powernv_pstate_info.nr_pstates)) {
+               pr_warn_once("index %u is out of bound\n", i);
+               return powernv_freqs[powernv_pstate_info.nominal].driver_data;
+       }
+
         return powernv_freqs[i].driver_data;
  }
  
  static inline unsigned int pstate_to_idx(int pstate)
  {
+       int min = powernv_freqs[powernv_pstate_info.min].driver_data;
+       int max = powernv_freqs[powernv_pstate_info.max].driver_data;
+
+       if (min > 0) {
+               if (unlikely((pstate < max) || (pstate > min))) {
+                       pr_warn_once("pstate %d is out of bound\n", pstate);
+                       return powernv_pstate_info.nominal;
+               }
+       } else {
+               if (unlikely((pstate > max) || (pstate < min))) {
+                       pr_warn_once("pstate %d is out of bound\n", pstate);
+                       return powernv_pstate_info.nominal;
+               }
+       }
         /*
          * abs() is deliberately used so that is works with
          * both monotonically increasing and decreasing
@@ -593,7 +612,7 @@ void gpstate_timer_handler(unsigned long data)
         } else {
                 gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
                                                  gpstates->highest_lpstate_idx,
-                                                freq_data.pstate_id);
+                                                gpstates->last_lpstate_idx);
         }
  
         /*
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c

index ea8189f4b0212cc038f5f4363cf10f5e8a54099a..6dc597126b79e06e7410296576ef0705f8fe37ce 100644 (file)
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -441,6 +441,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
                                OP_ALG_AAI_CTR_MOD128);
         const bool is_rfc3686 = alg->caam.rfc3686;
  
+       if (!ctx->authsize)
+               return 0;
+
         /* NULL encryption / decryption */
         if (!ctx->enckeylen)
                 return aead_null_set_sh_desc(aead);
@@ -614,7 +617,7 @@ skip_enc:
                 keys_fit_inline = true;
  
         /* aead_givencrypt shared descriptor */
-       desc = ctx->sh_desc_givenc;
+       desc = ctx->sh_desc_enc;
  
         /* Note: Context registers are saved. */
         init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
@@ -645,13 +648,13 @@ copy_iv:
         append_operation(desc, ctx->class2_alg_type |
                          OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
  
-       /* ivsize + cryptlen = seqoutlen - authsize */
-       append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
         /* Read and write assoclen bytes */
         append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
         append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
  
+       /* ivsize + cryptlen = seqoutlen - authsize */
+       append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+
         /* Skip assoc data */
         append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
  
@@ -697,7 +700,7 @@ copy_iv:
         ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                               desc_bytes(desc),
                                               DMA_TO_DEVICE);
-       if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) {
+       if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
                 dev_err(jrdev, "unable to map shared descriptor\n");
                 return -ENOMEM;
         }
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c

index f1ecc8df8d41e40dff086bd0382f98f364ad4bb1..36365b3efdfdce5ffb6bc19a8b72fe31a3a463b3 100644 (file)
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1898,6 +1898,7 @@ caam_hash_alloc(struct caam_hash_template *template,
                          template->name);
                 snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
                          template->driver_name);
+               t_alg->ahash_alg.setkey = NULL;
         }
         alg->cra_module = THIS_MODULE;
         alg->cra_init = caam_hash_cra_init;
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig

index d0c1dab9b435480bef9d6c61f42cac9dbea0107f..dff1a4a6dc1b5cd63df8071564e3606c4305e44e 100644 (file)
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -251,6 +251,14 @@ config EDAC_SBRIDGE
           Support for error detection and correction the Intel
           Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
  
+config EDAC_SKX
+       tristate "Intel Skylake server Integrated MC"
+       depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+       depends on PCI_MMCONFIG
+       help
+         Support for error detection and correction on the Intel
+         Skylake server Integrated Memory Controllers.
+
  config EDAC_MPC85XX
         tristate "Freescale MPC83xx / MPC85xx"
         depends on EDAC_MM_EDAC && FSL_SOC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile

index f9e4a3e0e6e915d1b5b6217e76e4f7f52323656c..986049925b08569d45692d3e0d0b16e6394363c4 100644 (file)
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_EDAC_I5400)              += i5400_edac.o
  obj-$(CONFIG_EDAC_I7300)               += i7300_edac.o
  obj-$(CONFIG_EDAC_I7CORE)              += i7core_edac.o
  obj-$(CONFIG_EDAC_SBRIDGE)             += sb_edac.o
+obj-$(CONFIG_EDAC_SKX)                 += skx_edac.o
  obj-$(CONFIG_EDAC_E7XXX)               += e7xxx_edac.o
  obj-$(CONFIG_EDAC_E752X)               += e752x_edac.o
  obj-$(CONFIG_EDAC_I82443BXGX)          += i82443bxgx_edac.o
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c

index 4fb2eb7c800d8839c6329cd34589eeea4dbaa5c0..ce0067b7a2f675e7933f91e7fa453b8992789763 100644 (file)
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -552,9 +552,9 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = {
  /* Knight's Landing Support */
  /*
   * KNL's memory channels are swizzled between memory controllers.
- * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2
+ * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2
   */
-#define knl_channel_remap(channel) ((channel + 3) % 6)
+#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3)
  
  /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
  #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC       0x7840
@@ -1286,7 +1286,7 @@ static u32 knl_get_mc_route(int entry, u32 reg)
         mc = GET_BITFIELD(reg, entry*3, (entry*3)+2);
         chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1);
  
-       return knl_channel_remap(mc*3 + chan);
+       return knl_channel_remap(mc, chan);
  }
  
  /*
@@ -2997,8 +2997,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
                 } else {
                         char A = *("A");
  
-                       channel = knl_channel_remap(channel);
+                       /*
+                        * Reported channel is in range 0-2, so we can't map it
+                        * back to mc. To figure out mc we check machine check
+                        * bank register that reported this error.
+                        * bank15 means mc0 and bank16 means mc1.
+                        */
+                       channel = knl_channel_remap(m->bank == 16, channel);
                         channel_mask = 1 << channel;
+
                         snprintf(msg, sizeof(msg),
                                 "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
                                 overflow ? " OVERFLOW" : "",
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c

new file mode 100644 (file)

index 0000000..0ff4878
--- /dev/null
+++ b/drivers/edac/skx_edac.c
@@ -0,0 +1,1121 @@
+/*
+ * EDAC driver for Intel(R) Xeon(R) Skylake processors
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+#include <linux/smp.h>
+#include <linux/bitmap.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_core.h"
+
+#define SKX_REVISION    " Ver: 1.0 "
+
+/*
+ * Debug macros
+ */
+#define skx_printk(level, fmt, arg...)                 \
+       edac_printk(level, "skx", fmt, ##arg)
+
+#define skx_mc_printk(mci, level, fmt, arg...)         \
+       edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
+
+/*
+ * Get a bit field at register value <v>, from bit <lo> to bit <hi>
+ */
+#define GET_BITFIELD(v, lo, hi) \
+       (((v) & GENMASK_ULL((hi), (lo))) >> (lo))
+
+static LIST_HEAD(skx_edac_list);
+
+static u64 skx_tolm, skx_tohm;
+
+#define NUM_IMC                        2       /* memory controllers per socket */
+#define NUM_CHANNELS           3       /* channels per memory controller */
+#define NUM_DIMMS              2       /* Max DIMMS per channel */
+
+#define        MASK26  0x3FFFFFF               /* Mask for 2^26 */
+#define MASK29 0x1FFFFFFF              /* Mask for 2^29 */
+
+/*
+ * Each cpu socket contains some pci devices that provide global
+ * information, and also some that are local to each of the two
+ * memory controllers on the die.
+ */
+struct skx_dev {
+       struct list_head        list;
+       u8                      bus[4];
+       struct pci_dev  *sad_all;
+       struct pci_dev  *util_all;
+       u32     mcroute;
+       struct skx_imc {
+               struct mem_ctl_info *mci;
+               u8      mc;     /* system wide mc# */
+               u8      lmc;    /* socket relative mc# */
+               u8      src_id, node_id;
+               struct skx_channel {
+                       struct pci_dev *cdev;
+                       struct skx_dimm {
+                               u8      close_pg;
+                               u8      bank_xor_enable;
+                               u8      fine_grain_bank;
+                               u8      rowbits;
+                               u8      colbits;
+                       } dimms[NUM_DIMMS];
+               } chan[NUM_CHANNELS];
+       } imc[NUM_IMC];
+};
+static int skx_num_sockets;
+
+struct skx_pvt {
+       struct skx_imc  *imc;
+};
+
+struct decoded_addr {
+       struct skx_dev *dev;
+       u64     addr;
+       int     socket;
+       int     imc;
+       int     channel;
+       u64     chan_addr;
+       int     sktways;
+       int     chanways;
+       int     dimm;
+       int     rank;
+       int     channel_rank;
+       u64     rank_address;
+       int     row;
+       int     column;
+       int     bank_address;
+       int     bank_group;
+};
+
+static struct skx_dev *get_skx_dev(u8 bus, u8 idx)
+{
+       struct skx_dev *d;
+
+       list_for_each_entry(d, &skx_edac_list, list) {
+               if (d->bus[idx] == bus)
+                       return d;
+       }
+
+       return NULL;
+}
+
+enum munittype {
+       CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
+};
+
+struct munit {
+       u16     did;
+       u16     devfn[NUM_IMC];
+       u8      busidx;
+       u8      per_socket;
+       enum munittype mtype;
+};
+
+/*
+ * List of PCI device ids that we need together with some device
+ * number and function numbers to tell which memory controller the
+ * device belongs to.
+ */
+static const struct munit skx_all_munits[] = {
+       { 0x2054, { }, 1, 1, SAD_ALL },
+       { 0x2055, { }, 1, 1, UTIL_ALL },
+       { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
+       { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
+       { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
+       { 0x208e, { }, 1, 0, SAD },
+       { }
+};
+
+/*
+ * We use the per-socket device 0x2016 to count how many sockets are present,
+ * and to determine which PCI buses are associated with each socket. Allocate
+ * and build the full list of all the skx_dev structures that we need here.
+ */
+static int get_all_bus_mappings(void)
+{
+       struct pci_dev *pdev, *prev;
+       struct skx_dev *d;
+       u32 reg;
+       int ndev = 0;
+
+       prev = NULL;
+       for (;;) {
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
+               if (!pdev)
+                       break;
+               ndev++;
+               d = kzalloc(sizeof(*d), GFP_KERNEL);
+               if (!d) {
+                       pci_dev_put(pdev);
+                       return -ENOMEM;
+               }
+               pci_read_config_dword(pdev, 0xCC, &reg);
+               d->bus[0] =  GET_BITFIELD(reg, 0, 7);
+               d->bus[1] =  GET_BITFIELD(reg, 8, 15);
+               d->bus[2] =  GET_BITFIELD(reg, 16, 23);
+               d->bus[3] =  GET_BITFIELD(reg, 24, 31);
+               edac_dbg(2, "busses: %x, %x, %x, %x\n",
+                        d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
+               list_add_tail(&d->list, &skx_edac_list);
+               skx_num_sockets++;
+               prev = pdev;
+       }
+
+       return ndev;
+}
+
+/*
+ * get_all_munits() - find every PCI device matching one munit table entry.
+ * @m: table entry supplying the PCI device id (did), the expected devfn
+ *     list, the bus index (busidx) and the unit type (mtype).
+ *
+ * Walks all Intel PCI devices with device id m->did, looks up the owning
+ * skx_dev from the device's bus number, enables the device and records it
+ * in that skx_dev according to m->mtype.
+ *
+ * Return: the number of devices counted (SAD devices are not counted),
+ * or -ENODEV on any failure.
+ */
+static int get_all_munits(const struct munit *m)
+{
+       struct pci_dev *pdev, *prev;
+       struct skx_dev *d;
+       u32 reg;
+       int i = 0, ndev = 0;
+
+       prev = NULL;
+       for (;;) {
+               pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
+               if (!pdev)
+                       break;
+               ndev++;
+               /*
+                * For per-controller units, match the devfn against the
+                * table; the resulting i is the imc index used by the
+                * CHANx cases below. An unknown devfn is an error.
+                */
+               if (m->per_socket == NUM_IMC) {
+                       for (i = 0; i < NUM_IMC; i++)
+                               if (m->devfn[i] == pdev->devfn)
+                                       break;
+                       if (i == NUM_IMC)
+                               goto fail;
+               }
+               d = get_skx_dev(pdev->bus->number, m->busidx);
+               if (!d)
+                       goto fail;
+
+               /* Be sure that the device is enabled */
+               if (unlikely(pci_enable_device(pdev) < 0)) {
+                       skx_printk(KERN_ERR,
+                               "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did);
+                       goto fail;
+               }
+
+               /*
+                * Each pointer stored in d gets its own reference via
+                * pci_dev_get(); skx_remove() drops them with pci_dev_put().
+                */
+               switch (m->mtype) {
+               case CHAN0: case CHAN1: case CHAN2:
+                       pci_dev_get(pdev);
+                       d->imc[i].chan[m->mtype].cdev = pdev;
+                       break;
+               case SAD_ALL:
+                       pci_dev_get(pdev);
+                       d->sad_all = pdev;
+                       break;
+               case UTIL_ALL:
+                       pci_dev_get(pdev);
+                       d->util_all = pdev;
+                       break;
+               case SAD:
+                       /*
+                        * one of these devices per core, including cores
+                        * that don't exist on this SKU. Ignore any that
+                        * read a route table of zero, make sure all the
+                        * non-zero values match.
+                        */
+                       pci_read_config_dword(pdev, 0xB4, &reg);
+                       if (reg != 0) {
+                               if (d->mcroute == 0)
+                                       d->mcroute = reg;
+                               else if (d->mcroute != reg) {
+                                       skx_printk(KERN_ERR,
+                                               "mcroute mismatch\n");
+                                       goto fail;
+                               }
+                       }
+                       /* SAD devices are excluded from the returned count */
+                       ndev--;
+                       break;
+               }
+
+               prev = pdev;
+       }
+
+       return ndev;
+fail:
+       /* drop the reference on the device the loop was examining */
+       pci_dev_put(pdev);
+       return -ENODEV;
+}
+
+/*
+ * CPU match table checked with x86_match_cpu() in skx_init() and exported
+ * through MODULE_DEVICE_TABLE. The empty entry terminates the list.
+ */
+const struct x86_cpu_id skx_cpuids[] = {
+       { X86_VENDOR_INTEL, 6, 0x55, 0, 0 },    /* Skylake */
+       { }
+};
+MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
+
+/* Return bits 14:12 of config register 0xF0 on the socket's util_all device. */
+static u8 get_src_id(struct skx_dev *d)
+{
+       u32 cfg;
+
+       pci_read_config_dword(d->util_all, 0xF0, &cfg);
+       return GET_BITFIELD(cfg, 12, 14);
+}
+
+/* Return bits 2:0 of config register 0xF4 on the socket's util_all device. */
+static u8 skx_get_node_id(struct skx_dev *d)
+{
+       u32 cfg;
+
+       pci_read_config_dword(d->util_all, 0xF4, &cfg);
+       return GET_BITFIELD(cfg, 0, 2);
+}
+
+/*
+ * Extract bits hibit:lobit of @reg and range-check them against
+ * [minval, maxval]. Returns the field plus @add when it is in range,
+ * otherwise logs the bad value under @name and returns -EINVAL.
+ */
+static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
+                        int maxval, char *name)
+{
+       u32 field = GET_BITFIELD(reg, lobit, hibit);
+
+       if (field >= minval && field <= maxval)
+               return field + add;
+
+       edac_dbg(2, "bad %s = %d (raw=%x)\n", name, field, reg);
+       return -EINVAL;
+}
+
+/* Bit 15 of the MTR register is set when the DIMM slot is populated. */
+#define IS_DIMM_PRESENT(mtr)           GET_BITFIELD((mtr), 15, 15)
+
+/*
+ * Field decoders for the MTR register. Each returns the decoded value or
+ * -EINVAL when the raw field is outside the accepted range.
+ *
+ * numrank() yields a log2 value: get_dimm_info() uses it as a shift
+ * exponent, so raw 0 (a single rank) is valid and must be accepted.
+ * numrow()/numcol() return bit counts (raw field plus 12 / plus 10).
+ */
+#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 0, 2, "ranks")
+#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
+#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
+
+/*
+ * Translate the device-width encoding in bits 9:8 of @mtr to an EDAC
+ * device type. Encodings without a table entry give DEV_UNKNOWN.
+ */
+static int get_width(u32 mtr)
+{
+       static const int width_by_code[] = { DEV_X4, DEV_X8, DEV_X16 };
+       u32 code = GET_BITFIELD(mtr, 8, 9);
+
+       if (code < ARRAY_SIZE(width_by_code))
+               return width_by_code[code];
+
+       return DEV_UNKNOWN;
+}
+
+/*
+ * Read skx_tolm (register 0xD0) and the 64-bit skx_tohm (registers
+ * 0xD4 low / 0xD8 high) from the first Intel device with id 0x2034.
+ * Returns 0 on success or -ENODEV when no such device exists.
+ */
+static int skx_get_hi_lo(void)
+{
+       struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
+       u32 tolm, tohm_lo, tohm_hi;
+
+       if (!pdev) {
+               edac_dbg(0, "Can't get tolm/tohm\n");
+               return -ENODEV;
+       }
+
+       pci_read_config_dword(pdev, 0xD0, &tolm);
+       pci_read_config_dword(pdev, 0xD4, &tohm_lo);
+       pci_read_config_dword(pdev, 0xD8, &tohm_hi);
+       pci_dev_put(pdev);
+
+       skx_tolm = tolm;
+       skx_tohm = tohm_lo;
+       skx_tohm |= (u64)tohm_hi << 32;
+
+       edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm);
+
+       return 0;
+}
+
+/*
+ * get_dimm_info() - decode one DIMM slot and fill in its EDAC description.
+ * @mtr:    MTR register value for this slot
+ * @amap:   AMAP register value for the channel
+ * @dimm:   EDAC dimm_info entry to populate
+ * @imc:    memory controller owning the slot
+ * @chan:   channel index within @imc
+ * @dimmno: slot index within the channel
+ *
+ * Return: 1 when a DIMM is present and was decoded, 0 when the slot is
+ * empty or its geometry fields are out of range.
+ */
+static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+                        struct skx_imc *imc, int chan, int dimmno)
+{
+       int  banks = 16, ranks, rows, cols, npages;
+       u64 size;
+
+       if (!IS_DIMM_PRESENT(mtr))
+               return 0;
+       ranks = numrank(mtr);
+       rows = numrow(mtr);
+       cols = numcol(mtr);
+
+       /*
+        * The decoders return -EINVAL for an out-of-range field. Feeding
+        * that into the shift below gives a bogus (possibly negative)
+        * shift count, so refuse to describe such a slot.
+        */
+       if (ranks < 0 || rows < 0 || cols < 0) {
+               skx_printk(KERN_ERR,
+                          "Invalid DIMM geometry: mc %d channel %d dimm %d\n",
+                          imc->mc, chan, dimmno);
+               return 0;
+       }
+
+       /*
+        * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
+        */
+       size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
+       npages = MiB_TO_PAGES(size);
+
+       edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+                imc->mc, chan, dimmno, size, npages,
+                banks, ranks, rows, cols);
+
+       /* Driver-private per-DIMM state used later by the address decoders */
+       imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
+       imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
+       imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
+       imc->chan[chan].dimms[dimmno].rowbits = rows;
+       imc->chan[chan].dimms[dimmno].colbits = cols;
+
+       dimm->nr_pages = npages;
+       dimm->grain = 32;
+       dimm->dtype = get_width(mtr);
+       dimm->mtype = MEM_DDR4;
+       dimm->edac_mode = EDAC_SECDED; /* likely better than this */
+       snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+                imc->src_id, imc->lmc, chan, dimmno);
+
+       return 1;
+}
+
+/* Read the 32-bit register at config offset 0x87c of @dev into @reg. */
+#define SKX_GET_MTMTR(dev, reg) \
+       pci_read_config_dword((dev), 0x87c, &reg)
+
+/* Report whether bit 2 of the register at 0x87c is set on @pdev. */
+static bool skx_check_ecc(struct pci_dev *pdev)
+{
+       u32 regval;
+
+       SKX_GET_MTMTR(pdev, regval);
+
+       return GET_BITFIELD(regval, 2, 2) != 0;
+}
+
+/*
+ * Read the AMAP register of every channel and the MTR register of every
+ * slot on one memory controller, filling in the matching EDAC dimm_info
+ * entries. Returns -ENODEV if a populated channel is found while
+ * skx_check_ecc() reports false for the controller, otherwise 0.
+ */
+static int skx_get_dimm_config(struct mem_ctl_info *mci)
+{
+       struct skx_pvt *pvt = mci->pvt_info;
+       struct skx_imc *imc = pvt->imc;
+       int chan, slot;
+
+       for (chan = 0; chan < NUM_CHANNELS; chan++) {
+               struct pci_dev *cdev = imc->chan[chan].cdev;
+               int populated = 0;
+               u32 amap, mtr;
+
+               pci_read_config_dword(cdev, 0x8C, &amap);
+               for (slot = 0; slot < NUM_DIMMS; slot++) {
+                       struct dimm_info *dimm;
+
+                       dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+                                            mci->n_layers, chan, slot, 0);
+                       pci_read_config_dword(cdev, 0x80 + 4 * slot, &mtr);
+                       populated += get_dimm_info(mtr, amap, dimm, imc,
+                                                  chan, slot);
+               }
+
+               if (!populated)
+                       continue;
+
+               if (!skx_check_ecc(imc->chan[0].cdev)) {
+                       skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Undo skx_register_mci() for one memory controller: remove it from the
+ * EDAC core, then free its name string and the mci structure. Does
+ * nothing when no mci is attached to @imc.
+ */
+static void skx_unregister_mci(struct skx_imc *imc)
+{
+       struct mem_ctl_info *ctl = imc->mci;
+
+       if (ctl == NULL)
+               return;
+
+       edac_dbg(0, "MC%d: mci = %p\n", imc->mc, ctl);
+
+       /* Take the controller (and its sysfs nodes) out of the EDAC core */
+       edac_mc_del_mc(ctl->pdev);
+
+       edac_dbg(1, "%s: free mci struct\n", ctl->ctl_name);
+       kfree(ctl->ctl_name);
+       edac_mc_free(ctl);
+}
+
+/*
+ * skx_register_mci() - allocate and register an EDAC controller for one imc.
+ * @imc: memory controller to register; imc->mci is set on success and
+ *       reset to NULL on failure.
+ *
+ * Return: 0 on success, -ENOMEM if an allocation fails, the error from
+ * skx_get_dimm_config(), or -EINVAL if edac_mc_add_mc() fails.
+ */
+static int skx_register_mci(struct skx_imc *imc)
+{
+       struct mem_ctl_info *mci;
+       struct edac_mc_layer layers[2];
+       struct pci_dev *pdev = imc->chan[0].cdev;
+       struct skx_pvt *pvt;
+       int rc;
+
+       /* allocate a new MC control structure */
+       layers[0].type = EDAC_MC_LAYER_CHANNEL;
+       layers[0].size = NUM_CHANNELS;
+       layers[0].is_virt_csrow = false;
+       layers[1].type = EDAC_MC_LAYER_SLOT;
+       layers[1].size = NUM_DIMMS;
+       layers[1].is_virt_csrow = true;
+       mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
+                           sizeof(struct skx_pvt));
+
+       if (unlikely(!mci))
+               return -ENOMEM;
+
+       edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
+
+       /* Associate skx_dev and mci for future usage */
+       imc->mci = mci;
+       pvt = mci->pvt_info;
+       pvt->imc = imc;
+
+       mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
+                                 imc->node_id, imc->lmc);
+       /* kasprintf() allocates; do not register a controller with no name */
+       if (!mci->ctl_name) {
+               rc = -ENOMEM;
+               goto fail;
+       }
+
+       mci->mtype_cap = MEM_FLAG_DDR4;
+       mci->edac_ctl_cap = EDAC_FLAG_NONE;
+       mci->edac_cap = EDAC_FLAG_NONE;
+       mci->mod_name = "skx_edac.c";
+       mci->dev_name = pci_name(imc->chan[0].cdev);
+       mci->mod_ver = SKX_REVISION;
+       mci->ctl_page_to_phys = NULL;
+
+       rc = skx_get_dimm_config(mci);
+       if (rc < 0)
+               goto fail;
+
+       /* record ptr to the generic device */
+       mci->pdev = &pdev->dev;
+
+       /* add this new MC control structure to EDAC's list of MCs */
+       if (unlikely(edac_mc_add_mc(mci))) {
+               edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+               rc = -EINVAL;
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       /* kfree(NULL) is a no-op, so this is safe on every failure path */
+       kfree(mci->ctl_name);
+       edac_mc_free(mci);
+       imc->mci = NULL;
+       return rc;
+}
+
+/* Number of SAD register pairs scanned by skx_sad_decode() */
+#define        SKX_MAX_SAD 24
+
+/*
+ * Entry i occupies two dwords on the sad_all device: the SAD register
+ * at 0x60 + 8*i and its interleave list at 0x64 + 8*i.
+ */
+#define SKX_GET_SAD(d, i, reg) \
+       pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg)
+#define SKX_GET_ILV(d, i, reg) \
+       pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg)
+
+/* Field extractors for a SAD register value */
+#define        SKX_SAD_MOD3MODE(sad)   GET_BITFIELD((sad), 30, 31)
+#define        SKX_SAD_MOD3(sad)       GET_BITFIELD((sad), 27, 27)
+#define SKX_SAD_LIMIT(sad)     (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
+#define        SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6)
+#define        SKX_SAD_ATTR(sad)       GET_BITFIELD((sad), 3, 4)
+#define        SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2)
+#define SKX_SAD_ENABLE(sad)    GET_BITFIELD((sad), 0, 0)
+
+/* A 4-bit interleave target: bit 3 clear means remote, bits 2:0 are the id */
+#define SKX_ILV_REMOTE(tgt)    (((tgt) & 8) == 0)
+#define SKX_ILV_TARGET(tgt)    ((tgt) & 7)
+
+/*
+ * skx_sad_decode() - first decode stage: system address to socket/imc/channel.
+ * @res: in: res->addr; out: res->dev, res->socket, res->imc, res->channel.
+ *
+ * Scans the SAD entries of a socket for the range containing the address,
+ * picks the interleave target, follows at most one hop to a remote socket
+ * and finally derives the logical channel, which indexes d->mcroute.
+ *
+ * Return: true on success, false if the address cannot be decoded.
+ */
+static bool skx_sad_decode(struct decoded_addr *res)
+{
+       struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
+       u64 addr = res->addr;
+       int i, idx, tgt, lchan, shift;
+       u32 sad, ilv;
+       u64 limit, prev_limit;
+       int remote = 0;
+
+       /* Simple sanity check for I/O space or out of range */
+       if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
+               edac_dbg(0, "Address %llx out of range\n", addr);
+               return false;
+       }
+
+restart:
+       /* Each entry covers the range from the previous limit + 1 to its own limit */
+       prev_limit = 0;
+       for (i = 0; i < SKX_MAX_SAD; i++) {
+               SKX_GET_SAD(d, i, sad);
+               limit = SKX_SAD_LIMIT(sad);
+               if (SKX_SAD_ENABLE(sad)) {
+                       if (addr >= prev_limit && addr <= limit)
+                               goto sad_found;
+               }
+               prev_limit = limit + 1;
+       }
+       edac_dbg(0, "No SAD entry for %llx\n", addr);
+       return false;
+
+sad_found:
+       SKX_GET_ILV(d, i, ilv);
+
+       /* The 2-bit interleave mode selects which 3 address bits pick the target */
+       switch (SKX_SAD_INTERLEAVE(sad)) {
+       case 0:
+               idx = GET_BITFIELD(addr, 6, 8);
+               break;
+       case 1:
+               idx = GET_BITFIELD(addr, 8, 10);
+               break;
+       case 2:
+               idx = GET_BITFIELD(addr, 12, 14);
+               break;
+       case 3:
+               idx = GET_BITFIELD(addr, 30, 32);
+               break;
+       }
+
+       tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);
+
+       /* If point to another node, find it and start over */
+       if (SKX_ILV_REMOTE(tgt)) {
+               if (remote) {
+                       edac_dbg(0, "Double remote!\n");
+                       return false;
+               }
+               remote = 1;
+               list_for_each_entry(d, &skx_edac_list, list) {
+                       if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
+                               goto restart;
+               }
+               edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
+               return false;
+       }
+
+       if (SKX_SAD_MOD3(sad) == 0)
+               lchan = SKX_ILV_TARGET(tgt);
+       else {
+               switch (SKX_SAD_MOD3MODE(sad)) {
+               case 0:
+                       shift = 6;
+                       break;
+               case 1:
+                       shift = 8;
+                       break;
+               case 2:
+                       shift = 12;
+                       break;
+               default:
+                       edac_dbg(0, "illegal mod3mode\n");
+                       return false;
+               }
+               switch (SKX_SAD_MOD3ASMOD2(sad)) {
+               case 0:
+                       lchan = (addr >> shift) % 3;
+                       break;
+               case 1:
+                       lchan = (addr >> shift) % 2;
+                       break;
+               case 2:
+                       lchan = (addr >> shift) % 2;
+                       /*
+                        * lchan is 0 or 1 here; map it to 1 or 2. A logical
+                        * NOT is required: bitwise ~ would set every high
+                        * bit and make lchan negative, which then yields
+                        * invalid bit positions for the mcroute lookup.
+                        */
+                       lchan = (lchan << 1) | !lchan;
+                       break;
+               case 3:
+                       lchan = ((addr >> shift) % 2) << 1;
+                       break;
+               }
+               lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
+       }
+
+       /* mcroute: 3 bits of imc per logical channel, then 2 bits of channel from bit 18 */
+       res->dev = d;
+       res->socket = d->imc[0].src_id;
+       res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
+       res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
+
+       edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
+                res->addr, res->socket, res->imc, res->channel);
+       return true;
+}
+
+/* Number of TAD entries scanned by skx_tad_decode() */
+#define        SKX_MAX_TAD 8
+
+/*
+ * Register accessors. TADBASE and TADWAYNESS are read from channel 0 of
+ * the memory controller; TADCHNILVOFFSET is read per channel.
+ */
+#define SKX_GET_TADBASE(d, mc, i, reg)                 \
+       pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg)
+#define SKX_GET_TADWAYNESS(d, mc, i, reg)              \
+       pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg)
+#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg)     \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg)
+
+/* Field extractors; base, limit and offset are in units of 64MiB (1 << 26) */
+#define        SKX_TAD_BASE(b)         ((u64)GET_BITFIELD((b), 12, 31) << 26)
+#define SKX_TAD_SKT_GRAN(b)    GET_BITFIELD((b), 4, 5)
+#define SKX_TAD_CHN_GRAN(b)    GET_BITFIELD((b), 6, 7)
+#define        SKX_TAD_LIMIT(b)        (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
+#define        SKX_TAD_OFFSET(b)       ((u64)GET_BITFIELD((b), 4, 23) << 26)
+#define        SKX_TAD_SKTWAYS(b)      (1 << GET_BITFIELD((b), 10, 11))
+#define        SKX_TAD_CHNWAYS(b)      (GET_BITFIELD((b), 8, 9) + 1)
+
+/* which bit used for both socket and channel interleave */
+static int skx_granularity[] = { 6, 8, 12, 30 };
+
+/*
+ * Remove a @ways-way interleave that starts at bit @shift: the part of
+ * @addr above @shift is divided by @ways, and the low @shift bits of the
+ * result are taken from @lowbits.
+ */
+static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
+{
+       u64 low_mask = (1ull << shift) - 1;
+       u64 upper = addr >> shift;
+
+       upper /= ways;
+
+       return (upper << shift) | (lowbits & low_mask);
+}
+
+/*
+ * skx_tad_decode() - second decode stage: system address to channel address.
+ * @res: in: addr, dev, imc, channel; out: sktways, chanways, chan_addr.
+ *
+ * Finds the TAD entry whose [base, limit] range contains res->addr,
+ * subtracts the per-channel offset and removes the socket and channel
+ * interleave from the address.
+ *
+ * Return: true on success, false when no TAD entry matches.
+ */
+static bool skx_tad_decode(struct decoded_addr *res)
+{
+       int i;
+       u32 base, wayness, chnilvoffset;
+       int skt_interleave_bit, chn_interleave_bit;
+       u64 channel_addr;
+
+       for (i = 0; i < SKX_MAX_TAD; i++) {
+               SKX_GET_TADBASE(res->dev, res->imc, i, base);
+               SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
+               if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
+                       goto tad_found;
+       }
+       edac_dbg(0, "No TAD entry for %llx\n", res->addr);
+       return false;
+
+tad_found:
+       res->sktways = SKX_TAD_SKTWAYS(wayness);
+       res->chanways = SKX_TAD_CHNWAYS(wayness);
+       /* the granularity fields index the table of interleave bit positions */
+       skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
+       chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];
+
+       /* i still identifies the matching TAD entry */
+       SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
+       channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);
+
+       if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
+               /* Must handle channel first, then socket */
+               channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+                                                res->chanways, channel_addr);
+               channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+                                                res->sktways, channel_addr);
+       } else {
+               /* Handle socket then channel. Preserve low bits from original address */
+               channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+                                                res->sktways, res->addr);
+               channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+                                                res->chanways, res->addr);
+       }
+
+       res->chan_addr = channel_addr;
+
+       edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n",
+                res->addr, res->chan_addr, res->sktways, res->chanways);
+       return true;
+}
+
+/* Number of RIR wayness registers scanned by skx_rir_decode() */
+#define SKX_MAX_RIR 4
+
+/*
+ * Per-channel register accessors. RIRWAYNESS entry i is at 0x108 + 4*i;
+ * the interleave register for way idx of entry i is at
+ * 0x120 + 16*idx + 4*i. All macro arguments used in arithmetic are
+ * parenthesized so that expression arguments expand correctly.
+ */
+#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg)          \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev,       \
+                             0x108 + 4 * (i), &reg)
+#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg)         \
+       pci_read_config_dword((d)->imc[mc].chan[ch].cdev,       \
+                             0x120 + 16 * (idx) + 4 * (i), &reg)
+
+/* Field extractors for RIRWAYNESS (VALID, LIMIT, WAYS) and RIRILV values */
+#define        SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
+#define        SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
+#define        SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29))
+#define        SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19)
+#define        SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26))
+
+/*
+ * skx_rir_decode() - third decode stage: channel address to rank address.
+ * @res: in: dev, imc, channel, chan_addr; out: rank_address,
+ *       channel_rank, dimm, rank.
+ *
+ * Finds the valid RIR entry whose range contains res->chan_addr, removes
+ * the rank interleave and subtracts the offset of the selected way.
+ *
+ * Return: true on success, false when no RIR entry matches.
+ */
+static bool skx_rir_decode(struct decoded_addr *res)
+{
+       int i, idx, chan_rank;
+       int shift;
+       u32 rirway, rirlv;
+       u64 rank_addr, prev_limit = 0, limit;
+
+       /* interleave starts at bit 6 if DIMM 0 has close_pg set, else at bit 13 */
+       if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
+               shift = 6;
+       else
+               shift = 13;
+
+       for (i = 0; i < SKX_MAX_RIR; i++) {
+               SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
+               limit = SKX_RIR_LIMIT(rirway);
+               if (SKX_RIR_VALID(rirway)) {
+                       if (prev_limit <= res->chan_addr &&
+                           res->chan_addr <= limit)
+                               goto rir_found;
+               }
+               prev_limit = limit;
+       }
+       edac_dbg(0, "No RIR entry for %llx\n", res->addr);
+       return false;
+
+rir_found:
+       /* divide out the interleave ways above 'shift', keep the bits below it */
+       rank_addr = res->chan_addr >> shift;
+       rank_addr /= SKX_RIR_WAYS(rirway);
+       rank_addr <<= shift;
+       rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);
+
+       /* provisional value; overwritten below once the offset is known */
+       res->rank_address = rank_addr;
+       /* which interleave way this address falls in */
+       idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);
+
+       SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
+       res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
+       chan_rank = SKX_RIR_CHAN_RANK(rirlv);
+       res->channel_rank = chan_rank;
+       /* channel rank number split as four ranks per DIMM */
+       res->dimm = chan_rank / 4;
+       res->rank = chan_rank % 4;
+
+       edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
+                res->addr, res->dimm, res->rank,
+                res->channel_rank, res->rank_address);
+       return true;
+}
+
+/*
+ * Rank-address bit positions feeding each DRAM row/column bit; entry n is
+ * the source bit for output bit n (see skx_bits()).
+ *
+ * numrow() can report up to 18 row bits, so the row tables need 18
+ * entries. The final entry (rank-address bit 34) supplies row bit 17;
+ * without it skx_bits() would read past the end of the table.
+ */
+static u8 skx_close_row[] = {
+       15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33, 34
+};
+static u8 skx_close_column[] = {
+       3, 4, 5, 14, 19, 23, 24, 25, 26, 27
+};
+static u8 skx_open_row[] = {
+       14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34
+};
+static u8 skx_open_column[] = {
+       3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+};
+static u8 skx_open_fine_column[] = {
+       3, 4, 5, 7, 8, 9, 10, 11, 12, 13
+};
+
+/*
+ * Gather @nbits scattered bits of @addr into a compact value: output bit
+ * n is the bit of @addr at position @bits[n].
+ */
+static int skx_bits(u64 addr, int nbits, u8 *bits)
+{
+       int gathered = 0;
+       int n = 0;
+
+       while (n < nbits) {
+               u64 bit = (addr >> bits[n]) & 1;
+
+               gathered |= bit << n;
+               n++;
+       }
+
+       return gathered;
+}
+
+/*
+ * Build a 2-bit value from bits @b0 (low) and @b1 (high) of @addr. When
+ * @do_xor is set, it is XORed with the 2-bit value built the same way
+ * from bits @x0 and @x1.
+ */
+static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
+{
+       int base = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);
+
+       if (!do_xor)
+               return base;
+
+       return base ^ (GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1));
+}
+
+/*
+ * skx_mad_decode() - final decode stage: rank address to DRAM coordinates.
+ * @r: in: dev, imc, channel, dimm, rank_address; out: row, column,
+ *     bank_address, bank_group.
+ *
+ * Picks the bit-mapping tables according to the DIMM's close_pg and
+ * fine_grain_bank settings. Always returns true.
+ */
+static bool skx_mad_decode(struct decoded_addr *r)
+{
+       struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
+       /* low bank-group bit in the open-page case: bit 6 with fine-grain banking, else 13 */
+       int bg0 = dimm->fine_grain_bank ? 6 : 13;
+
+       if (dimm->close_pg) {
+               r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
+               r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
+               r->column |= 0x400; /* C10 is autoprecharge, always set */
+               r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
+               r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
+       } else {
+               r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
+               if (dimm->fine_grain_bank)
+                       r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
+               else
+                       r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
+               r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
+               r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
+       }
+       /* keep only the row bits this DIMM actually has */
+       r->row &= (1u << dimm->rowbits) - 1;
+
+       edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n",
+                r->addr, r->row, r->column, r->bank_address,
+                r->bank_group);
+       return true;
+}
+
+/*
+ * Run the four decode stages in order (SAD, TAD, RIR, MAD), stopping at
+ * the first one that fails. Returns true only if every stage succeeds.
+ */
+static bool skx_decode(struct decoded_addr *res)
+{
+       if (!skx_sad_decode(res))
+               return false;
+       if (!skx_tad_decode(res))
+               return false;
+       if (!skx_rir_decode(res))
+               return false;
+
+       return skx_mad_decode(res);
+}
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr.
+ * Write an address to this file to exercise the address decode
+ * logic in this driver.
+ */
+static struct dentry *skx_test;
+static u64 skx_fake_addr;
+
+/*
+ * Write handler for the "addr" file: decode @val and discard the result.
+ * The decode stages emit their own edac_dbg() output. Always returns 0.
+ */
+static int debugfs_u64_set(void *data, u64 val)
+{
+       struct decoded_addr res;
+
+       res.addr = val;
+       skx_decode(&res);
+
+       return 0;
+}
+
+/* Write-only attribute: no getter is supplied */
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+/* Create a debugfs file backed by the write-only u64 attribute above. */
+static struct dentry *mydebugfs_create(const char *name, umode_t mode,
+                                      struct dentry *parent, u64 *value)
+{
+       return debugfs_create_file(name, mode, parent, value, &fops_u64_wo);
+}
+
+/* Create the skx_edac_test directory and its owner-writable "addr" file. */
+static void setup_skx_debug(void)
+{
+       skx_test = debugfs_create_dir("skx_edac_test", NULL);
+       mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr);
+}
+
+/* Remove the debugfs directory and everything below it. */
+static void teardown_skx_debug(void)
+{
+       debugfs_remove_recursive(skx_test);
+}
+#else
+/* Without CONFIG_EDAC_DEBUG the debug hooks are empty stubs. */
+static void setup_skx_debug(void)
+{
+}
+
+static void teardown_skx_debug(void)
+{
+}
+#endif /*CONFIG_EDAC_DEBUG*/
+
+/*
+ * skx_mce_output_error() - report one decoded memory error to the EDAC core.
+ * @mci: memory controller the error was decoded to
+ * @m:   machine check record being handled
+ * @res: decode result from skx_decode()
+ *
+ * Classifies the event from the MCi_STATUS / MCG_STATUS bits, builds a
+ * detail string with the decoded location and passes everything to
+ * edac_mc_handle_error().
+ */
+static void skx_mce_output_error(struct mem_ctl_info *mci,
+                                const struct mce *m,
+                                struct decoded_addr *res)
+{
+       enum hw_event_mc_err_type tp_event;
+       char *optype, msg[256];
+       bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
+       bool overflow = GET_BITFIELD(m->status, 62, 62);
+       bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+       bool recoverable;
+       u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+       u32 mscod = GET_BITFIELD(m->status, 16, 31);
+       u32 errcode = GET_BITFIELD(m->status, 0, 15);
+       u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+
+       recoverable = GET_BITFIELD(m->status, 56, 56);
+
+       /*
+        * NOTE(review): an uncorrected error with RIPV set is reported as
+        * FATAL and one without as UNCORRECTED - confirm this is the
+        * intended severity mapping.
+        */
+       if (uncorrected_error) {
+               if (ripv)
+                       tp_event = HW_EVENT_ERR_FATAL;
+               else
+                       tp_event = HW_EVENT_ERR_UNCORRECTED;
+       } else {
+               tp_event = HW_EVENT_ERR_CORRECTED;
+       }
+
+       /*
+        * According to Table 15-9 of the Intel Architecture spec vol 3A,
+        * memory errors should fit in this mask:
+        *      000f 0000 1mmm cccc (binary)
+        * where:
+        *      f = Correction Report Filtering Bit. If 1, subsequent errors
+        *          won't be shown
+        *      mmm = error type
+        *      cccc = channel
+        * If the mask doesn't match, report an error to the parsing logic
+        */
+       if (!((errcode & 0xef80) == 0x80)) {
+               optype = "Can't parse: it is not a mem";
+       } else {
+               switch (optypenum) {
+               case 0:
+                       optype = "generic undef request error";
+                       break;
+               case 1:
+                       optype = "memory read error";
+                       break;
+               case 2:
+                       optype = "memory write error";
+                       break;
+               case 3:
+                       optype = "addr/cmd error";
+                       break;
+               case 4:
+                       optype = "memory scrubbing error";
+                       break;
+               default:
+                       optype = "reserved";
+                       break;
+               }
+       }
+
+       snprintf(msg, sizeof(msg),
+                "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+                overflow ? " OVERFLOW" : "",
+                (uncorrected_error && recoverable) ? " recoverable" : "",
+                mscod, errcode,
+                res->socket, res->imc, res->rank,
+                res->bank_group, res->bank_address, res->row, res->column);
+
+       edac_dbg(0, "%s\n", msg);
+
+       /* Call the helper to output message */
+       edac_mc_handle_error(tp_event, mci, core_err_cnt,
+                            m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+                            res->channel, res->dimm, -1,
+                            optype, msg);
+}
+
+/*
+ * skx_mce_check_error() - MCE decode-chain notifier callback.
+ * @nb:   notifier block (unused)
+ * @val:  notifier event value (unused)
+ * @data: the struct mce being reported
+ *
+ * Ignores events that are not memory errors with a valid address, decodes
+ * the address and reports the error against the owning memory controller.
+ *
+ * Return: always NOTIFY_DONE.
+ */
+static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
+                              void *data)
+{
+       struct mce *mce = (struct mce *)data;
+       struct decoded_addr res;
+       struct mem_ctl_info *mci;
+       char *type;
+
+       if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+               return NOTIFY_DONE;
+
+       /* ignore unless this is memory related with an address */
+       if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
+               return NOTIFY_DONE;
+
+       res.addr = mce->addr;
+       if (!skx_decode(&res))
+               return NOTIFY_DONE;
+       mci = res.dev->imc[res.imc].mci;
+
+       /* nothing registered for the decoded controller: cannot report */
+       if (!mci)
+               return NOTIFY_DONE;
+
+       if (mce->mcgstatus & MCG_STATUS_MCIP)
+               type = "Exception";
+       else
+               type = "Event";
+
+       skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
+
+       skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
+                         "Bank %d: %016Lx\n", mce->extcpu, type,
+                         mce->mcgstatus, mce->bank, mce->status);
+       skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
+       skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
+       skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);
+
+       skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
+                         "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
+                         mce->time, mce->socketid, mce->apicid);
+
+       skx_mce_output_error(mci, mce, &res);
+
+       return NOTIFY_DONE;
+}
+
+/* Notifier hooked into the MCE decode chain by skx_init() and removed by skx_exit(). */
+static struct notifier_block skx_mce_dec = {
+       .notifier_call = skx_mce_check_error,
+};
+
+/*
+ * Tear down every socket on skx_edac_list: unregister each memory
+ * controller, drop the PCI device references held in the skx_dev and
+ * free the skx_dev itself.
+ */
+static void skx_remove(void)
+{
+       struct skx_dev *dev, *next;
+       int mc, ch;
+
+       edac_dbg(0, "\n");
+
+       list_for_each_entry_safe(dev, next, &skx_edac_list, list) {
+               list_del(&dev->list);
+
+               for (mc = 0; mc < NUM_IMC; mc++) {
+                       struct skx_imc *imc = &dev->imc[mc];
+
+                       skx_unregister_mci(imc);
+                       for (ch = 0; ch < NUM_CHANNELS; ch++)
+                               pci_dev_put(imc->chan[ch].cdev);
+               }
+
+               pci_dev_put(dev->util_all);
+               pci_dev_put(dev->sad_all);
+
+               kfree(dev);
+       }
+}
+
+/*
+ * skx_init:
+ *     make sure we are running on the correct cpu model
+ *     search for all the devices we need
+ *     check which DIMMs are present.
+ *
+ * Returns 0 on success. On failure after devices have been collected,
+ * everything gathered so far is released through skx_remove() and the
+ * error code is returned.
+ */
+int __init skx_init(void)
+{
+       const struct x86_cpu_id *id;
+       const struct munit *m;
+       int rc = 0, i;
+       u8 mc = 0, src_id, node_id;
+       struct skx_dev *d;
+
+       edac_dbg(2, "\n");
+
+       id = x86_match_cpu(skx_cpuids);
+       if (!id)
+               return -ENODEV;
+
+       rc = skx_get_hi_lo();
+       if (rc)
+               return rc;
+
+       rc = get_all_bus_mappings();
+       if (rc < 0)
+               goto fail;
+       if (rc == 0) {
+               edac_dbg(2, "No memory controllers found\n");
+               return -ENODEV;
+       }
+
+       /* every table entry must be found per_socket times on every socket */
+       for (m = skx_all_munits; m->did; m++) {
+               rc = get_all_munits(m);
+               if (rc < 0)
+                       goto fail;
+               if (rc != m->per_socket * skx_num_sockets) {
+                       edac_dbg(2, "Expected %d, got %d of %x\n",
+                                m->per_socket * skx_num_sockets, rc, m->did);
+                       rc = -ENODEV;
+                       goto fail;
+               }
+       }
+
+       /* mc numbers memory controllers globally; lmc is the index within a socket */
+       list_for_each_entry(d, &skx_edac_list, list) {
+               src_id = get_src_id(d);
+               node_id = skx_get_node_id(d);
+               edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+               for (i = 0; i < NUM_IMC; i++) {
+                       d->imc[i].mc = mc++;
+                       d->imc[i].lmc = i;
+                       d->imc[i].src_id = src_id;
+                       d->imc[i].node_id = node_id;
+                       rc = skx_register_mci(&d->imc[i]);
+                       if (rc < 0)
+                               goto fail;
+               }
+       }
+
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
+       setup_skx_debug();
+
+       mce_register_decode_chain(&skx_mce_dec);
+
+       return 0;
+fail:
+       skx_remove();
+       return rc;
+}
+
+/*
+ * Module exit: unhook from the MCE decode chain, release all sockets and
+ * memory controllers, then remove the debugfs entries.
+ */
+static void __exit skx_exit(void)
+{
+       edac_dbg(2, "\n");
+       mce_unregister_decode_chain(&skx_mce_dec);
+       skx_remove();
+       teardown_skx_debug();
+}
+
+module_init(skx_init);
+module_exit(skx_exit);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tony Luck");
+MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c

index c99c24bc79b02262298ea64b5ed3e6e625429f4d..9ae6c116c4746286770052fb6f241baddc08d5b0 100644 (file)
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -16,6 +16,7 @@
  #include <linux/slab.h>
  #include <linux/mutex.h>
  #include <linux/efi.h>
+#include <linux/vmalloc.h>
  
  #define NO_FURTHER_WRITE_ACTION -1
  
@@ -108,14 +109,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
         int ret;
         void *cap_hdr_temp;
  
-       cap_hdr_temp = kmap(cap_info->pages[0]);
+       cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
+                       VM_MAP, PAGE_KERNEL);
         if (!cap_hdr_temp) {
-               pr_debug("%s: kmap() failed\n", __func__);
+               pr_debug("%s: vmap() failed\n", __func__);
                 return -EFAULT;
         }
  
         ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
-       kunmap(cap_info->pages[0]);
+       vunmap(cap_hdr_temp);
         if (ret) {
                 pr_err("%s: efi_capsule_update() failed\n", __func__);
                 return ret;
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c

index 53b9fd2293ee8f5af6f7f6a38de61730c8c99ec9..6eedff45e6d77811a5c4922e6be6fa5cabb3b307 100644 (file)
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -190,9 +190,9 @@ efi_capsule_update_locked(efi_capsule_header_t *capsule,
   * map the capsule described by @capsule with its data in @pages and
   * send it to the firmware via the UpdateCapsule() runtime service.
   *
- * @capsule must be a virtual mapping of the first page in @pages
- * (@pages[0]) in the kernel address space. That is, a
- * capsule_header_t that describes the entire contents of the capsule
+ * @capsule must be a virtual mapping of the complete capsule update in the
+ * kernel address space, as the capsule can be consumed immediately.
+ * A capsule_header_t that describes the entire contents of the capsule
   * must be at the start of the first data page.
   *
   * Even though this function will validate that the firmware supports
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index 8ebc5f1eb4c0fed15da2f40b0024f74099488c54..8c704c86597b361436e0df2b348cd35480ed0517 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -646,9 +646,9 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
  void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
  int amdgpu_gart_init(struct amdgpu_device *adev);
  void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                         int pages);
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                      int pages, struct page **pagelist,
                      dma_addr_t *dma_addr, uint32_t flags);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c

index 49de92600074dd3f16f117553ae44368b6b1d0d8..10b5ddf2c5887c36fd7f5d03f4ff6038c8e3cb8a 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -200,16 +200,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
         atpx->is_hybrid = false;
         if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                 printk("ATPX Hybrid Graphics\n");
-#if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
-#else
                 atpx->functions.power_cntl = false;
-#endif
                 atpx->is_hybrid = true;
         }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index 921bce2df0b07cced181e9cb68693f989b7839fa..0feea347f680b1317dae1d37cbde7151bd271f9c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -221,7 +221,7 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
   * Unbinds the requested pages from the gart page table and
   * replaces them with the dummy page (all asics).
   */
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                         int pages)
  {
         unsigned t;
@@ -268,7 +268,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
   * (all asics).
   * Returns 0 for success, -EINVAL for failure.
   */
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                      int pages, struct page **pagelist, dma_addr_t *dma_addr,
                      uint32_t flags)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c

index b11f4e8868d7652503713b508da2c8820a99c36b..4aa993d190189aff82ecc44d18898df8d5164708 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1187,7 +1187,8 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
                 r = 0;
         }
  
-error:
         fence_put(fence);
+
+error:
         return r;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 8e642fc48df45c665053a3b835d0abe803d7124b..80120fa4092c76164f460ba5cef4de1bce1d6d83 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1535,7 +1535,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
         r = amd_sched_entity_init(&ring->sched, &vm->entity,
                                   rq, amdgpu_sched_jobs);
         if (r)
-               return r;
+               goto err;
  
         vm->page_directory_fence = NULL;
  
@@ -1565,6 +1565,9 @@ error_free_page_directory:
  error_free_sched_entity:
         amd_sched_entity_fini(&ring->sched, &vm->entity);
  
+err:
+       drm_free_large(vm->page_tables);
+
         return r;
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c

index e621eba63126a09dc65f33ee59dd6bf47785e06d..a7d3cb3fead0f6c63536996c1112f4c76a540e19 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -184,7 +184,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
                                                         sizeof(u32)) + inx;
  
         pr_debug("kfd: get kernel queue doorbell\n"
-                        "     doorbell offset   == 0x%08d\n"
+                        "     doorbell offset   == 0x%08X\n"
                          "     kernel address    == 0x%08lX\n",
                 *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
  
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c

index ce54e985d91ba0b2ee11000c6aa499cffb4a1bba..0a06f9120b5a61b56f0a8db9c77a3374bd20e37d 100644 (file)
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper)
  
         /* Sometimes user space wants everything disabled, so don't steal the
          * display if there's a master. */
-       if (lockless_dereference(dev->master))
+       if (READ_ONCE(dev->master))
                 return false;
  
         drm_for_each_crtc(crtc, dev) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c

index 87ef34150d466903ea128eddf599718eb5836dcb..b382cf505262b186e6b5367321d2b08438f582cc 100644 (file)
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1333,8 +1333,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
         if (ret < 0)
                 return ret;
  
-       mutex_lock(&gpu->lock);
-
         /*
          * TODO
          *
@@ -1348,16 +1346,18 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
         if (unlikely(event == ~0U)) {
                 DRM_ERROR("no free event\n");
                 ret = -EBUSY;
-               goto out_unlock;
+               goto out_pm_put;
         }
  
         fence = etnaviv_gpu_fence_alloc(gpu);
         if (!fence) {
                 event_free(gpu, event);
                 ret = -ENOMEM;
-               goto out_unlock;
+               goto out_pm_put;
         }
  
+       mutex_lock(&gpu->lock);
+
         gpu->event[event].fence = fence;
         submit->fence = fence->seqno;
         gpu->active_fence = submit->fence;
@@ -1395,9 +1395,9 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
         hangcheck_timer_reset(gpu);
         ret = 0;
  
-out_unlock:
         mutex_unlock(&gpu->lock);
  
+out_pm_put:
         etnaviv_gpu_pm_put(gpu);
  
         return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 21f939074abc88d28bef842b8261da298db1192f..20fe9d52e2562ffb256ced679638026cf234fd34 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1854,6 +1854,7 @@ struct drm_i915_private {
         enum modeset_restore modeset_restore;
         struct mutex modeset_restore_lock;
         struct drm_atomic_state *modeset_restore_state;
+       struct drm_modeset_acquire_ctx reset_ctx;
  
         struct list_head vm_list; /* Global list of all address spaces */
         struct i915_ggtt ggtt; /* VM representing the global address space */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index 11681501d7b1314e9a996073d4bdba48226a22b0..a77ce9983f69c9965725f806a008fc06a129935f 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -879,9 +879,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
         ret = i915_gem_shmem_pread(dev, obj, args, file);
  
         /* pread for non shmem backed objects */
-       if (ret == -EFAULT || ret == -ENODEV)
+       if (ret == -EFAULT || ret == -ENODEV) {
+               intel_runtime_pm_get(to_i915(dev));
                 ret = i915_gem_gtt_pread(dev, obj, args->size,
                                         args->offset, args->data_ptr);
+               intel_runtime_pm_put(to_i915(dev));
+       }
  
  out:
         drm_gem_object_unreference(&obj->base);
@@ -1306,7 +1309,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                  * textures). Fallback to the shmem path in that case. */
         }
  
-       if (ret == -EFAULT) {
+       if (ret == -EFAULT || ret == -ENOSPC) {
                 if (obj->phys_handle)
                         ret = i915_gem_phys_pwrite(obj, args, file);
                 else if (i915_gem_object_has_struct_page(obj))
@@ -3169,6 +3172,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
         }
  
         intel_ring_init_seqno(engine, engine->last_submitted_seqno);
+
+       engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
  }
  
  void i915_gem_reset(struct drm_device *dev)
@@ -3186,6 +3191,7 @@ void i915_gem_reset(struct drm_device *dev)
  
         for_each_engine(engine, dev_priv)
                 i915_gem_reset_engine_cleanup(engine);
+       mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
  
         i915_gem_context_reset(dev);
  
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c

index 10f1e32767e60c7f636161485658bc3f9f5ec99f..7a30af79d7995732f5b2c9983846ab5f30b3ce76 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2873,6 +2873,7 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev)
                 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
  
                 ppgtt->base.cleanup(&ppgtt->base);
+               kfree(ppgtt);
         }
  
         i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h

index ce14fe09d96236a32af675b5320aaefb9d508f8c..5c06413ae0e61b5af6fba0e1d37d87c274d28c71 100644 (file)
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1536,6 +1536,7 @@ enum skl_disp_power_wells {
  #define BALANCE_LEG_MASK(port)         (7<<(8+3*(port)))
  /* Balance leg disable bits */
  #define BALANCE_LEG_DISABLE_SHIFT      23
+#define BALANCE_LEG_DISABLE(port)      (1 << (23 + (port)))
  
  /*
   * Fence registers
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c

index 6700a7be7f78755675d6a8c1c17c69377d0117b4..d32f586f9c057670e882bbebdfd3e405332519de 100644 (file)
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
         if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
                 return;
  
+       i915_audio_component_get_power(dev);
+
         /*
          * Enable/disable generating the codec wake signal, overriding the
          * internal logic to generate the codec wake to controller.
@@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev,
                 I915_WRITE(HSW_AUD_CHICKENBIT, tmp);
                 usleep_range(1000, 1500);
         }
+
+       i915_audio_component_put_power(dev);
  }
  
  /* Get CDCLK in kHz  */
@@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
             !IS_HASWELL(dev_priv))
                 return 0;
  
+       i915_audio_component_get_power(dev);
         mutex_lock(&dev_priv->av_mutex);
         /* 1. get the pipe */
         intel_encoder = dev_priv->dig_port_map[port];
@@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev,
  
   unlock:
         mutex_unlock(&dev_priv->av_mutex);
+       i915_audio_component_put_power(dev);
         return err;
  }
  
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c

index dd1d6fe122976edb5f6dd78428f34bb2b4313adb..1a7efac65fd5d3537be258f10db8dae4879f9965 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = {
  static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
         { 0x0000201B, 0x000000A2, 0x0 },
         { 0x00005012, 0x00000088, 0x0 },
-       { 0x80007011, 0x000000CD, 0x0 },
+       { 0x80007011, 0x000000CD, 0x1 },
         { 0x80009010, 0x000000C0, 0x1 },
         { 0x0000201B, 0x0000009D, 0x0 },
         { 0x80005012, 0x000000C0, 0x1 },
@@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
  static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = {
         { 0x00000018, 0x000000A2, 0x0 },
         { 0x00005012, 0x00000088, 0x0 },
-       { 0x80007011, 0x000000CD, 0x0 },
+       { 0x80007011, 0x000000CD, 0x3 },
         { 0x80009010, 0x000000C0, 0x3 },
         { 0x00000018, 0x0000009D, 0x0 },
         { 0x80005012, 0x000000C0, 0x3 },
@@ -388,6 +388,40 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
         }
  }
  
+static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
+{
+       int n_hdmi_entries;
+       int hdmi_level;
+       int hdmi_default_entry;
+
+       hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+
+       if (IS_BROXTON(dev_priv))
+               return hdmi_level;
+
+       if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+               skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
+               hdmi_default_entry = 8;
+       } else if (IS_BROADWELL(dev_priv)) {
+               n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+               hdmi_default_entry = 7;
+       } else if (IS_HASWELL(dev_priv)) {
+               n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
+               hdmi_default_entry = 6;
+       } else {
+               WARN(1, "ddi translation table missing\n");
+               n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+               hdmi_default_entry = 7;
+       }
+
+       /* Choose a good default if VBT is badly populated */
+       if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
+           hdmi_level >= n_hdmi_entries)
+               hdmi_level = hdmi_default_entry;
+
+       return hdmi_level;
+}
+
  /*
   * Starting with Haswell, DDI port buffers must be programmed with correct
   * values in advance. The buffer values are different for FDI and DP modes,
@@ -399,7 +433,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
  {
         struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
         u32 iboost_bit = 0;
-       int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry,
+       int i, n_hdmi_entries, n_dp_entries, n_edp_entries,
             size;
         int hdmi_level;
         enum port port;
@@ -410,7 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
         const struct ddi_buf_trans *ddi_translations;
  
         port = intel_ddi_get_encoder_port(encoder);
-       hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+       hdmi_level = intel_ddi_hdmi_level(dev_priv, port);
  
         if (IS_BROXTON(dev_priv)) {
                 if (encoder->type != INTEL_OUTPUT_HDMI)
@@ -430,7 +464,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                                 skl_get_buf_trans_edp(dev_priv, &n_edp_entries);
                 ddi_translations_hdmi =
                                 skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
-               hdmi_default_entry = 8;
                 /* If we're boosting the current, set bit 31 of trans1 */
                 if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level ||
                     dev_priv->vbt.ddi_port_info[port].dp_boost_level)
@@ -456,7 +489,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
  
                 n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
                 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-               hdmi_default_entry = 7;
         } else if (IS_HASWELL(dev_priv)) {
                 ddi_translations_fdi = hsw_ddi_translations_fdi;
                 ddi_translations_dp = hsw_ddi_translations_dp;
@@ -464,7 +496,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                 ddi_translations_hdmi = hsw_ddi_translations_hdmi;
                 n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp);
                 n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
-               hdmi_default_entry = 6;
         } else {
                 WARN(1, "ddi translation table missing\n");
                 ddi_translations_edp = bdw_ddi_translations_dp;
@@ -474,7 +505,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
                 n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
                 n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
                 n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-               hdmi_default_entry = 7;
         }
  
         switch (encoder->type) {
@@ -505,11 +535,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
         if (encoder->type != INTEL_OUTPUT_HDMI)
                 return;
  
-       /* Choose a good default if VBT is badly populated */
-       if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
-           hdmi_level >= n_hdmi_entries)
-               hdmi_level = hdmi_default_entry;
-
         /* Entry 9 is for HDMI: */
         I915_WRITE(DDI_BUF_TRANS_LO(port, i),
                    ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit);
@@ -1379,14 +1404,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
                            TRANS_CLK_SEL_DISABLED);
  }
  
-static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
-                              u32 level, enum port port, int type)
+static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
+                               enum port port, uint8_t iboost)
  {
+       u32 tmp;
+
+       tmp = I915_READ(DISPIO_CR_TX_BMU_CR0);
+       tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port));
+       if (iboost)
+               tmp |= iboost << BALANCE_LEG_SHIFT(port);
+       else
+               tmp |= BALANCE_LEG_DISABLE(port);
+       I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp);
+}
+
+static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level)
+{
+       struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+       struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
+       enum port port = intel_dig_port->port;
+       int type = encoder->type;
         const struct ddi_buf_trans *ddi_translations;
         uint8_t iboost;
         uint8_t dp_iboost, hdmi_iboost;
         int n_entries;
-       u32 reg;
  
         /* VBT may override standard boost values */
         dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level;
@@ -1428,16 +1469,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
                 return;
         }
  
-       reg = I915_READ(DISPIO_CR_TX_BMU_CR0);
-       reg &= ~BALANCE_LEG_MASK(port);
-       reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port));
-
-       if (iboost)
-               reg |= iboost << BALANCE_LEG_SHIFT(port);
-       else
-               reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port);
+       _skl_ddi_set_iboost(dev_priv, port, iboost);
  
-       I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg);
+       if (port == PORT_A && intel_dig_port->max_lanes == 4)
+               _skl_ddi_set_iboost(dev_priv, PORT_E, iboost);
  }
  
  static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
@@ -1568,7 +1603,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
         level = translate_signal_level(signal_levels);
  
         if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-               skl_ddi_set_iboost(dev_priv, level, port, encoder->type);
+               skl_ddi_set_iboost(encoder, level);
         else if (IS_BROXTON(dev_priv))
                 bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type);
  
@@ -1637,6 +1672,10 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
                         intel_dp_stop_link_train(intel_dp);
         } else if (type == INTEL_OUTPUT_HDMI) {
                 struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+               int level = intel_ddi_hdmi_level(dev_priv, port);
+
+               if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+                       skl_ddi_set_iboost(intel_encoder, level);
  
                 intel_hdmi->set_infoframes(encoder,
                                            crtc->config->has_hdmi_sink,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c

index dcf93b3d4fb6cb75a836a242ef2e560e8f065b49..2a751b6e0253552ee4be59edfe6aac675e50d4d3 100644 (file)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3093,40 +3093,110 @@ static void intel_update_primary_planes(struct drm_device *dev)
  
         for_each_crtc(dev, crtc) {
                 struct intel_plane *plane = to_intel_plane(crtc->primary);
-               struct intel_plane_state *plane_state;
-
-               drm_modeset_lock_crtc(crtc, &plane->base);
-               plane_state = to_intel_plane_state(plane->base.state);
+               struct intel_plane_state *plane_state =
+                       to_intel_plane_state(plane->base.state);
  
                 if (plane_state->visible)
                         plane->update_plane(&plane->base,
                                             to_intel_crtc_state(crtc->state),
                                             plane_state);
+       }
+}
+
+static int
+__intel_display_resume(struct drm_device *dev,
+                      struct drm_atomic_state *state)
+{
+       struct drm_crtc_state *crtc_state;
+       struct drm_crtc *crtc;
+       int i, ret;
+
+       intel_modeset_setup_hw_state(dev);
+       i915_redisable_vga(dev);
  
-               drm_modeset_unlock_crtc(crtc);
+       if (!state)
+               return 0;
+
+       for_each_crtc_in_state(state, crtc, crtc_state, i) {
+               /*
+                * Force recalculation even if we restore
+                * current state. With fast modeset this may not result
+                * in a modeset when the state is compatible.
+                */
+               crtc_state->mode_changed = true;
         }
+
+       /* ignore any reset values/BIOS leftovers in the WM registers */
+       to_intel_atomic_state(state)->skip_intermediate_wm = true;
+
+       ret = drm_atomic_commit(state);
+
+       WARN_ON(ret == -EDEADLK);
+       return ret;
  }
  
  void intel_prepare_reset(struct drm_i915_private *dev_priv)
  {
+       struct drm_device *dev = &dev_priv->drm;
+       struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+       struct drm_atomic_state *state;
+       int ret;
+
         /* no reset support for gen2 */
         if (IS_GEN2(dev_priv))
                 return;
  
-       /* reset doesn't touch the display */
+       /*
+        * Need mode_config.mutex so that we don't
+        * trample ongoing ->detect() and whatnot.
+        */
+       mutex_lock(&dev->mode_config.mutex);
+       drm_modeset_acquire_init(ctx, 0);
+       while (1) {
+               ret = drm_modeset_lock_all_ctx(dev, ctx);
+               if (ret != -EDEADLK)
+                       break;
+
+               drm_modeset_backoff(ctx);
+       }
+
+       /* reset doesn't touch the display, but flips might get nuked anyway, */
         if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
                 return;
  
-       drm_modeset_lock_all(&dev_priv->drm);
         /*
          * Disabling the crtcs gracefully seems nicer. Also the
          * g33 docs say we should at least disable all the planes.
          */
-       intel_display_suspend(&dev_priv->drm);
+       state = drm_atomic_helper_duplicate_state(dev, ctx);
+       if (IS_ERR(state)) {
+               ret = PTR_ERR(state);
+               state = NULL;
+               DRM_ERROR("Duplicating state failed with %i\n", ret);
+               goto err;
+       }
+
+       ret = drm_atomic_helper_disable_all(dev, ctx);
+       if (ret) {
+               DRM_ERROR("Suspending crtc's failed with %i\n", ret);
+               goto err;
+       }
+
+       dev_priv->modeset_restore_state = state;
+       state->acquire_ctx = ctx;
+       return;
+
+err:
+       drm_atomic_state_free(state);
  }
  
  void intel_finish_reset(struct drm_i915_private *dev_priv)
  {
+       struct drm_device *dev = &dev_priv->drm;
+       struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+       struct drm_atomic_state *state = dev_priv->modeset_restore_state;
+       int ret;
+
         /*
          * Flips in the rings will be nuked by the reset,
          * so complete all pending flips so that user space
@@ -3138,6 +3208,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
         if (IS_GEN2(dev_priv))
                 return;
  
+       dev_priv->modeset_restore_state = NULL;
+
         /* reset doesn't touch the display */
         if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
                 /*
@@ -3149,29 +3221,32 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
                  * FIXME: Atomic will make this obsolete since we won't schedule
                  * CS-based flips (which might get lost in gpu resets) any more.
                  */
-               intel_update_primary_planes(&dev_priv->drm);
-               return;
-       }
-
-       /*
-        * The display has been reset as well,
-        * so need a full re-initialization.
-        */
-       intel_runtime_pm_disable_interrupts(dev_priv);
-       intel_runtime_pm_enable_interrupts(dev_priv);
+               intel_update_primary_planes(dev);
+       } else {
+               /*
+                * The display has been reset as well,
+                * so need a full re-initialization.
+                */
+               intel_runtime_pm_disable_interrupts(dev_priv);
+               intel_runtime_pm_enable_interrupts(dev_priv);
  
-       intel_modeset_init_hw(&dev_priv->drm);
+               intel_modeset_init_hw(dev);
  
-       spin_lock_irq(&dev_priv->irq_lock);
-       if (dev_priv->display.hpd_irq_setup)
-               dev_priv->display.hpd_irq_setup(dev_priv);
-       spin_unlock_irq(&dev_priv->irq_lock);
+               spin_lock_irq(&dev_priv->irq_lock);
+               if (dev_priv->display.hpd_irq_setup)
+                       dev_priv->display.hpd_irq_setup(dev_priv);
+               spin_unlock_irq(&dev_priv->irq_lock);
  
-       intel_display_resume(&dev_priv->drm);
+               ret = __intel_display_resume(dev, state);
+               if (ret)
+                       DRM_ERROR("Restoring old state failed with %i\n", ret);
  
-       intel_hpd_init(dev_priv);
+               intel_hpd_init(dev_priv);
+       }
  
-       drm_modeset_unlock_all(&dev_priv->drm);
+       drm_modeset_drop_locks(ctx);
+       drm_modeset_acquire_fini(ctx);
+       mutex_unlock(&dev->mode_config.mutex);
  }
  
  static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -16156,9 +16231,10 @@ void intel_display_resume(struct drm_device *dev)
         struct drm_atomic_state *state = dev_priv->modeset_restore_state;
         struct drm_modeset_acquire_ctx ctx;
         int ret;
-       bool setup = false;
  
         dev_priv->modeset_restore_state = NULL;
+       if (state)
+               state->acquire_ctx = &ctx;
  
         /*
          * This is a cludge because with real atomic modeset mode_config.mutex
@@ -16169,43 +16245,17 @@ void intel_display_resume(struct drm_device *dev)
         mutex_lock(&dev->mode_config.mutex);
         drm_modeset_acquire_init(&ctx, 0);
  
-retry:
-       ret = drm_modeset_lock_all_ctx(dev, &ctx);
-
-       if (ret == 0 && !setup) {
-               setup = true;
-
-               intel_modeset_setup_hw_state(dev);
-               i915_redisable_vga(dev);
-       }
-
-       if (ret == 0 && state) {
-               struct drm_crtc_state *crtc_state;
-               struct drm_crtc *crtc;
-               int i;
-
-               state->acquire_ctx = &ctx;
-
-               /* ignore any reset values/BIOS leftovers in the WM registers */
-               to_intel_atomic_state(state)->skip_intermediate_wm = true;
-
-               for_each_crtc_in_state(state, crtc, crtc_state, i) {
-                       /*
-                        * Force recalculation even if we restore
-                        * current state. With fast modeset this may not result
-                        * in a modeset when the state is compatible.
-                        */
-                       crtc_state->mode_changed = true;
-               }
-
-               ret = drm_atomic_commit(state);
-       }
+       while (1) {
+               ret = drm_modeset_lock_all_ctx(dev, &ctx);
+               if (ret != -EDEADLK)
+                       break;
  
-       if (ret == -EDEADLK) {
                 drm_modeset_backoff(&ctx);
-               goto retry;
         }
  
+       if (!ret)
+               ret = __intel_display_resume(dev, state);
+
         drm_modeset_drop_locks(&ctx);
         drm_modeset_acquire_fini(&ctx);
         mutex_unlock(&dev->mode_config.mutex);
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c

index 6a7ad3ed1463206c7b1b88e658e3103e7c58526e..3836a1c797141079218089099c837ed926f834c8 100644 (file)
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -1230,12 +1230,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
         if (i915.enable_fbc >= 0)
                 return !!i915.enable_fbc;
  
+       if (!HAS_FBC(dev_priv))
+               return 0;
+
         if (IS_BROADWELL(dev_priv))
                 return 1;
  
         return 0;
  }
  
+static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
+{
+#ifdef CONFIG_INTEL_IOMMU
+       /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
+       if (intel_iommu_gfx_mapped &&
+           (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
+               DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
+               return true;
+       }
+#endif
+
+       return false;
+}
+
  /**
   * intel_fbc_init - Initialize FBC
   * @dev_priv: the i915 device
@@ -1253,6 +1270,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
         fbc->active = false;
         fbc->work.scheduled = false;
  
+       if (need_fbc_vtd_wa(dev_priv))
+               mkwrite_device_info(dev_priv)->has_fbc = false;
+
         i915.enable_fbc = intel_sanitize_fbc_option(dev_priv);
         DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc);
  
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c

index 97ba6c8cf907862197a42bbd9935030df0e16856..d5deb58a2128dff86b68f1f9179f4aad23e7ee8d 100644 (file)
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3344,6 +3344,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
                 plane_bytes_per_line *= 4;
                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
                 plane_blocks_per_line /= 4;
+       } else if (tiling == DRM_FORMAT_MOD_NONE) {
+               plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
         } else {
                 plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
         }
@@ -6574,9 +6576,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
  
  void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
  {
-       if (IS_CHERRYVIEW(dev_priv))
-               return;
-       else if (IS_VALLEYVIEW(dev_priv))
+       if (IS_VALLEYVIEW(dev_priv))
                 valleyview_cleanup_gt_powersave(dev_priv);
  
         if (!i915.enable_rc6)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

index cca7792f26d5058c99edfee0d99c76d7706b0ebe..1d3161bbea24ecc865c82287daa331adb6516f99 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1178,8 +1178,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
                 I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
                                            L3_HIGH_PRIO_CREDITS(2));
  
-       /* WaInsertDummyPushConstPs:bxt */
-       if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+       /* WaToEnableHwFixForPushConstHWBug:bxt */
+       if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                   GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
  
@@ -1222,8 +1222,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
                 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
                            GEN8_LQSC_RO_PERF_DIS);
  
-       /* WaInsertDummyPushConstPs:kbl */
-       if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+       /* WaToEnableHwFixForPushConstHWBug:kbl */
+       if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                   GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
  
diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig

index 23ac8041c562924120b6fdbe82eda351914e0e85..294de4549922a12f85ffe40d8cfd87b85694cd52 100644 (file)
--- a/drivers/gpu/drm/mediatek/Kconfig
+++ b/drivers/gpu/drm/mediatek/Kconfig
@@ -2,6 +2,9 @@ config DRM_MEDIATEK
         tristate "DRM Support for Mediatek SoCs"
         depends on DRM
         depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST)
+       depends on COMMON_CLK
+       depends on HAVE_ARM_SMCCC
+       depends on OF
         select DRM_GEM_CMA_HELPER
         select DRM_KMS_HELPER
         select DRM_MIPI_DSI
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c

index 6de3428612027f5da11ccec1e1827bc828e4e341..ddef0d4940843105de67327a7c5222d4ee6a78ed 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -198,16 +198,7 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
         atpx->is_hybrid = false;
         if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                 printk("ATPX Hybrid Graphics\n");
-#if 1
-               /* This is a temporary hack until the D3 cold support
-                * makes it upstream.  The ATPX power_control method seems
-                * to still work on even if the system should be using
-                * the new standardized hybrid D3 cold ACPI interface.
-                */
-               atpx->functions.power_cntl = true;
-#else
                 atpx->functions.power_cntl = false;
-#endif
                 atpx->is_hybrid = true;
         }
  
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c

index 730d840282603160ffad847fe938dc2e23319e7f..d0203a115effdd913d5fad1ed25775bad958f878 100644 (file)
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -491,7 +491,7 @@ struct it87_sio_data {
  struct it87_data {
         const struct attribute_group *groups[7];
         enum chips type;
-       u16 features;
+       u32 features;
         u8 peci_mask;
         u8 old_peci_mask;
  
diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c

index f23372669f770fe81154d116843d34e1b40c9b6a..1bb97f658b47a1fa4c5b15e85779fa5160c6c3e6 100644 (file)
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -38,6 +38,7 @@
  #define AT91_I2C_TIMEOUT       msecs_to_jiffies(100)   /* transfer timeout */
  #define AT91_I2C_DMA_THRESHOLD 8                       /* enable DMA if transfer size is bigger than this threshold */
  #define AUTOSUSPEND_TIMEOUT            2000
+#define AT91_I2C_MAX_ALT_CMD_DATA_SIZE 256
  
  /* AT91 TWI register definitions */
  #define        AT91_TWI_CR             0x0000  /* Control Register */
@@ -141,6 +142,7 @@ struct at91_twi_dev {
         unsigned twi_cwgr_reg;
         struct at91_twi_pdata *pdata;
         bool use_dma;
+       bool use_alt_cmd;
         bool recv_len_abort;
         u32 fifo_size;
         struct at91_twi_dma dma;
@@ -269,7 +271,7 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev)
  
         /* send stop when last byte has been written */
         if (--dev->buf_len == 0)
-               if (!dev->pdata->has_alt_cmd)
+               if (!dev->use_alt_cmd)
                         at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
  
         dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -292,7 +294,7 @@ static void at91_twi_write_data_dma_callback(void *data)
          * we just have to enable TXCOMP one.
          */
         at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP);
-       if (!dev->pdata->has_alt_cmd)
+       if (!dev->use_alt_cmd)
                 at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
  }
  
@@ -410,7 +412,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev)
         }
  
         /* send stop if second but last byte has been read */
-       if (!dev->pdata->has_alt_cmd && dev->buf_len == 1)
+       if (!dev->use_alt_cmd && dev->buf_len == 1)
                 at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
  
         dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -426,7 +428,7 @@ static void at91_twi_read_data_dma_callback(void *data)
         dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]),
                          dev->buf_len, DMA_FROM_DEVICE);
  
-       if (!dev->pdata->has_alt_cmd) {
+       if (!dev->use_alt_cmd) {
                 /* The last two bytes have to be read without using dma */
                 dev->buf += dev->buf_len - 2;
                 dev->buf_len = 2;
@@ -443,7 +445,7 @@ static void at91_twi_read_data_dma(struct at91_twi_dev *dev)
         struct dma_chan *chan_rx = dma->chan_rx;
         size_t buf_len;
  
-       buf_len = (dev->pdata->has_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
+       buf_len = (dev->use_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
         dma->direction = DMA_FROM_DEVICE;
  
         /* Keep in mind that we won't use dma to read the last two bytes */
@@ -651,7 +653,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev)
                 unsigned start_flags = AT91_TWI_START;
  
                 /* if only one byte is to be read, immediately stop transfer */
-               if (!has_alt_cmd && dev->buf_len <= 1 &&
+               if (!dev->use_alt_cmd && dev->buf_len <= 1 &&
                     !(dev->msg->flags & I2C_M_RECV_LEN))
                         start_flags |= AT91_TWI_STOP;
                 at91_twi_write(dev, AT91_TWI_CR, start_flags);
@@ -745,7 +747,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
         int ret;
         unsigned int_addr_flag = 0;
         struct i2c_msg *m_start = msg;
-       bool is_read, use_alt_cmd = false;
+       bool is_read;
  
         dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num);
  
@@ -768,14 +770,16 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
                 at91_twi_write(dev, AT91_TWI_IADR, internal_address);
         }
  
+       dev->use_alt_cmd = false;
         is_read = (m_start->flags & I2C_M_RD);
         if (dev->pdata->has_alt_cmd) {
-               if (m_start->len > 0) {
+               if (m_start->len > 0 &&
+                   m_start->len < AT91_I2C_MAX_ALT_CMD_DATA_SIZE) {
                         at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMEN);
                         at91_twi_write(dev, AT91_TWI_ACR,
                                        AT91_TWI_ACR_DATAL(m_start->len) |
                                        ((is_read) ? AT91_TWI_ACR_DIR : 0));
-                       use_alt_cmd = true;
+                       dev->use_alt_cmd = true;
                 } else {
                         at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMDIS);
                 }
@@ -784,7 +788,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num)
         at91_twi_write(dev, AT91_TWI_MMR,
                        (m_start->addr << 16) |
                        int_addr_flag |
-                      ((!use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
+                      ((!dev->use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
  
         dev->buf_len = m_start->len;
         dev->buf = m_start->buf;
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c

index 19c843828fe2ca504d9616d3d0fcad48d089c8a1..95f7cac76f89bfeac68ad22521d8c38c2df348c8 100644 (file)
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -158,7 +158,7 @@ static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data)
  
         if (status & BIT(IS_M_START_BUSY_SHIFT)) {
                 iproc_i2c->xfer_is_done = 1;
-               complete_all(&iproc_i2c->done);
+               complete(&iproc_i2c->done);
         }
  
         writel(status, iproc_i2c->base + IS_OFFSET);
diff --git a/drivers/i2c/busses/i2c-bcm-kona.c b/drivers/i2c/busses/i2c-bcm-kona.c

index ac9f47679c3a4b18eabfb48c76420ca5871cc34d..f98743277e3c7491ac7063fa9c57cddc5dc1d229 100644 (file)
--- a/drivers/i2c/busses/i2c-bcm-kona.c
+++ b/drivers/i2c/busses/i2c-bcm-kona.c
@@ -229,7 +229,7 @@ static irqreturn_t bcm_kona_i2c_isr(int irq, void *devid)
                        dev->base + TXFCR_OFFSET);
  
         writel(status & ~ISR_RESERVED_MASK, dev->base + ISR_OFFSET);
-       complete_all(&dev->done);
+       complete(&dev->done);
  
         return IRQ_HANDLED;
  }
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c

index 3f5a4d71d3bf32289997371955e6297a26dc1d20..385b57bfcb386ce8754892741584fd30a10b3b47 100644 (file)
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -228,7 +228,7 @@ static irqreturn_t brcmstb_i2c_isr(int irq, void *devid)
                 return IRQ_NONE;
  
         brcmstb_i2c_enable_disable_irq(dev, INT_DISABLE);
-       complete_all(&dev->done);
+       complete(&dev->done);
  
         dev_dbg(dev->device, "isr handled");
         return IRQ_HANDLED;
diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c

index a0d95ff682ae120186a689dd0dcdd74f3d65b0f3..2d5ff86398d0911cd0c3a261fb689cf2c9e04609 100644 (file)
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -215,7 +215,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
         msg->outsize = request_len;
         msg->insize = response_len;
  
-       result = cros_ec_cmd_xfer(bus->ec, msg);
+       result = cros_ec_cmd_xfer_status(bus->ec, msg);
         if (result < 0) {
                 dev_err(dev, "Error transferring EC i2c message %d\n", result);
                 goto exit;
diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c

index 71d3929adf54ed074e1600fcfd0b844f6e6574d6..76e28980904f0b166971ffa09688f4302f067c5b 100644 (file)
--- a/drivers/i2c/busses/i2c-meson.c
+++ b/drivers/i2c/busses/i2c-meson.c
@@ -211,7 +211,7 @@ static void meson_i2c_stop(struct meson_i2c *i2c)
                 meson_i2c_add_token(i2c, TOKEN_STOP);
         } else {
                 i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
         }
  }
  
@@ -238,7 +238,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id)
                 dev_dbg(i2c->dev, "error bit set\n");
                 i2c->error = -ENXIO;
                 i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
                 goto out;
         }
  
@@ -269,7 +269,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id)
                 break;
         case STATE_STOP:
                 i2c->state = STATE_IDLE;
-               complete_all(&i2c->done);
+               complete(&i2c->done);
                 break;
         case STATE_IDLE:
                 break;
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c

index dfa7a4b4a91d78ec827c074089e035f973c585eb..ac88a524143e07406bc2e4837a80a8ac17a4bd91 100644 (file)
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -379,6 +379,7 @@ static int ocores_i2c_of_probe(struct platform_device *pdev,
                         if (!clock_frequency_present) {
                                 dev_err(&pdev->dev,
                                         "Missing required parameter 'opencores,ip-clock-frequency'\n");
+                               clk_disable_unprepare(i2c->clk);
                                 return -ENODEV;
                         }
                         i2c->ip_clock_khz = clock_frequency / 1000;
@@ -467,20 +468,21 @@ static int ocores_i2c_probe(struct platform_device *pdev)
                 default:
                         dev_err(&pdev->dev, "Unsupported I/O width (%d)\n",
                                 i2c->reg_io_width);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err_clk;
                 }
         }
  
         ret = ocores_init(&pdev->dev, i2c);
         if (ret)
-               return ret;
+               goto err_clk;
  
         init_waitqueue_head(&i2c->wait);
         ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
                                pdev->name, i2c);
         if (ret) {
                 dev_err(&pdev->dev, "Cannot claim IRQ\n");
-               return ret;
+               goto err_clk;
         }
  
         /* hook up driver to tree */
@@ -494,7 +496,7 @@ static int ocores_i2c_probe(struct platform_device *pdev)
         ret = i2c_add_adapter(&i2c->adap);
         if (ret) {
                 dev_err(&pdev->dev, "Failed to add adapter\n");
-               return ret;
+               goto err_clk;
         }
  
         /* add in known devices to the bus */
@@ -504,6 +506,10 @@ static int ocores_i2c_probe(struct platform_device *pdev)
         }
  
         return 0;
+
+err_clk:
+       clk_disable_unprepare(i2c->clk);
+       return ret;
  }
  
  static int ocores_i2c_remove(struct platform_device *pdev)
diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c

index 8de073aed001482461b3ad12398c00fa9417fc95..215ac87f606d2d0f59bc17720ca2ec8fd60fae09 100644 (file)
--- a/drivers/i2c/muxes/i2c-demux-pinctrl.c
+++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c
@@ -68,7 +68,7 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
         adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np);
         if (!adap) {
                 ret = -ENODEV;
-               goto err;
+               goto err_with_revert;
         }
  
         p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name);
@@ -103,6 +103,8 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
  
   err_with_put:
         i2c_put_adapter(adap);
+ err_with_revert:
+       of_changeset_revert(&priv->chan[new_chan].chgset);
   err:
         dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret);
         return ret;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c

index 08a1e2f3690f148a38815239462a447321f25bb6..00c8a08d56e722349c64eff91b9ae9bb5a59a0bf 100644 (file)
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -68,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
         if (!iovad)
                 return;
  
-       put_iova_domain(iovad);
+       if (iovad->granule)
+               put_iova_domain(iovad);
         kfree(iovad);
         domain->iova_cookie = NULL;
  }
@@ -151,12 +152,15 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
         }
  }
  
-static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
                 dma_addr_t dma_limit)
  {
+       struct iova_domain *iovad = domain->iova_cookie;
         unsigned long shift = iova_shift(iovad);
         unsigned long length = iova_align(iovad, size) >> shift;
  
+       if (domain->geometry.force_aperture)
+               dma_limit = min(dma_limit, domain->geometry.aperture_end);
         /*
          * Enforce size-alignment to be safe - there could perhaps be an
          * attribute to control this per-device, or at least per-domain...
@@ -314,7 +318,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
         if (!pages)
                 return NULL;
  
-       iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+       iova = __alloc_iova(domain, size, dev->coherent_dma_mask);
         if (!iova)
                 goto out_free_pages;
  
@@ -386,7 +390,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
         phys_addr_t phys = page_to_phys(page) + offset;
         size_t iova_off = iova_offset(iovad, phys);
         size_t len = iova_align(iovad, size + iova_off);
-       struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+       struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev));
  
         if (!iova)
                 return DMA_ERROR_CODE;
@@ -538,7 +542,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
                 prev = s;
         }
  
-       iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+       iova = __alloc_iova(domain, iova_len, dma_get_mask(dev));
         if (!iova)
                 goto out_restore_sg;
  
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h

index 9ed0a8462ccf2983eaa0107e8cf8a4a81dd737b6..3dab13b4a211297fac3914e492ab709b27d25921 100644 (file)
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -55,19 +55,19 @@ struct mtk_iommu_data {
         bool                            enable_4GB;
  };
  
-static int compare_of(struct device *dev, void *data)
+static inline int compare_of(struct device *dev, void *data)
  {
         return dev->of_node == data;
  }
  
-static int mtk_iommu_bind(struct device *dev)
+static inline int mtk_iommu_bind(struct device *dev)
  {
         struct mtk_iommu_data *data = dev_get_drvdata(dev);
  
         return component_bind_all(dev, &data->smi_imu);
  }
  
-static void mtk_iommu_unbind(struct device *dev)
+static inline void mtk_iommu_unbind(struct device *dev)
  {
         struct mtk_iommu_data *data = dev_get_drvdata(dev);
  
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c

index 4e9784b4e0ac655659a521d7427658e44ad6bf7d..eedba67b0e3ef6bed544b959d8440a5433c6bc75 100644 (file)
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -181,7 +181,7 @@ struct crypt_config {
         u8 key[0];
  };
  
-#define MIN_IOS        16
+#define MIN_IOS        64
  
  static void clone_init(struct dm_crypt_io *, struct bio *);
  static void kcryptd_queue_crypt(struct dm_crypt_io *io);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c

index 1b9795d75ef898cd172bde67d1323ee8736606c3..8abde6b8cedc4540dac80b73256317fb671dbd69 100644 (file)
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -191,7 +191,6 @@ struct raid_dev {
  #define RT_FLAG_RS_BITMAP_LOADED       2
  #define RT_FLAG_UPDATE_SBS             3
  #define RT_FLAG_RESHAPE_RS             4
-#define RT_FLAG_KEEP_RS_FROZEN         5
  
  /* Array elements of 64 bit needed for rebuild/failed disk bits */
  #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -861,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
  {
         unsigned long min_region_size = rs->ti->len / (1 << 21);
  
+       if (rs_is_raid0(rs))
+               return 0;
+
         if (!region_size) {
                 /*
                  * Choose a reasonable default.  All figures in sectors.
@@ -930,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs)
                         rebuild_cnt++;
  
         switch (rs->raid_type->level) {
+       case 0:
+               break;
         case 1:
                 if (rebuild_cnt >= rs->md.raid_disks)
                         goto too_many;
@@ -2335,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
                 case 0:
                         break;
                 default:
+                       /*
+                        * We have to keep any raid0 data/metadata device pairs or
+                        * the MD raid0 personality will fail to start the array.
+                        */
+                       if (rs_is_raid0(rs))
+                               continue;
+
                         dev = container_of(rdev, struct raid_dev, rdev);
                         if (dev->meta_dev)
                                 dm_put_device(ti, dev->meta_dev);
@@ -2579,7 +2590,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
                 } else {
                         /* Process raid1 without delta_disks */
                         mddev->raid_disks = rs->raid_disks;
-                       set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
                         reshape = false;
                 }
         } else {
@@ -2590,7 +2600,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
         if (reshape) {
                 set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
                 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-               set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
         } else if (mddev->raid_disks < rs->raid_disks)
                 /* Create new superblocks and bitmaps, if any new disks */
                 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
@@ -2902,7 +2911,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                         goto bad;
  
                 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-               set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
                 /* Takeover ain't recovery, so disable recovery */
                 rs_setup_recovery(rs, MaxSector);
                 rs_set_new(rs);
@@ -3386,21 +3394,28 @@ static void raid_postsuspend(struct dm_target *ti)
  {
         struct raid_set *rs = ti->private;
  
-       if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
-               if (!rs->md.suspended)
-                       mddev_suspend(&rs->md);
-               rs->md.ro = 1;
-       }
+       if (!rs->md.suspended)
+               mddev_suspend(&rs->md);
+
+       rs->md.ro = 1;
  }
  
  static void attempt_restore_of_faulty_devices(struct raid_set *rs)
  {
         int i;
-       uint64_t failed_devices, cleared_failed_devices = 0;
+       uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
         unsigned long flags;
+       bool cleared = false;
         struct dm_raid_superblock *sb;
+       struct mddev *mddev = &rs->md;
         struct md_rdev *r;
  
+       /* RAID personalities have to provide hot add/remove methods or we need to bail out. */
+       if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
+               return;
+
+       memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+
         for (i = 0; i < rs->md.raid_disks; i++) {
                 r = &rs->dev[i].rdev;
                 if (test_bit(Faulty, &r->flags) && r->sb_page &&
@@ -3420,7 +3435,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
                          * ourselves.
                          */
                         if ((r->raid_disk >= 0) &&
-                           (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+                           (mddev->pers->hot_remove_disk(mddev, r) != 0))
                                 /* Failed to revive this device, try next */
                                 continue;
  
@@ -3430,22 +3445,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
                         clear_bit(Faulty, &r->flags);
                         clear_bit(WriteErrorSeen, &r->flags);
                         clear_bit(In_sync, &r->flags);
-                       if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+                       if (mddev->pers->hot_add_disk(mddev, r)) {
                                 r->raid_disk = -1;
                                 r->saved_raid_disk = -1;
                                 r->flags = flags;
                         } else {
                                 r->recovery_offset = 0;
-                               cleared_failed_devices |= 1 << i;
+                               set_bit(i, (void *) cleared_failed_devices);
+                               cleared = true;
                         }
                 }
         }
-       if (cleared_failed_devices) {
+
+       /* If any failed devices could be cleared, update all sbs failed_devices bits */
+       if (cleared) {
+               uint64_t failed_devices[DISKS_ARRAY_ELEMS];
+
                 rdev_for_each(r, &rs->md) {
                         sb = page_address(r->sb_page);
-                       failed_devices = le64_to_cpu(sb->failed_devices);
-                       failed_devices &= ~cleared_failed_devices;
-                       sb->failed_devices = cpu_to_le64(failed_devices);
+                       sb_retrieve_failed_devices(sb, failed_devices);
+
+                       for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
+                               failed_devices[i] &= ~cleared_failed_devices[i];
+
+                       sb_update_failed_devices(sb, failed_devices);
                 }
         }
  }
@@ -3610,26 +3633,15 @@ static void raid_resume(struct dm_target *ti)
                  * devices are reachable again.
                  */
                 attempt_restore_of_faulty_devices(rs);
-       } else {
-               mddev->ro = 0;
-               mddev->in_sync = 0;
+       }
  
-               /*
-                * When passing in flags to the ctr, we expect userspace
-                * to reset them because they made it to the superblocks
-                * and reload the mapping anyway.
-                *
-                * -> only unfreeze recovery in case of a table reload or
-                *    we'll have a bogus recovery/reshape position
-                *    retrieved from the superblock by the ctr because
-                *    the ongoing recovery/reshape will change it after read.
-                */
-               if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
-                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+       mddev->ro = 0;
+       mddev->in_sync = 0;
  
-               if (mddev->suspended)
-                       mddev_resume(mddev);
-       }
+       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+       if (mddev->suspended)
+               mddev_resume(mddev);
  }
  
  static struct target_type raid_target = {
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c

index 4ace1da17db8f4473fcb72d546a1067b943bc87b..6c25213ab38c8d271ff58e093adee523571443ed 100644 (file)
--- a/drivers/md/dm-round-robin.c
+++ b/drivers/md/dm-round-robin.c
@@ -210,14 +210,17 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes)
         struct path_info *pi = NULL;
         struct dm_path *current_path = NULL;
  
+       local_irq_save(flags);
         current_path = *this_cpu_ptr(s->current_path);
         if (current_path) {
                 percpu_counter_dec(&s->repeat_count);
-               if (percpu_counter_read_positive(&s->repeat_count) > 0)
+               if (percpu_counter_read_positive(&s->repeat_count) > 0) {
+                       local_irq_restore(flags);
                         return current_path;
+               }
         }
  
-       spin_lock_irqsave(&s->lock, flags);
+       spin_lock(&s->lock);
         if (!list_empty(&s->valid_paths)) {
                 pi = list_entry(s->valid_paths.next, struct path_info, list);
                 list_move_tail(&pi->list, &s->valid_paths);
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c

index bdee9a01ef35ad6fa34f1238ea7268454e0e56c0..c466ee2b0c973a7c77cb16566770dec3b426db33 100644 (file)
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -90,8 +90,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
          */
         mutex_lock(&afu->contexts_lock);
         idr_preload(GFP_KERNEL);
-       i = idr_alloc(&ctx->afu->contexts_idr, ctx,
-                     ctx->afu->adapter->native->sl_ops->min_pe,
+       i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe,
                       ctx->afu->num_procs, GFP_NOWAIT);
         idr_preload_end();
         mutex_unlock(&afu->contexts_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h

index de090533f18cb8eb5b2c8f0a0b3bfbf6f11e5e00..344a0ff8f8c7df97e5328f89e46cd3f55b791d68 100644 (file)
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -561,7 +561,6 @@ struct cxl_service_layer_ops {
         u64 (*timebase_read)(struct cxl *adapter);
         int capi_mode;
         bool needs_reset_before_disable;
-       int min_pe;
  };
  
  struct cxl_native {
@@ -603,6 +602,7 @@ struct cxl {
         struct bin_attribute cxl_attr;
         int adapter_num;
         int user_irqs;
+       int min_pe;
         u64 ps_size;
         u16 psl_rev;
         u16 base_image;
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c

index 3bcdaee11ba159aa13580bdd48151e0ef0755dbd..e606fdc4bc9cc3ec0ff7d3f29d4e691c30a26bd4 100644 (file)
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -924,7 +924,7 @@ static irqreturn_t native_irq_multiplexed(int irq, void *data)
         return fail_psl_irq(afu, &irq_info);
  }
  
-void native_irq_wait(struct cxl_context *ctx)
+static void native_irq_wait(struct cxl_context *ctx)
  {
         u64 dsisr;
         int timeout = 1000;
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c

index d152e2de8c9375e2760b03cd93857d542f9a36d4..6f0c4ac4b6498991913647b0d70b2d4805870f0b 100644 (file)
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -379,7 +379,7 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id
  
  static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev)
  {
-       u64 psl_dsnctl;
+       u64 psl_dsnctl, psl_fircntl;
         u64 chipid;
         u64 capp_unit_id;
         int rc;
@@ -398,8 +398,11 @@ static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_
         cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
         /* snoop write mask */
         cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
-       /* set fir_accum */
-       cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
+       /* set fir_cntl to recommended value for production env */
+       psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+       psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+       psl_fircntl |= 0x1ULL; /* ce_thresh */
+       cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl);
         /* for debugging with trace arrays */
         cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
  
@@ -1521,14 +1524,15 @@ static const struct cxl_service_layer_ops xsl_ops = {
         .write_timebase_ctrl = write_timebase_ctrl_xsl,
         .timebase_read = timebase_read_xsl,
         .capi_mode = OPAL_PHB_CAPI_MODE_DMA,
-       .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */
  };
  
  static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
  {
         if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) {
+               /* Mellanox CX-4 */
                 dev_info(&adapter->dev, "Device uses an XSL\n");
                 adapter->native->sl_ops = &xsl_ops;
+               adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */
         } else {
                 dev_info(&adapter->dev, "Device uses a PSL\n");
                 adapter->native->sl_ops = &psl_ops;
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c

index dee8def1c1936c36fc82d894e269c1d8825d5c29..7ada5f1b7bb67b808ec824561b0ceb6def317562 100644 (file)
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -221,7 +221,7 @@ int cxl_pci_vphb_add(struct cxl_afu *afu)
         /* Setup the PHB using arch provided callback */
         phb->ops = &cxl_pcie_pci_ops;
         phb->cfg_addr = NULL;
-       phb->cfg_data = 0;
+       phb->cfg_data = NULL;
         phb->private_data = afu;
         phb->controller_ops = cxl_pci_controller_ops;
  
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c

index 1f276fa30ba68233339682a0aec35b26a0f80e0f..217e8da0628caa909aa212e3c2598dc0ca09da68 100644 (file)
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -152,7 +152,7 @@ module_param(lacp_rate, charp, 0);
  MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
                             "0 for slow, 1 for fast");
  module_param(ad_select, charp, 0);
-MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; "
+MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
                             "0 for stable (default), 1 for bandwidth, "
                             "2 for count");
  module_param(min_links, int, 0);
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h

index 8f12bddd5dc90077add651246fc4e8467cd12960..a0b453ea34c90423ae476c7fc5f98aa8b3bff6ce 100644 (file)
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -258,7 +258,7 @@
   * BCM5325 and BCM5365 share most definitions below
   */
  #define B53_ARLTBL_MAC_VID_ENTRY(n)    (0x10 * (n))
-#define   ARLTBL_MAC_MASK              0xffffffffffff
+#define   ARLTBL_MAC_MASK              0xffffffffffffULL
  #define   ARLTBL_VID_S                 48
  #define   ARLTBL_VID_MASK_25           0xff
  #define   ARLTBL_VID_MASK              0xfff
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c

index d36aedde8cb9b9aa24be3832f727236e3e7cbd72..d1d9d3cf9139f66774fb3b1ed203254783d7f3c8 100644 (file)
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3187,6 +3187,7 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr)
         return err;
  }
  
+#ifdef CONFIG_NET_DSA_HWMON
  static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page,
                                     int reg)
  {
@@ -3212,6 +3213,7 @@ static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page,
  
         return ret;
  }
+#endif
  
  static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port)
  {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c

index 37a0f463b8de5c9d0479b220ac2303fa895cc6ea..18bb9556dd006861c0cc6e9a8997bac14ade5223 100644 (file)
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -793,6 +793,8 @@ int xgene_enet_phy_connect(struct net_device *ndev)
                         netdev_err(ndev, "Could not connect to PHY\n");
                         return  -ENODEV;
                 }
+#else
+               return -ENODEV;
  #endif
         }
  
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c

index 4bff0f3040df7a85bae40278e13b655009ba1a49..b0da9693f28a130a65e59a007548c773fa422e7f 100644 (file)
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -771,8 +771,10 @@ int arc_emac_probe(struct net_device *ndev, int interface)
         priv->dev = dev;
  
         priv->regs = devm_ioremap_resource(dev, &res_regs);
-       if (IS_ERR(priv->regs))
-               return PTR_ERR(priv->regs);
+       if (IS_ERR(priv->regs)) {
+               err = PTR_ERR(priv->regs);
+               goto out_put_node;
+       }
  
         dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs);
  
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c

index ff300f7cf5295fc0d4f1e1b0692d74e6a6091912..659261218d9f67fd2509718dee8fbd290003cb7d 100644 (file)
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12552,10 +12552,6 @@ static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
                                 info->data = TG3_RSS_MAX_NUM_QS;
                 }
  
-               /* The first interrupt vector only
-                * handles link interrupts.
-                */
-               info->data -= 1;
                 return 0;
  
         default:
@@ -14014,6 +14010,7 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
         }
  
         if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
+           (!ec->rx_coalesce_usecs) ||
             (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
             (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
             (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h

index 36893d8958d4d6ce3d73575149f44a68c99e0569..b6fcf10621b63c39975c3ce53528d9497ce628a2 100644 (file)
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -403,11 +403,11 @@
  #define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII    0x00000004
  #define MACB_CAPS_NO_GIGABIT_HALF              0x00000008
  #define MACB_CAPS_USRIO_DISABLED               0x00000010
+#define MACB_CAPS_JUMBO                                0x00000020
  #define MACB_CAPS_FIFO_MODE                    0x10000000
  #define MACB_CAPS_GIGABIT_MODE_AVAILABLE       0x20000000
  #define MACB_CAPS_SG_DISABLED                  0x40000000
  #define MACB_CAPS_MACB_IS_GEM                  0x80000000
-#define MACB_CAPS_JUMBO                                0x00000010
  
  /* Bit manipulation macros */
  #define MACB_BIT(name)                                 \
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c

index 1471e16ba7199b229c78884ee093799f24718423..f45385f5c6e58269d0d3f593b460fb5952a6c5e3 100644 (file)
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1299,6 +1299,7 @@ static int
  dm9000_open(struct net_device *dev)
  {
         struct board_info *db = netdev_priv(dev);
+       unsigned int irq_flags = irq_get_trigger_type(dev->irq);
  
         if (netif_msg_ifup(db))
                 dev_dbg(db->dev, "enabling %s\n", dev->name);
@@ -1306,9 +1307,11 @@ dm9000_open(struct net_device *dev)
         /* If there is no IRQ type specified, tell the user that this is a
          * problem
          */
-       if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
+       if (irq_flags == IRQF_TRIGGER_NONE)
                 dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
  
+       irq_flags |= IRQF_SHARED;
+
         /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
         iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
         mdelay(1); /* delay needs by DM9000B */
@@ -1316,8 +1319,7 @@ dm9000_open(struct net_device *dev)
         /* Initialize DM9000 board */
         dm9000_init_dm9000(dev);
  
-       if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
-                       dev->name, dev))
+       if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev))
                 return -EAGAIN;
         /* Now that we have an interrupt handler hooked up we can unmask
          * our interrupts
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c

index 1235c7f2564bd8de42a9416538b536af2dc07d50..1e1eb92998fb3d66f497f88b890817e22c4a8d3d 100644 (file)
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -17,7 +17,7 @@ static const struct mac_stats_string g_gmac_stats_string[] = {
         {"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)},
         {"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)},
         {"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)},
-       {"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
+       {"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
         {"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)},
         {"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)},
         {"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)},
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c

index 7fd4d54599e4557dd37b457396145195a77da3fa..6b03c8553e59710b5cad2cb903f0e4e05d0cae5e 100644 (file)
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -2032,7 +2032,8 @@ const struct e1000_info e1000_82574_info = {
                                   | FLAG2_DISABLE_ASPM_L0S
                                   | FLAG2_DISABLE_ASPM_L1
                                   | FLAG2_NO_DISABLE_RX
-                                 | FLAG2_DMA_BURST,
+                                 | FLAG2_DMA_BURST
+                                 | FLAG2_CHECK_SYSTIM_OVERFLOW,
         .pba                    = 32,
         .max_hw_frame_size      = DEFAULT_JUMBO,
         .get_variants           = e1000_get_variants_82571,
@@ -2053,7 +2054,8 @@ const struct e1000_info e1000_82583_info = {
                                   | FLAG_HAS_CTRLEXT_ON_LOAD,
         .flags2                 = FLAG2_DISABLE_ASPM_L0S
                                   | FLAG2_DISABLE_ASPM_L1
-                                 | FLAG2_NO_DISABLE_RX,
+                                 | FLAG2_NO_DISABLE_RX
+                                 | FLAG2_CHECK_SYSTIM_OVERFLOW,
         .pba                    = 32,
         .max_hw_frame_size      = DEFAULT_JUMBO,
         .get_variants           = e1000_get_variants_82571,
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h

index ef96cd11d6d2c34a726db4b1c0c0a5ec8a76bf7c..879cca47b021446565f30ea4e877e569ec17531c 100644 (file)
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -452,6 +452,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca);
  #define FLAG2_PCIM2PCI_ARBITER_WA         BIT(11)
  #define FLAG2_DFLT_CRC_STRIPPING          BIT(12)
  #define FLAG2_CHECK_RX_HWTSTAMP           BIT(13)
+#define FLAG2_CHECK_SYSTIM_OVERFLOW       BIT(14)
  
  #define E1000_RX_DESC_PS(R, i)     \
         (&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c

index 3e11322d8d586a839bb19b15b2dc37b680ad8bf5..f3aaca743ea3ff3a2250547e47fb8d1c275f0cea 100644 (file)
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -5885,7 +5885,8 @@ const struct e1000_info e1000_pch_lpt_info = {
                                   | FLAG_HAS_JUMBO_FRAMES
                                   | FLAG_APME_IN_WUC,
         .flags2                 = FLAG2_HAS_PHY_STATS
-                                 | FLAG2_HAS_EEE,
+                                 | FLAG2_HAS_EEE
+                                 | FLAG2_CHECK_SYSTIM_OVERFLOW,
         .pba                    = 26,
         .max_hw_frame_size      = 9022,
         .get_variants           = e1000_get_variants_ich8lan,
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c

index 02f443958f3199aaf95c786ef1b59bd2948f4937..7017281ba2dc6355449c7970e6f4f3dabd3fb65d 100644 (file)
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4302,6 +4302,42 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter)
         clear_bit(__E1000_RESETTING, &adapter->state);
  }
  
+/**
+ * e1000e_sanitize_systim - sanitize raw cycle counter reads
+ * @hw: pointer to the HW structure
+ * @systim: cycle_t value read, sanitized and returned
+ *
+ * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
+ * check to see that the time is incrementing at a reasonable
+ * rate and is a multiple of incvalue.
+ **/
+static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim)
+{
+       u64 time_delta, rem, temp;
+       cycle_t systim_next;
+       u32 incvalue;
+       int i;
+
+       incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
+       for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
+               /* latch SYSTIMH on read of SYSTIML */
+               systim_next = (cycle_t)er32(SYSTIML);
+               systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+
+               time_delta = systim_next - systim;
+               temp = time_delta;
+               /* VMWare users have seen incvalue of zero, don't div / 0 */
+               rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
+
+               systim = systim_next;
+
+               if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0))
+                       break;
+       }
+
+       return systim;
+}
+
  /**
   * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
   * @cc: cyclecounter structure
@@ -4312,7 +4348,7 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
                                                      cc);
         struct e1000_hw *hw = &adapter->hw;
         u32 systimel, systimeh;
-       cycle_t systim, systim_next;
+       cycle_t systim;
         /* SYSTIMH latching upon SYSTIML read does not work well.
          * This means that if SYSTIML overflows after we read it but before
          * we read SYSTIMH, the value of SYSTIMH has been incremented and we
@@ -4335,33 +4371,9 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc)
         systim = (cycle_t)systimel;
         systim |= (cycle_t)systimeh << 32;
  
-       if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
-               u64 time_delta, rem, temp;
-               u32 incvalue;
-               int i;
-
-               /* errata for 82574/82583 possible bad bits read from SYSTIMH/L
-                * check to see that the time is incrementing at a reasonable
-                * rate and is a multiple of incvalue
-                */
-               incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
-               for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
-                       /* latch SYSTIMH on read of SYSTIML */
-                       systim_next = (cycle_t)er32(SYSTIML);
-                       systim_next |= (cycle_t)er32(SYSTIMH) << 32;
-
-                       time_delta = systim_next - systim;
-                       temp = time_delta;
-                       /* VMWare users have seen incvalue of zero, don't div / 0 */
-                       rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
-
-                       systim = systim_next;
+       if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
+               systim = e1000e_sanitize_systim(hw, systim);
  
-                       if ((time_delta < E1000_82574_SYSTIM_EPSILON) &&
-                           (rem == 0))
-                               break;
-               }
-       }
         return systim;
  }
  
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c

index 81c99e1be708d445270323ece7c6c63cbc681111..c6ac7a61812fbffed95257ec271ed19218b9549a 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4554,23 +4554,38 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
   **/
  static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
  {
+       int i, tc_unused = 0;
         u8 num_tc = 0;
-       int i;
+       u8 ret = 0;
  
         /* Scan the ETS Config Priority Table to find
          * traffic class enabled for a given priority
-        * and use the traffic class index to get the
-        * number of traffic classes enabled
+        * and create a bitmask of enabled TCs
          */
-       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-               if (dcbcfg->etscfg.prioritytable[i] > num_tc)
-                       num_tc = dcbcfg->etscfg.prioritytable[i];
-       }
+       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+               num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
  
-       /* Traffic class index starts from zero so
-        * increment to return the actual count
+       /* Now scan the bitmask to check for
+        * contiguous TCs starting with TC0
          */
-       return num_tc + 1;
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (num_tc & BIT(i)) {
+                       if (!tc_unused) {
+                               ret++;
+                       } else {
+                               pr_err("Non-contiguous TC - Disabling DCB\n");
+                               return 1;
+                       }
+               } else {
+                       tc_unused = 1;
+               }
+       }
+
+       /* There is always at least TC0 */
+       if (!ret)
+               ret = 1;
+
+       return ret;
  }
  
  /**
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c

index e61b647f5f2a86bcc5844d3d16a65a0db5acb7c1..336c103ae374e4175b2c8274f06e5adb20a17256 100644 (file)
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -744,7 +744,8 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
                 }
         }
  
-       shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust);
+       shhwtstamps.hwtstamp =
+               ktime_add_ns(shhwtstamps.hwtstamp, adjust);
  
         skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
         dev_kfree_skb_any(adapter->ptp_tx_skb);
@@ -767,13 +768,32 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector,
                          struct sk_buff *skb)
  {
         __le64 *regval = (__le64 *)va;
+       struct igb_adapter *adapter = q_vector->adapter;
+       int adjust = 0;
  
         /* The timestamp is recorded in little endian format.
          * DWORD: 0        1        2        3
          * Field: Reserved Reserved SYSTIML  SYSTIMH
          */
-       igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+       igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
                                    le64_to_cpu(regval[1]));
+
+       /* adjust timestamp for the RX latency based on link speed */
+       if (adapter->hw.mac.type == e1000_i210) {
+               switch (adapter->link_speed) {
+               case SPEED_10:
+                       adjust = IGB_I210_RX_LATENCY_10;
+                       break;
+               case SPEED_100:
+                       adjust = IGB_I210_RX_LATENCY_100;
+                       break;
+               case SPEED_1000:
+                       adjust = IGB_I210_RX_LATENCY_1000;
+                       break;
+               }
+       }
+       skb_hwtstamps(skb)->hwtstamp =
+               ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
  }
  
  /**
@@ -825,7 +845,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
                 }
         }
         skb_hwtstamps(skb)->hwtstamp =
-               ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+               ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
  
         /* Update the last_rx_timestamp timer in order to enable watchdog check
          * for error case of latched timestamp on a dropped packet.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

index 5418c69a74630bdd8b2aa2519c9e7f41c3e76ac9..b4f03748adc02592890cbdafb4e607f610fce224 100644 (file)
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4100,6 +4100,8 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
         struct ixgbe_hw *hw = &adapter->hw;
         u32 vlnctrl, i;
  
+       vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
         switch (hw->mac.type) {
         case ixgbe_mac_82599EB:
         case ixgbe_mac_X540:
@@ -4112,8 +4114,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
                 /* fall through */
         case ixgbe_mac_82598EB:
                 /* legacy case, we can just disable VLAN filtering */
-               vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-               vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+               vlnctrl &= ~IXGBE_VLNCTRL_VFE;
                 IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
                 return;
         }
@@ -4125,6 +4126,10 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
         /* Set flag so we don't redo unnecessary work */
         adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
  
+       /* For VMDq and SR-IOV we must leave VLAN filtering enabled */
+       vlnctrl |= IXGBE_VLNCTRL_VFE;
+       IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
         /* Add PF to all active pools */
         for (i = IXGBE_VLVF_ENTRIES; --i;) {
                 u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
@@ -4191,6 +4196,11 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
         struct ixgbe_hw *hw = &adapter->hw;
         u32 vlnctrl, i;
  
+       /* Set VLAN filtering to enabled */
+       vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+       vlnctrl |= IXGBE_VLNCTRL_VFE;
+       IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
         switch (hw->mac.type) {
         case ixgbe_mac_82599EB:
         case ixgbe_mac_X540:
@@ -4202,10 +4212,6 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
                         break;
                 /* fall through */
         case ixgbe_mac_82598EB:
-               vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-               vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
-               vlnctrl |= IXGBE_VLNCTRL_VFE;
-               IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
                 return;
         }
  
@@ -8390,12 +8396,14 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
                             struct tcf_exts *exts, u64 *action, u8 *queue)
  {
         const struct tc_action *a;
+       LIST_HEAD(actions);
         int err;
  
         if (tc_no_actions(exts))
                 return -EINVAL;
  
-       tc_for_each_action(a, exts) {
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(a, &actions, list) {
  
                 /* Drop action */
                 if (is_tcf_gact_shot(a)) {
@@ -9517,6 +9525,7 @@ skip_sriov:
  
         /* copy netdev features into list of user selectable features */
         netdev->hw_features |= netdev->features |
+                              NETIF_F_HW_VLAN_CTAG_FILTER |
                                NETIF_F_HW_VLAN_CTAG_RX |
                                NETIF_F_HW_VLAN_CTAG_TX |
                                NETIF_F_RXALL |
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c

index b57ae3afb994ab0dde3b22831af841bbec063081..f1609542adf19a75f71cf3517deb578df5b57d1b 100644 (file)
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -245,12 +245,16 @@ static int mtk_phy_connect(struct mtk_mac *mac)
         case PHY_INTERFACE_MODE_MII:
                 ge_mode = 1;
                 break;
-       case PHY_INTERFACE_MODE_RMII:
+       case PHY_INTERFACE_MODE_REVMII:
                 ge_mode = 2;
                 break;
+       case PHY_INTERFACE_MODE_RMII:
+               if (!mac->id)
+                       goto err_phy;
+               ge_mode = 3;
+               break;
         default:
-               dev_err(eth->dev, "invalid phy_mode\n");
-               return -1;
+               goto err_phy;
         }
  
         /* put the gmac into the right mode */
@@ -263,13 +267,25 @@ static int mtk_phy_connect(struct mtk_mac *mac)
         mac->phy_dev->autoneg = AUTONEG_ENABLE;
         mac->phy_dev->speed = 0;
         mac->phy_dev->duplex = 0;
+
+       if (of_phy_is_fixed_link(mac->of_node))
+               mac->phy_dev->supported |=
+               SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
         mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
                                    SUPPORTED_Asym_Pause;
         mac->phy_dev->advertising = mac->phy_dev->supported |
                                     ADVERTISED_Autoneg;
         phy_start_aneg(mac->phy_dev);
  
+       of_node_put(np);
+
         return 0;
+
+err_phy:
+       of_node_put(np);
+       dev_err(eth->dev, "invalid phy_mode\n");
+       return -EINVAL;
  }
  
  static int mtk_mdio_init(struct mtk_eth *eth)
@@ -542,15 +558,15 @@ static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring,
         return &ring->buf[idx];
  }
  
-static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
  {
         if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
-               dma_unmap_single(dev,
+               dma_unmap_single(eth->dev,
                                  dma_unmap_addr(tx_buf, dma_addr0),
                                  dma_unmap_len(tx_buf, dma_len0),
                                  DMA_TO_DEVICE);
         } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
-               dma_unmap_page(dev,
+               dma_unmap_page(eth->dev,
                                dma_unmap_addr(tx_buf, dma_addr0),
                                dma_unmap_len(tx_buf, dma_len0),
                                DMA_TO_DEVICE);
@@ -595,9 +611,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
         if (skb_vlan_tag_present(skb))
                 txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
  
-       mapped_addr = dma_map_single(&dev->dev, skb->data,
+       mapped_addr = dma_map_single(eth->dev, skb->data,
                                      skb_headlen(skb), DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+       if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
                 return -ENOMEM;
  
         WRITE_ONCE(itxd->txd1, mapped_addr);
@@ -623,10 +639,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
  
                         n_desc++;
                         frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
-                       mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
+                       mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
                                                        frag_map_size,
                                                        DMA_TO_DEVICE);
-                       if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+                       if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
                                 goto err_dma;
  
                         if (i == nr_frags - 1 &&
@@ -679,7 +695,7 @@ err_dma:
                 tx_buf = mtk_desc_to_tx_buf(ring, itxd);
  
                 /* unmap dma */
-               mtk_tx_unmap(&dev->dev, tx_buf);
+               mtk_tx_unmap(eth, tx_buf);
  
                 itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
                 itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
@@ -836,11 +852,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
                         netdev->stats.rx_dropped++;
                         goto release_desc;
                 }
-               dma_addr = dma_map_single(&eth->netdev[mac]->dev,
+               dma_addr = dma_map_single(eth->dev,
                                           new_data + NET_SKB_PAD,
                                           ring->buf_size,
                                           DMA_FROM_DEVICE);
-               if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
+               if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
                         skb_free_frag(new_data);
                         netdev->stats.rx_dropped++;
                         goto release_desc;
@@ -855,7 +871,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
                 }
                 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
  
-               dma_unmap_single(&netdev->dev, trxd.rxd1,
+               dma_unmap_single(eth->dev, trxd.rxd1,
                                  ring->buf_size, DMA_FROM_DEVICE);
                 pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
                 skb->dev = netdev;
@@ -937,7 +953,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
                         done[mac]++;
                         budget--;
                 }
-               mtk_tx_unmap(eth->dev, tx_buf);
+               mtk_tx_unmap(eth, tx_buf);
  
                 ring->last_free = desc;
                 atomic_inc(&ring->free_count);
@@ -1092,7 +1108,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
  
         if (ring->buf) {
                 for (i = 0; i < MTK_DMA_SIZE; i++)
-                       mtk_tx_unmap(eth->dev, &ring->buf[i]);
+                       mtk_tx_unmap(eth, &ring->buf[i]);
                 kfree(ring->buf);
                 ring->buf = NULL;
         }
@@ -1751,6 +1767,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
                 goto free_netdev;
         }
         spin_lock_init(&mac->hw_stats->stats_lock);
+       u64_stats_init(&mac->hw_stats->syncp);
         mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
  
         SET_NETDEV_DEV(eth->netdev[id], eth->dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

index 0f19b01e3fffa202d01366f130012e34856cdaae..dc8b1cb0fdc8562b8564b78a324321bd479f80c4 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -318,6 +318,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                 u32 *action, u32 *flow_tag)
  {
         const struct tc_action *a;
+       LIST_HEAD(actions);
  
         if (tc_no_actions(exts))
                 return -EINVAL;
@@ -325,7 +326,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
         *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
         *action = 0;
  
-       tc_for_each_action(a, exts) {
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(a, &actions, list) {
                 /* Only support a single action per rule */
                 if (*action)
                         return -EINVAL;
@@ -362,13 +364,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                 u32 *action, u32 *dest_vport)
  {
         const struct tc_action *a;
+       LIST_HEAD(actions);
  
         if (tc_no_actions(exts))
                 return -EINVAL;
  
         *action = 0;
  
-       tc_for_each_action(a, exts) {
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(a, &actions, list) {
                 /* Only support a single action per rule */
                 if (*action)
                         return -EINVAL;
@@ -503,6 +507,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
         struct mlx5e_tc_flow *flow;
         struct tc_action *a;
         struct mlx5_fc *counter;
+       LIST_HEAD(actions);
         u64 bytes;
         u64 packets;
         u64 lastuse;
@@ -518,7 +523,8 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
  
         mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
  
-       tc_for_each_action(a, f->exts)
+       tcf_exts_to_list(f->exts, &actions);
+       list_for_each_entry(a, &actions, list)
                 tcf_action_stats_update(a, bytes, packets, lastuse);
  
         return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h

index 7ca9201f7dcbf6de88e0cbf2d69393db13d0e57d..1721098eef131773471bb99aa281f7959ff817ff 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3383,6 +3383,15 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1);
   */
  MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
  
+/* reg_ritr_lb_en
+ * Loop-back filter enable for unicast packets.
+ * If the flag is set then loop-back filter for unicast packets is
+ * implemented on the RIF. Multicast packets are always subject to
+ * loop-back filtering.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1);
+
  /* reg_ritr_virtual_router
   * Virtual router ID associated with the router interface.
   * Access: RW
@@ -3484,6 +3493,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
         mlxsw_reg_ritr_op_set(payload, op);
         mlxsw_reg_ritr_rif_set(payload, rif);
         mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+       mlxsw_reg_ritr_lb_en_set(payload, 1);
         mlxsw_reg_ritr_mtu_set(payload, mtu);
         mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
  }
@@ -4000,6 +4010,7 @@ static inline void mlxsw_reg_ralue_pack(char *payload,
  {
         MLXSW_REG_ZERO(ralue, payload);
         mlxsw_reg_ralue_protocol_set(payload, protocol);
+       mlxsw_reg_ralue_op_set(payload, op);
         mlxsw_reg_ralue_virtual_router_set(payload, virtual_router);
         mlxsw_reg_ralue_prefix_len_set(payload, prefix_len);
         mlxsw_reg_ralue_entry_type_set(payload,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c

index c3e61500819d00c9d1c4d61448f99c51057d3cbb..1f816890681109e367f8b7a8e5639e35474e2a44 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -942,8 +942,8 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport)
         kfree(mlxsw_sp_vport);
  }
  
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-                         u16 vid)
+static int mlxsw_sp_port_add_vid(struct net_device *dev,
+                                __be16 __always_unused proto, u16 vid)
  {
         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
         struct mlxsw_sp_port *mlxsw_sp_vport;
@@ -956,16 +956,12 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
         if (!vid)
                 return 0;
  
-       if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) {
-               netdev_warn(dev, "VID=%d already configured\n", vid);
+       if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid))
                 return 0;
-       }
  
         mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid);
-       if (!mlxsw_sp_vport) {
-               netdev_err(dev, "Failed to create vPort for VID=%d\n", vid);
+       if (!mlxsw_sp_vport)
                 return -ENOMEM;
-       }
  
         /* When adding the first VLAN interface on a bridged port we need to
          * transition all the active 802.1Q bridge VLANs to use explicit
@@ -973,24 +969,17 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
          */
         if (list_is_singular(&mlxsw_sp_port->vports_list)) {
                 err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
-               if (err) {
-                       netdev_err(dev, "Failed to set to Virtual mode\n");
+               if (err)
                         goto err_port_vp_mode_trans;
-               }
         }
  
         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
-       if (err) {
-               netdev_err(dev, "Failed to disable learning for VID=%d\n", vid);
+       if (err)
                 goto err_port_vid_learning_set;
-       }
  
         err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
-       if (err) {
-               netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-                          vid);
+       if (err)
                 goto err_port_add_vid;
-       }
  
         return 0;
  
@@ -1010,7 +999,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
         struct mlxsw_sp_port *mlxsw_sp_vport;
         struct mlxsw_sp_fid *f;
-       int err;
  
         /* VLAN 0 is removed from HW filter when device goes down, but
          * it is reserved in our case, so simply return.
@@ -1019,23 +1007,12 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
                 return 0;
  
         mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
-       if (!mlxsw_sp_vport) {
-               netdev_warn(dev, "VID=%d does not exist\n", vid);
+       if (WARN_ON(!mlxsw_sp_vport))
                 return 0;
-       }
  
-       err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
-       if (err) {
-               netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-                          vid);
-               return err;
-       }
+       mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
  
-       err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
-       if (err) {
-               netdev_err(dev, "Failed to enable learning for VID=%d\n", vid);
-               return err;
-       }
+       mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
  
         /* Drop FID reference. If this was the last reference the
          * resources will be freed.
@@ -1048,13 +1025,8 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
          * transition all active 802.1Q bridge VLANs to use VID to FID
          * mappings and set port's mode to VLAN mode.
          */
-       if (list_is_singular(&mlxsw_sp_port->vports_list)) {
-               err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
-               if (err) {
-                       netdev_err(dev, "Failed to set to VLAN mode\n");
-                       return err;
-               }
-       }
+       if (list_is_singular(&mlxsw_sp_port->vports_list))
+               mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
  
         mlxsw_sp_port_vport_destroy(mlxsw_sp_vport);
  
@@ -1149,6 +1121,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
                                           bool ingress)
  {
         const struct tc_action *a;
+       LIST_HEAD(actions);
         int err;
  
         if (!tc_single_action(cls->exts)) {
@@ -1156,7 +1129,8 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
                 return -ENOTSUPP;
         }
  
-       tc_for_each_action(a, cls->exts) {
+       tcf_exts_to_list(cls->exts, &actions);
+       list_for_each_entry(a, &actions, list) {
                 if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL))
                         return -ENOTSUPP;
  
@@ -2076,6 +2050,18 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
         return 0;
  }
  
+static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       mlxsw_sp_port->pvid = 1;
+
+       return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
+static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
  static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                                 bool split, u8 module, u8 width, u8 lane)
  {
@@ -2191,7 +2177,15 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                 goto err_port_dcb_init;
         }
  
+       err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n",
+                       mlxsw_sp_port->local_port);
+               goto err_port_pvid_vport_create;
+       }
+
         mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
+       mlxsw_sp->ports[local_port] = mlxsw_sp_port;
         err = register_netdev(dev);
         if (err) {
                 dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n",
@@ -2208,24 +2202,23 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                 goto err_core_port_init;
         }
  
-       err = mlxsw_sp_port_vlan_init(mlxsw_sp_port);
-       if (err)
-               goto err_port_vlan_init;
-
-       mlxsw_sp->ports[local_port] = mlxsw_sp_port;
         return 0;
  
-err_port_vlan_init:
-       mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
  err_core_port_init:
         unregister_netdev(dev);
  err_register_netdev:
+       mlxsw_sp->ports[local_port] = NULL;
+       mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+       mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+err_port_pvid_vport_create:
+       mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
  err_port_dcb_init:
  err_port_ets_init:
  err_port_buffers_init:
  err_port_admin_status_set:
  err_port_mtu_set:
  err_port_speed_by_width_set:
+       mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
  err_port_swid_set:
  err_port_system_port_mapping_set:
  err_dev_addr_init:
@@ -2245,12 +2238,12 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
  
         if (!mlxsw_sp_port)
                 return;
-       mlxsw_sp->ports[local_port] = NULL;
         mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
         unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
-       mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
-       mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+       mlxsw_sp->ports[local_port] = NULL;
         mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+       mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+       mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
         mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
         mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
         free_percpu(mlxsw_sp_port->pcpu_stats);
@@ -2659,6 +2652,26 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
                 .local_port = MLXSW_PORT_DONT_CARE,
                 .trap_id = MLXSW_TRAP_ID_ARPUC,
         },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_MTUERROR,
+       },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_TTLERROR,
+       },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_LBERROR,
+       },
+       {
+               .func = mlxsw_sp_rx_listener_func,
+               .local_port = MLXSW_PORT_DONT_CARE,
+               .trap_id = MLXSW_TRAP_ID_OSPF,
+       },
         {
                 .func = mlxsw_sp_rx_listener_func,
                 .local_port = MLXSW_PORT_DONT_CARE,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h

index f69aa37d1521854a2383c8db8c78bbffa1925ac6..ab3feb81bd432fe0dae79c41c82e2c2e5789ee1a 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -536,8 +536,6 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                  u16 vid);
  int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
                            u16 vid_end, bool is_member, bool untagged);
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-                         u16 vid);
  int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
                              bool set);
  void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c

index 074cdda7b6f337a6985e10a8d3620dd2825d2f3e..237418a0e6e086f425f3215990d443ac49e17e54 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -330,7 +330,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
         MLXSW_SP_CPU_PORT_SB_CM,
         MLXSW_SP_CPU_PORT_SB_CM,
         MLXSW_SP_CPU_PORT_SB_CM,
-       MLXSW_SP_CPU_PORT_SB_CM,
+       MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0),
         MLXSW_SP_CPU_PORT_SB_CM,
         MLXSW_SP_CPU_PORT_SB_CM,
         MLXSW_SP_CPU_PORT_SB_CM,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c

index 01cfb75128278ca2a1b263636c62314d5bc3d1c2..b6ed7f7c531eec666dfe6ee8343fdd0eb9aa3e1e 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -341,6 +341,8 @@ static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port,
         char pfcc_pl[MLXSW_REG_PFCC_LEN];
  
         mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+       mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause);
+       mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause);
         mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);
  
         return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
@@ -351,17 +353,17 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
                                       struct ieee_pfc *pfc)
  {
         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
         int err;
  
-       if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) &&
-           pfc->pfc_en) {
+       if (pause_en && pfc->pfc_en) {
                 netdev_err(dev, "PAUSE frames already enabled on port\n");
                 return -EINVAL;
         }
  
         err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
                                            mlxsw_sp_port->dcb.ets->prio_tc,
-                                          false, pfc);
+                                          pause_en, pfc);
         if (err) {
                 netdev_err(dev, "Failed to configure port's headroom for PFC\n");
                 return err;
@@ -380,7 +382,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
  
  err_port_pfc_set:
         __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
-                                    mlxsw_sp_port->dcb.ets->prio_tc, false,
+                                    mlxsw_sp_port->dcb.ets->prio_tc, pause_en,
                                      mlxsw_sp_port->dcb.pfc);
         return err;
  }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c

index 81418d629231667e7fe879fd767e9446eae4eac3..90bb93b037eccf4710d2176c9c9f969fd74a852c 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1651,9 +1651,10 @@ static void mlxsw_sp_router_fib4_add_info_destroy(void const *data)
         const struct mlxsw_sp_router_fib4_add_info *info = data;
         struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry;
         struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp;
+       struct mlxsw_sp_vr *vr = fib_entry->vr;
  
         mlxsw_sp_fib_entry_destroy(fib_entry);
-       mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr);
+       mlxsw_sp_vr_put(mlxsw_sp, vr);
         kfree(info);
  }
  
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c

index a1ad5e6bdfa8abb640195b5221fa7b6fbfd30209..d1b59cdfacc13824a642f0db6ffcd4c05678d4ef 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -450,6 +450,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f)
  
         kfree(f);
  
+       mlxsw_sp_fid_map(mlxsw_sp, fid, false);
+
         mlxsw_sp_fid_op(mlxsw_sp, fid, false);
  }
  
@@ -997,13 +999,13 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev,
  }
  
  static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
-                                    u16 vid_begin, u16 vid_end, bool init)
+                                    u16 vid_begin, u16 vid_end)
  {
         struct net_device *dev = mlxsw_sp_port->dev;
         u16 vid, pvid;
         int err;
  
-       if (!init && !mlxsw_sp_port->bridged)
+       if (!mlxsw_sp_port->bridged)
                 return -EINVAL;
  
         err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
@@ -1014,9 +1016,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                 return err;
         }
  
-       if (init)
-               goto out;
-
         pvid = mlxsw_sp_port->pvid;
         if (pvid >= vid_begin && pvid <= vid_end) {
                 err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
@@ -1028,7 +1027,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
  
         mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end);
  
-out:
         /* Changing activity bits only if HW operation succeded */
         for (vid = vid_begin; vid <= vid_end; vid++)
                 clear_bit(vid, mlxsw_sp_port->active_vlans);
@@ -1039,8 +1037,8 @@ out:
  static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                                    const struct switchdev_obj_port_vlan *vlan)
  {
-       return __mlxsw_sp_port_vlans_del(mlxsw_sp_port,
-                                        vlan->vid_begin, vlan->vid_end, false);
+       return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin,
+                                        vlan->vid_end);
  }
  
  void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -1048,7 +1046,7 @@ void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
         u16 vid;
  
         for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
-               __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+               __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid);
  }
  
  static int
@@ -1546,32 +1544,6 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
         mlxsw_sp_fdb_fini(mlxsw_sp);
  }
  
-int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-       struct net_device *dev = mlxsw_sp_port->dev;
-       int err;
-
-       /* Allow only untagged packets to ingress and tag them internally
-        * with VID 1.
-        */
-       mlxsw_sp_port->pvid = 1;
-       err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1,
-                                       true);
-       if (err) {
-               netdev_err(dev, "Unable to init VLANs\n");
-               return err;
-       }
-
-       /* Add implicit VLAN interface in the device, so that untagged
-        * packets will be classified to the default vFID.
-        */
-       err = mlxsw_sp_port_add_vid(dev, 0, 1);
-       if (err)
-               netdev_err(dev, "Failed to configure default vFID\n");
-
-       return err;
-}
-
  void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
  {
         mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h

index 470d7696e9fede42f46ea74adcd6b764d8ca3748..ed8e301864004f8092bcf82caf50edc730a4b7d4 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -56,6 +56,10 @@ enum {
         MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
         MLXSW_TRAP_ID_ARPBC = 0x50,
         MLXSW_TRAP_ID_ARPUC = 0x51,
+       MLXSW_TRAP_ID_MTUERROR = 0x52,
+       MLXSW_TRAP_ID_TTLERROR = 0x53,
+       MLXSW_TRAP_ID_LBERROR = 0x54,
+       MLXSW_TRAP_ID_OSPF = 0x55,
         MLXSW_TRAP_ID_IP2ME = 0x5F,
         MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
         MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c

index d0dc28f93c0e2b0426349d45f5ca7cb2615783ac..226cb08cc055fc21c6ea4ab414dfd798d883db8a 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -52,40 +52,94 @@ static bool qed_dcbx_app_ethtype(u32 app_info_bitmap)
                   DCBX_APP_SF_ETHTYPE);
  }
  
+static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap)
+{
+       u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+       /* Old MFW */
+       if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+               return qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE);
+}
+
  static bool qed_dcbx_app_port(u32 app_info_bitmap)
  {
         return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) ==
                   DCBX_APP_SF_PORT);
  }
  
-static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type)
  {
-       return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-                 proto_id == QED_ETH_TYPE_DEFAULT);
+       u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+       /* Old MFW */
+       if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+               return qed_dcbx_app_port(app_info_bitmap);
+
+       return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT);
  }
  
-static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
  {
-       return !!(qed_dcbx_app_port(app_info_bitmap) &&
-                 proto_id == QED_TCP_PORT_ISCSI);
+       bool ethtype;
+
+       if (ieee)
+               ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+       else
+               ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT));
  }
  
-static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
  {
-       return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-                 proto_id == QED_ETH_TYPE_FCOE);
+       bool port;
+
+       if (ieee)
+               port = qed_dcbx_ieee_app_port(app_info_bitmap,
+                                             DCBX_APP_SF_IEEE_TCP_PORT);
+       else
+               port = qed_dcbx_app_port(app_info_bitmap);
+
+       return !!(port && (proto_id == QED_TCP_PORT_ISCSI));
  }
  
-static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
  {
-       return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-                 proto_id == QED_ETH_TYPE_ROCE);
+       bool ethtype;
+
+       if (ieee)
+               ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+       else
+               ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE));
  }
  
-static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
  {
-       return !!(qed_dcbx_app_port(app_info_bitmap) &&
-                 proto_id == QED_UDP_PORT_TYPE_ROCE_V2);
+       bool ethtype;
+
+       if (ieee)
+               ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+       else
+               ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+       return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE));
+}
+
+static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
+{
+       bool port;
+
+       if (ieee)
+               port = qed_dcbx_ieee_app_port(app_info_bitmap,
+                                             DCBX_APP_SF_IEEE_UDP_PORT);
+       else
+               port = qed_dcbx_app_port(app_info_bitmap);
+
+       return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2));
  }
  
  static void
@@ -164,17 +218,17 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data,
  static bool
  qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn,
                                u32 app_prio_bitmap,
-                              u16 id, enum dcbx_protocol_type *type)
+                              u16 id, enum dcbx_protocol_type *type, bool ieee)
  {
-       if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) {
+       if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) {
                 *type = DCBX_PROTOCOL_FCOE;
-       } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) {
                 *type = DCBX_PROTOCOL_ROCE;
-       } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) {
                 *type = DCBX_PROTOCOL_ISCSI;
-       } else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) {
                 *type = DCBX_PROTOCOL_ETH;
-       } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) {
+       } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) {
                 *type = DCBX_PROTOCOL_ROCE_V2;
         } else {
                 *type = DCBX_MAX_PROTOCOL_TYPE;
@@ -194,17 +248,18 @@ static int
  qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
                      struct qed_dcbx_results *p_data,
                      struct dcbx_app_priority_entry *p_tbl,
-                    u32 pri_tc_tbl, int count, bool dcbx_enabled)
+                    u32 pri_tc_tbl, int count, u8 dcbx_version)
  {
         u8 tc, priority_map;
         enum dcbx_protocol_type type;
+       bool enable, ieee;
         u16 protocol_id;
         int priority;
-       bool enable;
         int i;
  
         DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count);
  
+       ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE);
         /* Parse APP TLV */
         for (i = 0; i < count; i++) {
                 protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
@@ -219,7 +274,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
  
                 tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority);
                 if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
-                                                  protocol_id, &type)) {
+                                                  protocol_id, &type, ieee)) {
                         /* ETH always have the enable bit reset, as it gets
                          * vlan information per packet. For other protocols,
                          * should be set according to the dcbx_enabled
@@ -275,15 +330,12 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn)
         struct dcbx_ets_feature *p_ets;
         struct qed_hw_info *p_info;
         u32 pri_tc_tbl, flags;
-       bool dcbx_enabled;
+       u8 dcbx_version;
         int num_entries;
         int rc = 0;
  
-       /* If DCBx version is non zero, then negotiation was
-        * successfuly performed
-        */
         flags = p_hwfn->p_dcbx_info->operational.flags;
-       dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
+       dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
  
         p_app = &p_hwfn->p_dcbx_info->operational.features.app;
         p_tbl = p_app->app_pri_tbl;
@@ -295,13 +347,13 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn)
         num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
  
         rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl,
-                                 num_entries, dcbx_enabled);
+                                 num_entries, dcbx_version);
         if (rc)
                 return rc;
  
         p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
         data.pf_id = p_hwfn->rel_pf_id;
-       data.dcbx_enabled = dcbx_enabled;
+       data.dcbx_enabled = !!dcbx_version;
  
         qed_dcbx_dp_protocol(p_hwfn, &data);
  
@@ -400,7 +452,7 @@ static void
  qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
                       struct dcbx_app_priority_feature *p_app,
                       struct dcbx_app_priority_entry *p_tbl,
-                     struct qed_dcbx_params *p_params)
+                     struct qed_dcbx_params *p_params, bool ieee)
  {
         struct qed_app_entry *entry;
         u8 pri_map;
@@ -414,15 +466,46 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
                                                       DCBX_APP_NUM_ENTRIES);
         for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
                 entry = &p_params->app_entry[i];
-               entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
-                                                    DCBX_APP_SF));
+               if (ieee) {
+                       u8 sf_ieee;
+                       u32 val;
+
+                       sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry,
+                                                   DCBX_APP_SF_IEEE);
+                       switch (sf_ieee) {
+                       case DCBX_APP_SF_IEEE_RESERVED:
+                               /* Old MFW */
+                               val = QED_MFW_GET_FIELD(p_tbl[i].entry,
+                                                       DCBX_APP_SF);
+                               entry->sf_ieee = val ?
+                                   QED_DCBX_SF_IEEE_TCP_UDP_PORT :
+                                   QED_DCBX_SF_IEEE_ETHTYPE;
+                               break;
+                       case DCBX_APP_SF_IEEE_ETHTYPE:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE;
+                               break;
+                       case DCBX_APP_SF_IEEE_TCP_PORT:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT;
+                               break;
+                       case DCBX_APP_SF_IEEE_UDP_PORT:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT;
+                               break;
+                       case DCBX_APP_SF_IEEE_TCP_UDP_PORT:
+                               entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT;
+                               break;
+                       }
+               } else {
+                       entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
+                                                            DCBX_APP_SF));
+               }
+
                 pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
                 entry->prio = ffs(pri_map) - 1;
                 entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
                                                     DCBX_APP_PROTOCOL_ID);
                 qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
                                                entry->proto_id,
-                                              &entry->proto_type);
+                                              &entry->proto_type, ieee);
         }
  
         DP_VERBOSE(p_hwfn, QED_MSG_DCB,
@@ -483,7 +566,7 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn,
         bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]);
         tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]);
         tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]);
-       pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]);
+       pri_map = p_ets->pri_tc_tbl[0];
         for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) {
                 p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i];
                 p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i];
@@ -500,9 +583,9 @@ qed_dcbx_get_common_params(struct qed_hwfn *p_hwfn,
                            struct dcbx_app_priority_feature *p_app,
                            struct dcbx_app_priority_entry *p_tbl,
                            struct dcbx_ets_feature *p_ets,
-                          u32 pfc, struct qed_dcbx_params *p_params)
+                          u32 pfc, struct qed_dcbx_params *p_params, bool ieee)
  {
-       qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params);
+       qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee);
         qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params);
         qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params);
  }
@@ -516,7 +599,7 @@ qed_dcbx_get_local_params(struct qed_hwfn *p_hwfn,
         p_feat = &p_hwfn->p_dcbx_info->local_admin.features;
         qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
                                    p_feat->app.app_pri_tbl, &p_feat->ets,
-                                  p_feat->pfc, &params->local.params);
+                                  p_feat->pfc, &params->local.params, false);
         params->local.valid = true;
  }
  
@@ -529,7 +612,7 @@ qed_dcbx_get_remote_params(struct qed_hwfn *p_hwfn,
         p_feat = &p_hwfn->p_dcbx_info->remote.features;
         qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
                                    p_feat->app.app_pri_tbl, &p_feat->ets,
-                                  p_feat->pfc, &params->remote.params);
+                                  p_feat->pfc, &params->remote.params, false);
         params->remote.valid = true;
  }
  
@@ -574,7 +657,8 @@ qed_dcbx_get_operational_params(struct qed_hwfn *p_hwfn,
  
         qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
                                    p_feat->app.app_pri_tbl, &p_feat->ets,
-                                  p_feat->pfc, &params->operational.params);
+                                  p_feat->pfc, &params->operational.params,
+                                  p_operational->ieee);
         qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results);
         err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR);
         p_operational->err = err;
@@ -944,7 +1028,6 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn,
                 val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4));
                 p_ets->pri_tc_tbl[0] |= val;
         }
-       p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]);
         for (i = 0; i < 2; i++) {
                 p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]);
                 p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]);
@@ -954,7 +1037,7 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn,
  static void
  qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
                       struct dcbx_app_priority_feature *p_app,
-                     struct qed_dcbx_params *p_params)
+                     struct qed_dcbx_params *p_params, bool ieee)
  {
         u32 *entry;
         int i;
@@ -975,12 +1058,36 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
  
         for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
                 entry = &p_app->app_pri_tbl[i].entry;
-               *entry &= ~DCBX_APP_SF_MASK;
-               if (p_params->app_entry[i].ethtype)
-                       *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
-                                  DCBX_APP_SF_SHIFT);
-               else
-                       *entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT);
+               if (ieee) {
+                       *entry &= ~DCBX_APP_SF_IEEE_MASK;
+                       switch (p_params->app_entry[i].sf_ieee) {
+                       case QED_DCBX_SF_IEEE_ETHTYPE:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       case QED_DCBX_SF_IEEE_TCP_PORT:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       case QED_DCBX_SF_IEEE_UDP_PORT:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       case QED_DCBX_SF_IEEE_TCP_UDP_PORT:
+                               *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
+                                          DCBX_APP_SF_IEEE_SHIFT);
+                               break;
+                       }
+               } else {
+                       *entry &= ~DCBX_APP_SF_MASK;
+                       if (p_params->app_entry[i].ethtype)
+                               *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+                                          DCBX_APP_SF_SHIFT);
+                       else
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
+               }
+
                 *entry &= ~DCBX_APP_PROTOCOL_ID_MASK;
                 *entry |= ((u32)p_params->app_entry[i].proto_id <<
                            DCBX_APP_PROTOCOL_ID_SHIFT);
@@ -995,15 +1102,19 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn,
                           struct dcbx_local_params *local_admin,
                           struct qed_dcbx_set *params)
  {
+       bool ieee = false;
+
         local_admin->flags = 0;
         memcpy(&local_admin->features,
                &p_hwfn->p_dcbx_info->operational.features,
                sizeof(local_admin->features));
  
-       if (params->enabled)
+       if (params->enabled) {
                 local_admin->config = params->ver_num;
-       else
+               ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE);
+       } else {
                 local_admin->config = DCBX_CONFIG_VERSION_DISABLED;
+       }
  
         if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG)
                 qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc,
@@ -1015,7 +1126,7 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn,
  
         if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG)
                 qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app,
-                                     &params->config.params);
+                                     &params->config.params, ieee);
  }
  
  int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
@@ -1596,8 +1707,10 @@ static int qed_dcbnl_setapp(struct qed_dev *cdev,
                 if ((entry->ethtype == ethtype) && (entry->proto_id == idval))
                         break;
                 /* First empty slot */
-               if (!entry->proto_id)
+               if (!entry->proto_id) {
+                       dcbx_set.config.params.num_app_entries++;
                         break;
+               }
         }
  
         if (i == QED_DCBX_MAX_APP_PROTOCOL) {
@@ -2117,8 +2230,10 @@ int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
                     (entry->proto_id == app->protocol))
                         break;
                 /* First empty slot */
-               if (!entry->proto_id)
+               if (!entry->proto_id) {
+                       dcbx_set.config.params.num_app_entries++;
                         break;
+               }
         }
  
         if (i == QED_DCBX_MAX_APP_PROTOCOL) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h

index 592784019994fec799c1a16524a2b5bb756baa74..6f9d3b831a2a0d545ef44b3dc6247ae5f141f3d0 100644 (file)
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -6850,6 +6850,14 @@ struct dcbx_app_priority_entry {
  #define DCBX_APP_SF_SHIFT              8
  #define DCBX_APP_SF_ETHTYPE            0
  #define DCBX_APP_SF_PORT               1
+#define DCBX_APP_SF_IEEE_MASK          0x0000f000
+#define DCBX_APP_SF_IEEE_SHIFT         12
+#define DCBX_APP_SF_IEEE_RESERVED      0
+#define DCBX_APP_SF_IEEE_ETHTYPE       1
+#define DCBX_APP_SF_IEEE_TCP_PORT      2
+#define DCBX_APP_SF_IEEE_UDP_PORT      3
+#define DCBX_APP_SF_IEEE_TCP_UDP_PORT  4
+
  #define DCBX_APP_PROTOCOL_ID_MASK      0xffff0000
  #define DCBX_APP_PROTOCOL_ID_SHIFT     16
  };
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h

index fd973f4f16c7d2db66c5d56f850f392f178e2383..49bad00a0f8f994837b0554f3250d5ca0811bff7 100644 (file)
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -37,8 +37,8 @@
  
  #define _QLCNIC_LINUX_MAJOR 5
  #define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 64
-#define QLCNIC_LINUX_VERSIONID  "5.3.64"
+#define _QLCNIC_LINUX_SUBVERSION 65
+#define QLCNIC_LINUX_VERSIONID  "5.3.65"
  #define QLCNIC_DRV_IDC_VER  0x01
  #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
                  (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c

index 87c642d3b075b2bc9845ba2cdbb4204e788028c3..fedd7366713cf04da3ceb7fff1704d74fb3e26fe 100644 (file)
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -102,7 +102,6 @@
  #define QLCNIC_RESPONSE_DESC   0x05
  #define QLCNIC_LRO_DESC        0x12
  
-#define QLCNIC_TX_POLL_BUDGET          128
  #define QLCNIC_TCP_HDR_SIZE            20
  #define QLCNIC_TCP_TS_OPTION_SIZE      12
  #define QLCNIC_FETCH_RING_ID(handle)   ((handle) >> 63)
@@ -2008,7 +2007,6 @@ static int qlcnic_83xx_msix_tx_poll(struct napi_struct *napi, int budget)
         struct qlcnic_host_tx_ring *tx_ring;
         struct qlcnic_adapter *adapter;
  
-       budget = QLCNIC_TX_POLL_BUDGET;
         tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi);
         adapter = tx_ring->adapter;
         work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h

index 017d8c2c8285abe53eddedc9495fe8a75758f04d..24061b9b92e8c304ed8bd6a8d55ad3e4e281ddde 100644 (file)
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
@@ -156,10 +156,8 @@ struct qlcnic_vf_info {
         spinlock_t                      vlan_list_lock; /* Lock for VLAN list */
  };
  
-struct qlcnic_async_work_list {
+struct qlcnic_async_cmd {
         struct list_head        list;
-       struct work_struct      work;
-       void                    *ptr;
         struct qlcnic_cmd_args  *cmd;
  };
  
@@ -168,7 +166,10 @@ struct qlcnic_back_channel {
         struct workqueue_struct *bc_trans_wq;
         struct workqueue_struct *bc_async_wq;
         struct workqueue_struct *bc_flr_wq;
-       struct list_head        async_list;
+       struct qlcnic_adapter   *adapter;
+       struct list_head        async_cmd_list;
+       struct work_struct      vf_async_work;
+       spinlock_t              queue_lock; /* async_cmd_list queue lock */
  };
  
  struct qlcnic_sriov {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c

index 7327b729ba2eae4119efff54ae2a199a77b11da3..d7107055ec6035bf206380d98bbd4670d06660cd 100644 (file)
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -29,6 +29,7 @@
  #define QLC_83XX_VF_RESET_FAIL_THRESH  8
  #define QLC_BC_CMD_MAX_RETRY_CNT       5
  
+static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work);
  static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *);
  static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32);
  static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *);
@@ -177,7 +178,10 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs)
         }
  
         bc->bc_async_wq =  wq;
-       INIT_LIST_HEAD(&bc->async_list);
+       INIT_LIST_HEAD(&bc->async_cmd_list);
+       INIT_WORK(&bc->vf_async_work, qlcnic_sriov_handle_async_issue_cmd);
+       spin_lock_init(&bc->queue_lock);
+       bc->adapter = adapter;
  
         for (i = 0; i < num_vfs; i++) {
                 vf = &sriov->vf_info[i];
@@ -1517,17 +1521,21 @@ static void qlcnic_vf_add_mc_list(struct net_device *netdev, const u8 *mac,
  
  void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc)
  {
-       struct list_head *head = &bc->async_list;
-       struct qlcnic_async_work_list *entry;
+       struct list_head *head = &bc->async_cmd_list;
+       struct qlcnic_async_cmd *entry;
  
         flush_workqueue(bc->bc_async_wq);
+       cancel_work_sync(&bc->vf_async_work);
+
+       spin_lock(&bc->queue_lock);
         while (!list_empty(head)) {
-               entry = list_entry(head->next, struct qlcnic_async_work_list,
+               entry = list_entry(head->next, struct qlcnic_async_cmd,
                                    list);
-               cancel_work_sync(&entry->work);
                 list_del(&entry->list);
+               kfree(entry->cmd);
                 kfree(entry);
         }
+       spin_unlock(&bc->queue_lock);
  }
  
  void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
@@ -1587,57 +1595,64 @@ void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
  
  static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work)
  {
-       struct qlcnic_async_work_list *entry;
-       struct qlcnic_adapter *adapter;
+       struct qlcnic_async_cmd *entry, *tmp;
+       struct qlcnic_back_channel *bc;
         struct qlcnic_cmd_args *cmd;
+       struct list_head *head;
+       LIST_HEAD(del_list);
+
+       bc = container_of(work, struct qlcnic_back_channel, vf_async_work);
+       head = &bc->async_cmd_list;
+
+       spin_lock(&bc->queue_lock);
+       list_splice_init(head, &del_list);
+       spin_unlock(&bc->queue_lock);
+
+       list_for_each_entry_safe(entry, tmp, &del_list, list) {
+               list_del(&entry->list);
+               cmd = entry->cmd;
+               __qlcnic_sriov_issue_cmd(bc->adapter, cmd);
+               kfree(entry);
+       }
+
+       if (!list_empty(head))
+               queue_work(bc->bc_async_wq, &bc->vf_async_work);
  
-       entry = container_of(work, struct qlcnic_async_work_list, work);
-       adapter = entry->ptr;
-       cmd = entry->cmd;
-       __qlcnic_sriov_issue_cmd(adapter, cmd);
         return;
  }
  
-static struct qlcnic_async_work_list *
-qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc)
+static struct qlcnic_async_cmd *
+qlcnic_sriov_alloc_async_cmd(struct qlcnic_back_channel *bc,
+                            struct qlcnic_cmd_args *cmd)
  {
-       struct list_head *node;
-       struct qlcnic_async_work_list *entry = NULL;
-       u8 empty = 0;
+       struct qlcnic_async_cmd *entry = NULL;
  
-       list_for_each(node, &bc->async_list) {
-               entry = list_entry(node, struct qlcnic_async_work_list, list);
-               if (!work_pending(&entry->work)) {
-                       empty = 1;
-                       break;
-               }
-       }
+       entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+       if (!entry)
+               return NULL;
  
-       if (!empty) {
-               entry = kzalloc(sizeof(struct qlcnic_async_work_list),
-                               GFP_ATOMIC);
-               if (entry == NULL)
-                       return NULL;
-               list_add_tail(&entry->list, &bc->async_list);
-       }
+       entry->cmd = cmd;
+
+       spin_lock(&bc->queue_lock);
+       list_add_tail(&entry->list, &bc->async_cmd_list);
+       spin_unlock(&bc->queue_lock);
  
         return entry;
  }
  
  static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc,
-                                           work_func_t func, void *data,
                                             struct qlcnic_cmd_args *cmd)
  {
-       struct qlcnic_async_work_list *entry = NULL;
+       struct qlcnic_async_cmd *entry = NULL;
  
-       entry = qlcnic_sriov_get_free_node_async_work(bc);
-       if (!entry)
+       entry = qlcnic_sriov_alloc_async_cmd(bc, cmd);
+       if (!entry) {
+               qlcnic_free_mbx_args(cmd);
+               kfree(cmd);
                 return;
+       }
  
-       entry->ptr = data;
-       entry->cmd = cmd;
-       INIT_WORK(&entry->work, func);
-       queue_work(bc->bc_async_wq, &entry->work);
+       queue_work(bc->bc_async_wq, &bc->vf_async_work);
  }
  
  static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
@@ -1649,8 +1664,8 @@ static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
         if (adapter->need_fw_reset)
                 return -EIO;
  
-       qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd,
-                                       adapter, cmd);
+       qlcnic_sriov_schedule_async_cmd(bc, cmd);
+
         return 0;
  }
  
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c

index c51f34693eae40440604c4090e23dcf6201cd21e..f85d605e45606f1b3e8493dfd1e1b3383005d7b4 100644 (file)
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -734,6 +734,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
                 netif_receive_skb(skb);
                 ndev->stats.rx_bytes += len;
                 ndev->stats.rx_packets++;
+               kmemleak_not_leak(new_skb);
         } else {
                 ndev->stats.rx_dropped++;
                 new_skb = skb;
@@ -1325,6 +1326,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
                                 kfree_skb(skb);
                                 goto err_cleanup;
                         }
+                       kmemleak_not_leak(skb);
                 }
                 /* continue even if we didn't manage to submit all
                  * receive descs
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c

index 01a77145a0fa487518b6233d5760d3a0a8866323..8fd131207ee106b8c198ed607473947cedc851c1 100644 (file)
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -166,6 +166,7 @@ static struct platform_driver tsi_eth_driver = {
  
  static void tsi108_timed_checker(unsigned long dev_ptr);
  
+#ifdef DEBUG
  static void dump_eth_one(struct net_device *dev)
  {
         struct tsi108_prv_data *data = netdev_priv(dev);
@@ -190,6 +191,7 @@ static void dump_eth_one(struct net_device *dev)
                TSI_READ(TSI108_EC_RXESTAT),
                TSI_READ(TSI108_EC_RXERR), data->rxpending);
  }
+#endif
  
  /* Synchronization is needed between the thread and up/down events.
   * Note that the PHY is accessed through the same registers for both
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h

index 467fb8b4d08389b00fbca446bae9261e2145b909..591af71eae56803d936b503d77a17460a43e38b3 100644 (file)
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -644,12 +644,6 @@ struct netvsc_reconfig {
         u32 event;
  };
  
-struct garp_wrk {
-       struct work_struct dwrk;
-       struct net_device *netdev;
-       struct netvsc_device *netvsc_dev;
-};
-
  /* The context of the netvsc device  */
  struct net_device_context {
         /* point back to our device context */
@@ -667,7 +661,6 @@ struct net_device_context {
  
         struct work_struct work;
         u32 msg_enable; /* debug level */
-       struct garp_wrk gwrk;
  
         struct netvsc_stats __percpu *tx_stats;
         struct netvsc_stats __percpu *rx_stats;
@@ -678,6 +671,15 @@ struct net_device_context {
  
         /* the device is going away */
         bool start_remove;
+
+       /* State to manage the associated VF interface. */
+       struct net_device *vf_netdev;
+       bool vf_inject;
+       atomic_t vf_use_cnt;
+       /* 1: allocated, serial number is valid. 0: not allocated */
+       u32 vf_alloc;
+       /* Serial number of the VF to team with */
+       u32 vf_serial;
  };
  
  /* Per netvsc device */
@@ -733,15 +735,7 @@ struct netvsc_device {
         u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
         u32 pkt_align; /* alignment bytes, e.g. 8 */
  
-       /* 1: allocated, serial number is valid. 0: not allocated */
-       u32 vf_alloc;
-       /* Serial number of the VF to team with */
-       u32 vf_serial;
         atomic_t open_cnt;
-       /* State to manage the associated VF interface. */
-       bool vf_inject;
-       struct net_device *vf_netdev;
-       atomic_t vf_use_cnt;
  };
  
  static inline struct netvsc_device *
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c

index 20e09174ff6240bb5b1ba2eb4f13f0a4fd0d5bf3..410fb8e81376f6ac5cd2272db0e1540aa8941aad 100644 (file)
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void)
         init_waitqueue_head(&net_device->wait_drain);
         net_device->destroy = false;
         atomic_set(&net_device->open_cnt, 0);
-       atomic_set(&net_device->vf_use_cnt, 0);
         net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
         net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
  
-       net_device->vf_netdev = NULL;
-       net_device->vf_inject = false;
-
         return net_device;
  }
  
@@ -1106,16 +1102,16 @@ static void netvsc_send_table(struct hv_device *hdev,
                 nvscdev->send_table[i] = tab[i];
  }
  
-static void netvsc_send_vf(struct netvsc_device *nvdev,
+static void netvsc_send_vf(struct net_device_context *net_device_ctx,
                            struct nvsp_message *nvmsg)
  {
-       nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
-       nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+       net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+       net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
  }
  
  static inline void netvsc_receive_inband(struct hv_device *hdev,
-                                        struct netvsc_device *nvdev,
-                                        struct nvsp_message *nvmsg)
+                                struct net_device_context *net_device_ctx,
+                                struct nvsp_message *nvmsg)
  {
         switch (nvmsg->hdr.msg_type) {
         case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
@@ -1123,7 +1119,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
                 break;
  
         case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
-               netvsc_send_vf(nvdev, nvmsg);
+               netvsc_send_vf(net_device_ctx, nvmsg);
                 break;
         }
  }
@@ -1136,6 +1132,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
                                    struct vmpacket_descriptor *desc)
  {
         struct nvsp_message *nvmsg;
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
  
         nvmsg = (struct nvsp_message *)((unsigned long)
                 desc + (desc->offset8 << 3));
@@ -1150,7 +1147,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
                 break;
  
         case VM_PKT_DATA_INBAND:
-               netvsc_receive_inband(device, net_device, nvmsg);
+               netvsc_receive_inband(device, net_device_ctx, nvmsg);
                 break;
  
         default:
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c

index 41bd952cc28d37299562b2a9025e6767b39973c1..3ba29fc80d057e744eef4ae98ad60315f742c4ce 100644 (file)
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -658,20 +658,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
         struct sk_buff *skb;
         struct sk_buff *vf_skb;
         struct netvsc_stats *rx_stats;
-       struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
         u32 bytes_recvd = packet->total_data_buflen;
         int ret = 0;
  
         if (!net || net->reg_state != NETREG_REGISTERED)
                 return NVSP_STAT_FAIL;
  
-       if (READ_ONCE(netvsc_dev->vf_inject)) {
-               atomic_inc(&netvsc_dev->vf_use_cnt);
-               if (!READ_ONCE(netvsc_dev->vf_inject)) {
+       if (READ_ONCE(net_device_ctx->vf_inject)) {
+               atomic_inc(&net_device_ctx->vf_use_cnt);
+               if (!READ_ONCE(net_device_ctx->vf_inject)) {
                         /*
                          * We raced; just move on.
                          */
-                       atomic_dec(&netvsc_dev->vf_use_cnt);
+                       atomic_dec(&net_device_ctx->vf_use_cnt);
                         goto vf_injection_done;
                 }
  
@@ -683,17 +682,19 @@ int netvsc_recv_callback(struct hv_device *device_obj,
                  * the host). Deliver these via the VF interface
                  * in the guest.
                  */
-               vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-                                              csum_info, *data, vlan_tci);
+               vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev,
+                                              packet, csum_info, *data,
+                                              vlan_tci);
                 if (vf_skb != NULL) {
-                       ++netvsc_dev->vf_netdev->stats.rx_packets;
-                       netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
+                       ++net_device_ctx->vf_netdev->stats.rx_packets;
+                       net_device_ctx->vf_netdev->stats.rx_bytes +=
+                               bytes_recvd;
                         netif_receive_skb(vf_skb);
                 } else {
                         ++net->stats.rx_dropped;
                         ret = NVSP_STAT_FAIL;
                 }
-               atomic_dec(&netvsc_dev->vf_use_cnt);
+               atomic_dec(&net_device_ctx->vf_use_cnt);
                 return ret;
         }
  
@@ -1150,17 +1151,6 @@ static void netvsc_free_netdev(struct net_device *netdev)
         free_netdev(netdev);
  }
  
-static void netvsc_notify_peers(struct work_struct *wrk)
-{
-       struct garp_wrk *gwrk;
-
-       gwrk = container_of(wrk, struct garp_wrk, dwrk);
-
-       netdev_notify_peers(gwrk->netdev);
-
-       atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
-}
-
  static struct net_device *get_netvsc_net_device(char *mac)
  {
         struct net_device *dev, *found = NULL;
@@ -1203,7 +1193,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
  
         net_device_ctx = netdev_priv(ndev);
         netvsc_dev = net_device_ctx->nvdev;
-       if (netvsc_dev == NULL)
+       if (!netvsc_dev || net_device_ctx->vf_netdev)
                 return NOTIFY_DONE;
  
         netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1211,10 +1201,23 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
          * Take a reference on the module.
          */
         try_module_get(THIS_MODULE);
-       netvsc_dev->vf_netdev = vf_netdev;
+       net_device_ctx->vf_netdev = vf_netdev;
         return NOTIFY_OK;
  }
  
+static void netvsc_inject_enable(struct net_device_context *net_device_ctx)
+{
+       net_device_ctx->vf_inject = true;
+}
+
+static void netvsc_inject_disable(struct net_device_context *net_device_ctx)
+{
+       net_device_ctx->vf_inject = false;
+
+       /* Wait for currently active users to drain out. */
+       while (atomic_read(&net_device_ctx->vf_use_cnt) != 0)
+               udelay(50);
+}
  
  static int netvsc_vf_up(struct net_device *vf_netdev)
  {
@@ -1233,11 +1236,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
         net_device_ctx = netdev_priv(ndev);
         netvsc_dev = net_device_ctx->nvdev;
  
-       if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+       if (!netvsc_dev || !net_device_ctx->vf_netdev)
                 return NOTIFY_DONE;
  
         netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-       netvsc_dev->vf_inject = true;
+       netvsc_inject_enable(net_device_ctx);
  
         /*
          * Open the device before switching data path.
@@ -1252,15 +1255,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
  
         netif_carrier_off(ndev);
  
-       /*
-        * Now notify peers. We are scheduling work to
-        * notify peers; take a reference to prevent
-        * the VF interface from vanishing.
-        */
-       atomic_inc(&netvsc_dev->vf_use_cnt);
-       net_device_ctx->gwrk.netdev = vf_netdev;
-       net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
-       schedule_work(&net_device_ctx->gwrk.dwrk);
+       /* Now notify peers through VF device. */
+       call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
  
         return NOTIFY_OK;
  }
@@ -1283,29 +1279,18 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
         net_device_ctx = netdev_priv(ndev);
         netvsc_dev = net_device_ctx->nvdev;
  
-       if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
+       if (!netvsc_dev || !net_device_ctx->vf_netdev)
                 return NOTIFY_DONE;
  
         netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-       netvsc_dev->vf_inject = false;
-       /*
-        * Wait for currently active users to
-        * drain out.
-        */
-
-       while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
-               udelay(50);
+       netvsc_inject_disable(net_device_ctx);
         netvsc_switch_datapath(ndev, false);
         netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
         rndis_filter_close(netvsc_dev);
         netif_carrier_on(ndev);
-       /*
-        * Notify peers.
-        */
-       atomic_inc(&netvsc_dev->vf_use_cnt);
-       net_device_ctx->gwrk.netdev = ndev;
-       net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
-       schedule_work(&net_device_ctx->gwrk.dwrk);
+
+       /* Now notify peers through netvsc device. */
+       call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
  
         return NOTIFY_OK;
  }
@@ -1327,11 +1312,11 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
  
         net_device_ctx = netdev_priv(ndev);
         netvsc_dev = net_device_ctx->nvdev;
-       if (netvsc_dev == NULL)
+       if (!netvsc_dev || !net_device_ctx->vf_netdev)
                 return NOTIFY_DONE;
         netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
-
-       netvsc_dev->vf_netdev = NULL;
+       netvsc_inject_disable(net_device_ctx);
+       net_device_ctx->vf_netdev = NULL;
         module_put(THIS_MODULE);
         return NOTIFY_OK;
  }
@@ -1377,11 +1362,14 @@ static int netvsc_probe(struct hv_device *dev,
  
         INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
         INIT_WORK(&net_device_ctx->work, do_set_multicast);
-       INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers);
  
         spin_lock_init(&net_device_ctx->lock);
         INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
  
+       atomic_set(&net_device_ctx->vf_use_cnt, 0);
+       net_device_ctx->vf_netdev = NULL;
+       net_device_ctx->vf_inject = false;
+
         net->netdev_ops = &device_ops;
  
         net->hw_features = NETVSC_HW_FEATURES;
@@ -1494,8 +1482,13 @@ static int netvsc_netdev_event(struct notifier_block *this,
  {
         struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
  
-       /* Avoid Vlan, Bonding dev with same MAC registering as VF */
-       if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING))
+       /* Avoid Vlan dev with same MAC registering as VF */
+       if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+               return NOTIFY_DONE;
+
+       /* Avoid Bonding master dev with same MAC registering as VF */
+       if (event_dev->priv_flags & IFF_BONDING &&
+           event_dev->flags & IFF_MASTER)
                 return NOTIFY_DONE;
  
         switch (event) {
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c

index d13e6e15d7b5e394dff13bee8a1238e6576658a7..351e701eb043b340204f1f07aff81ef100c7b5d6 100644 (file)
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -270,6 +270,7 @@ struct macsec_dev {
         struct pcpu_secy_stats __percpu *stats;
         struct list_head secys;
         struct gro_cells gro_cells;
+       unsigned int nest_level;
  };
  
  /**
@@ -2699,6 +2700,8 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
  
  #define MACSEC_FEATURES \
         (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
+static struct lock_class_key macsec_netdev_addr_lock_key;
+
  static int macsec_dev_init(struct net_device *dev)
  {
         struct macsec_dev *macsec = macsec_priv(dev);
@@ -2910,6 +2913,13 @@ static int macsec_get_iflink(const struct net_device *dev)
         return macsec_priv(dev)->real_dev->ifindex;
  }
  
+
+static int macsec_get_nest_level(struct net_device *dev)
+{
+       return macsec_priv(dev)->nest_level;
+}
+
+
  static const struct net_device_ops macsec_netdev_ops = {
         .ndo_init               = macsec_dev_init,
         .ndo_uninit             = macsec_dev_uninit,
@@ -2923,6 +2933,7 @@ static const struct net_device_ops macsec_netdev_ops = {
         .ndo_start_xmit         = macsec_start_xmit,
         .ndo_get_stats64        = macsec_get_stats64,
         .ndo_get_iflink         = macsec_get_iflink,
+       .ndo_get_lock_subclass  = macsec_get_nest_level,
  };
  
  static const struct device_type macsec_type = {
@@ -3047,22 +3058,31 @@ static void macsec_del_dev(struct macsec_dev *macsec)
         }
  }
  
+static void macsec_common_dellink(struct net_device *dev, struct list_head *head)
+{
+       struct macsec_dev *macsec = macsec_priv(dev);
+       struct net_device *real_dev = macsec->real_dev;
+
+       unregister_netdevice_queue(dev, head);
+       list_del_rcu(&macsec->secys);
+       macsec_del_dev(macsec);
+       netdev_upper_dev_unlink(real_dev, dev);
+
+       macsec_generation++;
+}
+
  static void macsec_dellink(struct net_device *dev, struct list_head *head)
  {
         struct macsec_dev *macsec = macsec_priv(dev);
         struct net_device *real_dev = macsec->real_dev;
         struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
  
-       macsec_generation++;
+       macsec_common_dellink(dev, head);
  
-       unregister_netdevice_queue(dev, head);
-       list_del_rcu(&macsec->secys);
         if (list_empty(&rxd->secys)) {
                 netdev_rx_handler_unregister(real_dev);
                 kfree(rxd);
         }
-
-       macsec_del_dev(macsec);
  }
  
  static int register_macsec_dev(struct net_device *real_dev,
@@ -3181,6 +3201,16 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
  
         dev_hold(real_dev);
  
+       macsec->nest_level = dev_get_nest_level(real_dev) + 1;
+       netdev_lockdep_set_classes(dev);
+       lockdep_set_class_and_subclass(&dev->addr_list_lock,
+                                      &macsec_netdev_addr_lock_key,
+                                      macsec_get_nest_level(dev));
+
+       err = netdev_upper_dev_link(real_dev, dev);
+       if (err < 0)
+               goto unregister;
+
         /* need to be already registered so that ->init has run and
          * the MAC addr is set
          */
@@ -3193,12 +3223,12 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
  
         if (rx_handler && sci_exists(real_dev, sci)) {
                 err = -EBUSY;
-               goto unregister;
+               goto unlink;
         }
  
         err = macsec_add_dev(dev, sci, icv_len);
         if (err)
-               goto unregister;
+               goto unlink;
  
         if (data)
                 macsec_changelink_common(dev, data);
@@ -3213,6 +3243,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
  
  del_dev:
         macsec_del_dev(macsec);
+unlink:
+       netdev_upper_dev_unlink(real_dev, dev);
  unregister:
         unregister_netdevice(dev);
         return err;
@@ -3382,8 +3414,12 @@ static int macsec_notify(struct notifier_block *this, unsigned long event,
  
                 rxd = macsec_data_rtnl(real_dev);
                 list_for_each_entry_safe(m, n, &rxd->secys, secys) {
-                       macsec_dellink(m->secy.netdev, &head);
+                       macsec_common_dellink(m->secy.netdev, &head);
                 }
+
+               netdev_rx_handler_unregister(real_dev);
+               kfree(rxd);
+
                 unregister_netdevice_many(&head);
                 break;
         }
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c

index cd9b53834bf60a4345880b8c6b4b02967372dba6..3234fcdea31745046cc5a7ac20f2ca676ddcb2e4 100644 (file)
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1315,7 +1315,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
         vlan->dev      = dev;
         vlan->port     = port;
         vlan->set_features = MACVLAN_FEATURES;
-       vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1;
+       vlan->nest_level = dev_get_nest_level(lowerdev) + 1;
  
         vlan->mode     = MACVLAN_MODE_VEPA;
         if (data && data[IFLA_MACVLAN_MODE])
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c

index a38c0dac514b89b1a9e7ab8d23e2b4aa56e07565..070e3290aa6efea6fcb505cdf0860a4dce676b74 100644 (file)
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -275,7 +275,6 @@ static void macvtap_put_queue(struct macvtap_queue *q)
         rtnl_unlock();
  
         synchronize_rcu();
-       skb_array_cleanup(&q->skb_array);
         sock_put(&q->sk);
  }
  
@@ -533,10 +532,8 @@ static void macvtap_sock_write_space(struct sock *sk)
  static void macvtap_sock_destruct(struct sock *sk)
  {
         struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
-       struct sk_buff *skb;
  
-       while ((skb = skb_array_consume(&q->skb_array)) != NULL)
-               kfree_skb(skb);
+       skb_array_cleanup(&q->skb_array);
  }
  
  static int macvtap_open(struct inode *inode, struct file *file)
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c

index 1882d9828c998c376b20c50f83e25f8e03f22b0c..053e87905b944e51f43c33355f235d42d6bcec54 100644 (file)
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -677,17 +677,28 @@ static void kszphy_get_stats(struct phy_device *phydev,
                 data[i] = kszphy_get_stat(phydev, i);
  }
  
-static int kszphy_resume(struct phy_device *phydev)
+static int kszphy_suspend(struct phy_device *phydev)
  {
-       int value;
+       /* Disable PHY Interrupts */
+       if (phy_interrupt_is_valid(phydev)) {
+               phydev->interrupts = PHY_INTERRUPT_DISABLED;
+               if (phydev->drv->config_intr)
+                       phydev->drv->config_intr(phydev);
+       }
  
-       mutex_lock(&phydev->lock);
+       return genphy_suspend(phydev);
+}
  
-       value = phy_read(phydev, MII_BMCR);
-       phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
+static int kszphy_resume(struct phy_device *phydev)
+{
+       genphy_resume(phydev);
  
-       kszphy_config_intr(phydev);
-       mutex_unlock(&phydev->lock);
+       /* Enable PHY Interrupts */
+       if (phy_interrupt_is_valid(phydev)) {
+               phydev->interrupts = PHY_INTERRUPT_ENABLED;
+               if (phydev->drv->config_intr)
+                       phydev->drv->config_intr(phydev);
+       }
  
         return 0;
  }
@@ -900,7 +911,7 @@ static struct phy_driver ksphy_driver[] = {
         .get_sset_count = kszphy_get_sset_count,
         .get_strings    = kszphy_get_strings,
         .get_stats      = kszphy_get_stats,
-       .suspend        = genphy_suspend,
+       .suspend        = kszphy_suspend,
         .resume         = kszphy_resume,
  }, {
         .phy_id         = PHY_ID_KSZ8061,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c

index da4e3d6632f647f95b6fdbc0d22abf098afdbe01..c0dda6fc09217bb31ab2f750e5d4d51abd6411cf 100644 (file)
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1811,7 +1811,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
         fl4.flowi4_mark = skb->mark;
         fl4.flowi4_proto = IPPROTO_UDP;
         fl4.daddr = daddr;
-       fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
+       fl4.saddr = *saddr;
  
         rt = ip_route_output_key(vxlan->net, &fl4);
         if (!IS_ERR(rt)) {
@@ -1847,7 +1847,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
         memset(&fl6, 0, sizeof(fl6));
         fl6.flowi6_oif = oif;
         fl6.daddr = *daddr;
-       fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+       fl6.saddr = *saddr;
         fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
         fl6.flowi6_mark = skb->mark;
         fl6.flowi6_proto = IPPROTO_UDP;
@@ -1920,7 +1920,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
         struct rtable *rt = NULL;
         const struct iphdr *old_iph;
         union vxlan_addr *dst;
-       union vxlan_addr remote_ip;
+       union vxlan_addr remote_ip, local_ip;
+       union vxlan_addr *src;
         struct vxlan_metadata _md;
         struct vxlan_metadata *md = &_md;
         __be16 src_port = 0, dst_port;
@@ -1938,6 +1939,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                 dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
                 vni = rdst->remote_vni;
                 dst = &rdst->remote_ip;
+               src = &vxlan->cfg.saddr;
                 dst_cache = &rdst->dst_cache;
         } else {
                 if (!info) {
@@ -1948,11 +1950,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                 dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
                 vni = vxlan_tun_id_to_vni(info->key.tun_id);
                 remote_ip.sa.sa_family = ip_tunnel_info_af(info);
-               if (remote_ip.sa.sa_family == AF_INET)
+               if (remote_ip.sa.sa_family == AF_INET) {
                         remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
-               else
+                       local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
+               } else {
                         remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
+                       local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
+               }
                 dst = &remote_ip;
+               src = &local_ip;
                 dst_cache = &info->dst_cache;
         }
  
@@ -1992,15 +1998,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
         }
  
         if (dst->sa.sa_family == AF_INET) {
-               __be32 saddr;
-
                 if (!vxlan->vn4_sock)
                         goto drop;
                 sk = vxlan->vn4_sock->sock->sk;
  
                 rt = vxlan_get_route(vxlan, skb,
                                      rdst ? rdst->remote_ifindex : 0, tos,
-                                    dst->sin.sin_addr.s_addr, &saddr,
+                                    dst->sin.sin_addr.s_addr,
+                                    &src->sin.sin_addr.s_addr,
                                      dst_cache, info);
                 if (IS_ERR(rt)) {
                         netdev_dbg(dev, "no route to %pI4\n",
@@ -2017,7 +2022,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                 }
  
                 /* Bypass encapsulation if the destination is local */
-               if (rt->rt_flags & RTCF_LOCAL &&
+               if (!info && rt->rt_flags & RTCF_LOCAL &&
                     !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
                         struct vxlan_dev *dst_vxlan;
  
@@ -2043,13 +2048,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                 if (err < 0)
                         goto xmit_tx_error;
  
-               udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+               udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
                                     dst->sin.sin_addr.s_addr, tos, ttl, df,
                                     src_port, dst_port, xnet, !udp_sum);
  #if IS_ENABLED(CONFIG_IPV6)
         } else {
                 struct dst_entry *ndst;
-               struct in6_addr saddr;
                 u32 rt6i_flags;
  
                 if (!vxlan->vn6_sock)
@@ -2058,7 +2062,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
  
                 ndst = vxlan6_get_route(vxlan, skb,
                                         rdst ? rdst->remote_ifindex : 0, tos,
-                                       label, &dst->sin6.sin6_addr, &saddr,
+                                       label, &dst->sin6.sin6_addr,
+                                       &src->sin6.sin6_addr,
                                         dst_cache, info);
                 if (IS_ERR(ndst)) {
                         netdev_dbg(dev, "no route to %pI6\n",
@@ -2077,7 +2082,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
  
                 /* Bypass encapsulation if the destination is local */
                 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
-               if (rt6i_flags & RTF_LOCAL &&
+               if (!info && rt6i_flags & RTF_LOCAL &&
                     !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
                         struct vxlan_dev *dst_vxlan;
  
@@ -2104,7 +2109,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                         return;
                 }
                 udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
-                                    &saddr, &dst->sin6.sin6_addr, tos, ttl,
+                                    &src->sin6.sin6_addr,
+                                    &dst->sin6.sin6_addr, tos, ttl,
                                      label, src_port, dst_port, !udp_sum);
  #endif
         }
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c

index 1d689169da768167cea253a396dff3771220c5a1..9e1f2d9c98659279d85c914247c7b7f95a62b8c7 100644 (file)
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5700,10 +5700,11 @@ out:
         mutex_unlock(&wl->mutex);
  }
  
-static u32 wlcore_op_get_expected_throughput(struct ieee80211_sta *sta)
+static u32 wlcore_op_get_expected_throughput(struct ieee80211_hw *hw,
+                                            struct ieee80211_sta *sta)
  {
         struct wl1271_station *wl_sta = (struct wl1271_station *)sta->drv_priv;
-       struct wl1271 *wl = wl_sta->wl;
+       struct wl1271 *wl = hw->priv;
         u8 hlid = wl_sta->hlid;
  
         /* return in units of Kbps */
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c

index 88e91666f145f0e7b0e95bf560ecc5e78f2ada22..368795aad5c974dbb59b1a43825b76c3e443acd2 100644 (file)
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1269,6 +1269,7 @@ static int btt_blk_init(struct btt *btt)
                 }
         }
         set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+       btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
         revalidate_disk(btt->btt_disk);
  
         return 0;
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c

index 3fa7919f94a8785860afd3487d803f5b3010acd9..97dd2925ed6e95f1f06ffa6f4a0b5643acd4c07a 100644 (file)
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -140,10 +140,30 @@ static ssize_t namespace_store(struct device *dev,
  }
  static DEVICE_ATTR_RW(namespace);
  
+static ssize_t size_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_btt *nd_btt = to_nd_btt(dev);
+       ssize_t rc;
+
+       device_lock(dev);
+       if (dev->driver)
+               rc = sprintf(buf, "%llu\n", nd_btt->size);
+       else {
+               /* no size to convey if the btt instance is disabled */
+               rc = -ENXIO;
+       }
+       device_unlock(dev);
+
+       return rc;
+}
+static DEVICE_ATTR_RO(size);
+
  static struct attribute *nd_btt_attributes[] = {
         &dev_attr_sector_size.attr,
         &dev_attr_namespace.attr,
         &dev_attr_uuid.attr,
+       &dev_attr_size.attr,
         NULL,
  };
  
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h

index 40476399d22793aece0438da0f5a0976cef063ab..8024a0ef86d3af9f0ba5ef169260e2e342023d8e 100644 (file)
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -143,6 +143,7 @@ struct nd_btt {
         struct nd_namespace_common *ndns;
         struct btt *btt;
         unsigned long lbasize;
+       u64 size;
         u8 *uuid;
         int id;
  };
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index d7c33f9361aa0361d762d1da31368f03bdca3082..8dcf5a960951805b09d650b2cc243ceaeff6a5bb 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1543,15 +1543,10 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
                 reinit_completion(&dev->ioq_wait);
   retry:
                 timeout = ADMIN_TIMEOUT;
-               for (; i > 0; i--) {
-                       struct nvme_queue *nvmeq = dev->queues[i];
-
-                       if (!pass)
-                               nvme_suspend_queue(nvmeq);
-                       if (nvme_delete_queue(nvmeq, opcode))
+               for (; i > 0; i--, sent++)
+                       if (nvme_delete_queue(dev->queues[i], opcode))
                                 break;
-                       ++sent;
-               }
+
                 while (sent--) {
                         timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
                         if (timeout == 0)
@@ -1693,11 +1688,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                 nvme_stop_queues(&dev->ctrl);
                 csts = readl(dev->bar + NVME_REG_CSTS);
         }
+
+       for (i = dev->queue_count - 1; i > 0; i--)
+               nvme_suspend_queue(dev->queues[i]);
+
         if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
-               for (i = dev->queue_count - 1; i >= 0; i--) {
-                       struct nvme_queue *nvmeq = dev->queues[i];
-                       nvme_suspend_queue(nvmeq);
-               }
+               nvme_suspend_queue(dev->queues[0]);
         } else {
                 nvme_disable_io_queues(dev);
                 nvme_disable_admin_queue(dev, shutdown);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c

index 3e3ce2b0424e4844d759bcf395d4177726d4e301..8d2875b4c56d8c8bf7da8951512288b327dd6e7a 100644 (file)
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -12,13 +12,11 @@
   * more details.
   */
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/delay.h>
  #include <linux/module.h>
  #include <linux/init.h>
  #include <linux/slab.h>
  #include <linux/err.h>
  #include <linux/string.h>
-#include <linux/jiffies.h>
  #include <linux/atomic.h>
  #include <linux/blk-mq.h>
  #include <linux/types.h>
@@ -26,7 +24,6 @@
  #include <linux/mutex.h>
  #include <linux/scatterlist.h>
  #include <linux/nvme.h>
-#include <linux/t10-pi.h>
  #include <asm/unaligned.h>
  
  #include <rdma/ib_verbs.h>
@@ -169,7 +166,6 @@ MODULE_PARM_DESC(register_always,
  static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
                 struct rdma_cm_event *event);
  static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
  
  /* XXX: really should move to a generic header sooner or later.. */
  static inline void put_unaligned_le24(u32 val, u8 *p)
@@ -687,11 +683,6 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
         list_del(&ctrl->list);
         mutex_unlock(&nvme_rdma_ctrl_mutex);
  
-       if (ctrl->ctrl.tagset) {
-               blk_cleanup_queue(ctrl->ctrl.connect_q);
-               blk_mq_free_tag_set(&ctrl->tag_set);
-               nvme_rdma_dev_put(ctrl->device);
-       }
         kfree(ctrl->queues);
         nvmf_free_options(nctrl->opts);
  free_ctrl:
@@ -748,8 +739,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
         changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
         WARN_ON_ONCE(!changed);
  
-       if (ctrl->queue_count > 1)
+       if (ctrl->queue_count > 1) {
                 nvme_start_queues(&ctrl->ctrl);
+               nvme_queue_scan(&ctrl->ctrl);
+               nvme_queue_async_events(&ctrl->ctrl);
+       }
  
         dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
  
@@ -1269,7 +1263,7 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
  {
         struct nvme_rdma_ctrl *ctrl = queue->ctrl;
         struct rdma_conn_param param = { };
-       struct nvme_rdma_cm_req priv;
+       struct nvme_rdma_cm_req priv = { };
         int ret;
  
         param.qp_num = queue->qp->qp_num;
@@ -1318,37 +1312,39 @@ out_destroy_queue_ib:
   * that caught the event. Since we hold the callout until the controller
   * deletion is completed, we'll deadlock if the controller deletion will
   * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources
- * after the controller deletion completed with the exception of destroying
- * the cm_id implicitely by returning a non-zero rc to the callout.
+ * of destroying this queue before-hand, destroy the queue resources,
+ * then queue the controller deletion which won't destroy this queue and
+ * we destroy the cm_id implicitly by returning a non-zero rc to the callout.
   */
  static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
  {
         struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-       int ret, ctrl_deleted = 0;
+       int ret;
  
-       /* First disable the queue so ctrl delete won't free it */
-       if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
-               goto out;
+       /* Own the controller deletion */
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
+               return 0;
  
-       /* delete the controller */
-       ret = __nvme_rdma_del_ctrl(ctrl);
-       if (!ret) {
-               dev_warn(ctrl->ctrl.device,
-                       "Got rdma device removal event, deleting ctrl\n");
-               flush_work(&ctrl->delete_work);
+       dev_warn(ctrl->ctrl.device,
+               "Got rdma device removal event, deleting ctrl\n");
  
-               /* Return non-zero so the cm_id will destroy implicitly */
-               ctrl_deleted = 1;
+       /* Get rid of reconnect work if its running */
+       cancel_delayed_work_sync(&ctrl->reconnect_work);
  
+       /* Disable the queue so ctrl delete won't free it */
+       if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) {
                 /* Free this queue ourselves */
-               rdma_disconnect(queue->cm_id);
-               ib_drain_qp(queue->qp);
+               nvme_rdma_stop_queue(queue);
                 nvme_rdma_destroy_queue_ib(queue);
+
+               /* Return non-zero so the cm_id will destroy implicitly */
+               ret = 1;
         }
  
-out:
-       return ctrl_deleted;
+       /* Queue controller deletion */
+       queue_work(nvme_rdma_wq, &ctrl->delete_work);
+       flush_work(&ctrl->delete_work);
+       return ret;
  }
  
  static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
@@ -1648,7 +1644,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
                 nvme_rdma_free_io_queues(ctrl);
         }
  
-       if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+       if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
                 nvme_shutdown_ctrl(&ctrl->ctrl);
  
         blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1657,15 +1653,27 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
         nvme_rdma_destroy_admin_queue(ctrl);
  }
  
+static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+{
+       nvme_uninit_ctrl(&ctrl->ctrl);
+       if (shutdown)
+               nvme_rdma_shutdown_ctrl(ctrl);
+
+       if (ctrl->ctrl.tagset) {
+               blk_cleanup_queue(ctrl->ctrl.connect_q);
+               blk_mq_free_tag_set(&ctrl->tag_set);
+               nvme_rdma_dev_put(ctrl->device);
+       }
+
+       nvme_put_ctrl(&ctrl->ctrl);
+}
+
  static void nvme_rdma_del_ctrl_work(struct work_struct *work)
  {
         struct nvme_rdma_ctrl *ctrl = container_of(work,
                                 struct nvme_rdma_ctrl, delete_work);
  
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_rdma_shutdown_ctrl(ctrl);
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
+       __nvme_rdma_remove_ctrl(ctrl, true);
  }
  
  static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@@ -1698,9 +1706,7 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
         struct nvme_rdma_ctrl *ctrl = container_of(work,
                                 struct nvme_rdma_ctrl, delete_work);
  
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_uninit_ctrl(&ctrl->ctrl);
-       nvme_put_ctrl(&ctrl->ctrl);
+       __nvme_rdma_remove_ctrl(ctrl, false);
  }
  
  static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@@ -1739,6 +1745,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
         if (ctrl->queue_count > 1) {
                 nvme_start_queues(&ctrl->ctrl);
                 nvme_queue_scan(&ctrl->ctrl);
+               nvme_queue_async_events(&ctrl->ctrl);
         }
  
         return;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c

index 2fac17a5ad53a8aba3b2f9cb84369887dfaa88f6..47c564b5a2895198b020a26851b824350f10006e 100644 (file)
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -13,7 +13,6 @@
   */
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include <linux/module.h>
-#include <linux/random.h>
  #include <generated/utsrelease.h>
  #include "nvmet.h"
  
@@ -83,7 +82,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
  {
         struct nvmet_ctrl *ctrl = req->sq->ctrl;
         struct nvme_id_ctrl *id;
-       u64 serial;
         u16 status = 0;
  
         id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -96,10 +94,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
         id->vid = 0;
         id->ssvid = 0;
  
-       /* generate a random serial number as our controllers are ephemeral: */
-       get_random_bytes(&serial, sizeof(serial));
         memset(id->sn, ' ', sizeof(id->sn));
-       snprintf(id->sn, sizeof(id->sn), "%llx", serial);
+       snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
  
         memset(id->mn, ' ', sizeof(id->mn));
         strncpy((char *)id->mn, "Linux", sizeof(id->mn));
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c

index 8a891ca53367eaa6498f8326e745b081a110157e..6559d5afa7bfd9f808281658f686429c53fc7903 100644 (file)
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -13,6 +13,7 @@
   */
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include <linux/module.h>
+#include <linux/random.h>
  #include "nvmet.h"
  
  static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -728,6 +729,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
         memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
         memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
  
+       /* generate a random serial number as our controllers are ephemeral: */
+       get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
+
         kref_init(&ctrl->ref);
         ctrl->subsys = subsys;
  
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c

index 94e782987cc9eb9b2bd29d0a1534811030e7850a..7affd40a6b337ba110caf825fcec4ee2dbe1a96b 100644 (file)
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -414,9 +414,8 @@ static void nvme_loop_del_ctrl_work(struct work_struct *work)
         struct nvme_loop_ctrl *ctrl = container_of(work,
                                 struct nvme_loop_ctrl, delete_work);
  
-       nvme_remove_namespaces(&ctrl->ctrl);
-       nvme_loop_shutdown_ctrl(ctrl);
         nvme_uninit_ctrl(&ctrl->ctrl);
+       nvme_loop_shutdown_ctrl(ctrl);
         nvme_put_ctrl(&ctrl->ctrl);
  }
  
@@ -501,7 +500,6 @@ out_free_queues:
         nvme_loop_destroy_admin_queue(ctrl);
  out_disable:
         dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-       nvme_remove_namespaces(&ctrl->ctrl);
         nvme_uninit_ctrl(&ctrl->ctrl);
         nvme_put_ctrl(&ctrl->ctrl);
  }
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h

index 57dd6d834c28a84a2c8960f84737ec9d082d67e1..76b6eedccaf92ce4708cc4b730c03775d5469f0f 100644 (file)
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -113,6 +113,7 @@ struct nvmet_ctrl {
  
         struct mutex            lock;
         u64                     cap;
+       u64                     serial;
         u32                     cc;
         u32                     csts;
  
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c

index e06d504bdf0c81aad35c2643c299abec105e7337..b4d648536c3e43316cc2929f8346d4997db01306 100644 (file)
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@ enum nvmet_rdma_queue_state {
         NVMET_RDMA_Q_CONNECTING,
         NVMET_RDMA_Q_LIVE,
         NVMET_RDMA_Q_DISCONNECTING,
+       NVMET_RDMA_IN_DEVICE_REMOVAL,
  };
  
  struct nvmet_rdma_queue {
@@ -615,15 +616,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
         if (!len)
                 return 0;
  
-       /* use the already allocated data buffer if possible */
-       if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
-               nvmet_rdma_use_inline_sg(rsp, len, 0);
-       } else {
-               status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
-                               len);
-               if (status)
-                       return status;
-       }
+       status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+                       len);
+       if (status)
+               return status;
  
         ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
                         rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -984,7 +980,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w)
         struct nvmet_rdma_device *dev = queue->dev;
  
         nvmet_rdma_free_queue(queue);
-       rdma_destroy_id(cm_id);
+
+       if (queue->state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+               rdma_destroy_id(cm_id);
+
         kref_put(&dev->ref, nvmet_rdma_free_dev);
  }
  
@@ -1233,8 +1232,9 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
         switch (queue->state) {
         case NVMET_RDMA_Q_CONNECTING:
         case NVMET_RDMA_Q_LIVE:
-               disconnect = true;
                 queue->state = NVMET_RDMA_Q_DISCONNECTING;
+       case NVMET_RDMA_IN_DEVICE_REMOVAL:
+               disconnect = true;
                 break;
         case NVMET_RDMA_Q_DISCONNECTING:
                 break;
@@ -1272,6 +1272,62 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
         schedule_work(&queue->release_work);
  }
  
+/**
+ * nvmet_rdma_device_removal() - Handle RDMA device removal
+ * @cm_id:      rdma_cm id (its context is the nvmet port for a listener id)
+ * @queue:      nvmet rdma queue (cm id qp_context)
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device bound listener cm_id (where in this case
+ * queue will be null).
+ *
+ * We claim ownership of destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL, and for ports
+ * we nullify the priv to prevent double cm_id destruction; the cm_id is
+ * destroyed implicitly by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+               struct nvmet_rdma_queue *queue)
+{
+       unsigned long flags;
+
+       if (!queue) {
+               struct nvmet_port *port = cm_id->context;
+
+               /*
+                * This is a listener cm_id. Make sure that
+                * future remove_port won't invoke a double
+                * cm_id destroy. use atomic xchg to make sure
+                * we don't compete with remove_port.
+                */
+               if (xchg(&port->priv, NULL) != cm_id)
+                       return 0;
+       } else {
+               /*
+                * This is a queue cm_id. Make sure that
+                * release queue will not destroy the cm_id
+                * and schedule all ctrl queues removal (only
+                * if the queue is not disconnecting already).
+                */
+               spin_lock_irqsave(&queue->state_lock, flags);
+               if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+                       queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+               spin_unlock_irqrestore(&queue->state_lock, flags);
+               nvmet_rdma_queue_disconnect(queue);
+               flush_scheduled_work();
+       }
+
+       /*
+        * We need to return 1 so that the core will destroy
+        * its own ID.  What a great API design..
+        */
+       return 1;
+}
+
  static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
                 struct rdma_cm_event *event)
  {
@@ -1294,20 +1350,11 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
                 break;
         case RDMA_CM_EVENT_ADDR_CHANGE:
         case RDMA_CM_EVENT_DISCONNECTED:
-       case RDMA_CM_EVENT_DEVICE_REMOVAL:
         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-               /*
-                * We can get the device removal callback even for a
-                * CM ID that we aren't actually using.  In that case
-                * the context pointer is NULL, so we shouldn't try
-                * to disconnect a non-existing queue.  But we also
-                * need to return 1 so that the core will destroy
-                * it's own ID.  What a great API design..
-                */
-               if (queue)
-                       nvmet_rdma_queue_disconnect(queue);
-               else
-                       ret = 1;
+               nvmet_rdma_queue_disconnect(queue);
+               break;
+       case RDMA_CM_EVENT_DEVICE_REMOVAL:
+               ret = nvmet_rdma_device_removal(cm_id, queue);
                 break;
         case RDMA_CM_EVENT_REJECTED:
         case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1443,10 @@ out_destroy_id:
  
  static void nvmet_rdma_remove_port(struct nvmet_port *port)
  {
-       struct rdma_cm_id *cm_id = port->priv;
+       struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
  
-       rdma_destroy_id(cm_id);
+       if (cm_id)
+               rdma_destroy_id(cm_id);
  }
  
  static struct nvmet_fabrics_ops nvmet_rdma_ops = {
diff --git a/drivers/of/base.c b/drivers/of/base.c

index 7792266db2597b29f8158d87fdb61904eec64fe4..3ce69536a7b3c3832f652ff7b2c43bd9f0db506e 100644 (file)
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1631,8 +1631,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np,
          */
  
   err:
-       if (it.node)
-               of_node_put(it.node);
+       of_node_put(it.node);
         return rc;
  }
  
@@ -2343,20 +2342,13 @@ struct device_node *of_graph_get_endpoint_by_regs(
         const struct device_node *parent, int port_reg, int reg)
  {
         struct of_endpoint endpoint;
-       struct device_node *node, *prev_node = NULL;
-
-       while (1) {
-               node = of_graph_get_next_endpoint(parent, prev_node);
-               of_node_put(prev_node);
-               if (!node)
-                       break;
+       struct device_node *node = NULL;
  
+       for_each_endpoint_of_node(parent, node) {
                 of_graph_parse_endpoint(node, &endpoint);
                 if (((port_reg == -1) || (endpoint.port == port_reg)) &&
                         ((reg == -1) || (endpoint.id == reg)))
                         return node;
-
-               prev_node = node;
         }
  
         return NULL;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c

index 55f1b839114924aeaae2baf1f10a2db145037cca..085c6389afd135b25987add40f31725e33f4db0d 100644 (file)
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -517,7 +517,7 @@ static void *__unflatten_device_tree(const void *blob,
                 pr_warning("End of tree marker overwritten: %08x\n",
                            be32_to_cpup(mem + size));
  
-       if (detached) {
+       if (detached && mynodes) {
                 of_node_set_flag(*mynodes, OF_DETACHED);
                 pr_debug("unflattened tree is detached\n");
         }
diff --git a/drivers/of/irq.c b/drivers/of/irq.c

index 89a71c6074fc9d23fbc3dc2ac5bed7e4d69ce49b..a2e68f740edacbe900af35b5875cb65e6e40771a 100644 (file)
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -544,12 +544,15 @@ void __init of_irq_init(const struct of_device_id *matches)
  
                         list_del(&desc->list);
  
+                       of_node_set_flag(desc->dev, OF_POPULATED);
+
                         pr_debug("of_irq_init: init %s (%p), parent %p\n",
                                  desc->dev->full_name,
                                  desc->dev, desc->interrupt_parent);
                         ret = desc->irq_init_cb(desc->dev,
                                                 desc->interrupt_parent);
                         if (ret) {
+                               of_node_clear_flag(desc->dev, OF_POPULATED);
                                 kfree(desc);
                                 continue;
                         }
@@ -559,8 +562,6 @@ void __init of_irq_init(const struct of_device_id *matches)
                          * its children can get processed in a subsequent pass.
                          */
                         list_add_tail(&desc->list, &intc_parent_list);
-
-                       of_node_set_flag(desc->dev, OF_POPULATED);
                 }
  
                 /* Get the next pending parent that might have children */
diff --git a/drivers/of/platform.c b/drivers/of/platform.c

index 8aa19769107437b8d3bfada2d4fbd35b4b4fc297..f39ccd5aa70125a1fc22529fff0b3de185aa5cd6 100644 (file)
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -497,6 +497,7 @@ int of_platform_default_populate(struct device_node *root,
  }
  EXPORT_SYMBOL_GPL(of_platform_default_populate);
  
+#ifndef CONFIG_PPC
  static int __init of_platform_default_populate_init(void)
  {
         struct device_node *node;
@@ -521,6 +522,7 @@ static int __init of_platform_default_populate_init(void)
         return 0;
  }
  arch_initcall_sync(of_platform_default_populate_init);
+#endif
  
  static int of_platform_device_destroy(struct device *dev, void *data)
  {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c

index a02981efdad570148e39925cfbe4a8579ca7f7ca..eafa6138a6b81866a3bc09739ab3398513746253 100644 (file)
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1411,6 +1411,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
         if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
                 pci_msi_domain_update_chip_ops(info);
  
+       info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
         domain = msi_create_irq_domain(fwnode, info, parent);
         if (!domain)
                 return NULL;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c

index 6ccb994bdfcbd160148c535f18ea656ea7ee13e6..c494613c1909e16b88f06c0270bee1425239e5f3 100644 (file)
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -688,7 +688,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
         return 0;
  }
  
-static DEFINE_MUTEX(arm_pmu_mutex);
+static DEFINE_SPINLOCK(arm_pmu_lock);
  static LIST_HEAD(arm_pmu_list);
  
  /*
@@ -701,7 +701,7 @@ static int arm_perf_starting_cpu(unsigned int cpu)
  {
         struct arm_pmu *pmu;
  
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
         list_for_each_entry(pmu, &arm_pmu_list, entry) {
  
                 if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
@@ -709,7 +709,7 @@ static int arm_perf_starting_cpu(unsigned int cpu)
                 if (pmu->reset)
                         pmu->reset(pmu);
         }
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
         return 0;
  }
  
@@ -821,9 +821,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
         if (!cpu_hw_events)
                 return -ENOMEM;
  
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
         list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
  
         err = cpu_pm_pmu_register(cpu_pmu);
         if (err)
@@ -859,9 +859,9 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
         return 0;
  
  out_unregister:
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
         list_del(&cpu_pmu->entry);
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
         free_percpu(cpu_hw_events);
         return err;
  }
@@ -869,9 +869,9 @@ out_unregister:
  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
  {
         cpu_pm_pmu_unregister(cpu_pmu);
-       mutex_lock(&arm_pmu_mutex);
+       spin_lock(&arm_pmu_lock);
         list_del(&cpu_pmu->entry);
-       mutex_unlock(&arm_pmu_mutex);
+       spin_unlock(&arm_pmu_lock);
         free_percpu(cpu_pmu->hw_events);
  }
  
@@ -967,11 +967,12 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu)
  
         /* If we didn't manage to parse anything, try the interrupt affinity */
         if (cpumask_weight(&pmu->supported_cpus) == 0) {
-               if (!using_spi) {
+               int irq = platform_get_irq(pdev, 0);
+
+               if (irq_is_percpu(irq)) {
                         /* If using PPIs, check the affinity of the partition */
-                       int ret, irq;
+                       int ret;
  
-                       irq = platform_get_irq(pdev, 0);
                         ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
                         if (ret) {
                                 kfree(irqs);
diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c

index eb4990ff26ca581fa8040e2e028111107524c8f9..7fb765642ee78bc320a30c04bf3bf8d1a4724878 100644 (file)
--- a/drivers/pinctrl/intel/pinctrl-merrifield.c
+++ b/drivers/pinctrl/intel/pinctrl-merrifield.c
@@ -11,6 +11,7 @@
  
  #include <linux/bitops.h>
  #include <linux/err.h>
+#include <linux/io.h>
  #include <linux/module.h>
  #include <linux/platform_device.h>
  #include <linux/pinctrl/pinconf.h>
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c

index 11623c6b0cb30270ea267068b495d3227dcf3d96..44e69c963f5da21ea63a9e558aca58aaebc3134c 100644 (file)
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -727,13 +727,7 @@ static int meson_pinctrl_probe(struct platform_device *pdev)
                 return PTR_ERR(pc->pcdev);
         }
  
-       ret = meson_gpiolib_register(pc);
-       if (ret) {
-               pinctrl_unregister(pc->pcdev);
-               return ret;
-       }
-
-       return 0;
+       return meson_gpiolib_register(pc);
  }
  
  static struct platform_driver meson_pinctrl_driver = {
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c

index 634b4d30eefb1e3c14c4ed6243ca3382fc4f5b66..b3e772390ab66397dccf9e8823c58a0e6b40d0a4 100644 (file)
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -43,17 +43,6 @@ static int amd_gpio_direction_input(struct gpio_chip *gc, unsigned offset)
  
         spin_lock_irqsave(&gpio_dev->lock, flags);
         pin_reg = readl(gpio_dev->base + offset * 4);
-       /*
-        * Suppose BIOS or Bootloader sets specific debounce for the
-        * GPIO. if not, set debounce to be  2.75ms and remove glitch.
-       */
-       if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
-               pin_reg |= 0xf;
-               pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
-               pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
-               pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
-       }
-
         pin_reg &= ~BIT(OUTPUT_ENABLE_OFF);
         writel(pin_reg, gpio_dev->base + offset * 4);
         spin_unlock_irqrestore(&gpio_dev->lock, flags);
@@ -326,15 +315,6 @@ static void amd_gpio_irq_enable(struct irq_data *d)
  
         spin_lock_irqsave(&gpio_dev->lock, flags);
         pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
-       /*
-               Suppose BIOS or Bootloader sets specific debounce for the
-               GPIO. if not, set debounce to be  2.75ms.
-       */
-       if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
-               pin_reg |= 0xf;
-               pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
-               pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
-       }
         pin_reg |= BIT(INTERRUPT_ENABLE_OFF);
         pin_reg |= BIT(INTERRUPT_MASK_OFF);
         writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c

index c6d410ef8de08ec9bdaf6ee9ce803f1879b288a1..7bad200bd67c5cd4801c4098cac5421d9e02f24f 100644 (file)
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -1432,7 +1432,6 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev)
  {
         struct pistachio_pinctrl *pctl;
         struct resource *res;
-       int ret;
  
         pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL);
         if (!pctl)
@@ -1464,13 +1463,7 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev)
                 return PTR_ERR(pctl->pctldev);
         }
  
-       ret = pistachio_gpio_register(pctl);
-       if (ret < 0) {
-               pinctrl_unregister(pctl->pctldev);
-               return ret;
-       }
-
-       return 0;
+       return pistachio_gpio_register(pctl);
  }
  
  static struct platform_driver pistachio_pinctrl_driver = {
diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c

index 9c65f134d4474d843ffa907051191e2d8c484a24..da7a75f824891200f9db4c9dd4d274c5125d3dd8 100644 (file)
--- a/drivers/power/max17042_battery.c
+++ b/drivers/power/max17042_battery.c
@@ -457,13 +457,16 @@ static inline void max17042_write_model_data(struct max17042_chip *chip,
  }
  
  static inline void max17042_read_model_data(struct max17042_chip *chip,
-                                       u8 addr, u32 *data, int size)
+                                       u8 addr, u16 *data, int size)
  {
         struct regmap *map = chip->regmap;
         int i;
+       u32 tmp;
  
-       for (i = 0; i < size; i++)
-               regmap_read(map, addr + i, &data[i]);
+       for (i = 0; i < size; i++) {
+               regmap_read(map, addr + i, &tmp);
+               data[i] = (u16)tmp;
+       }
  }
  
  static inline int max17042_model_data_compare(struct max17042_chip *chip,
@@ -486,7 +489,7 @@ static int max17042_init_model(struct max17042_chip *chip)
  {
         int ret;
         int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-       u32 *temp_data;
+       u16 *temp_data;
  
         temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
         if (!temp_data)
@@ -501,7 +504,7 @@ static int max17042_init_model(struct max17042_chip *chip)
         ret = max17042_model_data_compare(
                 chip,
                 chip->pdata->config_data->cell_char_tbl,
-               (u16 *)temp_data,
+               temp_data,
                 table_size);
  
         max10742_lock_model(chip);
@@ -514,7 +517,7 @@ static int max17042_verify_model_lock(struct max17042_chip *chip)
  {
         int i;
         int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-       u32 *temp_data;
+       u16 *temp_data;
         int ret = 0;
  
         temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig

index 3bfac539334b5c75ed78f5c3541ffe476e962f1b..c74c3f67b8da01fad62328ca31f8c9b8bb09c794 100644 (file)
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -200,8 +200,8 @@ config REBOOT_MODE
  config SYSCON_REBOOT_MODE
         tristate "Generic SYSCON regmap reboot mode driver"
         depends on OF
+       depends on MFD_SYSCON
         select REBOOT_MODE
-       select MFD_SYSCON
         help
           Say y here will enable reboot mode driver. This will
           get reboot mode arguments and store it in SYSCON mapped
diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c

index 9ab7f562a83ba6538054e1e39a8b758489a4f8d7..f69387e12c1e545a3cedcb126247050401c8996a 100644 (file)
--- a/drivers/power/reset/hisi-reboot.c
+++ b/drivers/power/reset/hisi-reboot.c
@@ -53,13 +53,16 @@ static int hisi_reboot_probe(struct platform_device *pdev)
  
         if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) {
                 pr_err("failed to find reboot-offset property\n");
+               iounmap(base);
                 return -EINVAL;
         }
  
         err = register_restart_handler(&hisi_restart_nb);
-       if (err)
+       if (err) {
                 dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n",
                         err);
+               iounmap(base);
+       }
  
         return err;
  }
diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c

index 73dfae41def8a659978eec9a738448f604cb1973..4c56e54af6ace4b11133d7a1944a54ca0725ddf6 100644 (file)
--- a/drivers/power/tps65217_charger.c
+++ b/drivers/power/tps65217_charger.c
@@ -206,6 +206,7 @@ static int tps65217_charger_probe(struct platform_device *pdev)
         if (!charger)
                 return -ENOMEM;
  
+       platform_set_drvdata(pdev, charger);
         charger->tps = tps;
         charger->dev = &pdev->dev;
  
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c

index cecc15a880de6928fed5ee0d35588fbae03423dd..3fa17ac8df5492f4d2e4681d1a4f07f2f8defffa 100644 (file)
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -1080,8 +1080,8 @@ static int riocm_send_ack(struct rio_channel *ch)
  static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
                                            long timeout)
  {
-       struct rio_channel *ch = NULL;
-       struct rio_channel *new_ch = NULL;
+       struct rio_channel *ch;
+       struct rio_channel *new_ch;
         struct conn_req *req;
         struct cm_peer *peer;
         int found = 0;
@@ -1155,6 +1155,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
  
         spin_unlock_bh(&ch->lock);
         riocm_put_channel(ch);
+       ch = NULL;
         kfree(req);
  
         down_read(&rdev_sem);
@@ -1172,7 +1173,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
         if (!found) {
                 /* If peer device object not found, simply ignore the request */
                 err = -ENODEV;
-               goto err_nodev;
+               goto err_put_new_ch;
         }
  
         new_ch->rdev = peer->rdev;
@@ -1184,15 +1185,16 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
  
         *new_ch_id = new_ch->id;
         return new_ch;
+
+err_put_new_ch:
+       spin_lock_bh(&idr_lock);
+       idr_remove(&ch_idr, new_ch->id);
+       spin_unlock_bh(&idr_lock);
+       riocm_put_channel(new_ch);
+
  err_put:
-       riocm_put_channel(ch);
-err_nodev:
-       if (new_ch) {
-               spin_lock_bh(&idr_lock);
-               idr_remove(&ch_idr, new_ch->id);
-               spin_unlock_bh(&idr_lock);
-               riocm_put_channel(new_ch);
-       }
+       if (ch)
+               riocm_put_channel(ch);
         *new_ch_id = 0;
         return ERR_PTR(err);
  }
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c

index 8973d34ce5ba0f786afbe228e201fa36befe9f37..fb1b56a714753f208c29511c9cc6cea0550daffd 100644 (file)
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1643,9 +1643,18 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
         u8 *sense = NULL;
         int expires;
  
+       cqr = (struct dasd_ccw_req *) intparm;
         if (IS_ERR(irb)) {
                 switch (PTR_ERR(irb)) {
                 case -EIO:
+                       if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) {
+                               device = (struct dasd_device *) cqr->startdev;
+                               cqr->status = DASD_CQR_CLEARED;
+                               dasd_device_clear_timer(device);
+                               wake_up(&dasd_flush_wq);
+                               dasd_schedule_device_bh(device);
+                               return;
+                       }
                         break;
                 case -ETIMEDOUT:
                         DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: "
@@ -1661,7 +1670,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
         }
  
         now = get_tod_clock();
-       cqr = (struct dasd_ccw_req *) intparm;
         /* check for conditions that should be handled immediately */
         if (!cqr ||
             !(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c

index fd2eff44009806c039ad18bd5d546d452c4d81fa..98bbec44bcd05c0c37da4d853c17e2c525767c3e 100644 (file)
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -5078,6 +5078,8 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device,
                 return PTR_ERR(cqr);
         }
  
+       cqr->lpm = lpum;
+retry:
         cqr->startdev = device;
         cqr->memdev = device;
         cqr->block = NULL;
@@ -5122,6 +5124,14 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device,
                         (prssdp + 1);
                 memcpy(messages, message_buf,
                        sizeof(struct dasd_rssd_messages));
+       } else if (cqr->lpm) {
+               /*
+                * on z/VM we might not be able to do I/O on the requested path
+                * but instead we get the required information on any path
+                * so retry with open path mask
+                */
+               cqr->lpm = 0;
+               goto retry;
         } else
                 DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
                                 "Reading messages failed with rc=%d\n"
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c

index 7ada078ffdd04b09c81cc45ddfab4057f397158d..6a58bc8f46e2a20bd170fb458c53b2f77871e3f6 100644 (file)
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -762,7 +762,6 @@ static int io_subchannel_initialize_dev(struct subchannel *sch,
         priv->state = DEV_STATE_NOT_OPER;
         priv->dev_id.devno = sch->schib.pmcw.dev;
         priv->dev_id.ssid = sch->schid.ssid;
-       priv->schid = sch->schid;
  
         INIT_WORK(&priv->todo_work, ccw_device_todo);
         INIT_LIST_HEAD(&priv->cmb_list);
@@ -1000,7 +999,6 @@ static int ccw_device_move_to_sch(struct ccw_device *cdev,
         put_device(&old_sch->dev);
         /* Initialize new subchannel. */
         spin_lock_irq(sch->lock);
-       cdev->private->schid = sch->schid;
         cdev->ccwlock = sch->lock;
         if (!sch_is_pseudo_sch(sch))
                 sch_set_cdev(sch, cdev);
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c

index 15b56a15db151cf4cfdfbc1e251a57400fb0706c..9bc3512374c903980299c014947d86ba4cefcffa 100644 (file)
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -26,6 +26,7 @@
  static void
  ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb)
  {
+       struct subchannel *sch = to_subchannel(cdev->dev.parent);
         char dbf_text[15];
  
         if (!scsw_is_valid_cstat(&irb->scsw) ||
@@ -36,10 +37,10 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb)
                       "received"
                       " ... device %04x on subchannel 0.%x.%04x, dev_stat "
                       ": %02X sch_stat : %02X\n",
-                     cdev->private->dev_id.devno, cdev->private->schid.ssid,
-                     cdev->private->schid.sch_no,
+                     cdev->private->dev_id.devno, sch->schid.ssid,
+                     sch->schid.sch_no,
                       scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw));
-       sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no);
+       sprintf(dbf_text, "chk%x", sch->schid.sch_no);
         CIO_TRACE_EVENT(0, dbf_text);
         CIO_HEX_EVENT(0, irb, sizeof(struct irb));
  }
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h

index 8975060af96cb4640012fa0b427b14ce3893f1a0..220f49145b2f9bf48e29f92dc06f94ab7a9a0ef2 100644 (file)
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -120,7 +120,6 @@ struct ccw_device_private {
         int state;              /* device state */
         atomic_t onoff;
         struct ccw_dev_id dev_id;       /* device id */
-       struct subchannel_id schid;     /* subchannel number */
         struct ccw_request req;         /* internal I/O request */
         int iretry;
         u8 pgid_valid_mask;     /* mask of valid PGIDs */
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c

index 4bb5262f7aee705e7bf3b74c5ee950632dfbfd5a..71bf9bded48519c72a5d540a6ee8c1f89efc7bf2 100644 (file)
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -686,6 +686,15 @@ static void qdio_kick_handler(struct qdio_q *q)
         q->qdio_error = 0;
  }
  
+static inline int qdio_tasklet_schedule(struct qdio_q *q)
+{
+       if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) {
+               tasklet_schedule(&q->tasklet);
+               return 0;
+       }
+       return -EPERM;
+}
+
  static void __qdio_inbound_processing(struct qdio_q *q)
  {
         qperf_inc(q, tasklet_inbound);
@@ -698,10 +707,8 @@ static void __qdio_inbound_processing(struct qdio_q *q)
         if (!qdio_inbound_q_done(q)) {
                 /* means poll time is not yet over */
                 qperf_inc(q, tasklet_inbound_resched);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
-                       tasklet_schedule(&q->tasklet);
+               if (!qdio_tasklet_schedule(q))
                         return;
-               }
         }
  
         qdio_stop_polling(q);
@@ -711,8 +718,7 @@ static void __qdio_inbound_processing(struct qdio_q *q)
          */
         if (!qdio_inbound_q_done(q)) {
                 qperf_inc(q, tasklet_inbound_resched2);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
-                       tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
         }
  }
  
@@ -869,16 +875,15 @@ static void __qdio_outbound_processing(struct qdio_q *q)
          * is noticed and outbound_handler is called after some time.
          */
         if (qdio_outbound_q_done(q))
-               del_timer(&q->u.out.timer);
+               del_timer_sync(&q->u.out.timer);
         else
-               if (!timer_pending(&q->u.out.timer))
+               if (!timer_pending(&q->u.out.timer) &&
+                   likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
                         mod_timer(&q->u.out.timer, jiffies + 10 * HZ);
         return;
  
  sched:
-       if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
-               return;
-       tasklet_schedule(&q->tasklet);
+       qdio_tasklet_schedule(q);
  }
  
  /* outbound tasklet */
@@ -892,9 +897,7 @@ void qdio_outbound_timer(unsigned long data)
  {
         struct qdio_q *q = (struct qdio_q *)data;
  
-       if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
-               return;
-       tasklet_schedule(&q->tasklet);
+       qdio_tasklet_schedule(q);
  }
  
  static inline void qdio_check_outbound_after_thinint(struct qdio_q *q)
@@ -907,7 +910,7 @@ static inline void qdio_check_outbound_after_thinint(struct qdio_q *q)
  
         for_each_output_queue(q->irq_ptr, out, i)
                 if (!qdio_outbound_q_done(out))
-                       tasklet_schedule(&out->tasklet);
+                       qdio_tasklet_schedule(out);
  }
  
  static void __tiqdio_inbound_processing(struct qdio_q *q)
@@ -929,10 +932,8 @@ static void __tiqdio_inbound_processing(struct qdio_q *q)
  
         if (!qdio_inbound_q_done(q)) {
                 qperf_inc(q, tasklet_inbound_resched);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
-                       tasklet_schedule(&q->tasklet);
+               if (!qdio_tasklet_schedule(q))
                         return;
-               }
         }
  
         qdio_stop_polling(q);
@@ -942,8 +943,7 @@ static void __tiqdio_inbound_processing(struct qdio_q *q)
          */
         if (!qdio_inbound_q_done(q)) {
                 qperf_inc(q, tasklet_inbound_resched2);
-               if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
-                       tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
         }
  }
  
@@ -977,7 +977,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr)
         int i;
         struct qdio_q *q;
  
-       if (unlikely(irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
+       if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
                 return;
  
         for_each_input_queue(irq_ptr, q, i) {
@@ -1003,7 +1003,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr)
                         continue;
                 if (need_siga_sync(q) && need_siga_sync_out_after_pci(q))
                         qdio_siga_sync_q(q);
-               tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
         }
  }
  
@@ -1066,10 +1066,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
                       struct irb *irb)
  {
         struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct subchannel_id schid;
         int cstat, dstat;
  
         if (!intparm || !irq_ptr) {
-               DBF_ERROR("qint:%4x", cdev->private->schid.sch_no);
+               ccw_device_get_schid(cdev, &schid);
+               DBF_ERROR("qint:%4x", schid.sch_no);
                 return;
         }
  
@@ -1122,12 +1124,14 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm,
  int qdio_get_ssqd_desc(struct ccw_device *cdev,
                        struct qdio_ssqd_desc *data)
  {
+       struct subchannel_id schid;
  
         if (!cdev || !cdev->private)
                 return -EINVAL;
  
-       DBF_EVENT("get ssqd:%4x", cdev->private->schid.sch_no);
-       return qdio_setup_get_ssqd(NULL, &cdev->private->schid, data);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("get ssqd:%4x", schid.sch_no);
+       return qdio_setup_get_ssqd(NULL, &schid, data);
  }
  EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc);
  
@@ -1141,7 +1145,7 @@ static void qdio_shutdown_queues(struct ccw_device *cdev)
                 tasklet_kill(&q->tasklet);
  
         for_each_output_queue(irq_ptr, q, i) {
-               del_timer(&q->u.out.timer);
+               del_timer_sync(&q->u.out.timer);
                 tasklet_kill(&q->tasklet);
         }
  }
@@ -1154,14 +1158,15 @@ static void qdio_shutdown_queues(struct ccw_device *cdev)
  int qdio_shutdown(struct ccw_device *cdev, int how)
  {
         struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct subchannel_id schid;
         int rc;
-       unsigned long flags;
  
         if (!irq_ptr)
                 return -ENODEV;
  
         WARN_ON_ONCE(irqs_disabled());
-       DBF_EVENT("qshutdown:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qshutdown:%4x", schid.sch_no);
  
         mutex_lock(&irq_ptr->setup_mutex);
         /*
@@ -1184,7 +1189,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how)
         qdio_shutdown_debug_entries(irq_ptr);
  
         /* cleanup subchannel */
-       spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
  
         if (how & QDIO_FLAG_CLEANUP_USING_CLEAR)
                 rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
@@ -1198,12 +1203,12 @@ int qdio_shutdown(struct ccw_device *cdev, int how)
         }
  
         qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP);
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
         wait_event_interruptible_timeout(cdev->private->wait_q,
                 irq_ptr->state == QDIO_IRQ_STATE_INACTIVE ||
                 irq_ptr->state == QDIO_IRQ_STATE_ERR,
                 10 * HZ);
-       spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
  
  no_cleanup:
         qdio_shutdown_thinint(irq_ptr);
@@ -1211,7 +1216,7 @@ no_cleanup:
         /* restore interrupt handler */
         if ((void *)cdev->handler == (void *)qdio_int_handler)
                 cdev->handler = irq_ptr->orig_handler;
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
  
         qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
         mutex_unlock(&irq_ptr->setup_mutex);
@@ -1228,11 +1233,13 @@ EXPORT_SYMBOL_GPL(qdio_shutdown);
  int qdio_free(struct ccw_device *cdev)
  {
         struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+       struct subchannel_id schid;
  
         if (!irq_ptr)
                 return -ENODEV;
  
-       DBF_EVENT("qfree:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qfree:%4x", schid.sch_no);
         DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf abandoned");
         mutex_lock(&irq_ptr->setup_mutex);
  
@@ -1251,9 +1258,11 @@ EXPORT_SYMBOL_GPL(qdio_free);
   */
  int qdio_allocate(struct qdio_initialize *init_data)
  {
+       struct subchannel_id schid;
         struct qdio_irq *irq_ptr;
  
-       DBF_EVENT("qallocate:%4x", init_data->cdev->private->schid.sch_no);
+       ccw_device_get_schid(init_data->cdev, &schid);
+       DBF_EVENT("qallocate:%4x", schid.sch_no);
  
         if ((init_data->no_input_qs && !init_data->input_handler) ||
             (init_data->no_output_qs && !init_data->output_handler))
@@ -1331,20 +1340,18 @@ static void qdio_detect_hsicq(struct qdio_irq *irq_ptr)
   */
  int qdio_establish(struct qdio_initialize *init_data)
  {
-       struct qdio_irq *irq_ptr;
         struct ccw_device *cdev = init_data->cdev;
-       unsigned long saveflags;
+       struct subchannel_id schid;
+       struct qdio_irq *irq_ptr;
         int rc;
  
-       DBF_EVENT("qestablish:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qestablish:%4x", schid.sch_no);
  
         irq_ptr = cdev->private->qdio_data;
         if (!irq_ptr)
                 return -ENODEV;
  
-       if (cdev->private->state != DEV_STATE_ONLINE)
-               return -EINVAL;
-
         mutex_lock(&irq_ptr->setup_mutex);
         qdio_setup_irq(init_data);
  
@@ -1361,17 +1368,14 @@ int qdio_establish(struct qdio_initialize *init_data)
         irq_ptr->ccw.count = irq_ptr->equeue.count;
         irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr);
  
-       spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
         ccw_device_set_options_mask(cdev, 0);
  
         rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
         if (rc) {
                 DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no);
                 DBF_ERROR("rc:%4x", rc);
-       }
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
-       if (rc) {
                 mutex_unlock(&irq_ptr->setup_mutex);
                 qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
                 return rc;
@@ -1407,19 +1411,17 @@ EXPORT_SYMBOL_GPL(qdio_establish);
   */
  int qdio_activate(struct ccw_device *cdev)
  {
+       struct subchannel_id schid;
         struct qdio_irq *irq_ptr;
         int rc;
-       unsigned long saveflags;
  
-       DBF_EVENT("qactivate:%4x", cdev->private->schid.sch_no);
+       ccw_device_get_schid(cdev, &schid);
+       DBF_EVENT("qactivate:%4x", schid.sch_no);
  
         irq_ptr = cdev->private->qdio_data;
         if (!irq_ptr)
                 return -ENODEV;
  
-       if (cdev->private->state != DEV_STATE_ONLINE)
-               return -EINVAL;
-
         mutex_lock(&irq_ptr->setup_mutex);
         if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) {
                 rc = -EBUSY;
@@ -1431,19 +1433,17 @@ int qdio_activate(struct ccw_device *cdev)
         irq_ptr->ccw.count = irq_ptr->aqueue.count;
         irq_ptr->ccw.cda = 0;
  
-       spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+       spin_lock_irq(get_ccwdev_lock(cdev));
         ccw_device_set_options(cdev, CCWDEV_REPORT_ALL);
  
         rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE,
                               0, DOIO_DENY_PREFETCH);
+       spin_unlock_irq(get_ccwdev_lock(cdev));
         if (rc) {
                 DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no);
                 DBF_ERROR("rc:%4x", rc);
-       }
-       spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
-       if (rc)
                 goto out;
+       }
  
         if (is_thinint_irq(irq_ptr))
                 tiqdio_add_input_queues(irq_ptr);
@@ -1585,10 +1585,11 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags,
  
         /* in case of SIGA errors we must process the error immediately */
         if (used >= q->u.out.scan_threshold || rc)
-               tasklet_schedule(&q->tasklet);
+               qdio_tasklet_schedule(q);
         else
                 /* free the SBALs in case of no further traffic */
-               if (!timer_pending(&q->u.out.timer))
+               if (!timer_pending(&q->u.out.timer) &&
+                   likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
                         mod_timer(&q->u.out.timer, jiffies + HZ);
         return rc;
  }
diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile

index 241891a57caf8e97637d3e6c2ce6baecd021589a..df40692a9011ceb2cb2481af2eaa58a9ff92136e 100644 (file)
--- a/drivers/s390/virtio/Makefile
+++ b/drivers/s390/virtio/Makefile
@@ -6,4 +6,8 @@
  # it under the terms of the GNU General Public License (version 2 only)
  # as published by the Free Software Foundation.
  
-obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o
+s390-virtio-objs := virtio_ccw.o
+ifdef CONFIG_S390_GUEST_OLD_TRANSPORT
+s390-virtio-objs += kvm_virtio.o
+endif
+obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs)
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c

index 1d060fd293a3b8e8a4d4095b2ad84241913272d5..5e5c11f37b2420cbb406ff5591ad15fe615f5ed8 100644 (file)
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -458,6 +458,8 @@ static int __init kvm_devices_init(void)
         if (test_devices_support(total_memory_size) < 0)
                 return -ENODEV;
  
+       pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n");
+
         rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
         if (rc)
                 return rc;
@@ -482,7 +484,7 @@ static int __init kvm_devices_init(void)
  }
  
  /* code for early console output with virtio_console */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+static int early_put_chars(u32 vtermno, const char *buf, int count)
  {
         char scratch[17];
         unsigned int len = count;
diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c

index b381b3718a98f5d484b1769fc4ef727f5fd4c0bd..5648b715fed9c2d4e448c9f477953ecb83e94641 100644 (file)
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -63,7 +63,7 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
         struct fib *fibptr;
         struct hw_fib * hw_fib = (struct hw_fib *)0;
         dma_addr_t hw_fib_pa = (dma_addr_t)0LL;
-       unsigned size;
+       unsigned int size, osize;
         int retval;
  
         if (dev->in_reset) {
@@ -87,7 +87,8 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
          *      will not overrun the buffer when we copy the memory. Return
          *      an error if we would.
          */
-       size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr);
+       osize = size = le16_to_cpu(kfib->header.Size) +
+               sizeof(struct aac_fibhdr);
         if (size < le16_to_cpu(kfib->header.SenderSize))
                 size = le16_to_cpu(kfib->header.SenderSize);
         if (size > dev->max_fib_size) {
@@ -118,6 +119,14 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg)
                 goto cleanup;
         }
  
+       /* Sanity check the second copy */
+       if ((osize != le16_to_cpu(kfib->header.Size) +
+               sizeof(struct aac_fibhdr))
+               || (size < le16_to_cpu(kfib->header.SenderSize))) {
+               retval = -EINVAL;
+               goto cleanup;
+       }
+
         if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) {
                 aac_adapter_interrupt(dev);
                 /*
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c

index a569c65f22b18fbc247c1ab8c0e5c361328ccacb..dcf36537a767c72d9a2e956115e09f7d5f82dde9 100644 (file)
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2923,7 +2923,7 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
         mutex_unlock(&fip->ctlr_mutex);
  
  drop:
-       kfree(skb);
+       kfree_skb(skb);
         return rc;
  }
  
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c

index 2dab3dc2aa69bf44c4f92a3cd539314711964081..c1ed25adb17ec1f11b039854ffa62e590032f9d6 100644 (file)
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -5037,7 +5037,7 @@ static int megasas_init_fw(struct megasas_instance *instance)
         /* Find first memory bar */
         bar_list = pci_select_bars(instance->pdev, IORESOURCE_MEM);
         instance->bar = find_first_bit(&bar_list, sizeof(unsigned long));
-       if (pci_request_selected_regions(instance->pdev, instance->bar,
+       if (pci_request_selected_regions(instance->pdev, 1<<instance->bar,
                                          "megasas: LSI")) {
                 dev_printk(KERN_DEBUG, &instance->pdev->dev, "IO memory region busy!\n");
                 return -EBUSY;
@@ -5339,7 +5339,7 @@ fail_ready_state:
         iounmap(instance->reg_set);
  
        fail_ioremap:
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
  
         return -EINVAL;
  }
@@ -5360,7 +5360,7 @@ static void megasas_release_mfi(struct megasas_instance *instance)
  
         iounmap(instance->reg_set);
  
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
  }
  
  /**
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c

index ec837544f78479e9fad9b08070aca968b448706d..52d8bbf7feb5c50efe361aa2e57f30277f38a0df 100644 (file)
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2603,7 +2603,7 @@ megasas_release_fusion(struct megasas_instance *instance)
  
         iounmap(instance->reg_set);
  
-       pci_release_selected_regions(instance->pdev, instance->bar);
+       pci_release_selected_regions(instance->pdev, 1<<instance->bar);
  }
  
  /**
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c

index 751f13edece010177d162995e4f8b344722560dc..750f82c339d4d6c1d07a11d1bba7d14aa0a56b10 100644 (file)
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2188,6 +2188,17 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc)
         } else
                 ioc->msix96_vector = 0;
  
+       if (ioc->is_warpdrive) {
+               ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
+                   &ioc->chip->ReplyPostHostIndex;
+
+               for (i = 1; i < ioc->cpu_msix_table_sz; i++)
+                       ioc->reply_post_host_index[i] =
+                       (resource_size_t __iomem *)
+                       ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
+                       * 4)));
+       }
+
         list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
                 pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
                     reply_q->name,  ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
@@ -5280,17 +5291,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
         if (r)
                 goto out_free_resources;
  
-       if (ioc->is_warpdrive) {
-               ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
-                   &ioc->chip->ReplyPostHostIndex;
-
-               for (i = 1; i < ioc->cpu_msix_table_sz; i++)
-                       ioc->reply_post_host_index[i] =
-                       (resource_size_t __iomem *)
-                       ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
-                       * 4)));
-       }
-
         pci_set_drvdata(ioc->pdev, ioc->shost);
         r = _base_get_ioc_facts(ioc, CAN_SLEEP);
         if (r)
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c

index 53ef1cb6418e33f0ce9542468359d2ad0297444d..0e8601aa877a83e35dc1305ffa452a1c0c4eeffa 100644 (file)
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -778,6 +778,8 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
         if (!edev)
                 return;
  
+       enclosure_unregister(edev);
+
         ses_dev = edev->scratch;
         edev->scratch = NULL;
  
@@ -789,7 +791,6 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev)
         kfree(edev->component[0].scratch);
  
         put_device(&edev->edev);
-       enclosure_unregister(edev);
  }
  
  static void ses_intf_remove(struct device *cdev,
diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c

index 1b4ff0f4c7168f578253d248507cb57fc02245c2..ed5dd0e8865746fe928f08459218779b97227103 100644 (file)
--- a/drivers/thermal/clock_cooling.c
+++ b/drivers/thermal/clock_cooling.c
@@ -426,6 +426,7 @@ clock_cooling_register(struct device *dev, const char *clock_name)
         if (!ccdev)
                 return ERR_PTR(-ENOMEM);
  
+       mutex_init(&ccdev->lock);
         ccdev->dev = dev;
         ccdev->clk = devm_clk_get(dev, clock_name);
         if (IS_ERR(ccdev->clk))
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c

index 34fe36504a552cdaf112a6982483655f25dc2238..68bd1b56911850ee41dd7c508c7cad7f7c278ddf 100644 (file)
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -116,7 +116,9 @@ static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
                 instance->target = get_target_state(tz, cdev, percentage,
                                                     cur_trip_level);
  
+               mutex_lock(&instance->cdev->lock);
                 instance->cdev->updated = false;
+               mutex_unlock(&instance->cdev->lock);
                 thermal_cdev_update(cdev);
         }
         return 0;
diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c

index fc52016d4e85be590e15f4e1976b8378d2c456d2..bb118a152cbbde3c570f90fb5355bc04afe54beb 100644 (file)
--- a/drivers/thermal/gov_bang_bang.c
+++ b/drivers/thermal/gov_bang_bang.c
@@ -71,7 +71,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                 dev_dbg(&instance->cdev->device, "target=%d\n",
                                         (int)instance->target);
  
+               mutex_lock(&instance->cdev->lock);
                 instance->cdev->updated = false; /* cdev needs update */
+               mutex_unlock(&instance->cdev->lock);
         }
  
         mutex_unlock(&tz->lock);
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c

index 6a6ec1c95a7a2d04c4afd6b68af03d3335627f55..9b4815e81b0df01cf2160d752499b670c4a2d731 100644 (file)
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -21,6 +21,7 @@
  #include <linux/init.h>
  #include <linux/pci.h>
  #include <linux/thermal.h>
+#include <linux/pm.h>
  
  /* Intel PCH thermal Device IDs */
  #define PCH_THERMAL_DID_WPT    0x9CA4 /* Wildcat Point */
@@ -65,6 +66,7 @@ struct pch_thermal_device {
         unsigned long crt_temp;
         int hot_trip_id;
         unsigned long hot_temp;
+       bool bios_enabled;
  };
  
  static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
@@ -75,8 +77,10 @@ static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
         *nr_trips = 0;
  
         /* Check if BIOS has already enabled thermal sensor */
-       if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS))
+       if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) {
+               ptd->bios_enabled = true;
                 goto read_trips;
+       }
  
         tsel = readb(ptd->hw_base + WPT_TSEL);
         /*
@@ -130,9 +134,39 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp)
         return 0;
  }
  
+static int pch_wpt_suspend(struct pch_thermal_device *ptd)
+{
+       u8 tsel;
+
+       if (ptd->bios_enabled)
+               return 0;
+
+       tsel = readb(ptd->hw_base + WPT_TSEL);
+
+       writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL);
+
+       return 0;
+}
+
+static int pch_wpt_resume(struct pch_thermal_device *ptd)
+{
+       u8 tsel;
+
+       if (ptd->bios_enabled)
+               return 0;
+
+       tsel = readb(ptd->hw_base + WPT_TSEL);
+
+       writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
+
+       return 0;
+}
+
  struct pch_dev_ops {
         int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips);
         int (*get_temp)(struct pch_thermal_device *ptd, int *temp);
+       int (*suspend)(struct pch_thermal_device *ptd);
+       int (*resume)(struct pch_thermal_device *ptd);
  };
  
  
@@ -140,6 +174,8 @@ struct pch_dev_ops {
  static const struct pch_dev_ops pch_dev_ops_wpt = {
         .hw_init = pch_wpt_init,
         .get_temp = pch_wpt_get_temp,
+       .suspend = pch_wpt_suspend,
+       .resume = pch_wpt_resume,
  };
  
  static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
@@ -269,6 +305,22 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev)
         pci_disable_device(pdev);
  }
  
+static int intel_pch_thermal_suspend(struct device *device)
+{
+       struct pci_dev *pdev = to_pci_dev(device);
+       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+       return ptd->ops->suspend(ptd);
+}
+
+static int intel_pch_thermal_resume(struct device *device)
+{
+       struct pci_dev *pdev = to_pci_dev(device);
+       struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+       return ptd->ops->resume(ptd);
+}
+
  static struct pci_device_id intel_pch_thermal_id[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
         { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
@@ -276,11 +328,17 @@ static struct pci_device_id intel_pch_thermal_id[] = {
  };
  MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
  
+static const struct dev_pm_ops intel_pch_pm_ops = {
+       .suspend = intel_pch_thermal_suspend,
+       .resume = intel_pch_thermal_resume,
+};
+
  static struct pci_driver intel_pch_thermal_driver = {
         .name           = "intel_pch_thermal",
         .id_table       = intel_pch_thermal_id,
         .probe          = intel_pch_thermal_probe,
         .remove         = intel_pch_thermal_remove,
+       .driver.pm      = &intel_pch_pm_ops,
  };
  
  module_pci_driver(intel_pch_thermal_driver);
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c

index 015ce2eb6eb7ba0a254e2918c8d9710011283fda..0e4dc0afcfd244d510b003575249c4c3ce1d16bd 100644 (file)
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -388,7 +388,7 @@ static int clamp_thread(void *arg)
                 int sleeptime;
                 unsigned long target_jiffies;
                 unsigned int guard;
-               unsigned int compensation = 0;
+               unsigned int compensated_ratio;
                 int interval; /* jiffies to sleep for each attempt */
                 unsigned int duration_jiffies = msecs_to_jiffies(duration);
                 unsigned int window_size_now;
@@ -409,8 +409,11 @@ static int clamp_thread(void *arg)
                  * c-states, thus we need to compensate the injected idle ratio
                  * to achieve the actual target reported by the HW.
                  */
-               compensation = get_compensation(target_ratio);
-               interval = duration_jiffies*100/(target_ratio+compensation);
+               compensated_ratio = target_ratio +
+                       get_compensation(target_ratio);
+               if (compensated_ratio <= 0)
+                       compensated_ratio = 1;
+               interval = duration_jiffies * 100 / compensated_ratio;
  
                 /* align idle time */
                 target_jiffies = roundup(jiffies, interval);
@@ -647,8 +650,8 @@ static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
                 goto exit_set;
         } else  if (set_target_ratio > 0 && new_target_ratio == 0) {
                 pr_info("Stop forced idle injection\n");
-               set_target_ratio = 0;
                 end_power_clamp();
+               set_target_ratio = 0;
         } else  /* adjust currently running */ {
                 set_target_ratio = new_target_ratio;
                 /* make new set_target_ratio visible to other cpus */
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c

index 2f1a863a8e15bc834e007fea264d262d180a04cf..b4d3116cfdafe81767b2b1c91fcab4a034f29041 100644 (file)
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -529,7 +529,9 @@ static void allow_maximum_power(struct thermal_zone_device *tz)
                         continue;
  
                 instance->target = 0;
+               mutex_lock(&instance->cdev->lock);
                 instance->cdev->updated = false;
+               mutex_unlock(&instance->cdev->lock);
                 thermal_cdev_update(instance->cdev);
         }
  }
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c

index ea9366ad3e6bb285e52e368691a0d495cbb3429f..bcef2e7c4ec96f1cfc662019ccf0440d52d26328 100644 (file)
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -175,7 +175,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                         update_passive_instance(tz, trip_type, -1);
  
                 instance->initialized = true;
+               mutex_lock(&instance->cdev->lock);
                 instance->cdev->updated = false; /* cdev needs update */
+               mutex_unlock(&instance->cdev->lock);
         }
  
         mutex_unlock(&tz->lock);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c

index 5133cd1e10b7ae99d838823af2eef272aa00ab76..e2fc6161dded9650c300cf829a381c0a2d14b38c 100644 (file)
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1093,7 +1093,9 @@ int power_actor_set_power(struct thermal_cooling_device *cdev,
                 return ret;
  
         instance->target = state;
+       mutex_lock(&cdev->lock);
         cdev->updated = false;
+       mutex_unlock(&cdev->lock);
         thermal_cdev_update(cdev);
  
         return 0;
@@ -1623,11 +1625,13 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
         struct thermal_instance *instance;
         unsigned long target = 0;
  
+       mutex_lock(&cdev->lock);
         /* cooling device is updated*/
-       if (cdev->updated)
+       if (cdev->updated) {
+               mutex_unlock(&cdev->lock);
                 return;
+       }
  
-       mutex_lock(&cdev->lock);
         /* Make sure cdev enters the deepest cooling state */
         list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
                 dev_dbg(&cdev->device, "zone%d->target=%lu\n",
@@ -1637,9 +1641,9 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
                 if (instance->target > target)
                         target = instance->target;
         }
-       mutex_unlock(&cdev->lock);
         cdev->ops->set_cur_state(cdev, target);
         cdev->updated = true;
+       mutex_unlock(&cdev->lock);
         trace_cdev_update(cdev, target);
         dev_dbg(&cdev->device, "set to state %lu\n", target);
  }
diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c

index 06fd2ed9ef9d13bf0ab09f727020f873152b8da9..c41c7742903ab43b2132241574376b85849666b9 100644 (file)
--- a/drivers/thermal/thermal_hwmon.c
+++ b/drivers/thermal/thermal_hwmon.c
@@ -232,6 +232,7 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
  
         return result;
  }
+EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs);
  
  void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
  {
@@ -270,3 +271,4 @@ void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
         hwmon_device_unregister(hwmon->device);
         kfree(hwmon);
  }
+EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs);
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c

index 71912301ef7f804fb9698d7915f7136041617906..0f3f62e81e5b20b50140ab482df70e818de860a5 100644 (file)
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1354,7 +1354,6 @@ made_compressed_probe:
         spin_lock_init(&acm->write_lock);
         spin_lock_init(&acm->read_lock);
         mutex_init(&acm->mutex);
-       acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
         acm->is_int_ep = usb_endpoint_xfer_int(epread);
         if (acm->is_int_ep)
                 acm->bInterval = epread->bInterval;
@@ -1394,14 +1393,14 @@ made_compressed_probe:
                 urb->transfer_dma = rb->dma;
                 if (acm->is_int_ep) {
                         usb_fill_int_urb(urb, acm->dev,
-                                        acm->rx_endpoint,
+                                        usb_rcvintpipe(usb_dev, epread->bEndpointAddress),
                                          rb->base,
                                          acm->readsize,
                                          acm_read_bulk_callback, rb,
                                          acm->bInterval);
                 } else {
                         usb_fill_bulk_urb(urb, acm->dev,
-                                         acm->rx_endpoint,
+                                         usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress),
                                           rb->base,
                                           acm->readsize,
                                           acm_read_bulk_callback, rb);
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h

index 05ce308d5d2afc10d91225e97e227396ace41cb0..1f1eabfd846280ada598b776504d7d01e5b69886 100644 (file)
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -96,7 +96,6 @@ struct acm {
         struct acm_rb read_buffers[ACM_NR];
         struct acm_wb *putbuffer;                       /* for acm_tty_put_char() */
         int rx_buflimit;
-       int rx_endpoint;
         spinlock_t read_lock;
         int write_used;                                 /* number of non-empty write buffers */
         int transmitting;
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c

index 31ccdccd7a04fda36003cdc3ba58b82e25773fa9..051163189810d4c2a96b28dbb9f758f9ab102f14 100644 (file)
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -171,6 +171,31 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
                                                         ep, buffer, size);
  }
  
+static const unsigned short low_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 8,
+       [USB_ENDPOINT_XFER_ISOC] = 0,
+       [USB_ENDPOINT_XFER_BULK] = 0,
+       [USB_ENDPOINT_XFER_INT] = 8,
+};
+static const unsigned short full_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 64,
+       [USB_ENDPOINT_XFER_ISOC] = 1023,
+       [USB_ENDPOINT_XFER_BULK] = 64,
+       [USB_ENDPOINT_XFER_INT] = 64,
+};
+static const unsigned short high_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 64,
+       [USB_ENDPOINT_XFER_ISOC] = 1024,
+       [USB_ENDPOINT_XFER_BULK] = 512,
+       [USB_ENDPOINT_XFER_INT] = 1023,
+};
+static const unsigned short super_speed_maxpacket_maxes[4] = {
+       [USB_ENDPOINT_XFER_CONTROL] = 512,
+       [USB_ENDPOINT_XFER_ISOC] = 1024,
+       [USB_ENDPOINT_XFER_BULK] = 1024,
+       [USB_ENDPOINT_XFER_INT] = 1024,
+};
+
  static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
      int asnum, struct usb_host_interface *ifp, int num_ep,
      unsigned char *buffer, int size)
@@ -179,6 +204,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
         struct usb_endpoint_descriptor *d;
         struct usb_host_endpoint *endpoint;
         int n, i, j, retval;
+       unsigned int maxp;
+       const unsigned short *maxpacket_maxes;
  
         d = (struct usb_endpoint_descriptor *) buffer;
         buffer += d->bLength;
@@ -286,6 +313,42 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
                         endpoint->desc.wMaxPacketSize = cpu_to_le16(8);
         }
  
+       /* Validate the wMaxPacketSize field */
+       maxp = usb_endpoint_maxp(&endpoint->desc);
+
+       /* Find the highest legal maxpacket size for this endpoint */
+       i = 0;          /* additional transactions per microframe */
+       switch (to_usb_device(ddev)->speed) {
+       case USB_SPEED_LOW:
+               maxpacket_maxes = low_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_FULL:
+               maxpacket_maxes = full_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_HIGH:
+               /* Bits 12..11 are allowed only for HS periodic endpoints */
+               if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) {
+                       i = maxp & (BIT(12) | BIT(11));
+                       maxp &= ~i;
+               }
+               /* fallthrough */
+       default:
+               maxpacket_maxes = high_speed_maxpacket_maxes;
+               break;
+       case USB_SPEED_SUPER:
+       case USB_SPEED_SUPER_PLUS:
+               maxpacket_maxes = super_speed_maxpacket_maxes;
+               break;
+       }
+       j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)];
+
+       if (maxp > j) {
+               dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n",
+                   cfgno, inum, asnum, d->bEndpointAddress, maxp, j);
+               maxp = j;
+               endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp);
+       }
+
         /*
          * Some buggy high speed devices have bulk endpoints using
          * maxpacket sizes other than 512.  High speed HCDs may not
@@ -293,9 +356,6 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
          */
         if (to_usb_device(ddev)->speed == USB_SPEED_HIGH
                         && usb_endpoint_xfer_bulk(d)) {
-               unsigned maxp;
-
-               maxp = usb_endpoint_maxp(&endpoint->desc) & 0x07ff;
                 if (maxp != 512)
                         dev_warn(ddev, "config %d interface %d altsetting %d "
                                 "bulk endpoint 0x%X has invalid maxpacket %d\n",
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c

index e9f5043a2167c2be13fb3cd5b5aab93efa7d9c23..e6a6d67c87058039cb39e23683cde1af49c3686a 100644 (file)
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -241,7 +241,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
                 goto error_decrease_mem;
         }
  
-       mem = usb_alloc_coherent(ps->dev, size, GFP_USER, &dma_handle);
+       mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN,
+                       &dma_handle);
         if (!mem) {
                 ret = -ENOMEM;
                 goto error_free_usbm;
@@ -2582,7 +2583,9 @@ static unsigned int usbdev_poll(struct file *file,
         if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed))
                 mask |= POLLOUT | POLLWRNORM;
         if (!connected(ps))
-               mask |= POLLERR | POLLHUP;
+               mask |= POLLHUP;
+       if (list_empty(&ps->list))
+               mask |= POLLERR;
         return mask;
  }
  
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c

index bee13517676f9b2f00e3cd4e3fd4185f5aaa13c9..1d5fc32d06d007a6f64526a531da8ae69e8e6950 100644 (file)
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1052,14 +1052,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
  
         /* Continue a partial initialization */
         if (type == HUB_INIT2 || type == HUB_INIT3) {
-               device_lock(hub->intfdev);
+               device_lock(&hdev->dev);
  
                 /* Was the hub disconnected while we were waiting? */
-               if (hub->disconnected) {
-                       device_unlock(hub->intfdev);
-                       kref_put(&hub->kref, hub_release);
-                       return;
-               }
+               if (hub->disconnected)
+                       goto disconnected;
                 if (type == HUB_INIT2)
                         goto init2;
                 goto init3;
@@ -1262,7 +1259,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
                         queue_delayed_work(system_power_efficient_wq,
                                         &hub->init_work,
                                         msecs_to_jiffies(delay));
-                       device_unlock(hub->intfdev);
+                       device_unlock(&hdev->dev);
                         return;         /* Continues at init3: below */
                 } else {
                         msleep(delay);
@@ -1281,12 +1278,12 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
         /* Scan all ports that need attention */
         kick_hub_wq(hub);
  
-       /* Allow autosuspend if it was suppressed */
-       if (type <= HUB_INIT3)
+       if (type == HUB_INIT2 || type == HUB_INIT3) {
+               /* Allow autosuspend if it was suppressed */
+ disconnected:
                 usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
-
-       if (type == HUB_INIT2 || type == HUB_INIT3)
-               device_unlock(hub->intfdev);
+               device_unlock(&hdev->dev);
+       }
  
         kref_put(&hub->kref, hub_release);
  }
@@ -1315,8 +1312,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type)
         struct usb_device *hdev = hub->hdev;
         int i;
  
-       cancel_delayed_work_sync(&hub->init_work);
-
         /* hub_wq and related activity won't re-trigger */
         hub->quiescing = 1;
  
diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c

index 974335377d9f185b282295c4aa4c688d0798f9bc..e56d59b19a0ecacaf632cccdf7680ac5114075b7 100644 (file)
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c
@@ -61,6 +61,7 @@ static int dwc3_of_simple_probe(struct platform_device *pdev)
         if (!simple->clks)
                 return -ENOMEM;
  
+       platform_set_drvdata(pdev, simple);
         simple->dev = dev;
  
         for (i = 0; i < simple->num_clocks; i++) {
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c

index 45f5a232d9fb658b76845e19bb3ef4fa0a0aabf6..2eb84d6c24a6903fe57039e59a4321d7bb7ac6b3 100644 (file)
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -37,6 +37,7 @@
  #define PCI_DEVICE_ID_INTEL_BXT                        0x0aaa
  #define PCI_DEVICE_ID_INTEL_BXT_M              0x1aaa
  #define PCI_DEVICE_ID_INTEL_APL                        0x5aaa
+#define PCI_DEVICE_ID_INTEL_KBP                        0xa2b0
  
  static const struct acpi_gpio_params reset_gpios = { 0, 0, false };
  static const struct acpi_gpio_params cs_gpios = { 1, 0, false };
@@ -227,6 +228,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), },
         { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), },
         { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), },
         { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
         {  }    /* Terminating Entry */
  };
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c

index 8f8c2157910e6b848561f75f192b7a45dd4225d1..1f5597ef945d409282cbeb4caf6c981ea8b1cade 100644 (file)
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -829,7 +829,7 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
         if (!req->request.no_interrupt && !chain)
                 trb->ctrl |= DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_ISP_IMI;
  
-       if (last)
+       if (last && !usb_endpoint_xfer_isoc(dep->endpoint.desc))
                 trb->ctrl |= DWC3_TRB_CTRL_LST;
  
         if (chain)
@@ -1955,7 +1955,8 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc)
  
  static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
                 struct dwc3_request *req, struct dwc3_trb *trb,
-               const struct dwc3_event_depevt *event, int status)
+               const struct dwc3_event_depevt *event, int status,
+               int chain)
  {
         unsigned int            count;
         unsigned int            s_pkt = 0;
@@ -1964,17 +1965,22 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
         dep->queued_requests--;
         trace_dwc3_complete_trb(dep, trb);
  
+       /*
+        * If we're in the middle of a series of chained TRBs and we
+        * receive a short transfer along the way, DWC3 will skip
+        * through all TRBs including the last TRB in the chain (the
+        * one where CHN bit is zero). DWC3 will also avoid clearing HWO
+        * bit and SW has to do it manually.
+        *
+        * We're going to do that here to avoid problems of HW trying
+        * to use bogus TRBs for transfers.
+        */
+       if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO))
+               trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+
         if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
-               /*
-                * We continue despite the error. There is not much we
-                * can do. If we don't clean it up we loop forever. If
-                * we skip the TRB then it gets overwritten after a
-                * while since we use them in a ring buffer. A BUG()
-                * would help. Lets hope that if this occurs, someone
-                * fixes the root cause instead of looking away :)
-                */
-               dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n",
-                               dep->name, trb);
+               return 1;
+
         count = trb->size & DWC3_TRB_SIZE_MASK;
  
         if (dep->direction) {
@@ -2013,15 +2019,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
                         s_pkt = 1;
         }
  
-       /*
-        * We assume here we will always receive the entire data block
-        * which we should receive. Meaning, if we program RX to
-        * receive 4K but we receive only 2K, we assume that's all we
-        * should receive and we simply bounce the request back to the
-        * gadget driver for further processing.
-        */
-       req->request.actual += req->request.length - count;
-       if (s_pkt)
+       if (s_pkt && !chain)
                 return 1;
         if ((event->status & DEPEVT_STATUS_LST) &&
                         (trb->ctrl & (DWC3_TRB_CTRL_LST |
@@ -2040,13 +2038,17 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
         struct dwc3_trb         *trb;
         unsigned int            slot;
         unsigned int            i;
+       int                     count = 0;
         int                     ret;
  
         do {
+               int chain;
+
                 req = next_request(&dep->started_list);
                 if (WARN_ON_ONCE(!req))
                         return 1;
  
+               chain = req->request.num_mapped_sgs > 0;
                 i = 0;
                 do {
                         slot = req->first_trb_index + i;
@@ -2054,13 +2056,22 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
                                 slot++;
                         slot %= DWC3_TRB_NUM;
                         trb = &dep->trb_pool[slot];
+                       count += trb->size & DWC3_TRB_SIZE_MASK;
  
                         ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-                                       event, status);
+                                       event, status, chain);
                         if (ret)
                                 break;
                 } while (++i < req->request.num_mapped_sgs);
  
+               /*
+                * We assume here we will always receive the entire data block
+                * which we should receive. Meaning, if we program RX to
+                * receive 4K but we receive only 2K, we assume that's all we
+                * should receive and we simply bounce the request back to the
+                * gadget driver for further processing.
+                */
+               req->request.actual += req->request.length - count;
                 dwc3_gadget_giveback(dep, req, status);
  
                 if (ret)
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c

index eb648485a58c571001c4e31b7c79aadaaee04445..5ebe6af7976ec4189363651dfe505cf6ec79fb12 100644 (file)
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1913,6 +1913,8 @@ unknown:
                         break;
  
                 case USB_RECIP_ENDPOINT:
+                       if (!cdev->config)
+                               break;
                         endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f);
                         list_for_each_entry(f, &cdev->config->functions, list) {
                                 if (test_bit(endp, f->endpoints))
@@ -2124,14 +2126,14 @@ int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
  
         cdev->os_desc_req = usb_ep_alloc_request(ep0, GFP_KERNEL);
         if (!cdev->os_desc_req) {
-               ret = PTR_ERR(cdev->os_desc_req);
+               ret = -ENOMEM;
                 goto end;
         }
  
         /* OS feature descriptor length <= 4kB */
         cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL);
         if (!cdev->os_desc_req->buf) {
-               ret = PTR_ERR(cdev->os_desc_req->buf);
+               ret = -ENOMEM;
                 kfree(cdev->os_desc_req);
                 goto end;
         }
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c

index 70cf3477f951a11931032c2c73d59ace5b7a1163..f9237fe2be0565857a23c637fd7d878d85fe049a 100644 (file)
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1490,7 +1490,9 @@ void unregister_gadget_item(struct config_item *item)
  {
         struct gadget_info *gi = to_gadget_info(item);
  
+       mutex_lock(&gi->lock);
         unregister_gadget(gi);
+       mutex_unlock(&gi->lock);
  }
  EXPORT_SYMBOL_GPL(unregister_gadget_item);
  
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c

index 943c21aafd3b573affe0e61d608b20ec74dc79ee..ab6ac1b74ac0f59e4a09ec340a66a954934836b1 100644 (file)
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -680,6 +680,12 @@ static int rndis_reset_response(struct rndis_params *params,
  {
         rndis_reset_cmplt_type *resp;
         rndis_resp_t *r;
+       u8 *xbuf;
+       u32 length;
+
+       /* drain the response queue */
+       while ((xbuf = rndis_get_next_response(params, &length)))
+               rndis_free_response(params, xbuf);
  
         r = rndis_add_response(params, sizeof(rndis_reset_cmplt_type));
         if (!r)
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c

index a3f7e7c55ebb18bd5045e27628c81075e484af37..5f562c1ec795718900e5e33de6d6e051eb2bb2ae 100644 (file)
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -556,7 +556,8 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
                         /* Multi frame CDC protocols may store the frame for
                          * later which is not a dropped frame.
                          */
-                       if (dev->port_usb->supports_multi_frame)
+                       if (dev->port_usb &&
+                                       dev->port_usb->supports_multi_frame)
                                 goto multiframe;
                         goto drop;
                 }
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c

index 66753ba7a42eb803e64964c6a4a470dad6391242..31125a4a2658938cdc67ef4b24ee9bfbfeeadf44 100644 (file)
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -2023,7 +2023,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src,
         if (!data) {
                 kfree(*class_array);
                 *class_array = NULL;
-               ret = PTR_ERR(data);
+               ret = -ENOMEM;
                 goto unlock;
         }
         cl_arr = *class_array;
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c

index aa3707bdebb4ad7fc2d56f50bbe80ab7093cf878..16104b5ebdcb73962edc506c5c5cb3e4c7b78108 100644 (file)
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -542,7 +542,7 @@ static ssize_t ep_aio(struct kiocb *iocb,
          */
         spin_lock_irq(&epdata->dev->lock);
         value = -ENODEV;
-       if (unlikely(epdata->ep))
+       if (unlikely(epdata->ep == NULL))
                 goto fail;
  
         req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
@@ -606,7 +606,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to)
         }
         if (is_sync_kiocb(iocb)) {
                 value = ep_io(epdata, buf, len);
-               if (value >= 0 && copy_to_iter(buf, value, to))
+               if (value >= 0 && (copy_to_iter(buf, value, to) != value))
                         value = -EFAULT;
         } else {
                 struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c

index ff8685ea721936e3a67f63c0d44d0ec0ae4d943b..934f83881c3074250d9117df691463cddb61ca0c 100644 (file)
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1145,7 +1145,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
                         if (ret != -EPROBE_DEFER)
                                 list_del(&driver->pending);
                         if (ret)
-                               goto err4;
+                               goto err5;
                         break;
                 }
         }
@@ -1154,6 +1154,9 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
  
         return 0;
  
+err5:
+       device_del(&udc->dev);
+
  err4:
         list_del(&udc->list);
         mutex_unlock(&udc_lock);
diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c

index 93d28cb00b76f15de7f281f465a1c791aa7edbbe..cf8819a5c5b263610eae75cf2066a6f1471b417e 100644 (file)
--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
+++ b/drivers/usb/gadget/udc/fsl_qe_udc.c
@@ -2053,7 +2053,7 @@ static void setup_received_handle(struct qe_udc *udc,
                         struct qe_ep *ep;
  
                         if (wValue != 0 || wLength != 0
-                               || pipe > USB_MAX_ENDPOINTS)
+                               || pipe >= USB_MAX_ENDPOINTS)
                                 break;
                         ep = &udc->eps[pipe];
  
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c

index a962b89b65a665df737e8ebd4f48646064805334..1e5f529d51a21f4a17db61cf0f8b7f48d8234386 100644 (file)
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -332,11 +332,11 @@ static void ehci_turn_off_all_ports(struct ehci_hcd *ehci)
         int     port = HCS_N_PORTS(ehci->hcs_params);
  
         while (port--) {
-               ehci_writel(ehci, PORT_RWC_BITS,
-                               &ehci->regs->port_status[port]);
                 spin_unlock_irq(&ehci->lock);
                 ehci_port_power(ehci, port, false);
                 spin_lock_irq(&ehci->lock);
+               ehci_writel(ehci, PORT_RWC_BITS,
+                               &ehci->regs->port_status[port]);
         }
  }
  
diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c

index c369c29e496d735e77f3be32226c479e6871e846..2f7690092a7ffb6e67ca893d9d491f48da6be8d3 100644 (file)
--- a/drivers/usb/host/max3421-hcd.c
+++ b/drivers/usb/host/max3421-hcd.c
@@ -1675,7 +1675,7 @@ max3421_gpout_set_value(struct usb_hcd *hcd, u8 pin_number, u8 value)
         if (pin_number > 7)
                 return;
  
-       mask = 1u << pin_number;
+       mask = 1u << (pin_number % 4);
         idx = pin_number / 4;
  
         if (value)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c

index d61fcc48099ed68f0a6084eef673ae33371cab95..730b9fd266852db5812e98456c9ff8299aa40ae6 100644 (file)
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -386,6 +386,9 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend)
  
         ret = 0;
         virt_dev = xhci->devs[slot_id];
+       if (!virt_dev)
+               return -ENODEV;
+
         cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO);
         if (!cmd) {
                 xhci_dbg(xhci, "Couldn't allocate command structure.\n");
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c

index 4fd041bec332c154604d6e1c262a70f791604fae..d7b0f97abbad608200cbfb5b59d0faacfa1b8b43 100644 (file)
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -314,11 +314,12 @@ static void xhci_pci_remove(struct pci_dev *dev)
                 usb_remove_hcd(xhci->shared_hcd);
                 usb_put_hcd(xhci->shared_hcd);
         }
-       usb_hcd_pci_remove(dev);
  
         /* Workaround for spurious wakeups at shutdown with HSW */
         if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
                 pci_set_power_state(dev, PCI_D3hot);
+
+       usb_hcd_pci_remove(dev);
  }
  
  #ifdef CONFIG_PM
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c

index 918e0c739b795ec3577ae1853708abf6249ac9e5..fd9fd12e486178d50d37fee53a328a8a3530f062 100644 (file)
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1334,12 +1334,6 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
  
         cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list);
  
-       if (cmd->command_trb != xhci->cmd_ring->dequeue) {
-               xhci_err(xhci,
-                        "Command completion event does not match command\n");
-               return;
-       }
-
         del_timer(&xhci->cmd_timer);
  
         trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event);
@@ -1351,6 +1345,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
                 xhci_handle_stopped_cmd_ring(xhci, cmd);
                 return;
         }
+
+       if (cmd->command_trb != xhci->cmd_ring->dequeue) {
+               xhci_err(xhci,
+                        "Command completion event does not match command\n");
+               return;
+       }
+
         /*
          * Host aborted the command ring, check if the current command was
          * supposed to be aborted, otherwise continue normally.
@@ -3243,7 +3244,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
         send_addr = addr;
  
         /* Queue the TRBs, even if they are zero-length */
-       for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) {
+       for (enqd_len = 0; first_trb || enqd_len < full_len;
+                       enqd_len += trb_buff_len) {
                 field = TRB_TYPE(TRB_NORMAL);
  
                 /* TRB buffer should not cross 64KB boundaries */
diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c

index 52c27cab78c3e44d1927c7bcacebc7fbcf1f7c61..9b5b3b2281cae47afb0b88042a5c956a826498f6 100644 (file)
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -665,7 +665,7 @@ static ssize_t ftdi_elan_read(struct file *file, char __user *buffer,
  {
         char data[30 *3 + 4];
         char *d = data;
-       int m = (sizeof(data) - 1) / 3;
+       int m = (sizeof(data) - 1) / 3 - 1;
         int bytes_read = 0;
         int retry_on_empty = 10;
         int retry_on_timeout = 5;
@@ -1684,7 +1684,7 @@ wait:if (ftdi->disconnected > 0) {
                         int i = 0;
                         char data[30 *3 + 4];
                         char *d = data;
-                       int m = (sizeof(data) - 1) / 3;
+                       int m = (sizeof(data) - 1) / 3 - 1;
                         int l = 0;
                         struct u132_target *target = &ftdi->target[ed];
                         struct u132_command *command = &ftdi->command[
@@ -1876,7 +1876,7 @@ more:{
                 if (packet_bytes > 2) {
                         char diag[30 *3 + 4];
                         char *d = diag;
-                       int m = (sizeof(diag) - 1) / 3;
+                       int m = (sizeof(diag) - 1) / 3 - 1;
                         char *b = ftdi->bulk_in_buffer;
                         int bytes_read = 0;
                         diag[0] = 0;
@@ -2053,7 +2053,7 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi)
                         if (packet_bytes > 2) {
                                 char diag[30 *3 + 4];
                                 char *d = diag;
-                               int m = (sizeof(diag) - 1) / 3;
+                               int m = (sizeof(diag) - 1) / 3 - 1;
                                 char *b = ftdi->bulk_in_buffer;
                                 int bytes_read = 0;
                                 unsigned char c = 0;
@@ -2155,7 +2155,7 @@ more:{
                 if (packet_bytes > 2) {
                         char diag[30 *3 + 4];
                         char *d = diag;
-                       int m = (sizeof(diag) - 1) / 3;
+                       int m = (sizeof(diag) - 1) / 3 - 1;
                         char *b = ftdi->bulk_in_buffer;
                         int bytes_read = 0;
                         diag[0] = 0;
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c

index 6b978f04b8d7217d72373ffe6e586a908e464471..5c8210dc6fd9cf9c0b39003d92b59a1440ddb0c2 100644 (file)
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -585,7 +585,6 @@ static void sg_timeout(unsigned long _req)
  {
         struct usb_sg_request   *req = (struct usb_sg_request *) _req;
  
-       req->status = -ETIMEDOUT;
         usb_sg_cancel(req);
  }
  
@@ -616,8 +615,10 @@ static int perform_sglist(
                 mod_timer(&sg_timer, jiffies +
                                 msecs_to_jiffies(SIMPLE_IO_TIMEOUT));
                 usb_sg_wait(req);
-               del_timer_sync(&sg_timer);
-               retval = req->status;
+               if (!del_timer_sync(&sg_timer))
+                       retval = -ETIMEDOUT;
+               else
+                       retval = req->status;
  
                 /* FIXME check resulting data pattern */
  
@@ -2602,7 +2603,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf)
         ktime_get_ts64(&start);
  
         retval = usbtest_do_ioctl(intf, param_32);
-       if (retval)
+       if (retval < 0)
                 goto free_mutex;
  
         ktime_get_ts64(&end);
diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c

index 6f6d2a7fd5a079149c6587c709b473582b3e5458..6523af4f8f93fc48ab8d9b26b409719a61486e24 100644 (file)
--- a/drivers/usb/phy/phy-omap-otg.c
+++ b/drivers/usb/phy/phy-omap-otg.c
@@ -140,6 +140,8 @@ static int omap_otg_probe(struct platform_device *pdev)
                  (rev >> 4) & 0xf, rev & 0xf, config->extcon, otg_dev->id,
                  otg_dev->vbus);
  
+       platform_set_drvdata(pdev, otg_dev);
+
         return 0;
  }
  
diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c

index 8fbbc2d32371a973f4f3bd166c302cf0ece29267..ac67bab9124cc043e0c858a8858fc4adb7e5878d 100644 (file)
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -514,7 +514,8 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev)
         if (gpio > 0)
                 dparam->enable_gpio = gpio;
  
-       if (dparam->type == USBHS_TYPE_RCAR_GEN2)
+       if (dparam->type == USBHS_TYPE_RCAR_GEN2 ||
+           dparam->type == USBHS_TYPE_RCAR_GEN3)
                 dparam->has_usb_dmac = 1;
  
         return info;
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c

index 280ed5ff021bdb038f920562a0ccbed78e6ccff0..857e78337324b6488a77d8310da1a32c615e9349 100644 (file)
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -871,7 +871,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done)
  
         /* use PIO if packet is less than pio_dma_border or pipe is DCP */
         if ((len < usbhs_get_dparam(priv, pio_dma_border)) ||
-           usbhs_pipe_is_dcp(pipe))
+           usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
                 goto usbhsf_pio_prepare_push;
  
         /* check data length if this driver don't use USB-DMAC */
@@ -976,7 +976,7 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt,
  
         /* use PIO if packet is less than pio_dma_border or pipe is DCP */
         if ((pkt->length < usbhs_get_dparam(priv, pio_dma_border)) ||
-           usbhs_pipe_is_dcp(pipe))
+           usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
                 goto usbhsf_pio_prepare_pop;
  
         fifo = usbhsf_get_dma_fifo(priv, pkt);
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c

index 50f3363cc382b8eaebfa6d1083f8f44c8c734b1d..92bc83b92d10d3a02fc74cbda259ecec6c409d21 100644 (file)
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -617,10 +617,13 @@ static int usbhsg_ep_enable(struct usb_ep *ep,
                  * use dmaengine if possible.
                  * It will use pio handler if impossible.
                  */
-               if (usb_endpoint_dir_in(desc))
+               if (usb_endpoint_dir_in(desc)) {
                         pipe->handler = &usbhs_fifo_dma_push_handler;
-               else
+               } else {
                         pipe->handler = &usbhs_fifo_dma_pop_handler;
+                       usbhs_xxxsts_clear(priv, BRDYSTS,
+                                          usbhs_pipe_number(pipe));
+               }
  
                 ret = 0;
         }
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c

index 00820809139a0228e512e077aa20502d326737f7..b2d767e743fc2258c8b13e84401e5f34b60efcec 100644 (file)
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -648,6 +648,8 @@ static const struct usb_device_id id_table_combined[] = {
         { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) },
         { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) },
         { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) },
         { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) },
@@ -1008,6 +1010,7 @@ static const struct usb_device_id id_table_combined[] = {
         { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
         { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
         { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
+       { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) },
         { }                                     /* Terminating entry */
  };
  
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h

index c5d6c1e73e8e0450d46dc7140637146404f262bb..f87a938cf00571eb69edbd8d625f58041384d5fa 100644 (file)
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -405,6 +405,12 @@
  #define FTDI_4N_GALAXY_DE_2_PID        0xF3C1
  #define FTDI_4N_GALAXY_DE_3_PID        0xF3C2
  
+/*
+ * Ivium Technologies product IDs
+ */
+#define FTDI_PALMSENS_PID      0xf440
+#define FTDI_IVIUM_XSTAT_PID   0xf441
+
  /*
   * Linx Technologies product ids
   */
@@ -672,6 +678,12 @@
  #define INTREPID_VALUECAN_PID  0x0601
  #define INTREPID_NEOVI_PID     0x0701
  
+/*
+ * WICED USB UART
+ */
+#define WICED_VID              0x0A5C
+#define WICED_USB20706V2_PID   0x6422
+
  /*
   * Definitions for ID TECH (www.idt-net.com) devices
   */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c

index 8e07536c233a0c90b6a69ebac97cf9c38fe61416..bc472584a229db589f1fb54784df385cbfc737c0 100644 (file)
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -274,6 +274,12 @@ static void option_instat_callback(struct urb *urb);
  #define TELIT_PRODUCT_LE920                    0x1200
  #define TELIT_PRODUCT_LE910                    0x1201
  #define TELIT_PRODUCT_LE910_USBCFG4            0x1206
+#define TELIT_PRODUCT_LE920A4_1207             0x1207
+#define TELIT_PRODUCT_LE920A4_1208             0x1208
+#define TELIT_PRODUCT_LE920A4_1211             0x1211
+#define TELIT_PRODUCT_LE920A4_1212             0x1212
+#define TELIT_PRODUCT_LE920A4_1213             0x1213
+#define TELIT_PRODUCT_LE920A4_1214             0x1214
  
  /* ZTE PRODUCTS */
  #define ZTE_VENDOR_ID                          0x19d2
@@ -628,6 +634,11 @@ static const struct option_blacklist_info telit_le920_blacklist = {
         .reserved = BIT(1) | BIT(5),
  };
  
+static const struct option_blacklist_info telit_le920a4_blacklist_1 = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(1),
+};
+
  static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
         .sendsetup = BIT(2),
         .reserved = BIT(0) | BIT(1) | BIT(3),
@@ -1203,6 +1214,16 @@ static const struct usb_device_id option_ids[] = {
                 .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
         { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
                 .driver_info = (kernel_ulong_t)&telit_le920_blacklist },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208),
+               .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212),
+               .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+       { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214),
+               .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
         { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
         { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
                 .driver_info = (kernel_ulong_t)&net_intf1_blacklist },
@@ -1966,6 +1987,7 @@ static const struct usb_device_id option_ids[] = {
           .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
         { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
         { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */
         { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) },                /* OLICARD300 - MT6225 */
         { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
         { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c

index b1b9bac4401611e0afa61322714e5f2b3ead8a68..d213cf44a7e45ef8ae692bf4a9e63d31088cae8b 100644 (file)
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1433,7 +1433,7 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
  
         rc = usb_register(udriver);
         if (rc)
-               return rc;
+               goto failed_usb_register;
  
         for (sd = serial_drivers; *sd; ++sd) {
                 (*sd)->usb_driver = udriver;
@@ -1451,6 +1451,8 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[]
         while (sd-- > serial_drivers)
                 usb_serial_deregister(*sd);
         usb_deregister(udriver);
+failed_usb_register:
+       kfree(udriver);
         return rc;
  }
  EXPORT_SYMBOL_GPL(usb_serial_register_drivers);
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c

index 15ecfc9c5f6c59e14cdf4477e401700df47e19ff..152b43822ef1912c980292f7927286e6df76656c 100644 (file)
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -564,67 +564,80 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
  }
  
  static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
-                                          uint32_t flags, void *data)
+                                          unsigned int count, uint32_t flags,
+                                          void *data)
  {
-       int32_t fd = *(int32_t *)data;
-
-       if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
-               return -EINVAL;
-
         /* DATA_NONE/DATA_BOOL enables loopback testing */
         if (flags & VFIO_IRQ_SET_DATA_NONE) {
-               if (*ctx)
-                       eventfd_signal(*ctx, 1);
-               return 0;
+               if (*ctx) {
+                       if (count) {
+                               eventfd_signal(*ctx, 1);
+                       } else {
+                               eventfd_ctx_put(*ctx);
+                               *ctx = NULL;
+                       }
+                       return 0;
+               }
         } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
-               uint8_t trigger = *(uint8_t *)data;
+               uint8_t trigger;
+
+               if (!count)
+                       return -EINVAL;
+
+               trigger = *(uint8_t *)data;
                 if (trigger && *ctx)
                         eventfd_signal(*ctx, 1);
-               return 0;
-       }
  
-       /* Handle SET_DATA_EVENTFD */
-       if (fd == -1) {
-               if (*ctx)
-                       eventfd_ctx_put(*ctx);
-               *ctx = NULL;
                 return 0;
-       } else if (fd >= 0) {
-               struct eventfd_ctx *efdctx;
-               efdctx = eventfd_ctx_fdget(fd);
-               if (IS_ERR(efdctx))
-                       return PTR_ERR(efdctx);
-               if (*ctx)
-                       eventfd_ctx_put(*ctx);
-               *ctx = efdctx;
+       } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+               int32_t fd;
+
+               if (!count)
+                       return -EINVAL;
+
+               fd = *(int32_t *)data;
+               if (fd == -1) {
+                       if (*ctx)
+                               eventfd_ctx_put(*ctx);
+                       *ctx = NULL;
+               } else if (fd >= 0) {
+                       struct eventfd_ctx *efdctx;
+
+                       efdctx = eventfd_ctx_fdget(fd);
+                       if (IS_ERR(efdctx))
+                               return PTR_ERR(efdctx);
+
+                       if (*ctx)
+                               eventfd_ctx_put(*ctx);
+
+                       *ctx = efdctx;
+               }
                 return 0;
-       } else
-               return -EINVAL;
+       }
+
+       return -EINVAL;
  }
  
  static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
                                     unsigned index, unsigned start,
                                     unsigned count, uint32_t flags, void *data)
  {
-       if (index != VFIO_PCI_ERR_IRQ_INDEX)
+       if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
                 return -EINVAL;
  
-       /*
-        * We should sanitize start & count, but that wasn't caught
-        * originally, so this IRQ index must forever ignore them :-(
-        */
-
-       return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data);
+       return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
+                                              count, flags, data);
  }
  
  static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
                                     unsigned index, unsigned start,
                                     unsigned count, uint32_t flags, void *data)
  {
-       if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1)
+       if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
                 return -EINVAL;
  
-       return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data);
+       return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
+                                              count, flags, data);
  }
  
  int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c

index 388eec4e1a90d3856c9f52910003808f65fa6e3f..97fb2f8fa93041a5a6854e580907854e283d80a8 100644 (file)
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -220,20 +220,20 @@ static long vhost_test_reset_owner(struct vhost_test *n)
  {
         void *priv = NULL;
         long err;
-       struct vhost_memory *memory;
+       struct vhost_umem *umem;
  
         mutex_lock(&n->dev.mutex);
         err = vhost_dev_check_owner(&n->dev);
         if (err)
                 goto done;
-       memory = vhost_dev_reset_owner_prepare();
-       if (!memory) {
+       umem = vhost_dev_reset_owner_prepare();
+       if (!umem) {
                 err = -ENOMEM;
                 goto done;
         }
         vhost_test_stop(n, &priv);
         vhost_test_flush(n);
-       vhost_dev_reset_owner(&n->dev, memory);
+       vhost_dev_reset_owner(&n->dev, umem);
  done:
         mutex_unlock(&n->dev.mutex);
         return err;
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c

index 0ddf3a2dbfc490a58d150039a57136460e9a1e08..e3b30ea9ece5945c935791798ab27ed8f6c3dd11 100644 (file)
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -307,6 +307,8 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
  
         vhost_disable_notify(&vsock->dev, vq);
         for (;;) {
+               u32 len;
+
                 if (!vhost_vsock_more_replies(vsock)) {
                         /* Stop tx until the device processes already
                          * pending replies.  Leave tx virtqueue
@@ -334,13 +336,15 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
                         continue;
                 }
  
+               len = pkt->len;
+
                 /* Only accept correctly addressed packets */
                 if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
                         virtio_transport_recv_pkt(pkt);
                 else
                         virtio_transport_free_pkt(pkt);
  
-               vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+               vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
                 added = true;
         }
  
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c

index 114a0c88afb8bfad71dc7b4612ccf9ed66f59d54..e383ecdaca594ce0786c321af35ee59b1807007e 100644 (file)
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -327,6 +327,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                  * host should service the ring ASAP. */
                 if (out_sgs)
                         vq->notify(&vq->vq);
+               if (indirect)
+                       kfree(desc);
                 END_USE(vq);
                 return -ENOSPC;
         }
@@ -426,6 +428,7 @@ unmap_release:
         if (indirect)
                 kfree(desc);
  
+       END_USE(vq);
         return -EIO;
  }
  
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c

index 4b0eff6da6740552043679764f0ebc76a47917a0..85737e96ab8b5a3d242f50dbf59a8f81c332e4dd 100644 (file)
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
         case 1:
                 _debug("extract FID count");
                 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->count = ntohl(call->tmp);
                 _debug("FID count: %u", call->count);
@@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                 _debug("extract FID array");
                 ret = afs_extract_data(call, skb, last, call->buffer,
                                        call->count * 3 * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 _debug("unmarshall FID array");
                 call->request = kcalloc(call->count,
@@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
         case 3:
                 _debug("extract CB count");
                 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 tmp = ntohl(call->tmp);
                 _debug("CB count: %u", tmp);
@@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                 _debug("extract CB array");
                 ret = afs_extract_data(call, skb, last, call->request,
                                        call->count * 3 * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 _debug("unmarshall CB array");
                 cb = call->request;
@@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                 call->unmarshall++;
  
         case 5:
-               _debug("trailer");
-               if (skb->len != 0)
-                       return -EBADMSG;
+               ret = afs_data_complete(call, skb, last);
+               if (ret < 0)
+                       return ret;
  
                 /* Record that the message was unmarshalled successfully so
                  * that the call destructor can know do the callback breaking
@@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                 break;
         }
  
-       if (!last)
-               return 0;
  
         call->state = AFS_CALL_REPLYING;
  
@@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
  {
         struct afs_server *server;
         struct in_addr addr;
+       int ret;
  
         _enter(",{%u},%d", skb->len, last);
  
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* no unmarshalling required */
         call->state = AFS_CALL_REPLYING;
@@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
  
         _enter(",{%u},%d", skb->len, last);
  
+       /* There are some arguments that we ignore */
+       afs_data_consumed(call, skb);
         if (!last)
-               return 0;
+               return -EAGAIN;
  
         /* no unmarshalling required */
         call->state = AFS_CALL_REPLYING;
@@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
  static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
                                 bool last)
  {
+       int ret;
+
         _enter(",{%u},%d", skb->len, last);
  
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* no unmarshalling required */
         call->state = AFS_CALL_REPLYING;
@@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
  
         _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
  
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         switch (call->unmarshall) {
         case 0:
@@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
                 break;
         }
  
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         call->state = AFS_CALL_REPLYING;
  
@@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
  static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
                                                  struct sk_buff *skb, bool last)
  {
+       int ret;
+
         _enter(",{%u},%d", skb->len, last);
  
-       if (skb->len > 0)
-               return -EBADMSG;
-       if (!last)
-               return 0;
+       ret = afs_data_complete(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* no unmarshalling required */
         call->state = AFS_CALL_REPLYING;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c

index c2e930ec288899cde31e96150dc828d8ec495b60..9312b92e54bedb3310e725a1d10249f2dee21337 100644 (file)
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
  {
         struct afs_vnode *vnode = call->reply;
         const __be32 *bp;
+       int ret;
  
         _enter(",,%u", last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
@@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
         case 1:
                 _debug("extract data length (MSW)");
                 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->count = ntohl(call->tmp);
                 _debug("DATA length MSW: %u", call->count);
@@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
         case 2:
                 _debug("extract data length");
                 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->count = ntohl(call->tmp);
                 _debug("DATA length: %u", call->count);
@@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                         ret = afs_extract_data(call, skb, last, buffer,
                                                call->count);
                         kunmap_atomic(buffer);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                 }
  
                 call->offset = 0;
@@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
         case 4:
                 ret = afs_extract_data(call, skb, last, call->buffer,
                                        (21 + 3 + 6) * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 bp = call->buffer;
                 xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
@@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
                 call->unmarshall++;
  
         case 5:
-               _debug("trailer");
-               if (skb->len != 0)
-                       return -EBADMSG;
+               ret = afs_data_complete(call, skb, last);
+               if (ret < 0)
+                       return ret;
                 break;
         }
  
-       if (!last)
-               return 0;
-
         if (call->count < PAGE_SIZE) {
                 _debug("clear");
                 page = call->reply3;
@@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
  {
         _enter(",{%u},%d", skb->len, last);
  
-       if (skb->len > 0)
-               return -EBADMSG; /* shouldn't be any reply data */
-       return 0;
+       /* shouldn't be any reply data */
+       return afs_data_complete(call, skb, last);
  }
  
  /*
@@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call,
  {
         struct afs_vnode *vnode = call->reply;
         const __be32 *bp;
+       int ret;
  
         _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
@@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call,
  {
         struct afs_vnode *vnode = call->reply;
         const __be32 *bp;
+       int ret;
  
         _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
@@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call,
  {
         struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
         const __be32 *bp;
+       int ret;
  
         _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
@@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
  {
         struct afs_vnode *vnode = call->reply;
         const __be32 *bp;
+       int ret;
  
         _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
@@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call,
  {
         struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
         const __be32 *bp;
+       int ret;
  
         _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
@@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call,
  {
         struct afs_vnode *vnode = call->reply;
         const __be32 *bp;
+       int ret;
  
         _enter(",,%u", last);
  
-       afs_transfer_reply(call, skb);
-       if (!last) {
-               _leave(" = 0 [more]");
-               return 0;
-       }
-
-       if (call->reply_size != call->reply_max) {
-               _leave(" = -EBADMSG [%u != %u]",
-                      call->reply_size, call->reply_max);
-               return -EBADMSG;
-       }
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
@@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call,
         afs_dataversion_t *store_version;
         struct afs_vnode *vnode = call->reply;
         const __be32 *bp;
+       int ret;
  
         _enter(",,%u", last);
  
-       afs_transfer_reply(call, skb);
-       if (!last) {
-               _leave(" = 0 [more]");
-               return 0;
-       }
-
-       if (call->reply_size != call->reply_max) {
-               _leave(" = -EBADMSG [%u != %u]",
-                      call->reply_size, call->reply_max);
-               return -EBADMSG;
-       }
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         store_version = NULL;
@@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                 _debug("extract status");
                 ret = afs_extract_data(call, skb, last, call->buffer,
                                        12 * 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 bp = call->buffer;
                 xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
@@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                 /* extract the volume name length */
         case 2:
                 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->count = ntohl(call->tmp);
                 _debug("volname length: %u", call->count);
@@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                 if (call->count > 0) {
                         ret = afs_extract_data(call, skb, last, call->reply3,
                                                call->count);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                 }
  
                 p = call->reply3;
@@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
         case 4:
                 ret = afs_extract_data(call, skb, last, call->buffer,
                                        call->count);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->offset = 0;
                 call->unmarshall++;
@@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                 /* extract the offline message length */
         case 5:
                 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->count = ntohl(call->tmp);
                 _debug("offline msg length: %u", call->count);
@@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                 if (call->count > 0) {
                         ret = afs_extract_data(call, skb, last, call->reply3,
                                                call->count);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                 }
  
                 p = call->reply3;
@@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
         case 7:
                 ret = afs_extract_data(call, skb, last, call->buffer,
                                        call->count);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->offset = 0;
                 call->unmarshall++;
@@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                 /* extract the message of the day length */
         case 8:
                 ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->count = ntohl(call->tmp);
                 _debug("motd length: %u", call->count);
@@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
                 if (call->count > 0) {
                         ret = afs_extract_data(call, skb, last, call->reply3,
                                                call->count);
-                       switch (ret) {
-                       case 0:         break;
-                       case -EAGAIN:   return 0;
-                       default:        return ret;
-                       }
+                       if (ret < 0)
+                               return ret;
                 }
  
                 p = call->reply3;
@@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
         case 10:
                 ret = afs_extract_data(call, skb, last, call->buffer,
                                        call->count);
-               switch (ret) {
-               case 0:         break;
-               case -EAGAIN:   return 0;
-               default:        return ret;
-               }
+               if (ret < 0)
+                       return ret;
  
                 call->offset = 0;
                 call->unmarshall++;
         no_motd_padding:
  
         case 11:
-               _debug("trailer %d", skb->len);
-               if (skb->len != 0)
-                       return -EBADMSG;
+               ret = afs_data_complete(call, skb, last);
+               if (ret < 0)
+                       return ret;
                 break;
         }
  
-       if (!last)
-               return 0;
-
         _leave(" = 0 [done]");
         return 0;
  }
@@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
                                     struct sk_buff *skb, bool last)
  {
         const __be32 *bp;
+       int ret;
  
         _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         bp = call->buffer;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h

index 71d5982312f3d11dd6e3dd23079e5c6bef7c23a6..df976b2a7f40fbccf708e572372e41098b31f43f 100644 (file)
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *);
   */
  extern int afs_open_socket(void);
  extern void afs_close_socket(void);
+extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
  extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
                          const struct afs_wait_mode *);
  extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
                                             size_t, size_t);
  extern void afs_flat_call_destructor(struct afs_call *);
-extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
+extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
  extern void afs_send_empty_reply(struct afs_call *);
  extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
  extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
                             size_t);
  
+static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
+                                   bool last)
+{
+       if (skb->len > 0)
+               return -EBADMSG;
+       afs_data_consumed(call, skb);
+       if (!last)
+               return -EAGAIN;
+       return 0;
+}
+
  /*
   * security.c
   */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c

index 4832de84d52cba73f96466ae4e73ffc2906566bd..14d04c848465a68f2b107c87b22b90f69062e9c5 100644 (file)
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -150,10 +150,9 @@ void afs_close_socket(void)
  }
  
  /*
- * note that the data in a socket buffer is now delivered and that the buffer
- * should be freed
+ * Note that the data in a socket buffer is now consumed.
   */
-static void afs_data_delivered(struct sk_buff *skb)
+void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
  {
         if (!skb) {
                 _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
@@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb)
         } else {
                 _debug("DLVR %p{%u} [%d]",
                        skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-               if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-                       BUG();
-               rxrpc_kernel_data_delivered(skb);
+               rxrpc_kernel_data_consumed(call->rxcall, skb);
         }
  }
  
@@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call)
                         last = rxrpc_kernel_is_data_last(skb);
                         ret = call->type->deliver(call, skb, last);
                         switch (ret) {
+                       case -EAGAIN:
+                               if (last) {
+                                       _debug("short data");
+                                       goto unmarshal_error;
+                               }
+                               break;
                         case 0:
-                               if (last &&
-                                   call->state == AFS_CALL_AWAIT_REPLY)
+                               ASSERT(last);
+                               if (call->state == AFS_CALL_AWAIT_REPLY)
                                         call->state = AFS_CALL_COMPLETE;
                                 break;
                         case -ENOTCONN:
@@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call)
                                 abort_code = RX_INVALID_OPERATION;
                                 goto do_abort;
                         default:
+                       unmarshal_error:
                                 abort_code = RXGEN_CC_UNMARSHAL;
                                 if (call->state != AFS_CALL_AWAIT_REPLY)
                                         abort_code = RXGEN_SS_UNMARSHAL;
@@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call)
                                 call->state = AFS_CALL_ERROR;
                                 break;
                         }
-                       afs_data_delivered(skb);
-                       skb = NULL;
-                       continue;
+                       break;
                 case RXRPC_SKB_MARK_FINAL_ACK:
                         _debug("Rcv ACK");
                         call->state = AFS_CALL_COMPLETE;
@@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call)
  }
  
  /*
- * empty a socket buffer into a flat reply buffer
+ * Empty a socket buffer into a flat reply buffer.
   */
-void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
  {
         size_t len = skb->len;
  
-       if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
-               BUG();
-       call->reply_size += len;
+       if (len > call->reply_max - call->reply_size) {
+               _leave(" = -EBADMSG [%zu > %u]",
+                      len, call->reply_max - call->reply_size);
+               return -EBADMSG;
+       }
+
+       if (len > 0) {
+               if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
+                                 len) < 0)
+                       BUG();
+               call->reply_size += len;
+       }
+
+       afs_data_consumed(call, skb);
+       if (!last)
+               return -EAGAIN;
+
+       if (call->reply_size != call->reply_max) {
+               _leave(" = -EBADMSG [%u != %u]",
+                      call->reply_size, call->reply_max);
+               return -EBADMSG;
+       }
+       return 0;
  }
  
  /*
@@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
  }
  
  /*
- * grab the operation ID from an incoming cache manager call
+ * Grab the operation ID from an incoming cache manager call.  The socket
+ * buffer is discarded on error or if we don't yet have sufficient data.
   */
  static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
                                 bool last)
@@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
         call->offset += len;
  
         if (call->offset < 4) {
-               if (last) {
-                       _leave(" = -EBADMSG [op ID short]");
-                       return -EBADMSG;
-               }
-               _leave(" = 0 [incomplete]");
-               return 0;
+               afs_data_consumed(call, skb);
+               _leave(" = -EAGAIN");
+               return -EAGAIN;
         }
  
         call->state = AFS_CALL_AWAIT_REQUEST;
@@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
  }
  
  /*
- * extract a piece of data from the received data socket buffers
+ * Extract a piece of data from the received data socket buffers.
   */
  int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
                      bool last, void *buf, size_t count)
@@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
         call->offset += len;
  
         if (call->offset < count) {
-               if (last) {
-                       _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
-                       return -EBADMSG;
-               }
+               afs_data_consumed(call, skb);
                 _leave(" = -EAGAIN");
                 return -EAGAIN;
         }
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c

index 340afd0cd18290e319bcc1d9695c887b39284025..f94d1abdc3ebc47e5750098fddc4098fb9f3df2f 100644 (file)
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
         struct afs_cache_vlocation *entry;
         __be32 *bp;
         u32 tmp;
-       int loop;
+       int loop, ret;
  
         _enter(",,%u", last);
  
-       afs_transfer_reply(call, skb);
-       if (!last)
-               return 0;
-
-       if (call->reply_size != call->reply_max)
-               return -EBADMSG;
+       ret = afs_transfer_reply(call, skb, last);
+       if (ret < 0)
+               return ret;
  
         /* unmarshall the reply once we've received all of it */
         entry = call->reply;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c

index b6d210e7a993fd67634b3523aa3e61a1121d31bd..d9ddcfc18c91f8acd0ac977171116e3659aa2242 100644 (file)
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -862,33 +862,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         return 0;
  }
  
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-                                    struct btrfs_trans_handle *trans,
-                                    u64 ref_root, u64 bytenr, u64 num_bytes)
-{
-       struct btrfs_delayed_ref_root *delayed_refs;
-       struct btrfs_delayed_ref_head *ref_head;
-       int ret = 0;
-
-       if (!fs_info->quota_enabled || !is_fstree(ref_root))
-               return 0;
-
-       delayed_refs = &trans->transaction->delayed_refs;
-
-       spin_lock(&delayed_refs->lock);
-       ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
-       if (!ref_head) {
-               ret = -ENOENT;
-               goto out;
-       }
-       WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
-       ref_head->qgroup_ref_root = ref_root;
-       ref_head->qgroup_reserved = num_bytes;
-out:
-       spin_unlock(&delayed_refs->lock);
-       return ret;
-}
-
  int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                 struct btrfs_trans_handle *trans,
                                 u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h

index 5fca9534a2712b0b4dec9e9b15a1e024f272bb2f..43f3629760e90f186730842b0b1c609f799ae256 100644 (file)
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,9 +250,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                                u64 parent, u64 ref_root,
                                u64 owner, u64 offset, u64 reserved, int action,
                                struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-                                    struct btrfs_trans_handle *trans,
-                                    u64 ref_root, u64 bytenr, u64 num_bytes);
  int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                 struct btrfs_trans_handle *trans,
                                 u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index 9404121fd5f7b44f165c6f76c856548cf5722aff..5842423f8f47b6a7146240c6b47ea1ead637f984 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2033,6 +2033,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                  */
                 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                           &BTRFS_I(inode)->runtime_flags);
+               /*
+                * An ordered extent might have started before and completed
+                * already with io errors, in which case the inode was not
+                * updated and we end up here. So check the inode's mapping
+                * flags for any errors that might have happened while doing
+                * writeback of file data.
+                */
+               ret = btrfs_inode_check_errors(inode);
                 inode_unlock(inode);
                 goto out;
         }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 2f5975954ccf198737e07b29c8706024114a78ae..08dfc57e22705363f1159def79316263a9f4293a 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3435,10 +3435,10 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 found_key.offset = 0;
                 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
                 ret = PTR_ERR_OR_ZERO(inode);
-               if (ret && ret != -ESTALE)
+               if (ret && ret != -ENOENT)
                         goto out;
  
-               if (ret == -ESTALE && root == root->fs_info->tree_root) {
+               if (ret == -ENOENT && root == root->fs_info->tree_root) {
                         struct btrfs_root *dead_root;
                         struct btrfs_fs_info *fs_info = root->fs_info;
                         int is_dead_root = 0;
@@ -3474,7 +3474,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                  * Inode is already gone but the orphan item is still there,
                  * kill the orphan item.
                  */
-               if (ret == -ESTALE) {
+               if (ret == -ENOENT) {
                         trans = btrfs_start_transaction(root, 1);
                         if (IS_ERR(trans)) {
                                 ret = PTR_ERR(trans);
@@ -3633,7 +3633,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
  /*
   * read an inode from the btree into the in-memory inode
   */
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
  {
         struct btrfs_path *path;
         struct extent_buffer *leaf;
@@ -3652,14 +3652,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
                 filled = true;
  
         path = btrfs_alloc_path();
-       if (!path)
+       if (!path) {
+               ret = -ENOMEM;
                 goto make_bad;
+       }
  
         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
  
         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
-       if (ret)
+       if (ret) {
+               if (ret > 0)
+                       ret = -ENOENT;
                 goto make_bad;
+       }
  
         leaf = path->nodes[0];
  
@@ -3812,11 +3817,12 @@ cache_acl:
         }
  
         btrfs_update_iflags(inode);
-       return;
+       return 0;
  
  make_bad:
         btrfs_free_path(path);
         make_bad_inode(inode);
+       return ret;
  }
  
  /*
@@ -4204,6 +4210,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
         int err = 0;
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_trans_handle *trans;
+       u64 last_unlink_trans;
  
         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                 return -ENOTEMPTY;
@@ -4226,11 +4233,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
         if (err)
                 goto out;
  
+       last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
         /* now the directory is empty */
         err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                  dentry->d_name.name, dentry->d_name.len);
-       if (!err)
+       if (!err) {
                 btrfs_i_size_write(inode, 0);
+               /*
+                * Propagate the last_unlink_trans value of the deleted dir to
+                * its parent directory. This is to prevent an unrecoverable
+                * log tree in the case we do something like this:
+                * 1) create dir foo
+                * 2) create snapshot under dir foo
+                * 3) delete the snapshot
+                * 4) rmdir foo
+                * 5) mkdir foo
+                * 6) fsync foo or some file inside foo
+                */
+               if (last_unlink_trans >= trans->transid)
+                       BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+       }
  out:
         btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root);
@@ -5606,7 +5629,9 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                 return ERR_PTR(-ENOMEM);
  
         if (inode->i_state & I_NEW) {
-               btrfs_read_locked_inode(inode);
+               int ret;
+
+               ret = btrfs_read_locked_inode(inode);
                 if (!is_bad_inode(inode)) {
                         inode_tree_add(inode);
                         unlock_new_inode(inode);
@@ -5615,7 +5640,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                 } else {
                         unlock_new_inode(inode);
                         iput(inode);
-                       inode = ERR_PTR(-ESTALE);
+                       ASSERT(ret < 0);
+                       inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
                 }
         }
  
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

index b71dd298385c1b5cfb3c00761db0c8ee674e01e7..efe129fe26788c1078d737a2bc238373c2efcbac 100644 (file)
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -231,7 +231,6 @@ struct pending_dir_move {
         u64 parent_ino;
         u64 ino;
         u64 gen;
-       bool is_orphan;
         struct list_head update_refs;
  };
  
@@ -274,6 +273,39 @@ struct name_cache_entry {
         char name[];
  };
  
+static void inconsistent_snapshot_error(struct send_ctx *sctx,
+                                       enum btrfs_compare_tree_result result,
+                                       const char *what)
+{
+       const char *result_string;
+
+       switch (result) {
+       case BTRFS_COMPARE_TREE_NEW:
+               result_string = "new";
+               break;
+       case BTRFS_COMPARE_TREE_DELETED:
+               result_string = "deleted";
+               break;
+       case BTRFS_COMPARE_TREE_CHANGED:
+               result_string = "updated";
+               break;
+       case BTRFS_COMPARE_TREE_SAME:
+               ASSERT(0);
+               result_string = "unchanged";
+               break;
+       default:
+               ASSERT(0);
+               result_string = "unexpected";
+       }
+
+       btrfs_err(sctx->send_root->fs_info,
+                 "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
+                 result_string, what, sctx->cmp_key->objectid,
+                 sctx->send_root->root_key.objectid,
+                 (sctx->parent_root ?
+                  sctx->parent_root->root_key.objectid : 0));
+}
+
  static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
  
  static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
          * was already unlinked/moved, so we can safely assume that we will not
          * overwrite anything at this point in time.
          */
-       if (other_inode > sctx->send_progress) {
+       if (other_inode > sctx->send_progress ||
+           is_waiting_for_move(sctx, other_inode)) {
                 ret = get_inode_info(sctx->parent_root, other_inode, NULL,
                                 who_gen, NULL, NULL, NULL, NULL);
                 if (ret < 0)
@@ -2502,6 +2535,8 @@ verbose_printk("btrfs: send_utimes %llu\n", ino);
         key.type = BTRFS_INODE_ITEM_KEY;
         key.offset = 0;
         ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
+       if (ret > 0)
+               ret = -ENOENT;
         if (ret < 0)
                 goto out;
  
@@ -2947,6 +2982,10 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
                 }
  
                 if (loc.objectid > send_progress) {
+                       struct orphan_dir_info *odi;
+
+                       odi = get_orphan_dir_info(sctx, dir);
+                       free_orphan_dir_info(sctx, odi);
                         ret = 0;
                         goto out;
                 }
@@ -3047,7 +3086,6 @@ static int add_pending_dir_move(struct send_ctx *sctx,
         pm->parent_ino = parent_ino;
         pm->ino = ino;
         pm->gen = ino_gen;
-       pm->is_orphan = is_orphan;
         INIT_LIST_HEAD(&pm->list);
         INIT_LIST_HEAD(&pm->update_refs);
         RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
         return NULL;
  }
  
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+                    u64 ino, u64 gen, u64 *ancestor_ino)
+{
+       int ret = 0;
+       u64 parent_inode = 0;
+       u64 parent_gen = 0;
+       u64 start_ino = ino;
+
+       *ancestor_ino = 0;
+       while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+               fs_path_reset(name);
+
+               if (is_waiting_for_rm(sctx, ino))
+                       break;
+               if (is_waiting_for_move(sctx, ino)) {
+                       if (*ancestor_ino == 0)
+                               *ancestor_ino = ino;
+                       ret = get_first_ref(sctx->parent_root, ino,
+                                           &parent_inode, &parent_gen, name);
+               } else {
+                       ret = __get_cur_name_and_parent(sctx, ino, gen,
+                                                       &parent_inode,
+                                                       &parent_gen, name);
+                       if (ret > 0) {
+                               ret = 0;
+                               break;
+                       }
+               }
+               if (ret < 0)
+                       break;
+               if (parent_inode == start_ino) {
+                       ret = 1;
+                       if (*ancestor_ino == 0)
+                               *ancestor_ino = ino;
+                       break;
+               }
+               ino = parent_inode;
+               gen = parent_gen;
+       }
+       return ret;
+}
+
  static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
  {
         struct fs_path *from_path = NULL;
@@ -3123,6 +3203,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
         u64 parent_ino, parent_gen;
         struct waiting_dir_move *dm = NULL;
         u64 rmdir_ino = 0;
+       u64 ancestor;
+       bool is_orphan;
         int ret;
  
         name = fs_path_alloc();
@@ -3135,9 +3217,10 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
         dm = get_waiting_dir_move(sctx, pm->ino);
         ASSERT(dm);
         rmdir_ino = dm->rmdir_ino;
+       is_orphan = dm->orphanized;
         free_waiting_dir_move(sctx, dm);
  
-       if (pm->is_orphan) {
+       if (is_orphan) {
                 ret = gen_unique_name(sctx, pm->ino,
                                       pm->gen, from_path);
         } else {
@@ -3155,6 +3238,24 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                 goto out;
  
         sctx->send_progress = sctx->cur_ino + 1;
+       ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+       if (ret < 0)
+               goto out;
+       if (ret) {
+               LIST_HEAD(deleted_refs);
+               ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+               ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+                                          &pm->update_refs, &deleted_refs,
+                                          is_orphan);
+               if (ret < 0)
+                       goto out;
+               if (rmdir_ino) {
+                       dm = get_waiting_dir_move(sctx, pm->ino);
+                       ASSERT(dm);
+                       dm->rmdir_ino = rmdir_ino;
+               }
+               goto out;
+       }
         fs_path_reset(name);
         to_path = name;
         name = NULL;
@@ -3174,7 +3275,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                         /* already deleted */
                         goto finish;
                 }
-               ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
+               ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
                 if (ret < 0)
                         goto out;
                 if (!ret)
@@ -3204,8 +3305,18 @@ finish:
          * and old parent(s).
          */
         list_for_each_entry(cur, &pm->update_refs, list) {
-               if (cur->dir == rmdir_ino)
+               /*
+                * The parent inode might have been deleted in the send snapshot
+                */
+               ret = get_inode_info(sctx->send_root, cur->dir, NULL,
+                                    NULL, NULL, NULL, NULL, NULL);
+               if (ret == -ENOENT) {
+                       ret = 0;
                         continue;
+               }
+               if (ret < 0)
+                       goto out;
+
                 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
                 if (ret < 0)
                         goto out;
@@ -3325,6 +3436,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
         u64 left_gen;
         u64 right_gen;
         int ret = 0;
+       struct waiting_dir_move *wdm;
  
         if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
                 return 0;
@@ -3383,7 +3495,8 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
                 goto out;
         }
  
-       if (is_waiting_for_move(sctx, di_key.objectid)) {
+       wdm = get_waiting_dir_move(sctx, di_key.objectid);
+       if (wdm && !wdm->orphanized) {
                 ret = add_pending_dir_move(sctx,
                                            sctx->cur_ino,
                                            sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
                         ret = is_ancestor(sctx->parent_root,
                                           sctx->cur_ino, sctx->cur_inode_gen,
                                           ino, path_before);
-                       break;
+                       if (ret)
+                               break;
                 }
  
                 fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                 goto out;
                         if (ret) {
                                 struct name_cache_entry *nce;
+                               struct waiting_dir_move *wdm;
  
                                 ret = orphanize_inode(sctx, ow_inode, ow_gen,
                                                 cur->full_path);
                                 if (ret < 0)
                                         goto out;
+
+                               /*
+                                * If ow_inode has its rename operation delayed
+                                * make sure that its orphanized name is used in
+                                * the source path when performing its rename
+                                * operation.
+                                */
+                               if (is_waiting_for_move(sctx, ow_inode)) {
+                                       wdm = get_waiting_dir_move(sctx,
+                                                                  ow_inode);
+                                       ASSERT(wdm);
+                                       wdm->orphanized = true;
+                               }
+
                                 /*
                                  * Make sure we clear our orphanized inode's
                                  * name from the name cache. This is because the
@@ -3663,6 +3792,19 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                         name_cache_delete(sctx, nce);
                                         kfree(nce);
                                 }
+
+                               /*
+                                * ow_inode might currently be an ancestor of
+                                * cur_ino, therefore compute valid_path (the
+                                * current path of cur_ino) again because it
+                                * might contain the pre-orphanization name of
+                                * ow_inode, which is no longer valid.
+                                */
+                               fs_path_reset(valid_path);
+                               ret = get_cur_path(sctx, sctx->cur_ino,
+                                          sctx->cur_inode_gen, valid_path);
+                               if (ret < 0)
+                                       goto out;
                         } else {
                                 ret = send_unlink(sctx, cur->full_path);
                                 if (ret < 0)
@@ -5602,7 +5744,10 @@ static int changed_ref(struct send_ctx *sctx,
  {
         int ret = 0;
  
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "reference");
+               return -EIO;
+       }
  
         if (!sctx->cur_inode_new_gen &&
             sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5772,10 @@ static int changed_xattr(struct send_ctx *sctx,
  {
         int ret = 0;
  
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "xattr");
+               return -EIO;
+       }
  
         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
                 if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5799,10 @@ static int changed_extent(struct send_ctx *sctx,
  {
         int ret = 0;
  
-       BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+       if (sctx->cur_ino != sctx->cmp_key->objectid) {
+               inconsistent_snapshot_error(sctx, result, "extent");
+               return -EIO;
+       }
  
         if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
                 if (result != BTRFS_COMPARE_TREE_DELETED)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index d31a0c4f56bed436e0eb933cceb592fdc498eb53..fff3f3efa43602e0c04f9e5e019bf0e85a239d6f 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4469,7 +4469,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
  static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                          const int slot,
                                          const struct btrfs_key *key,
-                                        struct inode *inode)
+                                        struct inode *inode,
+                                        u64 *other_ino)
  {
         int ret;
         struct btrfs_path *search_path;
@@ -4528,7 +4529,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                            search_path, parent,
                                            name, this_name_len, 0);
                 if (di && !IS_ERR(di)) {
-                       ret = 1;
+                       struct btrfs_key di_key;
+
+                       btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+                                                 di, &di_key);
+                       if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+                               ret = 1;
+                               *other_ino = di_key.objectid;
+                       } else {
+                               ret = -EAGAIN;
+                       }
                         goto out;
                 } else if (IS_ERR(di)) {
                         ret = PTR_ERR(di);
@@ -4722,16 +4732,71 @@ again:
                 if ((min_key.type == BTRFS_INODE_REF_KEY ||
                      min_key.type == BTRFS_INODE_EXTREF_KEY) &&
                     BTRFS_I(inode)->generation == trans->transid) {
+                       u64 other_ino = 0;
+
                         ret = btrfs_check_ref_name_override(path->nodes[0],
                                                             path->slots[0],
-                                                           &min_key, inode);
+                                                           &min_key, inode,
+                                                           &other_ino);
                         if (ret < 0) {
                                 err = ret;
                                 goto out_unlock;
                         } else if (ret > 0) {
-                               err = 1;
-                               btrfs_set_log_full_commit(root->fs_info, trans);
-                               goto out_unlock;
+                               struct btrfs_key inode_key;
+                               struct inode *other_inode;
+
+                               if (ins_nr > 0) {
+                                       ins_nr++;
+                               } else {
+                                       ins_nr = 1;
+                                       ins_start_slot = path->slots[0];
+                               }
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, ins_start_slot,
+                                                ins_nr, inode_only,
+                                                logged_isize);
+                               if (ret < 0) {
+                                       err = ret;
+                                       goto out_unlock;
+                               }
+                               ins_nr = 0;
+                               btrfs_release_path(path);
+                               inode_key.objectid = other_ino;
+                               inode_key.type = BTRFS_INODE_ITEM_KEY;
+                               inode_key.offset = 0;
+                               other_inode = btrfs_iget(root->fs_info->sb,
+                                                        &inode_key, root,
+                                                        NULL);
+                               /*
+                                * If the other inode that had a conflicting dir
+                                * entry was deleted in the current transaction,
+                                * we don't need to do more work nor fallback to
+                                * a transaction commit.
+                                */
+                               if (IS_ERR(other_inode) &&
+                                   PTR_ERR(other_inode) == -ENOENT) {
+                                       goto next_key;
+                               } else if (IS_ERR(other_inode)) {
+                                       err = PTR_ERR(other_inode);
+                                       goto out_unlock;
+                               }
+                               /*
+                                * We are safe logging the other inode without
+                                * acquiring its i_mutex as long as we log with
+                                * the LOG_INODE_EXISTS mode. We're safe against
+                                * concurrent renames of the other inode as well
+                                * because during a rename we pin the log and
+                                * update the log with the new name before we
+                                * unpin it.
+                                */
+                               err = btrfs_log_inode(trans, root, other_inode,
+                                                     LOG_INODE_EXISTS,
+                                                     0, LLONG_MAX, ctx);
+                               iput(other_inode);
+                               if (err)
+                                       goto out_unlock;
+                               else
+                                       goto next_key;
                         }
                 }
  
@@ -4799,7 +4864,7 @@ next_slot:
                         ins_nr = 0;
                 }
                 btrfs_release_path(path);
-
+next_key:
                 if (min_key.offset < (u64)-1) {
                         min_key.offset++;
                 } else if (min_key.type < max_key.type) {
@@ -4993,8 +5058,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
                         break;
  
-               if (IS_ROOT(parent))
+               if (IS_ROOT(parent)) {
+                       inode = d_inode(parent);
+                       if (btrfs_must_commit_transaction(trans, inode))
+                               ret = 1;
                         break;
+               }
  
                 parent = dget_parent(parent);
                 dput(old_parent);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c

index 99115cae1652ac1661d37a5a286da7180b6d9d94..16e6ded0b7f281bf72e8074b9d2896713fe4aef2 100644 (file)
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1347,9 +1347,12 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
  {
         struct inode *inode = &ci->vfs_inode;
         struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-       struct ceph_mds_session *session = *psession;
+       struct ceph_mds_session *session = NULL;
         int mds;
+
         dout("ceph_flush_snaps %p\n", inode);
+       if (psession)
+               session = *psession;
  retry:
         spin_lock(&ci->i_ceph_lock);
         if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

index fa59a85226b262f2fe086ec5dfc1bf6813711986..f72d4ae303b273a98ee2631d8ed3dde21a71e796 100644 (file)
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2759,6 +2759,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
         } else {
                 path = NULL;
                 pathlen = 0;
+               pathbase = 0;
         }
  
         spin_lock(&ci->i_ceph_lock);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index 4d09d4441e3ee4fb7a2939a5c2cabbfa9e08070e..05713a5da0834233edad0049d7443dd63e6c1078 100644 (file)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1949,6 +1949,12 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
  {
         struct backing_dev_info *bdi;
  
+       /*
+        * If we are expecting writeback progress we must submit plugged IO.
+        */
+       if (blk_needs_flush_plug(current))
+               blk_schedule_flush_plug(current);
+
         if (!nr_pages)
                 nr_pages = get_nr_dirty_pages();
  
diff --git a/fs/iomap.c b/fs/iomap.c

index 48141b8eff5f4f799f804674c83173863c156b2c..0342254646e358407ca35520c307529cc454a3c4 100644 (file)
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -84,8 +84,11 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
          * Now the data has been copied, commit the range we've copied.  This
          * should not fail unless the filesystem has had a fatal error.
          */
-       ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0,
-                       flags, &iomap);
+       if (ops->iomap_end) {
+               ret = ops->iomap_end(inode, pos, length,
+                                    written > 0 ? written : 0,
+                                    flags, &iomap);
+       }
  
         return written ? written : ret;
  }
@@ -194,12 +197,9 @@ again:
                 if (mapping_writably_mapped(inode->i_mapping))
                         flush_dcache_page(page);
  
-               pagefault_disable();
                 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-               pagefault_enable();
  
                 flush_dcache_page(page);
-               mark_page_accessed(page);
  
                 status = iomap_write_end(inode, pos, bytes, copied, page);
                 if (unlikely(status < 0))
@@ -470,13 +470,18 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
         if (ret)
                 return ret;
  
-       ret = filemap_write_and_wait(inode->i_mapping);
-       if (ret)
-               return ret;
+       if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
+               ret = filemap_write_and_wait(inode->i_mapping);
+               if (ret)
+                       return ret;
+       }
  
         while (len > 0) {
                 ret = iomap_apply(inode, start, len, 0, ops, &ctx,
                                 iomap_fiemap_actor);
+               /* inode with no (attribute) mapping will give ENOENT */
+               if (ret == -ENOENT)
+                       break;
                 if (ret < 0)
                         return ret;
                 if (ret == 0)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c

index 33da841a21bb2871f753fb38a72dd76ce2725ded..6f47527348042dc83f5b97f48870eed6d1afcf41 100644 (file)
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -338,6 +338,8 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
         case 0:
                 break;
         case -NFS4ERR_EXPIRED:
+       case -NFS4ERR_ADMIN_REVOKED:
+       case -NFS4ERR_DELEG_REVOKED:
         case -NFS4ERR_STALE_STATEID:
         case -NFS4ERR_OLD_STATEID:
         case -NFS4ERR_BAD_STATEID:
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h

index 324bfdc212504de591347da77c1ad3f1db595974..9bf64eacba5bd6d47a04ca3c9ac66b74912bdc72 100644 (file)
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -396,6 +396,10 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *);
  extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
  extern void nfs4_kill_renewd(struct nfs_client *);
  extern void nfs4_renew_state(struct work_struct *);
+extern void nfs4_set_lease_period(struct nfs_client *clp,
+               unsigned long lease,
+               unsigned long lastrenewed);
+
  
  /* nfs4state.c */
  struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index a036e93bdf9656813abec3a3565e26fca3b28838..1949bbd806ebd4381ec54ea06ea97fa2f5619bed 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4237,12 +4237,9 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
                 err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
                 trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
                 if (err == 0) {
-                       struct nfs_client *clp = server->nfs_client;
-
-                       spin_lock(&clp->cl_lock);
-                       clp->cl_lease_time = fsinfo->lease_time * HZ;
-                       clp->cl_last_renewal = now;
-                       spin_unlock(&clp->cl_lock);
+                       nfs4_set_lease_period(server->nfs_client,
+                                       fsinfo->lease_time * HZ,
+                                       now);
                         break;
                 }
                 err = nfs4_handle_exception(server, err, &exception);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c

index e1ba58c3d1ad305ab28d932a5b90ac269092f98b..82e77198d17efdf656315f39c3d50c4d9aa568a9 100644 (file)
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -136,6 +136,26 @@ nfs4_kill_renewd(struct nfs_client *clp)
         cancel_delayed_work_sync(&clp->cl_renewd);
  }
  
+/**
+ * nfs4_set_lease_period - Sets the lease period on a nfs_client
+ *
+ * @clp: pointer to nfs_client
+ * @lease: new value for lease period, in jiffies
+ * @lastrenewed: time (in jiffies) at which lease was last renewed
+ */
+void nfs4_set_lease_period(struct nfs_client *clp,
+               unsigned long lease,
+               unsigned long lastrenewed)
+{
+       spin_lock(&clp->cl_lock);
+       clp->cl_lease_time = lease;
+       clp->cl_last_renewal = lastrenewed;
+       spin_unlock(&clp->cl_lock);
+
+       /* Cap maximum reconnect timeout at 1/2 lease period */
+       rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
+}
+
  /*
   * Local variables:
   *   c-basic-offset: 8
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

index 834b875900d62addf6db7f6eb590ce5c2d1b09bc..cada00aa5096d7dbd57a0b8717d1a6583abbbfa7 100644 (file)
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -277,20 +277,17 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
  {
         int status;
         struct nfs_fsinfo fsinfo;
+       unsigned long now;
  
         if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
                 nfs4_schedule_state_renewal(clp);
                 return 0;
         }
  
+       now = jiffies;
         status = nfs4_proc_get_lease_time(clp, &fsinfo);
         if (status == 0) {
-               /* Update lease time and schedule renewal */
-               spin_lock(&clp->cl_lock);
-               clp->cl_lease_time = fsinfo.lease_time * HZ;
-               clp->cl_last_renewal = jiffies;
-               spin_unlock(&clp->cl_lock);
-
+               nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
                 nfs4_schedule_state_renewal(clp);
         }
  
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c

index 8410ca275db1aecf0a1f8a022b92cdc1597ff258..a204d7e109d4d63a76d01a31198b00f3f1cd09be 100644 (file)
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4903,6 +4903,32 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         return nfs_ok;
  }
  
+static __be32
+nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
+{
+       struct nfs4_ol_stateid *stp = openlockstateid(s);
+       __be32 ret;
+
+       mutex_lock(&stp->st_mutex);
+
+       ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+       if (ret)
+               goto out;
+
+       ret = nfserr_locks_held;
+       if (check_for_locks(stp->st_stid.sc_file,
+                           lockowner(stp->st_stateowner)))
+               goto out;
+
+       release_lock_stateid(stp);
+       ret = nfs_ok;
+
+out:
+       mutex_unlock(&stp->st_mutex);
+       nfs4_put_stid(s);
+       return ret;
+}
+
  __be32
  nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                    struct nfsd4_free_stateid *free_stateid)
@@ -4910,7 +4936,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         stateid_t *stateid = &free_stateid->fr_stateid;
         struct nfs4_stid *s;
         struct nfs4_delegation *dp;
-       struct nfs4_ol_stateid *stp;
         struct nfs4_client *cl = cstate->session->se_client;
         __be32 ret = nfserr_bad_stateid;
  
@@ -4929,18 +4954,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                 ret = nfserr_locks_held;
                 break;
         case NFS4_LOCK_STID:
-               ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
-               if (ret)
-                       break;
-               stp = openlockstateid(s);
-               ret = nfserr_locks_held;
-               if (check_for_locks(stp->st_stid.sc_file,
-                                   lockowner(stp->st_stateowner)))
-                       break;
-               WARN_ON(!unhash_lock_stateid(stp));
+               atomic_inc(&s->sc_count);
                 spin_unlock(&cl->cl_lock);
-               nfs4_put_stid(s);
-               ret = nfs_ok;
+               ret = nfsd4_free_lock_stateid(stateid, s);
                 goto out;
         case NFS4_REVOKED_DELEG_STID:
                 dp = delegstateid(s);
@@ -5507,7 +5523,7 @@ static __be32
  lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
                             struct nfs4_ol_stateid *ost,
                             struct nfsd4_lock *lock,
-                           struct nfs4_ol_stateid **lst, bool *new)
+                           struct nfs4_ol_stateid **plst, bool *new)
  {
         __be32 status;
         struct nfs4_file *fi = ost->st_stid.sc_file;
@@ -5515,7 +5531,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
         struct nfs4_client *cl = oo->oo_owner.so_client;
         struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
         struct nfs4_lockowner *lo;
+       struct nfs4_ol_stateid *lst;
         unsigned int strhashval;
+       bool hashed;
  
         lo = find_lockowner_str(cl, &lock->lk_new_owner);
         if (!lo) {
@@ -5531,12 +5549,27 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
                         goto out;
         }
  
-       *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
-       if (*lst == NULL) {
+retry:
+       lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
+       if (lst == NULL) {
                 status = nfserr_jukebox;
                 goto out;
         }
+
+       mutex_lock(&lst->st_mutex);
+
+       /* See if it's still hashed to avoid race with FREE_STATEID */
+       spin_lock(&cl->cl_lock);
+       hashed = !list_empty(&lst->st_perfile);
+       spin_unlock(&cl->cl_lock);
+
+       if (!hashed) {
+               mutex_unlock(&lst->st_mutex);
+               nfs4_put_stid(&lst->st_stid);
+               goto retry;
+       }
         status = nfs_ok;
+       *plst = lst;
  out:
         nfs4_put_stateowner(&lo->lo_owner);
         return status;
@@ -5603,8 +5636,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                         goto out;
                 status = lookup_or_create_lock_state(cstate, open_stp, lock,
                                                         &lock_stp, &new);
-               if (status == nfs_ok)
-                       mutex_lock(&lock_stp->st_mutex);
         } else {
                 status = nfs4_preprocess_seqid_op(cstate,
                                        lock->lk_old_lock_seqid,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index ba944123167b92f3a7460d8acc66b02dc7c53575..ff476e654b8f8044b84808b1c92c54055e4ca393 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1252,10 +1252,13 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
         if (IS_ERR(dchild))
                 return nfserrno(host_err);
         err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
-       if (err) {
-               dput(dchild);
+       /*
+        * We unconditionally drop our ref to dchild as fh_compose will have
+        * already grabbed its own ref for it.
+        */
+       dput(dchild);
+       if (err)
                 return err;
-       }
         return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
                                         rdev, resfhp);
  }
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 54e270262979b6f331a31b132171c0b74b92b920..e9ff186c723fc193c8d26e70322f3a52c5af6b0b 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -483,7 +483,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
                 save_stack_trace_tsk(task, &trace);
  
                 for (i = 0; i < trace.nr_entries; i++) {
-                       seq_printf(m, "[<%pK>] %pS\n",
+                       seq_printf(m, "[<%pK>] %pB\n",
                                    (void *)entries[i], (void *)entries[i]);
                 }
                 unlock_trace(task);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c

index 09e18fdf61e5b48dd67234b19972a10dd5131662..b9a8c813e5e66b5e751080e1bd7b11b7e8d87634 100644 (file)
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -46,7 +46,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                 cached = 0;
  
         for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-               pages[lru] = global_page_state(NR_LRU_BASE + lru);
+               pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
  
         available = si_mem_available();
  
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

index 776ae2f325d1e4f534540206a30a138b01c1ea46..3dd8f1d5449877ed45aa0fb391c1e9048c5c1893 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1582,6 +1582,7 @@ xfs_alloc_ag_vextent_small(
         xfs_extlen_t    *flenp, /* result length */
         int             *stat)  /* status: 0-freelist, 1-normal/none */
  {
+       struct xfs_owner_info   oinfo;
         int             error;
         xfs_agblock_t   fbno;
         xfs_extlen_t    flen;
@@ -1624,6 +1625,18 @@ xfs_alloc_ag_vextent_small(
                                 error0);
                         args->wasfromfl = 1;
                         trace_xfs_alloc_small_freelist(args);
+
+                       /*
+                        * If we're feeding an AGFL block to something that
+                        * doesn't live in the free space, we need to clear
+                        * out the OWN_AG rmap.
+                        */
+                       xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+                       error = xfs_rmap_free(args->tp, args->agbp, args->agno,
+                                       fbno, 1, &oinfo);
+                       if (error)
+                               goto error0;
+
                         *stat = 0;
                         return 0;
                 }
@@ -2264,6 +2277,7 @@ xfs_alloc_log_agf(
                 offsetof(xfs_agf_t, agf_longest),
                 offsetof(xfs_agf_t, agf_btreeblks),
                 offsetof(xfs_agf_t, agf_uuid),
+               offsetof(xfs_agf_t, agf_rmap_blocks),
                 sizeof(xfs_agf_t)
         };
  
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h

index f814d42c73b2fb7484dd76ad024f63891de155f0..e6a8bea0f7bad219cdb98a97f2844fef293e6c92 100644 (file)
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -640,12 +640,15 @@ typedef struct xfs_agf {
         __be32          agf_btreeblks;  /* # of blocks held in AGF btrees */
         uuid_t          agf_uuid;       /* uuid of filesystem */
  
+       __be32          agf_rmap_blocks;        /* rmapbt blocks used */
+       __be32          agf_padding;            /* padding */
+
         /*
          * reserve some contiguous space for future logged fields before we add
          * the unlogged fields. This makes the range logging via flags and
          * structure offsets much simpler.
          */
-       __be64          agf_spare64[16];
+       __be64          agf_spare64[15];
  
         /* unlogged fields, written during buffer writeback. */
         __be64          agf_lsn;        /* last write sequence */
@@ -670,7 +673,8 @@ typedef struct xfs_agf {
  #define        XFS_AGF_LONGEST         0x00000400
  #define        XFS_AGF_BTREEBLKS       0x00000800
  #define        XFS_AGF_UUID            0x00001000
-#define        XFS_AGF_NUM_BITS        13
+#define        XFS_AGF_RMAP_BLOCKS     0x00002000
+#define        XFS_AGF_NUM_BITS        14
  #define        XFS_AGF_ALL_BITS        ((1 << XFS_AGF_NUM_BITS) - 1)
  
  #define XFS_AGF_FLAGS \
@@ -686,7 +690,8 @@ typedef struct xfs_agf {
         { XFS_AGF_FREEBLKS,     "FREEBLKS" }, \
         { XFS_AGF_LONGEST,      "LONGEST" }, \
         { XFS_AGF_BTREEBLKS,    "BTREEBLKS" }, \
-       { XFS_AGF_UUID,         "UUID" }
+       { XFS_AGF_UUID,         "UUID" }, \
+       { XFS_AGF_RMAP_BLOCKS,  "RMAP_BLOCKS" }
  
  /* disk block (xfs_daddr_t) in the AG */
  #define XFS_AGF_DADDR(mp)      ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c

index bc1faebc84eccdf658ec177029beb3b4d84455b6..17b8eeb34ac89ffb9260d7ac1c3cf342148eb1da 100644 (file)
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -98,6 +98,8 @@ xfs_rmapbt_alloc_block(
         union xfs_btree_ptr     *new,
         int                     *stat)
  {
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
         int                     error;
         xfs_agblock_t           bno;
  
@@ -124,6 +126,8 @@ xfs_rmapbt_alloc_block(
  
         xfs_trans_agbtree_delta(cur->bc_tp, 1);
         new->s = cpu_to_be32(bno);
+       be32_add_cpu(&agf->agf_rmap_blocks, 1);
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
  
         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
         *stat = 1;
@@ -143,6 +147,8 @@ xfs_rmapbt_free_block(
         bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
         trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
                         bno, 1);
+       be32_add_cpu(&agf->agf_rmap_blocks, -1);
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
         error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
         if (error)
                 return error;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c

index 47a318ce82e0ab5828164e6604ffe9dd5f66a0d6..607cc29bba21eb504f6ac01a331656d62849d5b9 100644 (file)
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -115,7 +115,6 @@ xfs_buf_ioacct_dec(
         if (!(bp->b_flags & _XBF_IN_FLIGHT))
                 return;
  
-       ASSERT(bp->b_flags & XBF_ASYNC);
         bp->b_flags &= ~_XBF_IN_FLIGHT;
         percpu_counter_dec(&bp->b_target->bt_io_count);
  }
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index ed95e5bb04e692b614983ea83bf870ac373a9b0c..e612a0233710850f34eae93540b098c3ae7cda8c 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -741,9 +741,20 @@ xfs_file_dax_write(
          * page is inserted into the pagecache when we have to serve a write
          * fault on a hole.  It should never be dirtied and can simply be
          * dropped from the pagecache once we get real data for the page.
+        *
+        * XXX: This is racy against mmap, and there's nothing we can do about
+        * it. dax_do_io() should really do this invalidation internally as
+        * it will know if we've allocated over a hole for this specific IO and
+        * if so it needs to update the mapping tree and invalidate existing
+        * PTEs over the newly allocated range. Remove this invalidation when
+        * dax_do_io() is fixed up.
          */
         if (mapping->nrpages) {
-               ret = invalidate_inode_pages2(mapping);
+               loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
+
+               ret = invalidate_inode_pages2_range(mapping,
+                                                   iocb->ki_pos >> PAGE_SHIFT,
+                                                   end >> PAGE_SHIFT);
                 WARN_ON_ONCE(ret);
         }
  
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c

index 0f96847b90e1175d2c6d0f497fe278e67f95e141..0b7f986745c17ff044a4d230329db1951837759a 100644 (file)
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -248,6 +248,7 @@ xfs_growfs_data_private(
                         agf->agf_roots[XFS_BTNUM_RMAPi] =
                                                 cpu_to_be32(XFS_RMAP_BLOCK(mp));
                         agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+                       agf->agf_rmap_blocks = cpu_to_be32(1);
                 }
  
                 agf->agf_flfirst = cpu_to_be32(1);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index 2114d53df433134a35084635b5238e2a775c6f0f..2af0dda1c978d5bc463670a09308577770965e7d 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -715,12 +715,16 @@ xfs_iomap_write_allocate(
                  * is in the delayed allocation extent on which we sit
                  * but before our buffer starts.
                  */
-
                 nimaps = 0;
                 while (nimaps == 0) {
                         nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-
-                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
+                       /*
+                        * We have already reserved space for the extent and any
+                        * indirect blocks when creating the delalloc extent,
+                        * there is no need to reserve space in this transaction
+                        * again.
+                        */
+                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
                                         0, XFS_TRANS_RESERVE, &tp);
                         if (error)
                                 return error;
@@ -1037,20 +1041,14 @@ xfs_file_iomap_begin(
                         return error;
  
                 trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
-               xfs_bmbt_to_iomap(ip, iomap, &imap);
-       } else if (nimaps) {
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               trace_xfs_iomap_found(ip, offset, length, 0, &imap);
-               xfs_bmbt_to_iomap(ip, iomap, &imap);
         } else {
+               ASSERT(nimaps);
+
                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
-               iomap->blkno = IOMAP_NULL_BLOCK;
-               iomap->type = IOMAP_HOLE;
-               iomap->offset = offset;
-               iomap->length = length;
+               trace_xfs_iomap_found(ip, offset, length, 0, &imap);
         }
  
+       xfs_bmbt_to_iomap(ip, iomap, &imap);
         return 0;
  }
  
@@ -1112,3 +1110,48 @@ struct iomap_ops xfs_iomap_ops = {
         .iomap_begin            = xfs_file_iomap_begin,
         .iomap_end              = xfs_file_iomap_end,
  };
+
+static int
+xfs_xattr_iomap_begin(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       unsigned                flags,
+       struct iomap            *iomap)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + length);
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps = 1, error = 0;
+       unsigned                lockmode;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       lockmode = xfs_ilock_data_map_shared(ip);
+
+       /* if there is no attribute fork or it has no extents, return ENOENT */
+       if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+               error = -ENOENT;
+               goto out_unlock;
+       }
+
+       ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
+       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+                              &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+
+       if (!error) {
+               ASSERT(nimaps);
+               xfs_bmbt_to_iomap(ip, iomap, &imap);
+       }
+
+       return error;
+}
+
+struct iomap_ops xfs_xattr_iomap_ops = {
+       .iomap_begin            = xfs_xattr_iomap_begin,
+};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h

index e066d045e2ffe629919517b9003ad88a036b0449..fb8aca3d69ab30c458a94733e6dc9f7f0e2d0b8d 100644 (file)
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -35,5 +35,6 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
                 struct xfs_bmbt_irec *);
  
  extern struct iomap_ops xfs_iomap_ops;
+extern struct iomap_ops xfs_xattr_iomap_ops;
  
  #endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index ab820f84ed507c26b6b355d70660814c82f377c0..b24c3102fa93f94fd98b3fd91d0d23e4c624b646 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1009,7 +1009,14 @@ xfs_vn_fiemap(
         int                     error;
  
         xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
-       error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+               fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
+               error = iomap_fiemap(inode, fieinfo, start, length,
+                               &xfs_xattr_iomap_ops);
+       } else {
+               error = iomap_fiemap(inode, fieinfo, start, length,
+                               &xfs_iomap_ops);
+       }
         xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
  
         return error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 551b7e26980c51886d4bd8edb19c45824fc45198..7e88bec3f3596c03ba7f5acd711bb72db620d51c 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1298,7 +1298,6 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
  DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
  DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
  DEFINE_IOMAP_EVENT(xfs_iomap_found);
-DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
  
  DECLARE_EVENT_CLASS(xfs_simple_io_class,
         TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h

index 54a8e65e18b622edf10df6d07ca0d84fea9e49ae..7d026bf277131f7bc4c79529cc26488e3a8281ee 100644 (file)
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -25,7 +25,20 @@
  #include <asm-generic/qrwlock_types.h>
  
  /*
- * Writer states & reader shift and bias
+ * Writer states & reader shift and bias.
+ *
+ *       | +0 | +1 | +2 | +3 |
+ *   ----+----+----+----+----+
+ *    LE | 78 | 56 | 34 | 12 | 0x12345678
+ *   ----+----+----+----+----+
+ *       | wr |      rd      |
+ *       +----+----+----+----+
+ *
+ *   ----+----+----+----+----+
+ *    BE | 12 | 34 | 56 | 78 | 0x12345678
+ *   ----+----+----+----+----+
+ *       |      rd      | wr |
+ *       +----+----+----+----+
   */
  #define        _QW_WAITING     1               /* A writer is waiting     */
  #define        _QW_LOCKED      0xff            /* A writer holds the lock */
@@ -133,13 +146,23 @@ static inline void queued_read_unlock(struct qrwlock *lock)
         (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
  }
  
+/**
+ * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: the write byte address of a queue rwlock
+ */
+static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
+{
+       return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
  /**
   * queued_write_unlock - release write lock of a queue rwlock
   * @lock : Pointer to queue rwlock structure
   */
  static inline void queued_write_unlock(struct qrwlock *lock)
  {
-       smp_store_release((u8 *)&lock->cnts, 0);
+       smp_store_release(__qrwlock_write_byte(lock), 0);
  }
  
  /*
diff --git a/include/linux/bvec.h b/include/linux/bvec.h

index 701b64a3b7c5e3e94b0487f0ec73801d06715817..89b65b82d98f5c5e77c34f967e856dcc6028dabe 100644 (file)
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -74,7 +74,8 @@ static inline void bvec_iter_advance(const struct bio_vec *bv,
                   "Attempted to advance past end of bvec iter\n");
  
         while (bytes) {
-               unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+               unsigned iter_len = bvec_iter_len(bv, *iter);
+               unsigned len = min(bytes, iter_len);
  
                 bytes -= len;
                 iter->bi_size -= len;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h

index 1bb95484272501bbc8d0603da489f56b4f87714a..436aa4e42221beb16edebd1d5e1bfb3a4fa6af26 100644 (file)
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
   * object's lifetime is managed by something other than RCU.  That
   * "something other" might be reference counting or simple immortality.
   *
- * The seemingly unused void * variable is to validate @p is indeed a pointer
- * type. All pointer types silently cast to void *.
+ * The seemingly unused size_t variable is to validate @p is indeed a pointer
+ * type by making sure it can be dereferenced.
   */
  #define lockless_dereference(p) \
  ({ \
         typeof(p) _________p1 = READ_ONCE(p); \
-       __maybe_unused const void * const _________p2 = _________p1; \
+       size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \
         smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
         (_________p1); \
  })
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h

index 7d565afe35d2fa9ba25359c9bf879d8bde9893e7..6f93ac46e7f0a52f6091755ebf879f14dc80c525 100644 (file)
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -795,7 +795,12 @@ struct ftrace_ret_stack {
         unsigned long func;
         unsigned long long calltime;
         unsigned long long subtime;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
         unsigned long fp;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+       unsigned long *retp;
+#endif
  };
  
  /*
@@ -807,7 +812,10 @@ extern void return_to_handler(void);
  
  extern int
  ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
-                        unsigned long frame_pointer);
+                        unsigned long frame_pointer, unsigned long *retp);
+
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+                                   unsigned long ret, unsigned long *retp);
  
  /*
   * Sometimes we don't want to trace a function with the function
@@ -870,6 +878,13 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
         return -1;
  }
  
+static inline unsigned long
+ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret,
+                     unsigned long *retp)
+{
+       return ret;
+}
+
  static inline void pause_graph_tracing(void) { }
  static inline void unpause_graph_tracing(void) { }
  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h

index 01e908ac4a39a7ed65a68a59f097b302a80c3b57..9c28b4d4c90b137ac72acad323ae2785e3d4dcce 100644 (file)
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1113,8 +1113,20 @@ struct kvm_device {
  /* create, destroy, and name are mandatory */
  struct kvm_device_ops {
         const char *name;
+
+       /*
+        * create is called holding kvm->lock and any operations not suitable
+        * to do while holding the lock should be deferred to init (see
+        * below).
+        */
         int (*create)(struct kvm_device *dev, u32 type);
  
+       /*
+        * init is called after create if create is successful and is called
+        * outside of holding kvm->lock.
+        */
+       void (*init)(struct kvm_device *dev);
+
         /*
          * Destroy is responsible for freeing dev.
          *
diff --git a/include/linux/msi.h b/include/linux/msi.h

index 4f0bfe5912b2f1eb9be7a5c25f4e1ec64c2fafbe..e8c81fbd5f9cd11d4ac01adb929c7d1eee86e69e 100644 (file)
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -270,6 +270,8 @@ enum {
         MSI_FLAG_MULTI_PCI_MSI          = (1 << 2),
         /* Support PCI MSIX interrupts */
         MSI_FLAG_PCI_MSIX               = (1 << 3),
+       /* Needs early activate, required for PCI */
+       MSI_FLAG_ACTIVATE_EARLY         = (1 << 4),
  };
  
  int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index 076df5360ba50544b0d835b1290038cf85be30e4..3a788bf0affdcd80a1282c5e4f4a5dc59e56c8ca 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3891,8 +3891,7 @@ void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
  extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
  void netdev_rss_key_fill(void *buffer, size_t len);
  
-int dev_get_nest_level(struct net_device *dev,
-                      bool (*type_check)(const struct net_device *dev));
+int dev_get_nest_level(struct net_device *dev);
  int skb_checksum_help(struct sk_buff *skb);
  struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
                                   netdev_features_t features, bool tx_path);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h

index 8ed4326164cc843b41da6fbfe69d85cee2d61232..2b6b43cc0dd5121d8d4f6024f8ec67f862ff3328 100644 (file)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -743,7 +743,9 @@ struct perf_event_context {
         u64                             parent_gen;
         u64                             generation;
         int                             pin_count;
+#ifdef CONFIG_CGROUP_PERF
         int                             nr_cgroups;      /* cgroup evts */
+#endif
         void                            *task_ctx_data; /* pmu specific data */
         struct rcu_head                 rcu_head;
  };
@@ -769,7 +771,9 @@ struct perf_cpu_context {
         unsigned int                    hrtimer_active;
  
         struct pmu                      *unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
         struct perf_cgroup              *cgrp;
+#endif
  };
  
  struct perf_output_handle {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h

index b1e3c57c7117c936954d55a1c635709bcb9c9bf9..d6c4177df7cb690537384d8626fe41690579146a 100644 (file)
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -70,8 +70,16 @@ struct qed_dbcx_pfc_params {
         u8 max_tc;
  };
  
+enum qed_dcbx_sf_ieee_type {
+       QED_DCBX_SF_IEEE_ETHTYPE,
+       QED_DCBX_SF_IEEE_TCP_PORT,
+       QED_DCBX_SF_IEEE_UDP_PORT,
+       QED_DCBX_SF_IEEE_TCP_UDP_PORT
+};
+
  struct qed_app_entry {
         bool ethtype;
+       enum qed_dcbx_sf_ieee_type sf_ieee;
         bool enabled;
         u8 prio;
         u16 proto_id;
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 62c68e513e391372b28c3bf49ded290634d94189..20f9f47bcfd07c3069e7653270a805a101a15a41 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1923,6 +1923,9 @@ struct task_struct {
  #ifdef CONFIG_MMU
         struct task_struct *oom_reaper_list;
  #endif
+#ifdef CONFIG_VMAP_STACK
+       struct vm_struct *stack_vm_area;
+#endif
  /* CPU-specific state of this task */
         struct thread_struct thread;
  /*
@@ -1939,6 +1942,18 @@ extern int arch_task_struct_size __read_mostly;
  # define arch_task_struct_size (sizeof(struct task_struct))
  #endif
  
+#ifdef CONFIG_VMAP_STACK
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+       return t->stack_vm_area;
+}
+#else
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+       return NULL;
+}
+#endif
+
  /* Future-safe accessor for struct task_struct's cpus_allowed. */
  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  
diff --git a/include/linux/sctp.h b/include/linux/sctp.h

index de1f64318fc4ec02aa5f2b449bbbd152bc1a0144..fcb4c364617329f10257c5cfe6d86d75d42c7132 100644 (file)
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -705,70 +705,6 @@ typedef struct sctp_auth_chunk {
         sctp_authhdr_t auth_hdr;
  } __packed sctp_auth_chunk_t;
  
-struct sctp_info {
-       __u32   sctpi_tag;
-       __u32   sctpi_state;
-       __u32   sctpi_rwnd;
-       __u16   sctpi_unackdata;
-       __u16   sctpi_penddata;
-       __u16   sctpi_instrms;
-       __u16   sctpi_outstrms;
-       __u32   sctpi_fragmentation_point;
-       __u32   sctpi_inqueue;
-       __u32   sctpi_outqueue;
-       __u32   sctpi_overall_error;
-       __u32   sctpi_max_burst;
-       __u32   sctpi_maxseg;
-       __u32   sctpi_peer_rwnd;
-       __u32   sctpi_peer_tag;
-       __u8    sctpi_peer_capable;
-       __u8    sctpi_peer_sack;
-       __u16   __reserved1;
-
-       /* assoc status info */
-       __u64   sctpi_isacks;
-       __u64   sctpi_osacks;
-       __u64   sctpi_opackets;
-       __u64   sctpi_ipackets;
-       __u64   sctpi_rtxchunks;
-       __u64   sctpi_outofseqtsns;
-       __u64   sctpi_idupchunks;
-       __u64   sctpi_gapcnt;
-       __u64   sctpi_ouodchunks;
-       __u64   sctpi_iuodchunks;
-       __u64   sctpi_oodchunks;
-       __u64   sctpi_iodchunks;
-       __u64   sctpi_octrlchunks;
-       __u64   sctpi_ictrlchunks;
-
-       /* primary transport info */
-       struct sockaddr_storage sctpi_p_address;
-       __s32   sctpi_p_state;
-       __u32   sctpi_p_cwnd;
-       __u32   sctpi_p_srtt;
-       __u32   sctpi_p_rto;
-       __u32   sctpi_p_hbinterval;
-       __u32   sctpi_p_pathmaxrxt;
-       __u32   sctpi_p_sackdelay;
-       __u32   sctpi_p_sackfreq;
-       __u32   sctpi_p_ssthresh;
-       __u32   sctpi_p_partial_bytes_acked;
-       __u32   sctpi_p_flight_size;
-       __u16   sctpi_p_error;
-       __u16   __reserved2;
-
-       /* sctp sock info */
-       __u32   sctpi_s_autoclose;
-       __u32   sctpi_s_adaptation_ind;
-       __u32   sctpi_s_pd_point;
-       __u8    sctpi_s_nodelay;
-       __u8    sctpi_s_disable_fragments;
-       __u8    sctpi_s_v4mapped;
-       __u8    sctpi_s_frag_interleave;
-       __u32   sctpi_s_type;
-       __u32   __reserved3;
-};
-
  struct sctp_infox {
         struct sctp_info *sctpinfo;
         struct sctp_association *asoc;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index 6f0b3e0adc73674f56449f49e61a7264463efd5f..0f665cb26b505729fad04be94ead98ff99e79359 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2847,6 +2847,18 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
                __skb_linearize(skb) : 0;
  }
  
+static __always_inline void
+__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+                    unsigned int off)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_block_sub(skb->csum,
+                                          csum_partial(start, len, 0), off);
+       else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+                skb_checksum_start_offset(skb) < 0)
+               skb->ip_summed = CHECKSUM_NONE;
+}
+
  /**
   *     skb_postpull_rcsum - update checksum for received skb after pull
   *     @skb: buffer to update
@@ -2857,36 +2869,38 @@ static inline int skb_linearize_cow(struct sk_buff *skb)
   *     update the CHECKSUM_COMPLETE checksum, or set ip_summed to
   *     CHECKSUM_NONE so that it can be recomputed from scratch.
   */
-
  static inline void skb_postpull_rcsum(struct sk_buff *skb,
                                       const void *start, unsigned int len)
  {
-       if (skb->ip_summed == CHECKSUM_COMPLETE)
-               skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
-       else if (skb->ip_summed == CHECKSUM_PARTIAL &&
-                skb_checksum_start_offset(skb) < 0)
-               skb->ip_summed = CHECKSUM_NONE;
+       __skb_postpull_rcsum(skb, start, len, 0);
  }
  
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+static __always_inline void
+__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+                    unsigned int off)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_block_add(skb->csum,
+                                          csum_partial(start, len, 0), off);
+}
  
+/**
+ *     skb_postpush_rcsum - update checksum for received skb after push
+ *     @skb: buffer to update
+ *     @start: start of data after push
+ *     @len: length of data pushed
+ *
+ *     After doing a push on a received packet, you need to call this to
+ *     update the CHECKSUM_COMPLETE checksum.
+ */
  static inline void skb_postpush_rcsum(struct sk_buff *skb,
                                       const void *start, unsigned int len)
  {
-       /* For performing the reverse operation to skb_postpull_rcsum(),
-        * we can instead of ...
-        *
-        *   skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
-        *
-        * ... just use this equivalent version here to save a few
-        * instructions. Feeding csum of 0 in csum_partial() and later
-        * on adding skb->csum is equivalent to feed skb->csum in the
-        * first place.
-        */
-       if (skb->ip_summed == CHECKSUM_COMPLETE)
-               skb->csum = csum_partial(start, len, skb->csum);
+       __skb_postpush_rcsum(skb, start, len, 0);
  }
  
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
  /**
   *     skb_push_rcsum - push skb and update receive checksum
   *     @skb: buffer to update
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h

index b6810c92b8bb14d9ef8c9d1036a0b25d40530ac3..5c02b0691587797e303758eb74a4cad7d5e577ff 100644 (file)
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -195,6 +195,8 @@ int         rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *,
                                 struct rpc_xprt *,
                                 void *),
                         void *data);
+void           rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+                       unsigned long timeo);
  
  const char *rpc_proc_name(const struct rpc_task *task);
  #endif /* __KERNEL__ */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h

index 5e3e1b63dbb3c97f0145bd15a23dba7d4e916528..a16070dd03eefe9281476183ff4b5a0692523a09 100644 (file)
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -218,7 +218,8 @@ struct rpc_xprt {
         struct work_struct      task_cleanup;
         struct timer_list       timer;
         unsigned long           last_used,
-                               idle_timeout;
+                               idle_timeout,
+                               max_reconnect_timeout;
  
         /*
          * Send stuff
diff --git a/include/net/act_api.h b/include/net/act_api.h

index 41e6a24a44b9b11413b33d8fe717992dd2a2d6db..82f3c912a5b176d5f2d57361765c9bccbdded367 100644 (file)
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -176,8 +176,8 @@ int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
  int tcf_unregister_action(struct tc_action_ops *a,
                           struct pernet_operations *ops);
  int tcf_action_destroy(struct list_head *actions, int bind);
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-                   struct tcf_result *res);
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+                   int nr_actions, struct tcf_result *res);
  int tcf_action_init(struct net *net, struct nlattr *nla,
                                   struct nlattr *est, char *n, int ovr,
                                   int bind, struct list_head *);
@@ -189,30 +189,17 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
  int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
  int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
  
-#define tc_no_actions(_exts) \
-       (list_empty(&(_exts)->actions))
-
-#define tc_for_each_action(_a, _exts) \
-       list_for_each_entry(a, &(_exts)->actions, list)
-
-#define tc_single_action(_exts) \
-       (list_is_singular(&(_exts)->actions))
+#endif /* CONFIG_NET_CLS_ACT */
  
  static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
                                            u64 packets, u64 lastuse)
  {
+#ifdef CONFIG_NET_CLS_ACT
         if (!a->ops->stats_update)
                 return;
  
         a->ops->stats_update(a, bytes, packets, lastuse);
+#endif
  }
  
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_for_each_action(_a, _exts) while ((void)(_a), 0)
-#define tc_single_action(_exts) false
-#define tcf_action_stats_update(a, bytes, packets, lastuse)
-
-#endif /* CONFIG_NET_CLS_ACT */
  #endif
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h

index ac1bc3c49fbdf9832fdb4f895657c0336bb61926..7b0f88699b25eafceeca6a52da908ee03aa70d95 100644 (file)
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -40,12 +40,12 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
                                            unsigned long,
                                            gfp_t);
  int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t);
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
  void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
  void rxrpc_kernel_end_call(struct rxrpc_call *);
  bool rxrpc_kernel_is_data_last(struct sk_buff *);
  u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
  int rxrpc_kernel_get_error_number(struct sk_buff *);
-void rxrpc_kernel_data_delivered(struct sk_buff *);
  void rxrpc_kernel_free_skb(struct sk_buff *);
  struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long);
  int rxrpc_kernel_reject_call(struct socket *);
diff --git a/include/net/gre.h b/include/net/gre.h

index 7a54a31d1d4cf7988ff7bd3bf3015b4a8f23e359..73ea256eb7d79f15429caf12760444e6e8753f10 100644 (file)
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -104,6 +104,7 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len,
  
         skb_push(skb, hdr_len);
  
+       skb_set_inner_protocol(skb, proto);
         skb_reset_transport_header(skb);
         greh = (struct gre_base_hdr *)skb->data;
         greh->flags = gre_tnl_flags_to_gre_flags(flags);
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h

index 0dc0a51da38faacab2ea275681f5f70e09a6c79e..dce2d586d9cecb9e9de381aa0926f3e3d3ec9568 100644 (file)
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -128,7 +128,8 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
         to = from | htonl(INET_ECN_CE << 20);
         *(__be32 *)iph = to;
         if (skb->ip_summed == CHECKSUM_COMPLETE)
-               skb->csum = csum_add(csum_sub(skb->csum, from), to);
+               skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+                                    (__force __wsum)to);
         return 1;
  }
  
diff --git a/include/net/mac80211.h b/include/net/mac80211.h

index b4faadbb4e01f9ca14b19ebce355823f5716d129..cca510a585c3d55895da6a1f593474c15d666bca 100644 (file)
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3620,7 +3620,8 @@ struct ieee80211_ops {
  
         int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
         void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
-       u32 (*get_expected_throughput)(struct ieee80211_sta *sta);
+       u32 (*get_expected_throughput)(struct ieee80211_hw *hw,
+                                      struct ieee80211_sta *sta);
         int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                            int *dbm);
  
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h

index 6f8d65342d3adb86ea636bec00c750ad72015eb2..c99508d426ccfe8220c992209a35f84589d09906 100644 (file)
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -59,7 +59,8 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
  struct tcf_exts {
  #ifdef CONFIG_NET_CLS_ACT
         __u32   type; /* for backward compat(TCA_OLD_COMPAT) */
-       struct list_head actions;
+       int nr_actions;
+       struct tc_action **actions;
  #endif
         /* Map to export classifier specific extension TLV types to the
          * generic extensions API. Unsupported extensions must be set to 0.
@@ -72,7 +73,10 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
  {
  #ifdef CONFIG_NET_CLS_ACT
         exts->type = 0;
-       INIT_LIST_HEAD(&exts->actions);
+       exts->nr_actions = 0;
+       exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
+                               GFP_KERNEL);
+       WARN_ON(!exts->actions); /* TODO: propagate the error to callers */
  #endif
         exts->action = action;
         exts->police = police;
@@ -89,7 +93,7 @@ static inline int
  tcf_exts_is_predicative(struct tcf_exts *exts)
  {
  #ifdef CONFIG_NET_CLS_ACT
-       return !list_empty(&exts->actions);
+       return exts->nr_actions;
  #else
         return 0;
  #endif
@@ -108,6 +112,20 @@ tcf_exts_is_available(struct tcf_exts *exts)
         return tcf_exts_is_predicative(exts);
  }
  
+static inline void tcf_exts_to_list(const struct tcf_exts *exts,
+                                   struct list_head *actions)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       int i;
+
+       for (i = 0; i < exts->nr_actions; i++) {
+               struct tc_action *a = exts->actions[i];
+
+               list_add_tail(&a->list, actions); /* keep array order */
+       }
+#endif
+}
+
  /**
   * tcf_exts_exec - execute tc filter extensions
   * @skb: socket buffer
@@ -124,12 +142,25 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
                struct tcf_result *res)
  {
  #ifdef CONFIG_NET_CLS_ACT
-       if (!list_empty(&exts->actions))
-               return tcf_action_exec(skb, &exts->actions, res);
+       if (exts->nr_actions)
+               return tcf_action_exec(skb, exts->actions, exts->nr_actions,
+                                      res);
  #endif
         return 0;
  }
  
+#ifdef CONFIG_NET_CLS_ACT
+
+#define tc_no_actions(_exts)  ((_exts)->nr_actions == 0)
+#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
+
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_single_action(_exts) false
+
+#endif /* CONFIG_NET_CLS_ACT */
+
  int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
                       struct nlattr **tb, struct nlattr *rate_tlv,
                       struct tcf_exts *exts, bool ovr);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index da218fec605657ee415f8ad71a95d8851330a9de..9e5fc168c8a3d8cb3d8ef424eed6d67a74f2e8ac 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -339,7 +339,7 @@ enum bpf_func_id {
         BPF_FUNC_skb_change_type,
  
         /**
-        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
          * @skb: pointer to skb
          * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
          * @index: index of the cgroup in the bpf_map
@@ -348,7 +348,7 @@ enum bpf_func_id {
          *   == 1 skb succeeded the cgroup2 descendant test
          *    < 0 error
          */
-       BPF_FUNC_skb_in_cgroup,
+       BPF_FUNC_skb_under_cgroup,
  
         /**
          * bpf_get_hash_recalc(skb)
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h

index 01751faccaf87661adb96e63cba35a2df653bceb..c674ba2563b7df6e692161402fe84594f83a3c77 100644 (file)
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -24,7 +24,7 @@ enum nft_registers {
         __NFT_REG_MAX,
  
         NFT_REG32_00    = 8,
-       MFT_REG32_01,
+       NFT_REG32_01,
         NFT_REG32_02,
         NFT_REG32_03,
         NFT_REG32_04,
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h

index d304f4c9792c4b83d641701cbb589d09bcca499b..a406adcc0793e0f3a09706cb693867b49d0ef914 100644 (file)
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -944,4 +944,68 @@ struct sctp_default_prinfo {
         __u16 pr_policy;
  };
  
+struct sctp_info {
+       __u32   sctpi_tag;
+       __u32   sctpi_state;
+       __u32   sctpi_rwnd;
+       __u16   sctpi_unackdata;
+       __u16   sctpi_penddata;
+       __u16   sctpi_instrms;
+       __u16   sctpi_outstrms;
+       __u32   sctpi_fragmentation_point;
+       __u32   sctpi_inqueue;
+       __u32   sctpi_outqueue;
+       __u32   sctpi_overall_error;
+       __u32   sctpi_max_burst;
+       __u32   sctpi_maxseg;
+       __u32   sctpi_peer_rwnd;
+       __u32   sctpi_peer_tag;
+       __u8    sctpi_peer_capable;
+       __u8    sctpi_peer_sack;
+       __u16   __reserved1;
+
+       /* assoc status info */
+       __u64   sctpi_isacks;
+       __u64   sctpi_osacks;
+       __u64   sctpi_opackets;
+       __u64   sctpi_ipackets;
+       __u64   sctpi_rtxchunks;
+       __u64   sctpi_outofseqtsns;
+       __u64   sctpi_idupchunks;
+       __u64   sctpi_gapcnt;
+       __u64   sctpi_ouodchunks;
+       __u64   sctpi_iuodchunks;
+       __u64   sctpi_oodchunks;
+       __u64   sctpi_iodchunks;
+       __u64   sctpi_octrlchunks;
+       __u64   sctpi_ictrlchunks;
+
+       /* primary transport info */
+       struct sockaddr_storage sctpi_p_address;
+       __s32   sctpi_p_state;
+       __u32   sctpi_p_cwnd;
+       __u32   sctpi_p_srtt;
+       __u32   sctpi_p_rto;
+       __u32   sctpi_p_hbinterval;
+       __u32   sctpi_p_pathmaxrxt;
+       __u32   sctpi_p_sackdelay;
+       __u32   sctpi_p_sackfreq;
+       __u32   sctpi_p_ssthresh;
+       __u32   sctpi_p_partial_bytes_acked;
+       __u32   sctpi_p_flight_size;
+       __u16   sctpi_p_error;
+       __u16   __reserved2;
+
+       /* sctp sock info */
+       __u32   sctpi_s_autoclose;
+       __u32   sctpi_s_adaptation_ind;
+       __u32   sctpi_s_pd_point;
+       __u8    sctpi_s_nodelay;
+       __u8    sctpi_s_disable_fragments;
+       __u8    sctpi_s_v4mapped;
+       __u8    sctpi_s_frag_interleave;
+       __u32   sctpi_s_type;
+       __u32   __reserved3;
+};
+
  #endif /* _UAPI_SCTP_H */
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h

index 6b011c19b50f969d66d3ec0b26d7036d6bac7525..1d57ed3d84d2c3d10d02dce56d728b9cf0ece17b 100644 (file)
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -32,7 +32,7 @@
   */
  
  #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
-#define _UAPI_LINUX_VIRTIO_VOSCK_H
+#define _UAPI_LINUX_VIRTIO_VSOCK_H
  
  #include <linux/types.h>
  #include <linux/virtio_ids.h>
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h

index cbae529b7ce0999684c42d7906df3de3febdc209..180d526a55c3ab5b54befbb264279e61d7e5a519 100644 (file)
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -136,8 +136,8 @@ struct cxl_event_afu_driver_reserved {
          *
          * Of course the contents will be ABI, but that's up the AFU driver.
          */
-       size_t data_size;
-       u8 data[];
+       __u32 data_size;
+       __u8 data[];
  };
  
  struct cxl_event {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c

index fff3650d52fc774f7efdf2d289e879a916f44c60..570eeca7bdfa79ce16d18ee94fac64c40aa11d38 100644 (file)
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@ struct bpf_htab {
         struct bucket *buckets;
         void *elems;
         struct pcpu_freelist freelist;
+       void __percpu *extra_elems;
         atomic_t count; /* number of elements in this hashtable */
         u32 n_buckets;  /* number of hash buckets */
         u32 elem_size;  /* size of each element in bytes */
  };
  
+enum extra_elem_state {
+       HTAB_NOT_AN_EXTRA_ELEM = 0,
+       HTAB_EXTRA_ELEM_FREE,
+       HTAB_EXTRA_ELEM_USED
+};
+
  /* each htab element is struct htab_elem + key + value */
  struct htab_elem {
         union {
@@ -38,7 +45,10 @@ struct htab_elem {
                 struct bpf_htab *htab;
                 struct pcpu_freelist_node fnode;
         };
-       struct rcu_head rcu;
+       union {
+               struct rcu_head rcu;
+               enum extra_elem_state state;
+       };
         u32 hash;
         char key[0] __aligned(8);
  };
@@ -113,6 +123,23 @@ free_elems:
         return err;
  }
  
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+       void __percpu *pptr;
+       int cpu;
+
+       pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+       if (!pptr)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) {
+               ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+                       HTAB_EXTRA_ELEM_FREE;
+       }
+       htab->extra_elems = pptr;
+       return 0;
+}
+
  /* Called from syscall */
  static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
  {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
         if (percpu)
                 cost += (u64) round_up(htab->map.value_size, 8) *
                         num_possible_cpus() * htab->map.max_entries;
+       else
+              cost += (u64) htab->elem_size * num_possible_cpus();
  
         if (cost >= U32_MAX - PAGE_SIZE)
                 /* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                 raw_spin_lock_init(&htab->buckets[i].lock);
         }
  
+       if (!percpu) {
+               err = alloc_extra_elems(htab);
+               if (err)
+                       goto free_buckets;
+       }
+
         if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
                 err = prealloc_elems_and_freelist(htab);
                 if (err)
-                       goto free_buckets;
+                       goto free_extra_elems;
         }
  
         return &htab->map;
  
+free_extra_elems:
+       free_percpu(htab->extra_elems);
  free_buckets:
         kvfree(htab->buckets);
  free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
         if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
                 free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
         kfree(l);
-
  }
  
  static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
  
  static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
  {
+       if (l->state == HTAB_EXTRA_ELEM_USED) {
+               l->state = HTAB_EXTRA_ELEM_FREE;
+               return;
+       }
+
         if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
                 pcpu_freelist_push(&htab->freelist, &l->fnode);
         } else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
  
  static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                          void *value, u32 key_size, u32 hash,
-                                        bool percpu, bool onallcpus)
+                                        bool percpu, bool onallcpus,
+                                        bool old_elem_exists)
  {
         u32 size = htab->map.value_size;
         bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
         struct htab_elem *l_new;
         void __percpu *pptr;
+       int err = 0;
  
         if (prealloc) {
                 l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
                 if (!l_new)
-                       return ERR_PTR(-E2BIG);
+                       err = -E2BIG;
         } else {
                 if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
                         atomic_dec(&htab->count);
-                       return ERR_PTR(-E2BIG);
+                       err = -E2BIG;
+               } else {
+                       l_new = kmalloc(htab->elem_size,
+                                       GFP_ATOMIC | __GFP_NOWARN);
+                       if (!l_new)
+                               return ERR_PTR(-ENOMEM);
                 }
-               l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-               if (!l_new)
-                       return ERR_PTR(-ENOMEM);
+       }
+
+       if (err) {
+               if (!old_elem_exists)
+                       return ERR_PTR(err);
+
+               /* if we're updating the existing element and the hash table
+                * is full, use per-cpu extra elems
+                */
+               l_new = this_cpu_ptr(htab->extra_elems);
+               if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+                       return ERR_PTR(-E2BIG);
+               l_new->state = HTAB_EXTRA_ELEM_USED;
+       } else {
+               l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
         }
  
         memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
         if (ret)
                 goto err;
  
-       l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+       l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+                               !!l_old);
         if (IS_ERR(l_new)) {
                 /* all pre-allocated elements are in use or memory exhausted */
                 ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                 }
         } else {
                 l_new = alloc_htab_elem(htab, key, value, key_size,
-                                       hash, true, onallcpus);
+                                       hash, true, onallcpus, false);
                 if (IS_ERR(l_new)) {
                         ret = PTR_ERR(l_new);
                         goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
                 htab_free_elems(htab);
                 pcpu_freelist_destroy(&htab->freelist);
         }
+       free_percpu(htab->extra_elems);
         kvfree(htab->buckets);
         kfree(htab);
  }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index f72f23b8fdab42b8e2c3add264bb2680f93bf940..daea765d72e6f536c31b14bb569208b0e975b75c 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,7 @@ struct verifier_env {
         struct verifier_state_list **explored_states; /* search pruning optimization */
         struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
         u32 used_map_cnt;               /* number of used maps */
+       u32 id_gen;                     /* used to generate unique reg IDs */
         bool allow_ptr_leaks;
  };
  
@@ -1052,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                         goto error;
                 break;
         case BPF_MAP_TYPE_CGROUP_ARRAY:
-               if (func_id != BPF_FUNC_skb_in_cgroup)
+               if (func_id != BPF_FUNC_skb_under_cgroup)
                         goto error;
                 break;
         default:
@@ -1074,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
                         goto error;
                 break;
-       case BPF_FUNC_skb_in_cgroup:
+       case BPF_FUNC_skb_under_cgroup:
                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
                         goto error;
                 break;
@@ -1301,7 +1302,7 @@ add_imm:
                 /* dst_reg stays as pkt_ptr type and since some positive
                  * integer value was added to the pointer, increment its 'id'
                  */
-               dst_reg->id++;
+               dst_reg->id = ++env->id_gen;
  
                 /* something was added to pkt_ptr, set range and off to zero */
                 dst_reg->off = 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c

index a19550d80ab1724d03ac1b0799aad54a6f1cf823..5650f5317e0c3bf48782349f1f3e64e2aa3d48a1 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -242,18 +242,6 @@ unlock:
         return ret;
  }
  
-static void event_function_local(struct perf_event *event, event_f func, void *data)
-{
-       struct event_function_struct efs = {
-               .event = event,
-               .func = func,
-               .data = data,
-       };
-
-       int ret = event_function(&efs);
-       WARN_ON_ONCE(ret);
-}
-
  static void event_function_call(struct perf_event *event, event_f func, void *data)
  {
         struct perf_event_context *ctx = event->ctx;
@@ -303,6 +291,54 @@ again:
         raw_spin_unlock_irq(&ctx->lock);
  }
  
+/*
+ * Similar to event_function_call() + event_function(), but hard assumes IRQs
+ * are already disabled and we're on the right CPU.
+ */
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+       struct perf_event_context *ctx = event->ctx;
+       struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+       struct task_struct *task = READ_ONCE(ctx->task);
+       struct perf_event_context *task_ctx = NULL;
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       if (task) {
+               if (task == TASK_TOMBSTONE)
+                       return;
+
+               task_ctx = ctx;
+       }
+
+       perf_ctx_lock(cpuctx, task_ctx);
+
+       task = ctx->task;
+       if (task == TASK_TOMBSTONE)
+               goto unlock;
+
+       if (task) {
+               /*
+                * We must be either inactive or active and the right task,
+                * otherwise we're screwed, since we cannot IPI to somewhere
+                * else.
+                */
+               if (ctx->is_active) {
+                       if (WARN_ON_ONCE(task != current))
+                               goto unlock;
+
+                       if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
+                               goto unlock;
+               }
+       } else {
+               WARN_ON_ONCE(&cpuctx->ctx != ctx);
+       }
+
+       func(event, cpuctx, ctx, data);
+unlock:
+       perf_ctx_unlock(cpuctx, task_ctx);
+}
+
  #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
                        PERF_FLAG_FD_OUTPUT  |\
                        PERF_FLAG_PID_CGROUP |\
@@ -843,6 +879,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                 }
         }
  }
+
+/*
+ * Update cpuctx->cgrp so that it is set when first cgroup event is added and
+ * cleared when last cgroup event is removed.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{
+       struct perf_cpu_context *cpuctx;
+
+       if (!is_cgroup_event(event))
+               return;
+
+       if (add && ctx->nr_cgroups++)
+               return;
+       else if (!add && --ctx->nr_cgroups)
+               return;
+       /*
+        * Because cgroup events are always per-cpu events,
+        * this will always be called from the right CPU.
+        */
+       cpuctx = __get_cpu_context(ctx);
+       cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
  #else /* !CONFIG_CGROUP_PERF */
  
  static inline bool
@@ -920,6 +982,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
                          struct perf_event_context *ctx)
  {
  }
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+                        struct perf_event_context *ctx, bool add)
+{
+}
+
  #endif
  
  /*
@@ -1392,6 +1461,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
  static void
  list_add_event(struct perf_event *event, struct perf_event_context *ctx)
  {
+
         lockdep_assert_held(&ctx->lock);
  
         WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1482,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                 list_add_tail(&event->group_entry, list);
         }
  
-       if (is_cgroup_event(event))
-               ctx->nr_cgroups++;
+       list_update_cgroup_event(event, ctx, true);
  
         list_add_rcu(&event->event_entry, &ctx->event_list);
         ctx->nr_events++;
@@ -1581,8 +1650,6 @@ static void perf_group_attach(struct perf_event *event)
  static void
  list_del_event(struct perf_event *event, struct perf_event_context *ctx)
  {
-       struct perf_cpu_context *cpuctx;
-
         WARN_ON_ONCE(event->ctx != ctx);
         lockdep_assert_held(&ctx->lock);
  
@@ -1594,20 +1661,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
  
         event->attach_state &= ~PERF_ATTACH_CONTEXT;
  
-       if (is_cgroup_event(event)) {
-               ctx->nr_cgroups--;
-               /*
-                * Because cgroup events are always per-cpu events, this will
-                * always be called from the right CPU.
-                */
-               cpuctx = __get_cpu_context(ctx);
-               /*
-                * If there are no more cgroup events then clear cgrp to avoid
-                * stale pointer in update_cgrp_time_from_cpuctx().
-                */
-               if (!ctx->nr_cgroups)
-                       cpuctx->cgrp = NULL;
-       }
+       list_update_cgroup_event(event, ctx, false);
  
         ctx->nr_events--;
         if (event->attr.inherit_stat)
@@ -1716,8 +1770,8 @@ static inline int pmu_filter_match(struct perf_event *event)
  static inline int
  event_filter_match(struct perf_event *event)
  {
-       return (event->cpu == -1 || event->cpu == smp_processor_id())
-           && perf_cgroup_match(event) && pmu_filter_match(event);
+       return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+              perf_cgroup_match(event) && pmu_filter_match(event);
  }
  
  static void
@@ -1737,8 +1791,8 @@ event_sched_out(struct perf_event *event,
          * maintained, otherwise bogus information is return
          * via read() for time_enabled, time_running:
          */
-       if (event->state == PERF_EVENT_STATE_INACTIVE
-           && !event_filter_match(event)) {
+       if (event->state == PERF_EVENT_STATE_INACTIVE &&
+           !event_filter_match(event)) {
                 delta = tstamp - event->tstamp_stopped;
                 event->tstamp_running += delta;
                 event->tstamp_stopped = tstamp;
@@ -2236,10 +2290,15 @@ perf_install_in_context(struct perf_event_context *ctx,
  
         lockdep_assert_held(&ctx->mutex);
  
-       event->ctx = ctx;
         if (event->cpu != -1)
                 event->cpu = cpu;
  
+       /*
+        * Ensures that if we can observe event->ctx, both the event and ctx
+        * will be 'complete'. See perf_iterate_sb_cpu().
+        */
+       smp_store_release(&event->ctx, ctx);
+
         if (!task) {
                 cpu_function_call(cpu, __perf_install_in_context, event);
                 return;
@@ -3490,9 +3549,10 @@ static int perf_event_read(struct perf_event *event, bool group)
                         .group = group,
                         .ret = 0,
                 };
-               smp_call_function_single(event->oncpu,
-                                        __perf_event_read, &data, 1);
-               ret = data.ret;
+               ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
+               /* The event must have been read from an online CPU: */
+               WARN_ON_ONCE(ret);
+               ret = ret ? : data.ret;
         } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
                 struct perf_event_context *ctx = event->ctx;
                 unsigned long flags;
@@ -5969,6 +6029,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
         struct perf_event *event;
  
         list_for_each_entry_rcu(event, &pel->list, sb_list) {
+               /*
+                * Skip events that are not fully formed yet; ensure that
+                * if we observe event->ctx, both event and ctx will be
+                * complete enough. See perf_install_in_context().
+                */
+               if (!smp_load_acquire(&event->ctx))
+                       continue;
+
                 if (event->state < PERF_EVENT_STATE_INACTIVE)
                         continue;
                 if (!event_filter_match(event))
@@ -6552,15 +6620,6 @@ got_name:
         kfree(buf);
  }
  
-/*
- * Whether this @filter depends on a dynamic object which is not loaded
- * yet or its load addresses are not known.
- */
-static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
-{
-       return filter->filter && filter->inode;
-}
-
  /*
   * Check whether inode and address range match filter criteria.
   */
@@ -6622,6 +6681,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma)
         struct perf_event_context *ctx;
         int ctxn;
  
+       /*
+        * Data tracing isn't supported yet and as such there is no need
+        * to keep track of anything that isn't related to executable code:
+        */
+       if (!(vma->vm_flags & VM_EXEC))
+               return;
+
         rcu_read_lock();
         for_each_task_context_nr(ctxn) {
                 ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
@@ -7774,7 +7840,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
         list_for_each_entry(filter, &ifh->list, entry) {
                 event->addr_filters_offs[count] = 0;
  
-               if (perf_addr_filter_needs_mmap(filter))
+               /*
+                * Adjust base offset if the filter is associated to a binary
+                * that needs to be mapped:
+                */
+               if (filter->inode)
                         event->addr_filters_offs[count] =
                                 perf_addr_filter_apply(filter, mm);
  
@@ -7905,8 +7975,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
                                         goto fail;
                         }
  
-                       if (token == IF_SRC_FILE) {
-                               filename = match_strdup(&args[2]);
+                       if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+                               int fpos = filter->range ? 2 : 1;
+
+                               filename = match_strdup(&args[fpos]);
                                 if (!filename) {
                                         ret = -ENOMEM;
                                         goto fail;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c

index b7a525ab2083708f0203db32059dd66551d11d9b..8c50276b60d1c7fb75da997f85819f5b9a313c27 100644 (file)
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
         mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
         err = -EAGAIN;
         ptep = page_check_address(page, mm, addr, &ptl, 0);
-       if (!ptep)
+       if (!ptep) {
+               mem_cgroup_cancel_charge(kpage, memcg, false);
                 goto unlock;
+       }
  
         get_page(kpage);
         page_add_new_anon_rmap(kpage, vma, addr, false);
@@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
  
         err = 0;
   unlock:
-       mem_cgroup_cancel_charge(kpage, memcg, false);
         mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
         unlock_page(page);
         return err;
diff --git a/kernel/fork.c b/kernel/fork.c

index 52e725d4a866b4ac30f16b4db075b9b197e4e53d..9b85f6b2cdcd824d2027f98bfd4ca23f3a9ea0b1 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -158,19 +158,39 @@ void __weak arch_release_thread_stack(unsigned long *stack)
   * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
   * kmemcache based allocator.
   */
-# if THREAD_SIZE >= PAGE_SIZE
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
-                                                 int node)
+# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
  {
+#ifdef CONFIG_VMAP_STACK
+       void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+                                          VMALLOC_START, VMALLOC_END,
+                                          THREADINFO_GFP | __GFP_HIGHMEM,
+                                          PAGE_KERNEL,
+                                          0, node,
+                                          __builtin_return_address(0));
+
+       /*
+        * We can't call find_vm_area() in interrupt context, and
+        * free_thread_stack() can be called in interrupt context,
+        * so cache the vm_struct.
+        */
+       if (stack)
+               tsk->stack_vm_area = find_vm_area(stack);
+       return stack;
+#else
         struct page *page = alloc_pages_node(node, THREADINFO_GFP,
                                              THREAD_SIZE_ORDER);
  
         return page ? page_address(page) : NULL;
+#endif
  }
  
-static inline void free_thread_stack(unsigned long *stack)
+static inline void free_thread_stack(struct task_struct *tsk)
  {
-       __free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
+       if (task_stack_vm_area(tsk))
+               vfree(tsk->stack);
+       else
+               __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
  }
  # else
  static struct kmem_cache *thread_stack_cache;
@@ -181,9 +201,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
         return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
  }
  
-static void free_thread_stack(unsigned long *stack)
+static void free_thread_stack(struct task_struct *tsk)
  {
-       kmem_cache_free(thread_stack_cache, stack);
+       kmem_cache_free(thread_stack_cache, tsk->stack);
  }
  
  void thread_stack_cache_init(void)
@@ -213,24 +233,47 @@ struct kmem_cache *vm_area_cachep;
  /* SLAB cache for mm_struct structures (tsk->mm) */
  static struct kmem_cache *mm_cachep;
  
-static void account_kernel_stack(unsigned long *stack, int account)
+static void account_kernel_stack(struct task_struct *tsk, int account)
  {
-       /* All stack pages are in the same zone and belong to the same memcg. */
-       struct page *first_page = virt_to_page(stack);
+       void *stack = task_stack_page(tsk);
+       struct vm_struct *vm = task_stack_vm_area(tsk);
+
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+
+       if (vm) {
+               int i;
+
+               BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+                       mod_zone_page_state(page_zone(vm->pages[i]),
+                                           NR_KERNEL_STACK_KB,
+                                           PAGE_SIZE / 1024 * account);
+               }
  
-       mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
-                           THREAD_SIZE / 1024 * account);
+               /* All stack pages belong to the same memcg. */
+               memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
+                                           account * (THREAD_SIZE / 1024));
+       } else {
+               /*
+                * All stack pages are in the same zone and belong to the
+                * same memcg.
+                */
+               struct page *first_page = virt_to_page(stack);
+
+               mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
+                                   THREAD_SIZE / 1024 * account);
  
-       memcg_kmem_update_page_stat(
-               first_page, MEMCG_KERNEL_STACK_KB,
-               account * (THREAD_SIZE / 1024));
+               memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
+                                           account * (THREAD_SIZE / 1024));
+       }
  }
  
  void free_task(struct task_struct *tsk)
  {
-       account_kernel_stack(tsk->stack, -1);
+       account_kernel_stack(tsk, -1);
         arch_release_thread_stack(tsk->stack);
-       free_thread_stack(tsk->stack);
+       free_thread_stack(tsk);
         rt_mutex_debug_task_free(tsk);
         ftrace_graph_exit_task(tsk);
         put_seccomp_filter(tsk);
@@ -342,6 +385,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
  {
         struct task_struct *tsk;
         unsigned long *stack;
+       struct vm_struct *stack_vm_area;
         int err;
  
         if (node == NUMA_NO_NODE)
@@ -354,11 +398,23 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
         if (!stack)
                 goto free_tsk;
  
+       stack_vm_area = task_stack_vm_area(tsk);
+
         err = arch_dup_task_struct(tsk, orig);
+
+       /*
+        * arch_dup_task_struct() clobbers the stack-related fields.  Make
+        * sure they're properly initialized before using any stack-related
+        * functions again.
+        */
+       tsk->stack = stack;
+#ifdef CONFIG_VMAP_STACK
+       tsk->stack_vm_area = stack_vm_area;
+#endif
+
         if (err)
                 goto free_stack;
  
-       tsk->stack = stack;
  #ifdef CONFIG_SECCOMP
         /*
          * We must handle setting up seccomp filters once we're under
@@ -390,14 +446,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
         tsk->task_frag.page = NULL;
         tsk->wake_q.next = NULL;
  
-       account_kernel_stack(stack, 1);
+       account_kernel_stack(tsk, 1);
  
         kcov_task_init(tsk);
  
         return tsk;
  
  free_stack:
-       free_thread_stack(stack);
+       free_thread_stack(tsk);
  free_tsk:
         free_task_struct(tsk);
         return NULL;
diff --git a/kernel/futex.c b/kernel/futex.c

index 33664f70e2d25e880efdbc8695fcc12989871150..46cb3a301bc1555a84607bab7b2aea8a2bcaf7e6 100644 (file)
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -179,7 +179,15 @@ int __read_mostly futex_cmpxchg_enabled;
   * Futex flags used to encode options to functions and preserve them across
   * restarts.
   */
-#define FLAGS_SHARED           0x01
+#ifdef CONFIG_MMU
+# define FLAGS_SHARED          0x01
+#else
+/*
+ * NOMMU does not have per process address space. Let the compiler optimize
+ * code away.
+ */
+# define FLAGS_SHARED          0x00
+#endif
  #define FLAGS_CLOCKRT          0x02
  #define FLAGS_HAS_TIMEOUT      0x04
  
@@ -405,6 +413,16 @@ static void get_futex_key_refs(union futex_key *key)
         if (!key->both.ptr)
                 return;
  
+       /*
+        * On MMU less systems futexes are always "private" as there is no per
+        * process address space. We need the smp wmb nevertheless - yes,
+        * arch/blackfin has MMU less SMP ...
+        */
+       if (!IS_ENABLED(CONFIG_MMU)) {
+               smp_mb(); /* explicit smp_mb(); (B) */
+               return;
+       }
+
         switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
         case FUT_OFF_INODE:
                 ihold(key->shared.inode); /* implies smp_mb(); (B) */
@@ -436,6 +454,9 @@ static void drop_futex_key_refs(union futex_key *key)
                 return;
         }
  
+       if (!IS_ENABLED(CONFIG_MMU))
+               return;
+
         switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
         case FUT_OFF_INODE:
                 iput(key->shared.inode);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c

index 54999350162cbc89326a67a3728814e17871d784..19e9dfbe97fa53f732edd375cdd50341327750ac 100644 (file)
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -359,6 +359,17 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
                 else
                         dev_dbg(dev, "irq [%d-%d] for MSI\n",
                                 virq, virq + desc->nvec_used - 1);
+               /*
+                * This flag is set by the PCI layer as we need to activate
+                * the MSI entries before the PCI layer enables MSI in the
+                * card. Otherwise the card latches a random msi message.
+                */
+               if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
+                       struct irq_data *irq_data;
+
+                       irq_data = irq_domain_get_irq_data(domain, desc->irq);
+                       irq_domain_activate_irq(irq_data);
+               }
         }
  
         return 0;
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h

index 37649e69056cf974e27d0137260f8ff46ad688df..8a99abf58080be21fbb954777b48aca24d4342b5 100644 (file)
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -450,7 +450,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
                                 goto gotlock;
                         }
                 }
-               WRITE_ONCE(pn->state, vcpu_halted);
+               WRITE_ONCE(pn->state, vcpu_hashed);
                 qstat_inc(qstat_pv_wait_head, true);
                 qstat_inc(qstat_pv_wait_again, waitcnt);
                 pv_wait(&l->locked, _Q_SLOW_VAL);
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h

index 22e02530984574a6fee718aad26720516c34d497..b9d0315162540d1236e5e1268f184531d8259114 100644 (file)
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -153,7 +153,6 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf,
                  */
                 if ((counter == qstat_pv_latency_kick) ||
                     (counter == qstat_pv_latency_wake)) {
-                       stat = 0;
                         if (kicks)
                                 stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
                 }
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c

index a881c6a7ba74020db9d4fee663445112af0edcc8..33c79b6105c55fdc4f228bb0348fbc2b92d68f0f 100644 (file)
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -300,12 +300,12 @@ static int create_image(int platform_mode)
         save_processor_state();
         trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
         error = swsusp_arch_suspend();
+       /* Restore control flow magically appears here */
+       restore_processor_state();
         trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
         if (error)
                 printk(KERN_ERR "PM: Error %d creating hibernation image\n",
                         error);
-       /* Restore control flow magically appears here */
-       restore_processor_state();
         if (!in_suspend)
                 events_check_enabled = false;
  
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c

index 9a0178c2ac1df6b68f52a31e96beda95a1517781..b02228411d575b87ddc62db9f92b3fadf4765a5a 100644 (file)
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
   */
  static bool rtree_next_node(struct memory_bitmap *bm)
  {
-       bm->cur.node = list_entry(bm->cur.node->list.next,
-                                 struct rtree_node, list);
-       if (&bm->cur.node->list != &bm->cur.zone->leaves) {
+       if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
+               bm->cur.node = list_entry(bm->cur.node->list.next,
+                                         struct rtree_node, list);
                 bm->cur.node_pfn += BM_BITS_PER_BLOCK;
                 bm->cur.node_bit  = 0;
                 touch_softlockup_watchdog();
@@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
         }
  
         /* No more nodes, goto next zone */
-       bm->cur.zone = list_entry(bm->cur.zone->list.next,
+       if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
+               bm->cur.zone = list_entry(bm->cur.zone->list.next,
                                   struct mem_zone_bm_rtree, list);
-       if (&bm->cur.zone->list != &bm->zones) {
                 bm->cur.node = list_entry(bm->cur.zone->leaves.next,
                                           struct rtree_node, list);
                 bm->cur.node_pfn = 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 5c883fe8e44016df1109e8f66dd73377dfecb5e9..3d91b63dd2f626b3ef8b0384b5dc4f3cf90afa7c 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
  #include <linux/context_tracking.h>
  #include <linux/compiler.h>
  #include <linux/frame.h>
+#include <linux/prefetch.h>
  
  #include <asm/switch_to.h>
  #include <asm/tlb.h>
@@ -2971,6 +2972,23 @@ DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
  EXPORT_PER_CPU_SYMBOL(kstat);
  EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
  
+/*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+       struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+       prefetch(curr);
+       prefetch(&curr->exec_start);
+}
+
  /*
   * Return accounted runtime for the task.
   * In case the task is currently running, return the runtime plus current's
@@ -3005,6 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
          * thread, breaking clock_gettime().
          */
         if (task_current(rq, p) && task_on_rq_queued(p)) {
+               prefetch_curr_exec_start(p);
                 update_rq_clock(rq);
                 p->sched_class->update_curr(rq);
         }
@@ -3362,7 +3381,6 @@ static void __sched notrace __schedule(bool preempt)
  
         balance_callback(rq);
  }
-STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
  
  static inline void sched_submit_work(struct task_struct *tsk)
  {
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c

index 5be58820465cced6c0d1dc06c9de146bddcf664f..d4184498c9f5e3c8674015f97fe04da2417dafbd 100644 (file)
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -168,7 +168,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
  
         if (old_idx == IDX_INVALID) {
                 cp->size++;
-               cp->elements[cp->size - 1].dl = 0;
+               cp->elements[cp->size - 1].dl = dl;
                 cp->elements[cp->size - 1].cpu = cpu;
                 cp->elements[cpu].idx = cp->size - 1;
                 cpudl_change_key(cp, cp->size - 1, dl);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c

index 1934f658c03604272e5809f32fee1a6a3c928990..a846cf89eb96182950db834207322c92722d0d2e 100644 (file)
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime)
                 cpustat[CPUTIME_IDLE] += (__force u64) cputime;
  }
  
+/*
+ * When a guest is interrupted for a longer amount of time, missed clock
+ * ticks are not redelivered later. Due to that, this function may on
+ * occasion account more time than the calling functions think elapsed.
+ */
  static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
  {
  #ifdef CONFIG_PARAVIRT
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
          * idle, or potentially user or system time. Due to rounding,
          * other time can exceed ticks occasionally.
          */
-       other = account_other_time(cputime);
+       other = account_other_time(ULONG_MAX);
         if (other >= cputime)
                 return;
         cputime -= other;
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
         }
  
         cputime = cputime_one_jiffy;
-       steal = steal_account_process_time(cputime);
+       steal = steal_account_process_time(ULONG_MAX);
  
         if (steal >= cputime)
                 return;
@@ -508,13 +513,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
   */
  void account_idle_ticks(unsigned long ticks)
  {
+       cputime_t cputime, steal;
  
         if (sched_clock_irqtime) {
                 irqtime_account_idle_ticks(ticks);
                 return;
         }
  
-       account_idle_time(jiffies_to_cputime(ticks));
+       cputime = jiffies_to_cputime(ticks);
+       steal = steal_account_process_time(ULONG_MAX);
+
+       if (steal >= cputime)
+               return;
+
+       cputime -= steal;
+       account_idle_time(cputime);
  }
  
  /*
@@ -606,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr,
         stime = curr->stime;
         utime = curr->utime;
  
-       if (utime == 0) {
-               stime = rtime;
+       /*
+        * If either stime or both stime and utime are 0, assume all runtime is
+        * userspace. Once a task gets some ticks, the monotonicy code at
+        * 'update' will ensure things converge to the observed ratio.
+        */
+       if (stime == 0) {
+               utime = rtime;
                 goto update;
         }
  
-       if (stime == 0) {
-               utime = rtime;
+       if (utime == 0) {
+               stime = rtime;
                 goto update;
         }
  
         stime = scale_stime((__force u64)stime, (__force u64)rtime,
                             (__force u64)(stime + utime));
  
+update:
         /*
          * Make sure stime doesn't go backwards; this preserves monotonicity
          * for utime because rtime is monotonic.
@@ -641,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr,
                 stime = rtime - utime;
         }
  
-update:
         prev->stime = stime;
         prev->utime = utime;
  out:
@@ -686,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
         unsigned long now = READ_ONCE(jiffies);
         cputime_t delta, other;
  
+       /*
+        * Unlike tick based timing, vtime based timing never has lost
+        * ticks, and no need for steal time accounting to make up for
+        * lost ticks. Vtime accounts a rounded version of actual
+        * elapsed time. Limit account_other_time to prevent rounding
+        * errors from causing elapsed vtime to go negative.
+        */
         delta = jiffies_to_cputime(now - tsk->vtime_snap);
         other = account_other_time(delta);
         WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c

index fcb7f0217ff48610cca9bd5bd078f2f05df79164..1ce8867283dcde6e35ef74a72a1bca968decb918 100644 (file)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -658,8 +658,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
          *
          * XXX figure out if select_task_rq_dl() deals with offline cpus.
          */
-       if (unlikely(!rq->online))
+       if (unlikely(!rq->online)) {
+               lockdep_unpin_lock(&rq->lock, rf.cookie);
                 rq = dl_task_offline_migration(rq, p);
+               rf.cookie = lockdep_pin_lock(&rq->lock);
+       }
  
         /*
          * Queueing this task back might have overloaded rq, check if we need
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 4088eedea7637859844c777dfa56dfb23136c142..039de34f15216d19f61386b6d6c66744660516c9 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4269,7 +4269,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
         pcfs_rq = tg->parent->cfs_rq[cpu];
  
         cfs_rq->throttle_count = pcfs_rq->throttle_count;
-       pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+       cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
  }
  
  /* conditionally throttle active cfs_rq's from put_prev_entity() */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c

index 555670a5143c61bed5e7015f4f13849240be37b9..32bf6f75a8fec255c6d5fbf38e9fecd9e1e848fa 100644 (file)
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1496,6 +1496,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
         u64 expires = KTIME_MAX;
         unsigned long nextevt;
+       bool is_max_delta;
  
         /*
          * Pretend that there is no timer pending if the cpu is offline.
@@ -1506,6 +1507,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
  
         spin_lock(&base->lock);
         nextevt = __next_timer_interrupt(base);
+       is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
         base->next_expiry = nextevt;
         /*
          * We have a fresh next event. Check whether we can forward the base:
@@ -1519,7 +1521,8 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
                 expires = basem;
                 base->is_idle = false;
         } else {
-               expires = basem + (nextevt - basej) * TICK_NSEC;
+               if (!is_max_delta)
+                       expires = basem + (nextevt - basej) * TICK_NSEC;
                 /*
                  * If we expect to sleep more than a tick, mark the base idle:
                  */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig

index f4b86e8ca1e77dba8d969aa009491876186f457f..ba3326785ca4cffcf245da527f0386b2d8c23608 100644 (file)
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -24,11 +24,6 @@ config HAVE_FUNCTION_GRAPH_TRACER
         help
           See Documentation/trace/ftrace-design.txt
  
-config HAVE_FUNCTION_GRAPH_FP_TEST
-       bool
-       help
-         See Documentation/trace/ftrace-design.txt
-
  config HAVE_DYNAMIC_FTRACE
         bool
         help
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c

index 7363ccf795125ce14d89a3303300e47a096c877e..0cbe38a844fa9b57563cb650f93973b7d35e71f0 100644 (file)
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -119,7 +119,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
  /* Add a function return address to the trace stack on thread info.*/
  int
  ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
-                        unsigned long frame_pointer)
+                        unsigned long frame_pointer, unsigned long *retp)
  {
         unsigned long long calltime;
         int index;
@@ -171,7 +171,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
         current->ret_stack[index].func = func;
         current->ret_stack[index].calltime = calltime;
         current->ret_stack[index].subtime = 0;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
         current->ret_stack[index].fp = frame_pointer;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+       current->ret_stack[index].retp = retp;
+#endif
         *depth = current->curr_ret_stack;
  
         return 0;
@@ -204,7 +209,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
                 return;
         }
  
-#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
         /*
          * The arch may choose to record the frame pointer used
          * and check it here to make sure that it is what we expect it
@@ -279,6 +284,64 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
         return ret;
  }
  
+/**
+ * ftrace_graph_ret_addr - convert a potentially modified stack return address
+ *                        to its original value
+ *
+ * This function can be called by stack unwinding code to convert a found stack
+ * return address ('ret') to its original value, in case the function graph
+ * tracer has modified it to be 'return_to_handler'.  If the address hasn't
+ * been modified, the unchanged value of 'ret' is returned.
+ *
+ * 'idx' is a state variable which should be initialized by the caller to zero
+ * before the first call.
+ *
+ * 'retp' is a pointer to the return address on the stack.  It's ignored if
+ * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
+ */
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+                                   unsigned long ret, unsigned long *retp)
+{
+       int index = task->curr_ret_stack;
+       int i;
+
+       if (ret != (unsigned long)return_to_handler)
+               return ret;
+
+       if (index < -1)
+               index += FTRACE_NOTRACE_DEPTH;
+
+       if (index < 0)
+               return ret;
+
+       for (i = 0; i <= index; i++)
+               if (task->ret_stack[i].retp == retp)
+                       return task->ret_stack[i].ret;
+
+       return ret;
+}
+#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+                                   unsigned long ret, unsigned long *retp)
+{
+       int task_idx;
+
+       if (ret != (unsigned long)return_to_handler)
+               return ret;
+
+       task_idx = task->curr_ret_stack;
+
+       if (!task->ret_stack || task_idx < *idx)
+               return ret;
+
+       task_idx -= *idx;
+       (*idx)++;
+
+       return task->ret_stack[task_idx].ret;
+}
+#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+
  int __trace_graph_entry(struct trace_array *tr,
                                 struct ftrace_graph_ent *trace,
                                 unsigned long flags,
diff --git a/lib/dma-debug.c b/lib/dma-debug.c

index fcfa1939ac41abe768b401f235b8afd7c6d75dbe..06f02f6aecd2b7ee974497941d0c6b7302f83f1c 100644 (file)
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -22,6 +22,7 @@
  #include <linux/stacktrace.h>
  #include <linux/dma-debug.h>
  #include <linux/spinlock.h>
+#include <linux/vmalloc.h>
  #include <linux/debugfs.h>
  #include <linux/uaccess.h>
  #include <linux/export.h>
@@ -1164,11 +1165,32 @@ static void check_unmap(struct dma_debug_entry *ref)
         put_hash_bucket(bucket, &flags);
  }
  
-static void check_for_stack(struct device *dev, void *addr)
+static void check_for_stack(struct device *dev,
+                           struct page *page, size_t offset)
  {
-       if (object_is_on_stack(addr))
-               err_printk(dev, NULL, "DMA-API: device driver maps memory from "
-                               "stack [addr=%p]\n", addr);
+       void *addr;
+       struct vm_struct *stack_vm_area = task_stack_vm_area(current);
+
+       if (!stack_vm_area) {
+               /* Stack is direct-mapped. */
+               if (PageHighMem(page))
+                       return;
+               addr = page_address(page) + offset;
+               if (object_is_on_stack(addr))
+                       err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
+       } else {
+               /* Stack is vmalloced. */
+               int i;
+
+               for (i = 0; i < stack_vm_area->nr_pages; i++) {
+                       if (page != stack_vm_area->pages[i])
+                               continue;
+
+                       addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
+                       err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
+                       break;
+               }
+       }
  }
  
  static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
@@ -1291,10 +1313,11 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
         if (map_single)
                 entry->type = dma_debug_single;
  
+       check_for_stack(dev, page, offset);
+
         if (!PageHighMem(page)) {
                 void *addr = page_address(page) + offset;
  
-               check_for_stack(dev, addr);
                 check_for_illegal_area(dev, addr, size);
         }
  
@@ -1386,8 +1409,9 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
                 entry->sg_call_ents   = nents;
                 entry->sg_mapped_ents = mapped_ents;
  
+               check_for_stack(dev, sg_page(s), s->offset);
+
                 if (!PageHighMem(sg_page(s))) {
-                       check_for_stack(dev, sg_virt(s));
                         check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
                 }
  
diff --git a/lib/rhashtable.c b/lib/rhashtable.c

index 5d845ffd7982770d39af65b6c8906ef7000a4b95..5ba520b544d73ff1a62705584d1971aeee6ec2ba 100644 (file)
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
  
  #define HASH_DEFAULT_SIZE      64UL
  #define HASH_MIN_SIZE          4U
-#define BUCKET_LOCKS_PER_CPU   128UL
+#define BUCKET_LOCKS_PER_CPU   32UL
  
  static u32 head_hashfn(struct rhashtable *ht,
                        const struct bucket_table *tbl,
@@ -70,7 +70,7 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
         unsigned int nr_pcpus = num_possible_cpus();
  #endif
  
-       nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+       nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
         size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
  
         /* Never allocate more than 0.5 locks per bucket */
@@ -83,6 +83,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
                         tbl->locks = vmalloc(size * sizeof(spinlock_t));
                 else
  #endif
+               if (gfp != GFP_KERNEL)
+                       gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
                 tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
                                            gfp);
                 if (!tbl->locks)
@@ -321,12 +324,14 @@ static int rhashtable_expand(struct rhashtable *ht)
  static int rhashtable_shrink(struct rhashtable *ht)
  {
         struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-       unsigned int size;
+       unsigned int nelems = atomic_read(&ht->nelems);
+       unsigned int size = 0;
         int err;
  
         ASSERT_RHT_MUTEX(ht);
  
-       size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+       if (nelems)
+               size = roundup_pow_of_two(nelems * 3 / 2);
         if (size < ht->p.min_size)
                 size = ht->p.min_size;
  
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c

index 297fdb5e74bd05bc4a3258c90465510463cbf8fb..64e899b633371d252deaf057156408a8ea8f4625 100644 (file)
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
  
  static int max_size = 0;
  module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
  
  static bool shrinking = false;
  module_param(shrinking, bool, 0);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index b9aa1b0b38b0ecdb769cd33a8d77c0ffda2621bf..87e11d8ad536b8c360740ca9ce96461b0daeaaee 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1448,6 +1448,7 @@ static void dissolve_free_huge_page(struct page *page)
                 list_del(&page->lru);
                 h->free_huge_pages--;
                 h->free_huge_pages_node[nid]--;
+               h->max_huge_pages--;
                 update_and_free_page(h, page);
         }
         spin_unlock(&hugetlb_lock);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c

index b6728a33a4aca104fde8022b90fdf2df5630af31..baabaad4a4aaa89bb13fc691cf5df58af46c8b3b 100644 (file)
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -217,11 +217,8 @@ void quarantine_reduce(void)
         new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
                 QUARANTINE_FRACTION;
         percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
-       if (WARN_ONCE(new_quarantine_size < percpu_quarantines,
-               "Too little memory, disabling global KASAN quarantine.\n"))
-               new_quarantine_size = 0;
-       else
-               new_quarantine_size -= percpu_quarantines;
+       new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
+               0 : new_quarantine_size - percpu_quarantines;
         WRITE_ONCE(quarantine_size, new_quarantine_size);
  
         last = global_quarantine.head;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index e74d7080ec9e63681ce3145cda26d2fce6eb8ed3..2ff0289ad061322298b472edba9eebb9abb302a7 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4077,14 +4077,32 @@ static struct cftype mem_cgroup_legacy_files[] = {
  
  static DEFINE_IDR(mem_cgroup_idr);
  
-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
  {
-       atomic_inc(&memcg->id.ref);
+       atomic_add(n, &memcg->id.ref);
  }
  
-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
  {
-       if (atomic_dec_and_test(&memcg->id.ref)) {
+       while (!atomic_inc_not_zero(&memcg->id.ref)) {
+               /*
+                * The root cgroup cannot be destroyed, so it's refcount must
+                * always be >= 1.
+                */
+               if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+                       VM_BUG_ON(1);
+                       break;
+               }
+               memcg = parent_mem_cgroup(memcg);
+               if (!memcg)
+                       memcg = root_mem_cgroup;
+       }
+       return memcg;
+}
+
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+{
+       if (atomic_sub_and_test(n, &memcg->id.ref)) {
                 idr_remove(&mem_cgroup_idr, memcg->id.id);
                 memcg->id.id = 0;
  
@@ -4093,6 +4111,16 @@ static void mem_cgroup_id_put(struct mem_cgroup *memcg)
         }
  }
  
+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+       mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+       mem_cgroup_id_put_many(memcg, 1);
+}
+
  /**
   * mem_cgroup_from_id - look up a memcg from a memcg id
   * @id: the memcg id to look up
@@ -4727,6 +4755,8 @@ static void __mem_cgroup_clear_mc(void)
                 if (!mem_cgroup_is_root(mc.from))
                         page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
  
+               mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
                 /*
                  * we charged both to->memory and to->memsw, so we
                  * should uncharge to->memory.
@@ -4734,9 +4764,9 @@ static void __mem_cgroup_clear_mc(void)
                 if (!mem_cgroup_is_root(mc.to))
                         page_counter_uncharge(&mc.to->memory, mc.moved_swap);
  
-               css_put_many(&mc.from->css, mc.moved_swap);
+               mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+               css_put_many(&mc.to->css, mc.moved_swap);
  
-               /* we've already done css_get(mc.to) */
                 mc.moved_swap = 0;
         }
         memcg_oom_recover(from);
@@ -5800,7 +5830,7 @@ subsys_initcall(mem_cgroup_init);
   */
  void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
  {
-       struct mem_cgroup *memcg;
+       struct mem_cgroup *memcg, *swap_memcg;
         unsigned short oldid;
  
         VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -5815,16 +5845,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
         if (!memcg)
                 return;
  
-       mem_cgroup_id_get(memcg);
-       oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+       /*
+        * In case the memcg owning these pages has been offlined and doesn't
+        * have an ID allocated to it anymore, charge the closest online
+        * ancestor for the swap instead and transfer the memory+swap charge.
+        */
+       swap_memcg = mem_cgroup_id_get_online(memcg);
+       oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
         VM_BUG_ON_PAGE(oldid, page);
-       mem_cgroup_swap_statistics(memcg, true);
+       mem_cgroup_swap_statistics(swap_memcg, true);
  
         page->mem_cgroup = NULL;
  
         if (!mem_cgroup_is_root(memcg))
                 page_counter_uncharge(&memcg->memory, 1);
  
+       if (memcg != swap_memcg) {
+               if (!mem_cgroup_is_root(swap_memcg))
+                       page_counter_charge(&swap_memcg->memsw, 1);
+               page_counter_uncharge(&memcg->memsw, 1);
+       }
+
         /*
          * Interrupts should be disabled here because the caller holds the
          * mapping->tree_lock lock which is taken with interrupts-off. It is
@@ -5863,11 +5904,14 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
         if (!memcg)
                 return 0;
  
+       memcg = mem_cgroup_id_get_online(memcg);
+
         if (!mem_cgroup_is_root(memcg) &&
-           !page_counter_try_charge(&memcg->swap, 1, &counter))
+           !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+               mem_cgroup_id_put(memcg);
                 return -ENOMEM;
+       }
  
-       mem_cgroup_id_get(memcg);
         oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
         VM_BUG_ON_PAGE(oldid, page);
         mem_cgroup_swap_statistics(memcg, true);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index 3894b65b155555f11076f0cae90f71e2475b6929..41266dc29f33fb1278d7e4e9d6fd2efab69380a1 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1219,6 +1219,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
  
         /* init node's zones as empty zones, we don't have any present pages.*/
         free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+       pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
  
         /*
          * The node we allocated has no zone fallback lists. For avoiding
@@ -1249,6 +1250,7 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
  static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
  {
         arch_refresh_nodedata(nid, NULL);
+       free_percpu(pgdat->per_cpu_nodestats);
         arch_free_nodedata(pgdat);
         return;
  }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c

index 7d0a275df822e9e14c55e5d472cfc473ac3ae173..d53a9aa00977cbd0f81970e9e8a30b011cc73f31 100644 (file)
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -764,7 +764,7 @@ bool task_will_free_mem(struct task_struct *task)
  {
         struct mm_struct *mm = task->mm;
         struct task_struct *p;
-       bool ret;
+       bool ret = true;
  
         /*
          * Skip tasks without mm because it might have passed its exit_mm and
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index ee744fa3b93d50a9215daf43966cc97f957c77a4..3fbe73a6fe4b6869dcd44a45de43928d4c08fe0c 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4060,7 +4060,7 @@ long si_mem_available(void)
         int lru;
  
         for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-               pages[lru] = global_page_state(NR_LRU_BASE + lru);
+               pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
  
         for_each_zone(zone)
                 wmark_low += zone->watermark[WMARK_LOW];
@@ -4757,6 +4757,8 @@ int local_memory_node(int node)
  }
  #endif
  
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
  #else  /* CONFIG_NUMA */
  
  static void set_zonelist_order(void)
@@ -5878,9 +5880,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
                 zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
  #ifdef CONFIG_NUMA
                 zone->node = nid;
-               pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
-                                               / 100;
-               pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
  #endif
                 zone->name = zone_names[j];
                 zone->zone_pgdat = pgdat;
@@ -6801,6 +6800,12 @@ int __meminit init_per_zone_wmark_min(void)
         setup_per_zone_wmarks();
         refresh_zone_stat_thresholds();
         setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+       setup_min_unmapped_ratio();
+       setup_min_slab_ratio();
+#endif
+
         return 0;
  }
  core_initcall(init_per_zone_wmark_min)
@@ -6842,43 +6847,58 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
  }
  
  #ifdef CONFIG_NUMA
+static void setup_min_unmapped_ratio(void)
+{
+       pg_data_t *pgdat;
+       struct zone *zone;
+
+       for_each_online_pgdat(pgdat)
+               pgdat->min_unmapped_pages = 0;
+
+       for_each_zone(zone)
+               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
+                               sysctl_min_unmapped_ratio) / 100;
+}
+
+
  int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
  {
-       struct pglist_data *pgdat;
-       struct zone *zone;
         int rc;
  
         rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
         if (rc)
                 return rc;
  
+       setup_min_unmapped_ratio();
+
+       return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+       pg_data_t *pgdat;
+       struct zone *zone;
+
         for_each_online_pgdat(pgdat)
                 pgdat->min_slab_pages = 0;
  
         for_each_zone(zone)
-               zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
-                               sysctl_min_unmapped_ratio) / 100;
-       return 0;
+               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
+                               sysctl_min_slab_ratio) / 100;
  }
  
  int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *length, loff_t *ppos)
  {
-       struct pglist_data *pgdat;
-       struct zone *zone;
         int rc;
  
         rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
         if (rc)
                 return rc;
  
-       for_each_online_pgdat(pgdat)
-               pgdat->min_slab_pages = 0;
+       setup_min_slab_ratio();
  
-       for_each_zone(zone)
-               zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
-                               sysctl_min_slab_ratio) / 100;
         return 0;
  }
  #endif
diff --git a/mm/rmap.c b/mm/rmap.c

index 709bc83703b1bfef419fa674ef5b5e28f5d70f05..1ef36404e7b2d7daeef2061ff8f79524d7750bb9 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1284,8 +1284,9 @@ void page_add_file_rmap(struct page *page, bool compound)
                 VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
                 __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
         } else {
-               if (PageTransCompound(page)) {
-                       VM_BUG_ON_PAGE(!PageLocked(page), page);
+               if (PageTransCompound(page) && page_mapping(page)) {
+                       VM_WARN_ON_ONCE(!PageLocked(page));
+
                         SetPageDoubleMap(compound_head(page));
                         if (PageMlocked(page))
                                 clear_page_mlock(compound_head(page));
@@ -1303,7 +1304,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
  {
         int i, nr = 1;
  
-       VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+       VM_BUG_ON_PAGE(compound && !PageHead(page), page);
         lock_page_memcg(page);
  
         /* Hugepages are not counted in NR_FILE_MAPPED for now. */
diff --git a/mm/shmem.c b/mm/shmem.c

index 7f7748a0f9e1f738fd1ffcaceccdee2ae54d8d35..fd8b2b5741b141a7bc4457d93929c100d988a639 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3975,7 +3975,9 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
  
  struct kobj_attribute shmem_enabled_attr =
         __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
  
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
  bool shmem_huge_enabled(struct vm_area_struct *vma)
  {
         struct inode *inode = file_inode(vma->vm_file);
@@ -4006,7 +4008,7 @@ bool shmem_huge_enabled(struct vm_area_struct *vma)
                         return false;
         }
  }
-#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
  
  #else /* !CONFIG_SHMEM */
  
diff --git a/mm/slub.c b/mm/slub.c

index cead06394e9e5e96539f11624698b8e797bc3a43..9adae58462f8191b22659b1aa438ec637f6fc765 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3629,6 +3629,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
   */
  static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
  {
+       LIST_HEAD(discard);
         struct page *page, *h;
  
         BUG_ON(irqs_disabled());
@@ -3636,13 +3637,16 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
         list_for_each_entry_safe(page, h, &n->partial, lru) {
                 if (!page->inuse) {
                         remove_partial(n, page);
-                       discard_slab(s, page);
+                       list_add(&page->lru, &discard);
                 } else {
                         list_slab_objects(s, page,
                         "Objects remaining in %s on __kmem_cache_shutdown()");
                 }
         }
         spin_unlock_irq(&n->list_lock);
+
+       list_for_each_entry_safe(page, h, &discard, lru)
+               discard_slab(s, page);
  }
  
  /*
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c

index 82a116ba590eb5d289a76f73ca09f1cab9b4fdf5..8de138d3306bdbe6f3164db323637ded5bf5b5b3 100644 (file)
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,7 +169,7 @@ int register_vlan_dev(struct net_device *dev)
         if (err < 0)
                 goto out_uninit_mvrp;
  
-       vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
+       vlan->nest_level = dev_get_nest_level(real_dev) + 1;
         err = register_netdevice(dev);
         if (err < 0)
                 goto out_uninit_mvrp;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c

index 4acb1d5417aaf980bc7797c817eb9a9a350ecbf5..f24b25c25106fb55fb713b7308a8d43413a2143b 100644 (file)
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -507,8 +507,8 @@ err_out:
                 /* wakeup anybody waiting for slots to pin pages */
                 wake_up(&vp_wq);
         }
-       kfree(in_pages);
-       kfree(out_pages);
+       kvfree(in_pages);
+       kvfree(out_pages);
         return err;
  }
  
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c

index c18080ad408572f53df75e18e2b56f714f784edb..cd620fab41b07827b922ac2c8fd1cbc2d50b143a 100644 (file)
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -267,7 +267,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
  
         /* If old entry was unassociated with any port, then delete it. */
         f = __br_fdb_get(br, br->dev->dev_addr, 0);
-       if (f && f->is_local && !f->dst)
+       if (f && f->is_local && !f->dst && !f->added_by_user)
                 fdb_delete_local(br, NULL, f);
  
         fdb_insert(br, NULL, newaddr, 0);
@@ -282,7 +282,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
                 if (!br_vlan_should_use(v))
                         continue;
                 f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
-               if (f && f->is_local && !f->dst)
+               if (f && f->is_local && !f->dst && !f->added_by_user)
                         fdb_delete_local(br, NULL, f);
                 fdb_insert(br, NULL, newaddr, v->vid);
         }
@@ -764,20 +764,25 @@ out:
  }
  
  /* Update (create or replace) forwarding database entry */
-static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
-                        __u16 state, __u16 flags, __u16 vid)
+static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
+                        const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
  {
-       struct net_bridge *br = source->br;
         struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
         struct net_bridge_fdb_entry *fdb;
         bool modified = false;
  
         /* If the port cannot learn allow only local and static entries */
-       if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
+       if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
             !(source->state == BR_STATE_LEARNING ||
               source->state == BR_STATE_FORWARDING))
                 return -EPERM;
  
+       if (!source && !(state & NUD_PERMANENT)) {
+               pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n",
+                       br->dev->name);
+               return -EINVAL;
+       }
+
         fdb = fdb_find(head, addr, vid);
         if (fdb == NULL) {
                 if (!(flags & NLM_F_CREATE))
@@ -832,22 +837,28 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
         return 0;
  }
  
-static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
-              const unsigned char *addr, u16 nlh_flags, u16 vid)
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
+                       struct net_bridge_port *p, const unsigned char *addr,
+                       u16 nlh_flags, u16 vid)
  {
         int err = 0;
  
         if (ndm->ndm_flags & NTF_USE) {
+               if (!p) {
+                       pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n",
+                               br->dev->name);
+                       return -EINVAL;
+               }
                 local_bh_disable();
                 rcu_read_lock();
-               br_fdb_update(p->br, p, addr, vid, true);
+               br_fdb_update(br, p, addr, vid, true);
                 rcu_read_unlock();
                 local_bh_enable();
         } else {
-               spin_lock_bh(&p->br->hash_lock);
-               err = fdb_add_entry(p, addr, ndm->ndm_state,
+               spin_lock_bh(&br->hash_lock);
+               err = fdb_add_entry(br, p, addr, ndm->ndm_state,
                                     nlh_flags, vid);
-               spin_unlock_bh(&p->br->hash_lock);
+               spin_unlock_bh(&br->hash_lock);
         }
  
         return err;
@@ -884,6 +895,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                                 dev->name);
                         return -EINVAL;
                 }
+               br = p->br;
                 vg = nbp_vlan_group(p);
         }
  
@@ -895,15 +907,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                 }
  
                 /* VID was specified, so use it. */
-               if (dev->priv_flags & IFF_EBRIDGE)
-                       err = br_fdb_insert(br, NULL, addr, vid);
-               else
-                       err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
         } else {
-               if (dev->priv_flags & IFF_EBRIDGE)
-                       err = br_fdb_insert(br, NULL, addr, 0);
-               else
-                       err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
                 if (err || !vg || !vg->num_vlans)
                         goto out;
  
@@ -914,11 +920,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                 list_for_each_entry(v, &vg->vlan_list, vlist) {
                         if (!br_vlan_should_use(v))
                                 continue;
-                       if (dev->priv_flags & IFF_EBRIDGE)
-                               err = br_fdb_insert(br, NULL, addr, v->vid);
-                       else
-                               err = __br_fdb_add(ndm, p, addr, nlh_flags,
-                                                  v->vid);
+                       err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
                         if (err)
                                 goto out;
                 }
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c

index c83326c5ba580480b877079d2a465430ff408cf5..ef34a02719d73147f4a4af29f3d861b4dc34391e 100644 (file)
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -574,7 +574,7 @@ static void complete_generic_request(struct ceph_mon_generic_request *req)
         put_generic_request(req);
  }
  
-void cancel_generic_request(struct ceph_mon_generic_request *req)
+static void cancel_generic_request(struct ceph_mon_generic_request *req)
  {
         struct ceph_mon_client *monc = req->monc;
         struct ceph_mon_generic_request *lookup_req;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c

index b5ec09612ff71daeb1b95ed4b6f939a172cc7545..a97e7b506612b4255f4b99de76d74c46a1b3896d 100644 (file)
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4220,7 +4220,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
  
                 pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
                                                GFP_NOIO);
-               if (!pages) {
+               if (IS_ERR(pages)) {
                         ceph_msg_put(m);
                         return NULL;
                 }
diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c

index ca53c8319209469a25011b15d26af951a09d392d..22fb96efcf3467713a9e5430057ead684326e3db 100644 (file)
--- a/net/ceph/string_table.c
+++ b/net/ceph/string_table.c
@@ -84,12 +84,6 @@ retry:
  }
  EXPORT_SYMBOL(ceph_find_or_create_string);
  
-static void ceph_free_string(struct rcu_head *head)
-{
-       struct ceph_string *cs = container_of(head, struct ceph_string, rcu);
-       kfree(cs);
-}
-
  void ceph_release_string(struct kref *ref)
  {
         struct ceph_string *cs = container_of(ref, struct ceph_string, kref);
@@ -101,7 +95,7 @@ void ceph_release_string(struct kref *ref)
         }
         spin_unlock(&string_tree_lock);
  
-       call_rcu(&cs->rcu, ceph_free_string);
+       kfree_rcu(cs, rcu);
  }
  EXPORT_SYMBOL(ceph_release_string);
  
diff --git a/net/core/dev.c b/net/core/dev.c

index 4ce07dc25573ed3d20f181f5b36327cb0f407fe3..dd6ce598de897363bbb7eea252a22cbc2e846dc4 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6045,8 +6045,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
  EXPORT_SYMBOL(netdev_lower_dev_get_private);
  
  
-int dev_get_nest_level(struct net_device *dev,
-                      bool (*type_check)(const struct net_device *dev))
+int dev_get_nest_level(struct net_device *dev)
  {
         struct net_device *lower = NULL;
         struct list_head *iter;
@@ -6056,15 +6055,12 @@ int dev_get_nest_level(struct net_device *dev,
         ASSERT_RTNL();
  
         netdev_for_each_lower_dev(dev, lower, iter) {
-               nest = dev_get_nest_level(lower, type_check);
+               nest = dev_get_nest_level(lower);
                 if (max_nest < nest)
                         max_nest = nest;
         }
  
-       if (type_check(dev))
-               max_nest++;
-
-       return max_nest;
+       return max_nest + 1;
  }
  EXPORT_SYMBOL(dev_get_nest_level);
  
diff --git a/net/core/filter.c b/net/core/filter.c

index 5708999f8a7945ec738e2043057817403e1b64ef..cb06aceb512acd225eb909fd36055fbf2e9dbf51 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1355,56 +1355,47 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
  {
         int err;
  
-       if (!skb_cloned(skb))
-               return 0;
-       if (skb_clone_writable(skb, write_len))
-               return 0;
-       err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-       if (!err)
-               bpf_compute_data_end(skb);
+       err = skb_ensure_writable(skb, write_len);
+       bpf_compute_data_end(skb);
+
         return err;
  }
  
+static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
+{
+       if (skb_at_tc_ingress(skb))
+               skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
+static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
+{
+       if (skb_at_tc_ingress(skb))
+               skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
  static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
  {
-       struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
         struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       int offset = (int) r2;
+       unsigned int offset = (unsigned int) r2;
         void *from = (void *) (long) r3;
         unsigned int len = (unsigned int) r4;
         void *ptr;
  
         if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
                 return -EINVAL;
-
-       /* bpf verifier guarantees that:
-        * 'from' pointer points to bpf program stack
-        * 'len' bytes of it were initialized
-        * 'len' > 0
-        * 'skb' is a valid pointer to 'struct sk_buff'
-        *
-        * so check for invalid 'offset' and too large 'len'
-        */
-       if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
+       if (unlikely(offset > 0xffff))
                 return -EFAULT;
         if (unlikely(bpf_try_make_writable(skb, offset + len)))
                 return -EFAULT;
  
-       ptr = skb_header_pointer(skb, offset, len, sp->buff);
-       if (unlikely(!ptr))
-               return -EFAULT;
-
+       ptr = skb->data + offset;
         if (flags & BPF_F_RECOMPUTE_CSUM)
-               skb_postpull_rcsum(skb, ptr, len);
+               __skb_postpull_rcsum(skb, ptr, len, offset);
  
         memcpy(ptr, from, len);
  
-       if (ptr == sp->buff)
-               /* skb_store_bits cannot return -EFAULT here */
-               skb_store_bits(skb, offset, ptr, len);
-
         if (flags & BPF_F_RECOMPUTE_CSUM)
-               skb_postpush_rcsum(skb, ptr, len);
+               __skb_postpush_rcsum(skb, ptr, len, offset);
         if (flags & BPF_F_INVALIDATE_HASH)
                 skb_clear_hash(skb);
  
@@ -1425,12 +1416,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
  static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  {
         const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
-       int offset = (int) r2;
+       unsigned int offset = (unsigned int) r2;
         void *to = (void *)(unsigned long) r3;
         unsigned int len = (unsigned int) r4;
         void *ptr;
  
-       if (unlikely((u32) offset > 0xffff))
+       if (unlikely(offset > 0xffff))
                 goto err_clear;
  
         ptr = skb_header_pointer(skb, offset, len, to);
@@ -1458,20 +1449,17 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
  static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
  {
         struct sk_buff *skb = (struct sk_buff *) (long) r1;
-       int offset = (int) r2;
-       __sum16 sum, *ptr;
+       unsigned int offset = (unsigned int) r2;
+       __sum16 *ptr;
  
         if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
                 return -EINVAL;
-       if (unlikely((u32) offset > 0xffff))
+       if (unlikely(offset > 0xffff || offset & 1))
                 return -EFAULT;
-       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
-               return -EFAULT;
-
-       ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-       if (unlikely(!ptr))
+       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
                 return -EFAULT;
  
+       ptr = (__sum16 *)(skb->data + offset);
         switch (flags & BPF_F_HDR_FIELD_MASK) {
         case 0:
                 if (unlikely(from != 0))
@@ -1489,10 +1477,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
                 return -EINVAL;
         }
  
-       if (ptr == &sum)
-               /* skb_store_bits guaranteed to not return -EFAULT here */
-               skb_store_bits(skb, offset, ptr, sizeof(sum));
-
         return 0;
  }
  
@@ -1512,20 +1496,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
         struct sk_buff *skb = (struct sk_buff *) (long) r1;
         bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
         bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
-       int offset = (int) r2;
-       __sum16 sum, *ptr;
+       unsigned int offset = (unsigned int) r2;
+       __sum16 *ptr;
  
         if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
                                BPF_F_HDR_FIELD_MASK)))
                 return -EINVAL;
-       if (unlikely((u32) offset > 0xffff))
+       if (unlikely(offset > 0xffff || offset & 1))
                 return -EFAULT;
-       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+       if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
                 return -EFAULT;
  
-       ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-       if (unlikely(!ptr))
-               return -EFAULT;
+       ptr = (__sum16 *)(skb->data + offset);
         if (is_mmzero && !*ptr)
                 return 0;
  
@@ -1548,10 +1530,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
  
         if (is_mmzero && !*ptr)
                 *ptr = CSUM_MANGLED_0;
-       if (ptr == &sum)
-               /* skb_store_bits guaranteed to not return -EFAULT here */
-               skb_store_bits(skb, offset, ptr, sizeof(sum));
-
         return 0;
  }
  
@@ -1607,9 +1585,6 @@ static const struct bpf_func_proto bpf_csum_diff_proto = {
  
  static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
  {
-       if (skb_at_tc_ingress(skb))
-               skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
-
         return dev_forward_skb(dev, skb);
  }
  
@@ -1648,6 +1623,8 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
         if (unlikely(!skb))
                 return -ENOMEM;
  
+       bpf_push_mac_rcsum(skb);
+
         return flags & BPF_F_INGRESS ?
                __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
  }
@@ -1693,6 +1670,8 @@ int skb_do_redirect(struct sk_buff *skb)
                 return -EINVAL;
         }
  
+       bpf_push_mac_rcsum(skb);
+
         return ri->flags & BPF_F_INGRESS ?
                __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
  }
@@ -1756,7 +1735,10 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
                      vlan_proto != htons(ETH_P_8021AD)))
                 vlan_proto = htons(ETH_P_8021Q);
  
+       bpf_push_mac_rcsum(skb);
         ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+       bpf_pull_mac_rcsum(skb);
+
         bpf_compute_data_end(skb);
         return ret;
  }
@@ -1776,7 +1758,10 @@ static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
         struct sk_buff *skb = (struct sk_buff *) (long) r1;
         int ret;
  
+       bpf_push_mac_rcsum(skb);
         ret = skb_vlan_pop(skb);
+       bpf_pull_mac_rcsum(skb);
+
         bpf_compute_data_end(skb);
         return ret;
  }
@@ -2298,7 +2283,7 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
  }
  
  #ifdef CONFIG_SOCK_CGROUP_DATA
-static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  {
         struct sk_buff *skb = (struct sk_buff *)(long)r1;
         struct bpf_map *map = (struct bpf_map *)(long)r2;
@@ -2321,8 +2306,8 @@ static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
         return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
  }
  
-static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
-       .func           = bpf_skb_in_cgroup,
+static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
+       .func           = bpf_skb_under_cgroup,
         .gpl_only       = false,
         .ret_type       = RET_INTEGER,
         .arg1_type      = ARG_PTR_TO_CTX,
@@ -2402,8 +2387,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
         case BPF_FUNC_get_smp_processor_id:
                 return &bpf_get_smp_processor_id_proto;
  #ifdef CONFIG_SOCK_CGROUP_DATA
-       case BPF_FUNC_skb_in_cgroup:
-               return &bpf_skb_in_cgroup_proto;
+       case BPF_FUNC_skb_under_cgroup:
+               return &bpf_skb_under_cgroup_proto;
  #endif
         default:
                 return sk_filter_func_proto(func_id);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c

index d07fc076bea0a4bc96f68075fb3bb79b95007e63..febca0f1008cb898014c95e332c8398b8a6bcd5d 100644 (file)
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2452,9 +2452,7 @@ struct fib_route_iter {
  static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
                                             loff_t pos)
  {
-       struct fib_table *tb = iter->main_tb;
         struct key_vector *l, **tp = &iter->tnode;
-       struct trie *t;
         t_key key;
  
         /* use cache location of next-to-find key */
@@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
                 pos -= iter->pos;
                 key = iter->key;
         } else {
-               t = (struct trie *)tb->tb_data;
-               iter->tnode = t->kv;
                 iter->pos = 0;
                 key = 0;
         }
@@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
                 return NULL;
  
         iter->main_tb = tb;
+       t = (struct trie *)tb->tb_data;
+       iter->tnode = t->kv;
  
         if (*pos != 0)
                 return fib_route_get_idx(iter, *pos);
  
-       t = (struct trie *)tb->tb_data;
-       iter->tnode = t->kv;
         iter->pos = 0;
         iter->key = 0;
  
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c

index 5b1481be028212cfb61735a2d8ff8fbccbee203f..113cc43df789a34b80fcf897621c936ff0cd6ca8 100644 (file)
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -370,7 +370,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
                          tunnel->parms.o_flags, proto, tunnel->parms.o_key,
                          htonl(tunnel->o_seqno));
  
-       skb_set_inner_protocol(skb, proto);
         ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
  }
  
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c

index a917903d5e9742fb07bac1b2a7fa94ee069c0d54..cc701fa70b1231c95f1871bc4d0227731c33d768 100644 (file)
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
         .get_link_net   = ip_tunnel_get_link_net,
  };
  
+static bool is_vti_tunnel(const struct net_device *dev)
+{
+       return dev->netdev_ops == &vti_netdev_ops;
+}
+
+static int vti_device_event(struct notifier_block *unused,
+                           unsigned long event, void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct ip_tunnel *tunnel = netdev_priv(dev);
+
+       if (!is_vti_tunnel(dev))
+               return NOTIFY_DONE;
+
+       switch (event) {
+       case NETDEV_DOWN:
+               if (!net_eq(tunnel->net, dev_net(dev)))
+                       xfrm_garbage_collect(tunnel->net);
+               break;
+       }
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block vti_notifier_block __read_mostly = {
+       .notifier_call = vti_device_event,
+};
+
  static int __init vti_init(void)
  {
         const char *msg;
@@ -564,6 +591,8 @@ static int __init vti_init(void)
  
         pr_info("IPv4 over IPsec tunneling driver\n");
  
+       register_netdevice_notifier(&vti_notifier_block);
+
         msg = "tunnel device";
         err = register_pernet_device(&vti_net_ops);
         if (err < 0)
@@ -596,6 +625,7 @@ xfrm_proto_ah_failed:
  xfrm_proto_esp_failed:
         unregister_pernet_device(&vti_net_ops);
  pernet_dev_failed:
+       unregister_netdevice_notifier(&vti_notifier_block);
         pr_err("vti init: failed to register %s\n", msg);
         return err;
  }
@@ -607,6 +637,7 @@ static void __exit vti_fini(void)
         xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
         xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
         unregister_pernet_device(&vti_net_ops);
+       unregister_netdevice_notifier(&vti_notifier_block);
  }
  
  module_init(vti_init);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c

index ab3e796596b1dc065ba46390e82306fe138c8509..df8425fcbc2cab843c9af24272d45fc95d4040b7 100644 (file)
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3543,7 +3543,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
         /* combine the user config with event to determine if permanent
          * addresses are to be removed from address hash table
          */
-       keep_addr = !(how || _keep_addr <= 0);
+       keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
  
         /* Step 2: clear hash table */
         for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3599,7 +3599,7 @@ restart:
         /* re-combine the user config with event to determine if permanent
          * addresses are to be removed from the interface list
          */
-       keep_addr = (!how && _keep_addr > 0);
+       keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
  
         INIT_LIST_HEAD(&del_list);
         list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c

index c53b92c617c545c041d50a4e458e0533d93a59df..37ac9de713c69af30ae50d03e53ee472a7520b98 100644 (file)
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -952,8 +952,10 @@ calipso_opt_insert(struct ipv6_opt_hdr *hop,
                 memcpy(new, hop, start);
         ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def,
                                  secattr);
-       if (ret_val < 0)
+       if (ret_val < 0) {
+               kfree(new);
                 return ERR_PTR(ret_val);
+       }
  
         buf_len = start + ret_val;
         /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c

index 776d145113e138872f45d97e7f66ff0416762d85..704274cbd495848848f0daf7dab53c57018b3e2d 100644 (file)
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -519,8 +519,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
         gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
                          protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
  
-       skb_set_inner_protocol(skb, protocol);
-
         return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
                             NEXTHDR_GRE);
  }
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c

index fed40d1ec29b1fc331ad04b5a7173b183a116279..0900352c924c163b4e0d945a5a30699628b5abfb 100644 (file)
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -55,7 +55,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
         struct icmp6hdr user_icmph;
         int addr_type;
         struct in6_addr *daddr;
-       int iif = 0;
+       int oif = 0;
         struct flowi6 fl6;
         int err;
         struct dst_entry *dst;
@@ -78,25 +78,30 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                 if (u->sin6_family != AF_INET6) {
                         return -EAFNOSUPPORT;
                 }
-               if (sk->sk_bound_dev_if &&
-                   sk->sk_bound_dev_if != u->sin6_scope_id) {
-                       return -EINVAL;
-               }
                 daddr = &(u->sin6_addr);
-               iif = u->sin6_scope_id;
+               if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
+                       oif = u->sin6_scope_id;
         } else {
                 if (sk->sk_state != TCP_ESTABLISHED)
                         return -EDESTADDRREQ;
                 daddr = &sk->sk_v6_daddr;
         }
  
-       if (!iif)
-               iif = sk->sk_bound_dev_if;
+       if (!oif)
+               oif = sk->sk_bound_dev_if;
+
+       if (!oif)
+               oif = np->sticky_pktinfo.ipi6_ifindex;
+
+       if (!oif && ipv6_addr_is_multicast(daddr))
+               oif = np->mcast_oif;
+       else if (!oif)
+               oif = np->ucast_oif;
  
         addr_type = ipv6_addr_type(daddr);
-       if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
-               return -EINVAL;
-       if (addr_type & IPV6_ADDR_MAPPED)
+       if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
+           (addr_type & IPV6_ADDR_MAPPED) ||
+           (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
                 return -EINVAL;
  
         /* TODO: use ip6_datagram_send_ctl to get options from cmsg */
@@ -106,16 +111,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
         fl6.flowi6_proto = IPPROTO_ICMPV6;
         fl6.saddr = np->saddr;
         fl6.daddr = *daddr;
+       fl6.flowi6_oif = oif;
         fl6.flowi6_mark = sk->sk_mark;
         fl6.fl6_icmp_type = user_icmph.icmp6_type;
         fl6.fl6_icmp_code = user_icmph.icmp6_code;
         security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
  
-       if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
-               fl6.flowi6_oif = np->mcast_oif;
-       else if (!fl6.flowi6_oif)
-               fl6.flowi6_oif = np->ucast_oif;
-
         ipc6.tclass = np->tclass;
         fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
  
diff --git a/net/irda/iriap.c b/net/irda/iriap.c

index 4a7ae32afa09b90fab1d57bcb807ca2b8a1200c6..1138eaf5c6829ac08431a91c31fb3339dfe6213d 100644 (file)
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
  
         self->magic = IAS_MAGIC;
         self->mode = mode;
-       if (mode == IAS_CLIENT)
-               iriap_register_lsap(self, slsap_sel, mode);
+       if (mode == IAS_CLIENT) {
+               if (iriap_register_lsap(self, slsap_sel, mode)) {
+                       kfree(self);
+                       return NULL;
+               }
+       }
  
         self->confirm = callback;
         self->priv = priv;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c

index 47e99ab8d97a93697790a5de0c373af16869bad7..543b1d4fc33d563f7a4234c92ff7d22d92e01265 100644 (file)
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -869,7 +869,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
  
         /* free all potentially still buffered bcast frames */
         local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
-       skb_queue_purge(&sdata->u.ap.ps.bc_buf);
+       ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
  
         mutex_lock(&local->mtx);
         ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h

index 184473c257eb9aeca282134e4fd7ba715b074a19..ba5fc1f01e5353da7c0d8bf3a28156abb83a92ca 100644 (file)
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1094,7 +1094,7 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
  
         trace_drv_get_expected_throughput(sta);
         if (local->ops->get_expected_throughput)
-               ret = local->ops->get_expected_throughput(sta);
+               ret = local->ops->get_expected_throughput(&local->hw, sta);
         trace_drv_return_u32(local, ret);
  
         return ret;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c

index c66411df986311c12e78b0243e3a18b7c3070366..42120d965263d2ec1719211da37b7900814e4122 100644 (file)
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -881,20 +881,22 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
  
         netif_carrier_off(sdata->dev);
  
+       /* flush STAs and mpaths on this iface */
+       sta_info_flush(sdata);
+       mesh_path_flush_by_iface(sdata);
+
         /* stop the beacon */
         ifmsh->mesh_id_len = 0;
         sdata->vif.bss_conf.enable_beacon = false;
         clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
         ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+
+       /* remove beacon */
         bcn = rcu_dereference_protected(ifmsh->beacon,
                                         lockdep_is_held(&sdata->wdev.mtx));
         RCU_INIT_POINTER(ifmsh->beacon, NULL);
         kfree_rcu(bcn, rcu_head);
  
-       /* flush STAs and mpaths on this iface */
-       sta_info_flush(sdata);
-       mesh_path_flush_by_iface(sdata);
-
         /* free all potentially still buffered group-addressed frames */
         local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf);
         skb_queue_purge(&ifmsh->ps.bc_buf);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c

index 2e8a9024625a2ae49ac046bccac671c77911120f..9dce3b157908b3d229fb293a8f5acbce9057ea63 100644 (file)
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1268,7 +1268,7 @@ static void sta_ps_start(struct sta_info *sta)
         for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
                 struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
  
-               if (!txqi->tin.backlog_packets)
+               if (txqi->tin.backlog_packets)
                         set_bit(tid, &sta->txq_buffered_tids);
                 else
                         clear_bit(tid, &sta->txq_buffered_tids);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c

index c6d5c724e0326e04921969c9f00d11fac777749d..a2a68269675de8d06236e4d97ab46a617715445b 100644 (file)
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -771,6 +771,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
                         clear_sta_flag(sta, WLAN_STA_SP);
  
                 acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
+
+               /* mesh Peer Service Period support */
+               if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
+                   ieee80211_is_data_qos(fc))
+                       ieee80211_mpsp_trigger_process(
+                               ieee80211_get_qos_ctl(hdr), sta, true, acked);
+
                 if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) {
                         /*
                          * The STA is in power save mode, so assume
@@ -781,13 +788,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
                         return;
                 }
  
-               /* mesh Peer Service Period support */
-               if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
-                   ieee80211_is_data_qos(fc))
-                       ieee80211_mpsp_trigger_process(
-                                       ieee80211_get_qos_ctl(hdr),
-                                       sta, true, acked);
-
                 if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
                     (ieee80211_is_data(hdr->frame_control)) &&
                     (rates_idx != -1))
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c

index 91461c4155255eab59f570d85ee4104d23ef57a4..502396694f4793ebf213ccc1a5b976c2496bfbeb 100644 (file)
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -368,7 +368,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
                 skb = skb_dequeue(&ps->bc_buf);
                 if (skb) {
                         purged++;
-                       dev_kfree_skb(skb);
+                       ieee80211_free_txskb(&local->hw, skb);
                 }
                 total += skb_queue_len(&ps->bc_buf);
         }
@@ -451,7 +451,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
         if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) {
                 ps_dbg(tx->sdata,
                        "BC TX buffer full - dropping the oldest frame\n");
-               dev_kfree_skb(skb_dequeue(&ps->bc_buf));
+               ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf));
         } else
                 tx->local->total_ps_buffered++;
  
@@ -4275,7 +4275,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
                         sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
                 if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb))
                         break;
-               dev_kfree_skb_any(skb);
+               ieee80211_free_txskb(hw, skb);
         }
  
         info = IEEE80211_SKB_CB(skb);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c

index 9e3693128313ffd57845fcabc28916f4b86515c2..f8dbacf66795d929a220d1acf35ea40298ec3be4 100644 (file)
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -574,7 +574,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
         helper = rcu_dereference(nfct_help(expect->master)->helper);
         if (helper) {
                 seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
-               if (helper->expect_policy[expect->class].name)
+               if (helper->expect_policy[expect->class].name[0])
                         seq_printf(s, "/%s",
                                    helper->expect_policy[expect->class].name);
         }
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c

index bb77a97961bfde7b64a74aabdeafe405f73090ec..5c0db5c64734a98d909e7e329ac7aecc54cfb431 100644 (file)
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1473,7 +1473,8 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
                                  "timeout to %u seconds for",
                                  info->timeout);
                         nf_ct_dump_tuple(&exp->tuple);
-                       mod_timer(&exp->timeout, jiffies + info->timeout * HZ);
+                       mod_timer_pending(&exp->timeout,
+                                         jiffies + info->timeout * HZ);
                 }
                 spin_unlock_bh(&nf_conntrack_expect_lock);
         }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c

index 050bb3420a6baf2e75f1b6eac080fe0b92b7e6f6..fdfc71f416b7a2d084c3eb38d85affaf15776fd2 100644 (file)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1894,6 +1894,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
  
                         if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
                                 return -EINVAL;
+                       if (otuple.dst.protonum != rtuple.dst.protonum)
+                               return -EINVAL;
  
                         ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
                                                         &rtuple, u3);
@@ -2362,12 +2364,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
                 return PTR_ERR(exp);
  
         err = nf_ct_expect_related_report(exp, portid, report);
-       if (err < 0) {
-               nf_ct_expect_put(exp);
-               return err;
-       }
-
-       return 0;
+       nf_ct_expect_put(exp);
+       return err;
  }
  
  static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c

index 8d9db9d4702b06965a896b3e85a3254aa2e3191b..7d77217de6a3b4e7821452f721c0902ca6fecd81 100644 (file)
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1383,7 +1383,7 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff,
                 return NF_DROP;
         }
         cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-       if (!cseq) {
+       if (!cseq && *(*dptr + matchoff) != '0') {
                 nf_ct_helper_log(skb, ct, "cannot get cseq");
                 return NF_DROP;
         }
@@ -1446,7 +1446,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
                         return NF_DROP;
                 }
                 cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-               if (!cseq) {
+               if (!cseq && *(*dptr + matchoff) != '0') {
                         nf_ct_helper_log(skb, ct, "cannot get cseq");
                         return NF_DROP;
                 }
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 5d36a0926b4a4859304fdd1808b428c87a2ee8c4..f49f45081acb2200cc8acaed205f82a3298888ba 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1145,10 +1145,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
         int err;
  
-       queue = instance_lookup(q, queue_num);
-       if (!queue)
-               queue = verdict_instance_lookup(q, queue_num,
-                                               NETLINK_CB(skb).portid);
+       queue = verdict_instance_lookup(q, queue_num,
+                                       NETLINK_CB(skb).portid);
         if (IS_ERR(queue))
                 return PTR_ERR(queue);
  
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c

index ba7aed13e1749442d3add6fde2e6b377fd1ad39d..82c264e402781d8b8c52d04332c1b993e5c83fed 100644 (file)
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -59,6 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
                            const struct nlattr * const tb[])
  {
         struct nft_exthdr *priv = nft_expr_priv(expr);
+       u32 offset, len;
  
         if (tb[NFTA_EXTHDR_DREG] == NULL ||
             tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -66,9 +67,15 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
             tb[NFTA_EXTHDR_LEN] == NULL)
                 return -EINVAL;
  
+       offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+       len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+
+       if (offset > U8_MAX || len > U8_MAX)
+               return -ERANGE;
+
         priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
-       priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
-       priv->len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+       priv->offset = offset;
+       priv->len    = len;
         priv->dreg   = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
  
         return nft_validate_register_store(ctx, priv->dreg, NULL,
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c

index 6473936d05c67aa08b7c507bb2bfd8c836bdfcce..ffe9ae062d23e48fe39f9136e8e01d3737a852af 100644 (file)
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -70,7 +70,6 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
                 } else if (d > 0)
                         parent = parent->rb_right;
                 else {
-found:
                         if (!nft_set_elem_active(&rbe->ext, genmask)) {
                                 parent = parent->rb_left;
                                 continue;
@@ -84,9 +83,12 @@ found:
                 }
         }
  
-       if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
-               rbe = interval;
-               goto found;
+       if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+           nft_set_elem_active(&interval->ext, genmask) &&
+           !nft_rbtree_interval_end(interval)) {
+               spin_unlock_bh(&nft_rbtree_lock);
+               *ext = &interval->ext;
+               return true;
         }
  out:
         spin_unlock_bh(&nft_rbtree_lock);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c

index c644c78ed485d7c690ef8284df4276c6d18703c7..e054a748ff2502f2d1a63b61056c64a6aa24059e 100644 (file)
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -433,7 +433,6 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
         struct nf_conntrack_l4proto *l4proto;
         struct nf_conntrack_tuple tuple;
         struct nf_conntrack_tuple_hash *h;
-       enum ip_conntrack_info ctinfo;
         struct nf_conn *ct;
         unsigned int dataoff;
         u8 protonum;
@@ -458,13 +457,8 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
  
         ct = nf_ct_tuplehash_to_ctrack(h);
  
-       ctinfo = ovs_ct_get_info(h);
-       if (ctinfo == IP_CT_NEW) {
-               /* This should not happen. */
-               WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
-       }
         skb->nfct = &ct->ct_general;
-       skb->nfctinfo = ctinfo;
+       skb->nfctinfo = ovs_ct_get_info(h);
         return ct;
  }
  
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c

index 1a1fcec8869593a8c99710e2021baa45ebfdedc8..5aaf3babfc3fa0bf70b8a72ebd95c40962dd6ea2 100644 (file)
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -93,7 +93,14 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
                 return ERR_CAST(dev);
         }
  
-       dev_change_flags(dev, dev->flags | IFF_UP);
+       err = dev_change_flags(dev, dev->flags | IFF_UP);
+       if (err < 0) {
+               rtnl_delete_link(dev);
+               rtnl_unlock();
+               ovs_vport_free(vport);
+               goto error;
+       }
+
         rtnl_unlock();
         return vport;
  error:
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c

index 7f8897f33a67fe6512436aff86c43098c15e3445..0e72d95b0e8f1fcd2e6d1446fc0944b519c0500e 100644 (file)
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -54,6 +54,7 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
         struct net *net = ovs_dp_get_net(parms->dp);
         struct net_device *dev;
         struct vport *vport;
+       int err;
  
         vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
         if (IS_ERR(vport))
@@ -67,9 +68,15 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
                 return ERR_CAST(dev);
         }
  
-       dev_change_flags(dev, dev->flags | IFF_UP);
-       rtnl_unlock();
+       err = dev_change_flags(dev, dev->flags | IFF_UP);
+       if (err < 0) {
+               rtnl_delete_link(dev);
+               rtnl_unlock();
+               ovs_vport_free(vport);
+               return ERR_PTR(err);
+       }
  
+       rtnl_unlock();
         return vport;
  }
  
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c

index 434e04c3a189b91512b30ab70e9a238b7d5d9df2..95c36147a6e136b1e11db6bbbb673c4b7f248e1a 100644 (file)
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -140,7 +140,7 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
  
  static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
  {
-       dev->needed_headroom = new_hr;
+       dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
  }
  
  static const struct net_device_ops internal_dev_netdev_ops = {
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c

index 5eb7694348b5b82a3e80dc6262912eef441ec88e..7eb955e453e6d657d13d0aa7b35ce7c8b7de2f15 100644 (file)
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -130,7 +130,14 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
                 return ERR_CAST(dev);
         }
  
-       dev_change_flags(dev, dev->flags | IFF_UP);
+       err = dev_change_flags(dev, dev->flags | IFF_UP);
+       if (err < 0) {
+               rtnl_delete_link(dev);
+               rtnl_unlock();
+               ovs_vport_free(vport);
+               goto error;
+       }
+
         rtnl_unlock();
         return vport;
  error:
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h

index 1bb9e7ac9e14280a8a83dbf002f0bded91a3c5d6..ff83fb1ddd47fead1fc0f7141242991c3a1a1d1c 100644 (file)
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -425,6 +425,7 @@ struct rxrpc_call {
         spinlock_t              lock;
         rwlock_t                state_lock;     /* lock for state transition */
         atomic_t                usage;
+       atomic_t                skb_count;      /* Outstanding packets on this call */
         atomic_t                sequence;       /* Tx data packet sequence counter */
         u32                     local_abort;    /* local abort code */
         u32                     remote_abort;   /* remote abort code */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c

index 0b2832141bd079797deb27580dc3cbf59611b3fd..9bae21e66d6547580a9d575a3569a9303d3cc7ea 100644 (file)
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -130,6 +130,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
                         call->state = RXRPC_CALL_SERVER_ACCEPTING;
                         list_add_tail(&call->accept_link, &rx->acceptq);
                         rxrpc_get_call(call);
+                       atomic_inc(&call->skb_count);
                         nsp = rxrpc_skb(notification);
                         nsp->call = call;
  
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c

index fc32aa5764a24268ecf09526f34969f5dc1e0845..e60cf65c223237fd1a9bbf8bd6e9d72a16742b32 100644 (file)
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -460,6 +460,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
         ASSERTCMP(sp->call, ==, NULL);
         sp->call = call;
         rxrpc_get_call(call);
+       atomic_inc(&call->skb_count);
  
         /* insert into the buffer in sequence order */
         spin_lock_bh(&call->lock);
@@ -734,6 +735,7 @@ all_acked:
                 skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
                 sp->call = call;
                 rxrpc_get_call(call);
+               atomic_inc(&call->skb_count);
                 spin_lock_bh(&call->lock);
                 if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
                         BUG();
@@ -793,6 +795,7 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
                 sp->error = error;
                 sp->call = call;
                 rxrpc_get_call(call);
+               atomic_inc(&call->skb_count);
  
                 spin_lock_bh(&call->lock);
                 ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
@@ -834,6 +837,9 @@ void rxrpc_process_call(struct work_struct *work)
                 return;
         }
  
+       if (!call->conn)
+               goto skip_msg_init;
+
         /* there's a good chance we're going to have to send a message, so set
          * one up in advance */
         msg.msg_name    = &call->conn->params.peer->srx.transport;
@@ -856,6 +862,7 @@ void rxrpc_process_call(struct work_struct *work)
         memset(iov, 0, sizeof(iov));
         iov[0].iov_base = &whdr;
         iov[0].iov_len  = sizeof(whdr);
+skip_msg_init:
  
         /* deal with events of a final nature */
         if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c

index 91287c9d01bb460c56eda06d3d4e7ceea074f14e..ae057e0740f3de43260d6f482818925ae7992cc8 100644 (file)
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -275,6 +275,7 @@ error:
         list_del_init(&call->link);
         write_unlock_bh(&rxrpc_call_lock);
  
+       set_bit(RXRPC_CALL_RELEASED, &call->flags);
         call->state = RXRPC_CALL_DEAD;
         rxrpc_put_call(call);
         _leave(" = %d", ret);
@@ -287,6 +288,7 @@ error:
          */
  found_user_ID_now_present:
         write_unlock(&rx->call_lock);
+       set_bit(RXRPC_CALL_RELEASED, &call->flags);
         call->state = RXRPC_CALL_DEAD;
         rxrpc_put_call(call);
         _leave(" = -EEXIST [%p]", call);
@@ -491,15 +493,9 @@ void rxrpc_release_call(struct rxrpc_call *call)
                 spin_lock_bh(&call->lock);
                 while ((skb = skb_dequeue(&call->rx_queue)) ||
                        (skb = skb_dequeue(&call->rx_oos_queue))) {
-                       sp = rxrpc_skb(skb);
-                       if (sp->call) {
-                               ASSERTCMP(sp->call, ==, call);
-                               rxrpc_put_call(call);
-                               sp->call = NULL;
-                       }
-                       skb->destructor = NULL;
                         spin_unlock_bh(&call->lock);
  
+                       sp = rxrpc_skb(skb);
                         _debug("- zap %s %%%u #%u",
                                rxrpc_pkts[sp->hdr.type],
                                sp->hdr.serial, sp->hdr.seq);
@@ -605,6 +601,7 @@ void __rxrpc_put_call(struct rxrpc_call *call)
  
         if (atomic_dec_and_test(&call->usage)) {
                 _debug("call %d dead", call->debug_id);
+               WARN_ON(atomic_read(&call->skb_count) != 0);
                 ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
                 rxrpc_queue_work(&call->destroyer);
         }
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c

index 991a20d250930307cd0fb94ef4f474e8457264bd..70bb77818deab9393b79a6499dcf0f42ecbd29e6 100644 (file)
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -55,9 +55,6 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
         if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
                 _debug("already terminated");
                 ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
-               skb->destructor = NULL;
-               sp->call = NULL;
-               rxrpc_put_call(call);
                 rxrpc_free_skb(skb);
                 return 0;
         }
@@ -111,13 +108,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
         ret = 0;
  
  out:
-       /* release the socket buffer */
-       if (skb) {
-               skb->destructor = NULL;
-               sp->call = NULL;
-               rxrpc_put_call(call);
-               rxrpc_free_skb(skb);
-       }
+       rxrpc_free_skb(skb);
  
         _leave(" = %d", ret);
         return ret;
@@ -133,11 +124,15 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
         struct rxrpc_skb_priv *sp;
         bool terminal;
         int ret, ackbit, ack;
+       u32 serial;
+       u8 flags;
  
         _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
  
         sp = rxrpc_skb(skb);
         ASSERTCMP(sp->call, ==, NULL);
+       flags = sp->hdr.flags;
+       serial = sp->hdr.serial;
  
         spin_lock(&call->lock);
  
@@ -200,8 +195,9 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
  
         sp->call = call;
         rxrpc_get_call(call);
-       terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
-                   !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+       atomic_inc(&call->skb_count);
+       terminal = ((flags & RXRPC_LAST_PACKET) &&
+                   !(flags & RXRPC_CLIENT_INITIATED));
         ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
         if (ret < 0) {
                 if (ret == -ENOMEM || ret == -ENOBUFS) {
@@ -213,12 +209,13 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
         }
  
         skb = NULL;
+       sp = NULL;
  
         _debug("post #%u", seq);
         ASSERTCMP(call->rx_data_post, ==, seq);
         call->rx_data_post++;
  
-       if (sp->hdr.flags & RXRPC_LAST_PACKET)
+       if (flags & RXRPC_LAST_PACKET)
                 set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
  
         /* if we've reached an out of sequence packet then we need to drain
@@ -234,7 +231,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
  
         spin_unlock(&call->lock);
         atomic_inc(&call->ackr_not_idle);
-       rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
+       rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
         _leave(" = 0 [posted]");
         return 0;
  
@@ -247,7 +244,7 @@ out:
  
  discard_and_ack:
         _debug("discard and ACK packet %p", skb);
-       __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+       __rxrpc_propose_ACK(call, ack, serial, true);
  discard:
         spin_unlock(&call->lock);
         rxrpc_free_skb(skb);
@@ -255,7 +252,7 @@ discard:
         return 0;
  
  enqueue_and_ack:
-       __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+       __rxrpc_propose_ACK(call, ack, serial, true);
  enqueue_packet:
         _net("defer skb %p", skb);
         spin_unlock(&call->lock);
@@ -575,13 +572,13 @@ done:
   * post connection-level events to the connection
   * - this includes challenges, responses and some aborts
   */
-static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
                                       struct sk_buff *skb)
  {
         _enter("%p,%p", conn, skb);
  
         skb_queue_tail(&conn->rx_queue, skb);
-       return rxrpc_queue_conn(conn);
+       rxrpc_queue_conn(conn);
  }
  
  /*
@@ -702,7 +699,6 @@ void rxrpc_data_ready(struct sock *sk)
  
         rcu_read_lock();
  
-retry_find_conn:
         conn = rxrpc_find_connection_rcu(local, skb);
         if (!conn)
                 goto cant_route_call;
@@ -710,8 +706,7 @@ retry_find_conn:
         if (sp->hdr.callNumber == 0) {
                 /* Connection-level packet */
                 _debug("CONN %p {%d}", conn, conn->debug_id);
-               if (!rxrpc_post_packet_to_conn(conn, skb))
-                       goto retry_find_conn;
+               rxrpc_post_packet_to_conn(conn, skb);
         } else {
                 /* Call-bound packets are routed by connection channel. */
                 unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
@@ -749,6 +744,8 @@ cant_route_call:
         if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
                 _debug("reject type %d",sp->hdr.type);
                 rxrpc_reject_packet(local, skb);
+       } else {
+               rxrpc_free_skb(skb);
         }
         _leave(" [no call]");
         return;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c

index a3fa2ed85d6306af3e126d25ed6597f15e716e87..9ed66d533002df33c8f0167a6eb43d94d8d4132c 100644 (file)
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -203,6 +203,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                 }
  
                 /* we transferred the whole data packet */
+               if (!(flags & MSG_PEEK))
+                       rxrpc_kernel_data_consumed(call, skb);
+
                 if (sp->hdr.flags & RXRPC_LAST_PACKET) {
                         _debug("last");
                         if (rxrpc_conn_is_client(call->conn)) {
@@ -359,28 +362,6 @@ wait_error:
  
  }
  
-/**
- * rxrpc_kernel_data_delivered - Record delivery of data message
- * @skb: Message holding data
- *
- * Record the delivery of a data message.  This permits RxRPC to keep its
- * tracking correct.  The socket buffer will be deleted.
- */
-void rxrpc_kernel_data_delivered(struct sk_buff *skb)
-{
-       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-       struct rxrpc_call *call = sp->call;
-
-       ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
-       ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
-       call->rx_data_recv = sp->hdr.seq;
-
-       ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
-       rxrpc_free_skb(skb);
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
-
  /**
   * rxrpc_kernel_is_data_last - Determine if data message is last one
   * @skb: Message holding data
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c

index eee0cfd9ac8c0d8c2a7cb4e52a213df0df2f6082..06c51d4b622d67ae77d54813a668e4f722a4c8a9 100644 (file)
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -98,11 +98,39 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
         spin_unlock_bh(&call->lock);
  }
  
+/**
+ * rxrpc_kernel_data_consumed - Record consumption of data message
+ * @call: The call to which the message pertains.
+ * @skb: Message holding data; asserted to be a DATA packet whose private
+ *       area points back at @call.
+ *
+ * Record the consumption of a data message and generate an ACK if appropriate.
+ * The call state is shifted if this was the final packet.  The caller must be
+ * in process context with no spinlocks held.
+ *
+ * The packet's sequence number is asserted to be either the value currently
+ * held in call->rx_data_recv or the one immediately after it, and to be
+ * greater than call->rx_data_eaten.  It is then stored in call->rx_data_recv
+ * and the packet is handed to rxrpc_hard_ACK_data().
+ *
+ * Nothing in this function's own body frees @skb; the caller presumably
+ * remains responsible for releasing it (confirm against the call sites).
+ *
+ * TODO: Actually generate the ACK here rather than punting this to the
+ * workqueue.
+ */
+void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb)
+{
+       struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+       _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq);
+
+       ASSERTCMP(sp->call, ==, call);
+       ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA);
+
+       /* TODO: Fix the sequence number tracking
+        *
+        * Until then, insist that this packet either repeats the last
+        * recorded sequence number or directly follows it, and that it lies
+        * beyond rx_data_eaten.
+        */
+       ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
+       ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
+       ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
+
+       call->rx_data_recv = sp->hdr.seq;
+       rxrpc_hard_ACK_data(call, sp);
+}
+EXPORT_SYMBOL(rxrpc_kernel_data_consumed);
+
  /*
- * destroy a packet that has an RxRPC control buffer
- * - advance the hard-ACK state of the parent call (done here in case something
- *   in the kernel bypasses recvmsg() and steals the packet directly off of the
- *   socket receive queue)
+ * Destroy a packet that has an RxRPC control buffer
   */
  void rxrpc_packet_destructor(struct sk_buff *skb)
  {
@@ -112,9 +140,8 @@ void rxrpc_packet_destructor(struct sk_buff *skb)
         _enter("%p{%p}", skb, call);
  
         if (call) {
-               /* send the final ACK on a client call */
-               if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
-                       rxrpc_hard_ACK_data(call, sp);
+               if (atomic_dec_return(&call->skb_count) < 0)
+                       BUG();
                 rxrpc_put_call(call);
                 sp->call = NULL;
         }
diff --git a/net/sched/act_api.c b/net/sched/act_api.c

index e4a5f2607ffa2edb6c9c3206c96b578946edfa57..d09d0687594b0b5ae04ad54a2b937044522bfbea 100644 (file)
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -64,7 +64,6 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict)
                 if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
                         if (p->ops->cleanup)
                                 p->ops->cleanup(p, bind);
-                       list_del(&p->list);
                         tcf_hash_destroy(p->hinfo, p);
                         ret = ACT_P_DELETED;
                 }
@@ -421,18 +420,19 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
         return res;
  }
  
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-                   struct tcf_result *res)
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+                   int nr_actions, struct tcf_result *res)
  {
-       const struct tc_action *a;
-       int ret = -1;
+       int ret = -1, i;
  
         if (skb->tc_verd & TC_NCLS) {
                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
                 ret = TC_ACT_OK;
                 goto exec_done;
         }
-       list_for_each_entry(a, actions, list) {
+       for (i = 0; i < nr_actions; i++) {
+               const struct tc_action *a = actions[i];
+
  repeat:
                 ret = a->ops->act(skb, a, res);
                 if (ret == TC_ACT_REPEAT)
@@ -754,16 +754,6 @@ err_out:
         return ERR_PTR(err);
  }
  
-static void cleanup_a(struct list_head *actions)
-{
-       struct tc_action *a, *tmp;
-
-       list_for_each_entry_safe(a, tmp, actions, list) {
-               list_del(&a->list);
-               kfree(a);
-       }
-}
-
  static int tca_action_flush(struct net *net, struct nlattr *nla,
                             struct nlmsghdr *n, u32 portid)
  {
@@ -905,7 +895,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
                 return ret;
         }
  err:
-       cleanup_a(&actions);
+       tcf_action_destroy(&actions, 0);
         return ret;
  }
  
@@ -942,15 +932,9 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
  
         ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
         if (ret)
-               goto done;
+               return ret;
  
-       /* dump then free all the actions after update; inserted policy
-        * stays intact
-        */
-       ret = tcf_add_notify(net, n, &actions, portid);
-       cleanup_a(&actions);
-done:
-       return ret;
+       return tcf_add_notify(net, n, &actions, portid);
  }
  
  static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c

index b3c7e975fc9e255a412d704c6c08dd6d6df0d1ea..8a3be1d99775b0589bcb09f97e7bf347a4017868 100644 (file)
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -63,49 +63,8 @@ static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
                                  const struct tc_action_ops *ops)
  {
         struct tc_action_net *tn = net_generic(net, police_net_id);
-       struct tcf_hashinfo *hinfo = tn->hinfo;
-       int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
-       struct nlattr *nest;
-
-       spin_lock_bh(&hinfo->lock);
-
-       s_i = cb->args[0];
-
-       for (i = 0; i < (POL_TAB_MASK + 1); i++) {
-               struct hlist_head *head;
-               struct tc_action *p;
-
-               head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)];
-
-               hlist_for_each_entry_rcu(p, head, tcfa_head) {
-                       index++;
-                       if (index < s_i)
-                               continue;
-                       nest = nla_nest_start(skb, index);
-                       if (nest == NULL)
-                               goto nla_put_failure;
-                       if (type == RTM_DELACTION)
-                               err = tcf_action_dump_1(skb, p, 0, 1);
-                       else
-                               err = tcf_action_dump_1(skb, p, 0, 0);
-                       if (err < 0) {
-                               index--;
-                               nla_nest_cancel(skb, nest);
-                               goto done;
-                       }
-                       nla_nest_end(skb, nest);
-                       n_i++;
-               }
-       }
-done:
-       spin_unlock_bh(&hinfo->lock);
-       if (n_i)
-               cb->args[0] += n_i;
-       return n_i;
  
-nla_put_failure:
-       nla_nest_cancel(skb, nest);
-       goto done;
+       return tcf_generic_walker(tn, skb, cb, type, ops);
  }
  
  static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -125,6 +84,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
         struct tcf_police *police;
         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
         struct tc_action_net *tn = net_generic(net, police_net_id);
+       bool exists = false;
         int size;
  
         if (nla == NULL)
@@ -139,24 +99,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
         size = nla_len(tb[TCA_POLICE_TBF]);
         if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
                 return -EINVAL;
+
         parm = nla_data(tb[TCA_POLICE_TBF]);
+       exists = tcf_hash_check(tn, parm->index, a, bind);
+       if (exists && bind)
+               return 0;
  
-       if (parm->index) {
-               if (tcf_hash_check(tn, parm->index, a, bind)) {
-                       if (ovr)
-                               goto override;
-                       /* not replacing */
-                       return -EEXIST;
-               }
-       } else {
+       if (!exists) {
                 ret = tcf_hash_create(tn, parm->index, NULL, a,
                                       &act_police_ops, bind, false);
                 if (ret)
                         return ret;
                 ret = ACT_P_CREATED;
+       } else {
+               tcf_hash_release(*a, bind);
+               if (!ovr)
+                       return -EEXIST;
         }
  
-override:
         police = to_police(*a);
         if (parm->rate.rate) {
                 err = -ENOMEM;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c

index 843a716a4303e71cb39c7ad8d95106a6d5c5e872..a7c5645373afb02a90d2ad76595e6dfcf14fe6b7 100644 (file)
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -541,8 +541,12 @@ out:
  void tcf_exts_destroy(struct tcf_exts *exts)
  {
  #ifdef CONFIG_NET_CLS_ACT
-       tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
-       INIT_LIST_HEAD(&exts->actions);
+       LIST_HEAD(actions);
+
+       tcf_exts_to_list(exts, &actions);
+       tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+       kfree(exts->actions);
+       exts->nr_actions = 0;
  #endif
  }
  EXPORT_SYMBOL(tcf_exts_destroy);
@@ -554,7 +558,6 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
         {
                 struct tc_action *act;
  
-               INIT_LIST_HEAD(&exts->actions);
                 if (exts->police && tb[exts->police]) {
                         act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
                                                 "police", ovr,
@@ -563,14 +566,20 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                                 return PTR_ERR(act);
  
                         act->type = exts->type = TCA_OLD_COMPAT;
-                       list_add(&act->list, &exts->actions);
+                       exts->actions[0] = act;
+                       exts->nr_actions = 1;
                 } else if (exts->action && tb[exts->action]) {
-                       int err;
+                       LIST_HEAD(actions);
+                       int err, i = 0;
+
                         err = tcf_action_init(net, tb[exts->action], rate_tlv,
                                               NULL, ovr,
-                                             TCA_ACT_BIND, &exts->actions);
+                                             TCA_ACT_BIND, &actions);
                         if (err)
                                 return err;
+                       list_for_each_entry(act, &actions, list)
+                               exts->actions[i++] = act;
+                       exts->nr_actions = i;
                 }
         }
  #else
@@ -587,37 +596,49 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
                      struct tcf_exts *src)
  {
  #ifdef CONFIG_NET_CLS_ACT
-       LIST_HEAD(tmp);
+       struct tcf_exts old = *dst;
+
         tcf_tree_lock(tp);
-       list_splice_init(&dst->actions, &tmp);
-       list_splice(&src->actions, &dst->actions);
+       dst->nr_actions = src->nr_actions;
+       dst->actions = src->actions;
         dst->type = src->type;
         tcf_tree_unlock(tp);
-       tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
+
+       tcf_exts_destroy(&old);
  #endif
  }
  EXPORT_SYMBOL(tcf_exts_change);
  
-#define tcf_exts_first_act(ext)                                        \
-       list_first_entry_or_null(&(exts)->actions,              \
-                                struct tc_action, list)
+#ifdef CONFIG_NET_CLS_ACT
+static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
+{      /* Return the first entry of @exts->actions, or NULL if it has none. */
+       if (exts->nr_actions == 0)      /* nothing stored in the array */
+               return NULL;
+       else
+               return exts->actions[0];
+}
+#endif
  
  int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
  {
  #ifdef CONFIG_NET_CLS_ACT
         struct nlattr *nest;
  
-       if (exts->action && !list_empty(&exts->actions)) {
+       if (exts->action && exts->nr_actions) {
                 /*
                  * again for backward compatible mode - we want
                  * to work with both old and new modes of entering
                  * tc data even if iproute2  was newer - jhs
                  */
                 if (exts->type != TCA_OLD_COMPAT) {
+                       LIST_HEAD(actions);
+
                         nest = nla_nest_start(skb, exts->action);
                         if (nest == NULL)
                                 goto nla_put_failure;
-                       if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
+
+                       tcf_exts_to_list(exts, &actions);
+                       if (tcf_action_dump(skb, &actions, 0, 0) < 0)
                                 goto nla_put_failure;
                         nla_nest_end(skb, nest);
                 } else if (exts->police) {
diff --git a/net/sctp/proc.c b/net/sctp/proc.c

index 4cb5aedfe3ee2cf188385ef1f18cd63908a558ba..ef8ba77a5beace906ac3e83b3c0824a2134e6a63 100644 (file)
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
                 return ERR_PTR(err);
         }
  
+       iter->start_fail = 0;
         return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
  }
  
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c

index f69edcf219e514d864c4af2d57eb0429e8f4938a..bb691538adc8e89d6920f8684d8a64779ea1865a 100644 (file)
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -13,6 +13,7 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
  {
         union sctp_addr laddr, paddr;
         struct dst_entry *dst;
+       struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
  
         laddr = list_entry(asoc->base.bind_addr.address_list.next,
                            struct sctp_sockaddr_entry, list)->a;
@@ -40,10 +41,15 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
         }
  
         r->idiag_state = asoc->state;
-       r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
-       r->idiag_retrans = asoc->rtx_data_chunks;
-       r->idiag_expires = jiffies_to_msecs(
-               asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies);
+       if (timer_pending(t3_rtx)) {
+               r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+               r->idiag_retrans = asoc->rtx_data_chunks;
+               r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+       } else {
+               r->idiag_timer = 0;
+               r->idiag_retrans = 0;
+               r->idiag_expires = 0;
+       }
  }
  
  static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
@@ -350,7 +356,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
         if (cb->args[4] < cb->args[1])
                 goto next;
  
-       if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs))
+       if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
                 goto next;
  
         if (r->sdiag_family != AF_UNSPEC &&
@@ -465,7 +471,7 @@ skip:
          * 3 : to mark if we have dumped the ep info of the current asoc
          * 4 : to work as a temporary variable to traversal list
          */
-       if (!(idiag_states & ~TCPF_LISTEN))
+       if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
                 goto done;
         sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
  done:
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c

index 1bc4f71aaba860776a0a6e4b664ef1c3a0f8af37..d85b803da11d21202d876c95811bcab4e1fb507e 100644 (file)
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -702,14 +702,14 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
          */
         sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff));
  
-       sctp_ulpevent_receive_data(event, asoc);
-
         /* And hold the chunk as we need it for getting the IP headers
          * later in recvmsg
          */
         sctp_chunk_hold(chunk);
         event->chunk = chunk;
  
+       sctp_ulpevent_receive_data(event, asoc);
+
         event->stream = ntohs(chunk->subh.data_hdr->stream);
         event->ssn = ntohs(chunk->subh.data_hdr->ssn);
         event->ppid = chunk->subh.data_hdr->ppid;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c

index 23c8e7c3965651ad5ee03ee617ad92d06646802f..976c7812bbd520e51d34eb542b15f0e4730034b9 100644 (file)
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -340,12 +340,14 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
  }
  
  static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
  {
         struct gss_upcall_msg *pos;
         list_for_each_entry(pos, &pipe->in_downcall, list) {
                 if (!uid_eq(pos->uid, uid))
                         continue;
+               if (auth && pos->auth->service != auth->service)
+                       continue;
                 atomic_inc(&pos->count);
                 dprintk("RPC:       %s found msg %p\n", __func__, pos);
                 return pos;
@@ -365,7 +367,7 @@ gss_add_msg(struct gss_upcall_msg *gss_msg)
         struct gss_upcall_msg *old;
  
         spin_lock(&pipe->lock);
-       old = __gss_find_upcall(pipe, gss_msg->uid);
+       old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
         if (old == NULL) {
                 atomic_inc(&gss_msg->count);
                 list_add(&gss_msg->list, &pipe->in_downcall);
@@ -714,7 +716,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         err = -ENOENT;
         /* Find a matching upcall */
         spin_lock(&pipe->lock);
-       gss_msg = __gss_find_upcall(pipe, uid);
+       gss_msg = __gss_find_upcall(pipe, uid, NULL);
         if (gss_msg == NULL) {
                 spin_unlock(&pipe->lock);
                 goto err_put_ctx;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c

index cb49898a5a58aacfadceda27a07ceb45eb88a8d3..7f79fb7dc6a00d6cc4082815d35fab4c60be1943 100644 (file)
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2638,6 +2638,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
  {
         struct rpc_xprt_switch *xps;
         struct rpc_xprt *xprt;
+       unsigned long reconnect_timeout;
         unsigned char resvport;
         int ret = 0;
  
@@ -2649,6 +2650,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
                 return -EAGAIN;
         }
         resvport = xprt->resvport;
+       reconnect_timeout = xprt->max_reconnect_timeout;
         rcu_read_unlock();
  
         xprt = xprt_create_transport(xprtargs);
@@ -2657,6 +2659,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
                 goto out_put_switch;
         }
         xprt->resvport = resvport;
+       xprt->max_reconnect_timeout = reconnect_timeout;
  
         rpc_xprt_switch_set_roundrobin(xps);
         if (setup) {
@@ -2673,6 +2676,27 @@ out_put_switch:
  }
  EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
  
+static int
+rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+               struct rpc_xprt *xprt,
+               void *data)     /* read as a pointer to unsigned long */
+{      /*
+        * Lower @xprt's max_reconnect_timeout to the value @data points at
+        * when that value is smaller; a value that is not smaller leaves the
+        * field untouched.  @clnt is not used.  Always returns 0.
+        */
+       unsigned long timeout = *((unsigned long *)data);
+
+       if (timeout < xprt->max_reconnect_timeout)      /* only ever lower it */
+               xprt->max_reconnect_timeout = timeout;
+       return 0;
+}
+
+void
+rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+{      /*
+        * Run rpc_xprt_cap_max_reconnect_timeout() over @clnt's transports via
+        * rpc_clnt_iterate_for_each_xprt(), passing @timeo by address, so that
+        * each visited transport's max_reconnect_timeout ends up no larger
+        * than @timeo.  NOTE(review): @timeo is presumably in jiffies, like
+        * the field it is compared with -- confirm.
+        */
+       rpc_clnt_iterate_for_each_xprt(clnt,
+                       rpc_xprt_cap_max_reconnect_timeout,
+                       &timeo);
+}
+EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  static void rpc_show_header(void)
  {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c

index 8313960cac524dd36d220f9b55d124435400f25a..ea244b29138b0b86cf7860ce5c1e4605ade86a2a 100644 (file)
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -680,6 +680,20 @@ out:
         spin_unlock_bh(&xprt->transport_lock);
  }
  
+static bool
+xprt_has_timer(const struct rpc_xprt *xprt)
+{      /* True when @xprt has a non-zero idle_timeout configured. */
+       return xprt->idle_timeout != 0;
+}
+
+static void
+xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
+       __must_hold(&xprt->transport_lock)
+{      /*
+        * (Re)arm xprt->timer to fire idle_timeout after last_used, but only
+        * when the xprt->recv list is empty and the transport has an idle
+        * timeout at all.  The __must_hold() annotation above records that the
+        * caller holds xprt->transport_lock.
+        */
+       if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+               mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
+}
+
  static void
  xprt_init_autodisconnect(unsigned long data)
  {
@@ -688,6 +702,8 @@ xprt_init_autodisconnect(unsigned long data)
         spin_lock(&xprt->transport_lock);
         if (!list_empty(&xprt->recv))
                 goto out_abort;
+       /* Reset xprt->last_used to avoid connect/autodisconnect cycling */
+       xprt->last_used = jiffies;
         if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                 goto out_abort;
         spin_unlock(&xprt->transport_lock);
@@ -725,6 +741,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
                 goto out;
         xprt->snd_task =NULL;
         xprt->ops->release_xprt(xprt, NULL);
+       xprt_schedule_autodisconnect(xprt);
  out:
         spin_unlock_bh(&xprt->transport_lock);
         wake_up_bit(&xprt->state, XPRT_LOCKED);
@@ -888,11 +905,6 @@ static void xprt_timer(struct rpc_task *task)
         spin_unlock_bh(&xprt->transport_lock);
  }
  
-static inline int xprt_has_timer(struct rpc_xprt *xprt)
-{
-       return xprt->idle_timeout != 0;
-}
-
  /**
   * xprt_prepare_transmit - reserve the transport before sending a request
   * @task: RPC task about to send a request
@@ -1280,9 +1292,7 @@ void xprt_release(struct rpc_task *task)
         if (!list_empty(&req->rq_list))
                 list_del(&req->rq_list);
         xprt->last_used = jiffies;
-       if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
-               mod_timer(&xprt->timer,
-                               xprt->last_used + xprt->idle_timeout);
+       xprt_schedule_autodisconnect(xprt);
         spin_unlock_bh(&xprt->transport_lock);
         if (req->rq_buffer)
                 xprt->ops->buf_free(req->rq_buffer);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c

index 111767ab124aa4037dfe8c7040866d7196343292..8ede3bc52481b73c82834aa684111013c6d40cad 100644 (file)
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -177,7 +177,6 @@ static struct ctl_table sunrpc_table[] = {
   * increase over time if the server is down or not responding.
   */
  #define XS_TCP_INIT_REEST_TO   (3U * HZ)
-#define XS_TCP_MAX_REEST_TO    (5U * 60 * HZ)
  
  /*
   * TCP idle timeout; client drops the transport socket if it is idle
@@ -2173,6 +2172,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                 write_unlock_bh(&sk->sk_callback_lock);
         }
         xs_udp_do_set_buffer_size(xprt);
+
+       xprt->stat.connect_start = jiffies;
  }
  
  static void xs_udp_setup_socket(struct work_struct *work)
@@ -2236,6 +2237,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                 unsigned int keepcnt = xprt->timeout->to_retries + 1;
                 unsigned int opt_on = 1;
                 unsigned int timeo;
+               unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
  
                 /* TCP Keepalive options */
                 kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2247,6 +2249,16 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                 kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
                                 (char *)&keepcnt, sizeof(keepcnt));
  
+               /* Avoid temporary address, they are bad for long-lived
+                * connections such as NFS mounts.
+                * RFC4941, section 3.6 suggests that:
+                *    Individual applications, which have specific
+                *    knowledge about the normal duration of connections,
+                *    MAY override this as appropriate.
+                */
+               kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
+                               (char *)&addr_pref, sizeof(addr_pref));
+
                 /* TCP user timeout (see RFC5482) */
                 timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
                         (xprt->timeout->to_retries + 1);
@@ -2295,6 +2307,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                 /* SYN_SENT! */
                 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
                         xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+               break;
+       case -EADDRNOTAVAIL:
+               /* Source port number is unavailable. Try a new one! */
+               transport->srcport = 0;
         }
  out:
         return ret;
@@ -2369,6 +2385,25 @@ out:
         xprt_wake_pending_tasks(xprt, status);
  }
  
+static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
+{      /*
+        * Return how many jiffies remain until
+        * stat.connect_start + reestablish_timeout, or 0 if that moment is
+        * not in the future.
+        */
+       unsigned long start, now = jiffies;
+
+       start = xprt->stat.connect_start + xprt->reestablish_timeout;
+       if (time_after(start, now))     /* wrap-safe jiffies comparison */
+               return start - now;
+       return 0;
+}
+
+static void xs_reconnect_backoff(struct rpc_xprt *xprt)
+{      /*
+        * Double reestablish_timeout, then clamp it: first down to
+        * max_reconnect_timeout, then up to XS_TCP_INIT_REEST_TO.  The floor
+        * is applied last, so it wins if max_reconnect_timeout is smaller
+        * than XS_TCP_INIT_REEST_TO.
+        */
+       xprt->reestablish_timeout <<= 1;
+       if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+               xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+       if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+               xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+}
+
  /**
   * xs_connect - connect a socket to a remote endpoint
   * @xprt: pointer to transport structure
@@ -2386,6 +2421,7 @@ out:
  static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       unsigned long delay = 0;
  
         WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
  
@@ -2397,19 +2433,15 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
                 /* Start by resetting any existing state */
                 xs_reset_transport(transport);
  
-               queue_delayed_work(xprtiod_workqueue,
-                                  &transport->connect_worker,
-                                  xprt->reestablish_timeout);
-               xprt->reestablish_timeout <<= 1;
-               if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-               if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
-       } else {
+               delay = xs_reconnect_delay(xprt);
+               xs_reconnect_backoff(xprt);
+
+       } else
                 dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
-               queue_delayed_work(xprtiod_workqueue,
-                                  &transport->connect_worker, 0);
-       }
+
+       queue_delayed_work(xprtiod_workqueue,
+                       &transport->connect_worker,
+                       delay);
  }
  
  /**
@@ -2961,6 +2993,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
         xprt->ops = &xs_tcp_ops;
         xprt->timeout = &xs_tcp_default_timeout;
  
+       xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+
         INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
         INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
  
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c

index b62caa1c770c042fa7449f401bcb333b3defa485..ed97a5876ebef128937906d4115d3c1db6d16998 100644 (file)
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -728,12 +728,13 @@ int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
                              u32 bearer_id, u32 *prev_node)
  {
         struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-       struct tipc_peer *peer = mon->self;
+       struct tipc_peer *peer;
  
         if (!mon)
                 return -EINVAL;
  
         read_lock_bh(&mon->lock);
+       peer = mon->self;
         do {
                 if (*prev_node) {
                         if (peer->addr == *prev_node)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c

index c49b8df438cbeee021bbedf3c96631d82fb16670..f9f5f3c3dab530c0b798d314873800500ccc30b5 100644 (file)
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2180,7 +2180,8 @@ restart:
                                               TIPC_CONN_MSG, SHORT_H_SIZE,
                                               0, dnode, onode, dport, oport,
                                               TIPC_CONN_SHUTDOWN);
-                       tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+                       if (skb)
+                               tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
                 }
                 tsk->connected = 0;
                 sock->state = SS_DISCONNECTING;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c

index 699dfabdbccd5e6af0f4910cdf2467b36bab3b60..936d7eee62d03efbac1e278272ca8fd917a40622 100644 (file)
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
  
         vq = vsock->vqs[VSOCK_VQ_TX];
  
-       /* Avoid unnecessary interrupts while we're processing the ring */
-       virtqueue_disable_cb(vq);
-
         for (;;) {
                 struct virtio_vsock_pkt *pkt;
                 struct scatterlist hdr, buf, *sgs[2];
@@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
                 spin_lock_bh(&vsock->send_pkt_list_lock);
                 if (list_empty(&vsock->send_pkt_list)) {
                         spin_unlock_bh(&vsock->send_pkt_list_lock);
-                       virtqueue_enable_cb(vq);
                         break;
                 }
  
@@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work)
                 }
  
                 ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+               /* Usually this means that there is no more space available in
+                * the vq
+                */
                 if (ret < 0) {
                         spin_lock_bh(&vsock->send_pkt_list_lock);
                         list_add(&pkt->list, &vsock->send_pkt_list);
                         spin_unlock_bh(&vsock->send_pkt_list_lock);
-
-                       if (!virtqueue_enable_cb(vq) && ret == -ENOSPC)
-                               continue; /* retry now that we have more space */
                         break;
                 }
  
diff --git a/net/wireless/chan.c b/net/wireless/chan.c

index b0e11b6dc994fcfd6fdf53a94bb09fbe22f89192..0f506220a3bde0e4b73c043fa9ea6a7f6b404ed0 100644 (file)
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -513,6 +513,7 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
                 r = cfg80211_get_chans_dfs_available(wiphy,
                                                      chandef->center_freq2,
                                                      width);
+               break;
         default:
                 WARN_ON(chandef->center_freq2);
                 break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c

index 46417f9cce6810675f8d79f153260e61a689a140..f02653a08993334f98517d30c9c319f780ce9342 100644 (file)
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5380,6 +5380,7 @@ static int nl80211_parse_mesh_config(struct genl_info *info,
  {
         struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
         u32 mask = 0;
+       u16 ht_opmode;
  
  #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
  do {                                                                       \
@@ -5471,9 +5472,36 @@ do {                                                                         \
         FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
                                   mask, NL80211_MESHCONF_RSSI_THRESHOLD,
                                   nl80211_check_s32);
-       FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
-                                 mask, NL80211_MESHCONF_HT_OPMODE,
-                                 nl80211_check_u16);
+       /*
+        * Check HT operation mode based on
+        * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+        */
+       if (tb[NL80211_MESHCONF_HT_OPMODE]) {
+               ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
+
+               if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION |
+                                 IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
+                                 IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+                       return -EINVAL;
+
+               if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
+                   (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+                       return -EINVAL;
+
+               switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
+               case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
+               case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
+                       if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
+                               return -EINVAL;
+                       break;
+               case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
+               case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
+                       if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+                               return -EINVAL;
+                       break;
+               }
+               cfg->ht_opmode = ht_opmode;
+       }
         FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
                                   1, 65535, mask,
                                   NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h

index 217c8d507f2e8d6a6fae5a958c1edc1d3be3e99d..7927a090fa0d03241ef379db6ab91c5330c5f44b 100644 (file)
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -72,8 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
         (void *) BPF_FUNC_l3_csum_replace;
  static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
         (void *) BPF_FUNC_l4_csum_replace;
-static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
-       (void *) BPF_FUNC_skb_in_cgroup;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+       (void *) BPF_FUNC_skb_under_cgroup;
  
  #if defined(__x86_64__)
  
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c

index 2732c37c8d5be1e09364f1d4d97ae2126d40195e..10ff73404e3a80fe8bab464188335317ee71515a 100644 (file)
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -57,7 +57,7 @@ int handle_egress(struct __sk_buff *skb)
                 bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
                                  eth->h_proto, ip6h->nexthdr);
                 return TC_ACT_OK;
-       } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
+       } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
                 bpf_trace_printk(pass_msg, sizeof(pass_msg));
                 return TC_ACT_OK;
         } else {
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c

index 47bf0858f9e47d1b85a908ab94518c46b11ffecd..cce2b59751ebcbd632926e492bb08bafa8711ed5 100644 (file)
--- a/samples/bpf/test_maps.c
+++ b/samples/bpf/test_maps.c
@@ -68,7 +68,16 @@ static void test_hashmap_sanity(int i, void *data)
         assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
                errno == E2BIG);
  
+       /* update existing element, though the map is full */
+       key = 1;
+       assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+       key = 2;
+       assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+       key = 1;
+       assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
         /* check that key = 0 doesn't exist */
+       key = 0;
         assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
  
         /* iterate over two elements */
@@ -413,10 +422,12 @@ static void do_work(int fn, void *data)
  
         for (i = fn; i < MAP_SIZE; i += TASKS) {
                 key = value = i;
-               if (do_update)
+               if (do_update) {
                         assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
-               else
+                       assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+               } else {
                         assert(bpf_delete_elem(map_fd, &key) == 0);
+               }
         }
  }
  
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl

index 122fcdaf42c86cec7a5fbce08cc3b406f692f6c5..49a00d54b835f156745c27bfb12b1f64bcf9140c 100755 (executable)
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -432,7 +432,7 @@ foreach my $file (@ARGV) {
             die "$P: file '${file}' not found\n";
         }
      }
-    if ($from_filename || vcs_file_exists($file)) {
+    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
         $file =~ s/^\Q${cur_path}\E//;  #strip any absolute path
         $file =~ s/^\Q${lk_path}\E//;   #or the path to the lk tree
         push(@files, $file);
diff --git a/security/Kconfig b/security/Kconfig

index df28f2b6f3e1b47ab9a290c02f74b2da5f703196..da10d9b573a4a809f6159d82660a717f224903ec 100644 (file)
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -136,6 +136,7 @@ config HAVE_ARCH_HARDENED_USERCOPY
  config HARDENED_USERCOPY
         bool "Harden memory copies between kernel and userspace"
         depends on HAVE_ARCH_HARDENED_USERCOPY
+       depends on HAVE_HARDENED_USERCOPY_ALLOCATOR
         select BUG
         help
           This option checks for obviously wrong memory regions when
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c

index 89dacf9b4e6cbcdd7caaed257ce77a510083c3c4..160c7f71372289034f87953de4650fb08498298a 100644 (file)
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -906,20 +906,23 @@ static int azx_resume(struct device *dev)
         struct snd_card *card = dev_get_drvdata(dev);
         struct azx *chip;
         struct hda_intel *hda;
+       struct hdac_bus *bus;
  
         if (!card)
                 return 0;
  
         chip = card->private_data;
         hda = container_of(chip, struct hda_intel, chip);
+       bus = azx_bus(chip);
         if (chip->disabled || hda->init_failed || !chip->running)
                 return 0;
  
-       if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL
-               && hda->need_i915_power) {
-               snd_hdac_display_power(azx_bus(chip), true);
-               snd_hdac_i915_set_bclk(azx_bus(chip));
+       if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
+               snd_hdac_display_power(bus, true);
+               if (hda->need_i915_power)
+                       snd_hdac_i915_set_bclk(bus);
         }
+
         if (chip->msi)
                 if (pci_enable_msi(pci) < 0)
                         chip->msi = 0;
@@ -929,6 +932,11 @@ static int azx_resume(struct device *dev)
  
         hda_intel_init_chip(chip, true);
  
+       /* power down again for link-controlled chips */
+       if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+           !hda->need_i915_power)
+               snd_hdac_display_power(bus, false);
+
         snd_power_change_state(card, SNDRV_CTL_POWER_D0);
  
         trace_azx_resume(chip);
@@ -1008,6 +1016,7 @@ static int azx_runtime_resume(struct device *dev)
  
         chip = card->private_data;
         hda = container_of(chip, struct hda_intel, chip);
+       bus = azx_bus(chip);
         if (chip->disabled || hda->init_failed)
                 return 0;
  
@@ -1015,15 +1024,9 @@ static int azx_runtime_resume(struct device *dev)
                 return 0;
  
         if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
-               bus = azx_bus(chip);
-               if (hda->need_i915_power) {
-                       snd_hdac_display_power(bus, true);
+               snd_hdac_display_power(bus, true);
+               if (hda->need_i915_power)
                         snd_hdac_i915_set_bclk(bus);
-               } else {
-                       /* toggle codec wakeup bit for STATESTS read */
-                       snd_hdac_set_codec_wakeup(bus, true);
-                       snd_hdac_set_codec_wakeup(bus, false);
-               }
         }
  
         /* Read STATESTS before controller reset */
@@ -1043,6 +1046,11 @@ static int azx_runtime_resume(struct device *dev)
         azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
                         ~STATESTS_INT_MASK);
  
+       /* power down again for link-controlled chips */
+       if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+           !hda->need_i915_power)
+               snd_hdac_display_power(bus, false);
+
         trace_azx_runtime_resume(chip);
         return 0;
  }
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c

index 6adde457b602e08aedd1806e8b79863d65e006cc..6cf1f35974558053101e00351482574c2be6a988 100644 (file)
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1128,6 +1128,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
  {
         /* devices which do not support reading the sample rate. */
         switch (chip->usb_id) {
+       case USB_ID(0x041E, 0x4080): /* Creative Live Cam VF0610 */
         case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
         case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
         case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
@@ -1138,6 +1139,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
         case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
         case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
         case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
+       case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
         case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
         case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
         case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h

index f209ea151dca8ab8559243dffd8662008844b27c..3051f86a9b5f4ab976568b266b41f47313f86b82 100644 (file)
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -87,9 +87,11 @@ struct kvm_regs {
  /* Supported VGICv3 address types  */
  #define KVM_VGIC_V3_ADDR_TYPE_DIST     2
  #define KVM_VGIC_V3_ADDR_TYPE_REDIST   3
+#define KVM_VGIC_ITS_ADDR_TYPE         4
  
  #define KVM_VGIC_V3_DIST_SIZE          SZ_64K
  #define KVM_VGIC_V3_REDIST_SIZE                (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE           (2 * SZ_64K)
  
  #define KVM_ARM_VCPU_POWER_OFF         0 /* CPU is started in OFF state */
  #define KVM_ARM_VCPU_EL1_32BIT         1 /* CPU running a 32bit VM */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h

index 3b8e99ef9d58d44dc37ca64ced292cc7efcd6f39..a2ffec4139ad1cb8cebe816a9f0b3e261cd97d42 100644 (file)
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine {
         __u64 fac_list[256];
  };
  
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2
+#define KVM_S390_VM_CPU_MACHINE_FEAT   3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS   1024
+#define KVM_S390_VM_CPU_FEAT_ESOP      0
+#define KVM_S390_VM_CPU_FEAT_SIEF2     1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO   2
+#define KVM_S390_VM_CPU_FEAT_SIIF      3
+#define KVM_S390_VM_CPU_FEAT_GPERE     4
+#define KVM_S390_VM_CPU_FEAT_GSLS      5
+#define KVM_S390_VM_CPU_FEAT_IB                6
+#define KVM_S390_VM_CPU_FEAT_CEI       7
+#define KVM_S390_VM_CPU_FEAT_IBS       8
+#define KVM_S390_VM_CPU_FEAT_SKEY      9
+#define KVM_S390_VM_CPU_FEAT_CMMA      10
+#define KVM_S390_VM_CPU_FEAT_PFMFI     11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF    12
+struct kvm_s390_vm_cpu_feat {
+       __u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC      4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC                5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+       __u8 plo[32];           /* always */
+       __u8 ptff[16];          /* with TOD-clock steering */
+       __u8 kmac[16];          /* with MSA */
+       __u8 kmc[16];           /* with MSA */
+       __u8 km[16];            /* with MSA */
+       __u8 kimd[16];          /* with MSA */
+       __u8 klmd[16];          /* with MSA */
+       __u8 pckmo[16];         /* with MSA3 */
+       __u8 kmctr[16];         /* with MSA4 */
+       __u8 kmf[16];           /* with MSA4 */
+       __u8 kmo[16];           /* with MSA4 */
+       __u8 pcc[16];           /* with MSA4 */
+       __u8 ppno[16];          /* with MSA5 */
+       __u8 reserved[1824];
+};
+
  /* kvm attributes for crypto */
  #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW       0
  #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h

index 8fb5d4a6dd25bccfae4e2f3beef5ed81725adb60..3ac6343689394d0b128907a3f338958ec0f32471 100644 (file)
--- a/tools/arch/s390/include/uapi/asm/sie.h
+++ b/tools/arch/s390/include/uapi/asm/sie.h
@@ -140,6 +140,7 @@
         exit_code_ipa0(0xB2, 0x4c, "TAR"),      \
         exit_code_ipa0(0xB2, 0x50, "CSP"),      \
         exit_code_ipa0(0xB2, 0x54, "MVPG"),     \
+       exit_code_ipa0(0xB2, 0x56, "STHYI"),    \
         exit_code_ipa0(0xB2, 0x58, "BSG"),      \
         exit_code_ipa0(0xB2, 0x5a, "BSA"),      \
         exit_code_ipa0(0xB2, 0x5f, "CHSC"),     \
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h

index 4a413485f9eb8ef58ec71c77ff2594f4300c8ea6..92a8308b96f64cb6ce845a8379ca06cb9a6a00d6 100644 (file)
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -225,7 +225,6 @@
  #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
  #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
  #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT    ( 9*32+22) /* PCOMMIT instruction */
  #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
  #define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
  #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
@@ -301,10 +300,6 @@
  #define X86_BUG_FXSAVE_LEAK    X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
  #define X86_BUG_CLFLUSH_MONITOR        X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
  #define X86_BUG_SYSRET_SS_ATTRS        X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG       X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE   X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
  #ifdef CONFIG_X86_32
  /*
   * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
@@ -312,5 +307,7 @@
   */
  #define X86_BUG_ESPFIX         X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
  #endif
-
+#define X86_BUG_NULL_SEG       X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE   X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR                X86_BUG(12) /* IPI required to wake up remote CPU */
  #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h

index 911e9358ceb184b6b0b0f38b9c7b853fc4506fbe..85599ad4d0247863cef655d02b9a4b3f83c77fb7 100644 (file)
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,5 +56,7 @@
  #define DISABLED_MASK14        0
  #define DISABLED_MASK15        0
  #define DISABLED_MASK16        (DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17        0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
  
  #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h

index 4916144e3c42668a3e07af33859b4a1af3f2985b..fac9a5c0abe94b233b72b35bca8c7a665847b694 100644 (file)
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -99,5 +99,7 @@
  #define REQUIRED_MASK14        0
  #define REQUIRED_MASK15        0
  #define REQUIRED_MASK16        0
+#define REQUIRED_MASK17        0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
  
  #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h

index 5b15d94a33f818d04ee7ae2a0f5685125bd89a40..37fee272618f1de348a7d5961f1792debba72991 100644 (file)
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -78,7 +78,6 @@
  #define EXIT_REASON_PML_FULL            62
  #define EXIT_REASON_XSAVES              63
  #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
  
  #define VMX_EXIT_REASONS \
         { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -127,8 +126,7 @@
         { EXIT_REASON_INVVPID,               "INVVPID" }, \
         { EXIT_REASON_INVPCID,               "INVPCID" }, \
         { EXIT_REASON_XSAVES,                "XSAVES" }, \
-       { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-       { EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }
  
  #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
  #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 406459b935a27c4f9b518426a4fd60493432221d..da218fec605657ee415f8ad71a95d8851330a9de 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -84,6 +84,7 @@ enum bpf_map_type {
         BPF_MAP_TYPE_PERCPU_HASH,
         BPF_MAP_TYPE_PERCPU_ARRAY,
         BPF_MAP_TYPE_STACK_TRACE,
+       BPF_MAP_TYPE_CGROUP_ARRAY,
  };
  
  enum bpf_prog_type {
@@ -93,6 +94,7 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_SCHED_CLS,
         BPF_PROG_TYPE_SCHED_ACT,
         BPF_PROG_TYPE_TRACEPOINT,
+       BPF_PROG_TYPE_XDP,
  };
  
  #define BPF_PSEUDO_MAP_FD      1
@@ -313,6 +315,66 @@ enum bpf_func_id {
          */
         BPF_FUNC_skb_get_tunnel_opt,
         BPF_FUNC_skb_set_tunnel_opt,
+
+       /**
+        * bpf_skb_change_proto(skb, proto, flags)
+        * Change protocol of the skb. Currently supported is
+        * v4 -> v6, v6 -> v4 transitions. The helper will also
+        * resize the skb. eBPF program is expected to fill the
+        * new headers via skb_store_bytes and lX_csum_replace.
+        * @skb: pointer to skb
+        * @proto: new skb->protocol type
+        * @flags: reserved
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_proto,
+
+       /**
+        * bpf_skb_change_type(skb, type)
+        * Change packet type of skb.
+        * @skb: pointer to skb
+        * @type: new skb->pkt_type type
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_type,
+
+       /**
+        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * @skb: pointer to skb
+        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+        * @index: index of the cgroup in the bpf_map
+        * Return:
+        *   == 0 skb failed the cgroup2 descendant test
+        *   == 1 skb succeeded the cgroup2 descendant test
+        *    < 0 error
+        */
+       BPF_FUNC_skb_in_cgroup,
+
+       /**
+        * bpf_get_hash_recalc(skb)
+        * Retrieve and possibly recalculate skb->hash.
+        * @skb: pointer to skb
+        * Return: hash
+        */
+       BPF_FUNC_get_hash_recalc,
+
+       /**
+        * u64 bpf_get_current_task(void)
+        * Returns current task_struct
+        * Return: current
+        */
+       BPF_FUNC_get_current_task,
+
+       /**
+        * bpf_probe_write_user(void *dst, void *src, int len)
+        * safely attempt to write to a location
+        * @dst: destination address in userspace
+        * @src: source address on stack
+        * @len: number of bytes to copy
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_probe_write_user,
+
         __BPF_FUNC_MAX_ID,
  };
  
@@ -347,9 +409,11 @@ enum bpf_func_id {
  #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
  #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
  
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
  #define BPF_F_INDEX_MASK               0xffffffffULL
  #define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
  
  /* user accessible mirror of in-kernel sk_buff.
   * new fields can only be added to the end of this structure
@@ -386,4 +450,24 @@ struct bpf_tunnel_key {
         __u32 tunnel_label;
  };
  
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+       XDP_ABORTED = 0,
+       XDP_DROP,
+       XDP_PASS,
+       XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+       __u32 data;
+       __u32 data_end;
+};
+
  #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt

index 736da44596e451fa1a14d9a045f7feda269a0779..b303bcdd8ed15fb9d140e0e7369388bc714aaace 100644 (file)
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -176,10 +176,18 @@ Each probe argument follows below syntax.
  
  'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
  '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo. Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), signedness casting (u/s), "string" and bitfield are supported. (See TYPES for details.)
  
  On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
  
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) are integer types. The prefixes 's' and 'u' mean those types are signed and unsigned, respectively. Traced arguments are shown in decimal (signed) or hex (unsigned). You can also use 's' or 'u' to specify only the signedness and leave the size to be auto-detected by perf probe.
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters: bit-width, bit-offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
  LINE SYNTAX
  -----------
  Line range is described by following syntax.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt

index 1f6c70594f0f79e378163c6430b9028642eee45d..053bbbd84ece30c673afe7e176328f8390a6bda9 100644 (file)
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,8 @@ OPTIONS
  --fields::
          Comma separated list of fields to print. Options are:
          comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-       srcline, period, iregs, brstack, brstacksym, flags.
-        Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        callindent. Field list can be prepended with the type, trace, sw or hw,
          to indicate to which event type the field list applies.
          e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
  
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c

index c6d0f91731a14732333af62d0a40a3ea43fb4c99..35745a733100e70f27c6c95ce97ce4ef5a6fabfa 100644 (file)
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -54,10 +54,6 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
  #endif
  
  #if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
-       return true;
-}
  
  #ifdef HAVE_LIBELF_SUPPORT
  void arch__sym_update(struct symbol *s, GElf_Sym *sym)
@@ -100,4 +96,29 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
                         tev->point.offset += lep_offset;
         }
  }
+
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          int ntevs)
+{
+       struct probe_trace_event *tev;
+       struct map *map;
+       struct symbol *sym = NULL;
+       struct rb_node *tmp;
+       int i = 0;
+
+       map = get_target_map(pev->target, pev->uprobes);
+       if (!map || map__load(map, NULL) < 0)
+               return;
+
+       for (i = 0; i < ntevs; i++) {
+               tev = &pev->tevs[i];
+               map__for_each_symbol(map, sym, tmp) {
+                       if (map->unmap_ip(map, sym->start) == tev->point.address)
+                               arch__fix_tev_from_maps(pev, tev, map, sym);
+               }
+       }
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
  #endif
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c

index fb51457ba338f8c1a3dc549582213eaf5446ec56..a2412e9d883b5246e8ebe1f0b356bbbebe01adcf 100644 (file)
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -501,7 +501,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
         struct intel_pt_recording *ptr =
                         container_of(itr, struct intel_pt_recording, itr);
         struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
-       bool have_timing_info;
+       bool have_timing_info, need_immediate = false;
         struct perf_evsel *evsel, *intel_pt_evsel = NULL;
         const struct cpu_map *cpus = evlist->cpus;
         bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
@@ -655,6 +655,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                                 ptr->have_sched_switch = 3;
                         } else {
                                 opts->record_switch_events = true;
+                               need_immediate = true;
                                 if (cpu_wide)
                                         ptr->have_sched_switch = 3;
                                 else
@@ -700,6 +701,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                 tracking_evsel->attr.freq = 0;
                 tracking_evsel->attr.sample_period = 1;
  
+               if (need_immediate)
+                       tracking_evsel->immediate = true;
+
                 /* In per-cpu case, always need the time of mmap events etc */
                 if (!cpu_map__empty(cpus)) {
                         perf_evsel__set_sample_bit(tracking_evsel, TIME);
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c

index d608a2c9e48cd219e82697bdfa9331a477e9eeed..d1ce29be560e5e7dad2a2faca9a68c2246510a11 100644 (file)
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -88,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
         if (mem->operation & MEM_OPERATION_LOAD)
                 perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
  
+       if (mem->operation & MEM_OPERATION_STORE)
+               perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+
         if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
                 rec_argv[i++] = "-W";
  
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c

index 971ff91b16cb3be52702cca780c3df818d52c51a..c859e59dfe3e7efae711fa056967c910a987d989 100644 (file)
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -371,14 +371,16 @@ static int perf_session__check_output_opt(struct perf_session *session)
  
         if (!no_callchain) {
                 bool use_callchain = false;
+               bool not_pipe = false;
  
                 evlist__for_each_entry(session->evlist, evsel) {
+                       not_pipe = true;
                         if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
                                 use_callchain = true;
                                 break;
                         }
                 }
-               if (!use_callchain)
+               if (not_pipe && !use_callchain)
                         symbol_conf.use_callchain = false;
         }
  
@@ -1690,8 +1692,13 @@ static int list_available_scripts(const struct option *opt __maybe_unused,
         snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
  
         scripts_dir = opendir(scripts_path);
-       if (!scripts_dir)
-               return -1;
+       if (!scripts_dir) {
+               fprintf(stdout,
+                       "open(%s) failed.\n"
+                       "Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
+                       scripts_path);
+               exit(-1);
+       }
  
         for_each_lang(scripts_path, scripts_dir, lang_dirent) {
                 snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
@@ -2116,7 +2123,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
                      "Valid types: hw,sw,trace,raw. "
                      "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
                      "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-                    "callindent", parse_output_fields),
+                    "bpf-output,callindent", parse_output_fields),
         OPT_BOOLEAN('a', "all-cpus", &system_wide,
                     "system-wide collection from all CPUs"),
         OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c

index 0c16d20d7e32fa2eb7377247c2e5542d2a5d076a..3c7452b39f57649b05d675db3d19395fb765df2d 100644 (file)
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -331,7 +331,7 @@ static int read_counter(struct perf_evsel *counter)
         return 0;
  }
  
-static void read_counters(bool close_counters)
+static void read_counters(void)
  {
         struct perf_evsel *counter;
  
@@ -341,11 +341,6 @@ static void read_counters(bool close_counters)
  
                 if (perf_stat_process_counter(&stat_config, counter))
                         pr_warning("failed to process counter %s\n", counter->name);
-
-               if (close_counters) {
-                       perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
-                                            thread_map__nr(evsel_list->threads));
-               }
         }
  }
  
@@ -353,7 +348,7 @@ static void process_interval(void)
  {
         struct timespec ts, rs;
  
-       read_counters(false);
+       read_counters();
  
         clock_gettime(CLOCK_MONOTONIC, &ts);
         diff_timespec(&rs, &ts, &ref_time);
@@ -380,6 +375,17 @@ static void enable_counters(void)
                 perf_evlist__enable(evsel_list);
  }
  
+static void disable_counters(void)
+{
+       /*
+        * If we don't have tracee (attaching to task or cpu), counters may
+        * still be running. To get accurate group ratios, we must stop groups
+        * from counting before reading their constituent counters.
+        */
+       if (!target__none(&target))
+               perf_evlist__disable(evsel_list);
+}
+
  static volatile int workload_exec_errno;
  
  /*
@@ -657,11 +663,20 @@ try_again:
                 }
         }
  
+       disable_counters();
+
         t1 = rdclock();
  
         update_stats(&walltime_nsecs_stats, t1 - t0);
  
-       read_counters(true);
+       /*
+        * Closing a group leader splits the group, and as we only disable
+        * group leaders, results in remaining events becoming enabled. To
+        * avoid arbitrary skew, we must read all counters before closing any
+        * group leaders.
+        */
+       read_counters();
+       perf_evlist__close(evsel_list);
  
         return WEXITSTATUS(status);
  }
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c

index 9c8f15da86ce8ad8f735815cb3c0f55ca10d7167..8ff6c6a61291f9bdfbb8c50e9e0018eb887a8665 100644 (file)
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -123,8 +123,6 @@ struct intel_pt_decoder {
         bool have_calc_cyc_to_tsc;
         int exec_mode;
         unsigned int insn_bytes;
-       uint64_t sign_bit;
-       uint64_t sign_bits;
         uint64_t period;
         enum intel_pt_period_type period_type;
         uint64_t tot_insn_cnt;
@@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
         decoder->data               = params->data;
         decoder->return_compression = params->return_compression;
  
-       decoder->sign_bit           = (uint64_t)1 << 47;
-       decoder->sign_bits          = ~(((uint64_t)1 << 48) - 1);
-
         decoder->period             = params->period;
         decoder->period_type        = params->period_type;
  
@@ -362,21 +357,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen)
         return 0;
  }
  
-static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
-                                const struct intel_pt_pkt *packet,
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
                                  uint64_t last_ip)
  {
         uint64_t ip;
  
         switch (packet->count) {
-       case 2:
+       case 1:
                 ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
                      packet->payload;
                 break;
-       case 4:
+       case 2:
                 ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
                      packet->payload;
                 break;
+       case 3:
+               ip = packet->payload;
+               /* Sign-extend 6-byte ip */
+               if (ip & (uint64_t)0x800000000000ULL)
+                       ip |= (uint64_t)0xffff000000000000ULL;
+               break;
+       case 4:
+               ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+                    packet->payload;
+               break;
         case 6:
                 ip = packet->payload;
                 break;
@@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
                 return 0;
         }
  
-       if (ip & decoder->sign_bit)
-               return ip | decoder->sign_bits;
-
         return ip;
  }
  
  static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
  {
-       decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
-                                           decoder->last_ip);
+       decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
  }
  
  static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -1657,6 +1657,12 @@ next:
         }
  }
  
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+       return decoder->last_ip || decoder->packet.count == 0 ||
+              decoder->packet.count == 3 || decoder->packet.count == 6;
+}
+
  /* Walk PSB+ packets to get in sync. */
  static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
  {
@@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
  
                 case INTEL_PT_FUP:
                         decoder->pge = true;
-                       if (decoder->last_ip || decoder->packet.count == 6 ||
-                           decoder->packet.count == 0) {
+                       if (intel_pt_have_ip(decoder)) {
                                 uint64_t current_ip = decoder->ip;
  
                                 intel_pt_set_ip(decoder);
@@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
                 case INTEL_PT_TIP_PGE:
                 case INTEL_PT_TIP:
                         decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
-                       if (decoder->last_ip || decoder->packet.count == 6 ||
-                           decoder->packet.count == 0)
+                       if (intel_pt_have_ip(decoder))
                                 intel_pt_set_ip(decoder);
                         if (decoder->ip)
                                 return 0;
@@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
  
                 case INTEL_PT_FUP:
                         if (decoder->overflow) {
-                               if (decoder->last_ip ||
-                                   decoder->packet.count == 6 ||
-                                   decoder->packet.count == 0)
+                               if (intel_pt_have_ip(decoder))
                                         intel_pt_set_ip(decoder);
                                 if (decoder->ip)
                                         return 0;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c

index b1257c816310fefe0bed0e963fc106273c019da1..4f7b32020487011a6bb04ec8d339e7d7c4dfe7cc 100644 (file)
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
                            const unsigned char *buf, size_t len,
                            struct intel_pt_pkt *packet)
  {
-       switch (byte >> 5) {
+       int ip_len;
+
+       packet->count = byte >> 5;
+
+       switch (packet->count) {
         case 0:
-               packet->count = 0;
+               ip_len = 0;
                 break;
         case 1:
                 if (len < 3)
                         return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 2;
+               ip_len = 2;
                 packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
                 break;
         case 2:
                 if (len < 5)
                         return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 4;
+               ip_len = 4;
                 packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
                 break;
         case 3:
-       case 6:
+       case 4:
                 if (len < 7)
                         return INTEL_PT_NEED_MORE_BYTES;
-               packet->count = 6;
+               ip_len = 6;
                 memcpy_le64(&packet->payload, buf + 1, 6);
                 break;
+       case 6:
+               if (len < 9)
+                       return INTEL_PT_NEED_MORE_BYTES;
+               ip_len = 8;
+               packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+               break;
         default:
                 return INTEL_PT_BAD_PACKET;
         }
  
         packet->type = type;
  
-       return packet->count + 1;
+       return ip_len + 1;
  }
  
  static int intel_pt_get_mode(const unsigned char *buf, size_t len,
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c

index 9f3305f6b6d5871ebde2c27f4ca1b2ac59953340..95f0884aae0286078681ecf19546546f9d6fb2a9 100644 (file)
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -1,3 +1,4 @@
+#include <sys/sysmacros.h>
  #include <sys/types.h>
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c

index 953dc1ab2ed7bd0be442c4491c9c8a4862aa3386..28733962cd80a63e1376b5b71f4771ab8f8857f7 100644 (file)
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -170,15 +170,17 @@ static struct map *kernel_get_module_map(const char *module)
                 module = "kernel";
  
         for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+               /* short_name is "[module]" */
                 if (strncmp(pos->dso->short_name + 1, module,
-                           pos->dso->short_name_len - 2) == 0) {
+                           pos->dso->short_name_len - 2) == 0 &&
+                   module[pos->dso->short_name_len - 2] == '\0') {
                         return pos;
                 }
         }
         return NULL;
  }
  
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
  {
         /* Init maps of given executable or kernel */
         if (user)
@@ -385,7 +387,7 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
                 if (uprobes)
                         address = sym->start;
                 else
-                       address = map->unmap_ip(map, sym->start);
+                       address = map->unmap_ip(map, sym->start) - map->reloc;
                 break;
         }
         if (!address) {
@@ -664,22 +666,14 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
         return ret;
  }
  
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
-                                          int ntevs, const char *module,
-                                          bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+                                      int ntevs)
  {
         struct ref_reloc_sym *reloc_sym;
         char *tmp;
         int i, skipped = 0;
  
-       if (uprobe)
-               return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
-       /* Note that currently ref_reloc_sym based probe is not for drivers */
-       if (module)
-               return add_module_to_probe_trace_events(tevs, ntevs, module);
-
         reloc_sym = kernel_get_ref_reloc_sym();
         if (!reloc_sym) {
                 pr_warning("Relocated base symbol is not found!\n");
@@ -711,6 +705,34 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
         return skipped;
  }
  
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+                                     int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          struct probe_trace_event *tevs,
+                                          int ntevs, const char *module,
+                                          bool uprobe)
+{
+       int ret;
+
+       if (uprobe)
+               ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+       else if (module)
+               /* Currently ref_reloc_sym based probe is not for drivers */
+               ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+       else
+               ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+       if (ret >= 0)
+               arch__post_process_probe_trace_events(pev, ntevs);
+
+       return ret;
+}
+
  /* Try to find perf_probe_event with debuginfo */
  static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                                           struct probe_trace_event **tevs)
@@ -749,7 +771,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
  
         if (ntevs > 0) {        /* Succeeded to find trace events */
                 pr_debug("Found %d probe_trace_events.\n", ntevs);
-               ret = post_process_probe_trace_events(*tevs, ntevs,
+               ret = post_process_probe_trace_events(pev, *tevs, ntevs,
                                                 pev->target, pev->uprobes);
                 if (ret < 0 || ret == ntevs) {
                         clear_probe_trace_events(*tevs, ntevs);
@@ -2936,8 +2958,6 @@ errout:
         return err;
  }
  
-bool __weak arch__prefers_symtab(void) { return false; }
-
  /* Concatinate two arrays */
  static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
  {
@@ -3158,12 +3178,6 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
         if (ret > 0 || pev->sdt)        /* SDT can be found only in the cache */
                 return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
  
-       if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-               ret = find_probe_trace_events_from_map(pev, tevs);
-               if (ret > 0)
-                       return ret; /* Found in symbol table */
-       }
-
         /* Convert perf_probe_event with debuginfo */
         ret = try_to_find_probe_trace_events(pev, tevs);
         if (ret != 0)
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h

index e18ea9fe63857cb7a9b382dac563fd2c8cdfbd85..f4f45db77c1c1ec59c3ee505f525f2b2561530ee 100644 (file)
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -158,7 +158,6 @@ int show_line_range(struct line_range *lr, const char *module, bool user);
  int show_available_vars(struct perf_probe_event *pevs, int npevs,
                         struct strfilter *filter);
  int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
  void arch__fix_tev_from_maps(struct perf_probe_event *pev,
                              struct probe_trace_event *tev, struct map *map,
                              struct symbol *sym);
@@ -173,4 +172,9 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
  int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
                             struct perf_probe_arg *pvar);
  
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+                                          int ntevs);
+
  #endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c

index 9aed9c332da656c69d91693e531df87fb03007b1..9c3b9ed5b3c3ec68ee175b6bec91d1c90211798b 100644 (file)
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -133,7 +133,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag)
  /* Get raw string list of current kprobe_events  or uprobe_events */
  struct strlist *probe_file__get_rawlist(int fd)
  {
-       int ret, idx;
+       int ret, idx, fddup;
         FILE *fp;
         char buf[MAX_CMDLEN];
         char *p;
@@ -143,8 +143,17 @@ struct strlist *probe_file__get_rawlist(int fd)
                 return NULL;
  
         sl = strlist__new(NULL, NULL);
+       if (sl == NULL)
+               return NULL;
+
+       fddup = dup(fd);
+       if (fddup < 0)
+               goto out_free_sl;
+
+       fp = fdopen(fddup, "r");
+       if (!fp)
+               goto out_close_fddup;
  
-       fp = fdopen(dup(fd), "r");
         while (!feof(fp)) {
                 p = fgets(buf, MAX_CMDLEN, fp);
                 if (!p)
@@ -156,13 +165,21 @@ struct strlist *probe_file__get_rawlist(int fd)
                 ret = strlist__add(sl, buf);
                 if (ret < 0) {
                         pr_debug("strlist__add failed (%d)\n", ret);
-                       strlist__delete(sl);
-                       return NULL;
+                       goto out_close_fp;
                 }
         }
         fclose(fp);
  
         return sl;
+
+out_close_fp:
+       fclose(fp);
+       goto out_free_sl;
+out_close_fddup:
+       close(fddup);
+out_free_sl:
+       strlist__delete(sl);
+       return NULL;
  }
  
  static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
@@ -447,12 +464,17 @@ static int probe_cache__load(struct probe_cache *pcache)
  {
         struct probe_cache_entry *entry = NULL;
         char buf[MAX_CMDLEN], *p;
-       int ret = 0;
+       int ret = 0, fddup;
         FILE *fp;
  
-       fp = fdopen(dup(pcache->fd), "r");
-       if (!fp)
+       fddup = dup(pcache->fd);
+       if (fddup < 0)
+               return -errno;
+       fp = fdopen(fddup, "r");
+       if (!fp) {
+               close(fddup);
                 return -EINVAL;
+       }
  
         while (!feof(fp)) {
                 if (!fgets(buf, MAX_CMDLEN, fp))
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c

index f2d9ff064e2de720247e77f9645faf94b6b4ce57..5c290c682afe7176607fe01f4d29742b1821f1a9 100644 (file)
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -297,10 +297,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
         char sbuf[STRERR_BUFSIZE];
         int bsize, boffs, total;
         int ret;
+       char sign;
  
         /* TODO: check all types */
-       if (cast && strcmp(cast, "string") != 0) {
+       if (cast && strcmp(cast, "string") != 0 &&
+           strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
                 /* Non string type is OK */
+               /* and respect signedness cast */
                 tvar->type = strdup(cast);
                 return (tvar->type == NULL) ? -ENOMEM : 0;
         }
@@ -361,6 +364,13 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                 return (tvar->type == NULL) ? -ENOMEM : 0;
         }
  
+       if (cast && (strcmp(cast, "u") == 0))
+               sign = 'u';
+       else if (cast && (strcmp(cast, "s") == 0))
+               sign = 's';
+       else
+               sign = die_is_signed_type(&type) ? 's' : 'u';
+
         ret = dwarf_bytesize(&type);
         if (ret <= 0)
                 /* No size ... try to use default type */
@@ -373,8 +383,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                         dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
                 ret = MAX_BASIC_TYPE_BITS;
         }
-       ret = snprintf(buf, 16, "%c%d",
-                      die_is_signed_type(&type) ? 's' : 'u', ret);
+       ret = snprintf(buf, 16, "%c%d", sign, ret);
  
  formatted:
         if (ret < 0 || ret >= 16) {
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c

index 947d21f3839838c433430b01fe52165522f87295..3d3cb8392c86029bb488f737564730e0cd8995bc 100644 (file)
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -588,7 +588,11 @@ static char *get_trace_output(struct hist_entry *he)
         } else {
                 pevent_event_info(&seq, evsel->tp_format, &rec);
         }
-       return seq.buffer;
+       /*
+        * Trim the buffer, it starts at 4KB and we're not going to
+        * add anything more to this buffer.
+        */
+       return realloc(seq.buffer, seq.len + 1);
  }
  
  static int64_t
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c

index a34321e9b44d8a42c1f4839b844b15e196a6417b..a811c13a74d663ac40efdf03333145450e7d7c19 100644 (file)
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -837,7 +837,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
         sec = syms_ss->symtab;
         shdr = syms_ss->symshdr;
  
-       if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+       if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+                               ".text", NULL))
                 dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
  
         if (runtime_ss->opdsec)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c

index 5404efa578a3fcea18ce5bbab2a991e0c3d98b73..dd48f421844c7902773526d8c0845109ab9c5e55 100644 (file)
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -13,6 +13,7 @@
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  #include <linux/platform_device.h>
  #include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
  #include <linux/libnvdimm.h>
  #include <linux/vmalloc.h>
  #include <linux/device.h>
@@ -1474,6 +1475,7 @@ static int nfit_test_probe(struct platform_device *pdev)
         if (nfit_test->setup != nfit_test0_setup)
                 return 0;
  
+       flush_work(&acpi_desc->work);
         nfit_test->setup_hotplug = 1;
         nfit_test->setup(nfit_test);
  
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile

index 3c40c9d0e6c70a87b83c9d92a61f0fc0f2f4570c..1cc6d64c39b709dd28f104ced47829d7427c435f 100644 (file)
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc)
  
  GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
  
-CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
  
  export CFLAGS
  
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h

index 4f93af89ae165af7f82ced48e57c00460e6ce34d..18601f6689b9e13de81f4926423c7b88cdc44bdf 100644 (file)
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -14,4 +14,20 @@ enum dma_data_direction {
         DMA_NONE = 3,
  };
  
+#define dma_alloc_coherent(d, s, hp, f) ({ \
+       void *__dma_alloc_coherent_p = kmalloc((s), (f)); \
+       *(hp) = (unsigned long)__dma_alloc_coherent_p; \
+       __dma_alloc_coherent_p; \
+})
+
+#define dma_free_coherent(d, s, p, h) kfree(p)
+
+#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o))
+
+#define dma_map_single(d, p, s, dir) (virt_to_phys(p))
+#define dma_mapping_error(...) (0)
+
+#define dma_unmap_single(...) do { } while (0)
+#define dma_unmap_page(...) do { } while (0)
+
  #endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h

index 0338499482159883bb3e30452d5afc0f77af577e..d9554fc3f3403c2adef3d883fb767cabaa82015e 100644 (file)
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -20,7 +20,9 @@
  
  #define PAGE_SIZE getpagesize()
  #define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK)
  
+typedef unsigned long long phys_addr_t;
  typedef unsigned long long dma_addr_t;
  typedef size_t __kernel_size_t;
  typedef unsigned int __wsum;
@@ -57,6 +59,11 @@ static inline void *kzalloc(size_t s, gfp_t gfp)
         return p;
  }
  
+static inline void *alloc_pages_exact(size_t s, gfp_t gfp)
+{
+       return kmalloc(s, gfp);
+}
+
  static inline void kfree(void *p)
  {
         if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
@@ -64,6 +71,11 @@ static inline void kfree(void *p)
         free(p);
  }
  
+static inline void free_pages_exact(void *p, size_t s)
+{
+       kfree(p);
+}
+
  static inline void *krealloc(void *p, size_t s, gfp_t gfp)
  {
         return realloc(p, s);
@@ -105,6 +117,8 @@ static inline void free_page(unsigned long addr)
  #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
  #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
  
+#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+
  #define min(x, y) ({                           \
         typeof(x) _min1 = (x);                  \
         typeof(y) _min2 = (y);                  \
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h

index 81baeac8ae40249728e1bd40188da4d09bfc854b..7e1c1197d4390ede3a3ec6447256ee09ebf38600 100644 (file)
--- a/tools/virtio/linux/slab.h
+++ b/tools/virtio/linux/slab.h
@@ -1,2 +1,6 @@
  #ifndef LINUX_SLAB_H
+#define GFP_KERNEL 0
+#define GFP_ATOMIC 0
+#define __GFP_NOWARN 0
+#define __GFP_ZERO 0
  #endif
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h

index ee125e714053a91a76658d417a46232349927450..9377c8b4ac167723de43088e96e5b5f0effcf4b2 100644 (file)
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -3,8 +3,12 @@
  #include <linux/scatterlist.h>
  #include <linux/kernel.h>
  
+struct device {
+       void *parent;
+};
+
  struct virtio_device {
-       void *dev;
+       struct device dev;
         u64 features;
  };
  
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h

index 57a6964a1e355b8daa154adff9bd007c93f8374f..9ba11815e0a16b93ec0e04735a3486ac9dc6bfe4 100644 (file)
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -40,6 +40,19 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
  #define virtio_has_feature(dev, feature) \
         (__virtio_test_bit((dev), feature))
  
+/**
+ * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * @vdev: the device
+ */
+static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+{
+       /*
+        * Note the reverse polarity of the quirk feature (compared to most
+        * other features), this is for compatibility with legacy systems.
+        */
+       return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
  static inline bool virtio_is_little_endian(struct virtio_device *vdev)
  {
         return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c

index 68e4f9f0da3abe48ea6e91cb3c8ce215bbd118c5..bd2ad1d3b7a9ef88e28e1ad982dd37638841fa04 100644 (file)
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -13,6 +13,7 @@
  #define cache_line_size() SMP_CACHE_BYTES
  #define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
  #define unlikely(x)    (__builtin_expect(!!(x), 0))
+#define likely(x)    (__builtin_expect(!!(x), 1))
  #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
  typedef pthread_spinlock_t  spinlock_t;
  
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c

index fb4b0a79a9502ba9bcaad7acfdbbf692c5ed5208..83777c1cbae0693c14e1d3f23df927c5cae25fef 100644 (file)
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -73,12 +73,8 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
         int i, vcpu_lock_idx = -1, ret;
         struct kvm_vcpu *vcpu;
  
-       mutex_lock(&kvm->lock);
-
-       if (irqchip_in_kernel(kvm)) {
-               ret = -EEXIST;
-               goto out;
-       }
+       if (irqchip_in_kernel(kvm))
+               return -EEXIST;
  
         /*
          * This function is also called by the KVM_CREATE_IRQCHIP handler,
@@ -87,10 +83,8 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
          * the proper checks already.
          */
         if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
-               !kvm_vgic_global_state.can_emulate_gicv2) {
-               ret = -ENODEV;
-               goto out;
-       }
+               !kvm_vgic_global_state.can_emulate_gicv2)
+               return -ENODEV;
  
         /*
          * Any time a vcpu is run, vcpu_load is called which tries to grab the
@@ -138,9 +132,6 @@ out_unlock:
                 vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
                 mutex_unlock(&vcpu->mutex);
         }
-
-out:
-       mutex_unlock(&kvm->lock);
         return ret;
  }
  
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index cc081ccfcaa3743ca5e0f46dd350bda36bc3ec72..195078225aa5d0c3b3214fd40e0b5f441c5d7518 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -696,6 +696,11 @@ static void kvm_destroy_devices(struct kvm *kvm)
  {
         struct kvm_device *dev, *tmp;
  
+       /*
+        * We do not need to take the kvm->lock here, because nobody else
+        * has a reference to the struct kvm at this point and therefore
+        * cannot access the devices list anyhow.
+        */
         list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
                 list_del(&dev->vm_node);
                 dev->ops->destroy(dev);
@@ -2832,19 +2837,28 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
         dev->ops = ops;
         dev->kvm = kvm;
  
+       mutex_lock(&kvm->lock);
         ret = ops->create(dev, cd->type);
         if (ret < 0) {
+               mutex_unlock(&kvm->lock);
                 kfree(dev);
                 return ret;
         }
+       list_add(&dev->vm_node, &kvm->devices);
+       mutex_unlock(&kvm->lock);
+
+       if (ops->init)
+               ops->init(dev);
  
         ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
         if (ret < 0) {
                 ops->destroy(dev);
+               mutex_lock(&kvm->lock);
+               list_del(&dev->vm_node);
+               mutex_unlock(&kvm->lock);
                 return ret;
         }
  
-       list_add(&dev->vm_node, &kvm->devices);
         kvm_get_kvm(kvm);
         cd->fd = ret;
         return 0;
author	Ingo Molnar <mingo@kernel.org>
	Thu, 8 Sep 2016 06:41:52 +0000 (08:41 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 8 Sep 2016 06:41:52 +0000 (08:41 +0200)