git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'x86/cpu' into x86/mm, before applying dependent patch
author Ingo Molnar <mingo@kernel.org>
Thu, 30 Mar 2017 07:07:54 +0000 (09:07 +0200)
committer Ingo Molnar <mingo@kernel.org>
Thu, 30 Mar 2017 07:07:54 +0000 (09:07 +0200)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
376 files changed:
Documentation/arm64/silicon-errata.txt
Documentation/cgroup-v2.txt
Documentation/devicetree/bindings/powerpc/4xx/emac.txt
Documentation/networking/ip-sysctl.txt
Documentation/virtual/kvm/api.txt
Documentation/x86/x86_64/mm.txt
Makefile
arch/Kconfig
arch/arm/Kconfig
arch/arm/include/asm/kvm_arm.h
arch/arm/include/asm/kvm_host.h
arch/arm/kvm/arm.c
arch/arm/kvm/handle_exit.c
arch/arm64/Kconfig
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/kernel/cpuidle.c
arch/arm64/kernel/probes/kprobes.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/tlb.c
arch/arm64/mm/kasan_init.c
arch/powerpc/Kconfig
arch/powerpc/boot/zImage.lds.S
arch/powerpc/crypto/crc32c-vpmsum_glue.c
arch/powerpc/include/asm/bitops.h
arch/powerpc/include/asm/mce.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/mce.c
arch/powerpc/kernel/mce_power.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/isa207-common.c
arch/powerpc/perf/isa207-common.h
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/purgatory/trampoline.S
arch/s390/crypto/paes_s390.c
arch/s390/include/asm/cputime.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/timex.h
arch/s390/include/uapi/asm/unistd.h
arch/s390/kernel/compat_wrapper.c
arch/s390/kernel/entry.S
arch/s390/kernel/ipl.c
arch/s390/kernel/process.c
arch/s390/kernel/syscalls.S
arch/s390/kernel/vtime.c
arch/s390/mm/pgtable.c
arch/score/kernel/traps.c
arch/score/mm/extable.c
arch/um/include/asm/mmu_context.h
arch/um/include/shared/os.h
arch/unicore32/include/asm/mmu_context.h
arch/x86/Kconfig
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/vdso/vma.c
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/kexec.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pgalloc.h
arch/x86/include/asm/pgtable-2level_types.h
arch/x86/include/asm/pgtable-3level.h
arch/x86/include/asm/pgtable-3level_types.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/proto.h
arch/x86/include/asm/purgatory.h [new file with mode: 0644]
arch/x86/include/asm/stackprotector.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/xen/page.h
arch/x86/include/uapi/asm/prctl.h
arch/x86/kernel/acpi/sleep.c
arch/x86/kernel/apm_32.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/machine_kexec_32.c
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/reboot.c
arch/x86/kernel/setup.c
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/sys_x86_64.c
arch/x86/kernel/tboot.c
arch/x86/kernel/tls.c
arch/x86/kernel/vm86_32.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/mm/Makefile
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/gup.c [deleted file]
arch/x86/mm/hugetlbpage.c
arch/x86/mm/ident_map.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/mmap.c
arch/x86/mm/pageattr.c
arch/x86/mm/pgtable.c
arch/x86/mm/pgtable_32.c
arch/x86/platform/efi/efi_32.c
arch/x86/platform/efi/efi_64.c
arch/x86/power/cpu.c
arch/x86/power/hibernate_32.c
arch/x86/power/hibernate_64.c
arch/x86/purgatory/purgatory.c
arch/x86/purgatory/purgatory.h [deleted file]
arch/x86/purgatory/setup-x86_64.S
arch/x86/purgatory/sha256.h
arch/x86/um/Makefile
arch/x86/um/asm/ptrace.h
arch/x86/um/os-Linux/prctl.c
arch/x86/um/syscalls_32.c [new file with mode: 0644]
arch/x86/um/syscalls_64.c
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/x86/xen/mmu.h
arch/x86/xen/smp.c
block/bio.c
block/blk-core.c
block/blk-mq-tag.c
block/blk-mq.c
crypto/af_alg.c
crypto/algif_hash.c
drivers/ata/ahci_qoriq.c
drivers/ata/libata-sff.c
drivers/ata/libata-transport.c
drivers/char/hw_random/omap-rng.c
drivers/char/random.c
drivers/crypto/s5p-sss.c
drivers/irqchip/irq-crossbar.c
drivers/irqchip/irq-gic-v3-its.c
drivers/isdn/gigaset/bas-gigaset.c
drivers/lguest/x86/core.c
drivers/macintosh/macio_asic.c
drivers/md/dm.c
drivers/md/md-cluster.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/genet/bcmgenet.h
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/liquidio/octeon_config.h
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.h
drivers/net/ethernet/cavium/liquidio/octeon_main.h
drivers/net/ethernet/cavium/liquidio/octeon_network.h
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.h
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
drivers/net/ethernet/cavium/thunder/thunder_bgx.h
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_iscsi.c
drivers/net/ethernet/qlogic/qed/qed_ll2.c
drivers/net/ethernet/qlogic/qed/qed_ooo.c
drivers/net/ethernet/smsc/smc91x.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/phy/marvell.c
drivers/net/phy/phy_device.c
drivers/net/phy/spi_ks8995.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wan/fsl_ucc_hdlc.c
drivers/net/wimax/i2400m/usb.c
drivers/net/xen-netback/interface.c
drivers/net/xen-netback/netback.c
drivers/net/xen-netback/xenbus.c
drivers/platform/x86/asus-nb-wmi.c
drivers/platform/x86/asus-wmi.c
drivers/platform/x86/asus-wmi.h
drivers/platform/x86/fujitsu-laptop.c
drivers/pnp/pnpbios/bioscalls.c
drivers/scsi/Kconfig
drivers/scsi/aacraid/src.c
drivers/scsi/libiscsi.c
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_ct.c
drivers/scsi/lpfc/lpfc_debugfs.c
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_hw4.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_mem.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_nvme.h
drivers/scsi/lpfc/lpfc_nvmet.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli4.h
drivers/scsi/lpfc/lpfc_version.h
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/qedf/qedf_dbg.h
drivers/scsi/qedf/qedf_fip.c
drivers/scsi/qedf/qedf_io.c
drivers/scsi/qedf/qedf_main.c
drivers/scsi/qedi/qedi_debugfs.c
drivers/scsi/qedi/qedi_fw.c
drivers/scsi/qedi/qedi_gbl.h
drivers/scsi/qedi/qedi_iscsi.c
drivers/scsi/qedi/qedi_main.c
drivers/scsi/qla2xxx/qla_dbg.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_priv.h
drivers/scsi/sd.c
drivers/scsi/storvsc_drv.c
drivers/scsi/ufs/ufs.h
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h
drivers/scsi/vmw_pvscsi.c
drivers/staging/lustre/lnet/lnet/lib-socket.c
fs/dlm/lowcomms.c
fs/exec.c
fs/fs-writeback.c
fs/gfs2/incore.h
fs/ocfs2/cluster/tcp.c
fs/xfs/libxfs/xfs_dir2_priv.h
fs/xfs/libxfs/xfs_dir2_sf.c
fs/xfs/libxfs/xfs_inode_fork.c
fs/xfs/libxfs/xfs_inode_fork.h
fs/xfs/xfs_dir2_readdir.c
fs/xfs/xfs_inode.c
include/asm-generic/mm_hooks.h
include/asm-generic/pgtable.h
include/crypto/if_alg.h
include/linux/compat.h
include/linux/dccp.h
include/linux/filter.h
include/linux/irqchip/arm-gic-v3.h
include/linux/irqdomain.h
include/linux/list_nulls.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/net.h
include/linux/pagemap.h
include/linux/phy.h
include/linux/purgatory.h [new file with mode: 0644]
include/linux/random.h
include/linux/rculist_nulls.h
include/linux/thread_info.h
include/net/inet_common.h
include/net/inet_connection_sock.h
include/net/sctp/structs.h
include/net/sock.h
include/scsi/libiscsi.h
include/scsi/scsi_device.h
include/trace/events/xen.h
include/uapi/linux/packet_diag.h
init/main.c
kernel/bpf/hashtab.c
kernel/bpf/lpm_trie.c
kernel/cgroup/cgroup-v1.c
kernel/cgroup/pids.c
kernel/kexec_file.c
kernel/kexec_internal.h
kernel/workqueue.c
mm/Kconfig
mm/gup.c
mm/percpu-vm.c
mm/percpu.c
net/atm/svc.c
net/ax25/af_ax25.c
net/bluetooth/l2cap_sock.c
net/bluetooth/rfcomm/sock.c
net/bluetooth/sco.c
net/bridge/br_input.c
net/bridge/br_netfilter_hooks.c
net/core/dev.c
net/core/net-sysfs.c
net/core/skbuff.c
net/core/sock.c
net/dccp/ccids/ccid2.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dccp/minisocks.c
net/decnet/af_decnet.c
net/ipv4/af_inet.c
net/ipv4/inet_connection_sock.c
net/ipv4/ip_output.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_timer.c
net/ipv6/af_inet6.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_output.c
net/ipv6/ip6_vti.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/irda/af_irda.c
net/iucv/af_iucv.c
net/llc/af_llc.c
net/mpls/af_mpls.c
net/netrom/af_netrom.c
net/nfc/llcp_sock.c
net/phonet/pep.c
net/phonet/socket.c
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rds.h
net/rds/tcp.c
net/rds/tcp.h
net/rds/tcp_listen.c
net/rose/af_rose.c
net/rxrpc/input.c
net/rxrpc/recvmsg.c
net/rxrpc/sendmsg.c
net/sched/act_connmark.c
net/sched/act_skbmod.c
net/sctp/ipv6.c
net/sctp/protocol.c
net/sctp/socket.c
net/smc/af_smc.c
net/socket.c
net/tipc/socket.c
net/unix/af_unix.c
net/vmw_vsock/af_vsock.c
net/x25/af_x25.c
net/xfrm/xfrm_policy.c
tools/include/uapi/linux/bpf_perf_event.h [new file with mode: 0644]
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/powerpc/include/vsx_asm.h
tools/testing/selftests/x86/ldt_gdt.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-v3.c

index a71b8095dbd8df44603f18e7435b490d7b5c56c9..2f66683500b8e44e0ceb44bc877acccae39b35d7 100644 (file)
@@ -68,3 +68,4 @@ stable kernels.
 |                |                 |                 |                             |
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
+| Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
index 3b8449f8ac7e80a0ebeaf6dfe8c64b15503f3954..49d7c997fa1ee7f759b5ba319bb57be464f0bd47 100644 (file)
@@ -1142,16 +1142,17 @@ used by the kernel.
 
   pids.max
 
- A read-write single value file which exists on non-root cgroups.  The
- default is "max".
+       A read-write single value file which exists on non-root
+       cgroups.  The default is "max".
 
- Hard limit of number of processes.
+       Hard limit of number of processes.
 
   pids.current
 
- A read-only single value file which exists on all cgroups.
+       A read-only single value file which exists on all cgroups.
 
- The number of processes currently in the cgroup and its descendants.
+       The number of processes currently in the cgroup and its
+       descendants.
 
 Organisational operations are not blocked by cgroup policies, so it is
 possible to have pids.current > pids.max.  This can be done by either
index 712baf6c3e246fa9ea93c6bab4cb8f294b894d86..44b842b6ca154d9c13c3d0da6a784fe54195fcf9 100644 (file)
@@ -71,6 +71,9 @@
                          For Axon it can be absent, though my current driver
                          doesn't handle phy-address yet so for now, keep
                          0x00ffffff in it.
+    - phy-handle       : Used to describe configurations where a external PHY
+                         is used. Please refer to:
+                         Documentation/devicetree/bindings/net/ethernet.txt
     - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
                          operations (if absent the value is the same as
                          rx-fifo-size).  For Axon, either absent or 2048.
                          offload, phandle of the TAH device node.
     - tah-channel       : 1 cell, optional. If appropriate, channel used on the
                          TAH engine.
+    - fixed-link       : Fixed-link subnode describing a link to a non-MDIO
+                         managed entity. See
+                         Documentation/devicetree/bindings/net/fixed-link.txt
+                         for details.
+    - mdio subnode     : When the EMAC has a phy connected to its local
+                         mdio, which us supported by the kernel's network
+                         PHY library in drivers/net/phy, there must be device
+                         tree subnode with the following required properties:
+                               - #address-cells: Must be <1>.
+                               - #size-cells: Must be <0>.
 
-    Example:
+                         For PHY definitions: Please refer to
+                         Documentation/devicetree/bindings/net/phy.txt and
+                         Documentation/devicetree/bindings/net/ethernet.txt
+
+    Examples:
 
        EMAC0: ethernet@40000800 {
                device_type = "network";
                zmii-channel = <0>;
        };
 
+       EMAC1: ethernet@ef600c00 {
+               device_type = "network";
+               compatible = "ibm,emac-apm821xx", "ibm,emac4sync";
+               interrupt-parent = <&EMAC1>;
+               interrupts = <0 1>;
+               #interrupt-cells = <1>;
+               #address-cells = <0>;
+               #size-cells = <0>;
+               interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */
+                                1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>;
+               reg = <0xef600c00 0x000000c4>;
+               local-mac-address = [000000000000]; /* Filled in by U-Boot */
+               mal-device = <&MAL0>;
+               mal-tx-channel = <0>;
+               mal-rx-channel = <0>;
+               cell-index = <0>;
+               max-frame-size = <9000>;
+               rx-fifo-size = <16384>;
+               tx-fifo-size = <2048>;
+               fifo-entry-size = <10>;
+               phy-mode = "rgmii";
+               phy-handle = <&phy0>;
+               phy-map = <0x00000000>;
+               rgmii-device = <&RGMII0>;
+               rgmii-channel = <0>;
+               tah-device = <&TAH0>;
+               tah-channel = <0>;
+               has-inverted-stacr-oc;
+               has-new-stacr-staopc;
+
+               mdio {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       phy0: ethernet-phy@0 {
+                               compatible = "ethernet-phy-ieee802.3-c22";
+                               reg = <0>;
+                       };
+               };
+       };
+
+
       ii) McMAL node
 
     Required properties:
     - revision           : as provided by the RGMII new version register if
                           available.
                           For Axon: 0x0000012a
-
index fc73eeb7b3b8b119083a03a42e0046a564ea2f0e..ab02304613771b6f6e120da96fb677c293d032d2 100644 (file)
@@ -1006,7 +1006,8 @@ accept_redirects - BOOLEAN
                FALSE (router)
 
 forwarding - BOOLEAN
-       Enable IP forwarding on this interface.
+       Enable IP forwarding on this interface.  This controls whether packets
+       received _on_ this interface can be forwarded.
 
 mc_forwarding - BOOLEAN
        Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
index 069450938b795df4e6f5e16f39b864e8011fb844..3c248f772ae61673e719b0c004f47d6fd7b788c1 100644 (file)
@@ -951,6 +951,10 @@ This ioctl allows the user to create or modify a guest physical memory
 slot.  When changing an existing slot, it may be moved in the guest
 physical memory space, or its flags may be modified.  It may not be
 resized.  Slots may not overlap in guest physical address space.
+Bits 0-15 of "slot" specifies the slot id and this value should be
+less than the maximum number of user memory slots supported per VM.
+The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
+if this capability is supported by the architecture.
 
 If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
 specifies the address space which is being modified.  They must be
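
Illustrative sketch, not part of this commit: the api.txt text above says the slot id occupies bits 0-15 of "slot" and that the per-VM limit can be queried with KVM_CAP_NR_MEMSLOTS. A minimal user-space use of that interface could look like the following; the 64 KiB region size, the anonymous mapping and the slot/address-space values are arbitrary example choices, and error handling is omitted.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

/*
 * Query how many user memory slots this VM supports and register one
 * 64 KiB anonymous region in slot 0.  Bits 0-15 of "slot" carry the
 * slot id; bits 16-31 would carry the address space id when
 * KVM_CAP_MULTI_ADDRESS_SPACE is used (0 here).
 */
int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm  = ioctl(kvm, KVM_CREATE_VM, 0);

	int nr_slots = ioctl(vm, KVM_CHECK_EXTENSION, KVM_CAP_NR_MEMSLOTS);
	if (nr_slots <= 0)
		nr_slots = 32;		/* capability not reported */
	printf("user memory slots supported: %d\n", nr_slots);

	void *mem = mmap(NULL, 0x10000, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	struct kvm_userspace_memory_region region = {
		.slot            = (0 << 16) | 0,  /* address space 0, slot id 0 */
		.guest_phys_addr = 0,
		.memory_size     = 0x10000,
		.userspace_addr  = (unsigned long)mem,
	};
	ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region);
	return 0;
}
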
index 5724092db8118deefb1758e3886db97aa61521b8..ee3f9c30957c65c2407d6e630290199dc371b931 100644 (file)
@@ -19,7 +19,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
 ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
@@ -39,6 +39,9 @@ memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
 during EFI runtime calls.
 
+The module mapping space size changes based on the CONFIG requirements for the
+following fixmap section.
+
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
index 165cf9783a5dbb28ff82ac206d667bcba3b572b6..b841fb36beb2b5aa594907a4a5efbb8333e77548 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
index cd211a14a88f7774bec3abbe3b371f0302e624a0..c4d6833aacd98a75a226d337be0d6fb67d80fcd0 100644 (file)
@@ -700,6 +700,13 @@ config ARCH_MMAP_RND_COMPAT_BITS
          This value can be changed after boot using the
          /proc/sys/vm/mmap_rnd_compat_bits tunable
 
+config HAVE_ARCH_COMPAT_MMAP_BASES
+       bool
+       help
+         This allows 64bit applications to invoke 32-bit mmap() syscall
+         and vice-versa 32-bit applications to call 64-bit mmap().
+         Required for applications doing different bitness syscalls.
+
 config HAVE_COPY_THREAD_TLS
        bool
        help
index 0d4e71b42c77da986a2dc471a1f335be5e958527..454fadd077ad5b0c77a7b989ce58dcc12d0e9b35 100644 (file)
@@ -1666,7 +1666,7 @@ config ARCH_SELECT_MEMORY_MODEL
 config HAVE_ARCH_PFN_VALID
        def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
 
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
        def_bool y
        depends on ARM_LPAE
 
index e22089fb44dc86b7ed2fdb175bc6ec7b47ee4001..a3f0b3d500895b349004921b5f1b9435a45a0f5b 100644 (file)
 #define HSR_EC_IABT_HYP        (0x21)
 #define HSR_EC_DABT    (0x24)
 #define HSR_EC_DABT_HYP        (0x25)
+#define HSR_EC_MAX     (0x3f)
 
 #define HSR_WFI_IS_WFE         (_AC(1, UL) << 0)
 
index cc495d799c67643c58e136249197a06736299339..31ee468ce667dee8a219f775f1106714879088c2 100644 (file)
@@ -30,7 +30,6 @@
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
 #define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 #define KVM_HALT_POLL_NS_DEFAULT 500000
index c9a2103faeb9acf82f0c26164085506f14015822..96dba7cd8be7b4b6f29d9896e2d4515c477ca963 100644 (file)
@@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
                break;
+       case KVM_CAP_NR_MEMSLOTS:
+               r = KVM_USER_MEM_SLOTS;
+               break;
        case KVM_CAP_MSI_DEVID:
                if (!kvm)
                        r = -EINVAL;
index 4e40d1955e35341b7756efe72f2da6bf2360b224..96af65a30d78b1e09182d8e41f8b8e3ff4aae81e 100644 (file)
@@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
        return 1;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+       u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+       kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+                     hsr);
+
+       kvm_inject_undefined(vcpu);
+       return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+       [0 ... HSR_EC_MAX]      = kvm_handle_unknown_ec,
        [HSR_EC_WFI]            = kvm_handle_wfx,
        [HSR_EC_CP15_32]        = kvm_handle_cp15_32,
        [HSR_EC_CP15_64]        = kvm_handle_cp15_64,
@@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 {
        u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
 
-       if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-           !arm_exit_handlers[hsr_ec]) {
-               kvm_err("Unknown exception class: hsr: %#08x\n",
-                       (unsigned int)kvm_vcpu_get_hsr(vcpu));
-               BUG();
-       }
-
        return arm_exit_handlers[hsr_ec];
 }
 
index a39029b5414eb25f23f3409f74a4d84713a02c4f..af62bf79721a9163fd80ca6e0bcf38248618e0e8 100644 (file)
@@ -205,7 +205,7 @@ config GENERIC_CALIBRATE_DELAY
 config ZONE_DMA
        def_bool y
 
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
        def_bool y
 
 config ARCH_DMA_ADDR_T_64BIT
@@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009
 
          If unsure, say Y.
 
+config QCOM_QDF2400_ERRATUM_0065
+       bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size"
+       default y
+       help
+         On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports
+         ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have
+         been indicated as 16Bytes (0xf), not 8Bytes (0x7).
+
+         If unsure, say Y.
+
 endmenu
 
 
@@ -1063,6 +1073,10 @@ config SYSVIPC_COMPAT
        def_bool y
        depends on COMPAT && SYSVIPC
 
+config KEYS_COMPAT
+       def_bool y
+       depends on COMPAT && KEYS
+
 endmenu
 
 menu "Power management options"
index 05310ad8c5abec54a445cb2dfcd3df5fefcefe3a..f31c48d0cd6873f399a6d8f5f861e98fa3f66e10 100644 (file)
@@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void)
 static inline bool system_uses_ttbr0_pan(void)
 {
        return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
-               !cpus_have_cap(ARM64_HAS_PAN);
+               !cpus_have_const_cap(ARM64_HAS_PAN);
 }
 
 #endif /* __ASSEMBLY__ */
index f21fd38943708f1f6b69f1431fd07538ff26a993..e7705e7bb07b133de4da9b2809a152f94ceb0b4b 100644 (file)
@@ -30,8 +30,7 @@
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_USER_MEM_SLOTS 512
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HALT_POLL_NS_DEFAULT 500000
 
index 75a0f8acef669ce5560f627f516dae54168a898d..fd691087dc9ad58ff0ff007f5ea7191a3f879380 100644 (file)
@@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu)
 }
 
 /**
- * cpu_suspend() - function to enter a low-power idle state
+ * arm_cpuidle_suspend() - function to enter a low-power idle state
  * @arg: argument to pass to CPU suspend operations
  *
  * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
index 2a07aae5b8a26431edcdfd2534a856474fc00b44..c5c45942fb6e6693c5f8c195bb6596e2fa9f6ff2 100644 (file)
@@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
        return 0;
 }
 
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
-                                      unsigned long val, void *data)
-{
-       return NOTIFY_DONE;
-}
-
 static void __kprobes kprobe_handler(struct pt_regs *regs)
 {
        struct kprobe *p, *cur_kprobe;
index 1bfe30dfbfe77ffa2395528e008c058bd93b648d..fa1b18e364fc9d73cec1c0fdb6626285c1d7adc2 100644 (file)
@@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
        return ret;
 }
 
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+       u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+       kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+                     hsr, esr_get_class_string(hsr));
+
+       kvm_inject_undefined(vcpu);
+       return 1;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
+       [0 ... ESR_ELx_EC_MAX]  = kvm_handle_unknown_ec,
        [ESR_ELx_EC_WFx]        = kvm_handle_wfx,
        [ESR_ELx_EC_CP15_32]    = kvm_handle_cp15_32,
        [ESR_ELx_EC_CP15_64]    = kvm_handle_cp15_64,
@@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
        u32 hsr = kvm_vcpu_get_hsr(vcpu);
        u8 hsr_ec = ESR_ELx_EC(hsr);
 
-       if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
-           !arm_exit_handlers[hsr_ec]) {
-               kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
-                       hsr, esr_get_class_string(hsr));
-               BUG();
-       }
-
        return arm_exit_handlers[hsr_ec];
 }
 
index e8e7ba2bc11f93abde92c6b91782ae776bdbcb73..9e1d2b75eecd606df6a6ccf632247ebc02149c67 100644 (file)
 #include <asm/kvm_hyp.h>
 #include <asm/tlbflush.h>
 
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+{
+       u64 val;
+
+       /*
+        * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+        * most TLB operations target EL2/EL0. In order to affect the
+        * guest TLBs (EL1/EL0), we need to change one of these two
+        * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+        * let's flip TGE before executing the TLB operation.
+        */
+       write_sysreg(kvm->arch.vttbr, vttbr_el2);
+       val = read_sysreg(hcr_el2);
+       val &= ~HCR_TGE;
+       write_sysreg(val, hcr_el2);
+       isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+{
+       write_sysreg(kvm->arch.vttbr, vttbr_el2);
+       isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+                           __tlb_switch_to_guest_nvhe,
+                           __tlb_switch_to_guest_vhe,
+                           ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+{
+       /*
+        * We're done with the TLB operation, let's restore the host's
+        * view of HCR_EL2.
+        */
+       write_sysreg(0, vttbr_el2);
+       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+{
+       write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+                           __tlb_switch_to_host_nvhe,
+                           __tlb_switch_to_host_vhe,
+                           ARM64_HAS_VIRT_HOST_EXTN);
+
 void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
        dsb(ishst);
 
        /* Switch to requested VMID */
        kvm = kern_hyp_va(kvm);
-       write_sysreg(kvm->arch.vttbr, vttbr_el2);
-       isb();
+       __tlb_switch_to_guest()(kvm);
 
        /*
         * We could do so much better if we had the VA as well.
@@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
        dsb(ish);
        isb();
 
-       write_sysreg(0, vttbr_el2);
+       __tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
 
        /* Switch to requested VMID */
        kvm = kern_hyp_va(kvm);
-       write_sysreg(kvm->arch.vttbr, vttbr_el2);
-       isb();
+       __tlb_switch_to_guest()(kvm);
 
        __tlbi(vmalls12e1is);
        dsb(ish);
        isb();
 
-       write_sysreg(0, vttbr_el2);
+       __tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
        struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
 
        /* Switch to requested VMID */
-       write_sysreg(kvm->arch.vttbr, vttbr_el2);
-       isb();
+       __tlb_switch_to_guest()(kvm);
 
        __tlbi(vmalle1);
        dsb(nsh);
        isb();
 
-       write_sysreg(0, vttbr_el2);
+       __tlb_switch_to_host()(kvm);
 }
 
 void __hyp_text __kvm_flush_vm_context(void)
index 55d1e9205543689a6883d983dc82cb8b9eb2be6a..687a358a37337af9cf7a0d50c27b0176cfbd2012 100644 (file)
@@ -162,7 +162,7 @@ void __init kasan_init(void)
        clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
        vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
-                        pfn_to_nid(virt_to_pfn(_text)));
+                        pfn_to_nid(virt_to_pfn(lm_alias(_text))));
 
        /*
         * vmemmap_populate() has populated the shadow region that covers the
index 97a8bc8a095ce4199ad2e4e0c88c2933fc3b6987..3a716b2dcde94a353a79c0fe2f81eabc2060f642 100644 (file)
@@ -135,7 +135,7 @@ config PPC
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
        select HAVE_GCC_PLUGINS
-       select HAVE_GENERIC_RCU_GUP
+       select HAVE_GENERIC_GUP
        select HAVE_HW_BREAKPOINT               if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
        select HAVE_IDE
        select HAVE_IOREMAP_PROT
index 861e72109df2da0b54c98a94584b8b4ff853026b..f080abfc2f83fbd1e7d63846904a3a21ad820cee 100644 (file)
@@ -68,6 +68,7 @@ SECTIONS
   }
 
 #ifdef CONFIG_PPC64_BOOT_WRAPPER
+  . = ALIGN(256);
   .got :
   {
     __toc_start = .;
index 9fa046d56ebadd6ad25e62b5a29a853b123cd30a..411994551afc138b0b733fdda2b6398140f7127c 100644 (file)
@@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
 {
        u32 *key = crypto_tfm_ctx(tfm);
 
-       *key = 0;
+       *key = ~0;
 
        return 0;
 }
index 73eb794d6163811c45729984f0d9fb06bbde31a9..bc5fdfd227886aa2cb359656faf2eef5eb6574b7 100644 (file)
 #define PPC_BIT(bit)           (1UL << PPC_BITLSHIFT(bit))
 #define PPC_BITMASK(bs, be)    ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
 
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit)                 \
+       ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
 #include <asm/barrier.h>
 
 /* Macro for generating the ***_bits() functions */
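
Illustrative sketch, not part of this commit: PPC_BITEXTRACT() above moves a single bit from IBM MSB-0 numbering (bit 0 is the most significant bit of the 64-bit register) to a conventional LSB-0 position. The standalone program below mirrors the macros, using 1ULL rather than the kernel's 1UL so it also builds on 32-bit hosts, and composes them the same way the new P9_SRR1_MC_IFETCH() macro in asm/mce.h does.

#include <stdint.h>
#include <stdio.h>

/*
 * IBM (MSB-0) bit numbering helpers mirroring the macros added above.
 * 64-bit registers are assumed throughout.
 */
#define BITS_PER_LONG		64
#define PPC_BITLSHIFT(be)	(BITS_PER_LONG - 1 - (be))
#define PPC_BIT(bit)		(1ULL << PPC_BITLSHIFT(bit))

/* Move one IBM-numbered bit into a conventional LSB-0 bit position. */
#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \
	((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))

/*
 * Same composition that P9_SRR1_MC_IFETCH() in asm/mce.h uses: gather
 * SRR1 bits 45, 44, 43 and 36 into a compact 4-bit error code.
 */
#define SRR1_MC_IFETCH(srr1)	( \
	PPC_BITEXTRACT(srr1, 45, 0) | \
	PPC_BITEXTRACT(srr1, 44, 1) | \
	PPC_BITEXTRACT(srr1, 43, 2) | \
	PPC_BITEXTRACT(srr1, 36, 3))

int main(void)
{
	/* IBM bit 45 of a 64-bit value is bit 63 - 45 = 18 in LSB-0 terms. */
	uint64_t srr1 = PPC_BIT(45) | PPC_BIT(36);

	printf("srr1        = 0x%016llx\n", (unsigned long long)srr1);
	printf("ifetch code = %llu\n",			/* prints 9 */
	       (unsigned long long)SRR1_MC_IFETCH(srr1));
	return 0;
}
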
index f97d8cb6bdf64fd8e147035d659b71016c33ecd0..ed62efe01e49ed1a2e37c6bbd6efbf41fed3e925 100644 (file)
 
 #define P8_DSISR_MC_SLB_ERRORS         (P7_DSISR_MC_SLB_ERRORS | \
                                         P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
+/*
+ * Machine Check bits on power9
+ */
+#define P9_SRR1_MC_LOADSTORE(srr1)     (((srr1) >> PPC_BITLSHIFT(42)) & 1)
+
+#define P9_SRR1_MC_IFETCH(srr1)        (       \
+       PPC_BITEXTRACT(srr1, 45, 0) |   \
+       PPC_BITEXTRACT(srr1, 44, 1) |   \
+       PPC_BITEXTRACT(srr1, 43, 2) |   \
+       PPC_BITEXTRACT(srr1, 36, 3) )
+
+/* 0 is reserved */
+#define P9_SRR1_MC_IFETCH_UE                           1
+#define P9_SRR1_MC_IFETCH_SLB_PARITY                   2
+#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT                 3
+#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT                        4
+#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT                 5
+#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD                        6
+/* 7 is reserved */
+#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT                 8
+#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT       9
+/* 10 ? */
+#define P9_SRR1_MC_IFETCH_RA                   11
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK         12
+#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE               13
+#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT     14
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15
+
+/* DSISR bits for machine check (On Power9) */
+#define P9_DSISR_MC_UE                                 (PPC_BIT(48))
+#define P9_DSISR_MC_UE_TABLEWALK                       (PPC_BIT(49))
+#define P9_DSISR_MC_LINK_LOAD_TIMEOUT                  (PPC_BIT(50))
+#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT             (PPC_BIT(51))
+#define P9_DSISR_MC_ERAT_MULTIHIT                      (PPC_BIT(52))
+#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB                 (PPC_BIT(53))
+#define P9_DSISR_MC_USER_TLBIE                         (PPC_BIT(54))
+#define P9_DSISR_MC_SLB_PARITY_MFSLB                   (PPC_BIT(55))
+#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB                 (PPC_BIT(56))
+#define P9_DSISR_MC_RA_LOAD                            (PPC_BIT(57))
+#define P9_DSISR_MC_RA_TABLEWALK                       (PPC_BIT(58))
+#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN               (PPC_BIT(59))
+#define P9_DSISR_MC_RA_FOREIGN                         (PPC_BIT(60))
+
+/* SLB error bits */
+#define P9_DSISR_MC_SLB_ERRORS         (P9_DSISR_MC_ERAT_MULTIHIT | \
+                                        P9_DSISR_MC_SLB_PARITY_MFSLB | \
+                                        P9_DSISR_MC_SLB_MULTIHIT_MFSLB)
+
 enum MCE_Version {
        MCE_V1 = 1,
 };
@@ -93,6 +142,9 @@ enum MCE_ErrorType {
        MCE_ERROR_TYPE_SLB = 2,
        MCE_ERROR_TYPE_ERAT = 3,
        MCE_ERROR_TYPE_TLB = 4,
+       MCE_ERROR_TYPE_USER = 5,
+       MCE_ERROR_TYPE_RA = 6,
+       MCE_ERROR_TYPE_LINK = 7,
 };
 
 enum MCE_UeErrorType {
@@ -121,6 +173,32 @@ enum MCE_TlbErrorType {
        MCE_TLB_ERROR_MULTIHIT = 2,
 };
 
+enum MCE_UserErrorType {
+       MCE_USER_ERROR_INDETERMINATE = 0,
+       MCE_USER_ERROR_TLBIE = 1,
+};
+
+enum MCE_RaErrorType {
+       MCE_RA_ERROR_INDETERMINATE = 0,
+       MCE_RA_ERROR_IFETCH = 1,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
+       MCE_RA_ERROR_LOAD = 4,
+       MCE_RA_ERROR_STORE = 5,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
+       MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
+       MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+};
+
+enum MCE_LinkErrorType {
+       MCE_LINK_ERROR_INDETERMINATE = 0,
+       MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+       MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+       MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+       MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+       MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
 struct machine_check_event {
        enum MCE_Version        version:8;      /* 0x00 */
        uint8_t                 in_use;         /* 0x01 */
@@ -166,6 +244,30 @@ struct machine_check_event {
                        uint64_t        effective_address;
                        uint8_t         reserved_2[16];
                } tlb_error;
+
+               struct {
+                       enum MCE_UserErrorType user_error_type:8;
+                       uint8_t         effective_address_provided;
+                       uint8_t         reserved_1[6];
+                       uint64_t        effective_address;
+                       uint8_t         reserved_2[16];
+               } user_error;
+
+               struct {
+                       enum MCE_RaErrorType ra_error_type:8;
+                       uint8_t         effective_address_provided;
+                       uint8_t         reserved_1[6];
+                       uint64_t        effective_address;
+                       uint8_t         reserved_2[16];
+               } ra_error;
+
+               struct {
+                       enum MCE_LinkErrorType link_error_type:8;
+                       uint8_t         effective_address_provided;
+                       uint8_t         reserved_1[6];
+                       uint64_t        effective_address;
+                       uint8_t         reserved_2[16];
+               } link_error;
        } u;
 };
 
@@ -176,8 +278,12 @@ struct mce_error_info {
                enum MCE_SlbErrorType slb_error_type:8;
                enum MCE_EratErrorType erat_error_type:8;
                enum MCE_TlbErrorType tlb_error_type:8;
+               enum MCE_UserErrorType user_error_type:8;
+               enum MCE_RaErrorType ra_error_type:8;
+               enum MCE_LinkErrorType link_error_type:8;
        } u;
-       uint8_t         reserved[2];
+       enum MCE_Severity       severity:8;
+       enum MCE_Initiator      initiator:8;
 };
 
 #define MAX_MC_EVT     100
index b9e3f0aca261da2233a086cb41150bc3afacb9dc..ecf9885ab660d329a910a1bda642d69f72a1e305 100644 (file)
@@ -163,11 +163,5 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        /* by default, allow everything */
        return true;
 }
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
-       /* by default, allow everything */
-       return true;
-}
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
index bb7a1890aeb7fb8e95cf8ca0c7aa53765e12eb45..e79b9daa873c1874485021676426ea47196a5a68 100644 (file)
@@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action);
 extern void __flush_tlb_power9(unsigned int action);
 extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .cpu_setup              = __setup_cpu_power9,
                .cpu_restore            = __restore_cpu_power9,
                .flush_tlb              = __flush_tlb_power9,
+               .machine_check_early    = __machine_check_early_realmode_p9,
                .platform               = "power9",
        },
        {       /* Power9 */
@@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .cpu_setup              = __setup_cpu_power9,
                .cpu_restore            = __restore_cpu_power9,
                .flush_tlb              = __flush_tlb_power9,
+               .machine_check_early    = __machine_check_early_realmode_p9,
                .platform               = "power9",
        },
        {       /* Cell Broadband Engine */
index c6923ff451311bfade14e7f68888f85bb69f7176..a1475e6aef3a519c70824d4dd432748097a7965e 100644 (file)
@@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce,
        case MCE_ERROR_TYPE_TLB:
                mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
                break;
+       case MCE_ERROR_TYPE_USER:
+               mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+               break;
+       case MCE_ERROR_TYPE_RA:
+               mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+               break;
+       case MCE_ERROR_TYPE_LINK:
+               mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+               break;
        case MCE_ERROR_TYPE_UNKNOWN:
        default:
                break;
@@ -90,13 +99,14 @@ void save_mce_event(struct pt_regs *regs, long handled,
        mce->gpr3 = regs->gpr[3];
        mce->in_use = 1;
 
-       mce->initiator = MCE_INITIATOR_CPU;
        /* Mark it recovered if we have handled it and MSR(RI=1). */
        if (handled && (regs->msr & MSR_RI))
                mce->disposition = MCE_DISPOSITION_RECOVERED;
        else
                mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
-       mce->severity = MCE_SEV_ERROR_SYNC;
+
+       mce->initiator = mce_err->initiator;
+       mce->severity = mce_err->severity;
 
        /*
         * Populate the mce error_type and type-specific error_type.
@@ -115,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled,
        } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
                mce->u.erat_error.effective_address_provided = true;
                mce->u.erat_error.effective_address = addr;
+       } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+               mce->u.user_error.effective_address_provided = true;
+               mce->u.user_error.effective_address = addr;
+       } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+               mce->u.ra_error.effective_address_provided = true;
+               mce->u.ra_error.effective_address = addr;
+       } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+               mce->u.link_error.effective_address_provided = true;
+               mce->u.link_error.effective_address = addr;
        } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
                mce->u.ue_error.effective_address_provided = true;
                mce->u.ue_error.effective_address = addr;
@@ -239,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt)
                "Parity",
                "Multihit",
        };
+       static const char *mc_user_types[] = {
+               "Indeterminate",
+               "tlbie(l) invalid",
+       };
+       static const char *mc_ra_types[] = {
+               "Indeterminate",
+               "Instruction fetch (bad)",
+               "Page table walk ifetch (bad)",
+               "Page table walk ifetch (foreign)",
+               "Load (bad)",
+               "Store (bad)",
+               "Page table walk Load/Store (bad)",
+               "Page table walk Load/Store (foreign)",
+               "Load/Store (foreign)",
+       };
+       static const char *mc_link_types[] = {
+               "Indeterminate",
+               "Instruction fetch (timeout)",
+               "Page table walk ifetch (timeout)",
+               "Load (timeout)",
+               "Store (timeout)",
+               "Page table walk Load/Store (timeout)",
+       };
 
        /* Print things out */
        if (evt->version != MCE_V1) {
@@ -315,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt)
                        printk("%s    Effective address: %016llx\n",
                               level, evt->u.tlb_error.effective_address);
                break;
+       case MCE_ERROR_TYPE_USER:
+               subtype = evt->u.user_error.user_error_type <
+                       ARRAY_SIZE(mc_user_types) ?
+                       mc_user_types[evt->u.user_error.user_error_type]
+                       : "Unknown";
+               printk("%s  Error type: User [%s]\n", level, subtype);
+               if (evt->u.user_error.effective_address_provided)
+                       printk("%s    Effective address: %016llx\n",
+                              level, evt->u.user_error.effective_address);
+               break;
+       case MCE_ERROR_TYPE_RA:
+               subtype = evt->u.ra_error.ra_error_type <
+                       ARRAY_SIZE(mc_ra_types) ?
+                       mc_ra_types[evt->u.ra_error.ra_error_type]
+                       : "Unknown";
+               printk("%s  Error type: Real address [%s]\n", level, subtype);
+               if (evt->u.ra_error.effective_address_provided)
+                       printk("%s    Effective address: %016llx\n",
+                              level, evt->u.ra_error.effective_address);
+               break;
+       case MCE_ERROR_TYPE_LINK:
+               subtype = evt->u.link_error.link_error_type <
+                       ARRAY_SIZE(mc_link_types) ?
+                       mc_link_types[evt->u.link_error.link_error_type]
+                       : "Unknown";
+               printk("%s  Error type: Link [%s]\n", level, subtype);
+               if (evt->u.link_error.effective_address_provided)
+                       printk("%s    Effective address: %016llx\n",
+                              level, evt->u.link_error.effective_address);
+               break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                printk("%s  Error type: Unknown\n", level);
@@ -341,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
                if (evt->u.tlb_error.effective_address_provided)
                        return evt->u.tlb_error.effective_address;
                break;
+       case MCE_ERROR_TYPE_USER:
+               if (evt->u.user_error.effective_address_provided)
+                       return evt->u.user_error.effective_address;
+               break;
+       case MCE_ERROR_TYPE_RA:
+               if (evt->u.ra_error.effective_address_provided)
+                       return evt->u.ra_error.effective_address;
+               break;
+       case MCE_ERROR_TYPE_LINK:
+               if (evt->u.link_error.effective_address_provided)
+                       return evt->u.link_error.effective_address;
+               break;
        default:
        case MCE_ERROR_TYPE_UNKNOWN:
                break;
index 7353991c4ecee6d8a6ecacdce4ab96815ffdfb12..763d6f58caa8ca140c8afb1260555b0ea1c1d2a0 100644 (file)
@@ -116,6 +116,51 @@ static void flush_and_reload_slb(void)
 }
 #endif
 
+static void flush_erat(void)
+{
+       asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+}
+
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
+
+static int mce_flush(int what)
+{
+#ifdef CONFIG_PPC_STD_MMU_64
+       if (what == MCE_FLUSH_SLB) {
+               flush_and_reload_slb();
+               return 1;
+       }
+#endif
+       if (what == MCE_FLUSH_ERAT) {
+               flush_erat();
+               return 1;
+       }
+       if (what == MCE_FLUSH_TLB) {
+               if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+                       cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
+{
+       if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
+               dsisr &= ~slb;
+       if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
+               dsisr &= ~erat;
+       if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
+               dsisr &= ~tlb;
+       /* Any other errors we don't understand? */
+       if (dsisr)
+               return 0;
+       return 1;
+}
+
 static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
 {
        long handled = 1;
@@ -281,6 +326,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
        long handled = 1;
        struct mce_error_info mce_error_info = { 0 };
 
+       mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+       mce_error_info.initiator = MCE_INITIATOR_CPU;
+
        srr1 = regs->msr;
        nip = regs->nip;
 
@@ -352,6 +400,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
        long handled = 1;
        struct mce_error_info mce_error_info = { 0 };
 
+       mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+       mce_error_info.initiator = MCE_INITIATOR_CPU;
+
        srr1 = regs->msr;
        nip = regs->nip;
 
@@ -372,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
        save_mce_event(regs, handled, &mce_error_info, nip, addr);
        return handled;
 }
+
+static int mce_handle_derror_p9(struct pt_regs *regs)
+{
+       uint64_t dsisr = regs->dsisr;
+
+       return mce_handle_flush_derrors(dsisr,
+                       P9_DSISR_MC_SLB_PARITY_MFSLB |
+                       P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
+
+                       P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
+
+                       P9_DSISR_MC_ERAT_MULTIHIT);
+}
+
+static int mce_handle_ierror_p9(struct pt_regs *regs)
+{
+       uint64_t srr1 = regs->msr;
+
+       switch (P9_SRR1_MC_IFETCH(srr1)) {
+       case P9_SRR1_MC_IFETCH_SLB_PARITY:
+       case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+               return mce_flush(MCE_FLUSH_SLB);
+       case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+               return mce_flush(MCE_FLUSH_TLB);
+       case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+               return mce_flush(MCE_FLUSH_ERAT);
+       default:
+               return 0;
+       }
+}
+
+static void mce_get_derror_p9(struct pt_regs *regs,
+               struct mce_error_info *mce_err, uint64_t *addr)
+{
+       uint64_t dsisr = regs->dsisr;
+
+       mce_err->severity = MCE_SEV_ERROR_SYNC;
+       mce_err->initiator = MCE_INITIATOR_CPU;
+
+       if (dsisr & P9_DSISR_MC_USER_TLBIE)
+               *addr = regs->nip;
+       else
+               *addr = regs->dar;
+
+       if (dsisr & P9_DSISR_MC_UE) {
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+       } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+       } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
+       } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
+       } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
+               mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+               mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+       } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+               mce_err->error_type = MCE_ERROR_TYPE_TLB;
+               mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+       } else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
+               mce_err->error_type = MCE_ERROR_TYPE_USER;
+               mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
+       } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+       } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+       } else if (dsisr & P9_DSISR_MC_RA_LOAD) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
+       } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+       } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+       } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+       }
+}
+
+static void mce_get_ierror_p9(struct pt_regs *regs,
+               struct mce_error_info *mce_err, uint64_t *addr)
+{
+       uint64_t srr1 = regs->msr;
+
+       switch (P9_SRR1_MC_IFETCH(srr1)) {
+       case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+       case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+               mce_err->severity = MCE_SEV_FATAL;
+               break;
+       default:
+               mce_err->severity = MCE_SEV_ERROR_SYNC;
+               break;
+       }
+
+       mce_err->initiator = MCE_INITIATOR_CPU;
+
+       *addr = regs->nip;
+
+       switch (P9_SRR1_MC_IFETCH(srr1)) {
+       case P9_SRR1_MC_IFETCH_UE:
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_SLB_PARITY:
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+               break;
+       case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+               mce_err->error_type = MCE_ERROR_TYPE_SLB;
+               mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+               break;
+       case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+               mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+               mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+               break;
+       case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+               mce_err->error_type = MCE_ERROR_TYPE_TLB;
+               mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+               break;
+       case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+               mce_err->error_type = MCE_ERROR_TYPE_UE;
+               mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
+               break;
+       case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
+               break;
+       case P9_SRR1_MC_IFETCH_RA:
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
+               break;
+       case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
+               break;
+       case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+               mce_err->error_type = MCE_ERROR_TYPE_LINK;
+               mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
+               break;
+       case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
+               mce_err->error_type = MCE_ERROR_TYPE_RA;
+               mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
+               break;
+       default:
+               break;
+       }
+}
+
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
+{
+       uint64_t nip, addr;
+       long handled;
+       struct mce_error_info mce_error_info = { 0 };
+
+       nip = regs->nip;
+
+       if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
+               handled = mce_handle_derror_p9(regs);
+               mce_get_derror_p9(regs, &mce_error_info, &addr);
+       } else {
+               handled = mce_handle_ierror_p9(regs);
+               mce_get_ierror_p9(regs, &mce_error_info, &addr);
+       }
+
+       /* Handle UE error. */
+       if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+               handled = mce_handle_ue_error(regs);
+
+       save_mce_event(regs, handled, &mce_error_info, nip, addr);
+       return handled;
+}
index 595dd718ea8718b010fed1ca5c08f5f121f674c0..2ff13249f87a61759f015d7fff93bd014dba6347 100644 (file)
@@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
                        sdsync = POWER7P_MMCRA_SDAR_VALID;
                else if (ppmu->flags & PPMU_ALT_SIPR)
                        sdsync = POWER6_MMCRA_SDSYNC;
+               else if (ppmu->flags & PPMU_NO_SIAR)
+                       sdsync = MMCRA_SAMPLE_ENABLE;
                else
                        sdsync = MMCRA_SDSYNC;
 
index e79fb5fb817dbe21cd19f633d89ca3bbbf51ad0c..cd951fd231c4040ba653f32cf485eebb22d1d805 100644 (file)
@@ -65,12 +65,41 @@ static bool is_event_valid(u64 event)
        return !(event & ~valid_mask);
 }
 
-static u64 mmcra_sdar_mode(u64 event)
+static inline bool is_event_marked(u64 event)
 {
-       if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
-               return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+       if (event & EVENT_IS_MARKED)
+               return true;
+
+       return false;
+}
 
-       return MMCRA_SDAR_MODE_TLB;
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+       /*
+        * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
+        * continuous sampling mode.
+        *
+        * In case of Power8:
+        * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous
+        * sampling mode and will be unchanged when setting MMCRA[63]
+        * (Marked events).
+        *
+        * In case of Power9:
+        * Marked events: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates')
+        *               if the event is marked or the group already has a
+        *               marked event.
+        * Non-marked events (for DD1):
+        *      MMCRA[SDAR_MODE] will be set to 0b01.
+        * For the rest:
+        *      MMCRA[SDAR_MODE] will be taken from the event code.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
+                       *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+               else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       *mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+               else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       *mmcra |= MMCRA_SDAR_MODE_TLB;
+       } else
+               *mmcra |= MMCRA_SDAR_MODE_TLB;
 }
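
To see the decision tree above in isolation, the following user-space sketch mirrors the CPU_FTR_ARCH_300 path of mmcra_sdar_mode(). The MMCRA_SDAR_MODE_* constants are taken from the isa207-common.h hunk further down in this diff; EVENT_IS_MARKED, MMCRA_SAMPLE_ENABLE and the p9_SDAR_MODE() extraction are replaced by illustrative placeholders, so treat this as a sketch of the logic rather than the kernel's definitions.

    /* Hedged sketch of the Power9 SDAR_MODE selection; placeholder constants. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define MMCRA_SDAR_MODE_SHIFT           42
    #define MMCRA_SDAR_MODE_TLB             (1ull << MMCRA_SDAR_MODE_SHIFT)
    #define MMCRA_SDAR_MODE_NO_UPDATES      ~(0x3ull << MMCRA_SDAR_MODE_SHIFT)

    #define EVENT_IS_MARKED_PLACEHOLDER     (1ull << 63)  /* assumption, not the kernel value */
    #define MMCRA_SAMPLE_ENABLE_PLACEHOLDER (1ull << 0)   /* assumption, not the kernel value */

    static void sdar_mode_p9(uint64_t event, uint64_t *mmcra, bool dd1)
    {
            if ((event & EVENT_IS_MARKED_PLACEHOLDER) ||
                (*mmcra & MMCRA_SAMPLE_ENABLE_PLACEHOLDER))
                    *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;   /* 0b00: no SDAR updates */
            else if (!dd1)
                    /* placeholder for p9_SDAR_MODE(event): mode taken from the event code */
                    *mmcra |= (event & 0x3ull) << MMCRA_SDAR_MODE_SHIFT;
            else
                    *mmcra |= MMCRA_SDAR_MODE_TLB;          /* DD1 fallback: 0b01 */
    }

    int main(void)
    {
            uint64_t mmcra = 0;

            sdar_mode_p9(EVENT_IS_MARKED_PLACEHOLDER, &mmcra, false);
            printf("marked event -> SDAR_MODE = %llx\n",
                   (unsigned long long)((mmcra >> MMCRA_SDAR_MODE_SHIFT) & 0x3));

            mmcra = 0;
            sdar_mode_p9(0x2, &mmcra, false);
            printf("event-coded  -> SDAR_MODE = %llx\n",
                   (unsigned long long)((mmcra >> MMCRA_SDAR_MODE_SHIFT) & 0x3));
            return 0;
    }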
 
 static u64 thresh_cmp_val(u64 value)
@@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
                value |= CNST_L1_QUAL_VAL(cache);
        }
 
-       if (event & EVENT_IS_MARKED) {
+       if (is_event_marked(event)) {
                mask  |= CNST_SAMPLE_MASK;
                value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
        }
@@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                }
 
                /* In continuous sampling mode, update SDAR on TLB miss */
-               mmcra |= mmcra_sdar_mode(event[i]);
+               mmcra_sdar_mode(event[i], &mmcra);
 
                if (event[i] & EVENT_IS_L1) {
                        cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
@@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
                        mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
                }
 
-               if (event[i] & EVENT_IS_MARKED) {
+               if (is_event_marked(event[i])) {
                        mmcra |= MMCRA_SAMPLE_ENABLE;
 
                        val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
index cf9bd89901595cc38b793bc916a2096d873054eb..899210f14ee432ea4b63cc7de6f7ea1a6da7a404 100644 (file)
 #define MMCRA_THR_CMP_SHIFT            32
 #define MMCRA_SDAR_MODE_SHIFT          42
 #define MMCRA_SDAR_MODE_TLB            (1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES     ~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
 #define MMCRA_IFM_SHIFT                        30
 
 /* MMCR1 Threshold Compare bit constant for power9 */
index 86d9fde93c175f86dac6f40de0d68aff2455b0c6..e0f856bfbfe8f3c6ecfa70e737b1b2496725d563 100644 (file)
@@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs,
                                        struct machine_check_event *evt)
 {
        int recovered = 0;
-       uint64_t ea = get_mce_fault_addr(evt);
 
        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
@@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs,
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
                recovered = 1;
-       } else if (ea && !is_kernel_addr(ea)) {
+       } else if (evt->severity == MCE_SEV_FATAL) {
+               /* Fatal machine check */
+               pr_err("Machine check interrupt is fatal\n");
+               recovered = 0;
+       } else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
+                       (user_mode(regs) && !is_global_init(current))) {
                /*
-                * Faulting address is not in kernel text. We should be fine.
-                * We need to find which process uses this address.
                 * For now, kill the task if we have received exception when
                 * in userspace.
                 *
                 * TODO: Queue up this address for hwpoisoning later.
                 */
-               if (user_mode(regs) && !is_global_init(current)) {
-                       _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
-                       recovered = 1;
-               } else
-                       recovered = 0;
-       } else if (user_mode(regs) && !is_global_init(current) &&
-               evt->severity == MCE_SEV_ERROR_SYNC) {
-               /*
-                * If we have received a synchronous error when in userspace
-                * kill the task.
-                */
                _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                recovered = 1;
        }
index 6901a06da2f90bddf70386667eaea5c4fe3505a7..e36738291c320575523422e139d4642e04142bd5 100644 (file)
@@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
 }
 
 static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
-                                  struct pci_bus *bus)
+                                  struct pci_bus *bus,
+                                  bool add_to_group)
 {
        struct pci_dev *dev;
 
        list_for_each_entry(dev, &bus->devices, bus_list) {
                set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
                set_dma_offset(&dev->dev, pe->tce_bypass_base);
-               iommu_add_device(&dev->dev);
+               if (add_to_group)
+                       iommu_add_device(&dev->dev);
 
                if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
-                       pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+                       pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+                                       add_to_group);
        }
 }
 
@@ -2191,7 +2194,7 @@ found:
                set_iommu_table_base(&pe->pdev->dev, tbl);
                iommu_add_device(&pe->pdev->dev);
        } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-               pnv_ioda_setup_bus_dma(pe, pe->pbus);
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 
        return;
  fail:
@@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
 
        pnv_pci_ioda2_set_bypass(pe, false);
        pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+       if (pe->pbus)
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
        pnv_ioda2_table_free(tbl);
 }
 
@@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
                                                table_group);
 
        pnv_pci_ioda2_setup_default_config(pe);
+       if (pe->pbus)
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
 }
 
 static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2624,6 +2631,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
        level_shift = entries_shift + 3;
        level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
 
+       if ((level_shift - 3) * levels + page_shift >= 60)
+               return -EINVAL;
+
        /* Allocate TCE table */
        addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
                        levels, tce_table_size, &offset, &total_allocated);
@@ -2728,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
        if (pe->flags & PNV_IODA_PE_DEV)
                iommu_add_device(&pe->pdev->dev);
        else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
-               pnv_ioda_setup_bus_dma(pe, pe->pbus);
+               pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
 }
 
 #ifdef CONFIG_PCI_MSI
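
The new (level_shift - 3) * levels + page_shift >= 60 check above bounds the total window a multi-level TCE table can describe. A worked example with purely illustrative numbers:

    /* Each level holds 2^(level_shift - 3) eight-byte TCEs, so the table can
     * address (level_shift - 3) * levels + page_shift bits in total.
     * With level_shift = 17 (128 KiB per level) and page_shift = 16 (64 KiB pages):
     *   levels = 5: (17 - 3) * 5 + 16 = 86 >= 60  -> rejected with -EINVAL
     *   levels = 2: (17 - 3) * 2 + 16 = 44 <  60  -> accepted
     */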
index f9760ccf40323674cecb3f4f14cc98aa8e615cfe..3696ea6c4826b9740398113d207b1679318db2f8 100644 (file)
@@ -116,13 +116,13 @@ dt_offset:
 
        .data
        .balign 8
-.globl sha256_digest
-sha256_digest:
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
        .skip   32
-       .size sha256_digest, . - sha256_digest
+       .size purgatory_sha256_digest, . - purgatory_sha256_digest
 
        .balign 8
-.globl sha_regions
-sha_regions:
+.globl purgatory_sha_regions
+purgatory_sha_regions:
        .skip   8 * 2 * 16
-       .size sha_regions, . - sha_regions
+       .size purgatory_sha_regions, . - purgatory_sha_regions
index d69ea495c4d748748618b27d7414671529e1f41a..716b17238599f63107b27b6860c030b63dd757ba 100644 (file)
@@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
                        ret = blkcipher_walk_done(desc, walk, nbytes - n);
                }
                if (k < n) {
-                       if (__ctr_paes_set_key(ctx) != 0)
+                       if (__ctr_paes_set_key(ctx) != 0) {
+                               if (locked)
+                                       spin_unlock(&ctrblk_lock);
                                return blkcipher_walk_done(desc, walk, -EIO);
+                       }
                }
        }
        if (locked)
index d1c407ddf7032de5a43d08aa48438abda7ab1e91..9072bf63a846148c008da47a5ed3a73313b382a3 100644 (file)
@@ -8,31 +8,27 @@
 #define _S390_CPUTIME_H
 
 #include <linux/types.h>
-#include <asm/div64.h>
+#include <asm/timex.h>
 
 #define CPUTIME_PER_USEC 4096ULL
 #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
-typedef unsigned long long __nocast cputime_t;
-typedef unsigned long long __nocast cputime64_t;
-
 #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
 
-static inline unsigned long __div(unsigned long long n, unsigned long base)
-{
-       return n / base;
-}
-
 /*
- * Convert cputime to microseconds and back.
+ * Convert cputime to microseconds.
  */
-static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+static inline u64 cputime_to_usecs(const u64 cputime)
 {
-       return (__force unsigned long long) cputime >> 12;
+       return cputime >> 12;
 }
 
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
 
 u64 arch_cpu_idle_time(int cpu);
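
Since the CPU timer ticks CPUTIME_PER_USEC (4096) times per microsecond, the shift by 12 above is just a division by 4096, and cputime_to_nsecs() reuses the TOD conversion because both clocks share the same unit. A stand-alone sketch of the arithmetic (not kernel code):

    /* Hedged sketch: converting a raw s390 CPU-timer delta (2**-12 us units). */
    #include <stdio.h>
    #include <stdint.h>

    #define CPUTIME_PER_USEC 4096ULL

    static uint64_t to_usecs(uint64_t cputime) { return cputime >> 12; }

    /* Same formula as tod_to_ns() in asm/timex.h: ns = (delta * 125) >> 9. */
    static uint64_t to_nsecs(uint64_t cputime)
    {
            return ((cputime >> 9) * 125) + (((cputime & 0x1ff) * 125) >> 9);
    }

    int main(void)
    {
            uint64_t delta = 10 * CPUTIME_PER_USEC;   /* ten microseconds of ticks */

            printf("%llu us, %llu ns\n",
                   (unsigned long long)to_usecs(delta),
                   (unsigned long long)to_nsecs(delta));   /* prints: 10 us, 10000 ns */
            return 0;
    }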
 
index 6e31d87fb669bd2037d04d91f1fd5b5bddb9e06b..fa2bf69be18220b9dbdc57573b07ec06eb4d084f 100644 (file)
@@ -156,10 +156,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        /* by default, allow everything */
        return true;
 }
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
-       /* by default, allow everything */
-       return true;
-}
 #endif /* __S390_MMU_CONTEXT_H */
index 354344dcc19898bb647722db24f49733b28793c6..118535123f346d9b32bfb140d45884870a99fd2f 100644 (file)
@@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void)
  *    ns = (todval * 125) >> 9;
  *
  * In order to avoid an overflow with the multiplication we can rewrite this.
- * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits)
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
  * we end up with
  *
- *    ns = ((2^32 * th + tl) * 125 ) >> 9;
- * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ *    ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
  *
  */
 static inline unsigned long long tod_to_ns(unsigned long long todval)
 {
-       unsigned long long ns;
-
-       ns = ((todval >> 32) << 23) * 125;
-       ns += ((todval & 0xffffffff) * 125) >> 9;
-       return ns;
+       return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
 }
 
 #endif
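
The rewritten tod_to_ns() is exactly equivalent to (todval * 125) >> 9 over the full 64-bit range: splitting todval into 2^9 * th + tl gives 125 * th from the high part and floor(tl * 125 / 512) from the low part, with no intermediate overflow. A quick user-space check of that equivalence (a sketch using the unsigned __int128 GCC extension for the naive form):

    /* Hedged sketch: verify the split formula against a 128-bit naive version. */
    #include <stdio.h>
    #include <stdint.h>

    static uint64_t tod_to_ns(uint64_t todval)
    {
            return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
    }

    int main(void)
    {
            const uint64_t samples[] = {
                    0, 1, 511, 512, 0x123456789abcdefULL, 0xffffffffffffffffULL
            };

            for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                    uint64_t v = samples[i];
                    uint64_t naive = (uint64_t)(((unsigned __int128)v * 125) >> 9);

                    printf("%#llx: split=%llu naive=%llu %s\n",
                           (unsigned long long)v,
                           (unsigned long long)tod_to_ns(v),
                           (unsigned long long)naive,
                           tod_to_ns(v) == naive ? "OK" : "MISMATCH");
            }
            return 0;
    }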
index 4384bc797a54f9d77dd593123f0cfc567124f792..152de9b796e149ed3745f41351a5cc5e637bb55e 100644 (file)
 #define __NR_copy_file_range   375
 #define __NR_preadv2           376
 #define __NR_pwritev2          377
-#define NR_syscalls 378
+/* Number 378 is reserved for guarded storage */
+#define __NR_statx             379
+#define NR_syscalls 380
 
 /* 
  * There are some system calls that are not present on 64 bit, some
index ae2cda5eee5a99b35b73e5b7868edd44cba1c6d2..e89cc2e71db1693c4c03f6e6ccc37ba9297b4012 100644 (file)
@@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
 COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
 COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
index dff2152350a7ebaaf3df6c8b000eb36b03afd19e..6a7d737d514c4c0064ddd8ef1ca80b824ae60c0c 100644 (file)
@@ -490,7 +490,7 @@ ENTRY(pgm_check_handler)
        jnz     .Lpgm_svcper            # -> single stepped svc
 1:     CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
        aghi    %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-       j       3f
+       j       4f
 2:     UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
        lg      %r15,__LC_KERNEL_STACK
        lgr     %r14,%r12
@@ -499,8 +499,8 @@ ENTRY(pgm_check_handler)
        tm      __LC_PGM_ILC+2,0x02     # check for transaction abort
        jz      3f
        mvc     __THREAD_trap_tdb(256,%r14),0(%r13)
-3:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
-       stg     %r10,__THREAD_last_break(%r14)
+3:     stg     %r10,__THREAD_last_break(%r14)
+4:     la      %r11,STACK_FRAME_OVERHEAD(%r15)
        stmg    %r0,%r7,__PT_R0(%r11)
        mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
        stmg    %r8,%r9,__PT_PSW(%r11)
@@ -509,14 +509,14 @@ ENTRY(pgm_check_handler)
        xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
        stg     %r10,__PT_ARGS(%r11)
        tm      __LC_PGM_ILC+3,0x80     # check for per exception
-       jz      4f
+       jz      5f
        tmhh    %r8,0x0001              # kernel per event ?
        jz      .Lpgm_kprobe
        oi      __PT_FLAGS+7(%r11),_PIF_PER_TRAP
        mvc     __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
        mvc     __THREAD_per_cause(2,%r14),__LC_PER_CODE
        mvc     __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-4:     REENABLE_IRQS
+5:     REENABLE_IRQS
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        larl    %r1,pgm_check_table
        llgh    %r10,__PT_INT_CODE+2(%r11)
index b67dafb7b7cfc58221d786ee9f97b2adc5a61217..e545ffe5155ab0179327cfe4f9f66e677c604041 100644 (file)
@@ -564,6 +564,8 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
+       if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
+               diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
        diag308(DIAG308_LOAD_CLEAR, NULL);
        if (MACHINE_IS_VM)
                __cpcmd("IPL", NULL, 0, NULL);
index 20cd339e11aefc9e190e7c98c5671b94dac46d37..f29e41c5e2ecf6d28018463cf89a2db677dffccc 100644 (file)
@@ -124,7 +124,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
        clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
        /* Initialize per thread user and system timer values */
        p->thread.user_timer = 0;
+       p->thread.guest_timer = 0;
        p->thread.system_timer = 0;
+       p->thread.hardirq_timer = 0;
+       p->thread.softirq_timer = 0;
 
        frame->sf.back_chain = 0;
        /* new return point is ret_from_fork */
index 9b59e6212d8fd22cadbc35f9e3546f7aa47e540c..2659b5cfeddba4cd294e71e356d1149cca68314f 100644 (file)
@@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
 SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
 SYSCALL(sys_preadv2,compat_sys_preadv2)
 SYSCALL(sys_pwritev2,compat_sys_pwritev2)
+NI_SYSCALL
+SYSCALL(sys_statx,compat_sys_statx)
index c14fc902991272be4d761b5f6fe506a64e12ace8..072d84ba42a3725ae1b1bff009bc5e241a264717 100644 (file)
@@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime)
 }
 
 static void account_system_index_scaled(struct task_struct *p,
-                                       cputime_t cputime, cputime_t scaled,
+                                       u64 cputime, u64 scaled,
                                        enum cpu_usage_stat index)
 {
        p->stimescaled += cputime_to_nsecs(scaled);
index b48dc5f1900b5122f62f98669d3f1ffd97955d99..463e5ef02304bb99c352c8468c7cf0ce57f0ba4e 100644 (file)
@@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 {
        spinlock_t *ptl;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
        pgste_t pgste;
        pte_t *ptep;
        pte_t pte;
        bool dirty;
 
-       ptep = get_locked_pte(mm, addr, &ptl);
+       pgd = pgd_offset(mm, addr);
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return false;
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return false;
+       /* We can't run guests backed by huge pages, but userspace can
+        * still set them up and then try to migrate them without any
+        * migration support.
+        */
+       if (pmd_large(*pmd))
+               return true;
+
+       ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (unlikely(!ptep))
                return false;
 
index e359ec67586982d3a47dd3bf0eb38418471836c2..12daf45369b44274a1ba299ecbf8be37311dfe55 100644 (file)
@@ -24,6 +24,7 @@
  */
 
 #include <linux/extable.h>
+#include <linux/ptrace.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
index ec871355fc2d60498cee6b245c44476f0dc59604..6736a3ad6286093dd1c5ef957a2a60cf82801d97 100644 (file)
@@ -24,6 +24,8 @@
  */
 
 #include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <asm/extable.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
index 94ac2739918c62557269e51ed1be6b655dd3629e..b668e351fd6c2e4f7a4b75c8a67eada77449abc9 100644 (file)
@@ -37,12 +37,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        return true;
 }
 
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
-       /* by default, allow everything */
-       return true;
-}
-
 /*
  * end asm-generic/mm_hooks.h functions
  */
index de5d572225f3adb4c1cdab6e30c2dc3383ae02c1..cd1fa97776c302e03aa60267624e5d8bb56c2092 100644 (file)
@@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
 extern void maybe_sigio_broken(int fd, int read);
 extern void sigio_broken(int fd, int read);
 
-/* sys-x86_64/prctl.c */
-extern int os_arch_prctl(int pid, int code, unsigned long *addr);
+/* prctl.c */
+extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
 
 /* tty.c */
 extern int get_pty(void);
index 62dfc644c908ab5ffb173b493d5b97b93e8f7ca6..59b06b48f27d7a4e0d8b82fc147d3fdad7f75295 100644 (file)
@@ -103,10 +103,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        /* by default, allow everything */
        return true;
 }
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
-       /* by default, allow everything */
-       return true;
-}
 #endif
index cc98d5a294eee25c56209d92e38bdb08f379780d..8977d9c77373475f9a01b30ab864ced9e13523ce 100644 (file)
@@ -106,6 +106,7 @@ config X86
        select HAVE_ARCH_KMEMCHECK
        select HAVE_ARCH_MMAP_RND_BITS          if MMU
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
+       select HAVE_ARCH_COMPAT_MMAP_BASES      if MMU && COMPAT
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE
@@ -2787,6 +2788,9 @@ config X86_DMA_REMAP
        bool
        depends on STA2X11
 
+config HAVE_GENERIC_GUP
+       def_bool y
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
index 9ba050fe47f30e6eff1d119ed33fa1505b2a661d..0af59fa789ea6fd250f79125cad3aef01468a39b 100644 (file)
 381    i386    pkey_alloc              sys_pkey_alloc
 382    i386    pkey_free               sys_pkey_free
 383    i386    statx                   sys_statx
+384    i386    arch_prctl              sys_arch_prctl                  compat_sys_arch_prctl
index 226ca70dc6bd43b04e15477c8574b5d48d0d4ff8..5c5d4d7618e6dac3ccd376fce7eb53a161de626d 100644 (file)
@@ -354,7 +354,7 @@ static void vgetcpu_cpu_init(void *arg)
        d.p = 1;                /* Present */
        d.d = 1;                /* 32-bit */
 
-       write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+       write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
 }
 
 static int vgetcpu_online(unsigned int cpu)
index b04bb6dfed7f8464c1425df50c0fa9d1481dcee2..0fe00446f9cac8c16e1ae3fcabc4a469b772597b 100644 (file)
  * Reuse free bits when adding new feature flags!
  */
 #define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
 #define X86_FEATURE_CPB                ( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB                ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
 #define X86_FEATURE_CAT_L3     ( 7*32+ 4) /* Cache Allocation Technology L3 */
index 1548ca92ad3f620d48bce51537d24e0701212a42..d0a21b12dd585c5e73c1930f4afa37aee5a483c6 100644 (file)
@@ -4,6 +4,7 @@
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
+#include <asm/fixmap.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -45,11 +46,43 @@ struct gdt_page {
 
 DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
 
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
 {
        return per_cpu(gdt_page, cpu).gdt;
 }
 
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_gdt_rw(void)
+{
+       return this_cpu_ptr(&gdt_page)->gdt;
+}
+
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+{
+       return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+{
+       unsigned int idx = get_cpu_gdt_ro_index(cpu);
+       return (struct desc_struct *)__fix_to_virt(idx);
+}
+
+/* Provide the current read-only GDT */
+static inline struct desc_struct *get_current_gdt_ro(void)
+{
+       return get_cpu_gdt_ro(smp_processor_id());
+}
+
+/* Provide the physical address of the GDT page. */
+static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
+{
+       return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
+}
+
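
A typical consumer of the helpers above builds a descriptor pointer from either the writeable per-cpu copy or the read-only fixmap alias and loads it with the lgdt wrapper defined later in this header. A minimal sketch (GDT_SIZE is assumed from <asm/segment.h>, as elsewhere in this file):

    /* Hedged sketch: point GDTR at the read-only fixmap alias for one CPU. */
    static inline void load_gdt_ro_sketch(int cpu)
    {
            struct desc_ptr gdt_descr = {
                    .address = (unsigned long)get_cpu_gdt_ro(cpu),
                    .size    = GDT_SIZE - 1,
            };

            native_load_gdt(&gdt_descr);
    }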
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +207,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
 
 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
 {
-       struct desc_struct *d = get_cpu_gdt_table(cpu);
+       struct desc_struct *d = get_cpu_gdt_rw(cpu);
        tss_desc tss;
 
        set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
@@ -194,22 +227,90 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
 
                set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
                                      entries * LDT_ENTRY_SIZE - 1);
-               write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+               write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
                                &ldt, DESC_LDT);
                asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
        }
 }
 
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+       asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+       asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+       asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+       asm volatile("sidt %0":"=m" (*dtr));
+}
+
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+       struct desc_ptr gdt;
+       int cpu = raw_smp_processor_id();
+       bool restore = 0;
+       struct desc_struct *fixmap_gdt;
+
+       native_store_gdt(&gdt);
+       fixmap_gdt = get_cpu_gdt_ro(cpu);
+
+       /*
+        * If the current GDT is the read-only fixmap, swap to the original
+        * writeable version. Swap back at the end.
+        */
+       if (gdt.address == (unsigned long)fixmap_gdt) {
+               load_direct_gdt(cpu);
+               restore = 1;
+       }
+       asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+       if (restore)
+               load_fixmap_gdt(cpu);
+}
+#else
 static inline void native_load_tr_desc(void)
 {
        asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
 }
+#endif
+
+static inline unsigned long native_store_tr(void)
+{
+       unsigned long tr;
+
+       asm volatile("str %0":"=r" (tr));
+
+       return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+       struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+       unsigned int i;
+
+       for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+               gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
 
 DECLARE_PER_CPU(bool, __tss_limit_invalid);
 
 static inline void force_reload_TR(void)
 {
-       struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
+       struct desc_struct *d = get_current_gdt_rw();
        tss_desc tss;
 
        memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -257,44 +358,6 @@ static inline void invalidate_tss_limit(void)
                this_cpu_write(__tss_limit_invalid, true);
 }
 
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
-       asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
-       asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
-       asm volatile("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
-       asm volatile("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
-       unsigned long tr;
-
-       asm volatile("str %0":"=r" (tr));
-
-       return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
-       struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-       unsigned int i;
-
-       for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
-               gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
 /* This intentionally ignores lm, since 32-bit apps don't have that field. */
 #define LDT_empty(info)                                        \
        ((info)->base_addr              == 0    &&      \
index 9d49c18b5ea9360feb5e5bb1fe378914154f34d5..d4d3ed456cb7b1559a8aa3da27e355269ada35a1 100644 (file)
@@ -293,8 +293,23 @@ do {                                                                       \
        }                                                               \
 } while (0)
 
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+       return IS_ENABLED(CONFIG_X86_32) ||
+              (IS_ENABLED(CONFIG_COMPAT) &&
+               test_thread_flag(TIF_ADDR32));
+}
+
+extern unsigned long tasksize_32bit(void);
+extern unsigned long tasksize_64bit(void);
+extern unsigned long get_mmap_base(int is_legacy);
+
 #ifdef CONFIG_X86_32
 
+#define __STACK_RND_MASK(is32bit) (0x7ff)
 #define STACK_RND_MASK (0x7ff)
 
 #define ARCH_DLINFO            ARCH_DLINFO_IA32
@@ -304,7 +319,8 @@ do {                                                                        \
 #else /* CONFIG_X86_32 */
 
 /* 1GB for 64bit, 8MB for 32bit */
-#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
+#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
 
 #define ARCH_DLINFO                                                    \
 do {                                                                   \
@@ -348,16 +364,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
                                              int uses_interp);
 #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
 
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static inline int mmap_is_ia32(void)
-{
-       return IS_ENABLED(CONFIG_X86_32) ||
-              (IS_ENABLED(CONFIG_COMPAT) &&
-               test_thread_flag(TIF_ADDR32));
-}
-
 /* Do not change the values. See get_align_mask() */
 enum align_flags {
        ALIGN_VA_32     = BIT(0),
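
Splitting out __STACK_RND_MASK(is32bit) earlier in this file lets callers that already know the target ABI compute stack randomization without consulting the current thread's flags. Roughly, the offset is derived as in this sketch (modelled on the generic ELF loader, not a literal copy of it):

    /* Hedged sketch: page-granular stack randomization from the mask. */
    static unsigned long stack_rnd_offset(int is32bit)
    {
            unsigned long random_variable;

            random_variable = get_random_long() & __STACK_RND_MASK(is32bit);
            return random_variable << PAGE_SHIFT;   /* 0x7ff pages vs 0x3fffff pages */
    }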
index 8554f960e21b7ce0d05dfb67fa36f6cff974650e..b65155cc3760a72b49b680c3f70923ddedf684d2 100644 (file)
@@ -100,6 +100,10 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_INTEL_MID
        FIX_LNW_VRTC,
 #endif
+       /* Fixmap entries to remap the GDTs, one per processor. */
+       FIX_GDT_REMAP_BEGIN,
+       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
        __end_of_permanent_fixed_addresses,
 
        /*
index 282630e4c6ea4696e54c6ea650751d913ce49c11..70ef205489f00e53ff568180c4dcbf6fb9e6ded1 100644 (file)
@@ -164,6 +164,7 @@ struct kimage_arch {
 };
 #else
 struct kimage_arch {
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
index 306c7e12af55b6518943dcf6fe2ab89745efdd1b..6e933d2d88d99bbf503ca379fa8f34e3dbfee993 100644 (file)
@@ -220,18 +220,6 @@ static inline int vma_pkey(struct vm_area_struct *vma)
 }
 #endif
 
-static inline bool __pkru_allows_pkey(u16 pkey, bool write)
-{
-       u32 pkru = read_pkru();
-
-       if (!__pkru_allows_read(pkru, pkey))
-               return false;
-       if (write && !__pkru_allows_write(pkru, pkey))
-               return false;
-
-       return true;
-}
-
 /*
  * We only want to enforce protection keys on the current process
  * because we effectively have no access to PKRU for other
@@ -268,8 +256,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
-       return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
-}
 #endif /* _ASM_X86_MMU_CONTEXT_H */
index d8b5f8ab8ef9e79fb76586bc3ca0f7f27325123e..673f9ac50f6d12612612e8efcce4eab0ef98bcbb 100644 (file)
@@ -45,6 +45,8 @@
 #define MSR_IA32_PERFCTR1              0x000000c2
 #define MSR_FSB_FREQ                   0x000000cd
 #define MSR_PLATFORM_INFO              0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT      31
+#define MSR_PLATFORM_INFO_CPUID_FAULT          BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
 
 #define MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
 #define NHM_C3_AUTO_DEMOTE             (1UL << 25)
 
 /* DEBUGCTLMSR bits (others vary by model): */
 #define DEBUGCTLMSR_LBR                        (1UL <<  0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT          1
 #define DEBUGCTLMSR_BTF                        (1UL <<  1) /* single-step on branches */
 #define DEBUGCTLMSR_TR                 (1UL <<  6)
 #define DEBUGCTLMSR_BTS                        (1UL <<  7)
 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT       39
 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE           (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
 
-/* MISC_FEATURE_ENABLES non-architectural features */
-#define MSR_MISC_FEATURE_ENABLES       0x00000140
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES      0x00000140
 
-#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT                1
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT      0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT          BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT       1
 
 #define MSR_IA32_TSC_DEADLINE          0x000006E0
 
index 0489884fdc440c14e68a1b961987163234cb1525..158d877ce9e99598c3e58352f2f7d7667e6c0569 100644 (file)
@@ -536,7 +536,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
                PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
                            val);
 }
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 static inline pud_t __pud(pudval_t val)
 {
        pudval_t ret;
@@ -565,6 +565,32 @@ static inline pudval_t pud_val(pud_t pud)
        return ret;
 }
 
+static inline void pud_clear(pud_t *pudp)
+{
+       set_pud(pudp, __pud(0));
+}
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+       p4dval_t val = native_p4d_val(p4d);
+
+       if (sizeof(p4dval_t) > sizeof(long))
+               PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp,
+                           val, (u64)val >> 32);
+       else
+               PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp,
+                           val);
+}
+
+static inline void p4d_clear(p4d_t *p4dp)
+{
+       set_p4d(p4dp, __p4d(0));
+}
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+
+#error FIXME
+
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
        pgdval_t val = native_pgd_val(pgd);
@@ -582,10 +608,7 @@ static inline void pgd_clear(pgd_t *pgdp)
        set_pgd(pgdp, __pgd(0));
 }
 
-static inline void pud_clear(pud_t *pudp)
-{
-       set_pud(pudp, __pud(0));
-}
+#endif  /* CONFIG_PGTABLE_LEVELS == 5 */
 
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
index b060f962d581684912a08feefd8354db7b943d2b..93c49cf09b63a60530c11fb6f8e2b4092e5058fe 100644 (file)
@@ -279,12 +279,18 @@ struct pv_mmu_ops {
        struct paravirt_callee_save pmd_val;
        struct paravirt_callee_save make_pmd;
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
        struct paravirt_callee_save pud_val;
        struct paravirt_callee_save make_pud;
 
-       void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+       void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#error FIXME
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
+
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
+
 #endif /* CONFIG_PGTABLE_LEVELS >= 3 */
 
        struct pv_lazy_ops lazy_mode;
index b6d425999f99de01b7f8a5645894a39e04ac685c..2f585054c63c897511c0aa44601bba37ee619c88 100644 (file)
@@ -121,10 +121,10 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 #endif /* CONFIG_X86_PAE */
 
 #if CONFIG_PGTABLE_LEVELS > 3
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
        paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
-       set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+       set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -150,6 +150,37 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
        ___pud_free_tlb(tlb, pud);
 }
 
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+       paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
+       set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+       gfp_t gfp = GFP_KERNEL_ACCOUNT;
+
+       if (mm == &init_mm)
+               gfp &= ~__GFP_ACCOUNT;
+       return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+       BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+       free_page((unsigned long)p4d);
+}
+
+extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
+
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+                                 unsigned long address)
+{
+       ___p4d_free_tlb(tlb, p4d);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
index 392576433e7785769493032934bb44900304dd2e..373ab1de909f4fba5267cbd16102ad52b5c21aef 100644 (file)
@@ -7,6 +7,7 @@
 typedef unsigned long  pteval_t;
 typedef unsigned long  pmdval_t;
 typedef unsigned long  pudval_t;
+typedef unsigned long  p4dval_t;
 typedef unsigned long  pgdval_t;
 typedef unsigned long  pgprotval_t;
 
index 72277b1028a5f54551962555fa56bfd5aebab15c..29eb5778019ccfe6af6b5f6a4fecf0e914c43b88 100644 (file)
@@ -215,4 +215,51 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
 #define __pte_to_swp_entry(pte)                ((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)          ((pte_t){ { .pte_high = (x).val } })
 
+#define gup_get_pte gup_get_pte
+/*
+ * WARNING: only to be used in the get_user_pages_fast() implementation.
+ *
+ * With get_user_pages_fast(), we walk down the pagetables without taking
+ * any locks.  For this we would like to load the pointers atomically,
+ * but that is not possible (without expensive cmpxchg8b) on PAE.  What
+ * we do have is the guarantee that a PTE will only either go from not
+ * present to present, or present to not present or both -- it will not
+ * switch to a completely different present page without a TLB flush in
+ * between; something that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ *
+ *   ptep->pte_high = h;
+ *   smp_wmb();
+ *   ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ *
+ *   ptep->pte_low = 0;
+ *   smp_wmb();
+ *   ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees 'l' iff pte_high
+ * sees 'h'. We load pte_high *after* loading pte_low, which ensures we
+ * don't see an older value of pte_high.  *Then* we recheck pte_low,
+ * which ensures that we haven't picked up a changed pte high. We might
+ * have gotten rubbish values from pte_low and pte_high, but we are
+ * guaranteed that pte_low will not have the present bit set *unless*
+ * it is 'l'. Because get_user_pages_fast() only operates on present ptes
+ * we're safe.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+       pte_t pte;
+
+       do {
+               pte.pte_low = ptep->pte_low;
+               smp_rmb();
+               pte.pte_high = ptep->pte_high;
+               smp_rmb();
+       } while (unlikely(pte.pte_low != ptep->pte_low));
+
+       return pte;
+}
+
 #endif /* _ASM_X86_PGTABLE_3LEVEL_H */
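
The load ordering above only works because stores follow the mirror-image protocol described in the comment. A sketch of that writer side, in kernel context (illustrative only; the real setters live elsewhere in the PAE headers):

    /* Hedged sketch of the store ordering that gup_get_pte() relies on. */
    static inline void set_pte_present_sketch(pte_t *ptep, pte_t pte)
    {
            ptep->pte_high = pte.pte_high;   /* publish the high word first */
            smp_wmb();
            ptep->pte_low = pte.pte_low;     /* present bit lives in the low word */
    }

    static inline void clear_pte_sketch(pte_t *ptep)
    {
            ptep->pte_low = 0;               /* drop the present bit first */
            smp_wmb();
            ptep->pte_high = 0;
    }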
index bcc89625ebe530964c5b23a13b212711651d2296..b8a4341faafae9a33ab5b6ba5a9af92522c01493 100644 (file)
@@ -7,6 +7,7 @@
 typedef u64    pteval_t;
 typedef u64    pmdval_t;
 typedef u64    pudval_t;
+typedef u64    p4dval_t;
 typedef u64    pgdval_t;
 typedef u64    pgprotval_t;
 
index 1cfb36b8c024ab07b8334121fc56ac79f2a35371..bf51e60545778b5aa19c1ba58aa32ff18fcb9ef9 100644 (file)
@@ -53,11 +53,19 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 
 #define set_pmd(pmdp, pmd)             native_set_pmd(pmdp, pmd)
 
-#ifndef __PAGETABLE_PUD_FOLDED
+#ifndef __PAGETABLE_P4D_FOLDED
 #define set_pgd(pgdp, pgd)             native_set_pgd(pgdp, pgd)
 #define pgd_clear(pgd)                 native_pgd_clear(pgd)
 #endif
 
+#ifndef set_p4d
+# define set_p4d(p4dp, p4d)            native_set_p4d(p4dp, p4d)
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define p4d_clear(p4d)                 native_p4d_clear(p4d)
+#endif
+
 #ifndef set_pud
 # define set_pud(pudp, pud)            native_set_pud(pudp, pud)
 #endif
@@ -74,6 +82,11 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
 #define pgd_val(x)     native_pgd_val(x)
 #define __pgd(x)       native_make_pgd(x)
 
+#ifndef __PAGETABLE_P4D_FOLDED
+#define p4d_val(x)     native_p4d_val(x)
+#define __p4d(x)       native_make_p4d(x)
+#endif
+
 #ifndef __PAGETABLE_PUD_FOLDED
 #define pud_val(x)     native_pud_val(x)
 #define __pud(x)       native_make_pud(x)
@@ -179,6 +192,17 @@ static inline unsigned long pud_pfn(pud_t pud)
        return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
 }
 
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+       return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
+}
+
+static inline int p4d_large(p4d_t p4d)
+{
+       /* No 512 GiB pages yet */
+       return 0;
+}
+
 #define pte_page(pte)  pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
@@ -222,6 +246,11 @@ static inline int pud_devmap(pud_t pud)
        return 0;
 }
 #endif
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+       return 0;
+}
 #endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -538,6 +567,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 #define pte_pgprot(x) __pgprot(pte_flags(x))
 #define pmd_pgprot(x) __pgprot(pmd_flags(x))
 #define pud_pgprot(x) __pgprot(pud_flags(x))
+#define p4d_pgprot(x) __pgprot(p4d_flags(x))
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
@@ -587,6 +617,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
 #include <linux/mm_types.h>
 #include <linux/mmdebug.h>
 #include <linux/log2.h>
+#include <asm/fixmap.h>
 
 static inline int pte_none(pte_t pte)
 {
@@ -770,7 +801,52 @@ static inline int pud_large(pud_t pud)
 }
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
+static inline unsigned long pud_index(unsigned long address)
+{
+       return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
 #if CONFIG_PGTABLE_LEVELS > 3
+static inline int p4d_none(p4d_t p4d)
+{
+       return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+       return p4d_flags(p4d) & _PAGE_PRESENT;
+}
+
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+       return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define p4d_page(p4d)          \
+       pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+
+/* Find an entry in the third-level page table.. */
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+       return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+       return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+#endif  /* CONFIG_PGTABLE_LEVELS > 3 */
+
+static inline unsigned long p4d_index(unsigned long address)
+{
+       return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
+}
+
+#if CONFIG_PGTABLE_LEVELS > 4
 static inline int pgd_present(pgd_t pgd)
 {
        return pgd_flags(pgd) & _PAGE_PRESENT;
@@ -788,14 +864,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
 #define pgd_page(pgd)          pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
 
 /* to find an entry in a page-table-directory. */
-static inline unsigned long pud_index(unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
-       return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
-}
-
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
-{
-       return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+       return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
 }
 
 static inline int pgd_bad(pgd_t pgd)
@@ -813,7 +884,7 @@ static inline int pgd_none(pgd_t pgd)
         */
        return !native_pgd_val(pgd);
 }
-#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
 
 #endif /* __ASSEMBLY__ */
 
@@ -1120,6 +1191,54 @@ static inline u16 pte_flags_pkey(unsigned long pte_flags)
 #endif
 }
 
+static inline bool __pkru_allows_pkey(u16 pkey, bool write)
+{
+       u32 pkru = read_pkru();
+
+       if (!__pkru_allows_read(pkru, pkey))
+               return false;
+       if (write && !__pkru_allows_write(pkru, pkey))
+               return false;
+
+       return true;
+}
+
+/*
+ * 'pteval' can come from a PTE, PMD or PUD.  We only check
+ * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
+ * same value on all 3 types.
+ */
+static inline bool __pte_access_permitted(unsigned long pteval, bool write)
+{
+       unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
+
+       if (write)
+               need_pte_bits |= _PAGE_RW;
+
+       if ((pteval & need_pte_bits) != need_pte_bits)
+               return 0;
+
+       return __pkru_allows_pkey(pte_flags_pkey(pteval), write);
+}
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+       return __pte_access_permitted(pte_val(pte), write);
+}
+
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+       return __pte_access_permitted(pmd_val(pmd), write);
+}
+
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+       return __pte_access_permitted(pud_val(pud), write);
+}
+
 #include <asm-generic/pgtable.h>
 #endif /* __ASSEMBLY__ */
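
These pte/pmd/pud_access_permitted() helpers replace the arch_pte_access_permitted() variants removed elsewhere in this series and are what a lockless get_user_pages_fast() walk is expected to consult per level. A minimal sketch of the intended call pattern (local variables assumed from the generic GUP walker):

    /* Hedged sketch: leaf check in a lockless GUP walk. */
    pte_t pte = gup_get_pte(ptep);

    if (!pte_present(pte) || !pte_access_permitted(pte, write))
            return 0;   /* bail out and let the slow path take the locks */
    /* ...otherwise grab a reference on pte_page(pte) and record it... */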
 
index fbc73360aea05128d6b40be23e261893cd47a523..bfab55675c1607155ff983671c16fece71aafbdd 100644 (file)
@@ -14,7 +14,6 @@
  */
 #ifndef __ASSEMBLY__
 #include <asm/processor.h>
-#include <asm/fixmap.h>
 #include <linux/threads.h>
 #include <asm/paravirt.h>
 
index 73c7ccc389122d074399e9fc776ec33ab5332baf..0593a1ae757365d4b2eb6a8215ab87f579f7b8d1 100644 (file)
@@ -41,9 +41,9 @@ extern void paging_init(void);
 
 struct mm_struct;
 
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
 void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
 
-
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
                                    pte_t *ptep)
 {
@@ -121,6 +121,16 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+       *p4dp = p4d;
+}
+
+static inline void native_p4d_clear(p4d_t *p4d)
+{
+       native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
+}
+
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
        *pgdp = pgd;
@@ -206,6 +216,20 @@ extern void cleanup_highmap(void);
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
-#endif /* !__ASSEMBLY__ */
+#define gup_fast_permitted gup_fast_permitted
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
+               int write)
+{
+       unsigned long len, end;
+
+       len = (unsigned long)nr_pages << PAGE_SHIFT;
+       end = start + len;
+       if (end < start)
+               return false;
+       if (end >> __VIRTUAL_MASK_SHIFT)
+               return false;
+       return true;
+}
 
+#endif /* !__ASSEMBLY__ */
 #endif /* _ASM_X86_PGTABLE_64_H */
index 3a264200c62fd2067c387ffaecb27309727ff1de..516593e66bd61c00dcb23d37911dc9f703c8ffba 100644 (file)
@@ -13,6 +13,7 @@
 typedef unsigned long  pteval_t;
 typedef unsigned long  pmdval_t;
 typedef unsigned long  pudval_t;
+typedef unsigned long  p4dval_t;
 typedef unsigned long  pgdval_t;
 typedef unsigned long  pgprotval_t;
 
@@ -67,7 +68,8 @@ typedef struct { pteval_t pte; } pte_t;
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END    (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
 #define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END      _AC(0xffffffffff000000, UL)
+/* The module section ends at the start of the fixmap */
+#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
 #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
index 62484333673d98c251d52d1eccc10e762b38478b..4930afe9df0a28e46570c85e7f6f3ab397217440 100644 (file)
@@ -272,9 +272,20 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
        return native_pgd_val(pgd) & PTE_FLAGS_MASK;
 }
 
-#if CONFIG_PGTABLE_LEVELS > 3
-#include <asm-generic/5level-fixup.h>
+#if CONFIG_PGTABLE_LEVELS > 4
+
+#error FIXME
 
+#else
+#include <asm-generic/pgtable-nop4d.h>
+
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+       return native_pgd_val(p4d.pgd);
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
 typedef struct { pudval_t pud; } pud_t;
 
 static inline pud_t native_make_pud(pmdval_t val)
@@ -287,12 +298,11 @@ static inline pudval_t native_pud_val(pud_t pud)
        return pud.pud;
 }
 #else
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 
 static inline pudval_t native_pud_val(pud_t pud)
 {
-       return native_pgd_val(pud.pgd);
+       return native_pgd_val(pud.p4d.pgd);
 }
 #endif
 
@@ -309,15 +319,30 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
        return pmd.pmd;
 }
 #else
-#define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopmd.h>
 
 static inline pmdval_t native_pmd_val(pmd_t pmd)
 {
-       return native_pgd_val(pmd.pud.pgd);
+       return native_pgd_val(pmd.pud.p4d.pgd);
 }
 #endif
 
+static inline p4dval_t p4d_pfn_mask(p4d_t p4d)
+{
+       /* No 512 GiB huge pages yet */
+       return PTE_PFN_MASK;
+}
+
+static inline p4dval_t p4d_flags_mask(p4d_t p4d)
+{
+       return ~p4d_pfn_mask(p4d);
+}
+
+static inline p4dval_t p4d_flags(p4d_t p4d)
+{
+       return native_p4d_val(p4d) & p4d_flags_mask(p4d);
+}
+
 static inline pudval_t pud_pfn_mask(pud_t pud)
 {
        if (native_pud_val(pud) & _PAGE_PSE)
@@ -461,6 +486,7 @@ enum pg_level {
        PG_LEVEL_4K,
        PG_LEVEL_2M,
        PG_LEVEL_1G,
+       PG_LEVEL_512G,
        PG_LEVEL_NUM
 };
 
index 4aa93b560a2b2075c94f338438469dc773f92330..3cada998a402a7893ffd2fc709916f4fcbc3f970 100644 (file)
@@ -709,6 +709,8 @@ extern struct desc_ptr              early_gdt_descr;
 
 extern void cpu_set_gdt(int);
 extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
+extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
 
@@ -790,6 +792,7 @@ static inline void spin_lock_prefetch(const void *x)
 /*
  * User space process size: 3GB (default).
  */
+#define IA32_PAGE_OFFSET       PAGE_OFFSET
 #define TASK_SIZE              PAGE_OFFSET
 #define TASK_SIZE_MAX          TASK_SIZE
 #define STACK_TOP              TASK_SIZE
@@ -866,7 +869,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
  * This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
-#define TASK_UNMAPPED_BASE     (PAGE_ALIGN(TASK_SIZE / 3))
+#define __TASK_UNMAPPED_BASE(task_size)        (PAGE_ALIGN(task_size / 3))
+#define TASK_UNMAPPED_BASE             __TASK_UNMAPPED_BASE(TASK_SIZE)
 
 #define KSTK_EIP(task)         (task_pt_regs(task)->ip)
 
@@ -877,6 +881,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+
 /* Register/unregister a process' MPX related resource */
 #define MPX_ENABLE_MANAGEMENT()        mpx_enable_management()
 #define MPX_DISABLE_MANAGEMENT()       mpx_disable_management()
index 9b9b30b1944187c8c3de3ed5c4ef1d82ea7c422d..8d3964fc5f915c37ec7bf74706446adc8ad2e93b 100644 (file)
@@ -9,6 +9,7 @@ void syscall_init(void);
 
 #ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
 #endif
 
 #ifdef CONFIG_X86_32
@@ -30,6 +31,7 @@ void x86_report_nx(void);
 
 extern int reboot_force;
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int option,
+                         unsigned long cpuid_enabled);
 
 #endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
new file mode 100644 (file)
index 0000000..d7da272
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_PURGATORY_H
+#define _ASM_X86_PURGATORY_H
+
+#ifndef __ASSEMBLY__
+#include <linux/purgatory.h>
+
+extern void purgatory(void);
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern unsigned long purgatory_backup_dest;
+extern unsigned long purgatory_backup_src;
+extern unsigned long purgatory_backup_sz;
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_PURGATORY_H */
index 58505f01962f31f80f2e99c5b0bb9e5b2c8405b8..dcbd9bcce71443eb64325051faa532a68bf50c00 100644 (file)
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
        unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
-       struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+       struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
        struct desc_struct desc;
 
        desc = gdt_table[GDT_ENTRY_STACK_CANARY];
index ad6f5eb07a95bd221fe4e13c8cdb3af0cd27aa37..9fc44b95f7cb1097f3c5b55d6f40a7e89f8f9175 100644 (file)
@@ -87,6 +87,7 @@ struct thread_info {
 #define TIF_SECCOMP            8       /* secure computing */
 #define TIF_USER_RETURN_NOTIFY 11      /* notify kernel of userspace return */
 #define TIF_UPROBE             12      /* breakpointed or singlestepping */
+#define TIF_NOCPUID            15      /* CPUID is not accessible in userland */
 #define TIF_NOTSC              16      /* TSC is not accessible in userland */
 #define TIF_IA32               17      /* IA32 compatibility process */
 #define TIF_NOHZ               19      /* in adaptive nohz mode */
@@ -110,6 +111,7 @@ struct thread_info {
 #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
 #define _TIF_USER_RETURN_NOTIFY        (1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
+#define _TIF_NOCPUID           (1 << TIF_NOCPUID)
 #define _TIF_NOTSC             (1 << TIF_NOTSC)
 #define _TIF_IA32              (1 << TIF_IA32)
 #define _TIF_NOHZ              (1 << TIF_NOHZ)
@@ -138,7 +140,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW                                                        \
-       (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+       (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_THREAD_INFO_H */
index 6fa85944af83d8ddbbad3a344a31a7920e64e6d0..75d002bdb3f35bcc12c06ec4acc213bc49241ae9 100644 (file)
@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
        }
 }
 
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+       unsigned long cr4;
+
+       cr4 = this_cpu_read(cpu_tlbstate.cr4);
+       cr4 ^= mask;
+       this_cpu_write(cpu_tlbstate.cr4, cr4);
+       __write_cr4(cr4);
+}
+
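
cr4_toggle_bits() is aimed at context-switch paths where a per-task CR4-controlled feature differs between the outgoing and incoming task. A hedged sketch of that pattern (X86_CR4_TSD is shown as one plausible bit; prev_flags and next_flags stand in for the tasks' thread-info flags):

    /* Hedged sketch: flip a CR4 bit only when prev and next disagree on it. */
    if ((prev_flags ^ next_flags) & _TIF_NOTSC)
            cr4_toggle_bits(X86_CR4_TSD);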
 /* Read the CR4 shadow. */
 static inline unsigned long cr4_read_shadow(void)
 {
@@ -188,7 +198,7 @@ static inline void __native_flush_tlb_single(unsigned long addr)
 
 static inline void __flush_tlb_all(void)
 {
-       if (static_cpu_has(X86_FEATURE_PGE))
+       if (boot_cpu_has(X86_FEATURE_PGE))
                __flush_tlb_global();
        else
                __flush_tlb();
index 33cbd3db97b93d4449d29dfd1a79e1875ebfb1dc..bf2ca56fba110ae07369bf3bf730afaa67c045a4 100644 (file)
@@ -279,13 +279,17 @@ static inline pte_t __pte_ma(pteval_t x)
 
 #define pmd_val_ma(v) ((v).pmd)
 #ifdef __PAGETABLE_PUD_FOLDED
-#define pud_val_ma(v) ((v).pgd.pgd)
+#define pud_val_ma(v) ((v).p4d.pgd.pgd)
 #else
 #define pud_val_ma(v) ((v).pud)
 #endif
 #define __pmd_ma(x)    ((pmd_t) { (x) } )
 
-#define pgd_val_ma(x)  ((x).pgd)
+#ifdef __PAGETABLE_P4D_FOLDED
+#define p4d_val_ma(x)  ((x).pgd.pgd)
+#else
+#define p4d_val_ma(x)  ((x).p4d)
+#endif
 
 void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
 
index 835aa51c7f6ebb914592752373f55287ca8cc235..c4576551709216cd51c17e42af20c36a19cd5e7a 100644 (file)
@@ -1,10 +1,13 @@
 #ifndef _ASM_X86_PRCTL_H
 #define _ASM_X86_PRCTL_H
 
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS            0x1001
+#define ARCH_SET_FS            0x1002
+#define ARCH_GET_FS            0x1003
+#define ARCH_GET_GS            0x1004
+
+#define ARCH_GET_CPUID         0x1011
+#define ARCH_SET_CPUID         0x1012
 
 #define ARCH_MAP_VDSO_X32      0x2001
 #define ARCH_MAP_VDSO_32       0x2002
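
The two new request codes are driven from userspace through arch_prctl(2). A hedged usage sketch, assuming an x86-64 system whose glibc exposes SYS_arch_prctl (there is no glibc wrapper for these codes, so a raw syscall() is used; the 0x1011/0x1012 values simply mirror the header above):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #define ARCH_GET_CPUID 0x1011
    #define ARCH_SET_CPUID 0x1012

    int main(void)
    {
            /* 1 = CPUID allowed, 0 = CPUID faults for this task. */
            long enabled = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);
            printf("CPUID currently %s\n", enabled == 1 ? "enabled" : "disabled");

            /*
             * Ask the kernel to make CPUID fault; this fails with ENODEV
             * when the CPU lacks X86_FEATURE_CPUID_FAULT.
             */
            if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) != 0)
                    perror("ARCH_SET_CPUID");
            return 0;
    }
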
index 48587335ede8e2296b80ff991d1bf4e8e155ad46..ed014814ea35fb46ddc14b95b7d030e0baaaf82c 100644 (file)
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
 #ifdef CONFIG_SMP
        initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
        early_gdt_descr.address =
-                       (unsigned long)get_cpu_gdt_table(smp_processor_id());
+                       (unsigned long)get_cpu_gdt_rw(smp_processor_id());
        initial_gs = per_cpu_offset(smp_processor_id());
 #endif
        initial_code = (unsigned long)wakeup_long64;
index 5a414545e8a39001a1679b40012adad2ac908d7b..446b0d3d4932fbe30f6d067cfd09bf65a5ac6709 100644 (file)
@@ -609,7 +609,7 @@ static long __apm_bios_call(void *_call)
 
        cpu = get_cpu();
        BUG_ON(cpu != 0);
-       gdt = get_cpu_gdt_table(cpu);
+       gdt = get_cpu_gdt_rw(cpu);
        save_desc_40 = gdt[0x40 / 8];
        gdt[0x40 / 8] = bad_bios_desc;
 
@@ -685,7 +685,7 @@ static long __apm_bios_call_simple(void *_call)
 
        cpu = get_cpu();
        BUG_ON(cpu != 0);
-       gdt = get_cpu_gdt_table(cpu);
+       gdt = get_cpu_gdt_rw(cpu);
        save_desc_40 = gdt[0x40 / 8];
        gdt[0x40 / 8] = bad_bios_desc;
 
@@ -2352,7 +2352,7 @@ static int __init apm_init(void)
         * Note we only set APM segments on CPU zero, since we pin the APM
         * code to that CPU.
         */
-       gdt = get_cpu_gdt_table(0);
+       gdt = get_cpu_gdt_rw(0);
        set_desc_base(&gdt[APM_CS >> 3],
                 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
        set_desc_base(&gdt[APM_CS_16 >> 3],
index 58094a1f9e9d301e11d2c93a1ecc126e1715002e..8ee32119144d7b475a0e4ab3c93cae1a55131d59 100644 (file)
@@ -448,19 +448,60 @@ void load_percpu_segment(int cpu)
        load_stack_canary_segment();
 }
 
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+#ifdef CONFIG_X86_64
+       /* On 64-bit systems, we use a read-only fixmap GDT. */
+       pgprot_t prot = PAGE_KERNEL_RO;
+#else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+        * our double fault handler uses a task gate, and entering through
+        * a task gate needs to change an available TSS to busy.  If the GDT
+        * is read-only, that will triple fault.
+        *
+        * On Xen PV, the GDT must be read-only because the hypervisor requires
+        * it.
+        */
+       pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
+#endif
+
+       __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
+}
+
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+       struct desc_ptr gdt_descr;
+
+       gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
+       gdt_descr.size = GDT_SIZE - 1;
+       load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL_GPL(load_direct_gdt);
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+       struct desc_ptr gdt_descr;
+
+       gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
+       gdt_descr.size = GDT_SIZE - 1;
+       load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL_GPL(load_fixmap_gdt);
+
 /*
  * Current gdt points %fs at the "master" per-cpu area: after this,
  * it's on the real one.
  */
 void switch_to_new_gdt(int cpu)
 {
-       struct desc_ptr gdt_descr;
-
-       gdt_descr.address = (long)get_cpu_gdt_table(cpu);
-       gdt_descr.size = GDT_SIZE - 1;
-       load_gdt(&gdt_descr);
+       /* Load the original GDT */
+       load_direct_gdt(cpu);
        /* Reload the per-cpu base */
-
        load_percpu_segment(cpu);
 }
 
@@ -1526,6 +1567,9 @@ void cpu_init(void)
 
        if (is_uv_system())
                uv_cpu_init();
+
+       setup_fixmap_gdt(cpu);
+       load_fixmap_gdt(cpu);
 }
 
 #else
@@ -1581,6 +1625,9 @@ void cpu_init(void)
        dbg_restore_debug_regs();
 
        fpu__init_cpu();
+
+       setup_fixmap_gdt(cpu);
+       load_fixmap_gdt(cpu);
 }
 #endif
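
The common.c changes above keep one writable GDT per CPU (get_cpu_gdt_rw()) and expose a second, read-only alias of the same pages through the fixmap (get_cpu_gdt_ro()); GDTR is then pointed at the read-only alias wherever the CPU can tolerate it. The effect is a single backing object visible through two mappings with different protections. A small userspace analogy of that layout, not kernel code, assuming a glibc with memfd_create():

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = memfd_create("gdt-analogy", 0);
            ftruncate(fd, 4096);

            /* "get_cpu_gdt_rw()": writable view used to edit entries. */
            char *rw = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            /* "get_cpu_gdt_ro()": read-only alias of the very same page. */
            char *ro = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);

            strcpy(rw, "entry written through the RW alias");
            printf("seen through the RO alias: %s\n", ro);

            /*
             * Writing through 'ro' would fault, much as the CPU would fault
             * if it tried to write an accessed bit into the fixmap GDT.
             */
            return 0;
    }
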
 
index 063197771b8d7ba08f2eafe474cacb0efe9e79d3..dfa90a3a5145d784dafdcd201243d2bcde537897 100644 (file)
@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
                return;
        }
 
-       if (ring3mwait_disabled) {
-               msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
-                             MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+       if (ring3mwait_disabled)
                return;
-       }
-
-       msr_set_bit(MSR_MISC_FEATURE_ENABLES,
-                   MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
 
        set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+       this_cpu_or(msr_misc_features_shadow,
+                   1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
 
        if (c == &boot_cpu_data)
                ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
        init_intel_energy_perf(c);
 }
 
+static void init_cpuid_fault(struct cpuinfo_x86 *c)
+{
+       u64 msr;
+
+       if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
+               if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
+                       set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
+       }
+}
+
+static void init_intel_misc_features(struct cpuinfo_x86 *c)
+{
+       u64 msr;
+
+       if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
+               return;
+
+       /* Clear all MISC features */
+       this_cpu_write(msr_misc_features_shadow, 0);
+
+       /* Check features and update capabilities and shadow control bits */
+       init_cpuid_fault(c);
+       probe_xeon_phi_r3mwait(c);
+
+       msr = this_cpu_read(msr_misc_features_shadow);
+       wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
+}
+
 static void init_intel(struct cpuinfo_x86 *c)
 {
        unsigned int l2 = 0;
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
 
        init_intel_energy_perf(c);
 
-       probe_xeon_phi_r3mwait(c);
+       init_intel_misc_features(c);
 }
 
 #ifdef CONFIG_X86_32
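
init_intel_misc_features() sets up the pattern the scheduler relies on later: the per-CPU software shadow msr_misc_features_shadow is cleared, each probe (CPUID faulting, ring-3 MWAIT) ORs its bit into the shadow, and MSR_MISC_FEATURES_ENABLES is written once from that shadow, so hot paths never need an rdmsr. A generic sketch of the write-through-shadow pattern; the bit numbers and the wrmsr stub are assumptions for illustration only:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t shadow;                    /* per-CPU software copy in the kernel */

    static void wrmsr_stub(uint64_t val)       /* stands in for wrmsrl(MSR_..., val) */
    {
            printf("MSR <- %#llx\n", (unsigned long long)val);
    }

    /* Change the shadow first, then write the whole register from it. */
    static void set_feature_bit(int bit, int on)
    {
            if (on)
                    shadow |= 1ULL << bit;
            else
                    shadow &= ~(1ULL << bit);
            wrmsr_stub(shadow);
    }

    int main(void)
    {
            shadow = 0;                        /* "Clear all MISC features" */
            set_feature_bit(0, 1);             /* e.g. CPUID faulting enable */
            set_feature_bit(1, 1);             /* e.g. ring-3 MWAIT enable */
            set_feature_bit(0, 0);
            return 0;
    }
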
index 469b23d6acc272b2113878182582d9fa7532f189..5f43cec296c5c38dc28a2a0c0e43aee91ee89934 100644 (file)
@@ -103,6 +103,7 @@ static void machine_kexec_page_table_set_one(
        pgd_t *pgd, pmd_t *pmd, pte_t *pte,
        unsigned long vaddr, unsigned long paddr)
 {
+       p4d_t *p4d;
        pud_t *pud;
 
        pgd += pgd_index(vaddr);
@@ -110,7 +111,8 @@ static void machine_kexec_page_table_set_one(
        if (!(pgd_val(*pgd) & _PAGE_PRESENT))
                set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
 #endif
-       pud = pud_offset(pgd, vaddr);
+       p4d = p4d_offset(pgd, vaddr);
+       pud = pud_offset(p4d, vaddr);
        pmd = pmd_offset(pud, vaddr);
        if (!(pmd_val(*pmd) & _PAGE_PRESENT))
                set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
index 307b1f4543de4bc96c6759c5f81a7faf5c9f443c..085c3b300d32e15faeb297028ad45ddb14513dd3 100644 (file)
@@ -36,6 +36,7 @@ static struct kexec_file_ops *kexec_file_loaders[] = {
 
 static void free_transition_pgtable(struct kimage *image)
 {
+       free_page((unsigned long)image->arch.p4d);
        free_page((unsigned long)image->arch.pud);
        free_page((unsigned long)image->arch.pmd);
        free_page((unsigned long)image->arch.pte);
@@ -43,6 +44,7 @@ static void free_transition_pgtable(struct kimage *image)
 
 static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
 {
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -53,13 +55,21 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
        paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
        pgd += pgd_index(vaddr);
        if (!pgd_present(*pgd)) {
+               p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
+               if (!p4d)
+                       goto err;
+               image->arch.p4d = p4d;
+               set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+       }
+       p4d = p4d_offset(pgd, vaddr);
+       if (!p4d_present(*p4d)) {
                pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
                if (!pud)
                        goto err;
                image->arch.pud = pud;
-               set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+               set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
        }
-       pud = pud_offset(pgd, vaddr);
+       pud = pud_offset(p4d, vaddr);
        if (!pud_present(*pud)) {
                pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
                if (!pmd)
@@ -194,19 +204,22 @@ static int arch_update_purgatory(struct kimage *image)
 
        /* Setup copying of backup region */
        if (image->type == KEXEC_TYPE_CRASH) {
-               ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+               ret = kexec_purgatory_get_set_symbol(image,
+                               "purgatory_backup_dest",
                                &image->arch.backup_load_addr,
                                sizeof(image->arch.backup_load_addr), 0);
                if (ret)
                        return ret;
 
-               ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+               ret = kexec_purgatory_get_set_symbol(image,
+                               "purgatory_backup_src",
                                &image->arch.backup_src_start,
                                sizeof(image->arch.backup_src_start), 0);
                if (ret)
                        return ret;
 
-               ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+               ret = kexec_purgatory_get_set_symbol(image,
+                               "purgatory_backup_sz",
                                &image->arch.backup_src_sz,
                                sizeof(image->arch.backup_src_sz), 0);
                if (ret)
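
init_transition_pgtable() above shows the shape every page-table population path takes once the p4d level is inserted: at each level, allocate the next table if the entry is empty, link it, then descend with the *_offset() helper. A toy, purely userspace model of that allocate-and-descend walk; the three-level layout, fan-out and names are illustrative assumptions, not the kernel's real geometry:

    #include <stdio.h>
    #include <stdlib.h>

    #define ENTRIES 4                          /* toy fan-out, not the real 512 */

    struct pud { int page; };                  /* leaf level in this toy model */
    struct p4d { struct pud *pud[ENTRIES]; };
    struct pgd { struct p4d *p4d[ENTRIES]; };

    /* Allocate-if-missing, then descend: same shape as init_transition_pgtable(). */
    static struct pud *walk(struct pgd *pgd, int gi, int p4i, int pui)
    {
            if (!pgd->p4d[gi])                 /* "if (!pgd_present(*pgd))" */
                    pgd->p4d[gi] = calloc(1, sizeof(struct p4d));

            struct p4d *p4d = pgd->p4d[gi];    /* "p4d_offset(pgd, vaddr)" */
            if (!p4d->pud[p4i])                /* "if (!p4d_present(*p4d))" */
                    p4d->pud[p4i] = calloc(ENTRIES, sizeof(struct pud));

            return &p4d->pud[p4i][pui];        /* "pud_offset(p4d, vaddr)" */
    }

    int main(void)
    {
            struct pgd pgd = { { NULL } };

            walk(&pgd, 1, 2, 3)->page = 42;
            printf("stored %d\n", walk(&pgd, 1, 2, 3)->page);
            return 0;
    }
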
index 4797e87b0fb6a6a0b27817f7342bd6f65dcc15c2..110daf22f5c77fbdda77c6137d7d2e7cc22ed767 100644 (file)
@@ -430,12 +430,16 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
        .pmd_val = PTE_IDENT,
        .make_pmd = PTE_IDENT,
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
        .pud_val = PTE_IDENT,
        .make_pud = PTE_IDENT,
 
-       .set_pgd = native_set_pgd,
-#endif
+       .set_p4d = native_set_p4d,
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#error FIXME
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
 #endif /* CONFIG_PGTABLE_LEVELS >= 3 */
 
        .pte_val = PTE_IDENT,
index f675915617110fa4cae6c74efc35ba8ccd12eb46..0bb88428cbf2697c89a60311051cc5351ea55fde 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/vm86.h>
 #include <asm/switch_to.h>
 #include <asm/desc.h>
+#include <asm/prctl.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -124,11 +125,6 @@ void flush_thread(void)
        fpu__clear(&tsk->thread.fpu);
 }
 
-static void hard_disable_TSC(void)
-{
-       cr4_set_bits(X86_CR4_TSD);
-}
-
 void disable_TSC(void)
 {
        preempt_disable();
@@ -137,15 +133,10 @@ void disable_TSC(void)
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
-               hard_disable_TSC();
+               cr4_set_bits(X86_CR4_TSD);
        preempt_enable();
 }
 
-static void hard_enable_TSC(void)
-{
-       cr4_clear_bits(X86_CR4_TSD);
-}
-
 static void enable_TSC(void)
 {
        preempt_disable();
@@ -154,7 +145,7 @@ static void enable_TSC(void)
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
-               hard_enable_TSC();
+               cr4_clear_bits(X86_CR4_TSD);
        preempt_enable();
 }
 
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
        return 0;
 }
 
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-                     struct tss_struct *tss)
-{
-       struct thread_struct *prev, *next;
-
-       prev = &prev_p->thread;
-       next = &next_p->thread;
+DEFINE_PER_CPU(u64, msr_misc_features_shadow);
 
-       if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
-           test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
-               unsigned long debugctl = get_debugctlmsr();
+static void set_cpuid_faulting(bool on)
+{
+       u64 msrval;
 
-               debugctl &= ~DEBUGCTLMSR_BTF;
-               if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
-                       debugctl |= DEBUGCTLMSR_BTF;
+       msrval = this_cpu_read(msr_misc_features_shadow);
+       msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+       msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
+       this_cpu_write(msr_misc_features_shadow, msrval);
+       wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+}
 
-               update_debugctlmsr(debugctl);
+static void disable_cpuid(void)
+{
+       preempt_disable();
+       if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+               /*
+                * Must flip the CPU state synchronously with
+                * TIF_NOCPUID in the current running context.
+                */
+               set_cpuid_faulting(true);
        }
+       preempt_enable();
+}
 
-       if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-           test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-               /* prev and next are different */
-               if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-                       hard_disable_TSC();
-               else
-                       hard_enable_TSC();
+static void enable_cpuid(void)
+{
+       preempt_disable();
+       if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+               /*
+                * Must flip the CPU state synchronously with
+                * TIF_NOCPUID in the current running context.
+                */
+               set_cpuid_faulting(false);
        }
+       preempt_enable();
+}
+
+static int get_cpuid_mode(void)
+{
+       return !test_thread_flag(TIF_NOCPUID);
+}
+
+static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+{
+       if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+               return -ENODEV;
+
+       if (cpuid_enabled)
+               enable_cpuid();
+       else
+               disable_cpuid();
+
+       return 0;
+}
+
+/*
+ * Called immediately after a successful exec.
+ */
+void arch_setup_new_exec(void)
+{
+       /* If cpuid was previously disabled for this task, re-enable it. */
+       if (test_thread_flag(TIF_NOCPUID))
+               enable_cpuid();
+}
 
-       if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+static inline void switch_to_bitmap(struct tss_struct *tss,
+                                   struct thread_struct *prev,
+                                   struct thread_struct *next,
+                                   unsigned long tifp, unsigned long tifn)
+{
+       if (tifn & _TIF_IO_BITMAP) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
-
                /*
                 * Make sure that the TSS limit is correct for the CPU
                 * to notice the IO bitmap.
                 */
                refresh_tss_limit();
-       } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+       } else if (tifp & _TIF_IO_BITMAP) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+                     struct tss_struct *tss)
+{
+       struct thread_struct *prev, *next;
+       unsigned long tifp, tifn;
+
+       prev = &prev_p->thread;
+       next = &next_p->thread;
+
+       tifn = READ_ONCE(task_thread_info(next_p)->flags);
+       tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+       switch_to_bitmap(tss, prev, next, tifp, tifn);
+
        propagate_user_return_notify(prev_p, next_p);
+
+       if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+           arch_has_block_step()) {
+               unsigned long debugctl, msk;
+
+               rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+               debugctl &= ~DEBUGCTLMSR_BTF;
+               msk = tifn & _TIF_BLOCKSTEP;
+               debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+       }
+
+       if ((tifp ^ tifn) & _TIF_NOTSC)
+               cr4_toggle_bits(X86_CR4_TSD);
+
+       if ((tifp ^ tifn) & _TIF_NOCPUID)
+               set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
 }
 
 /*
@@ -550,3 +616,16 @@ out:
        put_task_stack(p);
        return ret;
 }
+
+long do_arch_prctl_common(struct task_struct *task, int option,
+                         unsigned long cpuid_enabled)
+{
+       switch (option) {
+       case ARCH_GET_CPUID:
+               return get_cpuid_mode();
+       case ARCH_SET_CPUID:
+               return set_cpuid_mode(task, cpuid_enabled);
+       }
+
+       return -EINVAL;
+}
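
The reworked __switch_to_xtra() reads the outgoing and incoming tasks' flag words once and uses XOR to find only the bits that actually differ, so per-task CPU state (CR4.TSD, CPUID faulting) is touched only on a real change; the BLOCKSTEP update goes further and shifts the flag bit straight into its MSR position instead of branching. A short sketch of those two bit tricks; TIF_NOTSC matches the header above, the other bit positions are assumed for illustration:

    #include <stdio.h>

    #define TIF_NOTSC      16                  /* from thread_info.h above */
    #define TIF_BLOCKSTEP  25                  /* assumed position for the sketch */
    #define _TIF_NOTSC     (1UL << TIF_NOTSC)
    #define _TIF_BLOCKSTEP (1UL << TIF_BLOCKSTEP)
    #define BTF_SHIFT      1                   /* DEBUGCTL.BTF is bit 1 */

    int main(void)
    {
            unsigned long tifp = _TIF_NOTSC;           /* outgoing task's flags */
            unsigned long tifn = _TIF_BLOCKSTEP;       /* incoming task's flags */

            /* Act only when a bit differs between prev and next. */
            if ((tifp ^ tifn) & _TIF_NOTSC)
                    printf("toggle CR4.TSD\n");

            /* Branchless: move the incoming BLOCKSTEP bit into BTF position. */
            unsigned long debugctl = 0;
            unsigned long msk = tifn & _TIF_BLOCKSTEP;
            debugctl |= (msk >> TIF_BLOCKSTEP) << BTF_SHIFT;
            printf("DEBUGCTL = %#lx\n", debugctl);
            return 0;
    }
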
index 4c818f8bc1352b46263b63abd111de3910e9811a..ff40e74c9181f0e009b51909a0e76ce25c1c2cf3 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/kdebug.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/ldt.h>
@@ -56,6 +57,7 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 #include <asm/intel_rdt.h>
+#include <asm/proto.h>
 
 void __show_regs(struct pt_regs *regs, int all)
 {
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
        return prev_p;
 }
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+       return do_arch_prctl_common(current, option, arg2);
+}
index d6b784a5520daf2938cd228daa2bf6bde74c421d..ea1a6180bf3999eed7e2aa27f44500cfb1dfa59f 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/ftrace.h>
+#include <linux/syscalls.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
                                (struct user_desc __user *)tls, 0);
                else
 #endif
-                       err = do_arch_prctl(p, ARCH_SET_FS, tls);
+                       err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
                if (err)
                        goto out;
        }
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
 }
 #endif
 
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 {
        int ret = 0;
        int doit = task == current;
        int cpu;
 
-       switch (code) {
+       switch (option) {
        case ARCH_SET_GS:
-               if (addr >= TASK_SIZE_MAX)
+               if (arg2 >= TASK_SIZE_MAX)
                        return -EPERM;
                cpu = get_cpu();
                task->thread.gsindex = 0;
-               task->thread.gsbase = addr;
+               task->thread.gsbase = arg2;
                if (doit) {
                        load_gs_index(0);
-                       ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+                       ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
-               if (addr >= TASK_SIZE_MAX)
+               if (arg2 >= TASK_SIZE_MAX)
                        return -EPERM;
                cpu = get_cpu();
                task->thread.fsindex = 0;
-               task->thread.fsbase = addr;
+               task->thread.fsbase = arg2;
                if (doit) {
                        /* set the selector to 0 to not confuse __switch_to */
                        loadsegment(fs, 0);
-                       ret = wrmsrl_safe(MSR_FS_BASE, addr);
+                       ret = wrmsrl_safe(MSR_FS_BASE, arg2);
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
+
                if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fsbase;
-               ret = put_user(base, (unsigned long __user *)addr);
+               ret = put_user(base, (unsigned long __user *)arg2);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
+
                if (doit)
                        rdmsrl(MSR_KERNEL_GS_BASE, base);
                else
                        base = task->thread.gsbase;
-               ret = put_user(base, (unsigned long __user *)addr);
+               ret = put_user(base, (unsigned long __user *)arg2);
                break;
        }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
 # ifdef CONFIG_X86_X32_ABI
        case ARCH_MAP_VDSO_X32:
-               return prctl_map_vdso(&vdso_image_x32, addr);
+               return prctl_map_vdso(&vdso_image_x32, arg2);
 # endif
 # if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
        case ARCH_MAP_VDSO_32:
-               return prctl_map_vdso(&vdso_image_32, addr);
+               return prctl_map_vdso(&vdso_image_32, arg2);
 # endif
        case ARCH_MAP_VDSO_64:
-               return prctl_map_vdso(&vdso_image_64, addr);
+               return prctl_map_vdso(&vdso_image_64, arg2);
 #endif
 
        default:
@@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
        return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+       long ret;
+
+       ret = do_arch_prctl_64(current, option, arg2);
+       if (ret == -EINVAL)
+               ret = do_arch_prctl_common(current, option, arg2);
+
+       return ret;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return do_arch_prctl(current, code, addr);
+       return do_arch_prctl_common(current, option, arg2);
 }
+#endif
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
index 2364b23ea3e52c3f5f9901a66574337ae2e23a3f..f37d18124648fb9591779e50c878c4ffd51c51b1 100644 (file)
@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
                if (value >= TASK_SIZE_MAX)
                        return -EIO;
                /*
-                * When changing the segment base, use do_arch_prctl
+                * When changing the segment base, use do_arch_prctl_64
                 * to set either thread.fs or thread.fsindex and the
                 * corresponding GDT slot.
                 */
                if (child->thread.fsbase != value)
-                       return do_arch_prctl(child, ARCH_SET_FS, value);
+                       return do_arch_prctl_64(child, ARCH_SET_FS, value);
                return 0;
        case offsetof(struct user_regs_struct,gs_base):
                /*
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
                if (value >= TASK_SIZE_MAX)
                        return -EIO;
                if (child->thread.gsbase != value)
-                       return do_arch_prctl(child, ARCH_SET_GS, value);
+                       return do_arch_prctl_64(child, ARCH_SET_GS, value);
                return 0;
 #endif
        }
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
                   Works just like arch_prctl, except that the arguments
                   are reversed. */
        case PTRACE_ARCH_PRCTL:
-               ret = do_arch_prctl(child, data, addr);
+               ret = do_arch_prctl_64(child, data, addr);
                break;
 #endif
 
index 4194d6f9bb290bcce8a581b3e3a38bf010262f0c..067f9813fd2cf7c15d5a1d297b537eedf6ca7959 100644 (file)
@@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
                .ident = "ASUS EeeBook X205TA",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
                },
        },
        {       /* Handle problems with rebooting on ASUS EeeBook X205TAW */
index 7cd7bbefd418da5ffa7534a6697f70e08abfcc29..1302c47c5ce11efb71f881b376dbbcee75232009 100644 (file)
@@ -1223,21 +1223,6 @@ void __init setup_arch(char **cmdline_p)
 
        kasan_init();
 
-#ifdef CONFIG_X86_32
-       /* sync back kernel address range */
-       clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       KERNEL_PGD_PTRS);
-
-       /*
-        * sync back low identity map too.  It is used for example
-        * in the 32-bit EFI stub.
-        */
-       clone_pgd_range(initial_page_table,
-                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-                       min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
-
        tboot_probe();
 
        map_vsyscall();
index 9820d6d977c6646f9471229063688f3f3d24eaec..bb1e8cc0bc84816b8976dec088d3c18b038f8c01 100644 (file)
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
        pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
                        0x2 | DESCTYPE_S, 0x8);
        gdt.s = 1;
-       write_gdt_entry(get_cpu_gdt_table(cpu),
+       write_gdt_entry(get_cpu_gdt_rw(cpu),
                        GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 #endif
 }
@@ -288,4 +288,25 @@ void __init setup_per_cpu_areas(void)
 
        /* Setup cpu initialized, callin, callout masks */
        setup_cpu_local_masks();
+
+#ifdef CONFIG_X86_32
+       /*
+        * Sync back kernel address range.  We want to make sure that
+        * all kernel mappings, including percpu mappings, are available
+        * in the smpboot asm.  We can't reliably pick up percpu
+        * mappings using vmalloc_fault(), because exception dispatch
+        * needs percpu data.
+        */
+       clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+                       KERNEL_PGD_PTRS);
+
+       /*
+        * sync back low identity map too.  It is used for example
+        * in the 32-bit EFI stub.
+        */
+       clone_pgd_range(initial_page_table,
+                       swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
+                       min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+#endif
 }
index bd1f1ad3528420578ae2f5331bc23269b4eb89c6..f04479a8f74f5598c8ee42bae66b17cee469ba49 100644 (file)
@@ -983,7 +983,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
        unsigned long timeout;
 
        idle->thread.sp = (unsigned long)task_pt_regs(idle);
-       early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+       early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
        initial_code = (unsigned long)start_secondary;
        initial_stack  = idle->thread.sp;
 
index 50215a4b9347441deb5bb76eb0b5870fc0d60273..207b8f2582c75b7c80a882ffc92a720071c8277e 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/uaccess.h>
 #include <linux/elf.h>
 
+#include <asm/elf.h>
+#include <asm/compat.h>
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
 
@@ -101,7 +103,7 @@ out:
 static void find_start_end(unsigned long flags, unsigned long *begin,
                           unsigned long *end)
 {
-       if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
+       if (!in_compat_syscall() && (flags & MAP_32BIT)) {
                /* This is usually needed to map code in the small
                   model, so it needs to be in the first 31bit. Limit
                   it to that.  This means we need to move the
@@ -114,10 +116,11 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
                if (current->flags & PF_RANDOMIZE) {
                        *begin = randomize_page(*begin, 0x02000000);
                }
-       } else {
-               *begin = current->mm->mmap_legacy_base;
-               *end = TASK_SIZE;
+               return;
        }
+
+       *begin  = get_mmap_base(1);
+       *end    = in_compat_syscall() ? tasksize_32bit() : tasksize_64bit();
 }
 
 unsigned long
@@ -176,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                return addr;
 
        /* for MAP_32BIT mappings we force the legacy mmap base */
-       if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
+       if (!in_compat_syscall() && (flags & MAP_32BIT))
                goto bottomup;
 
        /* requesting a specific address */
@@ -191,7 +194,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = PAGE_SIZE;
-       info.high_limit = mm->mmap_base;
+       info.high_limit = get_mmap_base(0);
        info.align_mask = 0;
        info.align_offset = pgoff << PAGE_SHIFT;
        if (filp) {
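
find_start_end() now derives the search window from get_mmap_base() and the compat or native task size rather than from TIF_ADDR32, while MAP_32BIT keeps its special low window in the first 31 bits. The flag is easy to observe from userspace; a quick sketch (the exact addresses printed depend on the running kernel's layout and randomization):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            /* MAP_32BIT asks for an address below 2 GiB (first 31 bits). */
            void *lo = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
            void *hi = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            printf("MAP_32BIT mapping at %p\n", lo);   /* below 0x80000000 */
            printf("default mapping at   %p\n", hi);   /* typically far higher */
            return 0;
    }
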
index b868fa1b812b3a82713e0556c39900bced73f338..5db0f33cbf2c55d4feb1b0f1c3097d7163a52f25 100644 (file)
@@ -118,12 +118,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
                          pgprot_t prot)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
        pgd = pgd_offset(&tboot_mm, vaddr);
-       pud = pud_alloc(&tboot_mm, pgd, vaddr);
+       p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+       if (!p4d)
+               return -1;
+       pud = pud_alloc(&tboot_mm, p4d, vaddr);
        if (!pud)
                return -1;
        pmd = pmd_alloc(&tboot_mm, pud, vaddr);
index 6c8934406dc906c631200787f2b0d6ee8e746b60..dcd699baea1be86a7fa0f5d44e8ca1fefe875458 100644 (file)
@@ -92,10 +92,17 @@ static void set_tls_desc(struct task_struct *p, int idx,
        cpu = get_cpu();
 
        while (n-- > 0) {
-               if (LDT_empty(info) || LDT_zero(info))
+               if (LDT_empty(info) || LDT_zero(info)) {
                        desc->a = desc->b = 0;
-               else
+               } else {
                        fill_ldt(desc, info);
+
+                       /*
+                        * Always set the accessed bit so that the CPU
+                        * doesn't try to write to the (read-only) GDT.
+                        */
+                       desc->type |= 1;
+               }
                ++info;
                ++desc;
        }
index 23ee89ce59a940712a0b9d91fc78edde94fce8fb..62597c300d94bcfea908c1cdde8e4728db614091 100644 (file)
@@ -164,6 +164,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
        struct vm_area_struct *vma;
        spinlock_t *ptl;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -173,7 +174,10 @@ static void mark_screen_rdonly(struct mm_struct *mm)
        pgd = pgd_offset(mm, 0xA0000);
        if (pgd_none_or_clear_bad(pgd))
                goto out;
-       pud = pud_offset(pgd, 0xA0000);
+       p4d = p4d_offset(pgd, 0xA0000);
+       if (p4d_none_or_clear_bad(p4d))
+               goto out;
+       pud = pud_offset(p4d, 0xA0000);
        if (pud_none_or_clear_bad(pud))
                goto out;
        pmd = pmd_offset(pud, 0xA0000);
index d1efe2c62b3f8d0db7392970cdfd8e018dd3ac06..c02b9af2056a758c573738f9b8831b5103544ab3 100644 (file)
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
 
        struct svm_cpu_data *sd;
        uint64_t efer;
-       struct desc_ptr gdt_descr;
        struct desc_struct *gdt;
        int me = raw_smp_processor_id();
 
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
        sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
        sd->next_asid = sd->max_asid + 1;
 
-       native_store_gdt(&gdt_descr);
-       gdt = (struct desc_struct *)gdt_descr.address;
+       gdt = get_current_gdt_rw();
        sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
        wrmsrl(MSR_EFER, efer | EFER_SVME);
index 283aa8601833509b9cf792b919dd3f243a78f389..3acde663dc582490449e4a40ebeaac712e125c5e 100644 (file)
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
  * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
  */
 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
 
 /*
  * We maintain a per-CPU linked list of vCPUs, so in wakeup_handler() we
@@ -2052,14 +2051,13 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
  */
 static unsigned long segment_base(u16 selector)
 {
-       struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
        struct desc_struct *table;
        unsigned long v;
 
        if (!(selector & ~SEGMENT_RPL_MASK))
                return 0;
 
-       table = (struct desc_struct *)gdt->address;
+       table = get_current_gdt_ro();
 
        if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
                u16 ldt_selector = kvm_read_ldt();
@@ -2164,7 +2162,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #endif
        if (vmx->host_state.msr_host_bndcfgs)
                wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
-       load_gdt(this_cpu_ptr(&host_gdt));
+       load_fixmap_gdt(raw_smp_processor_id());
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2266,7 +2264,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        }
 
        if (!already_loaded) {
-               struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+               void *gdt = get_current_gdt_ro();
                unsigned long sysenter_esp;
 
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2277,7 +2275,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 */
                vmcs_writel(HOST_TR_BASE,
                            (unsigned long)this_cpu_ptr(&cpu_tss));
-               vmcs_writel(HOST_GDTR_BASE, gdt->address);
+               vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
                /*
                 * VM exits change the host TR limit to 0x67 after a VM
@@ -3465,8 +3463,6 @@ static int hardware_enable(void)
                ept_sync_global();
        }
 
-       native_store_gdt(this_cpu_ptr(&host_gdt));
-
        return 0;
 }
 
@@ -7258,9 +7254,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
 static int handle_vmclear(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       u32 zero = 0;
        gpa_t vmptr;
-       struct vmcs12 *vmcs12;
-       struct page *page;
 
        if (!nested_vmx_check_permission(vcpu))
                return 1;
@@ -7271,22 +7266,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
        if (vmptr == vmx->nested.current_vmptr)
                nested_release_vmcs12(vmx);
 
-       page = nested_get_page(vcpu, vmptr);
-       if (page == NULL) {
-               /*
-                * For accurate processor emulation, VMCLEAR beyond available
-                * physical memory should do nothing at all. However, it is
-                * possible that a nested vmx bug, not a guest hypervisor bug,
-                * resulted in this case, so let's shut down before doing any
-                * more damage:
-                */
-               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-               return 1;
-       }
-       vmcs12 = kmap(page);
-       vmcs12->launch_state = 0;
-       kunmap(page);
-       nested_release_page(page);
+       kvm_vcpu_write_guest(vcpu,
+                       vmptr + offsetof(struct vmcs12, launch_state),
+                       &zero, sizeof(zero));
 
        nested_free_vmcs02(vmx, vmptr);
 
@@ -9694,10 +9676,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
                return false;
 
        page = nested_get_page(vcpu, vmcs12->msr_bitmap);
-       if (!page) {
-               WARN_ON(1);
+       if (!page)
                return false;
-       }
        msr_bitmap_l1 = (unsigned long *)kmap(page);
 
        memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
@@ -11121,8 +11101,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
  */
 static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 {
-       if (is_guest_mode(vcpu))
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.nested_run_pending = 0;
                nested_vmx_vmexit(vcpu, -1, 0, 0);
+       }
        free_nested(to_vmx(vcpu));
 }
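
handle_vmclear() above no longer maps the guest's VMCS page just to zero one field; it writes the 4-byte launch_state member directly into guest memory with kvm_vcpu_write_guest() at offsetof(struct vmcs12, launch_state). The underlying C idiom is patching a single struct field at a computed byte offset inside an opaque buffer; a sketch with an invented toy layout (the real vmcs12 is of course much larger):

    #include <stdio.h>
    #include <stddef.h>
    #include <string.h>
    #include <stdint.h>

    struct vmcs12_toy {
            uint32_t revision_id;
            uint32_t abort;
            uint32_t launch_state;             /* the field VMCLEAR must reset */
    };

    /* Stand-in for kvm_vcpu_write_guest(vcpu, gpa + offset, data, len). */
    static void write_guest(unsigned char *guest_page, size_t offset,
                            const void *data, size_t len)
    {
            memcpy(guest_page + offset, data, len);
    }

    int main(void)
    {
            unsigned char page[4096];
            memset(page, 0xff, sizeof(page));  /* pretend this is guest memory */

            uint32_t zero = 0;
            write_guest(page, offsetof(struct vmcs12_toy, launch_state),
                        &zero, sizeof(zero));

            struct vmcs12_toy v;
            memcpy(&v, page, sizeof(v));
            printf("launch_state = %u\n", v.launch_state);   /* prints 0 */
            return 0;
    }
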
 
index 96d2b847e09ea504fc3ac824d347651a3bc880b9..0fbdcb64f9f836c0556ca0abcca4c673aea143f0 100644 (file)
@@ -2,7 +2,7 @@
 KCOV_INSTRUMENT_tlb.o  := n
 
 obj-y  :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-           pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
+           pat.o pgtable.o physaddr.o setup_nx.o tlb.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
index 58b5bee7ea27011419a0c81f5efeac3964a3ad71..9f305be71a72729196ac04269fb0e121b3450fad 100644 (file)
@@ -110,7 +110,8 @@ static struct addr_marker address_markers[] = {
 #define PTE_LEVEL_MULT (PAGE_SIZE)
 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
 #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
-#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+#define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+#define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT)
 
 #define pt_dump_seq_printf(m, to_dmesg, fmt, args...)          \
 ({                                                             \
@@ -286,14 +287,13 @@ static void note_page(struct seq_file *m, struct pg_state *st,
        }
 }
 
-static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
-                                                       unsigned long P)
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
 {
        int i;
        pte_t *start;
        pgprotval_t prot;
 
-       start = (pte_t *) pmd_page_vaddr(addr);
+       start = (pte_t *)pmd_page_vaddr(addr);
        for (i = 0; i < PTRS_PER_PTE; i++) {
                prot = pte_flags(*start);
                st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
@@ -304,14 +304,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 
 #if PTRS_PER_PMD > 1
 
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
-                                                       unsigned long P)
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
 {
        int i;
        pmd_t *start;
        pgprotval_t prot;
 
-       start = (pmd_t *) pud_page_vaddr(addr);
+       start = (pmd_t *)pud_page_vaddr(addr);
        for (i = 0; i < PTRS_PER_PMD; i++) {
                st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
                if (!pmd_none(*start)) {
@@ -347,15 +346,14 @@ static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
        return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
 }
 
-static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
-                                                       unsigned long P)
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
 {
        int i;
        pud_t *start;
        pgprotval_t prot;
        pud_t *prev_pud = NULL;
 
-       start = (pud_t *) pgd_page_vaddr(addr);
+       start = (pud_t *)p4d_page_vaddr(addr);
 
        for (i = 0; i < PTRS_PER_PUD; i++) {
                st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
@@ -377,9 +375,42 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
 }
 
 #else
-#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
-#define pgd_large(a) pud_large(__pud(pgd_val(a)))
-#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
+#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
+#define p4d_large(a) pud_large(__pud(p4d_val(a)))
+#define p4d_none(a)  pud_none(__pud(p4d_val(a)))
+#endif
+
+#if PTRS_PER_P4D > 1
+
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
+{
+       int i;
+       p4d_t *start;
+       pgprotval_t prot;
+
+       start = (p4d_t *)pgd_page_vaddr(addr);
+
+       for (i = 0; i < PTRS_PER_P4D; i++) {
+               st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
+               if (!p4d_none(*start)) {
+                       if (p4d_large(*start) || !p4d_present(*start)) {
+                               prot = p4d_flags(*start);
+                               note_page(m, st, __pgprot(prot), 2);
+                       } else {
+                               walk_pud_level(m, st, *start,
+                                              P + i * P4D_LEVEL_MULT);
+                       }
+               } else
+                       note_page(m, st, __pgprot(0), 2);
+
+               start++;
+       }
+}
+
+#else
+#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
+#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
+#define pgd_none(a)  p4d_none(__p4d(pgd_val(a)))
 #endif
 
 static inline bool is_hypervisor_range(int idx)
@@ -424,7 +455,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
                                prot = pgd_flags(*start);
                                note_page(m, &st, __pgprot(prot), 1);
                        } else {
-                               walk_pud_level(m, &st, *start,
+                               walk_p4d_level(m, &st, *start,
                                               i * PGD_LEVEL_MULT);
                        }
                } else
index 428e31763cb93e593f261a9f443c3999cb8c473d..8ad91a01cbc88ff9e53d79ef9a2fd299b8558995 100644 (file)
@@ -253,6 +253,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 {
        unsigned index = pgd_index(address);
        pgd_t *pgd_k;
+       p4d_t *p4d, *p4d_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;
 
@@ -265,10 +266,15 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
        /*
         * set_pgd(pgd, *pgd_k); here would be useless on PAE
         * and redundant with the set_pmd() on non-PAE. As would
-        * set_pud.
+        * set_p4d/set_pud.
         */
-       pud = pud_offset(pgd, address);
-       pud_k = pud_offset(pgd_k, address);
+       p4d = p4d_offset(pgd, address);
+       p4d_k = p4d_offset(pgd_k, address);
+       if (!p4d_present(*p4d_k))
+               return NULL;
+
+       pud = pud_offset(p4d, address);
+       pud_k = pud_offset(p4d_k, address);
        if (!pud_present(*pud_k))
                return NULL;
 
@@ -384,6 +390,8 @@ static void dump_pagetable(unsigned long address)
 {
        pgd_t *base = __va(read_cr3());
        pgd_t *pgd = &base[pgd_index(address)];
+       p4d_t *p4d;
+       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
@@ -392,7 +400,9 @@ static void dump_pagetable(unsigned long address)
        if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
                goto out;
 #endif
-       pmd = pmd_offset(pud_offset(pgd, address), address);
+       p4d = p4d_offset(pgd, address);
+       pud = pud_offset(p4d, address);
+       pmd = pmd_offset(pud, address);
        printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
 
        /*
@@ -425,6 +435,7 @@ void vmalloc_sync_all(void)
 static noinline int vmalloc_fault(unsigned long address)
 {
        pgd_t *pgd, *pgd_ref;
+       p4d_t *p4d, *p4d_ref;
        pud_t *pud, *pud_ref;
        pmd_t *pmd, *pmd_ref;
        pte_t *pte, *pte_ref;
@@ -448,17 +459,37 @@ static noinline int vmalloc_fault(unsigned long address)
        if (pgd_none(*pgd)) {
                set_pgd(pgd, *pgd_ref);
                arch_flush_lazy_mmu_mode();
-       } else {
+       } else if (CONFIG_PGTABLE_LEVELS > 4) {
+               /*
+                * With folded p4d, pgd_none() is always false, so the pgd may
+                * point to an empty page table entry and pgd_page_vaddr()
+                * will return garbage.
+                *
+                * We will do the correct sanity check on the p4d level.
+                */
                BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
        }
 
+       /* With 4-level paging, copying happens on the p4d level. */
+       p4d = p4d_offset(pgd, address);
+       p4d_ref = p4d_offset(pgd_ref, address);
+       if (p4d_none(*p4d_ref))
+               return -1;
+
+       if (p4d_none(*p4d)) {
+               set_p4d(p4d, *p4d_ref);
+               arch_flush_lazy_mmu_mode();
+       } else {
+               BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
+       }
+
        /*
         * Below here mismatches are bugs because these lower tables
         * are shared:
         */
 
-       pud = pud_offset(pgd, address);
-       pud_ref = pud_offset(pgd_ref, address);
+       pud = pud_offset(p4d, address);
+       pud_ref = pud_offset(p4d_ref, address);
        if (pud_none(*pud_ref))
                return -1;
 
@@ -526,6 +557,7 @@ static void dump_pagetable(unsigned long address)
 {
        pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
        pgd_t *pgd = base + pgd_index(address);
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -538,7 +570,15 @@ static void dump_pagetable(unsigned long address)
        if (!pgd_present(*pgd))
                goto out;
 
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (bad_address(p4d))
+               goto bad;
+
+       printk("P4D %lx ", p4d_val(*p4d));
+       if (!p4d_present(*p4d) || p4d_large(*p4d))
+               goto out;
+
+       pud = pud_offset(p4d, address);
        if (bad_address(pud))
                goto bad;
 
@@ -1082,6 +1122,7 @@ static noinline int
 spurious_fault(unsigned long error_code, unsigned long address)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -1104,7 +1145,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
        if (!pgd_present(*pgd))
                return 0;
 
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (!p4d_present(*p4d))
+               return 0;
+
+       if (p4d_large(*p4d))
+               return spurious_fault_check(error_code, (pte_t *) p4d);
+
+       pud = pud_offset(p4d, address);
        if (!pud_present(*pud))
                return 0;
 
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
deleted file mode 100644 (file)
index 1f3b6ef..0000000
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Lockless get_user_pages_fast for x86
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/memremap.h>
-
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
-#ifndef CONFIG_X86_PAE
-       return READ_ONCE(*ptep);
-#else
-       /*
-        * With get_user_pages_fast, we walk down the pagetables without taking
-        * any locks.  For this we would like to load the pointers atomically,
-        * but that is not possible (without expensive cmpxchg8b) on PAE.  What
-        * we do have is the guarantee that a pte will only either go from not
-        * present to present, or present to not present or both -- it will not
-        * switch to a completely different present page without a TLB flush in
-        * between; something that we are blocking by holding interrupts off.
-        *
-        * Setting ptes from not present to present goes:
-        * ptep->pte_high = h;
-        * smp_wmb();
-        * ptep->pte_low = l;
-        *
-        * And present to not present goes:
-        * ptep->pte_low = 0;
-        * smp_wmb();
-        * ptep->pte_high = 0;
-        *
-        * We must ensure here that the load of pte_low sees l iff pte_high
-        * sees h. We load pte_high *after* loading pte_low, which ensures we
-        * don't see an older value of pte_high.  *Then* we recheck pte_low,
-        * which ensures that we haven't picked up a changed pte high. We might
-        * have got rubbish values from pte_low and pte_high, but we are
-        * guaranteed that pte_low will not have the present bit set *unless*
-        * it is 'l'. And get_user_pages_fast only operates on present ptes, so
-        * we're safe.
-        *
-        * gup_get_pte should not be used or copied outside gup.c without being
-        * very careful -- it does not atomically load the pte or anything that
-        * is likely to be useful for you.
-        */
-       pte_t pte;
-
-retry:
-       pte.pte_low = ptep->pte_low;
-       smp_rmb();
-       pte.pte_high = ptep->pte_high;
-       smp_rmb();
-       if (unlikely(pte.pte_low != ptep->pte_low))
-               goto retry;
-
-       return pte;
-#endif
-}
-
-static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
-{
-       while ((*nr) - nr_start) {
-               struct page *page = pages[--(*nr)];
-
-               ClearPageReferenced(page);
-               put_page(page);
-       }
-}
-
-/*
- * 'pteval' can come from a pte, pmd or pud.  We only check
- * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
- * same value on all 3 types.
- */
-static inline int pte_allows_gup(unsigned long pteval, int write)
-{
-       unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
-
-       if (write)
-               need_pte_bits |= _PAGE_RW;
-
-       if ((pteval & need_pte_bits) != need_pte_bits)
-               return 0;
-
-       /* Check memory protection keys permissions. */
-       if (!__pkru_allows_pkey(pte_flags_pkey(pteval), write))
-               return 0;
-
-       return 1;
-}
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
-               unsigned long end, int write, struct page **pages, int *nr)
-{
-       struct dev_pagemap *pgmap = NULL;
-       int nr_start = *nr, ret = 0;
-       pte_t *ptep, *ptem;
-
-       /*
-        * Keep the original mapped PTE value (ptem) around since we
-        * might increment ptep off the end of the page when finishing
-        * our loop iteration.
-        */
-       ptem = ptep = pte_offset_map(&pmd, addr);
-       do {
-               pte_t pte = gup_get_pte(ptep);
-               struct page *page;
-
-               /* Similar to the PMD case, NUMA hinting must take slow path */
-               if (pte_protnone(pte))
-                       break;
-
-               if (!pte_allows_gup(pte_val(pte), write))
-                       break;
-
-               if (pte_devmap(pte)) {
-                       pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
-                       if (unlikely(!pgmap)) {
-                               undo_dev_pagemap(nr, nr_start, pages);
-                               break;
-                       }
-               } else if (pte_special(pte))
-                       break;
-
-               VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-               page = pte_page(pte);
-               get_page(page);
-               put_dev_pagemap(pgmap);
-               SetPageReferenced(page);
-               pages[*nr] = page;
-               (*nr)++;
-
-       } while (ptep++, addr += PAGE_SIZE, addr != end);
-       if (addr == end)
-               ret = 1;
-       pte_unmap(ptem);
-
-       return ret;
-}
-
-static inline void get_head_page_multiple(struct page *page, int nr)
-{
-       VM_BUG_ON_PAGE(page != compound_head(page), page);
-       VM_BUG_ON_PAGE(page_count(page) == 0, page);
-       page_ref_add(page, nr);
-       SetPageReferenced(page);
-}
-
-static int __gup_device_huge(unsigned long pfn, unsigned long addr,
-               unsigned long end, struct page **pages, int *nr)
-{
-       int nr_start = *nr;
-       struct dev_pagemap *pgmap = NULL;
-
-       do {
-               struct page *page = pfn_to_page(pfn);
-
-               pgmap = get_dev_pagemap(pfn, pgmap);
-               if (unlikely(!pgmap)) {
-                       undo_dev_pagemap(nr, nr_start, pages);
-                       return 0;
-               }
-               SetPageReferenced(page);
-               pages[*nr] = page;
-               get_page(page);
-               put_dev_pagemap(pgmap);
-               (*nr)++;
-               pfn++;
-       } while (addr += PAGE_SIZE, addr != end);
-       return 1;
-}
-
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
-               unsigned long end, struct page **pages, int *nr)
-{
-       unsigned long fault_pfn;
-
-       fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
-               unsigned long end, struct page **pages, int *nr)
-{
-       unsigned long fault_pfn;
-
-       fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
-               unsigned long end, int write, struct page **pages, int *nr)
-{
-       struct page *head, *page;
-       int refs;
-
-       if (!pte_allows_gup(pmd_val(pmd), write))
-               return 0;
-
-       VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
-       if (pmd_devmap(pmd))
-               return __gup_device_huge_pmd(pmd, addr, end, pages, nr);
-
-       /* hugepages are never "special" */
-       VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
-
-       refs = 0;
-       head = pmd_page(pmd);
-       page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-       do {
-               VM_BUG_ON_PAGE(compound_head(page) != head, page);
-               pages[*nr] = page;
-               (*nr)++;
-               page++;
-               refs++;
-       } while (addr += PAGE_SIZE, addr != end);
-       get_head_page_multiple(head, refs);
-
-       return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
-               int write, struct page **pages, int *nr)
-{
-       unsigned long next;
-       pmd_t *pmdp;
-
-       pmdp = pmd_offset(&pud, addr);
-       do {
-               pmd_t pmd = *pmdp;
-
-               next = pmd_addr_end(addr, end);
-               if (pmd_none(pmd))
-                       return 0;
-               if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
-                       /*
-                        * NUMA hinting faults need to be handled in the GUP
-                        * slowpath for accounting purposes and so that they
-                        * can be serialised against THP migration.
-                        */
-                       if (pmd_protnone(pmd))
-                               return 0;
-                       if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
-                               return 0;
-               } else {
-                       if (!gup_pte_range(pmd, addr, next, write, pages, nr))
-                               return 0;
-               }
-       } while (pmdp++, addr = next, addr != end);
-
-       return 1;
-}
-
-static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
-               unsigned long end, int write, struct page **pages, int *nr)
-{
-       struct page *head, *page;
-       int refs;
-
-       if (!pte_allows_gup(pud_val(pud), write))
-               return 0;
-
-       VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
-       if (pud_devmap(pud))
-               return __gup_device_huge_pud(pud, addr, end, pages, nr);
-
-       /* hugepages are never "special" */
-       VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
-
-       refs = 0;
-       head = pud_page(pud);
-       page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-       do {
-               VM_BUG_ON_PAGE(compound_head(page) != head, page);
-               pages[*nr] = page;
-               (*nr)++;
-               page++;
-               refs++;
-       } while (addr += PAGE_SIZE, addr != end);
-       get_head_page_multiple(head, refs);
-
-       return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
-                       int write, struct page **pages, int *nr)
-{
-       unsigned long next;
-       pud_t *pudp;
-
-       pudp = pud_offset(&pgd, addr);
-       do {
-               pud_t pud = *pudp;
-
-               next = pud_addr_end(addr, end);
-               if (pud_none(pud))
-                       return 0;
-               if (unlikely(pud_large(pud))) {
-                       if (!gup_huge_pud(pud, addr, next, write, pages, nr))
-                               return 0;
-               } else {
-                       if (!gup_pmd_range(pud, addr, next, write, pages, nr))
-                               return 0;
-               }
-       } while (pudp++, addr = next, addr != end);
-
-       return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
-                         struct page **pages)
-{
-       struct mm_struct *mm = current->mm;
-       unsigned long addr, len, end;
-       unsigned long next;
-       unsigned long flags;
-       pgd_t *pgdp;
-       int nr = 0;
-
-       start &= PAGE_MASK;
-       addr = start;
-       len = (unsigned long) nr_pages << PAGE_SHIFT;
-       end = start + len;
-       if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
-                                       (void __user *)start, len)))
-               return 0;
-
-       /*
-        * XXX: batch / limit 'nr', to avoid large irq off latency
-        * needs some instrumenting to determine the common sizes used by
-        * important workloads (eg. DB2), and whether limiting the batch size
-        * will decrease performance.
-        *
-        * It seems like we're in the clear for the moment. Direct-IO is
-        * the main guy that batches up lots of get_user_pages, and even
-        * they are limited to 64-at-a-time which is not so many.
-        */
-       /*
-        * This doesn't prevent pagetable teardown, but does prevent
-        * the pagetables and pages from being freed on x86.
-        *
-        * So long as we atomically load page table pointers versus teardown
-        * (which we do on x86, with the above PAE exception), we can follow the
-        * address down to the the page and take a ref on it.
-        */
-       local_irq_save(flags);
-       pgdp = pgd_offset(mm, addr);
-       do {
-               pgd_t pgd = *pgdp;
-
-               next = pgd_addr_end(addr, end);
-               if (pgd_none(pgd))
-                       break;
-               if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
-                       break;
-       } while (pgdp++, addr = next, addr != end);
-       local_irq_restore(flags);
-
-       return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start:     starting user address
- * @nr_pages:  number of pages from start to pin
- * @write:     whether pages will be written to
- * @pages:     array that receives pointers to the pages pinned.
- *             Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
-                       struct page **pages)
-{
-       struct mm_struct *mm = current->mm;
-       unsigned long addr, len, end;
-       unsigned long next;
-       pgd_t *pgdp;
-       int nr = 0;
-
-       start &= PAGE_MASK;
-       addr = start;
-       len = (unsigned long) nr_pages << PAGE_SHIFT;
-
-       end = start + len;
-       if (end < start)
-               goto slow_irqon;
-
-#ifdef CONFIG_X86_64
-       if (end >> __VIRTUAL_MASK_SHIFT)
-               goto slow_irqon;
-#endif
-
-       /*
-        * XXX: batch / limit 'nr', to avoid large irq off latency
-        * needs some instrumenting to determine the common sizes used by
-        * important workloads (eg. DB2), and whether limiting the batch size
-        * will decrease performance.
-        *
-        * It seems like we're in the clear for the moment. Direct-IO is
-        * the main guy that batches up lots of get_user_pages, and even
-        * they are limited to 64-at-a-time which is not so many.
-        */
-       /*
-        * This doesn't prevent pagetable teardown, but does prevent
-        * the pagetables and pages from being freed on x86.
-        *
-        * So long as we atomically load page table pointers versus teardown
-        * (which we do on x86, with the above PAE exception), we can follow the
-        * address down to the the page and take a ref on it.
-        */
-       local_irq_disable();
-       pgdp = pgd_offset(mm, addr);
-       do {
-               pgd_t pgd = *pgdp;
-
-               next = pgd_addr_end(addr, end);
-               if (pgd_none(pgd))
-                       goto slow;
-               if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
-                       goto slow;
-       } while (pgdp++, addr = next, addr != end);
-       local_irq_enable();
-
-       VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
-       return nr;
-
-       {
-               int ret;
-
-slow:
-               local_irq_enable();
-slow_irqon:
-               /* Try to get the remaining pages with get_user_pages */
-               start += nr << PAGE_SHIFT;
-               pages += nr;
-
-               ret = get_user_pages_unlocked(start,
-                                             (end - start) >> PAGE_SHIFT,
-                                             pages, write ? FOLL_WRITE : 0);
-
-               /* Have to be a bit careful with return values */
-               if (nr > 0) {
-                       if (ret < 0)
-                               ret = nr;
-                       else
-                               ret += nr;
-               }
-
-               return ret;
-       }
-}
index c5066a260803d4b6b3a91ef56df9220be57db539..302f43fd9c28c6d14652981d672569d57e3dea35 100644 (file)
 #include <linux/pagemap.h>
 #include <linux/err.h>
 #include <linux/sysctl.h>
+#include <linux/compat.h>
 #include <asm/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
+#include <asm/elf.h>
 
 #if 0  /* This is just for testing */
 struct page *
@@ -82,8 +84,9 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 
        info.flags = 0;
        info.length = len;
-       info.low_limit = current->mm->mmap_legacy_base;
-       info.high_limit = TASK_SIZE;
+       info.low_limit = get_mmap_base(1);
+       info.high_limit = in_compat_syscall() ?
+               tasksize_32bit() : tasksize_64bit();
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        return vm_unmapped_area(&info);
@@ -100,7 +103,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = PAGE_SIZE;
-       info.high_limit = current->mm->mmap_base;
+       info.high_limit = get_mmap_base(0);
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);
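
[Editor's note: the hunk above switches the hugetlb search window to get_mmap_base(1) and to a task size chosen per syscall type via in_compat_syscall(). The standalone sketch below is not part of the patch; it only models that limit selection, and the numeric limits are made-up stand-ins for tasksize_32bit() and tasksize_64bit().]

/* Editorial sketch, not kernel code: per-syscall search-limit selection,
 * as done above with in_compat_syscall(). Limits are made-up stand-ins. */
#include <stdio.h>
#include <stdbool.h>

struct area_info {
        unsigned long low_limit;
        unsigned long high_limit;
};

static void pick_limits(struct area_info *info, bool compat_syscall,
                        unsigned long mmap_legacy_base)
{
        info->low_limit = mmap_legacy_base;              /* get_mmap_base(1) in the kernel */
        info->high_limit = compat_syscall ? 0xc0000000UL         /* ~tasksize_32bit() */
                                          : 0x7ffffffff000UL;    /* ~tasksize_64bit() */
}

int main(void)
{
        struct area_info info;

        pick_limits(&info, true, 0x40000000UL);          /* pretend compat syscall */
        printf("compat search window: [%#lx, %#lx)\n",
               info.low_limit, info.high_limit);

        pick_limits(&info, false, 0x2aaaab000UL);        /* pretend 64-bit syscall */
        printf("64-bit search window: [%#lx, %#lx)\n",
               info.low_limit, info.high_limit);
        return 0;
}
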
index 4473cb4f8b906dcae083a4b29c38b72d9b7d56d3..04210a29dd6060959745c4d974c4c99e6292786d 100644 (file)
@@ -45,6 +45,34 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
        return 0;
 }
 
+static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
+                         unsigned long addr, unsigned long end)
+{
+       unsigned long next;
+
+       for (; addr < end; addr = next) {
+               p4d_t *p4d = p4d_page + p4d_index(addr);
+               pud_t *pud;
+
+               next = (addr & P4D_MASK) + P4D_SIZE;
+               if (next > end)
+                       next = end;
+
+               if (p4d_present(*p4d)) {
+                       pud = pud_offset(p4d, 0);
+                       ident_pud_init(info, pud, addr, next);
+                       continue;
+               }
+               pud = (pud_t *)info->alloc_pgt_page(info->context);
+               if (!pud)
+                       return -ENOMEM;
+               ident_pud_init(info, pud, addr, next);
+               set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+       }
+
+       return 0;
+}
+
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
                              unsigned long pstart, unsigned long pend)
 {
@@ -55,27 +83,36 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 
        for (; addr < end; addr = next) {
                pgd_t *pgd = pgd_page + pgd_index(addr);
-               pud_t *pud;
+               p4d_t *p4d;
 
                next = (addr & PGDIR_MASK) + PGDIR_SIZE;
                if (next > end)
                        next = end;
 
                if (pgd_present(*pgd)) {
-                       pud = pud_offset(pgd, 0);
-                       result = ident_pud_init(info, pud, addr, next);
+                       p4d = p4d_offset(pgd, 0);
+                       result = ident_p4d_init(info, p4d, addr, next);
                        if (result)
                                return result;
                        continue;
                }
 
-               pud = (pud_t *)info->alloc_pgt_page(info->context);
-               if (!pud)
+               p4d = (p4d_t *)info->alloc_pgt_page(info->context);
+               if (!p4d)
                        return -ENOMEM;
-               result = ident_pud_init(info, pud, addr, next);
+               result = ident_p4d_init(info, p4d, addr, next);
                if (result)
                        return result;
-               set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+               if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+                       set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+               } else {
+                       /*
+                        * With p4d folded, pgd is equal to p4d.
+                        * The pgd entry has to point to the pud page table in this case.
+                        */
+                       pud_t *pud = pud_offset(p4d, 0);
+                       set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+               }
        }
 
        return 0;
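
[Editor's note: the ident_map.c changes above depend on the p4d level being transparently folded when CONFIG_X86_5LEVEL is disabled, in which case p4d_offset() returns its pgd argument and the pgd entry must point straight at the pud page table. The standalone sketch below is not part of the patch; it only illustrates how an x86-64 virtual address splits into per-level indices with and without the extra p4d level, assuming the usual 9-bits-per-level layout.]

/* Editorial sketch, not kernel code: per-level indices of a virtual
 * address on x86-64. With 5-level paging the pgd indexes bits 48-56 and
 * the p4d indexes bits 39-47; with 4-level paging the p4d is folded and
 * the pgd itself indexes bits 39-47. */
#include <stdio.h>
#include <stdint.h>

static unsigned int level_index(uint64_t addr, unsigned int shift)
{
        return (addr >> shift) & 0x1ff;          /* 9 bits per level */
}

int main(void)
{
        uint64_t addr = 0x00ffeedd12345000ULL;   /* arbitrary example address */

        printf("5-level: pgd=%u p4d=%u pud=%u pmd=%u pte=%u\n",
               level_index(addr, 48), level_index(addr, 39),
               level_index(addr, 30), level_index(addr, 21),
               level_index(addr, 12));
        printf("4-level: pgd=%u (p4d folded) pud=%u pmd=%u pte=%u\n",
               level_index(addr, 39), level_index(addr, 30),
               level_index(addr, 21), level_index(addr, 12));
        return 0;
}
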
index 4dddfaf6569a87a67680c0da377d07be0735f5a7..7116a727fd5ad298ecfc02a6609cbe63c8f0d2c3 100644 (file)
@@ -67,6 +67,7 @@ bool __read_mostly __vmalloc_start_set = false;
  */
 static pmd_t * __init one_md_table_init(pgd_t *pgd)
 {
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd_table;
 
@@ -75,13 +76,15 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
                pmd_table = (pmd_t *)alloc_low_page();
                paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-               pud = pud_offset(pgd, 0);
+               p4d = p4d_offset(pgd, 0);
+               pud = pud_offset(p4d, 0);
                BUG_ON(pmd_table != pmd_offset(pud, 0));
 
                return pmd_table;
        }
 #endif
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
        pmd_table = pmd_offset(pud, 0);
 
        return pmd_table;
@@ -390,8 +393,11 @@ pte_t *kmap_pte;
 
 static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
 {
-       return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
-                       vaddr), vaddr), vaddr);
+       pgd_t *pgd = pgd_offset_k(vaddr);
+       p4d_t *p4d = p4d_offset(pgd, vaddr);
+       pud_t *pud = pud_offset(p4d, vaddr);
+       pmd_t *pmd = pmd_offset(pud, vaddr);
+       return pte_offset_kernel(pmd, vaddr);
 }
 
 static void __init kmap_init(void)
@@ -410,6 +416,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 {
        unsigned long vaddr;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -418,7 +425,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
        page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
 
        pgd = swapper_pg_dir + pgd_index(vaddr);
-       pud = pud_offset(pgd, vaddr);
+       p4d = p4d_offset(pgd, vaddr);
+       pud = pud_offset(p4d, vaddr);
        pmd = pmd_offset(pud, vaddr);
        pte = pte_offset_kernel(pmd, vaddr);
        pkmap_page_table = pte;
@@ -450,6 +458,7 @@ void __init native_pagetable_init(void)
 {
        unsigned long pfn, va;
        pgd_t *pgd, *base = swapper_pg_dir;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -469,7 +478,8 @@ void __init native_pagetable_init(void)
                if (!pgd_present(*pgd))
                        break;
 
-               pud = pud_offset(pgd, va);
+               p4d = p4d_offset(pgd, va);
+               pud = pud_offset(p4d, va);
                pmd = pmd_offset(pud, va);
                if (!pmd_present(*pmd))
                        break;
index 15173d37f399610caf8969fa942b17b175dd466b..7bdda6f1d13511bf58372bc1b9ac317c461a1f5b 100644 (file)
@@ -97,28 +97,38 @@ void sync_global_pgds(unsigned long start, unsigned long end)
        unsigned long address;
 
        for (address = start; address <= end; address += PGDIR_SIZE) {
-               const pgd_t *pgd_ref = pgd_offset_k(address);
+               pgd_t *pgd_ref = pgd_offset_k(address);
+               const p4d_t *p4d_ref;
                struct page *page;
 
-               if (pgd_none(*pgd_ref))
+               /*
+                * With folded p4d, pgd_none() is always false, so we need to
+                * handle synchronization at the p4d level.
+                */
+               BUILD_BUG_ON(pgd_none(*pgd_ref));
+               p4d_ref = p4d_offset(pgd_ref, address);
+
+               if (p4d_none(*p4d_ref))
                        continue;
 
                spin_lock(&pgd_lock);
                list_for_each_entry(page, &pgd_list, lru) {
                        pgd_t *pgd;
+                       p4d_t *p4d;
                        spinlock_t *pgt_lock;
 
                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
+                       p4d = p4d_offset(pgd, address);
                        /* the pgt_lock only for Xen */
                        pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
                        spin_lock(pgt_lock);
 
-                       if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
-                               BUG_ON(pgd_page_vaddr(*pgd)
-                                      != pgd_page_vaddr(*pgd_ref));
+                       if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
+                               BUG_ON(p4d_page_vaddr(*p4d)
+                                      != p4d_page_vaddr(*p4d_ref));
 
-                       if (pgd_none(*pgd))
-                               set_pgd(pgd, *pgd_ref);
+                       if (p4d_none(*p4d))
+                               set_p4d(p4d, *p4d_ref);
 
                        spin_unlock(pgt_lock);
                }
@@ -149,16 +159,28 @@ static __ref void *spp_getpage(void)
        return ptr;
 }
 
-static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
+static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
 {
        if (pgd_none(*pgd)) {
-               pud_t *pud = (pud_t *)spp_getpage();
-               pgd_populate(&init_mm, pgd, pud);
-               if (pud != pud_offset(pgd, 0))
+               p4d_t *p4d = (p4d_t *)spp_getpage();
+               pgd_populate(&init_mm, pgd, p4d);
+               if (p4d != p4d_offset(pgd, 0))
                        printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
-                              pud, pud_offset(pgd, 0));
+                              p4d, p4d_offset(pgd, 0));
+       }
+       return p4d_offset(pgd, vaddr);
+}
+
+static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
+{
+       if (p4d_none(*p4d)) {
+               pud_t *pud = (pud_t *)spp_getpage();
+               p4d_populate(&init_mm, p4d, pud);
+               if (pud != pud_offset(p4d, 0))
+                       printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+                              pud, pud_offset(p4d, 0));
        }
-       return pud_offset(pgd, vaddr);
+       return pud_offset(p4d, vaddr);
 }
 
 static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
@@ -167,7 +189,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
                pmd_t *pmd = (pmd_t *) spp_getpage();
                pud_populate(&init_mm, pud, pmd);
                if (pmd != pmd_offset(pud, 0))
-                       printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+                       printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
                               pmd, pmd_offset(pud, 0));
        }
        return pmd_offset(pud, vaddr);
@@ -179,20 +201,15 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
                pte_t *pte = (pte_t *) spp_getpage();
                pmd_populate_kernel(&init_mm, pmd, pte);
                if (pte != pte_offset_kernel(pmd, 0))
-                       printk(KERN_ERR "PAGETABLE BUG #02!\n");
+                       printk(KERN_ERR "PAGETABLE BUG #03!\n");
        }
        return pte_offset_kernel(pmd, vaddr);
 }
 
-void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
 {
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-
-       pud = pud_page + pud_index(vaddr);
-       pmd = fill_pmd(pud, vaddr);
-       pte = fill_pte(pmd, vaddr);
+       pmd_t *pmd = fill_pmd(pud, vaddr);
+       pte_t *pte = fill_pte(pmd, vaddr);
 
        set_pte(pte, new_pte);
 
@@ -203,10 +220,25 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
        __flush_tlb_one(vaddr);
 }
 
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
+{
+       p4d_t *p4d = p4d_page + p4d_index(vaddr);
+       pud_t *pud = fill_pud(p4d, vaddr);
+
+       __set_pte_vaddr(pud, vaddr, new_pte);
+}
+
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+{
+       pud_t *pud = pud_page + pud_index(vaddr);
+
+       __set_pte_vaddr(pud, vaddr, new_pte);
+}
+
 void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
        pgd_t *pgd;
-       pud_t *pud_page;
+       p4d_t *p4d_page;
 
        pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
 
@@ -216,17 +248,20 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
                        "PGD FIXMAP MISSING, it should be setup in head.S!\n");
                return;
        }
-       pud_page = (pud_t*)pgd_page_vaddr(*pgd);
-       set_pte_vaddr_pud(pud_page, vaddr, pteval);
+
+       p4d_page = p4d_offset(pgd, 0);
+       set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
 }
 
 pmd_t * __init populate_extra_pmd(unsigned long vaddr)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
 
        pgd = pgd_offset_k(vaddr);
-       pud = fill_pud(pgd, vaddr);
+       p4d = fill_p4d(pgd, vaddr);
+       pud = fill_pud(p4d, vaddr);
        return fill_pmd(pud, vaddr);
 }
 
@@ -245,6 +280,7 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
                                        enum page_cache_mode cache)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pgprot_t prot;
@@ -255,11 +291,17 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
        for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
                pgd = pgd_offset_k((unsigned long)__va(phys));
                if (pgd_none(*pgd)) {
+                       p4d = (p4d_t *) spp_getpage();
+                       set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
+                                               _PAGE_USER));
+               }
+               p4d = p4d_offset(pgd, (unsigned long)__va(phys));
+               if (p4d_none(*p4d)) {
                        pud = (pud_t *) spp_getpage();
-                       set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+                       set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
                                                _PAGE_USER));
                }
-               pud = pud_offset(pgd, (unsigned long)__va(phys));
+               pud = pud_offset(p4d, (unsigned long)__va(phys));
                if (pud_none(*pud)) {
                        pmd = (pmd_t *) spp_getpage();
                        set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
@@ -563,12 +605,15 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 
        for (; vaddr < vaddr_end; vaddr = vaddr_next) {
                pgd_t *pgd = pgd_offset_k(vaddr);
+               p4d_t *p4d;
                pud_t *pud;
 
                vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
 
-               if (pgd_val(*pgd)) {
-                       pud = (pud_t *)pgd_page_vaddr(*pgd);
+               BUILD_BUG_ON(pgd_none(*pgd));
+               p4d = p4d_offset(pgd, vaddr);
+               if (p4d_val(*p4d)) {
+                       pud = (pud_t *)p4d_page_vaddr(*p4d);
                        paddr_last = phys_pud_init(pud, __pa(vaddr),
                                                   __pa(vaddr_end),
                                                   page_size_mask);
@@ -580,7 +625,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
                                           page_size_mask);
 
                spin_lock(&init_mm.page_table_lock);
-               pgd_populate(&init_mm, pgd, pud);
+               p4d_populate(&init_mm, p4d, pud);
                spin_unlock(&init_mm.page_table_lock);
                pgd_changed = true;
        }
@@ -726,6 +771,24 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
        spin_unlock(&init_mm.page_table_lock);
 }
 
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+       pud_t *pud;
+       int i;
+
+       for (i = 0; i < PTRS_PER_PUD; i++) {
+               pud = pud_start + i;
+               if (!pud_none(*pud))
+                       return;
+       }
+
+       /* free a pud table */
+       free_pagetable(p4d_page(*p4d), 0);
+       spin_lock(&init_mm.page_table_lock);
+       p4d_clear(p4d);
+       spin_unlock(&init_mm.page_table_lock);
+}
+
 static void __meminit
 remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
                 bool direct)
@@ -908,6 +971,32 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                update_page_count(PG_LEVEL_1G, -pages);
 }
 
+static void __meminit
+remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
+                bool direct)
+{
+       unsigned long next, pages = 0;
+       pud_t *pud_base;
+       p4d_t *p4d;
+
+       p4d = p4d_start + p4d_index(addr);
+       for (; addr < end; addr = next, p4d++) {
+               next = p4d_addr_end(addr, end);
+
+               if (!p4d_present(*p4d))
+                       continue;
+
+               BUILD_BUG_ON(p4d_large(*p4d));
+
+               pud_base = (pud_t *)p4d_page_vaddr(*p4d);
+               remove_pud_table(pud_base, addr, next, direct);
+               free_pud_table(pud_base, p4d);
+       }
+
+       if (direct)
+               update_page_count(PG_LEVEL_512G, -pages);
+}
+
 /* start and end are both virtual address. */
 static void __meminit
 remove_pagetable(unsigned long start, unsigned long end, bool direct)
@@ -915,7 +1004,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
        unsigned long next;
        unsigned long addr;
        pgd_t *pgd;
-       pud_t *pud;
+       p4d_t *p4d;
 
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
@@ -924,8 +1013,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
                if (!pgd_present(*pgd))
                        continue;
 
-               pud = (pud_t *)pgd_page_vaddr(*pgd);
-               remove_pud_table(pud, addr, next, direct);
+               p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+               remove_p4d_table(p4d, addr, next, direct);
        }
 
        flush_tlb_all();
@@ -1090,6 +1179,7 @@ int kern_addr_valid(unsigned long addr)
 {
        unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -1101,7 +1191,11 @@ int kern_addr_valid(unsigned long addr)
        if (pgd_none(*pgd))
                return 0;
 
-       pud = pud_offset(pgd, addr);
+       p4d = p4d_offset(pgd, addr);
+       if (p4d_none(*p4d))
+               return 0;
+
+       pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
                return 0;
 
@@ -1158,6 +1252,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
        unsigned long addr;
        unsigned long next;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
 
@@ -1168,7 +1263,11 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
                if (!pgd)
                        return -ENOMEM;
 
-               pud = vmemmap_pud_populate(pgd, addr, node);
+               p4d = vmemmap_p4d_populate(pgd, addr, node);
+               if (!p4d)
+                       return -ENOMEM;
+
+               pud = vmemmap_pud_populate(p4d, addr, node);
                if (!pud)
                        return -ENOMEM;
 
@@ -1236,6 +1335,7 @@ void register_page_bootmem_memmap(unsigned long section_nr,
        unsigned long end = (unsigned long)(start_page + size);
        unsigned long next;
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        unsigned int nr_pages;
@@ -1251,7 +1351,14 @@ void register_page_bootmem_memmap(unsigned long section_nr,
                }
                get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
 
-               pud = pud_offset(pgd, addr);
+               p4d = p4d_offset(pgd, addr);
+               if (p4d_none(*p4d)) {
+                       next = (addr + PAGE_SIZE) & PAGE_MASK;
+                       continue;
+               }
+               get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
+
+               pud = pud_offset(p4d, addr);
                if (pud_none(*pud)) {
                        next = (addr + PAGE_SIZE) & PAGE_MASK;
                        continue;
index 7aaa2635862d7771797b38cfcc553bcf955e0531..a5e1cda859742626a0f71ae1857d653cee860fd4 100644 (file)
@@ -425,7 +425,8 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
        /* Don't assume we're using swapper_pg_dir at this point */
        pgd_t *base = __va(read_cr3());
        pgd_t *pgd = &base[pgd_index(addr)];
-       pud_t *pud = pud_offset(pgd, addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
        pmd_t *pmd = pmd_offset(pud, addr);
 
        return pmd;
index 8d63d7a104c3c445805dcf24a59fff2756a17b01..7b81f01067f2a9e6164728784848ee9f7068c3d0 100644 (file)
@@ -32,8 +32,19 @@ static int __init map_range(struct range *range)
 static void __init clear_pgds(unsigned long start,
                        unsigned long end)
 {
-       for (; start < end; start += PGDIR_SIZE)
-               pgd_clear(pgd_offset_k(start));
+       pgd_t *pgd;
+
+       for (; start < end; start += PGDIR_SIZE) {
+               pgd = pgd_offset_k(start);
+               /*
+                * With folded p4d, pgd_clear() is a no-op, so use
+                * p4d_clear() instead.
+                */
+               if (CONFIG_PGTABLE_LEVELS < 5)
+                       p4d_clear(p4d_offset(pgd, start));
+               else
+                       pgd_clear(pgd);
+       }
 }
 
 static void __init kasan_map_early_shadow(pgd_t *pgd)
index 7940166c799b787f1c9b01a08ce8920365b05cad..19ad095b41dfc823f6107bf1ee822fcecd56a74b 100644 (file)
 #include <linux/limits.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/mm.h>
+#include <linux/compat.h>
 #include <asm/elf.h>
 
 struct va_alignment __read_mostly va_align = {
        .flags = -1,
 };
 
-static unsigned long stack_maxrandom_size(void)
+unsigned long tasksize_32bit(void)
+{
+       return IA32_PAGE_OFFSET;
+}
+
+unsigned long tasksize_64bit(void)
+{
+       return TASK_SIZE_MAX;
+}
+
+static unsigned long stack_maxrandom_size(unsigned long task_size)
 {
        unsigned long max = 0;
        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
-               max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
+               max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit());
+               max <<= PAGE_SHIFT;
        }
 
        return max;
 }
 
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole with possible stack randomization.
- */
-#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
-#define MAX_GAP (TASK_SIZE/6*5)
+#ifdef CONFIG_COMPAT
+# define mmap32_rnd_bits  mmap_rnd_compat_bits
+# define mmap64_rnd_bits  mmap_rnd_bits
+#else
+# define mmap32_rnd_bits  mmap_rnd_bits
+# define mmap64_rnd_bits  mmap_rnd_bits
+#endif
+
+#define SIZE_128M    (128 * 1024 * 1024UL)
 
 static int mmap_is_legacy(void)
 {
@@ -66,54 +80,91 @@ static int mmap_is_legacy(void)
        return sysctl_legacy_va_layout;
 }
 
-unsigned long arch_mmap_rnd(void)
+static unsigned long arch_rnd(unsigned int rndbits)
 {
-       unsigned long rnd;
-
-       if (mmap_is_ia32())
-#ifdef CONFIG_COMPAT
-               rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-#else
-               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-#endif
-       else
-               rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+       return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
+}
 
-       return rnd << PAGE_SHIFT;
+unsigned long arch_mmap_rnd(void)
+{
+       if (!(current->flags & PF_RANDOMIZE))
+               return 0;
+       return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
 {
        unsigned long gap = rlimit(RLIMIT_STACK);
+       unsigned long gap_min, gap_max;
+
+       /*
+        * Top of mmap area (just below the process stack).
+        * Leave at least a ~128 MB hole, with room for stack randomization.
+        */
+       gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+       gap_max = (task_size / 6) * 5;
 
-       if (gap < MIN_GAP)
-               gap = MIN_GAP;
-       else if (gap > MAX_GAP)
-               gap = MAX_GAP;
+       if (gap < gap_min)
+               gap = gap_min;
+       else if (gap > gap_max)
+               gap = gap_max;
+
+       return PAGE_ALIGN(task_size - gap - rnd);
+}
 
-       return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+static unsigned long mmap_legacy_base(unsigned long rnd,
+                                     unsigned long task_size)
+{
+       return __TASK_UNMAPPED_BASE(task_size) + rnd;
 }
 
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
+static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
+               unsigned long random_factor, unsigned long task_size)
+{
+       *legacy_base = mmap_legacy_base(random_factor, task_size);
+       if (mmap_is_legacy())
+               *base = *legacy_base;
+       else
+               *base = mmap_base(random_factor, task_size);
+}
+
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
-       unsigned long random_factor = 0UL;
+       if (mmap_is_legacy())
+               mm->get_unmapped_area = arch_get_unmapped_area;
+       else
+               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 
-       if (current->flags & PF_RANDOMIZE)
-               random_factor = arch_mmap_rnd();
+       arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
+                       arch_rnd(mmap64_rnd_bits), tasksize_64bit());
+
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+       /*
+        * The mmap syscall mapping base decision depends solely on the
+        * syscall type (64-bit or compat). This applies to both 64-bit
+        * and 32-bit applications. The 64-bit syscall uses
+        * mmap_base, the compat syscall uses mmap_compat_base.
+        */
+       arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
+                       arch_rnd(mmap32_rnd_bits), tasksize_32bit());
+#endif
+}
 
-       mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
+unsigned long get_mmap_base(int is_legacy)
+{
+       struct mm_struct *mm = current->mm;
 
-       if (mmap_is_legacy()) {
-               mm->mmap_base = mm->mmap_legacy_base;
-               mm->get_unmapped_area = arch_get_unmapped_area;
-       } else {
-               mm->mmap_base = mmap_base(random_factor);
-               mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+       if (in_compat_syscall()) {
+               return is_legacy ? mm->mmap_compat_legacy_base
+                                : mm->mmap_compat_base;
        }
+#endif
+       return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
 }
 
 const char *arch_vma_name(struct vm_area_struct *vma)
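
[Editor's note: the rewritten mmap_base() above clamps the stack gap between ~128 MB plus the stack randomization headroom and 5/6 of the task size, then places the mmap base that far (plus the random factor) below the task size. The standalone sketch below is not part of the patch and uses made-up numbers; for brevity it also leaves stack_maxrandom_size() out of the minimum gap.]

/* Editorial sketch, not kernel code: the gap clamping performed by
 * mmap_base() above, with made-up values standing in for
 * rlimit(RLIMIT_STACK), the random factor and the task size. */
#include <stdio.h>

#define SIZE_128M       (128UL * 1024 * 1024)
#define PAGE_ALIGN(x)   (((x) + 0xfffUL) & ~0xfffUL)    /* round up to 4K */

static unsigned long demo_mmap_base(unsigned long stack_rlimit,
                                    unsigned long rnd,
                                    unsigned long task_size)
{
        unsigned long gap = stack_rlimit;
        unsigned long gap_min = SIZE_128M;               /* + stack_maxrandom_size() in the kernel */
        unsigned long gap_max = (task_size / 6) * 5;

        if (gap < gap_min)
                gap = gap_min;
        else if (gap > gap_max)
                gap = gap_max;

        return PAGE_ALIGN(task_size - gap - rnd);
}

int main(void)
{
        unsigned long task64 = 0x00007ffffffff000UL;     /* roughly TASK_SIZE_MAX */
        unsigned long task32 = 0x00000000c0000000UL;     /* roughly IA32_PAGE_OFFSET */

        printf("64-bit base: %#lx\n", demo_mmap_base(8UL << 20, 1UL << 28, task64));
        printf("32-bit base: %#lx\n", demo_mmap_base(8UL << 20, 1UL << 24, task32));
        return 0;
}
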
index 28d42130243c0fdd463e7c2221af83115cce6bec..b5949017deadcb75c8236f4303c4d06aef610c0c 100644 (file)
@@ -346,6 +346,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
                             unsigned int *level)
 {
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
 
@@ -354,7 +355,15 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
        if (pgd_none(*pgd))
                return NULL;
 
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (p4d_none(*p4d))
+               return NULL;
+
+       *level = PG_LEVEL_512G;
+       if (p4d_large(*p4d) || !p4d_present(*p4d))
+               return (pte_t *)p4d;
+
+       pud = pud_offset(p4d, address);
        if (pud_none(*pud))
                return NULL;
 
@@ -406,13 +415,18 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
 pmd_t *lookup_pmd_address(unsigned long address)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
 
        pgd = pgd_offset_k(address);
        if (pgd_none(*pgd))
                return NULL;
 
-       pud = pud_offset(pgd, address);
+       p4d = p4d_offset(pgd, address);
+       if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
+               return NULL;
+
+       pud = pud_offset(p4d, address);
        if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
                return NULL;
 
@@ -477,11 +491,13 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 
                list_for_each_entry(page, &pgd_list, lru) {
                        pgd_t *pgd;
+                       p4d_t *p4d;
                        pud_t *pud;
                        pmd_t *pmd;
 
                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
-                       pud = pud_offset(pgd, address);
+                       p4d = p4d_offset(pgd, address);
+                       pud = pud_offset(p4d, address);
                        pmd = pmd_offset(pud, address);
                        set_pte_atomic((pte_t *)pmd, pte);
                }
@@ -836,9 +852,9 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
                        pud_clear(pud);
 }
 
-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
 {
-       pud_t *pud = pud_offset(pgd, start);
+       pud_t *pud = pud_offset(p4d, start);
 
        /*
         * Not on a GB page boundary?
@@ -1004,8 +1020,8 @@ static long populate_pmd(struct cpa_data *cpa,
        return num_pages;
 }
 
-static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
-                        pgprot_t pgprot)
+static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
+                       pgprot_t pgprot)
 {
        pud_t *pud;
        unsigned long end;
@@ -1026,7 +1042,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
                cur_pages = (pre_end - start) >> PAGE_SHIFT;
                cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
 
-               pud = pud_offset(pgd, start);
+               pud = pud_offset(p4d, start);
 
                /*
                 * Need a PMD page?
@@ -1047,7 +1063,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
        if (cpa->numpages == cur_pages)
                return cur_pages;
 
-       pud = pud_offset(pgd, start);
+       pud = pud_offset(p4d, start);
        pud_pgprot = pgprot_4k_2_large(pgprot);
 
        /*
@@ -1067,7 +1083,7 @@ static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
        if (start < end) {
                long tmp;
 
-               pud = pud_offset(pgd, start);
+               pud = pud_offset(p4d, start);
                if (pud_none(*pud))
                        if (alloc_pmd_page(pud))
                                return -1;
@@ -1090,33 +1106,43 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
 {
        pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
        pud_t *pud = NULL;      /* shut up gcc */
+       p4d_t *p4d;
        pgd_t *pgd_entry;
        long ret;
 
        pgd_entry = cpa->pgd + pgd_index(addr);
 
+       if (pgd_none(*pgd_entry)) {
+               p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+               if (!p4d)
+                       return -1;
+
+               set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
+       }
+
        /*
         * Allocate a PUD page and hand it down for mapping.
         */
-       if (pgd_none(*pgd_entry)) {
+       p4d = p4d_offset(pgd_entry, addr);
+       if (p4d_none(*p4d)) {
                pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
                if (!pud)
                        return -1;
 
-               set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+               set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
        }
 
        pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
        pgprot_val(pgprot) |=  pgprot_val(cpa->mask_set);
 
-       ret = populate_pud(cpa, addr, pgd_entry, pgprot);
+       ret = populate_pud(cpa, addr, p4d, pgprot);
        if (ret < 0) {
                /*
                 * Leave the PUD page in place in case some other CPU or thread
                 * already found it, but remove any useless entries we just
                 * added to it.
                 */
-               unmap_pud_range(pgd_entry, addr,
+               unmap_pud_range(p4d, addr,
                                addr + (cpa->numpages << PAGE_SHIFT));
                return ret;
        }
index 6cbdff26bb96a25939a0e818d4895b42b1a5a130..38b6daf72debc6396347de943762b2e1cda30f54 100644 (file)
@@ -261,13 +261,15 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 
 static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 {
+       p4d_t *p4d;
        pud_t *pud;
        int i;
 
        if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
                return;
 
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
 
        for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
                pmd_t *pmd = pmds[i];
index 9adce776852be885cff56c59b07cec98b64f4dde..3d275a791c76cff93990c81520b85453b4865c77 100644 (file)
@@ -26,6 +26,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20;
 void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
@@ -35,7 +36,12 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
                BUG();
                return;
        }
-       pud = pud_offset(pgd, vaddr);
+       p4d = p4d_offset(pgd, vaddr);
+       if (p4d_none(*p4d)) {
+               BUG();
+               return;
+       }
+       pud = pud_offset(p4d, vaddr);
        if (pud_none(*pud)) {
                BUG();
                return;
index cef39b0976498ccd6ea0b5dc1dc69acd516e9ea6..3481268da3d072e7daf0a97c43d189fb2b554d90 100644 (file)
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
        load_cr3(initial_page_table);
        __flush_tlb_all();
 
-       gdt_descr.address = __pa(get_cpu_gdt_table(0));
+       gdt_descr.address = get_cpu_gdt_paddr(0);
        gdt_descr.size = GDT_SIZE - 1;
        load_gdt(&gdt_descr);
 
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
        struct desc_ptr gdt_descr;
 
-       gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+       gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
        gdt_descr.size = GDT_SIZE - 1;
        load_gdt(&gdt_descr);
 
index a4695da42d77b39cec3b65083c7b7115b1faa32d..eb64e5b33e3773460fadbd16247eeda6cc318f14 100644 (file)
@@ -135,6 +135,7 @@ static pgd_t *efi_pgd;
 int __init efi_alloc_page_tables(void)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
        gfp_t gfp_mask;
 
@@ -147,15 +148,20 @@ int __init efi_alloc_page_tables(void)
                return -ENOMEM;
 
        pgd = efi_pgd + pgd_index(EFI_VA_END);
+       p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
+       if (!p4d) {
+               free_page((unsigned long)efi_pgd);
+               return -ENOMEM;
+       }
 
-       pud = pud_alloc_one(NULL, 0);
+       pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
        if (!pud) {
+               if (CONFIG_PGTABLE_LEVELS > 4)
+                       free_page((unsigned long) pgd_page_vaddr(*pgd));
                free_page((unsigned long)efi_pgd);
                return -ENOMEM;
        }
 
-       pgd_populate(NULL, pgd, pud);
-
        return 0;
 }
 
@@ -166,6 +172,7 @@ void efi_sync_low_kernel_mappings(void)
 {
        unsigned num_entries;
        pgd_t *pgd_k, *pgd_efi;
+       p4d_t *p4d_k, *p4d_efi;
        pud_t *pud_k, *pud_efi;
 
        if (efi_enabled(EFI_OLD_MEMMAP))
@@ -189,6 +196,21 @@ void efi_sync_low_kernel_mappings(void)
        num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
        memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
 
+       /*
+        * As with PGDs, we share all P4D entries apart from the one entry
+        * that covers the EFI runtime mapping space.
+        */
+       BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
+       BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
+
+       pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
+       pgd_k = pgd_offset_k(EFI_VA_END);
+       p4d_efi = p4d_offset(pgd_efi, 0);
+       p4d_k = p4d_offset(pgd_k, 0);
+
+       num_entries = p4d_index(EFI_VA_END);
+       memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries);
+
        /*
         * We share all the PUD entries apart from those that map the
         * EFI regions. Copy around them.
@@ -196,17 +218,16 @@ void efi_sync_low_kernel_mappings(void)
        BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
        BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
 
-       pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
-       pud_efi = pud_offset(pgd_efi, 0);
-
-       pgd_k = pgd_offset_k(EFI_VA_END);
-       pud_k = pud_offset(pgd_k, 0);
+       p4d_efi = p4d_offset(pgd_efi, EFI_VA_END);
+       p4d_k = p4d_offset(pgd_k, EFI_VA_END);
+       pud_efi = pud_offset(p4d_efi, 0);
+       pud_k = pud_offset(p4d_k, 0);
 
        num_entries = pud_index(EFI_VA_END);
        memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
 
-       pud_efi = pud_offset(pgd_efi, EFI_VA_START);
-       pud_k = pud_offset(pgd_k, EFI_VA_START);
+       pud_efi = pud_offset(p4d_efi, EFI_VA_START);
+       pud_k = pud_offset(p4d_k, EFI_VA_START);
 
        num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
        memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
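
[Editor's note: efi_sync_low_kernel_mappings() above shares every pgd/p4d/pud entry with the kernel page tables except the slots that cover the EFI runtime region. The standalone sketch below is not part of the patch; it models that copy-around-a-hole pattern on a plain array, with made-up hole indices standing in for pud_index(EFI_VA_END) and pud_index(EFI_VA_START).]

/* Editorial sketch, not kernel code: share every entry except the slots
 * in [hole_lo, hole_hi), which are left to the EFI page table itself. */
#include <stdio.h>
#include <string.h>

#define NENTRIES 512    /* PTRS_PER_PUD */

static void sync_except_hole(unsigned long *dst, const unsigned long *src,
                             unsigned int hole_lo, unsigned int hole_hi)
{
        memcpy(dst, src, hole_lo * sizeof(*src));                /* entries below the hole */
        memcpy(dst + hole_hi, src + hole_hi,                     /* entries above the hole */
               (NENTRIES - hole_hi) * sizeof(*src));
}

int main(void)
{
        unsigned long kernel_tbl[NENTRIES], efi_tbl[NENTRIES] = { 0 };
        unsigned int i;

        for (i = 0; i < NENTRIES; i++)
                kernel_tbl[i] = i + 1;                   /* pretend these are live entries */

        sync_except_hole(efi_tbl, kernel_tbl, 448, 508); /* made-up hole indices */
        printf("entry 0=%lu, entry 450=%lu, entry 510=%lu\n",
               efi_tbl[0], efi_tbl[450], efi_tbl[510]);  /* prints 1, 0, 511 */
        return 0;
}
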
index 66ade16c769363ecd1bef435eccfd4977ffd5228..6b05a9219ea2c69abb871d10576e8420ec97eb34 100644 (file)
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
         * 'pmode_gdt' in wakeup_start.
         */
        ctxt->gdt_desc.size = GDT_SIZE - 1;
-       ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+       ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
 
        store_tr(ctxt->tr);
 
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
        int cpu = smp_processor_id();
        struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
-       struct desc_struct *desc = get_cpu_gdt_table(cpu);
+       struct desc_struct *desc = get_cpu_gdt_rw(cpu);
        tss_desc tss;
 #endif
        set_tss_desc(cpu, t);   /*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
        load_mm_ldt(current->active_mm);        /* This does lldt */
 
        fpu__resume_cpu();
+
+       /* The processor is back on the direct GDT, load back the fixmap */
+       load_fixmap_gdt(cpu);
 }
 
 /**
index 9f14bd34581d663a22cb326d0ad3f98b7c2822d6..c35fdb585c6853a1f281b043d54ca3b59c801d06 100644 (file)
@@ -32,6 +32,7 @@ pgd_t *resume_pg_dir;
  */
 static pmd_t *resume_one_md_table_init(pgd_t *pgd)
 {
+       p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd_table;
 
@@ -41,11 +42,13 @@ static pmd_t *resume_one_md_table_init(pgd_t *pgd)
                return NULL;
 
        set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
 
        BUG_ON(pmd_table != pmd_offset(pud, 0));
 #else
-       pud = pud_offset(pgd, 0);
+       p4d = p4d_offset(pgd, 0);
+       pud = pud_offset(p4d, 0);
        pmd_table = pmd_offset(pud, 0);
 #endif
 
index ded2e82723829b7d74b2a771c6426cae06168cc8..2a9f993bbbf0b8a866def733b4ff0140f9af2611 100644 (file)
@@ -49,6 +49,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
 {
        pmd_t *pmd;
        pud_t *pud;
+       p4d_t *p4d;
 
        /*
         * The new mapping only has to cover the page containing the image
@@ -63,6 +64,13 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
         * the virtual address space after switching over to the original page
         * tables used by the image kernel.
         */
+
+       if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+               p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
+               if (!p4d)
+                       return -ENOMEM;
+       }
+
        pud = (pud_t *)get_safe_page(GFP_ATOMIC);
        if (!pud)
                return -ENOMEM;
@@ -75,8 +83,13 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
                __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
        set_pud(pud + pud_index(restore_jump_address),
                __pud(__pa(pmd) | _KERNPG_TABLE));
-       set_pgd(pgd + pgd_index(restore_jump_address),
-               __pgd(__pa(pud) | _KERNPG_TABLE));
+       if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+               set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
+               set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
+       } else {
+               /* No p4d for 4-level paging: point the pgd to the pud page table */
+               set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE));
+       }
 
        return 0;
 }
@@ -124,7 +137,10 @@ static int set_up_temporary_mappings(void)
 static int relocate_restore_code(void)
 {
        pgd_t *pgd;
+       p4d_t *p4d;
        pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
 
        relocated_restore_code = get_safe_page(GFP_ATOMIC);
        if (!relocated_restore_code)
@@ -134,22 +150,25 @@ static int relocate_restore_code(void)
 
        /* Make the page containing the relocated code executable */
        pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
-       pud = pud_offset(pgd, relocated_restore_code);
+       p4d = p4d_offset(pgd, relocated_restore_code);
+       if (p4d_large(*p4d)) {
+               set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
+               goto out;
+       }
+       pud = pud_offset(p4d, relocated_restore_code);
        if (pud_large(*pud)) {
                set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
-       } else {
-               pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
-
-               if (pmd_large(*pmd)) {
-                       set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
-               } else {
-                       pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
-
-                       set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
-               }
+               goto out;
+       }
+       pmd = pmd_offset(pud, relocated_restore_code);
+       if (pmd_large(*pmd)) {
+               set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+               goto out;
        }
+       pte = pte_offset_kernel(pmd, relocated_restore_code);
+       set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+out:
        __flush_tlb_all();
-
        return 0;
 }
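
[Editor's note: the relocate_restore_code() rework above walks down the page-table levels and clears _PAGE_NX at whichever level actually maps the relocated page, so the restore code becomes executable. The standalone sketch below is not part of the patch; it only shows the bit operation itself, using the fact that NX is bit 63 of an x86-64 entry, with a made-up entry value.]

/* Editorial sketch, not kernel code: the effect of the "& ~_PAGE_NX"
 * operations above on a single page-table entry. Clearing bit 63 (NX)
 * makes the mapped range executable again. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_NX         (1ULL << 63)

int main(void)
{
        uint64_t entry = 0x8000000012345063ULL;  /* made-up entry with NX set */

        printf("before: %#llx (NX=%d)\n",
               (unsigned long long)entry, !!(entry & PAGE_NX));
        entry &= ~PAGE_NX;                       /* like set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)) */
        printf("after:  %#llx (NX=%d)\n",
               (unsigned long long)entry, !!(entry & PAGE_NX));
        return 0;
}
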
 
index b6d5c8946e664aad672d15c3058e214c8192b8c6..470edad96bb9560a218affd4c0922888f9200dba 100644 (file)
  * Version 2.  See the file COPYING for more details.
  */
 
+#include <linux/bug.h>
+#include <asm/purgatory.h>
+
 #include "sha256.h"
-#include "purgatory.h"
 #include "../boot/string.h"
 
-struct sha_region {
-       unsigned long start;
-       unsigned long len;
-};
-
-static unsigned long backup_dest;
-static unsigned long backup_src;
-static unsigned long backup_sz;
+unsigned long purgatory_backup_dest __section(.kexec-purgatory);
+unsigned long purgatory_backup_src __section(.kexec-purgatory);
+unsigned long purgatory_backup_sz __section(.kexec-purgatory);
 
-static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
 
-struct sha_region sha_regions[16] = {};
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
 
 /*
  * On x86, the second kernel requires the first 640K of memory to boot. Copy
@@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {};
  */
 static int copy_backup_region(void)
 {
-       if (backup_dest)
-               memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
-
+       if (purgatory_backup_dest) {
+               memcpy((void *)purgatory_backup_dest,
+                      (void *)purgatory_backup_src, purgatory_backup_sz);
+       }
        return 0;
 }
 
 static int verify_sha256_digest(void)
 {
-       struct sha_region *ptr, *end;
+       struct kexec_sha_region *ptr, *end;
        u8 digest[SHA256_DIGEST_SIZE];
        struct sha256_state sctx;
 
        sha256_init(&sctx);
-       end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
-       for (ptr = sha_regions; ptr < end; ptr++)
+       end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+       for (ptr = purgatory_sha_regions; ptr < end; ptr++)
                sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
 
        sha256_final(&sctx, digest);
 
-       if (memcmp(digest, sha256_digest, sizeof(digest)))
+       if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
                return 1;
 
        return 0;
diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h
deleted file mode 100644 (file)
index e2e365a..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef PURGATORY_H
-#define PURGATORY_H
-
-#ifndef __ASSEMBLY__
-extern void purgatory(void);
-#endif /* __ASSEMBLY__ */
-
-#endif /* PURGATORY_H */
index f90e9dfa90bb928979698d46af465e537f8314a2..dfae9b9e60b5ba01e62d92d442bd1276eee45ae7 100644 (file)
@@ -9,7 +9,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
-#include "purgatory.h"
+#include <asm/purgatory.h>
 
        .text
        .globl purgatory_start
index bd15a4127735e5f6ed9560b8dfb2503126f56e47..2867d9825a57e5f1f734bfb4a5777bc31810b090 100644 (file)
@@ -10,7 +10,6 @@
 #ifndef SHA256_H
 #define SHA256_H
 
-
 #include <linux/types.h>
 #include <crypto/sha.h>
 
index e7e7055a86589dea6d0f9ca043365db4ab8b1b9c..69f0827d5f5391e1ad3b4ff970aa63504c17b434 100644 (file)
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
 
 ifeq ($(CONFIG_X86_32),y)
 
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
index e59eef20647b3e722cc3f5a97fdec08c29071c14..b291ca5cf66bfc83e8176d0d6c05fa82a49ad823 100644 (file)
@@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
         return -ENOSYS;
 }
 
-extern long arch_prctl(struct task_struct *task, int code,
+extern long arch_prctl(struct task_struct *task, int option,
                       unsigned long __user *addr);
 
 #endif
index 96eb2bd288320b28e1ff6278bd0d410f1536ceab..8431e87ac33338eed5d037fe07259558c9b5ee23 100644 (file)
@@ -6,7 +6,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
-int os_arch_prctl(int pid, int code, unsigned long *addr)
+int os_arch_prctl(int pid, int option, unsigned long *arg2)
 {
-        return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
+       return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
 }
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644 (file)
index 0000000..627d688
--- /dev/null
@@ -0,0 +1,7 @@
+#include <linux/syscalls.h>
+#include <os.h>
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+       return -EINVAL;
+}
index 10d907098c2614835b002cbdc8d754cca1f7d56a..58f51667e2e4beb82ec794cf618bf7026cfcb66c 100644 (file)
@@ -7,13 +7,15 @@
 
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
+#include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <asm/prctl.h> /* XXX This should get the constants from libc */
 #include <os.h>
 
-long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
+long arch_prctl(struct task_struct *task, int option,
+               unsigned long __user *arg2)
 {
-       unsigned long *ptr = addr, tmp;
+       unsigned long *ptr = arg2, tmp;
        long ret;
        int pid = task->mm->context.id.u.pid;
 
@@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
         * arch_prctl is run on the host, then the registers are read
         * back.
         */
-       switch (code) {
+       switch (option) {
        case ARCH_SET_FS:
        case ARCH_SET_GS:
                ret = restore_registers(pid, &current->thread.regs.regs);
@@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
                ptr = &tmp;
        }
 
-       ret = os_arch_prctl(pid, code, ptr);
+       ret = os_arch_prctl(pid, option, ptr);
        if (ret)
                return ret;
 
-       switch (code) {
+       switch (option) {
        case ARCH_SET_FS:
                current->thread.arch.fs = (unsigned long) ptr;
                ret = save_registers(pid, &current->thread.regs.regs);
@@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
                ret = save_registers(pid, &current->thread.regs.regs);
                break;
        case ARCH_GET_FS:
-               ret = put_user(tmp, addr);
+               ret = put_user(tmp, arg2);
                break;
        case ARCH_GET_GS:
-               ret = put_user(tmp, addr);
+               ret = put_user(tmp, arg2);
                break;
        }
 
        return ret;
 }
 
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
 {
-       return arch_prctl(current, code, (unsigned long __user *) addr);
+       return arch_prctl(current, option, (unsigned long __user *) arg2);
 }
 
 void arch_switch_to(struct task_struct *to)
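With arch_prctl now wired up through SYSCALL_DEFINE2 on UML as well, the call is reachable like any other syscall. As a rough userspace illustration (assuming x86-64 headers that provide SYS_arch_prctl and ARCH_GET_FS; this program is not part of the patch), reading the FS base looks like:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>

int main(void)
{
	unsigned long fsbase = 0;

	/* option = ARCH_GET_FS, arg2 = address to store the FS base in */
	if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) != 0) {
		perror("arch_prctl");
		return 1;
	}
	printf("FS base: %#lx\n", fsbase);
	return 0;
}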
index bc3dab5d47ca408721094b570a52570cfa3dba0d..6efa0cc425a22f405fd86ff98e10164a8be51050 100644 (file)
@@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
 
        *shadow = t->tls_array[i];
 
-       gdt = get_cpu_gdt_table(cpu);
+       gdt = get_cpu_gdt_rw(cpu);
        maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
        mc = __xen_mc_entry(0);
 
index 37cb5aad71de364fa50974e236e2d001f51ea687..4d4b7bc48f5d7e68d9e84471b031caa6b843eb74 100644 (file)
@@ -535,40 +535,41 @@ static pgd_t *xen_get_user_pgd(pgd_t *pgd)
        return user_ptr;
 }
 
-static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
 {
        struct mmu_update u;
 
        u.ptr = virt_to_machine(ptr).maddr;
-       u.val = pgd_val_ma(val);
+       u.val = p4d_val_ma(val);
        xen_extend_mmu_update(&u);
 }
 
 /*
- * Raw hypercall-based set_pgd, intended for in early boot before
+ * Raw hypercall-based set_p4d, intended for in early boot before
  * there's a page structure.  This implies:
  *  1. The only existing pagetable is the kernel's
  *  2. It is always pinned
  *  3. It has no user pagetable attached to it
  */
-static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
 {
        preempt_disable();
 
        xen_mc_batch();
 
-       __xen_set_pgd_hyper(ptr, val);
+       __xen_set_p4d_hyper(ptr, val);
 
        xen_mc_issue(PARAVIRT_LAZY_MMU);
 
        preempt_enable();
 }
 
-static void xen_set_pgd(pgd_t *ptr, pgd_t val)
+static void xen_set_p4d(p4d_t *ptr, p4d_t val)
 {
-       pgd_t *user_ptr = xen_get_user_pgd(ptr);
+       pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr);
+       pgd_t pgd_val;
 
-       trace_xen_mmu_set_pgd(ptr, user_ptr, val);
+       trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val);
 
        /* If page is not pinned, we can just update the entry
           directly */
@@ -576,7 +577,8 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
                *ptr = val;
                if (user_ptr) {
                        WARN_ON(xen_page_pinned(user_ptr));
-                       *user_ptr = val;
+                       pgd_val.pgd = p4d_val_ma(val);
+                       *user_ptr = pgd_val;
                }
                return;
        }
@@ -585,14 +587,72 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
           user updates together. */
        xen_mc_batch();
 
-       __xen_set_pgd_hyper(ptr, val);
+       __xen_set_p4d_hyper(ptr, val);
        if (user_ptr)
-               __xen_set_pgd_hyper(user_ptr, val);
+               __xen_set_p4d_hyper((p4d_t *)user_ptr, val);
 
        xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
+static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+               bool last, unsigned long limit)
+{
+       int i, nr, flush = 0;
+
+       nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
+       for (i = 0; i < nr; i++) {
+               if (!pmd_none(pmd[i]))
+                       flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+       }
+       return flush;
+}
+
+static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+               bool last, unsigned long limit)
+{
+       int i, nr, flush = 0;
+
+       nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
+       for (i = 0; i < nr; i++) {
+               pmd_t *pmd;
+
+               if (pud_none(pud[i]))
+                       continue;
+
+               pmd = pmd_offset(&pud[i], 0);
+               if (PTRS_PER_PMD > 1)
+                       flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
+               flush |= xen_pmd_walk(mm, pmd, func,
+                               last && i == nr - 1, limit);
+       }
+       return flush;
+}
+
+static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+               int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+               bool last, unsigned long limit)
+{
+       int i, nr, flush = 0;
+
+       nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
+       for (i = 0; i < nr; i++) {
+               pud_t *pud;
+
+               if (p4d_none(p4d[i]))
+                       continue;
+
+               pud = pud_offset(&p4d[i], 0);
+               if (PTRS_PER_PUD > 1)
+                       flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+               flush |= xen_pud_walk(mm, pud, func,
+                               last && i == nr - 1, limit);
+       }
+       return flush;
+}
+
 /*
  * (Yet another) pagetable walker.  This one is intended for pinning a
  * pagetable.  This means that it walks a pagetable and calls the
@@ -613,10 +673,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
                                      enum pt_level),
                          unsigned long limit)
 {
-       int flush = 0;
+       int i, nr, flush = 0;
        unsigned hole_low, hole_high;
-       unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
-       unsigned pgdidx, pudidx, pmdidx;
 
        /* The limit is the last byte to be touched */
        limit--;
@@ -633,65 +691,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
        hole_low = pgd_index(USER_LIMIT);
        hole_high = pgd_index(PAGE_OFFSET);
 
-       pgdidx_limit = pgd_index(limit);
-#if PTRS_PER_PUD > 1
-       pudidx_limit = pud_index(limit);
-#else
-       pudidx_limit = 0;
-#endif
-#if PTRS_PER_PMD > 1
-       pmdidx_limit = pmd_index(limit);
-#else
-       pmdidx_limit = 0;
-#endif
-
-       for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
-               pud_t *pud;
+       nr = pgd_index(limit) + 1;
+       for (i = 0; i < nr; i++) {
+               p4d_t *p4d;
 
-               if (pgdidx >= hole_low && pgdidx < hole_high)
+               if (i >= hole_low && i < hole_high)
                        continue;
 
-               if (!pgd_val(pgd[pgdidx]))
+               if (pgd_none(pgd[i]))
                        continue;
 
-               pud = pud_offset(&pgd[pgdidx], 0);
-
-               if (PTRS_PER_PUD > 1) /* not folded */
-                       flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-
-               for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
-                       pmd_t *pmd;
-
-                       if (pgdidx == pgdidx_limit &&
-                           pudidx > pudidx_limit)
-                               goto out;
-
-                       if (pud_none(pud[pudidx]))
-                               continue;
-
-                       pmd = pmd_offset(&pud[pudidx], 0);
-
-                       if (PTRS_PER_PMD > 1) /* not folded */
-                               flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
-
-                       for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
-                               struct page *pte;
-
-                               if (pgdidx == pgdidx_limit &&
-                                   pudidx == pudidx_limit &&
-                                   pmdidx > pmdidx_limit)
-                                       goto out;
-
-                               if (pmd_none(pmd[pmdidx]))
-                                       continue;
-
-                               pte = pmd_page(pmd[pmdidx]);
-                               flush |= (*func)(mm, pte, PT_PTE);
-                       }
-               }
+               p4d = p4d_offset(&pgd[i], 0);
+               if (PTRS_PER_P4D > 1)
+                       flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
+               flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
        }
 
-out:
        /* Do the top level last, so that the callbacks can use it as
           a cue to do final things like tlb flushes. */
        flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
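The old triple-nested loop in __xen_pgd_walk() is split into one helper per level (xen_p4d_walk/xen_pud_walk/xen_pmd_walk), each passing down whether it sits on the last branch so that only that branch is clipped at the limit index. A self-contained toy version of the same idea, collapsed into a single recursive helper with made-up table sizes (illustration only, not Xen code):

#include <stdio.h>
#include <stdbool.h>

#define ENTRIES_PER_LEVEL 4

/* Stand-in for the per-page callback (*func)() in the walker above. */
static void visit(int level, int idx)
{
	printf("level %d, entry %d\n", level, idx);
}

/*
 * Walk from 'level' down to 0. Only the last branch of the last branch
 * is limited to limit[level]; every other subtree is walked in full.
 */
static void walk_level(int level, const int limit[], bool last)
{
	int nr = last ? limit[level] + 1 : ENTRIES_PER_LEVEL;

	for (int i = 0; i < nr; i++) {
		visit(level, i);
		if (level > 0)
			walk_level(level - 1, limit, last && i == nr - 1);
	}
}

int main(void)
{
	int limit[3] = { 1, 2, 0 };	/* last valid index at levels 0..2 */

	walk_level(2, limit, true);
	return 0;
}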
@@ -1150,57 +1165,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
        xen_free_ro_pages(pa, PAGE_SIZE);
 }
 
+static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
+{
+       unsigned long pa;
+       pte_t *pte_tbl;
+       int i;
+
+       if (pmd_large(*pmd)) {
+               pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+               xen_free_ro_pages(pa, PMD_SIZE);
+               return;
+       }
+
+       pte_tbl = pte_offset_kernel(pmd, 0);
+       for (i = 0; i < PTRS_PER_PTE; i++) {
+               if (pte_none(pte_tbl[i]))
+                       continue;
+               pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
+               xen_free_ro_pages(pa, PAGE_SIZE);
+       }
+       set_pmd(pmd, __pmd(0));
+       xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
+{
+       unsigned long pa;
+       pmd_t *pmd_tbl;
+       int i;
+
+       if (pud_large(*pud)) {
+               pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+               xen_free_ro_pages(pa, PUD_SIZE);
+               return;
+       }
+
+       pmd_tbl = pmd_offset(pud, 0);
+       for (i = 0; i < PTRS_PER_PMD; i++) {
+               if (pmd_none(pmd_tbl[i]))
+                       continue;
+               xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
+       }
+       set_pud(pud, __pud(0));
+       xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
+{
+       unsigned long pa;
+       pud_t *pud_tbl;
+       int i;
+
+       if (p4d_large(*p4d)) {
+               pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
+               xen_free_ro_pages(pa, P4D_SIZE);
+               return;
+       }
+
+       pud_tbl = pud_offset(p4d, 0);
+       for (i = 0; i < PTRS_PER_PUD; i++) {
+               if (pud_none(pud_tbl[i]))
+                       continue;
+               xen_cleanmfnmap_pud(pud_tbl + i, unpin);
+       }
+       set_p4d(p4d, __p4d(0));
+       xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
+}
+
 /*
  * Since it is well isolated we can (and since it is perhaps large we should)
  * also free the page tables mapping the initial P->M table.
  */
 static void __init xen_cleanmfnmap(unsigned long vaddr)
 {
-       unsigned long va = vaddr & PMD_MASK;
-       unsigned long pa;
-       pgd_t *pgd = pgd_offset_k(va);
-       pud_t *pud_page = pud_offset(pgd, 0);
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgd;
+       p4d_t *p4d;
        unsigned int i;
        bool unpin;
 
        unpin = (vaddr == 2 * PGDIR_SIZE);
-       set_pgd(pgd, __pgd(0));
-       do {
-               pud = pud_page + pud_index(va);
-               if (pud_none(*pud)) {
-                       va += PUD_SIZE;
-               } else if (pud_large(*pud)) {
-                       pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
-                       xen_free_ro_pages(pa, PUD_SIZE);
-                       va += PUD_SIZE;
-               } else {
-                       pmd = pmd_offset(pud, va);
-                       if (pmd_large(*pmd)) {
-                               pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
-                               xen_free_ro_pages(pa, PMD_SIZE);
-                       } else if (!pmd_none(*pmd)) {
-                               pte = pte_offset_kernel(pmd, va);
-                               set_pmd(pmd, __pmd(0));
-                               for (i = 0; i < PTRS_PER_PTE; ++i) {
-                                       if (pte_none(pte[i]))
-                                               break;
-                                       pa = pte_pfn(pte[i]) << PAGE_SHIFT;
-                                       xen_free_ro_pages(pa, PAGE_SIZE);
-                               }
-                               xen_cleanmfnmap_free_pgtbl(pte, unpin);
-                       }
-                       va += PMD_SIZE;
-                       if (pmd_index(va))
-                               continue;
-                       set_pud(pud, __pud(0));
-                       xen_cleanmfnmap_free_pgtbl(pmd, unpin);
-               }
-
-       } while (pud_index(va) || pmd_index(va));
-       xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
+       vaddr &= PMD_MASK;
+       pgd = pgd_offset_k(vaddr);
+       p4d = p4d_offset(pgd, 0);
+       for (i = 0; i < PTRS_PER_P4D; i++) {
+               if (p4d_none(p4d[i]))
+                       continue;
+               xen_cleanmfnmap_p4d(p4d + i, unpin);
+       }
+       if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+               set_pgd(pgd, __pgd(0));
+               xen_cleanmfnmap_free_pgtbl(p4d, unpin);
+       }
 }
 
 static void __init xen_pagetable_p2m_free(void)
@@ -1538,7 +1593,6 @@ static int xen_pgd_alloc(struct mm_struct *mm)
                BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
        }
 #endif
-
        return ret;
 }
 
@@ -1730,7 +1784,7 @@ static void xen_release_pmd(unsigned long pfn)
        xen_release_ptpage(pfn, PT_PMD);
 }
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
        xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2071,21 +2125,27 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
  */
 void __init xen_relocate_p2m(void)
 {
-       phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
+       phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
        unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
-       int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
+       int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
        pte_t *pt;
        pmd_t *pmd;
        pud_t *pud;
+       p4d_t *p4d = NULL;
        pgd_t *pgd;
        unsigned long *new_p2m;
+       int save_pud;
 
        size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
        n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
        n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
        n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
-       n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
-       n_frames = n_pte + n_pt + n_pmd + n_pud;
+       n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
+       if (PTRS_PER_P4D > 1)
+               n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
+       else
+               n_p4d = 0;
+       n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
 
        new_area = xen_find_free_area(PFN_PHYS(n_frames));
        if (!new_area) {
@@ -2101,55 +2161,76 @@ void __init xen_relocate_p2m(void)
         * To avoid any possible virtual address collision, just use
         * 2 * PUD_SIZE for the new area.
         */
-       pud_phys = new_area;
+       p4d_phys = new_area;
+       pud_phys = p4d_phys + PFN_PHYS(n_p4d);
        pmd_phys = pud_phys + PFN_PHYS(n_pud);
        pt_phys = pmd_phys + PFN_PHYS(n_pmd);
        p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
 
        pgd = __va(read_cr3());
        new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
-       for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
-               pud = early_memremap(pud_phys, PAGE_SIZE);
-               clear_page(pud);
-               for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
-                    idx_pmd++) {
-                       pmd = early_memremap(pmd_phys, PAGE_SIZE);
-                       clear_page(pmd);
-                       for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
-                            idx_pt++) {
-                               pt = early_memremap(pt_phys, PAGE_SIZE);
-                               clear_page(pt);
-                               for (idx_pte = 0;
-                                    idx_pte < min(n_pte, PTRS_PER_PTE);
-                                    idx_pte++) {
-                                       set_pte(pt + idx_pte,
-                                               pfn_pte(p2m_pfn, PAGE_KERNEL));
-                                       p2m_pfn++;
+       idx_p4d = 0;
+       save_pud = n_pud;
+       do {
+               if (n_p4d > 0) {
+                       p4d = early_memremap(p4d_phys, PAGE_SIZE);
+                       clear_page(p4d);
+                       n_pud = min(save_pud, PTRS_PER_P4D);
+               }
+               for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+                       pud = early_memremap(pud_phys, PAGE_SIZE);
+                       clear_page(pud);
+                       for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+                                idx_pmd++) {
+                               pmd = early_memremap(pmd_phys, PAGE_SIZE);
+                               clear_page(pmd);
+                               for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+                                        idx_pt++) {
+                                       pt = early_memremap(pt_phys, PAGE_SIZE);
+                                       clear_page(pt);
+                                       for (idx_pte = 0;
+                                                idx_pte < min(n_pte, PTRS_PER_PTE);
+                                                idx_pte++) {
+                                               set_pte(pt + idx_pte,
+                                                               pfn_pte(p2m_pfn, PAGE_KERNEL));
+                                               p2m_pfn++;
+                                       }
+                                       n_pte -= PTRS_PER_PTE;
+                                       early_memunmap(pt, PAGE_SIZE);
+                                       make_lowmem_page_readonly(__va(pt_phys));
+                                       pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+                                                       PFN_DOWN(pt_phys));
+                                       set_pmd(pmd + idx_pt,
+                                                       __pmd(_PAGE_TABLE | pt_phys));
+                                       pt_phys += PAGE_SIZE;
                                }
-                               n_pte -= PTRS_PER_PTE;
-                               early_memunmap(pt, PAGE_SIZE);
-                               make_lowmem_page_readonly(__va(pt_phys));
-                               pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
-                                                 PFN_DOWN(pt_phys));
-                               set_pmd(pmd + idx_pt,
-                                       __pmd(_PAGE_TABLE | pt_phys));
-                               pt_phys += PAGE_SIZE;
+                               n_pt -= PTRS_PER_PMD;
+                               early_memunmap(pmd, PAGE_SIZE);
+                               make_lowmem_page_readonly(__va(pmd_phys));
+                               pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+                                               PFN_DOWN(pmd_phys));
+                               set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+                               pmd_phys += PAGE_SIZE;
                        }
-                       n_pt -= PTRS_PER_PMD;
-                       early_memunmap(pmd, PAGE_SIZE);
-                       make_lowmem_page_readonly(__va(pmd_phys));
-                       pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
-                                         PFN_DOWN(pmd_phys));
-                       set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
-                       pmd_phys += PAGE_SIZE;
+                       n_pmd -= PTRS_PER_PUD;
+                       early_memunmap(pud, PAGE_SIZE);
+                       make_lowmem_page_readonly(__va(pud_phys));
+                       pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+                       if (n_p4d > 0)
+                               set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
+                       else
+                               set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+                       pud_phys += PAGE_SIZE;
                }
-               n_pmd -= PTRS_PER_PUD;
-               early_memunmap(pud, PAGE_SIZE);
-               make_lowmem_page_readonly(__va(pud_phys));
-               pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
-               set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
-               pud_phys += PAGE_SIZE;
-       }
+               if (n_p4d > 0) {
+                       save_pud -= PTRS_PER_P4D;
+                       early_memunmap(p4d, PAGE_SIZE);
+                       make_lowmem_page_readonly(__va(p4d_phys));
+                       pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
+                       set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
+                       p4d_phys += PAGE_SIZE;
+               }
+       } while (++idx_p4d < n_p4d);
 
        /* Now copy the old p2m info to the new area. */
        memcpy(new_p2m, xen_p2m_addr, size);
@@ -2326,6 +2407,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
        case FIX_TEXT_POKE0:
        case FIX_TEXT_POKE1:
+       case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
                /* All local page mappings */
                pte = pfn_pte(phys, prot);
                break;
@@ -2378,8 +2460,8 @@ static void __init xen_post_allocator_init(void)
        pv_mmu_ops.set_pte = xen_set_pte;
        pv_mmu_ops.set_pmd = xen_set_pmd;
        pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS == 4
-       pv_mmu_ops.set_pgd = xen_set_pgd;
+#if CONFIG_PGTABLE_LEVELS >= 4
+       pv_mmu_ops.set_p4d = xen_set_p4d;
 #endif
 
        /* This will work as long as patching hasn't happened yet
@@ -2388,7 +2470,7 @@ static void __init xen_post_allocator_init(void)
        pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
        pv_mmu_ops.release_pte = xen_release_pte;
        pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
        pv_mmu_ops.alloc_pud = xen_alloc_pud;
        pv_mmu_ops.release_pud = xen_release_pud;
 #endif
@@ -2454,10 +2536,10 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
        .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
        .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
        .pud_val = PV_CALLEE_SAVE(xen_pud_val),
        .make_pud = PV_CALLEE_SAVE(xen_make_pud),
-       .set_pgd = xen_set_pgd_hyper,
+       .set_p4d = xen_set_p4d_hyper,
 
        .alloc_pud = xen_alloc_pmd_init,
        .release_pud = xen_release_pmd_init,
index 73809bb951b40a7b8baf73773260276ed575c623..3fe2b3292915847b8a74b3a58bdfd9464db35935 100644 (file)
@@ -5,6 +5,7 @@
 
 enum pt_level {
        PT_PGD,
+       PT_P4D,
        PT_PUD,
        PT_PMD,
        PT_PTE
index 7ff2f1bfb7ec0592de8466a48c0350f72ec6816e..eaa36162ed4a996e35250037b77add28d19c7867 100644 (file)
@@ -392,7 +392,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
        if (ctxt == NULL)
                return -ENOMEM;
 
-       gdt = get_cpu_gdt_table(cpu);
+       gdt = get_cpu_gdt_rw(cpu);
 
 #ifdef CONFIG_X86_32
        ctxt->user_regs.fs = __KERNEL_PERCPU;
index 5eec5e08417f6ff1989e3e2a07b31c62901953d5..e75878f8b14af8f852d814717c3900759b0ed6fc 100644 (file)
@@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
        bio_list_init(&punt);
        bio_list_init(&nopunt);
 
-       while ((bio = bio_list_pop(current->bio_list)))
+       while ((bio = bio_list_pop(&current->bio_list[0])))
                bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[0] = nopunt;
 
-       *current->bio_list = nopunt;
+       bio_list_init(&nopunt);
+       while ((bio = bio_list_pop(&current->bio_list[1])))
+               bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+       current->bio_list[1] = nopunt;
 
        spin_lock(&bs->rescue_lock);
        bio_list_merge(&bs->rescue_list, &punt);
@@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
                 * we retry with the original gfp_flags.
                 */
 
-               if (current->bio_list && !bio_list_empty(current->bio_list))
+               if (current->bio_list &&
+                   (!bio_list_empty(&current->bio_list[0]) ||
+                    !bio_list_empty(&current->bio_list[1])))
                        gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
                p = mempool_alloc(bs->bio_pool, gfp_mask);
index 0eeb99ef654f4ad6874cf579883a263c9894ca31..d772c221cc178bf3ecfe448f3367121ec1d077de 100644 (file)
@@ -1973,7 +1973,14 @@ end_io:
  */
 blk_qc_t generic_make_request(struct bio *bio)
 {
-       struct bio_list bio_list_on_stack;
+       /*
+        * bio_list_on_stack[0] contains bios submitted by the current
+        * make_request_fn.
+        * bio_list_on_stack[1] contains bios that were submitted before
+        * the current make_request_fn, but that haven't been processed
+        * yet.
+        */
+       struct bio_list bio_list_on_stack[2];
        blk_qc_t ret = BLK_QC_T_NONE;
 
        if (!generic_make_request_checks(bio))
@@ -1990,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio)
         * should be added at the tail
         */
        if (current->bio_list) {
-               bio_list_add(current->bio_list, bio);
+               bio_list_add(&current->bio_list[0], bio);
                goto out;
        }
 
@@ -2009,18 +2016,17 @@ blk_qc_t generic_make_request(struct bio *bio)
         * bio_list, and call into ->make_request() again.
         */
        BUG_ON(bio->bi_next);
-       bio_list_init(&bio_list_on_stack);
-       current->bio_list = &bio_list_on_stack;
+       bio_list_init(&bio_list_on_stack[0]);
+       current->bio_list = bio_list_on_stack;
        do {
                struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
                if (likely(blk_queue_enter(q, false) == 0)) {
-                       struct bio_list hold;
                        struct bio_list lower, same;
 
                        /* Create a fresh bio_list for all subordinate requests */
-                       hold = bio_list_on_stack;
-                       bio_list_init(&bio_list_on_stack);
+                       bio_list_on_stack[1] = bio_list_on_stack[0];
+                       bio_list_init(&bio_list_on_stack[0]);
                        ret = q->make_request_fn(q, bio);
 
                        blk_queue_exit(q);
@@ -2030,19 +2036,19 @@ blk_qc_t generic_make_request(struct bio *bio)
                         */
                        bio_list_init(&lower);
                        bio_list_init(&same);
-                       while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+                       while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
                                if (q == bdev_get_queue(bio->bi_bdev))
                                        bio_list_add(&same, bio);
                                else
                                        bio_list_add(&lower, bio);
                        /* now assemble so we handle the lowest level first */
-                       bio_list_merge(&bio_list_on_stack, &lower);
-                       bio_list_merge(&bio_list_on_stack, &same);
-                       bio_list_merge(&bio_list_on_stack, &hold);
+                       bio_list_merge(&bio_list_on_stack[0], &lower);
+                       bio_list_merge(&bio_list_on_stack[0], &same);
+                       bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
                } else {
                        bio_io_error(bio);
                }
-               bio = bio_list_pop(current->bio_list);
+               bio = bio_list_pop(&bio_list_on_stack[0]);
        } while (bio);
        current->bio_list = NULL; /* deactivate */
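The comment at the top of generic_make_request() above describes the trick: instead of letting ->make_request_fn() recurse when it splits or redirects a bio, newly generated bios are parked on an on-stack list and drained by the loop. A stripped-down, self-contained sketch of that recursion-to-iteration conversion (plain C, ignoring the lower/same/hold ordering and all block-layer types):

#include <stdio.h>
#include <stdlib.h>

struct item {
	int depth;
	struct item *next;
};

static struct item *pending;	/* stand-in for bio_list_on_stack */

static void submit(int depth)
{
	struct item *it = malloc(sizeof(*it));

	it->depth = depth;
	it->next = pending;
	pending = it;
}

/* Handling one item may generate children, like ->make_request_fn(). */
static void handle(const struct item *it)
{
	printf("handling item at depth %d\n", it->depth);
	if (it->depth < 3) {
		submit(it->depth + 1);
		submit(it->depth + 1);
	}
}

int main(void)
{
	submit(0);
	while (pending) {		/* the drain loop replaces recursion */
		struct item *it = pending;

		pending = it->next;
		handle(it);
		free(it);
	}
	return 0;
}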
 
index e48bc2c72615de016f013a2e98ea72cd49713a04..9d97bfc4d4657b586d1a9b4d077a8e673300d79a 100644 (file)
@@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
        for (i = 0; i < set->nr_hw_queues; i++) {
                struct blk_mq_tags *tags = set->tags[i];
 
+               if (!tags)
+                       continue;
+
                for (j = 0; j < tags->nr_tags; j++) {
                        if (!tags->static_rqs[j])
                                continue;
index 159187a28d66521b4ab0109d3db38e6225ac71b3..a4546f060e80933423638f1399ab1922db8331a9 100644 (file)
@@ -1434,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
        return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
-static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
+static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
+                                     bool may_sleep)
 {
        struct request_queue *q = rq->q;
        struct blk_mq_queue_data bd = {
@@ -1475,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
        }
 
 insert:
-       blk_mq_sched_insert_request(rq, false, true, true, false);
+       blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
 }
 
 /*
@@ -1569,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
                if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
                        rcu_read_lock();
-                       blk_mq_try_issue_directly(old_rq, &cookie);
+                       blk_mq_try_issue_directly(old_rq, &cookie, false);
                        rcu_read_unlock();
                } else {
                        srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
-                       blk_mq_try_issue_directly(old_rq, &cookie);
+                       blk_mq_try_issue_directly(old_rq, &cookie, true);
                        srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
                }
                goto done;
index f5e18c2a48527bb3f5bbdc5202b37577689710b3..690deca17c35287c00171466f7b06e53262b0601 100644 (file)
@@ -266,7 +266,7 @@ unlock:
        return err;
 }
 
-int af_alg_accept(struct sock *sk, struct socket *newsock)
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
 {
        struct alg_sock *ask = alg_sk(sk);
        const struct af_alg_type *type;
@@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock)
        if (!type)
                goto unlock;
 
-       sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0);
+       sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern);
        err = -ENOMEM;
        if (!sk2)
                goto unlock;
@@ -323,9 +323,10 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(af_alg_accept);
 
-static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
+static int alg_accept(struct socket *sock, struct socket *newsock, int flags,
+                     bool kern)
 {
-       return af_alg_accept(sock->sk, newsock);
+       return af_alg_accept(sock->sk, newsock, kern);
 }
 
 static const struct proto_ops alg_proto_ops = {
index 54fc90e8339ce83edab95908adb16ddc1e9d7d32..5e92bd275ef38e8dfe47d9ca50a0e5ced51b2d0e 100644 (file)
@@ -239,7 +239,8 @@ unlock:
        return err ?: len;
 }
 
-static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
+static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
 {
        struct sock *sk = sock->sk;
        struct alg_sock *ask = alg_sk(sk);
@@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
        if (err)
                return err;
 
-       err = af_alg_accept(ask->parent, newsock);
+       err = af_alg_accept(ask->parent, newsock, kern);
        if (err)
                return err;
 
@@ -378,7 +379,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
 }
 
 static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
-                            int flags)
+                            int flags, bool kern)
 {
        int err;
 
@@ -386,7 +387,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
        if (err)
                return err;
 
-       return hash_accept(sock, newsock, flags);
+       return hash_accept(sock, newsock, flags, kern);
 }
 
 static struct proto_ops algif_hash_ops_nokey = {
index 85d833289f28f85de9aa98efe52a05a921cdc3bd..4c96f3ac4976d9bbc306010ca4fd76bb7d33015e 100644 (file)
@@ -177,7 +177,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
        case AHCI_LS1043A:
                if (!qpriv->ecc_addr)
                        return -EINVAL;
-               writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+               writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+                               qpriv->ecc_addr);
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                if (qpriv->is_dmacoherent)
@@ -194,7 +195,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
        case AHCI_LS1046A:
                if (!qpriv->ecc_addr)
                        return -EINVAL;
-               writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+               writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+                               qpriv->ecc_addr);
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
                if (qpriv->is_dmacoherent)
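The ahci_qoriq change above replaces a blind register write with a read-modify-write, so that setting ECC_DIS_ARMV8_CH2 no longer wipes whatever else is in the ECC register. The pattern in isolation (plain C stand-in for readl()/writel(); the bit position is invented for the example):

#include <stdio.h>
#include <stdint.h>

#define ECC_DIS_CH2	(1u << 14)	/* placeholder bit for the sketch */

/* OR one bit into a register without disturbing the other bits. */
static void set_bits(volatile uint32_t *reg, uint32_t bits)
{
	uint32_t val = *reg;	/* readl() */

	*reg = val | bits;	/* writel() */
}

int main(void)
{
	uint32_t fake_reg = 0xa001;	/* pretend other bits are already set */

	set_bits(&fake_reg, ECC_DIS_CH2);
	printf("register now: %#x\n", fake_reg);
	return 0;
}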
index 2bd92dca3e6204027f6c1b5fb07ba519cbe039fa..274d6d7193d7caa9b57f111962aa6e245ebc8f7c 100644 (file)
@@ -1482,7 +1482,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
                break;
 
        default:
-               WARN_ON_ONCE(1);
                return AC_ERR_SYSTEM;
        }
 
index 46698232e6bff069293200a5da842e196a4a0730..19e6e539a061b93a6ac81975a69b3cdba7b1d699 100644 (file)
@@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class,
 
 static void ata_tport_release(struct device *dev)
 {
-       put_device(dev->parent);
 }
 
 /**
@@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent,
        device_initialize(dev);
        dev->type = &ata_port_type;
 
-       dev->parent = get_device(parent);
+       dev->parent = parent;
        dev->release = ata_tport_release;
        dev_set_name(dev, "ata%d", ap->print_id);
        transport_setup_device(dev);
@@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class,
 
 static void ata_tlink_release(struct device *dev)
 {
-       put_device(dev->parent);
 }
 
 /**
@@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link)
        int error;
 
        device_initialize(dev);
-       dev->parent = get_device(&ap->tdev);
+       dev->parent = &ap->tdev;
        dev->release = ata_tlink_release;
        if (ata_is_host_link(link))
                dev_set_name(dev, "link%d", ap->print_id);
@@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class,
 
 static void ata_tdev_release(struct device *dev)
 {
-       put_device(dev->parent);
 }
 
 /**
@@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev)
        int error;
 
        device_initialize(dev);
-       dev->parent = get_device(&link->tdev);
+       dev->parent = &link->tdev;
        dev->release = ata_tdev_release;
        if (ata_is_host_link(link))
                dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno);
index 3ad86fdf954e96a71b16f436f2b22bd02e6cbd8f..b1ad12552b566a6892a7de411e9cd0c65cf25933 100644 (file)
@@ -397,9 +397,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
                                irq, err);
                        return err;
                }
-               omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
 
-               priv->clk = of_clk_get(pdev->dev.of_node, 0);
+               priv->clk = devm_clk_get(&pdev->dev, NULL);
                if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
                        return -EPROBE_DEFER;
                if (!IS_ERR(priv->clk)) {
@@ -408,6 +407,19 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
                                dev_err(&pdev->dev, "unable to enable the clk, "
                                                    "err = %d\n", err);
                }
+
+               /*
+                * On OMAP4, enabling the shutdown_oflo interrupt is
+                * done in the interrupt mask register. There is no
+                * such register on EIP76, and it's enabled by the
+                * same bit in the control register
+                */
+               if (priv->pdata->regs[RNG_INTMASK_REG])
+                       omap_rng_write(priv, RNG_INTMASK_REG,
+                                      RNG_SHUTDOWN_OFLO_MASK);
+               else
+                       omap_rng_write(priv, RNG_CONTROL_REG,
+                                      RNG_SHUTDOWN_OFLO_MASK);
        }
        return 0;
 }
index 1ef26403bcc83f6a0c26f20d67b74e7fe6331635..0ab0249189072befe3cee1b8696052727f360540 100644 (file)
@@ -312,13 +312,6 @@ static int random_read_wakeup_bits = 64;
  */
 static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
 
-/*
- * The minimum number of seconds between urandom pool reseeding.  We
- * do this to limit the amount of entropy that can be drained from the
- * input pool even if there are heavy demands on /dev/urandom.
- */
-static int random_min_urandom_seed = 60;
-
 /*
  * Originally, we used a primitive polynomial of degree .poolwords
  * over GF(2).  The taps for various sizes are defined below.  They
@@ -409,7 +402,6 @@ static struct poolinfo {
  */
 static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait);
 static struct fasync_struct *fasync;
 
 static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -467,7 +459,6 @@ struct entropy_store {
        int entropy_count;
        int entropy_total;
        unsigned int initialized:1;
-       unsigned int limit:1;
        unsigned int last_data_init:1;
        __u8 last_data[EXTRACT_SIZE];
 };
@@ -485,7 +476,6 @@ static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy;
 static struct entropy_store input_pool = {
        .poolinfo = &poolinfo_table[0],
        .name = "input",
-       .limit = 1,
        .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
        .pool = input_pool_data
 };
@@ -493,7 +483,6 @@ static struct entropy_store input_pool = {
 static struct entropy_store blocking_pool = {
        .poolinfo = &poolinfo_table[1],
        .name = "blocking",
-       .limit = 1,
        .pull = &input_pool,
        .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
        .pool = blocking_pool_data,
@@ -855,13 +844,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
        spin_unlock_irqrestore(&primary_crng.lock, flags);
 }
 
-static inline void maybe_reseed_primary_crng(void)
-{
-       if (crng_init > 2 &&
-           time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
-               crng_reseed(&primary_crng, &input_pool);
-}
-
 static inline void crng_wait_ready(void)
 {
        wait_event_interruptible(crng_init_wait, crng_ready());
@@ -1220,15 +1202,6 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
            r->entropy_count > r->poolinfo->poolfracbits)
                return;
 
-       if (r->limit == 0 && random_min_urandom_seed) {
-               unsigned long now = jiffies;
-
-               if (time_before(now,
-                               r->last_pulled + random_min_urandom_seed * HZ))
-                       return;
-               r->last_pulled = now;
-       }
-
        _xfer_secondary_pool(r, nbytes);
 }
 
@@ -1236,8 +1209,6 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
 {
        __u32   tmp[OUTPUT_POOL_WORDS];
 
-       /* For /dev/random's pool, always leave two wakeups' worth */
-       int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4;
        int bytes = nbytes;
 
        /* pull at least as much as a wakeup */
@@ -1248,7 +1219,7 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes)
        trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8,
                                  ENTROPY_BITS(r), ENTROPY_BITS(r->pull));
        bytes = extract_entropy(r->pull, tmp, bytes,
-                               random_read_wakeup_bits / 8, rsvd_bytes);
+                               random_read_wakeup_bits / 8, 0);
        mix_pool_bytes(r, tmp, bytes);
        credit_entropy_bits(r, bytes*8);
 }
@@ -1276,7 +1247,7 @@ static void push_to_pool(struct work_struct *work)
 static size_t account(struct entropy_store *r, size_t nbytes, int min,
                      int reserved)
 {
-       int entropy_count, orig;
+       int entropy_count, orig, have_bytes;
        size_t ibytes, nfrac;
 
        BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
@@ -1285,14 +1256,12 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min,
 retry:
        entropy_count = orig = ACCESS_ONCE(r->entropy_count);
        ibytes = nbytes;
-       /* If limited, never pull more than available */
-       if (r->limit) {
-               int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
+       /* never pull more than available */
+       have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
 
-               if ((have_bytes -= reserved) < 0)
-                       have_bytes = 0;
-               ibytes = min_t(size_t, ibytes, have_bytes);
-       }
+       if ((have_bytes -= reserved) < 0)
+               have_bytes = 0;
+       ibytes = min_t(size_t, ibytes, have_bytes);
        if (ibytes < min)
                ibytes = 0;
 
@@ -1912,6 +1881,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 static int min_read_thresh = 8, min_write_thresh;
 static int max_read_thresh = OUTPUT_POOL_WORDS * 32;
 static int max_write_thresh = INPUT_POOL_WORDS * 32;
+static int random_min_urandom_seed = 60;
 static char sysctl_bootid[16];
 
 /*
@@ -2042,63 +2012,64 @@ struct ctl_table random_table[] = {
 };
 #endif         /* CONFIG_SYSCTL */
 
-static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
-
-int random_int_secret_init(void)
-{
-       get_random_bytes(random_int_secret, sizeof(random_int_secret));
-       return 0;
-}
-
-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
-               __aligned(sizeof(unsigned long));
+struct batched_entropy {
+       union {
+               u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)];
+               u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+       };
+       unsigned int position;
+};
 
 /*
- * Get a random word for internal kernel use only. Similar to urandom but
- * with the goal of minimal entropy pool depletion. As a result, the random
- * value is not cryptographically secure but for several uses the cost of
- * depleting entropy is too high
+ * Get a random word for internal kernel use only. The quality of the random
+ * number is either as good as RDRAND or as good as /dev/urandom, with the
+ * goal of being quite fast and not depleting entropy.
  */
-unsigned int get_random_int(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
+u64 get_random_u64(void)
 {
-       __u32 *hash;
-       unsigned int ret;
+       u64 ret;
+       struct batched_entropy *batch;
 
-       if (arch_get_random_int(&ret))
+#if BITS_PER_LONG == 64
+       if (arch_get_random_long((unsigned long *)&ret))
                return ret;
+#else
+       if (arch_get_random_long((unsigned long *)&ret) &&
+           arch_get_random_long((unsigned long *)&ret + 1))
+           return ret;
+#endif
 
-       hash = get_cpu_var(get_random_int_hash);
-
-       hash[0] += current->pid + jiffies + random_get_entropy();
-       md5_transform(hash, random_int_secret);
-       ret = hash[0];
-       put_cpu_var(get_random_int_hash);
-
+       batch = &get_cpu_var(batched_entropy_u64);
+       if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+               extract_crng((u8 *)batch->entropy_u64);
+               batch->position = 0;
+       }
+       ret = batch->entropy_u64[batch->position++];
+       put_cpu_var(batched_entropy_u64);
        return ret;
 }
-EXPORT_SYMBOL(get_random_int);
+EXPORT_SYMBOL(get_random_u64);
 
-/*
- * Same as get_random_int(), but returns unsigned long.
- */
-unsigned long get_random_long(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
+u32 get_random_u32(void)
 {
-       __u32 *hash;
-       unsigned long ret;
+       u32 ret;
+       struct batched_entropy *batch;
 
-       if (arch_get_random_long(&ret))
+       if (arch_get_random_int(&ret))
                return ret;
 
-       hash = get_cpu_var(get_random_int_hash);
-
-       hash[0] += current->pid + jiffies + random_get_entropy();
-       md5_transform(hash, random_int_secret);
-       ret = *(unsigned long *)hash;
-       put_cpu_var(get_random_int_hash);
-
+       batch = &get_cpu_var(batched_entropy_u32);
+       if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+               extract_crng((u8 *)batch->entropy_u32);
+               batch->position = 0;
+       }
+       ret = batch->entropy_u32[batch->position++];
+       put_cpu_var(batched_entropy_u32);
        return ret;
 }
-EXPORT_SYMBOL(get_random_long);
+EXPORT_SYMBOL(get_random_u32);
 
 /**
  * randomize_page - Generate a random, page aligned address
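get_random_u64()/get_random_u32() above now draw from a per-CPU batch that is refilled one ChaCha20 block at a time via extract_crng(), instead of running an MD5 transform per call. A userspace analogue of just the batching (using getrandom(2) as the refill source and skipping the per-CPU and locking details; glibc 2.25+ assumed for <sys/random.h>):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/random.h>

#define BATCH_WORDS 8	/* refill granularity, analogous to one cipher block */

static uint64_t batch[BATCH_WORDS];
static unsigned int position = BATCH_WORDS;	/* forces a refill on first use */

static uint64_t get_random_u64_batched(void)
{
	if (position == BATCH_WORDS) {
		if (getrandom(batch, sizeof(batch), 0) != (ssize_t)sizeof(batch)) {
			perror("getrandom");
			exit(1);	/* sketch: no retry/short-read handling */
		}
		position = 0;
	}
	return batch[position++];
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		printf("%016llx\n", (unsigned long long)get_random_u64_batched());
	return 0;
}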
index dce1af0ce85ce8ec6dbd7184f02776cb173c41f0..1b9da3dc799b05dff2971e6a5415cc3ae3d4bd85 100644 (file)
@@ -270,7 +270,7 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg,
        scatterwalk_done(&walk, out, 0);
 }
 
-static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+static void s5p_sg_done(struct s5p_aes_dev *dev)
 {
        if (dev->sg_dst_cpy) {
                dev_dbg(dev->dev,
@@ -281,8 +281,11 @@ static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
        }
        s5p_free_sg_cpy(dev, &dev->sg_src_cpy);
        s5p_free_sg_cpy(dev, &dev->sg_dst_cpy);
+}
 
-       /* holding a lock outside */
+/* Calls the completion. Cannot be called with dev->lock hold. */
+static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+{
        dev->req->base.complete(&dev->req->base, err);
        dev->busy = false;
 }
@@ -368,51 +371,44 @@ exit:
 }
 
 /*
- * Returns true if new transmitting (output) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_outdata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ *  - 0 if there is no more data,
+ *  - 1 if new transmitting (output) data is ready and its address+length
+ *     have to be written to device (by calling s5p_set_dma_outdata()).
  */
-static bool s5p_aes_tx(struct s5p_aes_dev *dev)
+static int s5p_aes_tx(struct s5p_aes_dev *dev)
 {
-       int err = 0;
-       bool ret = false;
+       int ret = 0;
 
        s5p_unset_outdata(dev);
 
        if (!sg_is_last(dev->sg_dst)) {
-               err = s5p_set_outdata(dev, sg_next(dev->sg_dst));
-               if (err)
-                       s5p_aes_complete(dev, err);
-               else
-                       ret = true;
-       } else {
-               s5p_aes_complete(dev, err);
-
-               dev->busy = true;
-               tasklet_schedule(&dev->tasklet);
+               ret = s5p_set_outdata(dev, sg_next(dev->sg_dst));
+               if (!ret)
+                       ret = 1;
        }
 
        return ret;
 }
 
 /*
- * Returns true if new receiving (input) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_indata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ *  - 0 if there is no more data,
+ *  - 1 if new receiving (input) data is ready and its address+length
+ *     have to be written to device (by calling s5p_set_dma_indata()).
  */
-static bool s5p_aes_rx(struct s5p_aes_dev *dev)
+static int s5p_aes_rx(struct s5p_aes_dev *dev/*, bool *set_dma*/)
 {
-       int err;
-       bool ret = false;
+       int ret = 0;
 
        s5p_unset_indata(dev);
 
        if (!sg_is_last(dev->sg_src)) {
-               err = s5p_set_indata(dev, sg_next(dev->sg_src));
-               if (err)
-                       s5p_aes_complete(dev, err);
-               else
-                       ret = true;
+               ret = s5p_set_indata(dev, sg_next(dev->sg_src));
+               if (!ret)
+                       ret = 1;
        }
 
        return ret;
@@ -422,33 +418,73 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id)
 {
        struct platform_device *pdev = dev_id;
        struct s5p_aes_dev *dev = platform_get_drvdata(pdev);
-       bool set_dma_tx = false;
-       bool set_dma_rx = false;
+       int err_dma_tx = 0;
+       int err_dma_rx = 0;
+       bool tx_end = false;
        unsigned long flags;
        uint32_t status;
+       int err;
 
        spin_lock_irqsave(&dev->lock, flags);
 
+       /*
+        * Handle rx or tx interrupt. If there is still data (scatterlist did not
+        * reach end), then map next scatterlist entry.
+        * In case of such mapping error, s5p_aes_complete() should be called.
+        *
+        * If there is no more data in tx scatter list, call s5p_aes_complete()
+        * and schedule new tasklet.
+        */
        status = SSS_READ(dev, FCINTSTAT);
        if (status & SSS_FCINTSTAT_BRDMAINT)
-               set_dma_rx = s5p_aes_rx(dev);
-       if (status & SSS_FCINTSTAT_BTDMAINT)
-               set_dma_tx = s5p_aes_tx(dev);
+               err_dma_rx = s5p_aes_rx(dev);
+
+       if (status & SSS_FCINTSTAT_BTDMAINT) {
+               if (sg_is_last(dev->sg_dst))
+                       tx_end = true;
+               err_dma_tx = s5p_aes_tx(dev);
+       }
 
        SSS_WRITE(dev, FCINTPEND, status);
 
-       /*
-        * Writing length of DMA block (either receiving or transmitting)
-        * will start the operation immediately, so this should be done
-        * at the end (even after clearing pending interrupts to not miss the
-        * interrupt).
-        */
-       if (set_dma_tx)
-               s5p_set_dma_outdata(dev, dev->sg_dst);
-       if (set_dma_rx)
-               s5p_set_dma_indata(dev, dev->sg_src);
+       if (err_dma_rx < 0) {
+               err = err_dma_rx;
+               goto error;
+       }
+       if (err_dma_tx < 0) {
+               err = err_dma_tx;
+               goto error;
+       }
+
+       if (tx_end) {
+               s5p_sg_done(dev);
+
+               spin_unlock_irqrestore(&dev->lock, flags);
+
+               s5p_aes_complete(dev, 0);
+               dev->busy = true;
+               tasklet_schedule(&dev->tasklet);
+       } else {
+               /*
+                * Writing length of DMA block (either receiving or
+                * transmitting) will start the operation immediately, so this
+                * should be done at the end (even after clearing pending
+                * interrupts to not miss the interrupt).
+                */
+               if (err_dma_tx == 1)
+                       s5p_set_dma_outdata(dev, dev->sg_dst);
+               if (err_dma_rx == 1)
+                       s5p_set_dma_indata(dev, dev->sg_src);
 
+               spin_unlock_irqrestore(&dev->lock, flags);
+       }
+
+       return IRQ_HANDLED;
+
+error:
+       s5p_sg_done(dev);
        spin_unlock_irqrestore(&dev->lock, flags);
+       s5p_aes_complete(dev, err);
 
        return IRQ_HANDLED;
 }
@@ -597,8 +633,9 @@ outdata_error:
        s5p_unset_indata(dev);
 
 indata_error:
-       s5p_aes_complete(dev, err);
+       s5p_sg_done(dev);
        spin_unlock_irqrestore(&dev->lock, flags);
+       s5p_aes_complete(dev, err);
 }
 
 static void s5p_tasklet_cb(unsigned long data)
@@ -805,8 +842,9 @@ static int s5p_aes_probe(struct platform_device *pdev)
                dev_warn(dev, "feed control interrupt is not available.\n");
                goto err_irq;
        }
-       err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt,
-                              IRQF_SHARED, pdev->name, pdev);
+       err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL,
+                                       s5p_aes_interrupt, IRQF_ONESHOT,
+                                       pdev->name, pdev);
        if (err < 0) {
                dev_warn(dev, "feed control interrupt is not available.\n");
                goto err_irq;
index 05bbf171df37720b7a3185e4e56dd8ee318beaf2..f96601268f7194bb5aada8f0f07327f2871d1c76 100644 (file)
@@ -198,8 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = {
 
 static int __init crossbar_of_init(struct device_node *node)
 {
+       u32 max = 0, entry, reg_size;
        int i, size, reserved = 0;
-       u32 max = 0, entry;
        const __be32 *irqsr;
        int ret = -ENOMEM;
 
@@ -276,9 +276,9 @@ static int __init crossbar_of_init(struct device_node *node)
        if (!cb->register_offsets)
                goto err_irq_map;
 
-       of_property_read_u32(node, "ti,reg-size", &size);
+       of_property_read_u32(node, "ti,reg-size", &reg_size);
 
-       switch (size) {
+       switch (reg_size) {
        case 1:
                cb->write = crossbar_writeb;
                break;
@@ -304,7 +304,7 @@ static int __init crossbar_of_init(struct device_node *node)
                        continue;
 
                cb->register_offsets[i] = reserved;
-               reserved += size;
+               reserved += reg_size;
        }
 
        of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
index 23201004fd7a68e39055a69abbd41019aa66b12b..f77f840d2b5f7995ee0424445546a140079a5022 100644 (file)
@@ -1601,6 +1601,14 @@ static void __maybe_unused its_enable_quirk_cavium_23144(void *data)
        its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144;
 }
 
+static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data)
+{
+       struct its_node *its = data;
+
+       /* On QDF2400, the size of the ITE is 16Bytes */
+       its->ite_size = 16;
+}
+
 static const struct gic_quirk its_quirks[] = {
 #ifdef CONFIG_CAVIUM_ERRATUM_22375
        {
@@ -1617,6 +1625,14 @@ static const struct gic_quirk its_quirks[] = {
                .mask   = 0xffff0fff,
                .init   = its_enable_quirk_cavium_23144,
        },
+#endif
+#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065
+       {
+               .desc   = "ITS: QDF2400 erratum 0065",
+               .iidr   = 0x00001070, /* QDF2400 ITS rev 1.x */
+               .mask   = 0xffffffff,
+               .init   = its_enable_quirk_qdf2400_e0065,
+       },
 #endif
        {
        }
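The QDF2400 entry above follows the driver's existing quirk-table pattern: each entry carries an IIDR value plus mask, and a matching entry's init hook tweaks the ITS (here, bumping the ITE size to 16 bytes). In isolation the pattern looks roughly like this (self-contained C with invented IDs, and a plain int standing in for the its_node):

#include <stdio.h>
#include <stdint.h>

struct quirk {
	const char *desc;
	uint32_t iidr;
	uint32_t mask;
	void (*init)(void *data);
};

/* Example hook: widen the (toy) ITE size, like the erratum above. */
static void quirk_big_ite(void *data)
{
	*(int *)data = 16;
}

static const struct quirk quirks[] = {
	{ "example erratum", 0x00001070, 0xffffffff, quirk_big_ite },
	{ NULL, 0, 0, NULL }	/* terminator */
};

static void apply_quirks(uint32_t iidr, void *data)
{
	for (const struct quirk *q = quirks; q->desc; q++)
		if ((iidr & q->mask) == q->iidr)
			q->init(data);
}

int main(void)
{
	int ite_size = 8;

	apply_quirks(0x00001070, &ite_size);
	printf("ITE size: %d bytes\n", ite_size);
	return 0;
}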
index 11e13c56126fba31fca9c59d66252ffeced55c8a..2da3ff650e1d550cc50fd8572156ddd1cabd7961 100644 (file)
@@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface,
                return -ENODEV;
        }
 
+       if (hostif->desc.bNumEndpoints < 1)
+               return -ENODEV;
+
        dev_info(&udev->dev,
                 "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n",
                 __func__, le16_to_cpu(udev->descriptor.idVendor),
index d71f6323ac001e0e55c50023521c8d7119087de3..b4f79b923aea5d25ea0baae037d5d89fcfd5a659 100644 (file)
@@ -504,7 +504,7 @@ void __init lguest_arch_host_init(void)
                 * byte, not the size, hence the "-1").
                 */
                state->host_gdt_desc.size = GDT_SIZE-1;
-               state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+               state->host_gdt_desc.address = (long)get_cpu_gdt_rw(i);
 
                /*
                 * All CPUs on the Host use the same Interrupt Descriptor
@@ -554,8 +554,8 @@ void __init lguest_arch_host_init(void)
                 * The Host needs to be able to use the LGUEST segments on this
                 * CPU, too, so put them in the Host GDT.
                 */
-               get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
-               get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+               get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+               get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
        }
 
        /*
index 3f041b1870335ab48daeb847b986a274f619677b..f757cef293f86881667333f3ecc05bbd5f41a197 100644 (file)
@@ -392,6 +392,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
         * To get all the fields, copy all archdata
         */
        dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata;
+       dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops;
 #endif /* CONFIG_PCI */
 
 #ifdef DEBUG
index f4ffd1eb8f44c3d5c44c50277fb703545157dbcf..dfb75979e4555d806ea52a494e161d4c6f8fa86b 100644 (file)
@@ -989,26 +989,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
        struct dm_offload *o = container_of(cb, struct dm_offload, cb);
        struct bio_list list;
        struct bio *bio;
+       int i;
 
        INIT_LIST_HEAD(&o->cb.list);
 
        if (unlikely(!current->bio_list))
                return;
 
-       list = *current->bio_list;
-       bio_list_init(current->bio_list);
-
-       while ((bio = bio_list_pop(&list))) {
-               struct bio_set *bs = bio->bi_pool;
-               if (unlikely(!bs) || bs == fs_bio_set) {
-                       bio_list_add(current->bio_list, bio);
-                       continue;
+       for (i = 0; i < 2; i++) {
+               list = current->bio_list[i];
+               bio_list_init(&current->bio_list[i]);
+
+               while ((bio = bio_list_pop(&list))) {
+                       struct bio_set *bs = bio->bi_pool;
+                       if (unlikely(!bs) || bs == fs_bio_set) {
+                               bio_list_add(&current->bio_list[i], bio);
+                               continue;
+                       }
+
+                       spin_lock(&bs->rescue_lock);
+                       bio_list_add(&bs->rescue_list, bio);
+                       queue_work(bs->rescue_workqueue, &bs->rescue_work);
+                       spin_unlock(&bs->rescue_lock);
                }
-
-               spin_lock(&bs->rescue_lock);
-               bio_list_add(&bs->rescue_list, bio);
-               queue_work(bs->rescue_workqueue, &bs->rescue_work);
-               spin_unlock(&bs->rescue_lock);
        }
 }
 
index 2b13117fb918cbe27775ba61cc68c6f78e5408ff..321ecac23027804d18ded577a5c05604ec46220a 100644 (file)
@@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
                bm_lockres->flags |= DLM_LKF_NOQUEUE;
                ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
                if (ret == -EAGAIN) {
-                       memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
                        s = read_resync_info(mddev, bm_lockres);
                        if (s) {
                                pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
@@ -974,6 +973,7 @@ static int leave(struct mddev *mddev)
        lockres_free(cinfo->bitmap_lockres);
        unlock_all_bitmaps(mddev);
        dlm_release_lockspace(cinfo->lockspace, 2);
+       kfree(cinfo);
        return 0;
 }
 
index 548d1b8014f89e9f4b1170daff8fa677d758f39a..f6ae1d67bcd02c6b743258ef3ff6a05896828cb5 100644 (file)
@@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_flush_request);
 
-void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
-       struct mddev *mddev = cb->data;
-       md_wakeup_thread(mddev->thread);
-       kfree(cb);
-}
-EXPORT_SYMBOL(md_unplug);
-
 static inline struct mddev *mddev_get(struct mddev *mddev)
 {
        atomic_inc(&mddev->active);
@@ -1887,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
        }
        sb = page_address(rdev->sb_page);
        sb->data_size = cpu_to_le64(num_sectors);
-       sb->super_offset = rdev->sb_start;
+       sb->super_offset = cpu_to_le64(rdev->sb_start);
        sb->sb_csum = calc_sb_1_csum(sb);
        do {
                md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2295,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev)
        /* Check if any mddev parameters have changed */
        if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
            (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
-           (mddev->layout != le64_to_cpu(sb->layout)) ||
+           (mddev->layout != le32_to_cpu(sb->layout)) ||
            (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
            (mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
                return true;
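
Both fixes above are about on-disk endianness: super_offset is a little-endian 64-bit field, so the raw sector number must go through cpu_to_le64() before being stored, and layout is a 32-bit field, so converting it with le64_to_cpu() applies the wrong width. A standalone, user-space illustration (not kernel code) of why the width matters on a byte-swapping host:

#include <stdint.h>
#include <stdio.h>

/* stand-ins for le32_to_cpu()/le64_to_cpu() on a big-endian host,
 * where the conversion really swaps bytes */
static uint32_t swap32(uint32_t v) { return __builtin_bswap32(v); }
static uint64_t swap64(uint64_t v) { return __builtin_bswap64(v); }

int main(void)
{
        uint32_t on_disk_layout = swap32(2);    /* little-endian encoding of 2 */

        /* correct width: the original value comes back */
        printf("32-bit conversion: %u\n", swap32(on_disk_layout));

        /* wrong width: the value is zero-extended and swapped as 64 bits,
         * ending up shifted into the high bytes */
        printf("64-bit conversion: %llu\n",
               (unsigned long long)swap64(on_disk_layout));
        return 0;
}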
@@ -6458,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
        mddev->layout        = info->layout;
        mddev->chunk_sectors = info->chunk_size >> 9;
 
-       mddev->max_disks     = MD_SB_DISKS;
-
        if (mddev->persistent) {
-               mddev->flags         = 0;
-               mddev->sb_flags         = 0;
+               mddev->max_disks = MD_SB_DISKS;
+               mddev->flags = 0;
+               mddev->sb_flags = 0;
        }
        set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
 
@@ -6533,8 +6524,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
                        return -ENOSPC;
        }
        rv = mddev->pers->resize(mddev, num_sectors);
-       if (!rv)
-               revalidate_disk(mddev->gendisk);
+       if (!rv) {
+               if (mddev->queue) {
+                       set_capacity(mddev->gendisk, mddev->array_sectors);
+                       revalidate_disk(mddev->gendisk);
+               }
+       }
        return rv;
 }
 
index b8859cbf84b618b39ed3d92a2887e8764c403919..dde8ecb760c87113ba36d50c0d6867bc6e215f02 100644 (file)
@@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                   struct mddev *mddev);
 
-extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
 extern void md_reload_sb(struct mddev *mddev, int raid_disk);
 extern void md_update_sb(struct mddev *mddev, int force);
 extern void md_kick_rdev_from_array(struct md_rdev * rdev);
 struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
-static inline int mddev_check_plugged(struct mddev *mddev)
-{
-       return !!blk_check_plugged(md_unplug, mddev,
-                                  sizeof(struct blk_plug_cb));
-}
 
 static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
 {
index fbc2d7851b497fec0cacd45832bbd9c9d258eaae..a34f58772022c9f40243e1d117a3473332bd76a2 100644 (file)
@@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf)
 static void freeze_array(struct r1conf *conf, int extra)
 {
        /* Stop sync I/O and normal I/O and wait for everything to
-        * go quite.
+        * go quiet.
         * This is called in two situations:
         * 1) management command handlers (reshape, remove disk, quiesce).
         * 2) one normal I/O request failed.
@@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio)
                        split = bio;
                }
 
-               if (bio_data_dir(split) == READ)
+               if (bio_data_dir(split) == READ) {
                        raid1_read_request(mddev, split);
-               else
+
+                       /*
+                        * If a bio is split, the first part of the bio
+                        * passes the barrier but is still queued in
+                        * current->bio_list (see generic_make_request).
+                        * If raise_barrier() is called here, the second
+                        * part of the bio can't pass the barrier. But
+                        * since the first part isn't dispatched to the
+                        * underlying disks yet, the barrier is never
+                        * released, hence raise_barrier will wait
+                        * forever. We have a deadlock.
+                        * Note, this only happens in the read path. For
+                        * the write path, the first part of the bio is
+                        * dispatched in a schedule() call (because of
+                        * the blk plug) or offloaded to raid1d.
+                        * Returning from the function immediately
+                        * changes the bio order queued in bio_list and
+                        * avoids the deadlock.
+                        */
+                       if (split != bio) {
+                               generic_make_request(bio);
+                               break;
+                       }
+               } else
                        raid1_write_request(mddev, split);
        } while (split != bio);
 }
@@ -3246,8 +3267,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
                        return ret;
        }
        md_set_array_sectors(mddev, newsize);
-       set_capacity(mddev->gendisk, mddev->array_sectors);
-       revalidate_disk(mddev->gendisk);
        if (sectors > mddev->dev_sectors &&
            mddev->recovery_cp > mddev->dev_sectors) {
                mddev->recovery_cp = mddev->dev_sectors;
index 063c43d83b72c2f0f753edb7b08f8dd608fa15ad..e89a8d78a9ed537f417c414b2081ef5f9a97f291 100644 (file)
@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
                                    !conf->barrier ||
                                    (atomic_read(&conf->nr_pending) &&
                                     current->bio_list &&
-                                    !bio_list_empty(current->bio_list)),
+                                    (!bio_list_empty(&current->bio_list[0]) ||
+                                     !bio_list_empty(&current->bio_list[1]))),
                                    conf->resync_lock);
                conf->nr_waiting--;
                if (!conf->nr_waiting)
@@ -1477,11 +1478,24 @@ retry_write:
                        mbio->bi_bdev = (void*)rdev;
 
                        atomic_inc(&r10_bio->remaining);
+
+                       cb = blk_check_plugged(raid10_unplug, mddev,
+                                              sizeof(*plug));
+                       if (cb)
+                               plug = container_of(cb, struct raid10_plug_cb,
+                                                   cb);
+                       else
+                               plug = NULL;
                        spin_lock_irqsave(&conf->device_lock, flags);
-                       bio_list_add(&conf->pending_bio_list, mbio);
-                       conf->pending_count++;
+                       if (plug) {
+                               bio_list_add(&plug->pending, mbio);
+                               plug->pending_cnt++;
+                       } else {
+                               bio_list_add(&conf->pending_bio_list, mbio);
+                               conf->pending_count++;
+                       }
                        spin_unlock_irqrestore(&conf->device_lock, flags);
-                       if (!mddev_check_plugged(mddev))
+                       if (!plug)
                                md_wakeup_thread(mddev->thread);
                }
        }
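
With mddev_check_plugged() gone (see the md.h hunk above), write bios are now batched on the current task's block plug through blk_check_plugged(); only when no plug callback can be attached is the md thread woken immediately. A hedged sketch of the container_of() pattern this relies on (the raid10_plug_cb layout is inferred from the fields used above):

struct raid10_plug_cb {
        struct blk_plug_cb      cb;             /* embedded callback, recovered via container_of() */
        struct bio_list         pending;
        int                     pending_cnt;
};

        struct blk_plug_cb *cb;
        struct raid10_plug_cb *plug = NULL;

        /* attach (or find) our unplug callback on the task's plug;
         * returns NULL if the task is not currently plugged */
        cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
        if (cb)
                plug = container_of(cb, struct raid10_plug_cb, cb);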
@@ -1571,7 +1585,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
                        split = bio;
                }
 
+               /*
+                * If a bio is split, the first part of the bio passes the
+                * barrier but is still queued in current->bio_list (see
+                * generic_make_request). If raise_barrier() is called here,
+                * the second part of the bio can't pass the barrier. But
+                * since the first part isn't dispatched to the underlying
+                * disks yet, the barrier is never released, hence
+                * raise_barrier will wait forever. We have a deadlock.
+                * Note, this only happens in the read path. For the write
+                * path, the first part of the bio is dispatched in a
+                * schedule() call (because of the blk plug) or offloaded
+                * to raid10d.
+                * Returning from the function immediately changes the bio
+                * order queued in bio_list and avoids the deadlock.
+                */
                __make_request(mddev, split);
+               if (split != bio && bio_data_dir(bio) == READ) {
+                       generic_make_request(bio);
+                       break;
+               }
        } while (split != bio);
 
        /* In case raid10d snuck in to freeze_array */
@@ -3943,10 +3975,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
                        return ret;
        }
        md_set_array_sectors(mddev, size);
-       if (mddev->queue) {
-               set_capacity(mddev->gendisk, mddev->array_sectors);
-               revalidate_disk(mddev->gendisk);
-       }
        if (sectors > mddev->dev_sectors &&
            mddev->recovery_cp > oldsize) {
                mddev->recovery_cp = oldsize;
index 4fb09b3fcb410468a9b1939b93d9529e70dd592d..ed5cd705b985f13611d26b44e81aefbb0e93c306 100644 (file)
@@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs,
                     (test_bit(R5_Wantdrain, &dev->flags) ||
                      test_bit(R5_InJournal, &dev->flags))) ||
                    (srctype == SYNDROME_SRC_WRITTEN &&
-                    dev->written)) {
+                    (dev->written ||
+                     test_bit(R5_InJournal, &dev->flags)))) {
                        if (test_bit(R5_InJournal, &dev->flags))
                                srcs[slot] = sh->dev[i].orig_page;
                        else
@@ -7605,8 +7606,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
                        return ret;
        }
        md_set_array_sectors(mddev, newsize);
-       set_capacity(mddev->gendisk, mddev->array_sectors);
-       revalidate_disk(mddev->gendisk);
        if (sectors > mddev->dev_sectors &&
            mddev->recovery_cp > mddev->dev_sectors) {
                mddev->recovery_cp = mddev->dev_sectors;
index 248f60d171a5a0ce76744a95e2d59039939e6538..ffea9859f5a72be1edbfdb1d52d2c70a0df2b19e 100644 (file)
@@ -2272,10 +2272,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
        processed = xgbe_rx_poll(channel, budget);
 
        /* If we processed everything, we are done */
-       if (processed < budget) {
-               /* Turn off polling */
-               napi_complete_done(napi, processed);
-
+       if ((processed < budget) && napi_complete_done(napi, processed)) {
                /* Enable Tx and Rx interrupts */
                if (pdata->channel_irq_mode)
                        xgbe_enable_rx_tx_int(pdata, channel);
@@ -2317,10 +2314,7 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget)
        } while ((processed < budget) && (processed != last_processed));
 
        /* If we processed everything, we are done */
-       if (processed < budget) {
-               /* Turn off polling */
-               napi_complete_done(napi, processed);
-
+       if ((processed < budget) && napi_complete_done(napi, processed)) {
                /* Enable Tx and Rx interrupts */
                xgbe_enable_rx_tx_ints(pdata);
        }
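
Both poll routines now re-enable interrupts only when napi_complete_done() reports that polling has really ended; when it returns false (for example while busy polling keeps the instance active), the interrupts stay masked. The canonical poll shape using that return value, sketched with hypothetical helpers (foo_*):

static int foo_poll(struct napi_struct *napi, int budget)
{
        struct foo_channel *ch = container_of(napi, struct foo_channel, napi);
        int work_done;

        work_done = foo_clean_rx(ch, budget);   /* hypothetical RX processing */

        /* re-arm the interrupt only if polling has really stopped */
        if (work_done < budget && napi_complete_done(napi, work_done))
                foo_enable_irq(ch);             /* hypothetical */

        return work_done;
}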
index 581de71a958a3519682272a49dec55432e9eebed..4c6c882c6a1c424238473ea40ecf9f0ebf7cee28 100644 (file)
@@ -213,9 +213,9 @@ void aq_pci_func_free_irqs(struct aq_pci_func_s *self)
                if (!((1U << i) & self->msix_entry_mask))
                        continue;
 
-               free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
                if (pdev->msix_enabled)
                        irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL);
+               free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
                self->msix_entry_mask &= ~(1U << i);
        }
 }
index d8d06fdfc42b9d685244513c1542b69bd78d7ca9..ac76fc251d268e4ec5f9ca2904345557fb2cd55a 100644 (file)
@@ -13292,17 +13292,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
        dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
                NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
 
-       /* VF with OLD Hypervisor or old PF do not support filtering */
        if (IS_PF(bp)) {
                if (chip_is_e1x)
                        bp->accept_any_vlan = true;
                else
                        dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#ifdef CONFIG_BNX2X_SRIOV
-       } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
-               dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#endif
        }
+       /* For VF we'll know whether to enable VLAN filtering after
+        * getting a response to CHANNEL_TLV_ACQUIRE from PF.
+        */
 
        dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX;
        dev->features |= NETIF_F_HIGHDMA;
@@ -13738,7 +13736,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
        if (!netif_running(bp->dev)) {
                DP(BNX2X_MSG_PTP,
                   "PTP adjfreq called while the interface is down\n");
-               return -EFAULT;
+               return -ENETDOWN;
        }
 
        if (ppb < 0) {
@@ -13797,6 +13795,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
        struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
 
+       if (!netif_running(bp->dev)) {
+               DP(BNX2X_MSG_PTP,
+                  "PTP adjtime called while the interface is down\n");
+               return -ENETDOWN;
+       }
+
        DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
 
        timecounter_adjtime(&bp->timecounter, delta);
@@ -13809,6 +13813,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
        struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
        u64 ns;
 
+       if (!netif_running(bp->dev)) {
+               DP(BNX2X_MSG_PTP,
+                  "PTP gettime called while the interface is down\n");
+               return -ENETDOWN;
+       }
+
        ns = timecounter_read(&bp->timecounter);
 
        DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns);
@@ -13824,6 +13834,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp,
        struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
        u64 ns;
 
+       if (!netif_running(bp->dev)) {
+               DP(BNX2X_MSG_PTP,
+                  "PTP settime called while the interface is down\n");
+               return -ENETDOWN;
+       }
+
        ns = timespec64_to_ns(ts);
 
        DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
@@ -13991,6 +14007,14 @@ static int bnx2x_init_one(struct pci_dev *pdev,
                rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count);
                if (rc)
                        goto init_one_freemem;
+
+#ifdef CONFIG_BNX2X_SRIOV
+               /* VF with OLD Hypervisor or old PF do not support filtering */
+               if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
+                       dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+                       dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+               }
+#endif
        }
 
        /* Enable SRIOV if capability found in configuration space */
index 6fad22adbbb9e72fda1208d2b689ef371ac77d09..bdfd53b46bc568286ac9debc70bb14563329040b 100644 (file)
@@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
 
        /* Add/Remove the filter */
        rc = bnx2x_config_vlan_mac(bp, &ramrod);
-       if (rc && rc != -EEXIST) {
+       if (rc == -EEXIST)
+               return 0;
+       if (rc) {
                BNX2X_ERR("Failed to %s %s\n",
                          filter->add ? "add" : "delete",
                          (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ?
@@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp,
                return rc;
        }
 
+       filter->applied = true;
+
        return 0;
 }
 
@@ -469,8 +473,10 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf,
        /* Rollback if needed */
        if (i != filters->count) {
                BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
-                         i, filters->count + 1);
+                         i, filters->count);
                while (--i >= 0) {
+                       if (!filters->filters[i].applied)
+                               continue;
                        filters->filters[i].add = !filters->filters[i].add;
                        bnx2x_vf_mac_vlan_config(bp, vf, qid,
                                                 &filters->filters[i],
@@ -1899,7 +1905,8 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
                        continue;
                }
 
-               DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid);
+               DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+                      "add addresses for vf %d\n", vf->abs_vfid);
                for_each_vfq(vf, j) {
                        struct bnx2x_vf_queue *rxq = vfq_get(vf, j);
 
@@ -1920,11 +1927,12 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
                                cpu_to_le32(U64_HI(q_stats_addr));
                        cur_query_entry->address.lo =
                                cpu_to_le32(U64_LO(q_stats_addr));
-                       DP(BNX2X_MSG_IOV,
-                          "added address %x %x for vf %d queue %d client %d\n",
-                          cur_query_entry->address.hi,
-                          cur_query_entry->address.lo, cur_query_entry->funcID,
-                          j, cur_query_entry->index);
+                       DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+                              "added address %x %x for vf %d queue %d client %d\n",
+                              cur_query_entry->address.hi,
+                              cur_query_entry->address.lo,
+                              cur_query_entry->funcID,
+                              j, cur_query_entry->index);
                        cur_query_entry++;
                        cur_data_offset += sizeof(struct per_queue_stats);
                        stats_count++;
index 7a6d406f4c111774ea606c98253608273ae8787b..888d0b6632e86f2f7ab7e2f9e605be87fa4c7061 100644 (file)
@@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter {
        (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
 
        bool add;
+       bool applied;
        u8 *mac;
        u16 vid;
 };
index bfae300cf25ff881292dc36ad56e51e37132cd76..76a4668c50fe98edb3d6e955351a357a3e3f0608 100644 (file)
@@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
        struct bnx2x *bp = netdev_priv(dev);
        struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
        struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
-       int rc, i = 0;
+       int rc = 0, i = 0;
        struct netdev_hw_addr *ha;
 
        if (bp->state != BNX2X_STATE_OPEN) {
@@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
        /* Get Rx mode requested */
        DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
 
+       /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */
+       if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) {
+               DP(NETIF_MSG_IFUP,
+                  "VF supports not more than %d multicast MAC addresses\n",
+                  PFVF_MAX_MULTICAST_PER_VF);
+               rc = -EINVAL;
+               goto out;
+       }
+
        netdev_for_each_mc_addr(ha, dev) {
                DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
                   bnx2x_mc_addr(ha));
@@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
                i++;
        }
 
-       /* We support four PFVF_MAX_MULTICAST_PER_VF mcast
-         * addresses tops
-         */
-       if (i >= PFVF_MAX_MULTICAST_PER_VF) {
-               DP(NETIF_MSG_IFUP,
-                  "VF supports not more than %d multicast MAC addresses\n",
-                  PFVF_MAX_MULTICAST_PER_VF);
-               return -EINVAL;
-       }
-
        req->n_multicast = i;
        req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
        req->vf_qid = 0;
@@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev)
 out:
        bnx2x_vfpf_finalize(bp, &req->first_tlv);
 
-       return 0;
+       return rc;
 }
 
 /* request pf to add a vlan for the vf */
@@ -1778,6 +1777,23 @@ static int bnx2x_vf_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf)
                                goto op_err;
                }
 
+               /* build vlan list */
+               fl = NULL;
+
+               rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl,
+                                              VFPF_VLAN_FILTER);
+               if (rc)
+                       goto op_err;
+
+               if (fl) {
+                       /* set vlan list */
+                       rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl,
+                                                          msg->vf_qid,
+                                                          false);
+                       if (rc)
+                               goto op_err;
+               }
+
        }
 
        if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) {
index 235733e91c791b8ff04951cbdd42ec10ab435bae..32de4589d16a2cde27f2e5674b234e2b7185f00d 100644 (file)
@@ -4465,6 +4465,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
                vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
        }
 #endif
+       if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) &
+                           FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED))
+               bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
+
        switch (resp->port_partition_type) {
        case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
        case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5:
@@ -5507,8 +5511,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
                bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
                                 PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
        }
-       link_info->support_auto_speeds =
-               le16_to_cpu(resp->supported_speeds_auto_mode);
+       if (resp->supported_speeds_auto_mode)
+               link_info->support_auto_speeds =
+                       le16_to_cpu(resp->supported_speeds_auto_mode);
 
 hwrm_phy_qcaps_exit:
        mutex_unlock(&bp->hwrm_cmd_lock);
@@ -6495,8 +6500,14 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent)
        if (!silent)
                bnxt_dbg_dump_states(bp);
        if (netif_running(bp->dev)) {
+               int rc;
+
+               if (!silent)
+                       bnxt_ulp_stop(bp);
                bnxt_close_nic(bp, false, false);
-               bnxt_open_nic(bp, false, false);
+               rc = bnxt_open_nic(bp, false, false);
+               if (!silent && !rc)
+                       bnxt_ulp_start(bp);
        }
 }
 
@@ -7444,6 +7455,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (rc)
                goto init_err_pci_clean;
 
+       rc = bnxt_hwrm_func_reset(bp);
+       if (rc)
+               goto init_err_pci_clean;
+
        bnxt_hwrm_fw_set_time(bp);
 
        dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
@@ -7554,10 +7569,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (rc)
                goto init_err_pci_clean;
 
-       rc = bnxt_hwrm_func_reset(bp);
-       if (rc)
-               goto init_err_pci_clean;
-
        rc = bnxt_init_int_mode(bp);
        if (rc)
                goto init_err_pci_clean;
index faf26a2f726b808792fd837437bf7abb9279a8c7..c7a5b84a5cb20ecb1112f831d868238f9cead76b 100644 (file)
@@ -993,6 +993,7 @@ struct bnxt {
                                         BNXT_FLAG_ROCEV2_CAP)
        #define BNXT_FLAG_NO_AGG_RINGS  0x20000
        #define BNXT_FLAG_RX_PAGE_MODE  0x40000
+       #define BNXT_FLAG_FW_LLDP_AGENT 0x80000
        #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000
 
        #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |             \
index fdf2d8caf7bfaae56b4b39c415feaa343f29d579..03532061d211b168d1bd7774d2aeea9b4ed3a776 100644 (file)
@@ -474,7 +474,7 @@ void bnxt_dcb_init(struct bnxt *bp)
                return;
 
        bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE;
-       if (BNXT_PF(bp))
+       if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
                bp->dcbx_cap |= DCB_CAP_DCBX_HOST;
        else
                bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
index f92896835d2a4ceb18e7d69fdc53ce7730402667..69015fa50f2096c77999539179446e5ab38fa759 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Broadcom GENET (Gigabit Ethernet) controller driver
  *
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -450,6 +450,22 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
                        genet_dma_ring_regs[r]);
 }
 
+static int bcmgenet_begin(struct net_device *dev)
+{
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+
+       /* Turn on the clock */
+       return clk_prepare_enable(priv->clk);
+}
+
+static void bcmgenet_complete(struct net_device *dev)
+{
+       struct bcmgenet_priv *priv = netdev_priv(dev);
+
+       /* Turn off the clock */
+       clk_disable_unprepare(priv->clk);
+}
+
 static int bcmgenet_get_link_ksettings(struct net_device *dev,
                                       struct ethtool_link_ksettings *cmd)
 {
@@ -778,8 +794,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
        STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
        /* Misc UniMAC counters */
        STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt,
-                       UMAC_RBUF_OVFL_CNT),
-       STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT),
+                       UMAC_RBUF_OVFL_CNT_V1),
+       STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt,
+                       UMAC_RBUF_ERR_CNT_V1),
        STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT),
        STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
        STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
@@ -821,6 +838,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset,
        }
 }
 
+static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset)
+{
+       u16 new_offset;
+       u32 val;
+
+       switch (offset) {
+       case UMAC_RBUF_OVFL_CNT_V1:
+               if (GENET_IS_V2(priv))
+                       new_offset = RBUF_OVFL_CNT_V2;
+               else
+                       new_offset = RBUF_OVFL_CNT_V3PLUS;
+
+               val = bcmgenet_rbuf_readl(priv, new_offset);
+               /* clear if overflowed */
+               if (val == ~0)
+                       bcmgenet_rbuf_writel(priv, 0, new_offset);
+               break;
+       case UMAC_RBUF_ERR_CNT_V1:
+               if (GENET_IS_V2(priv))
+                       new_offset = RBUF_ERR_CNT_V2;
+               else
+                       new_offset = RBUF_ERR_CNT_V3PLUS;
+
+               val = bcmgenet_rbuf_readl(priv, new_offset);
+               /* clear if overflowed */
+               if (val == ~0)
+                       bcmgenet_rbuf_writel(priv, 0, new_offset);
+               break;
+       default:
+               val = bcmgenet_umac_readl(priv, offset);
+               /* clear if overflowed */
+               if (val == ~0)
+                       bcmgenet_umac_writel(priv, 0, offset);
+               break;
+       }
+
+       return val;
+}
+
 static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
 {
        int i, j = 0;
@@ -836,19 +892,28 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
                case BCMGENET_STAT_NETDEV:
                case BCMGENET_STAT_SOFT:
                        continue;
-               case BCMGENET_STAT_MIB_RX:
-               case BCMGENET_STAT_MIB_TX:
                case BCMGENET_STAT_RUNT:
-                       if (s->type != BCMGENET_STAT_MIB_RX)
-                               offset = BCMGENET_STAT_OFFSET;
+                       offset += BCMGENET_STAT_OFFSET;
+                       /* fall through */
+               case BCMGENET_STAT_MIB_TX:
+                       offset += BCMGENET_STAT_OFFSET;
+                       /* fall through */
+               case BCMGENET_STAT_MIB_RX:
                        val = bcmgenet_umac_readl(priv,
                                                  UMAC_MIB_START + j + offset);
+                       offset = 0;     /* Reset Offset */
                        break;
                case BCMGENET_STAT_MISC:
-                       val = bcmgenet_umac_readl(priv, s->reg_offset);
-                       /* clear if overflowed */
-                       if (val == ~0)
-                               bcmgenet_umac_writel(priv, 0, s->reg_offset);
+                       if (GENET_IS_V1(priv)) {
+                               val = bcmgenet_umac_readl(priv, s->reg_offset);
+                               /* clear if overflowed */
+                               if (val == ~0)
+                                       bcmgenet_umac_writel(priv, 0,
+                                                            s->reg_offset);
+                       } else {
+                               val = bcmgenet_update_stat_misc(priv,
+                                                               s->reg_offset);
+                       }
                        break;
                }
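
The reworked switch above intentionally falls through so a RUNT statistic accumulates two block offsets, a TX statistic one, and an RX statistic none before the MIB register is read, replacing the old single-offset special case. The accumulate-by-fall-through idiom in isolation, as a standalone C program (BLOCK_OFFSET is illustrative):

#include <stdio.h>

enum stat_type { STAT_RX, STAT_TX, STAT_RUNT };

#define BLOCK_OFFSET 0x40       /* illustrative size of one MIB block */

static unsigned int stat_offset(enum stat_type type)
{
        unsigned int offset = 0;

        switch (type) {
        case STAT_RUNT:
                offset += BLOCK_OFFSET;
                /* fall through */
        case STAT_TX:
                offset += BLOCK_OFFSET;
                /* fall through */
        case STAT_RX:
                break;
        }
        return offset;
}

int main(void)
{
        printf("rx=%#x tx=%#x runt=%#x\n",
               stat_offset(STAT_RX), stat_offset(STAT_TX),
               stat_offset(STAT_RUNT));
        return 0;
}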
 
@@ -973,6 +1038,8 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 
 /* standard ethtool support functions. */
 static const struct ethtool_ops bcmgenet_ethtool_ops = {
+       .begin                  = bcmgenet_begin,
+       .complete               = bcmgenet_complete,
        .get_strings            = bcmgenet_get_strings,
        .get_sset_count         = bcmgenet_get_sset_count,
        .get_ethtool_stats      = bcmgenet_get_ethtool_stats,
@@ -1167,7 +1234,6 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
        struct bcmgenet_priv *priv = netdev_priv(dev);
        struct device *kdev = &priv->pdev->dev;
        struct enet_cb *tx_cb_ptr;
-       struct netdev_queue *txq;
        unsigned int pkts_compl = 0;
        unsigned int bytes_compl = 0;
        unsigned int c_index;
@@ -1219,13 +1285,8 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
        dev->stats.tx_packets += pkts_compl;
        dev->stats.tx_bytes += bytes_compl;
 
-       txq = netdev_get_tx_queue(dev, ring->queue);
-       netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
-
-       if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
-               if (netif_tx_queue_stopped(txq))
-                       netif_tx_wake_queue(txq);
-       }
+       netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue),
+                                 pkts_compl, bytes_compl);
 
        return pkts_compl;
 }
@@ -1248,8 +1309,16 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
        struct bcmgenet_tx_ring *ring =
                container_of(napi, struct bcmgenet_tx_ring, napi);
        unsigned int work_done = 0;
+       struct netdev_queue *txq;
+       unsigned long flags;
 
-       work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
+       spin_lock_irqsave(&ring->lock, flags);
+       work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring);
+       if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
+               txq = netdev_get_tx_queue(ring->priv->dev, ring->queue);
+               netif_tx_wake_queue(txq);
+       }
+       spin_unlock_irqrestore(&ring->lock, flags);
 
        if (work_done == 0) {
                napi_complete(napi);
@@ -2457,24 +2526,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
 /* Interrupt bottom half */
 static void bcmgenet_irq_task(struct work_struct *work)
 {
+       unsigned long flags;
+       unsigned int status;
        struct bcmgenet_priv *priv = container_of(
                        work, struct bcmgenet_priv, bcmgenet_irq_work);
 
        netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
 
-       if (priv->irq0_stat & UMAC_IRQ_MPD_R) {
-               priv->irq0_stat &= ~UMAC_IRQ_MPD_R;
+       spin_lock_irqsave(&priv->lock, flags);
+       status = priv->irq0_stat;
+       priv->irq0_stat = 0;
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       if (status & UMAC_IRQ_MPD_R) {
                netif_dbg(priv, wol, priv->dev,
                          "magic packet detected, waking up\n");
                bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
        }
 
        /* Link UP/DOWN event */
-       if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
+       if (status & UMAC_IRQ_LINK_EVENT)
                phy_mac_interrupt(priv->phydev,
-                                 !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
-               priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
-       }
+                                 !!(status & UMAC_IRQ_LINK_UP));
 }
 
 /* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2483,22 +2556,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
        struct bcmgenet_priv *priv = dev_id;
        struct bcmgenet_rx_ring *rx_ring;
        struct bcmgenet_tx_ring *tx_ring;
-       unsigned int index;
+       unsigned int index, status;
 
-       /* Save irq status for bottom-half processing. */
-       priv->irq1_stat =
-               bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
+       /* Read irq status */
+       status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
                ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
 
        /* clear interrupts */
-       bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
+       bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR);
 
        netif_dbg(priv, intr, priv->dev,
-                 "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
+                 "%s: IRQ=0x%x\n", __func__, status);
 
        /* Check Rx priority queue interrupts */
        for (index = 0; index < priv->hw_params->rx_queues; index++) {
-               if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+               if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
                        continue;
 
                rx_ring = &priv->rx_rings[index];
@@ -2511,7 +2583,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
 
        /* Check Tx priority queue interrupts */
        for (index = 0; index < priv->hw_params->tx_queues; index++) {
-               if (!(priv->irq1_stat & BIT(index)))
+               if (!(status & BIT(index)))
                        continue;
 
                tx_ring = &priv->tx_rings[index];
@@ -2531,19 +2603,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
        struct bcmgenet_priv *priv = dev_id;
        struct bcmgenet_rx_ring *rx_ring;
        struct bcmgenet_tx_ring *tx_ring;
+       unsigned int status;
+       unsigned long flags;
 
-       /* Save irq status for bottom-half processing. */
-       priv->irq0_stat =
-               bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
+       /* Read irq status */
+       status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
                ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
 
        /* clear interrupts */
-       bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
+       bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR);
 
        netif_dbg(priv, intr, priv->dev,
-                 "IRQ=0x%x\n", priv->irq0_stat);
+                 "IRQ=0x%x\n", status);
 
-       if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) {
+       if (status & UMAC_IRQ_RXDMA_DONE) {
                rx_ring = &priv->rx_rings[DESC_INDEX];
 
                if (likely(napi_schedule_prep(&rx_ring->napi))) {
@@ -2552,7 +2625,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
                }
        }
 
-       if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) {
+       if (status & UMAC_IRQ_TXDMA_DONE) {
                tx_ring = &priv->tx_rings[DESC_INDEX];
 
                if (likely(napi_schedule_prep(&tx_ring->napi))) {
@@ -2561,22 +2634,23 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
                }
        }
 
-       if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
-                               UMAC_IRQ_PHY_DET_F |
-                               UMAC_IRQ_LINK_EVENT |
-                               UMAC_IRQ_HFB_SM |
-                               UMAC_IRQ_HFB_MM |
-                               UMAC_IRQ_MPD_R)) {
-               /* all other interested interrupts handled in bottom half */
-               schedule_work(&priv->bcmgenet_irq_work);
-       }
-
        if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
-           priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
-               priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
+               status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
                wake_up(&priv->wq);
        }
 
+       /* all other interested interrupts handled in bottom half */
+       status &= (UMAC_IRQ_LINK_EVENT |
+                  UMAC_IRQ_MPD_R);
+       if (status) {
+               /* Save irq status for bottom-half processing. */
+               spin_lock_irqsave(&priv->lock, flags);
+               priv->irq0_stat |= status;
+               spin_unlock_irqrestore(&priv->lock, flags);
+
+               schedule_work(&priv->bcmgenet_irq_work);
+       }
+
        return IRQ_HANDLED;
 }
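
Taken together, the isr0 and irq_task changes stop using priv->irq0_stat as scratch space in hard-IRQ context: the handler now ORs only the deferred events into it under priv->lock, and the work item snapshots and clears it under the same lock, so nothing is lost or handled twice between the two contexts. A hedged sketch of that handoff with hypothetical names (foo_*):

/* hard-IRQ context: accumulate events and kick the bottom half */
static irqreturn_t foo_isr(int irq, void *dev_id)
{
        struct foo_priv *priv = dev_id;
        unsigned int status = foo_read_and_ack_irqs(priv);     /* hypothetical */
        unsigned long flags;

        if (status & FOO_DEFERRED_EVENTS) {                     /* hypothetical mask */
                spin_lock_irqsave(&priv->lock, flags);
                priv->irq_stat |= status & FOO_DEFERRED_EVENTS;
                spin_unlock_irqrestore(&priv->lock, flags);
                schedule_work(&priv->irq_work);
        }
        return IRQ_HANDLED;
}

/* process context: consume the accumulated events exactly once */
static void foo_irq_task(struct work_struct *work)
{
        struct foo_priv *priv = container_of(work, struct foo_priv, irq_work);
        unsigned long flags;
        unsigned int status;

        spin_lock_irqsave(&priv->lock, flags);
        status = priv->irq_stat;
        priv->irq_stat = 0;
        spin_unlock_irqrestore(&priv->lock, flags);

        if (status & FOO_LINK_EVENT)                            /* hypothetical */
                foo_handle_link_change(priv);
}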
 
@@ -2801,6 +2875,8 @@ err_irq0:
 err_fini_dma:
        bcmgenet_fini_dma(priv);
 err_clk_disable:
+       if (priv->internal_phy)
+               bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
        clk_disable_unprepare(priv->clk);
        return ret;
 }
@@ -3177,6 +3253,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
         */
        gphy_rev = reg & 0xffff;
 
+       /* This is reserved so should require special treatment */
+       if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+               pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
+               return;
+       }
+
        /* This is the good old scheme, just GPHY major, no minor nor patch */
        if ((gphy_rev & 0xf0) != 0)
                priv->gphy_rev = gphy_rev << 8;
@@ -3185,12 +3267,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
        else if ((gphy_rev & 0xff00) != 0)
                priv->gphy_rev = gphy_rev;
 
-       /* This is reserved so should require special treatment */
-       else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
-               pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
-               return;
-       }
-
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
        if (!(params->flags & GENET_HAS_40BITS))
                pr_warn("GENET does not support 40-bits PA\n");
@@ -3233,6 +3309,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
        const void *macaddr;
        struct resource *r;
        int err = -EIO;
+       const char *phy_mode_str;
 
        /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */
        dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1,
@@ -3276,6 +3353,8 @@ static int bcmgenet_probe(struct platform_device *pdev)
                goto err;
        }
 
+       spin_lock_init(&priv->lock);
+
        SET_NETDEV_DEV(dev, &pdev->dev);
        dev_set_drvdata(&pdev->dev, dev);
        ether_addr_copy(dev->dev_addr, macaddr);
@@ -3338,6 +3417,13 @@ static int bcmgenet_probe(struct platform_device *pdev)
                priv->clk_eee = NULL;
        }
 
+       /* If this is an internal GPHY, power it on now, before UniMAC is
+        * brought out of reset as absolutely no UniMAC activity is allowed
+        */
+       if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) &&
+           !strcasecmp(phy_mode_str, "internal"))
+               bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
        err = reset_umac(priv);
        if (err)
                goto err_clk_disable;
@@ -3502,6 +3588,8 @@ static int bcmgenet_resume(struct device *d)
        return 0;
 
 out_clk_disable:
+       if (priv->internal_phy)
+               bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
        clk_disable_unprepare(priv->clk);
        return ret;
 }
index 1e2dc34d331a49e05a8fc9a66156dfeeb00ee10f..db7f289d65ae2abd1589446ee0cadc00ffbf0254 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -214,7 +214,9 @@ struct bcmgenet_mib_counters {
 #define  MDIO_REG_SHIFT                        16
 #define  MDIO_REG_MASK                 0x1F
 
-#define UMAC_RBUF_OVFL_CNT             0x61C
+#define UMAC_RBUF_OVFL_CNT_V1          0x61C
+#define RBUF_OVFL_CNT_V2               0x80
+#define RBUF_OVFL_CNT_V3PLUS           0x94
 
 #define UMAC_MPD_CTRL                  0x620
 #define  MPD_EN                                (1 << 0)
@@ -224,7 +226,9 @@ struct bcmgenet_mib_counters {
 
 #define UMAC_MPD_PW_MS                 0x624
 #define UMAC_MPD_PW_LS                 0x628
-#define UMAC_RBUF_ERR_CNT              0x634
+#define UMAC_RBUF_ERR_CNT_V1           0x634
+#define RBUF_ERR_CNT_V2                        0x84
+#define RBUF_ERR_CNT_V3PLUS            0x98
 #define UMAC_MDF_ERR_CNT               0x638
 #define UMAC_MDF_CTRL                  0x650
 #define UMAC_MDF_ADDR                  0x654
@@ -619,11 +623,13 @@ struct bcmgenet_priv {
        struct work_struct bcmgenet_irq_work;
        int irq0;
        int irq1;
-       unsigned int irq0_stat;
-       unsigned int irq1_stat;
        int wol_irq;
        bool wol_irq_disabled;
 
+       /* shared status */
+       spinlock_t lock;
+       unsigned int irq0_stat;
+
        /* HW descriptors/checksum variables */
        bool desc_64b_en;
        bool desc_rxchk_en;
index be9c0e3f5ade7d4e61694da214702f0223ab5d59..92f46b1375c32527b29e24a4476d6b455835bd46 100644 (file)
@@ -152,7 +152,7 @@ struct octnic_gather {
         */
        struct octeon_sg_entry *sg;
 
-       u64 sg_dma_ptr;
+       dma_addr_t sg_dma_ptr;
 };
 
 struct handshake {
@@ -734,6 +734,9 @@ static void delete_glists(struct lio *lio)
        struct octnic_gather *g;
        int i;
 
+       kfree(lio->glist_lock);
+       lio->glist_lock = NULL;
+
        if (!lio->glist)
                return;
 
@@ -741,23 +744,26 @@ static void delete_glists(struct lio *lio)
                do {
                        g = (struct octnic_gather *)
                                list_delete_head(&lio->glist[i]);
-                       if (g) {
-                               if (g->sg) {
-                                       dma_unmap_single(&lio->oct_dev->
-                                                        pci_dev->dev,
-                                                        g->sg_dma_ptr,
-                                                        g->sg_size,
-                                                        DMA_TO_DEVICE);
-                                       kfree((void *)((unsigned long)g->sg -
-                                                      g->adjust));
-                               }
+                       if (g)
                                kfree(g);
-                       }
                } while (g);
+
+               if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+                       lio_dma_free(lio->oct_dev,
+                                    lio->glist_entry_size * lio->tx_qsize,
+                                    lio->glists_virt_base[i],
+                                    lio->glists_dma_base[i]);
+               }
        }
 
-       kfree((void *)lio->glist);
-       kfree((void *)lio->glist_lock);
+       kfree(lio->glists_virt_base);
+       lio->glists_virt_base = NULL;
+
+       kfree(lio->glists_dma_base);
+       lio->glists_dma_base = NULL;
+
+       kfree(lio->glist);
+       lio->glist = NULL;
 }
 
 /**
@@ -772,13 +778,30 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
        lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
                                  GFP_KERNEL);
        if (!lio->glist_lock)
-               return 1;
+               return -ENOMEM;
 
        lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
                             GFP_KERNEL);
        if (!lio->glist) {
-               kfree((void *)lio->glist_lock);
-               return 1;
+               kfree(lio->glist_lock);
+               lio->glist_lock = NULL;
+               return -ENOMEM;
+       }
+
+       lio->glist_entry_size =
+               ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+       /* allocate memory to store virtual and dma base address of
+        * per glist consistent memory
+        */
+       lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+                                       GFP_KERNEL);
+       lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+                                      GFP_KERNEL);
+
+       if (!lio->glists_virt_base || !lio->glists_dma_base) {
+               delete_glists(lio);
+               return -ENOMEM;
        }
 
        for (i = 0; i < num_iqs; i++) {
@@ -788,6 +811,16 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
 
                INIT_LIST_HEAD(&lio->glist[i]);
 
+               lio->glists_virt_base[i] =
+                       lio_dma_alloc(oct,
+                                     lio->glist_entry_size * lio->tx_qsize,
+                                     &lio->glists_dma_base[i]);
+
+               if (!lio->glists_virt_base[i]) {
+                       delete_glists(lio);
+                       return -ENOMEM;
+               }
+
                for (j = 0; j < lio->tx_qsize; j++) {
                        g = kzalloc_node(sizeof(*g), GFP_KERNEL,
                                         numa_node);
@@ -796,43 +829,18 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs)
                        if (!g)
                                break;
 
-                       g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
-                                     OCT_SG_ENTRY_SIZE);
+                       g->sg = lio->glists_virt_base[i] +
+                               (j * lio->glist_entry_size);
 
-                       g->sg = kmalloc_node(g->sg_size + 8,
-                                            GFP_KERNEL, numa_node);
-                       if (!g->sg)
-                               g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
-                       if (!g->sg) {
-                               kfree(g);
-                               break;
-                       }
-
-                       /* The gather component should be aligned on 64-bit
-                        * boundary
-                        */
-                       if (((unsigned long)g->sg) & 7) {
-                               g->adjust = 8 - (((unsigned long)g->sg) & 7);
-                               g->sg = (struct octeon_sg_entry *)
-                                       ((unsigned long)g->sg + g->adjust);
-                       }
-                       g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev,
-                                                      g->sg, g->sg_size,
-                                                      DMA_TO_DEVICE);
-                       if (dma_mapping_error(&oct->pci_dev->dev,
-                                             g->sg_dma_ptr)) {
-                               kfree((void *)((unsigned long)g->sg -
-                                              g->adjust));
-                               kfree(g);
-                               break;
-                       }
+                       g->sg_dma_ptr = lio->glists_dma_base[i] +
+                                       (j * lio->glist_entry_size);
 
                        list_add_tail(&g->list, &lio->glist[i]);
                }
 
                if (j != lio->tx_qsize) {
                        delete_glists(lio);
-                       return 1;
+                       return -ENOMEM;
                }
        }
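
Instead of kmalloc'ing and streaming-mapping each scatter list, the gather lists are now carved out of one coherent DMA allocation per queue: entry j lives at base + j * entry_size in both the CPU and DMA views, which is why the per-packet dma_sync/dma_unmap calls disappear elsewhere in this patch. A generic sketch of that carve-up with the core API (lio_dma_alloc() in the patch is presumably a wrapper around a coherent allocation; names and sizes here are illustrative):

        void *virt_base;
        dma_addr_t dma_base;
        size_t entry_size = 64;         /* illustrative, 8-byte aligned */
        int j;

        virt_base = dma_alloc_coherent(dev, entry_size * nr_entries,
                                       &dma_base, GFP_KERNEL);
        if (!virt_base)
                return -ENOMEM;

        for (j = 0; j < nr_entries; j++) {
                entry[j].sg         = virt_base + j * entry_size;
                entry[j].sg_dma_ptr = dma_base + j * entry_size;
        }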
 
@@ -1885,9 +1893,6 @@ static void free_netsgbuf(void *buf)
                i++;
        }
 
-       dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
-                               g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
        iq = skb_iq(lio, skb);
        spin_lock(&lio->glist_lock[iq]);
        list_add_tail(&g->list, &lio->glist[iq]);
@@ -1933,9 +1938,6 @@ static void free_netsgbuf_with_resp(void *buf)
                i++;
        }
 
-       dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
-                               g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
        iq = skb_iq(lio, skb);
 
        spin_lock(&lio->glist_lock[iq]);
@@ -3273,8 +3275,6 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                        i++;
                }
 
-               dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr,
-                                          g->sg_size, DMA_TO_DEVICE);
                dptr = g->sg_dma_ptr;
 
                if (OCTEON_CN23XX_PF(oct))
index 9d5e03502c76cbfe3c8372a5d3e73c67e07e3a03..7b83be4ce1fe0ce5cab0c7ff889edbf334a5a065 100644 (file)
@@ -108,6 +108,8 @@ struct octnic_gather {
         * received from the IP layer.
         */
        struct octeon_sg_entry *sg;
+
+       dma_addr_t sg_dma_ptr;
 };
 
 struct octeon_device_priv {
@@ -490,6 +492,9 @@ static void delete_glists(struct lio *lio)
        struct octnic_gather *g;
        int i;
 
+       kfree(lio->glist_lock);
+       lio->glist_lock = NULL;
+
        if (!lio->glist)
                return;
 
@@ -497,17 +502,26 @@ static void delete_glists(struct lio *lio)
                do {
                        g = (struct octnic_gather *)
                            list_delete_head(&lio->glist[i]);
-                       if (g) {
-                               if (g->sg)
-                                       kfree((void *)((unsigned long)g->sg -
-                                                       g->adjust));
+                       if (g)
                                kfree(g);
-                       }
                } while (g);
+
+               if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+                       lio_dma_free(lio->oct_dev,
+                                    lio->glist_entry_size * lio->tx_qsize,
+                                    lio->glists_virt_base[i],
+                                    lio->glists_dma_base[i]);
+               }
        }
 
+       kfree(lio->glists_virt_base);
+       lio->glists_virt_base = NULL;
+
+       kfree(lio->glists_dma_base);
+       lio->glists_dma_base = NULL;
+
        kfree(lio->glist);
-       kfree(lio->glist_lock);
+       lio->glist = NULL;
 }
 
 /**
@@ -522,13 +536,30 @@ static int setup_glists(struct lio *lio, int num_iqs)
        lio->glist_lock =
            kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
        if (!lio->glist_lock)
-               return 1;
+               return -ENOMEM;
 
        lio->glist =
            kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
        if (!lio->glist) {
                kfree(lio->glist_lock);
-               return 1;
+               lio->glist_lock = NULL;
+               return -ENOMEM;
+       }
+
+       lio->glist_entry_size =
+               ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+       /* Allocate memory to store the virtual and DMA base addresses of
+        * the per-glist consistent memory.
+        */
+       lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+                                       GFP_KERNEL);
+       lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+                                      GFP_KERNEL);
+
+       if (!lio->glists_virt_base || !lio->glists_dma_base) {
+               delete_glists(lio);
+               return -ENOMEM;
        }
 
        for (i = 0; i < num_iqs; i++) {
@@ -536,34 +567,33 @@ static int setup_glists(struct lio *lio, int num_iqs)
 
                INIT_LIST_HEAD(&lio->glist[i]);
 
+               lio->glists_virt_base[i] =
+                       lio_dma_alloc(lio->oct_dev,
+                                     lio->glist_entry_size * lio->tx_qsize,
+                                     &lio->glists_dma_base[i]);
+
+               if (!lio->glists_virt_base[i]) {
+                       delete_glists(lio);
+                       return -ENOMEM;
+               }
+
                for (j = 0; j < lio->tx_qsize; j++) {
                        g = kzalloc(sizeof(*g), GFP_KERNEL);
                        if (!g)
                                break;
 
-                       g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
-                                     OCT_SG_ENTRY_SIZE);
+                       g->sg = lio->glists_virt_base[i] +
+                               (j * lio->glist_entry_size);
 
-                       g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
-                       if (!g->sg) {
-                               kfree(g);
-                               break;
-                       }
+                       g->sg_dma_ptr = lio->glists_dma_base[i] +
+                                       (j * lio->glist_entry_size);
 
-                       /* The gather component should be aligned on 64-bit
-                        * boundary
-                        */
-                       if (((unsigned long)g->sg) & 7) {
-                               g->adjust = 8 - (((unsigned long)g->sg) & 7);
-                               g->sg = (struct octeon_sg_entry *)
-                                       ((unsigned long)g->sg + g->adjust);
-                       }
                        list_add_tail(&g->list, &lio->glist[i]);
                }
 
                if (j != lio->tx_qsize) {
                        delete_glists(lio);
-                       return 1;
+                       return -ENOMEM;
                }
        }
 
@@ -1324,10 +1354,6 @@ static void free_netsgbuf(void *buf)
                i++;
        }
 
-       dma_unmap_single(&lio->oct_dev->pci_dev->dev,
-                        finfo->dptr, g->sg_size,
-                        DMA_TO_DEVICE);
-
        iq = skb_iq(lio, skb);
 
        spin_lock(&lio->glist_lock[iq]);
@@ -1374,10 +1400,6 @@ static void free_netsgbuf_with_resp(void *buf)
                i++;
        }
 
-       dma_unmap_single(&lio->oct_dev->pci_dev->dev,
-                        finfo->dptr, g->sg_size,
-                        DMA_TO_DEVICE);
-
        iq = skb_iq(lio, skb);
 
        spin_lock(&lio->glist_lock[iq]);
@@ -2382,23 +2404,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
                        i++;
                }
 
-               dptr = dma_map_single(&oct->pci_dev->dev,
-                                     g->sg, g->sg_size,
-                                     DMA_TO_DEVICE);
-               if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
-                       dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
-                               __func__);
-                       dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
-                                        skb->len - skb->data_len,
-                                        DMA_TO_DEVICE);
-                       for (j = 1; j <= frags; j++) {
-                               frag = &skb_shinfo(skb)->frags[j - 1];
-                               dma_unmap_page(&oct->pci_dev->dev,
-                                              g->sg[j >> 2].ptr[j & 3],
-                                              frag->size, DMA_TO_DEVICE);
-                       }
-                       return NETDEV_TX_BUSY;
-               }
+               dptr = g->sg_dma_ptr;
 
                ndata.cmd.cmd3.dptr = dptr;
                finfo->dptr = dptr;
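
The liquidio hunks above replace per-packet kmalloc'd gather lists and streaming DMA
(dma_map_single()/dma_sync_single_*) with one dma_alloc_coherent() region per input queue;
each gather component sits at a fixed offset in that region, so its bus address
(g->sg_dma_ptr) is known up front and liquidio_xmit() can use it directly. Below is a
minimal userspace sketch of just that offset arithmetic, with malloc() standing in for
lio_dma_alloc() and illustrative sizes; it is not driver code.

/* One contiguous block per queue, entry j at a fixed offset, so the
 * virtual and DMA views stay in lockstep.  Names mirror the driver,
 * values are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	size_t entry_size = 64;          /* stand-in for lio->glist_entry_size */
	size_t tx_qsize = 8;             /* stand-in for lio->tx_qsize */
	uint8_t *virt_base = malloc(entry_size * tx_qsize);
	uintptr_t dma_base = 0x10000000; /* pretend bus address from the allocator */

	if (!virt_base)
		return 1;

	for (size_t j = 0; j < tx_qsize; j++) {
		void *sg = virt_base + j * entry_size;          /* g->sg */
		uintptr_t sg_dma = dma_base + j * entry_size;   /* g->sg_dma_ptr */

		printf("entry %zu: virt=%p dma=0x%lx\n", j, sg,
		       (unsigned long)sg_dma);
	}
	free(virt_base);
	return 0;
}
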
index b3dc2e9651a8e205d7e6e451109f98e96065de2c..d29ebc531151f0fe85cb83826d3af9b069d75f52 100644 (file)
 #define   CN23XX_MAX_RINGS_PER_VF          8
 
 #define   CN23XX_MAX_INPUT_QUEUES      CN23XX_MAX_RINGS_PER_PF
-#define   CN23XX_MAX_IQ_DESCRIPTORS    2048
+#define   CN23XX_MAX_IQ_DESCRIPTORS    512
 #define   CN23XX_DB_MIN                 1
 #define   CN23XX_DB_MAX                 8
 #define   CN23XX_DB_TIMEOUT             1
 
 #define   CN23XX_MAX_OUTPUT_QUEUES     CN23XX_MAX_RINGS_PER_PF
-#define   CN23XX_MAX_OQ_DESCRIPTORS    2048
+#define   CN23XX_MAX_OQ_DESCRIPTORS    512
 #define   CN23XX_OQ_BUF_SIZE           1536
 #define   CN23XX_OQ_PKTSPER_INTR       128
 /*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
-#define   CN23XX_OQ_REFIL_THRESHOLD    128
+#define   CN23XX_OQ_REFIL_THRESHOLD    16
 
 #define   CN23XX_OQ_INTR_PKT           64
 #define   CN23XX_OQ_INTR_TIME          100
index 0be87d119a979ea70117e13b2213987460da2a81..79f809479af6e7d865cc7c280c84232622af982e 100644 (file)
@@ -155,11 +155,6 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct,
                        recv_buffer_destroy(droq->recv_buf_list[i].buffer,
                                            pg_info);
 
-               if (droq->desc_ring && droq->desc_ring[i].info_ptr)
-                       lio_unmap_ring_info(oct->pci_dev,
-                                           (u64)droq->
-                                           desc_ring[i].info_ptr,
-                                           OCT_DROQ_INFO_SIZE);
                droq->recv_buf_list[i].buffer = NULL;
        }
 
@@ -211,10 +206,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no)
        vfree(droq->recv_buf_list);
 
        if (droq->info_base_addr)
-               cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
-                                      droq->info_alloc_size,
-                                      droq->info_base_addr,
-                                      droq->info_list_dma);
+               lio_free_info_buffer(oct, droq);
 
        if (droq->desc_ring)
                lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
@@ -294,12 +286,7 @@ int octeon_init_droq(struct octeon_device *oct,
        dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no,
                droq->max_count);
 
-       droq->info_list =
-               cnnic_numa_alloc_aligned_dma((droq->max_count *
-                                             OCT_DROQ_INFO_SIZE),
-                                            &droq->info_alloc_size,
-                                            &droq->info_base_addr,
-                                            numa_node);
+       droq->info_list = lio_alloc_info_buffer(oct, droq);
        if (!droq->info_list) {
                dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n");
                lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
index e62074090681d3597f973f54fadb2133b21b2931..6982c0af5eccb7129123fcbb4ba8363bb7f9710a 100644 (file)
@@ -325,10 +325,10 @@ struct octeon_droq {
        size_t desc_ring_dma;
 
        /** Info ptr list are allocated at this virtual address. */
-       size_t info_base_addr;
+       void *info_base_addr;
 
        /** DMA mapped address of the info list */
-       size_t info_list_dma;
+       dma_addr_t info_list_dma;
 
        /** Allocated size of info list. */
        u32 info_alloc_size;
index aa36e9ae7676556e562a8bb4c9cb46aacc709890..bed9ef17bc26b4526cf3c57dcb4823cdc2586491 100644 (file)
@@ -140,48 +140,6 @@ err_release_region:
        return 1;
 }
 
-static inline void *
-cnnic_numa_alloc_aligned_dma(u32 size,
-                            u32 *alloc_size,
-                            size_t *orig_ptr,
-                            int numa_node)
-{
-       int retries = 0;
-       void *ptr = NULL;
-
-#define OCTEON_MAX_ALLOC_RETRIES     1
-       do {
-               struct page *page = NULL;
-
-               page = alloc_pages_node(numa_node,
-                                       GFP_KERNEL,
-                                       get_order(size));
-               if (!page)
-                       page = alloc_pages(GFP_KERNEL,
-                                          get_order(size));
-               ptr = (void *)page_address(page);
-               if ((unsigned long)ptr & 0x07) {
-                       __free_pages(page, get_order(size));
-                       ptr = NULL;
-                       /* Increment the size required if the first
-                        * attempt failed.
-                        */
-                       if (!retries)
-                               size += 7;
-               }
-               retries++;
-       } while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr);
-
-       *alloc_size = size;
-       *orig_ptr = (unsigned long)ptr;
-       if ((unsigned long)ptr & 0x07)
-               ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL));
-       return ptr;
-}
-
-#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
-               free_pages(orig_ptr, get_order(size))
-
 static inline int
 sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 {
index 6bb89419006eb5635cc65c415ad6183f5296f6d3..eef2a1e8a7e3f96b26f004ec0eec93e447a5d61f 100644 (file)
@@ -62,6 +62,9 @@ struct lio {
 
        /** Array of gather component linked lists */
        struct list_head *glist;
+       void **glists_virt_base;
+       dma_addr_t *glists_dma_base;
+       u32 glist_entry_size;
 
        /** Pointer to the NIC properties for the Octeon device this network
         *  interface is associated with.
@@ -344,6 +347,29 @@ static inline void tx_buffer_free(void *buffer)
 #define lio_dma_free(oct, size, virt_addr, dma_addr) \
        dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
 
+static inline void *
+lio_alloc_info_buffer(struct octeon_device *oct,
+                     struct octeon_droq *droq)
+{
+       void *virt_ptr;
+
+       virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE),
+                                &droq->info_list_dma);
+       if (virt_ptr) {
+               droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE;
+               droq->info_base_addr = virt_ptr;
+       }
+
+       return virt_ptr;
+}
+
+static inline void lio_free_info_buffer(struct octeon_device *oct,
+                                       struct octeon_droq *droq)
+{
+       lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr,
+                    droq->info_list_dma);
+}
+
 static inline
 void *get_rbd(struct sk_buff *skb)
 {
@@ -359,22 +385,7 @@ void *get_rbd(struct sk_buff *skb)
 static inline u64
 lio_map_ring_info(struct octeon_droq *droq, u32 i)
 {
-       dma_addr_t dma_addr;
-       struct octeon_device *oct = droq->oct_dev;
-
-       dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i],
-                                 OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE);
-
-       WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
-
-       return (u64)dma_addr;
-}
-
-static inline void
-lio_unmap_ring_info(struct pci_dev *pci_dev,
-                   u64 info_ptr, u32 size)
-{
-       dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE);
+       return droq->info_list_dma + (i * sizeof(struct octeon_droq_info));
 }
 
 static inline u64
index e739c715356283553f4ace131a251bc4b30d6de2..2269ff562d9562eede1d1d02c2e66ee3dcfed89b 100644 (file)
@@ -269,6 +269,7 @@ struct nicvf {
 #define        MAX_QUEUES_PER_QSET                     8
        struct queue_set        *qs;
        struct nicvf_cq_poll    *napi[8];
+       void                    *iommu_domain;
        u8                      vf_id;
        u8                      sqs_id;
        bool                    sqs_mode;
index 6feaa24bcfd42bb9647298a0b665e6bf3b11d496..24017588f5317107142897ca77aa0668046532d5 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/log2.h>
 #include <linux/prefetch.h>
 #include <linux/irq.h>
+#include <linux/iommu.h>
 
 #include "nic_reg.h"
 #include "nic.h"
@@ -525,7 +526,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
                        /* Get actual TSO descriptors and free them */
                        tso_sqe =
                         (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+                       nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+                                                tso_sqe->subdesc_cnt);
                        nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+               } else {
+                       nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+                                                hdr->subdesc_cnt);
                }
                nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
                prefetch(skb);
@@ -576,6 +582,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 {
        struct sk_buff *skb;
        struct nicvf *nic = netdev_priv(netdev);
+       struct nicvf *snic = nic;
        int err = 0;
        int rq_idx;
 
@@ -592,7 +599,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
        if (err && !cqe_rx->rb_cnt)
                return;
 
-       skb = nicvf_get_rcv_skb(nic, cqe_rx);
+       skb = nicvf_get_rcv_skb(snic, cqe_rx);
        if (!skb) {
                netdev_dbg(nic->netdev, "Packet not received\n");
                return;
@@ -1643,6 +1650,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (!pass1_silicon(nic->pdev))
                nic->hw_tso = true;
 
+       /* Get iommu domain for iova to physical addr conversion */
+       nic->iommu_domain = iommu_get_domain_for_dev(dev);
+
        pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
        if (sdevid == 0xA134)
                nic->t88 = true;
index ac0390be3b126e957071bde64daebdd29b536c34..f13289f0d2386d09e348b13c310346d42e20b74d 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/netdevice.h>
 #include <linux/ip.h>
 #include <linux/etherdevice.h>
+#include <linux/iommu.h>
 #include <net/ip.h>
 #include <net/tso.h>
 
 #include "q_struct.h"
 #include "nicvf_queues.h"
 
+#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0)
+
+static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr)
+{
+       /* Translation is installed only when IOMMU is present */
+       if (nic->iommu_domain)
+               return iommu_iova_to_phys(nic->iommu_domain, dma_addr);
+       return dma_addr;
+}
+
 static void nicvf_get_page(struct nicvf *nic)
 {
        if (!nic->rb_pageref || !nic->rb_page)
@@ -87,7 +98,7 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
 static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
                                         u32 buf_len, u64 **rbuf)
 {
-       int order = (PAGE_SIZE <= 4096) ?  PAGE_ALLOC_COSTLY_ORDER : 0;
+       int order = NICVF_PAGE_ORDER;
 
        /* Check if request can be accommodated in previously allocated page */
        if (nic->rb_page &&
@@ -97,22 +108,27 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
        }
 
        nicvf_get_page(nic);
-       nic->rb_page = NULL;
 
        /* Allocate a new page */
+       nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+                                  order);
        if (!nic->rb_page) {
-               nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
-                                          order);
-               if (!nic->rb_page) {
-                       this_cpu_inc(nic->pnicvf->drv_stats->
-                                    rcv_buffer_alloc_failures);
-                       return -ENOMEM;
-               }
-               nic->rb_page_offset = 0;
+               this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
+               return -ENOMEM;
        }
-
+       nic->rb_page_offset = 0;
 ret:
-       *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+       /* HW will ensure data coherency, CPU sync not required */
+       *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
+                                               nic->rb_page_offset, buf_len,
+                                               DMA_FROM_DEVICE,
+                                               DMA_ATTR_SKIP_CPU_SYNC));
+       if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
+               if (!nic->rb_page_offset)
+                       __free_pages(nic->rb_page, order);
+               nic->rb_page = NULL;
+               return -ENOMEM;
+       }
        nic->rb_page_offset += buf_len;
 
        return 0;
@@ -158,16 +174,21 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
        rbdr->dma_size = buf_size;
        rbdr->enable = true;
        rbdr->thresh = RBDR_THRESH;
+       rbdr->head = 0;
+       rbdr->tail = 0;
 
        nic->rb_page = NULL;
        for (idx = 0; idx < ring_len; idx++) {
                err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN,
                                             &rbuf);
-               if (err)
+               if (err) {
+                       /* To free already allocated and mapped ones */
+                       rbdr->tail = idx - 1;
                        return err;
+               }
 
                desc = GET_RBDR_DESC(rbdr, idx);
-               desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+               desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
        }
 
        nicvf_get_page(nic);
@@ -179,7 +200,7 @@ static int  nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
 static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
 {
        int head, tail;
-       u64 buf_addr;
+       u64 buf_addr, phys_addr;
        struct rbdr_entry_t *desc;
 
        if (!rbdr)
@@ -192,18 +213,26 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
        head = rbdr->head;
        tail = rbdr->tail;
 
-       /* Free SKBs */
+       /* Release page references */
        while (head != tail) {
                desc = GET_RBDR_DESC(rbdr, head);
-               buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-               put_page(virt_to_page(phys_to_virt(buf_addr)));
+               buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+               phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+               dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+                                    DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+               if (phys_addr)
+                       put_page(virt_to_page(phys_to_virt(phys_addr)));
                head++;
                head &= (rbdr->dmem.q_len - 1);
        }
-       /* Free SKB of tail desc */
+       /* Release buffer of tail desc */
        desc = GET_RBDR_DESC(rbdr, tail);
-       buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
-       put_page(virt_to_page(phys_to_virt(buf_addr)));
+       buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+       phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+       dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+                            DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+       if (phys_addr)
+               put_page(virt_to_page(phys_to_virt(phys_addr)));
 
        /* Free RBDR ring */
        nicvf_free_q_desc_mem(nic, &rbdr->dmem);
@@ -250,7 +279,7 @@ refill:
                        break;
 
                desc = GET_RBDR_DESC(rbdr, tail);
-               desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+               desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
                refill_rb_cnt--;
                new_rb++;
        }
@@ -361,9 +390,29 @@ static int nicvf_init_snd_queue(struct nicvf *nic,
        return 0;
 }
 
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+                             int hdr_sqe, u8 subdesc_cnt)
+{
+       u8 idx;
+       struct sq_gather_subdesc *gather;
+
+       /* Unmap DMA mapped skb data buffers */
+       for (idx = 0; idx < subdesc_cnt; idx++) {
+               hdr_sqe++;
+               hdr_sqe &= (sq->dmem.q_len - 1);
+               gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
+               /* HW will ensure data coherency, CPU sync not required */
+               dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
+                                    gather->size, DMA_TO_DEVICE,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+       }
+}
+
 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
 {
        struct sk_buff *skb;
+       struct sq_hdr_subdesc *hdr;
+       struct sq_hdr_subdesc *tso_sqe;
 
        if (!sq)
                return;
@@ -379,8 +428,22 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
        smp_rmb();
        while (sq->head != sq->tail) {
                skb = (struct sk_buff *)sq->skbuff[sq->head];
-               if (skb)
-                       dev_kfree_skb_any(skb);
+               if (!skb)
+                       goto next;
+               hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+               /* Check for dummy descriptor used for HW TSO offload on 88xx */
+               if (hdr->dont_send) {
+                       /* Get actual TSO descriptors and unmap them */
+                       tso_sqe =
+                        (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+                       nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+                                                tso_sqe->subdesc_cnt);
+               } else {
+                       nicvf_unmap_sndq_buffers(nic, sq, sq->head,
+                                                hdr->subdesc_cnt);
+               }
+               dev_kfree_skb_any(skb);
+next:
                sq->head++;
                sq->head &= (sq->dmem.q_len - 1);
        }
@@ -559,9 +622,11 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
        nicvf_send_msg_to_pf(nic, &mbx);
 
        if (!nic->sqs_mode && (qidx == 0)) {
-               /* Enable checking L3/L4 length and TCP/UDP checksums */
+               /* Enable checking L3/L4 length and TCP/UDP checksums.
+                * Also allow IPv6 packets with a zero UDP checksum.
+                */
                nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
-                                     (BIT(24) | BIT(23) | BIT(21)));
+                                     (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
                nicvf_config_vlan_stripping(nic, nic->netdev->features);
        }
 
@@ -882,6 +947,14 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
        return qentry;
 }
 
+/* Rollback to previous tail pointer when descriptors not used */
+static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
+                                         int qentry, int desc_cnt)
+{
+       sq->tail = qentry;
+       atomic_add(desc_cnt, &sq->free_cnt);
+}
+
 /* Free descriptor back to SQ for future use */
 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
 {
@@ -1207,8 +1280,9 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
                        struct sk_buff *skb, u8 sq_num)
 {
        int i, size;
-       int subdesc_cnt, tso_sqe = 0;
+       int subdesc_cnt, hdr_sqe = 0;
        int qentry;
+       u64 dma_addr;
 
        subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
        if (subdesc_cnt > atomic_read(&sq->free_cnt))
@@ -1223,12 +1297,21 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
        /* Add SQ header subdesc */
        nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
                                 skb, skb->len);
-       tso_sqe = qentry;
+       hdr_sqe = qentry;
 
        /* Add SQ gather subdescs */
        qentry = nicvf_get_nxt_sqentry(sq, qentry);
        size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
-       nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data));
+       /* HW will ensure data coherency, CPU sync not required */
+       dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
+                                     offset_in_page(skb->data), size,
+                                     DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+       if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+               nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+               return 0;
+       }
+
+       nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
 
        /* Check for scattered buffer */
        if (!skb_is_nonlinear(skb))
@@ -1241,15 +1324,26 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
 
                qentry = nicvf_get_nxt_sqentry(sq, qentry);
                size = skb_frag_size(frag);
-               nicvf_sq_add_gather_subdesc(sq, qentry, size,
-                                           virt_to_phys(
-                                           skb_frag_address(frag)));
+               dma_addr = dma_map_page_attrs(&nic->pdev->dev,
+                                             skb_frag_page(frag),
+                                             frag->page_offset, size,
+                                             DMA_TO_DEVICE,
+                                             DMA_ATTR_SKIP_CPU_SYNC);
+               if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+                       /* Free the entire chain of mapped buffers: 'i' here
+                        * counts the frags mapped plus skb->data mapped above.
+                        */
+                       nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
+                       nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+                       return 0;
+               }
+               nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
        }
 
 doorbell:
        if (nic->t88 && skb_shinfo(skb)->gso_size) {
                qentry = nicvf_get_nxt_sqentry(sq, qentry);
-               nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+               nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
        }
 
        nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
@@ -1282,6 +1376,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
        int offset;
        u16 *rb_lens = NULL;
        u64 *rb_ptrs = NULL;
+       u64 phys_addr;
 
        rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
        /* Except 88xx pass1 on all other chips CQE_RX2_S is added to
@@ -1296,15 +1391,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
        else
                rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
 
-       netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
-                  __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
-
        for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
                payload_len = rb_lens[frag_num(frag)];
+               phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
+               if (!phys_addr) {
+                       if (skb)
+                               dev_kfree_skb_any(skb);
+                       return NULL;
+               }
+
                if (!frag) {
                        /* First fragment */
+                       dma_unmap_page_attrs(&nic->pdev->dev,
+                                            *rb_ptrs - cqe_rx->align_pad,
+                                            RCV_FRAG_LEN, DMA_FROM_DEVICE,
+                                            DMA_ATTR_SKIP_CPU_SYNC);
                        skb = nicvf_rb_ptr_to_skb(nic,
-                                                 *rb_ptrs - cqe_rx->align_pad,
+                                                 phys_addr - cqe_rx->align_pad,
                                                  payload_len);
                        if (!skb)
                                return NULL;
@@ -1312,8 +1415,11 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
                        skb_put(skb, payload_len);
                } else {
                        /* Add fragments */
-                       page = virt_to_page(phys_to_virt(*rb_ptrs));
-                       offset = phys_to_virt(*rb_ptrs) - page_address(page);
+                       dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs,
+                                            RCV_FRAG_LEN, DMA_FROM_DEVICE,
+                                            DMA_ATTR_SKIP_CPU_SYNC);
+                       page = virt_to_page(phys_to_virt(phys_addr));
+                       offset = phys_to_virt(phys_addr) - page_address(page);
                        skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
                                        offset, payload_len, RCV_FRAG_LEN);
                }
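
The nicvf_queues.c changes above map receive buffers at allocation time
(dma_map_page_attrs() with DMA_ATTR_SKIP_CPU_SYNC), store IOVAs in the ring descriptors,
translate them back to physical addresses via iommu_iova_to_phys() when pages are
released, and on the transmit side map skb data and frags the same way, rolling the tail
pointer back if a mapping fails. A self-contained sketch of just the head-to-tail ring
walk with a power-of-two mask that nicvf_free_rbdr() relies on, with the unmap and
put_page() calls reduced to a counter:

#include <stdio.h>

int main(void)
{
	unsigned int q_len = 8;  /* ring length, must be a power of two */
	unsigned int head = 6, tail = 3;
	unsigned int freed = 0;

	while (head != tail) {
		/* here the driver would unmap desc[head] and drop its page ref */
		freed++;
		head++;
		head &= (q_len - 1); /* wrap around the ring */
	}
	freed++; /* the tail descriptor is released separately, as above */

	printf("released %u buffers\n", freed); /* entries 6,7,0,1,2 plus tail = 6 */
	return 0;
}
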
index 5cb84da99a2de5bc594464db8759c8359d20447f..10cb4b84625b14a0446996776689ae6733f4ccee 100644 (file)
@@ -87,7 +87,7 @@
 #define RCV_BUF_COUNT          (1ULL << (RBDR_SIZE + 13))
 #define MAX_RCV_BUF_COUNT      (1ULL << (RBDR_SIZE6 + 13))
 #define RBDR_THRESH            (RCV_BUF_COUNT / 2)
-#define DMA_BUFFER_LEN         2048 /* In multiples of 128bytes */
+#define DMA_BUFFER_LEN         1536 /* In multiples of 128bytes */
 #define RCV_FRAG_LEN    (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
@@ -301,6 +301,8 @@ struct queue_set {
 
 #define        CQ_ERR_MASK     (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
 
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+                             int hdr_sqe, u8 subdesc_cnt);
 void nicvf_config_vlan_stripping(struct nicvf *nic,
                                 netdev_features_t features);
 int nicvf_set_qset_resources(struct nicvf *nic);
index 4c8e8cf730bbc2ee1d488d42d9d42163d442fb75..64a1095e4d1495c1e32c3ff7008882789f6b6f6e 100644 (file)
@@ -123,14 +123,44 @@ static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero)
        return 1;
 }
 
+static int max_bgx_per_node;
+static void set_max_bgx_per_node(struct pci_dev *pdev)
+{
+       u16 sdevid;
+
+       if (max_bgx_per_node)
+               return;
+
+       pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+       switch (sdevid) {
+       case PCI_SUBSYS_DEVID_81XX_BGX:
+               max_bgx_per_node = MAX_BGX_PER_CN81XX;
+               break;
+       case PCI_SUBSYS_DEVID_83XX_BGX:
+               max_bgx_per_node = MAX_BGX_PER_CN83XX;
+               break;
+       case PCI_SUBSYS_DEVID_88XX_BGX:
+       default:
+               max_bgx_per_node = MAX_BGX_PER_CN88XX;
+               break;
+       }
+}
+
+static struct bgx *get_bgx(int node, int bgx_idx)
+{
+       int idx = (node * max_bgx_per_node) + bgx_idx;
+
+       return bgx_vnic[idx];
+}
+
 /* Return number of BGX present in HW */
 unsigned bgx_get_map(int node)
 {
        int i;
        unsigned map = 0;
 
-       for (i = 0; i < MAX_BGX_PER_NODE; i++) {
-               if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
+       for (i = 0; i < max_bgx_per_node; i++) {
+               if (bgx_vnic[(node * max_bgx_per_node) + i])
                        map |= (1 << i);
        }
 
@@ -143,7 +173,7 @@ int bgx_get_lmac_count(int node, int bgx_idx)
 {
        struct bgx *bgx;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
        if (bgx)
                return bgx->lmac_count;
 
@@ -158,7 +188,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
        struct bgx *bgx;
        struct lmac *lmac;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
        if (!bgx)
                return;
 
@@ -172,7 +202,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state);
 
 const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
 {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
 
        if (bgx)
                return bgx->lmac[lmacid].mac;
@@ -183,7 +213,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac);
 
 void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
 
        if (!bgx)
                return;
@@ -194,7 +224,7 @@ EXPORT_SYMBOL(bgx_set_lmac_mac);
 
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
        struct lmac *lmac;
        u64 cfg;
 
@@ -217,7 +247,7 @@ EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
 {
        struct pfc *pfc = (struct pfc *)pause;
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
        struct lmac *lmac;
        u64 cfg;
 
@@ -237,7 +267,7 @@ EXPORT_SYMBOL(bgx_lmac_get_pfc);
 void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
 {
        struct pfc *pfc = (struct pfc *)pause;
-       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct bgx *bgx = get_bgx(node, bgx_idx);
        struct lmac *lmac;
        u64 cfg;
 
@@ -369,7 +399,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx)
 {
        struct bgx *bgx;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
        if (!bgx)
                return 0;
 
@@ -383,7 +413,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx)
 {
        struct bgx *bgx;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
        if (!bgx)
                return 0;
 
@@ -411,7 +441,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx,
        struct lmac *lmac;
        u64    cfg;
 
-       bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+       bgx = get_bgx(node, bgx_idx);
        if (!bgx)
                return;
 
@@ -1011,12 +1041,6 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
                        dev_info(dev, "%s: 40G_KR4\n", (char *)str);
                break;
        case BGX_MODE_QSGMII:
-               if ((lmacid == 0) &&
-                   (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
-                       return;
-               if ((lmacid == 2) &&
-                   (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
-                       return;
                dev_info(dev, "%s: QSGMII\n", (char *)str);
                break;
        case BGX_MODE_RGMII:
@@ -1334,11 +1358,13 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_release_regions;
        }
 
+       set_max_bgx_per_node(pdev);
+
        pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
        if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
                bgx->bgx_id = (pci_resource_start(pdev,
                        PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
-               bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+               bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node;
                bgx->max_lmac = MAX_LMAC_PER_BGX;
                bgx_vnic[bgx->bgx_id] = bgx;
        } else {
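
The thunder_bgx.c hunks above drop the compile-time MAX_BGX_PER_NODE in favour of a
max_bgx_per_node value derived from the PCI subsystem ID, and route all lookups through
get_bgx(), which indexes bgx_vnic[] as node * max_bgx_per_node + bgx_idx. A small
stand-alone sketch of that indexing and of the presence bitmap built by bgx_get_map();
the counts and addresses below are illustrative only, not the hardware's.

#include <stdio.h>

#define MAX_NODES 2
#define MAX_BGX   4

static void *bgx_vnic[MAX_NODES * MAX_BGX];
static int max_bgx_per_node = 3;   /* e.g. a CN81XX-style part */

static void *get_bgx(int node, int bgx_idx)
{
	return bgx_vnic[node * max_bgx_per_node + bgx_idx];
}

static unsigned int bgx_map(int node)
{
	unsigned int map = 0;

	for (int i = 0; i < max_bgx_per_node; i++)
		if (bgx_vnic[node * max_bgx_per_node + i])
			map |= 1u << i;
	return map;
}

int main(void)
{
	static int dummy;

	bgx_vnic[0 * max_bgx_per_node + 1] = &dummy; /* node 0, BGX1 present */
	bgx_vnic[1 * max_bgx_per_node + 0] = &dummy; /* node 1, BGX0 present */

	printf("node0 map=0x%x node1 map=0x%x bgx(0,1)=%p\n",
	       bgx_map(0), bgx_map(1), get_bgx(0, 1));
	return 0;
}
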
index a60f189429bb658cb5ab8383982f86ddd9090fc3..c5080f2cead5d0efc435fd827038eb7dbe4b5830 100644 (file)
@@ -22,7 +22,6 @@
 #define    MAX_BGX_PER_CN88XX                  2
 #define    MAX_BGX_PER_CN81XX                  3 /* 2 BGXs + 1 RGX */
 #define    MAX_BGX_PER_CN83XX                  4
-#define    MAX_BGX_PER_NODE                    4
 #define    MAX_LMAC_PER_BGX                    4
 #define    MAX_BGX_CHANS_PER_LMAC              16
 #define    MAX_DMAC_PER_LMAC                   8
index 275c2e2349ad92de224df1939769238d83ea5f3b..c44036d5761a4cbec301afb63393d49b47bac867 100644 (file)
@@ -2589,8 +2589,6 @@ static int emac_dt_mdio_probe(struct emac_instance *dev)
 static int emac_dt_phy_connect(struct emac_instance *dev,
                               struct device_node *phy_handle)
 {
-       int res;
-
        dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def),
                                    GFP_KERNEL);
        if (!dev->phy.def)
@@ -2617,7 +2615,7 @@ static int emac_dt_phy_probe(struct emac_instance *dev)
 {
        struct device_node *np = dev->ofdev->dev.of_node;
        struct device_node *phy_handle;
-       int res = 0;
+       int res = 1;
 
        phy_handle = of_parse_phandle(np, "phy-handle", 0);
 
@@ -2714,13 +2712,24 @@ static int emac_init_phy(struct emac_instance *dev)
        if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) {
                int res = emac_dt_phy_probe(dev);
 
-               mutex_unlock(&emac_phy_map_lock);
-               if (!res)
+               switch (res) {
+               case 1:
+                       /* No phy-handle property configured.
+                        * Continue with the existing phy probe
+                        * and setup code.
+                        */
+                       break;
+
+               case 0:
+                       mutex_unlock(&emac_phy_map_lock);
                        goto init_phy;
 
-               dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
-                       res);
-               return res;
+               default:
+                       mutex_unlock(&emac_phy_map_lock);
+                       dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
+                               res);
+                       return res;
+               }
        }
 
        if (dev->phy_address != 0xffffffff)
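
The emac change above gives emac_dt_phy_probe() a three-way return convention that
emac_init_phy() switches on: 1 means no "phy-handle" property (fall back to the legacy
probe path), 0 means a device-tree phy was attached, and a negative value is a hard
error. A tiny, self-contained illustration of handling that convention; the helper and
the -ENODEV-style value are stand-ins, not the driver's code.

#include <stdio.h>

static int dt_phy_probe(int have_phy_handle, int attach_ok)
{
	if (!have_phy_handle)
		return 1;             /* nothing to do, use legacy path */
	return attach_ok ? 0 : -19;  /* 0 on success, -ENODEV-style error */
}

int main(void)
{
	for (int h = 0; h <= 1; h++)
		for (int a = 0; a <= 1; a++) {
			int res = dt_phy_probe(h, a);

			switch (res) {
			case 1:
				printf("h=%d a=%d: legacy probe path\n", h, a);
				break;
			case 0:
				printf("h=%d a=%d: DT phy attached\n", h, a);
				break;
			default:
				printf("h=%d a=%d: error %d\n", h, a, res);
			}
		}
	return 0;
}
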
index 9198e6bd5160f9559075f348a6d595252ea88819..5f11b4dc95d2d1b271c9a47b3e890ef4662af2e4 100644 (file)
@@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev)
        send_map_query(adapter);
        for (i = 0; i < rxadd_subcrqs; i++) {
                init_rx_pool(adapter, &adapter->rx_pool[i],
-                            IBMVNIC_BUFFS_PER_POOL, i,
+                            adapter->req_rx_add_entries_per_subcrq, i,
                             be64_to_cpu(size_array[i]), 1);
                if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
                        dev_err(dev, "Couldn't alloc rx pool\n");
@@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev)
        for (i = 0; i < tx_subcrqs; i++) {
                tx_pool = &adapter->tx_pool[i];
                tx_pool->tx_buff =
-                   kcalloc(adapter->max_tx_entries_per_subcrq,
+                   kcalloc(adapter->req_tx_entries_per_subcrq,
                            sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
                if (!tx_pool->tx_buff)
                        goto tx_pool_alloc_failed;
 
                if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
-                                        adapter->max_tx_entries_per_subcrq *
+                                        adapter->req_tx_entries_per_subcrq *
                                         adapter->req_mtu))
                        goto tx_ltb_alloc_failed;
 
                tx_pool->free_map =
-                   kcalloc(adapter->max_tx_entries_per_subcrq,
+                   kcalloc(adapter->req_tx_entries_per_subcrq,
                            sizeof(int), GFP_KERNEL);
                if (!tx_pool->free_map)
                        goto tx_fm_alloc_failed;
 
-               for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++)
+               for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
                        tx_pool->free_map[j] = j;
 
                tx_pool->consumer_index = 0;
@@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
        struct device *dev = &adapter->vdev->dev;
        struct ibmvnic_tx_buff *tx_buff = NULL;
+       struct ibmvnic_sub_crq_queue *tx_scrq;
        struct ibmvnic_tx_pool *tx_pool;
        unsigned int tx_send_failed = 0;
        unsigned int tx_map_failed = 0;
@@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
        int ret = 0;
 
        tx_pool = &adapter->tx_pool[queue_num];
+       tx_scrq = adapter->tx_scrq[queue_num];
        txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
        handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
                                   be32_to_cpu(adapter->login_rsp_buf->
@@ -744,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        tx_pool->consumer_index =
            (tx_pool->consumer_index + 1) %
-               adapter->max_tx_entries_per_subcrq;
+               adapter->req_tx_entries_per_subcrq;
 
        tx_buff = &tx_pool->tx_buff[index];
        tx_buff->skb = skb;
@@ -817,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
                if (tx_pool->consumer_index == 0)
                        tx_pool->consumer_index =
-                               adapter->max_tx_entries_per_subcrq - 1;
+                               adapter->req_tx_entries_per_subcrq - 1;
                else
                        tx_pool->consumer_index--;
 
@@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
                ret = NETDEV_TX_BUSY;
                goto out;
        }
+
+       atomic_inc(&tx_scrq->used);
+
+       if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) {
+               netdev_info(netdev, "Stopping queue %d\n", queue_num);
+               netif_stop_subqueue(netdev, queue_num);
+       }
+
        tx_packets++;
        tx_bytes += skb->len;
        txq->trans_start = jiffies;
@@ -1213,6 +1223,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
        scrq->adapter = adapter;
        scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
        scrq->cur = 0;
+       atomic_set(&scrq->used, 0);
        scrq->rx_skb_top = NULL;
        spin_lock_init(&scrq->lock);
 
@@ -1355,14 +1366,28 @@ restart_loop:
                                                 DMA_TO_DEVICE);
                        }
 
-                       if (txbuff->last_frag)
+                       if (txbuff->last_frag) {
+                               atomic_dec(&scrq->used);
+
+                               if (atomic_read(&scrq->used) <=
+                                   (adapter->req_tx_entries_per_subcrq / 2) &&
+                                   netif_subqueue_stopped(adapter->netdev,
+                                                          txbuff->skb)) {
+                                       netif_wake_subqueue(adapter->netdev,
+                                                           scrq->pool_index);
+                                       netdev_dbg(adapter->netdev,
+                                                  "Started queue %d\n",
+                                                  scrq->pool_index);
+                               }
+
                                dev_kfree_skb_any(txbuff->skb);
+                       }
 
                        adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
                                                     producer_index] = index;
                        adapter->tx_pool[pool].producer_index =
                            (adapter->tx_pool[pool].producer_index + 1) %
-                           adapter->max_tx_entries_per_subcrq;
+                           adapter->req_tx_entries_per_subcrq;
                }
                /* remove tx_comp scrq*/
                next->tx_comp.first = 0;
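
The ibmvnic hunks above add an atomic used counter per sub-CRQ: the transmit path
increments it and stops the subqueue once it reaches req_tx_entries_per_subcrq, while the
completion handler decrements it and wakes the queue when it falls to half. A minimal
userspace sketch of that stop/wake threshold pattern using C11 atomics; the ring size is
illustrative, not the adapter's real value.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define RING_ENTRIES 8

static atomic_int used;
static bool stopped;

static void xmit_one(void)
{
	atomic_fetch_add(&used, 1);
	if (atomic_load(&used) >= RING_ENTRIES && !stopped) {
		stopped = true;
		printf("stop queue (used=%d)\n", atomic_load(&used));
	}
}

static void complete_one(void)
{
	atomic_fetch_sub(&used, 1);
	if (atomic_load(&used) <= RING_ENTRIES / 2 && stopped) {
		stopped = false;
		printf("wake queue (used=%d)\n", atomic_load(&used));
	}
}

int main(void)
{
	for (int i = 0; i < RING_ENTRIES; i++)
		xmit_one();           /* fills the ring, stops the queue */
	for (int i = 0; i < RING_ENTRIES / 2; i++)
		complete_one();       /* drains to half, wakes the queue */
	return 0;
}
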
index 422824f1f42a8accdbbe7a97c70baa9882fbcce8..1993b42666f73d659773b6b88bcd8e8552ac97b7 100644 (file)
@@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue {
        spinlock_t lock;
        struct sk_buff *rx_skb_top;
        struct ibmvnic_adapter *adapter;
+       atomic_t used;
 };
 
 struct ibmvnic_long_term_buff {
index ddb4ca4ff930a74b38a97b04b98ad54262da1a7b..117170014e8897f0f91cfc25464e3a03aba044ec 100644 (file)
@@ -14,6 +14,7 @@ config MLX5_CORE
 config MLX5_CORE_EN
        bool "Mellanox Technologies ConnectX-4 Ethernet support"
        depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+       depends on IPV6=y || IPV6=n || MLX5_CORE=m
        imply PTP_1588_CLOCK
        default n
        ---help---
index 0523ed47f597c715296c5ea843245625bf3dac62..8fa23f6a1f67f6494168455a58c5a7b1ee35cae5 100644 (file)
@@ -302,6 +302,9 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5e_dcbx *dcbx = &priv->dcbx;
 
+       if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+               return 1;
+
        if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
                if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
                        return 0;
@@ -315,13 +318,10 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
                return 1;
        }
 
-       if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+       if (!(mode & DCB_CAP_DCBX_HOST))
                return 1;
 
-       if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-           !(mode & DCB_CAP_DCBX_VER_CEE) ||
-           !(mode & DCB_CAP_DCBX_VER_IEEE) ||
-           !(mode & DCB_CAP_DCBX_HOST))
+       if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
                return 1;
 
        return 0;
index 31e3cb7ee5feeb35ce383c5582bc800733b81643..5621dcfda4f1868c6bccdff9cd220b7a43d46dd3 100644 (file)
@@ -204,9 +204,6 @@ mlx5e_test_loopback_validate(struct sk_buff *skb,
        struct iphdr *iph;
 
        /* We are only going to peek, no need to clone the SKB */
-       if (skb->protocol != htons(ETH_P_IP))
-               goto out;
-
        if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
                goto out;
 
@@ -249,7 +246,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
        lbtp->loopback_ok = false;
        init_completion(&lbtp->comp);
 
-       lbtp->pt.type = htons(ETH_P_ALL);
+       lbtp->pt.type = htons(ETH_P_IP);
        lbtp->pt.func = mlx5e_test_loopback_validate;
        lbtp->pt.dev = priv->netdev;
        lbtp->pt.af_packet_priv = lbtp;
index 44406a5ec15d96a6ca45d30b609864f8cccb07e1..79481f4cf264320648d0ea9c335cf6a3b23faf01 100644 (file)
 #include "eswitch.h"
 #include "vxlan.h"
 
+enum {
+       MLX5E_TC_FLOW_ESWITCH   = BIT(0),
+};
+
 struct mlx5e_tc_flow {
        struct rhash_head       node;
        u64                     cookie;
+       u8                      flags;
        struct mlx5_flow_handle *rule;
        struct list_head        encap; /* flows sharing the same encap */
        struct mlx5_esw_flow_attr *attr;
@@ -177,7 +182,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                mlx5_fc_destroy(priv->mdev, counter);
        }
 
-       if (esw && esw->mode == SRIOV_OFFLOADS) {
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
                mlx5_eswitch_del_vlan_action(esw, flow->attr);
                if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
                        mlx5e_detach_encap(priv, flow);
@@ -598,6 +603,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 }
 
 static int parse_cls_flower(struct mlx5e_priv *priv,
+                           struct mlx5e_tc_flow *flow,
                            struct mlx5_flow_spec *spec,
                            struct tc_cls_flower_offload *f)
 {
@@ -609,7 +615,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
 
        err = __parse_cls_flower(priv, spec, f, &min_inline);
 
-       if (!err && esw->mode == SRIOV_OFFLOADS &&
+       if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
            rep->vport != FDB_UPLINK_VPORT) {
                if (min_inline > esw->offloads.inline_mode) {
                        netdev_warn(priv->netdev,
@@ -1132,23 +1138,19 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
                           struct tc_cls_flower_offload *f)
 {
        struct mlx5e_tc_table *tc = &priv->fs.tc;
-       int err = 0;
-       bool fdb_flow = false;
+       int err, attr_size = 0;
        u32 flow_tag, action;
        struct mlx5e_tc_flow *flow;
        struct mlx5_flow_spec *spec;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       u8 flow_flags = 0;
 
-       if (esw && esw->mode == SRIOV_OFFLOADS)
-               fdb_flow = true;
-
-       if (fdb_flow)
-               flow = kzalloc(sizeof(*flow) +
-                              sizeof(struct mlx5_esw_flow_attr),
-                              GFP_KERNEL);
-       else
-               flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+       if (esw && esw->mode == SRIOV_OFFLOADS) {
+               flow_flags = MLX5E_TC_FLOW_ESWITCH;
+               attr_size  = sizeof(struct mlx5_esw_flow_attr);
+       }
 
+       flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
        spec = mlx5_vzalloc(sizeof(*spec));
        if (!spec || !flow) {
                err = -ENOMEM;
@@ -1156,12 +1158,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
        }
 
        flow->cookie = f->cookie;
+       flow->flags = flow_flags;
 
-       err = parse_cls_flower(priv, spec, f);
+       err = parse_cls_flower(priv, flow, spec, f);
        if (err < 0)
                goto err_free;
 
-       if (fdb_flow) {
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
                flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
                err = parse_tc_fdb_actions(priv, f->exts, flow);
                if (err < 0)
index 2478516a61e2ea547f5ae8af0c3aae7228e64db9..ded27bb9a3b6049ff4bad1606443dbeff53be8f1 100644 (file)
@@ -1136,7 +1136,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
                                                u32 *match_criteria)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct list_head *prev = ft->node.children.prev;
+       struct list_head *prev = &ft->node.children;
        unsigned int candidate_index = 0;
        struct mlx5_flow_group *fg;
        void *match_criteria_addr;
index c4242a4e81309f0d90a0cae8bdfc09fd39da5649..e2bd600d19de09c8e4049e0b83ec2b9eae645729 100644 (file)
@@ -1352,6 +1352,7 @@ static int init_one(struct pci_dev *pdev,
        if (err)
                goto clean_load;
 
+       pci_save_state(pdev);
        return 0;
 
 clean_load:
@@ -1407,9 +1408,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
        mlx5_enter_error_state(dev);
        mlx5_unload_one(dev, priv, false);
-       /* In case of kernel call save the pci state and drain the health wq */
+       /* In case of kernel call drain the health wq */
        if (state) {
-               pci_save_state(pdev);
                mlx5_drain_health_wq(dev);
                mlx5_pci_disable_device(dev);
        }
@@ -1461,6 +1461,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
 
        pci_set_master(pdev);
        pci_restore_state(pdev);
+       pci_save_state(pdev);
 
        if (wait_vital(pdev)) {
                dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
index 0899e2d310e26269a5c3d025b7afeeb1516bf21e..d9616daf8a705645eb5b14e5af889e0b5020ab0d 100644 (file)
@@ -769,7 +769,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid)
 #define MLXSW_REG_SPVM_ID 0x200F
 #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
 #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN +  \
                    MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
 
@@ -1702,7 +1702,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload,
 #define MLXSW_REG_SPVMLR_ID 0x2020
 #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
 #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
 #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
                              MLXSW_REG_SPVMLR_REC_LEN * \
                              MLXSW_REG_SPVMLR_REC_MAX_COUNT)
index 22ab429253778d2a22e4c59c742f8f6778e57f40..ae6cccc666e4619bbb1a8360b5cc090d303da60b 100644 (file)
@@ -303,11 +303,11 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
        ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
                                           ingress,
                                           MLXSW_SP_ACL_PROFILE_FLOWER);
-       if (WARN_ON(IS_ERR(ruleset)))
+       if (IS_ERR(ruleset))
                return;
 
        rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
-       if (!WARN_ON(!rule)) {
+       if (rule) {
                mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
                mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
        }
index d42d03df751acbb32dd725c769bbf5d56aeb72e4..7e3a6fed3da6d94fe47139aef697563b56726950 100644 (file)
@@ -422,8 +422,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn,
                u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val;
                u32 cxt_size = CONN_CXT_SIZE(p_hwfn);
                u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size;
+               u32 align = elems_per_page * DQ_RANGE_ALIGN;
 
-               p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page);
+               p_conn->cid_count = roundup(p_conn->cid_count, align);
        }
 }
 
index e2a081ceaf520c429b90e1fcc1e2b6cb7d3b10aa..e518f914eab13f52d8f82a8e1a29a5a80a2f2b24 100644 (file)
@@ -2389,9 +2389,8 @@ qed_chain_alloc_sanity_check(struct qed_dev *cdev,
         * size/capacity fields are of a u32 type.
         */
        if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 &&
-            chain_size > 0x10000) ||
-           (cnt_type == QED_CHAIN_CNT_TYPE_U32 &&
-            chain_size > 0x100000000ULL)) {
+            chain_size > ((u32)U16_MAX + 1)) ||
+           (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) {
                DP_NOTICE(cdev,
                          "The actual chain size (0x%llx) is larger than the maximal possible value\n",
                          chain_size);
index 3a44d6b395fac9500841f5ac1bc73b4c11d4d188..098766f7fe88a6e0a131712330cfa3b144c32738 100644 (file)
@@ -190,6 +190,9 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
        p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
        p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
        p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+       p_init->ooo_enable = p_params->ooo_enable;
+       p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] +
+                                 p_params->ll2_ooo_queue_id;
        p_init->func_params.log_page_size = p_params->log_page_size;
        val = p_params->num_tasks;
        p_init->func_params.num_tasks = cpu_to_le16(val);
@@ -786,6 +789,23 @@ static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
        spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
 }
 
+void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
+                              struct qed_iscsi_conn *p_conn)
+{
+       qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
+       qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+       qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         sizeof(struct tcp_upload_params),
+                         p_conn->tcp_upload_params_virt_addr,
+                         p_conn->tcp_upload_params_phys_addr);
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         sizeof(struct scsi_terminate_extra_params),
+                         p_conn->queue_cnts_virt_addr,
+                         p_conn->queue_cnts_phys_addr);
+       kfree(p_conn);
+}
+
 struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
 {
        struct qed_iscsi_info *p_iscsi_info;
@@ -807,6 +827,17 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
 void qed_iscsi_free(struct qed_hwfn *p_hwfn,
                    struct qed_iscsi_info *p_iscsi_info)
 {
+       struct qed_iscsi_conn *p_conn = NULL;
+
+       while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) {
+               p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+                                         struct qed_iscsi_conn, list_entry);
+               if (p_conn) {
+                       list_del(&p_conn->list_entry);
+                       qed_iscsi_free_connection(p_hwfn, p_conn);
+               }
+       }
+
        kfree(p_iscsi_info);
 }
 
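
The loop added to qed_iscsi_free() drains every connection still parked on the free list before the info structure itself is released. A small userspace analogue of that drain-and-free pattern, with hypothetical types in place of qed_iscsi_conn and the kernel list helpers:

    #include <stdio.h>
    #include <stdlib.h>

    struct conn {
            struct conn *next;
            int id;
    };

    /* Pop every entry left on the free list and release it, so nothing
     * is leaked when the owning structure goes away. */
    static void free_all_connections(struct conn **head)
    {
            while (*head) {
                    struct conn *c = *head;

                    *head = c->next;        /* stands in for list_del() */
                    printf("freeing connection %d\n", c->id);
                    free(c);
            }
    }

    int main(void)
    {
            struct conn *head = NULL;
            int i;

            for (i = 0; i < 3; i++) {
                    struct conn *c = malloc(sizeof(*c));

                    if (!c)
                            break;
                    c->id = i;
                    c->next = head;
                    head = c;
            }
            free_all_connections(&head);
            return 0;
    }
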
index 9a0b9af10a572f3e3c2a5d086e9e731b040e4d6b..0d3cef409c96d0849c7860e8f03a920b3b8966f1 100644 (file)
@@ -211,6 +211,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
        /* If need to reuse or there's no replacement buffer, repost this */
        if (rc)
                goto out_post;
+       dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+                        cdev->ll2->rx_size, DMA_FROM_DEVICE);
 
        skb = build_skb(buffer->data, 0);
        if (!skb) {
@@ -474,7 +476,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn,
 static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
                                      struct qed_ll2_info *p_ll2_conn,
                                      union core_rx_cqe_union *p_cqe,
-                                     unsigned long lock_flags,
+                                     unsigned long *p_lock_flags,
                                      bool b_last_cqe)
 {
        struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
@@ -495,10 +497,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
                          "Mismatch between active_descq and the LL2 Rx chain\n");
        list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-       spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+       spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
        qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
                                    p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
-       spin_lock_irqsave(&p_rx->lock, lock_flags);
+       spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
 
        return 0;
 }
@@ -538,7 +540,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
                        break;
                case CORE_RX_CQE_TYPE_REGULAR:
                        rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
-                                                       cqe, flags, b_last_cqe);
+                                                       cqe, &flags,
+                                                       b_last_cqe);
                        break;
                default:
                        rc = -EIO;
@@ -968,7 +971,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev,
 {
        struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
        u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
-       struct qed_ll2_conn ll2_info;
+       struct qed_ll2_conn ll2_info = { 0 };
        int rc;
 
        ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO;
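
Passing the saved IRQ flags into qed_ll2_rxq_completion_reg() by pointer matters because the helper drops and re-takes the Rx lock: the flags written by the second spin_lock_irqsave() must land in the caller's variable, not in a by-value copy that is discarded on return. A minimal userspace sketch of that by-value vs. by-pointer difference (plain assignments standing in for the spinlock calls):

    #include <stdio.h>

    static void retake_by_value(unsigned long flags)
    {
            flags = 0xdead;         /* update is lost when the helper returns */
    }

    static void retake_by_pointer(unsigned long *p_flags)
    {
            *p_flags = 0xbeef;      /* caller sees the refreshed state */
    }

    int main(void)
    {
            unsigned long flags = 0;

            retake_by_value(flags);
            printf("by value:   %#lx\n", flags);    /* still 0 */
            retake_by_pointer(&flags);
            printf("by pointer: %#lx\n", flags);    /* 0xbeef */
            return 0;
    }
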
index 7d731c6cb8923dd927a7bbaafa3a3a97237ba652..378afce58b3f0abd4c2a3bd43f403a50e1a9a1da 100644 (file)
@@ -159,6 +159,8 @@ struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn)
        if (!p_ooo_info->ooo_history.p_cqes)
                goto no_history_mem;
 
+       p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
        return p_ooo_info;
 
 no_history_mem:
index 65077c77082a2f042117a0889c2b15099c58eae5..91e9bd7159ab37cab5731fef122345cb59341ba2 100644 (file)
@@ -1535,32 +1535,33 @@ static int smc_close(struct net_device *dev)
  * Ethtool support
  */
 static int
-smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_get_link_ksettings(struct net_device *dev,
+                              struct ethtool_link_ksettings *cmd)
 {
        struct smc_local *lp = netdev_priv(dev);
        int ret;
 
-       cmd->maxtxpkt = 1;
-       cmd->maxrxpkt = 1;
-
        if (lp->phy_type != 0) {
                spin_lock_irq(&lp->lock);
-               ret = mii_ethtool_gset(&lp->mii, cmd);
+               ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
                spin_unlock_irq(&lp->lock);
        } else {
-               cmd->supported = SUPPORTED_10baseT_Half |
+               u32 supported = SUPPORTED_10baseT_Half |
                                 SUPPORTED_10baseT_Full |
                                 SUPPORTED_TP | SUPPORTED_AUI;
 
                if (lp->ctl_rspeed == 10)
-                       ethtool_cmd_speed_set(cmd, SPEED_10);
+                       cmd->base.speed = SPEED_10;
                else if (lp->ctl_rspeed == 100)
-                       ethtool_cmd_speed_set(cmd, SPEED_100);
+                       cmd->base.speed = SPEED_100;
+
+               cmd->base.autoneg = AUTONEG_DISABLE;
+               cmd->base.port = 0;
+               cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ?
+                       DUPLEX_FULL : DUPLEX_HALF;
 
-               cmd->autoneg = AUTONEG_DISABLE;
-               cmd->transceiver = XCVR_INTERNAL;
-               cmd->port = 0;
-               cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? DUPLEX_FULL : DUPLEX_HALF;
+               ethtool_convert_legacy_u32_to_link_mode(
+                       cmd->link_modes.supported, supported);
 
                ret = 0;
        }
@@ -1569,24 +1570,26 @@ smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 }
 
 static int
-smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_set_link_ksettings(struct net_device *dev,
+                              const struct ethtool_link_ksettings *cmd)
 {
        struct smc_local *lp = netdev_priv(dev);
        int ret;
 
        if (lp->phy_type != 0) {
                spin_lock_irq(&lp->lock);
-               ret = mii_ethtool_sset(&lp->mii, cmd);
+               ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
                spin_unlock_irq(&lp->lock);
        } else {
-               if (cmd->autoneg != AUTONEG_DISABLE ||
-                   cmd->speed != SPEED_10 ||
-                   (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
-                   (cmd->port != PORT_TP && cmd->port != PORT_AUI))
+               if (cmd->base.autoneg != AUTONEG_DISABLE ||
+                   cmd->base.speed != SPEED_10 ||
+                   (cmd->base.duplex != DUPLEX_HALF &&
+                    cmd->base.duplex != DUPLEX_FULL) ||
+                   (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI))
                        return -EINVAL;
 
-//             lp->port = cmd->port;
-               lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+//             lp->port = cmd->base.port;
+               lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
 
 //             if (netif_running(dev))
 //                     smc_set_port(dev);
@@ -1744,8 +1747,6 @@ static int smc_ethtool_seteeprom(struct net_device *dev,
 
 
 static const struct ethtool_ops smc_ethtool_ops = {
-       .get_settings   = smc_ethtool_getsettings,
-       .set_settings   = smc_ethtool_setsettings,
        .get_drvinfo    = smc_ethtool_getdrvinfo,
 
        .get_msglevel   = smc_ethtool_getmsglevel,
@@ -1755,6 +1756,8 @@ static const struct ethtool_ops smc_ethtool_ops = {
        .get_eeprom_len = smc_ethtool_geteeprom_len,
        .get_eeprom     = smc_ethtool_geteeprom,
        .set_eeprom     = smc_ethtool_seteeprom,
+       .get_link_ksettings     = smc_ethtool_get_link_ksettings,
+       .set_link_ksettings     = smc_ethtool_set_link_ksettings,
 };
 
 static const struct net_device_ops smc_netdev_ops = {
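
The smc91x conversion replaces the legacy ethtool_cmd callbacks with the link_ksettings pair, so the driver now fills cmd->base.* fields and turns its legacy u32 'supported' mask into a link-mode bitmap via ethtool_convert_legacy_u32_to_link_mode(). A rough userspace sketch of what that conversion amounts to conceptually (a bit-for-bit copy of a 32-bit mask into a wider bitmap; the bitmap width and names here are assumptions for illustration, not the kernel implementation):

    #include <stdint.h>
    #include <stdio.h>

    #define MODE_BITS 96                            /* assumed bitmap width */
    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    static void legacy_u32_to_bitmap(unsigned long *bitmap, uint32_t legacy)
    {
            unsigned int bit;

            for (bit = 0; bit < 32; bit++)
                    if (legacy & (1u << bit))
                            bitmap[bit / BITS_PER_LONG] |=
                                    1ul << (bit % BITS_PER_LONG);
    }

    int main(void)
    {
            unsigned long modes[(MODE_BITS + BITS_PER_LONG - 1) / BITS_PER_LONG] = { 0 };

            legacy_u32_to_bitmap(modes, 0x3);       /* e.g. 10baseT half + full */
            printf("%#lx\n", modes[0]);
            return 0;
    }
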
index d3e73ac158aee6d3958b18618b85f0180f22abf7..f9f3dba7a58800d9288199b50bec0b85d18cb249 100644 (file)
@@ -700,6 +700,8 @@ struct net_device_context {
 
        u32 tx_checksum_mask;
 
+       u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+
        /* Ethtool settings */
        u8 duplex;
        u32 speed;
@@ -757,7 +759,6 @@ struct netvsc_device {
 
        struct nvsp_message revoke_packet;
 
-       u32 send_table[VRSS_SEND_TAB_SIZE];
        u32 max_chn;
        u32 num_chn;
        spinlock_t sc_lock; /* Protects num_sc_offered variable */
index d35ebd993b385255eaa441fb67272c117e328e7f..4c1d8cca247b921e263268bf8344898c31bb488a 100644 (file)
@@ -1136,15 +1136,11 @@ static void netvsc_receive(struct net_device *ndev,
 static void netvsc_send_table(struct hv_device *hdev,
                              struct nvsp_message *nvmsg)
 {
-       struct netvsc_device *nvscdev;
        struct net_device *ndev = hv_get_drvdata(hdev);
+       struct net_device_context *net_device_ctx = netdev_priv(ndev);
        int i;
        u32 count, *tab;
 
-       nvscdev = get_outbound_net_device(hdev);
-       if (!nvscdev)
-               return;
-
        count = nvmsg->msg.v5_msg.send_table.count;
        if (count != VRSS_SEND_TAB_SIZE) {
                netdev_err(ndev, "Received wrong send-table size:%u\n", count);
@@ -1155,7 +1151,7 @@ static void netvsc_send_table(struct hv_device *hdev,
                      nvmsg->msg.v5_msg.send_table.offset);
 
        for (i = 0; i < count; i++)
-               nvscdev->send_table[i] = tab[i];
+               net_device_ctx->tx_send_table[i] = tab[i];
 }
 
 static void netvsc_send_vf(struct net_device_context *net_device_ctx,
index bc05c895d9589deccd24f1013831036da75e4d1b..5ede87f30463e8211ef2828a8f74d4951c4166a6 100644 (file)
@@ -206,17 +206,15 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                        void *accel_priv, select_queue_fallback_t fallback)
 {
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
-       struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
+       unsigned int num_tx_queues = ndev->real_num_tx_queues;
        struct sock *sk = skb->sk;
        int q_idx = sk_tx_queue_get(sk);
 
-       if (q_idx < 0 || skb->ooo_okay ||
-           q_idx >= ndev->real_num_tx_queues) {
+       if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
                u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
                int new_idx;
 
-               new_idx = nvsc_dev->send_table[hash]
-                       % nvsc_dev->num_chn;
+               new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
 
                if (q_idx != new_idx && sk &&
                    sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
@@ -225,9 +223,6 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
                q_idx = new_idx;
        }
 
-       if (unlikely(!nvsc_dev->chan_table[q_idx].channel))
-               q_idx = 0;
-
        return q_idx;
 }
 
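
With the indirection table moved into net_device_context, queue selection no longer dereferences the netvsc_device at all: it hashes the flow into the send table and folds the table entry onto the number of TX queues currently exposed. A standalone sketch of that arithmetic (the table contents and the table size of 16 are assumptions for the example):

    #include <stdint.h>
    #include <stdio.h>

    #define SEND_TAB_SIZE 16        /* assumed table size for the sketch */

    static unsigned int pick_tx_queue(const uint32_t *send_table, uint32_t hash,
                                      unsigned int num_tx_queues)
    {
            /* Hash indexes the indirection table; the table entry is then
             * folded onto however many TX queues are currently active. */
            return send_table[hash % SEND_TAB_SIZE] % num_tx_queues;
    }

    int main(void)
    {
            uint32_t table[SEND_TAB_SIZE] = { 0, 1, 2, 3, 4, 5, 6, 7,
                                              0, 1, 2, 3, 4, 5, 6, 7 };

            printf("queue %u\n", pick_tx_queue(table, 0x12345678u, 4));
            return 0;
    }
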
index f9d0fa315a47624409cb054e762a6c8b6537a7b6..272b051a019975110aa1d117da993cf18cb98816 100644 (file)
@@ -1883,17 +1883,6 @@ static int m88e1510_probe(struct phy_device *phydev)
        return m88e1510_hwmon_probe(phydev);
 }
 
-static void marvell_remove(struct phy_device *phydev)
-{
-#ifdef CONFIG_HWMON
-
-       struct marvell_priv *priv = phydev->priv;
-
-       if (priv && priv->hwmon_dev)
-               hwmon_device_unregister(priv->hwmon_dev);
-#endif
-}
-
 static struct phy_driver marvell_drivers[] = {
        {
                .phy_id = MARVELL_PHY_ID_88E1101,
@@ -1974,7 +1963,6 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .probe = &m88e1121_probe,
-               .remove = &marvell_remove,
                .config_init = &m88e1121_config_init,
                .config_aneg = &m88e1121_config_aneg,
                .read_status = &marvell_read_status,
@@ -2087,7 +2075,6 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE,
                .flags = PHY_HAS_INTERRUPT,
                .probe = &m88e1510_probe,
-               .remove = &marvell_remove,
                .config_init = &m88e1510_config_init,
                .config_aneg = &m88e1510_config_aneg,
                .read_status = &marvell_read_status,
@@ -2109,7 +2096,6 @@ static struct phy_driver marvell_drivers[] = {
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .probe = m88e1510_probe,
-               .remove = &marvell_remove,
                .config_init = &marvell_config_init,
                .config_aneg = &m88e1510_config_aneg,
                .read_status = &marvell_read_status,
@@ -2127,7 +2113,6 @@ static struct phy_driver marvell_drivers[] = {
                .phy_id_mask = MARVELL_PHY_ID_MASK,
                .name = "Marvell 88E1545",
                .probe = m88e1510_probe,
-               .remove = &marvell_remove,
                .features = PHY_GBIT_FEATURES,
                .flags = PHY_HAS_INTERRUPT,
                .config_init = &marvell_config_init,
index daec6555f3b10889f786912d286b316a9331480b..5198ccfa347f8b4bfb5ee5e0c69ee12fb44ec681 100644 (file)
@@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = {
        .phy_id         = 0xffffffff,
        .phy_id_mask    = 0xffffffff,
        .name           = "Generic PHY",
-       .soft_reset     = genphy_soft_reset,
+       .soft_reset     = genphy_no_soft_reset,
        .config_init    = genphy_config_init,
        .features       = PHY_GBIT_FEATURES | SUPPORTED_MII |
                          SUPPORTED_AUI | SUPPORTED_FIBRE |
index 93ffedfa299412f78af2c72fedc991101a52a451..1e2d4f1179da31ed1e458af5e29cc77314f875ad 100644 (file)
@@ -491,13 +491,14 @@ static int ks8995_probe(struct spi_device *spi)
        if (err)
                return err;
 
-       ks->regs_attr.size = ks->chip->regs_size;
        memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
+       ks->regs_attr.size = ks->chip->regs_size;
 
        err = ks8995_reset(ks);
        if (err)
                return err;
 
+       sysfs_attr_init(&ks->regs_attr.attr);
        err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
        if (err) {
                dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
index 4a24b5d15f5a5dfe770d184533f70f7140d9e145..1b52520715aec6f972626361a6aa93accf809301 100644 (file)
@@ -2072,6 +2072,7 @@ static int team_dev_type_check_change(struct net_device *dev,
 static void team_setup(struct net_device *dev)
 {
        ether_setup(dev);
+       dev->max_mtu = ETH_MAX_MTU;
 
        dev->netdev_ops = &team_netdev_ops;
        dev->ethtool_ops = &team_ethtool_ops;
index dc1b1dd9157c16d1bbd3505751a8782e020ab71a..34cc3c590aa5c5c49509159d8fbf0f0cfcfca988 100644 (file)
@@ -822,7 +822,18 @@ static void tun_net_uninit(struct net_device *dev)
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
+       struct tun_struct *tun = netdev_priv(dev);
+       int i;
+
        netif_tx_start_all_queues(dev);
+
+       for (i = 0; i < tun->numqueues; i++) {
+               struct tun_file *tfile;
+
+               tfile = rtnl_dereference(tun->tfiles[i]);
+               tfile->socket.sk->sk_write_space(tfile->socket.sk);
+       }
+
        return 0;
 }
 
@@ -1103,9 +1114,10 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
        if (!skb_array_empty(&tfile->tx_array))
                mask |= POLLIN | POLLRDNORM;
 
-       if (sock_writeable(sk) ||
-           (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
-            sock_writeable(sk)))
+       if (tun->dev->flags & IFF_UP &&
+           (sock_writeable(sk) ||
+            (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+             sock_writeable(sk))))
                mask |= POLLOUT | POLLWRNORM;
 
        if (tun->dev->reg_state != NETREG_REGISTERED)
@@ -2570,7 +2582,6 @@ static int __init tun_init(void)
        int ret = 0;
 
        pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
-       pr_info("%s\n", DRV_COPYRIGHT);
 
        ret = rtnl_link_register(&tun_link_ops);
        if (ret) {
index 22379da63400776ff70994097de6d472232ca908..fea687f35b5ac6f373396a860a3ff16a8e59fd66 100644 (file)
@@ -340,6 +340,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
 
 static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+       int len = skb->len;
        netdev_tx_t ret = is_ip_tx_frame(skb, dev);
 
        if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
@@ -347,7 +348,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 
                u64_stats_update_begin(&dstats->syncp);
                dstats->tx_pkts++;
-               dstats->tx_bytes += skb->len;
+               dstats->tx_bytes += len;
                u64_stats_update_end(&dstats->syncp);
        } else {
                this_cpu_inc(dev->dstats->tx_drps);
index e375560cc74e5ffc09553ddab5c6b657fe1cb6f0..bdb6ae16d4a85bf9539199e189011bce104ba51a 100644 (file)
@@ -2976,6 +2976,44 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
        return 0;
 }
 
+static int __vxlan_dev_create(struct net *net, struct net_device *dev,
+                             struct vxlan_config *conf)
+{
+       struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       int err;
+
+       err = vxlan_dev_configure(net, dev, conf, false);
+       if (err)
+               return err;
+
+       dev->ethtool_ops = &vxlan_ethtool_ops;
+
+       /* create an fdb entry for a valid default destination */
+       if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+               err = vxlan_fdb_create(vxlan, all_zeros_mac,
+                                      &vxlan->default_dst.remote_ip,
+                                      NUD_REACHABLE | NUD_PERMANENT,
+                                      NLM_F_EXCL | NLM_F_CREATE,
+                                      vxlan->cfg.dst_port,
+                                      vxlan->default_dst.remote_vni,
+                                      vxlan->default_dst.remote_vni,
+                                      vxlan->default_dst.remote_ifindex,
+                                      NTF_SELF);
+               if (err)
+                       return err;
+       }
+
+       err = register_netdevice(dev);
+       if (err) {
+               vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
+               return err;
+       }
+
+       list_add(&vxlan->next, &vn->vxlan_list);
+       return 0;
+}
+
 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
                         struct net_device *dev, struct vxlan_config *conf,
                         bool changelink)
@@ -3172,8 +3210,6 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[])
 {
-       struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
-       struct vxlan_dev *vxlan = netdev_priv(dev);
        struct vxlan_config conf;
        int err;
 
@@ -3181,36 +3217,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
        if (err)
                return err;
 
-       err = vxlan_dev_configure(src_net, dev, &conf, false);
-       if (err)
-               return err;
-
-       dev->ethtool_ops = &vxlan_ethtool_ops;
-
-       /* create an fdb entry for a valid default destination */
-       if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
-               err = vxlan_fdb_create(vxlan, all_zeros_mac,
-                                      &vxlan->default_dst.remote_ip,
-                                      NUD_REACHABLE | NUD_PERMANENT,
-                                      NLM_F_EXCL | NLM_F_CREATE,
-                                      vxlan->cfg.dst_port,
-                                      vxlan->default_dst.remote_vni,
-                                      vxlan->default_dst.remote_vni,
-                                      vxlan->default_dst.remote_ifindex,
-                                      NTF_SELF);
-               if (err)
-                       return err;
-       }
-
-       err = register_netdevice(dev);
-       if (err) {
-               vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
-               return err;
-       }
-
-       list_add(&vxlan->next, &vn->vxlan_list);
-
-       return 0;
+       return __vxlan_dev_create(src_net, dev, &conf);
 }
 
 static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
@@ -3440,7 +3447,7 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name,
        if (IS_ERR(dev))
                return dev;
 
-       err = vxlan_dev_configure(net, dev, conf, false);
+       err = __vxlan_dev_create(net, dev, conf);
        if (err < 0) {
                free_netdev(dev);
                return ERR_PTR(err);
index a5045b5279d70a92c827424be3ff7869c6193dc8..6742ae605660454e19406d11f5aff7e84a2527f3 100644 (file)
@@ -381,8 +381,8 @@ static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev)
        /* set bd status and length */
        bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S;
 
-       iowrite16be(bd_status, &bd->status);
        iowrite16be(skb->len, &bd->length);
+       iowrite16be(bd_status, &bd->status);
 
        /* Move to next BD in the ring */
        if (!(bd_status & T_W_S))
@@ -457,7 +457,7 @@ static int hdlc_rx_done(struct ucc_hdlc_private *priv, int rx_work_limit)
        struct sk_buff *skb;
        hdlc_device *hdlc = dev_to_hdlc(dev);
        struct qe_bd *bd;
-       u32 bd_status;
+       u16 bd_status;
        u16 length, howmany = 0;
        u8 *bdbuffer;
        int i;
index e7f5910a65191f4f013ae53db73d2a77510ae9e8..f8eb66ef2944ea9630237455cae4faf483b4f609 100644 (file)
@@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface,
        struct i2400mu *i2400mu;
        struct usb_device *usb_dev = interface_to_usbdev(iface);
 
+       if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+               return -ENODEV;
+
        if (usb_dev->speed != USB_SPEED_HIGH)
                dev_err(dev, "device not connected as high speed\n");
 
index 829b26cd4549a4e07ccdf30ea87d902424ce737b..8397f6c9245158e8b3ff005bc58a419e4250169d 100644 (file)
@@ -165,13 +165,17 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct xenvif *vif = netdev_priv(dev);
        struct xenvif_queue *queue = NULL;
-       unsigned int num_queues = vif->num_queues;
+       unsigned int num_queues;
        u16 index;
        struct xenvif_rx_cb *cb;
 
        BUG_ON(skb->dev != dev);
 
-       /* Drop the packet if queues are not set up */
+       /* Drop the packet if queues are not set up.
+        * This handler should be called inside an RCU read section
+        * so we don't need to enter it here explicitly.
+        */
+       num_queues = READ_ONCE(vif->num_queues);
        if (num_queues < 1)
                goto drop;
 
@@ -222,18 +226,18 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 {
        struct xenvif *vif = netdev_priv(dev);
        struct xenvif_queue *queue = NULL;
+       unsigned int num_queues;
        u64 rx_bytes = 0;
        u64 rx_packets = 0;
        u64 tx_bytes = 0;
        u64 tx_packets = 0;
        unsigned int index;
 
-       spin_lock(&vif->lock);
-       if (vif->queues == NULL)
-               goto out;
+       rcu_read_lock();
+       num_queues = READ_ONCE(vif->num_queues);
 
        /* Aggregate tx and rx stats from each queue */
-       for (index = 0; index < vif->num_queues; ++index) {
+       for (index = 0; index < num_queues; ++index) {
                queue = &vif->queues[index];
                rx_bytes += queue->stats.rx_bytes;
                rx_packets += queue->stats.rx_packets;
@@ -241,8 +245,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
                tx_packets += queue->stats.tx_packets;
        }
 
-out:
-       spin_unlock(&vif->lock);
+       rcu_read_unlock();
 
        vif->dev->stats.rx_bytes = rx_bytes;
        vif->dev->stats.rx_packets = rx_packets;
@@ -378,10 +381,13 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
                                     struct ethtool_stats *stats, u64 * data)
 {
        struct xenvif *vif = netdev_priv(dev);
-       unsigned int num_queues = vif->num_queues;
+       unsigned int num_queues;
        int i;
        unsigned int queue_index;
 
+       rcu_read_lock();
+       num_queues = READ_ONCE(vif->num_queues);
+
        for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
                unsigned long accum = 0;
                for (queue_index = 0; queue_index < num_queues; ++queue_index) {
@@ -390,6 +396,8 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
                }
                data[i] = accum;
        }
+
+       rcu_read_unlock();
 }
 
 static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
index f9bcf4a665bcaebc4f33bd28849cef2dadc4f698..602d408fa25e98a4651716b1390d2507bced4605 100644 (file)
@@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif)
        netdev_err(vif->dev, "fatal error; disabling device\n");
        vif->disabled = true;
        /* Disable the vif from queue 0's kthread */
-       if (vif->queues)
+       if (vif->num_queues)
                xenvif_kick_thread(&vif->queues[0]);
 }
 
index d2d7cd9145b1c259a1f0f11414acca706e67bb7a..a56d3eab35dd650c4acfcda9e981c0220cba9e61 100644 (file)
@@ -495,26 +495,26 @@ static void backend_disconnect(struct backend_info *be)
        struct xenvif *vif = be->vif;
 
        if (vif) {
+               unsigned int num_queues = vif->num_queues;
                unsigned int queue_index;
-               struct xenvif_queue *queues;
 
                xen_unregister_watchers(vif);
 #ifdef CONFIG_DEBUG_FS
                xenvif_debugfs_delif(vif);
 #endif /* CONFIG_DEBUG_FS */
                xenvif_disconnect_data(vif);
-               for (queue_index = 0;
-                    queue_index < vif->num_queues;
-                    ++queue_index)
-                       xenvif_deinit_queue(&vif->queues[queue_index]);
 
-               spin_lock(&vif->lock);
-               queues = vif->queues;
+               /* At this point some of the handlers may still be active
+                * so we need to have additional synchronization here.
+                */
                vif->num_queues = 0;
-               vif->queues = NULL;
-               spin_unlock(&vif->lock);
+               synchronize_net();
 
-               vfree(queues);
+               for (queue_index = 0; queue_index < num_queues; ++queue_index)
+                       xenvif_deinit_queue(&vif->queues[queue_index]);
+
+               vfree(vif->queues);
+               vif->queues = NULL;
 
                xenvif_disconnect_ctrl(vif);
        }
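
Taken together, the xen-netback hunks above move the fast-path readers from vif->lock to RCU: readers sample vif->num_queues once with READ_ONCE() inside an RCU read-side section, and teardown publishes num_queues = 0 and waits with synchronize_net() before freeing the queue array. A schematic of that pairing in kernel-style C; the struct, consume_queue() and deinit_queue() names are shortened stand-ins, so this is a sketch of the pattern rather than the driver code:

    struct queue_like {
            unsigned long stats;    /* per-queue state in the real driver */
    };

    struct vif_like {
            unsigned int num_queues;
            struct queue_like *queues;
    };

    /* Reader side (e.g. start_xmit/get_stats): sample the count once
     * inside an RCU read-side section and never walk past it. */
    static void reader(struct vif_like *vif)
    {
            unsigned int num_queues, i;

            rcu_read_lock();
            num_queues = READ_ONCE(vif->num_queues);
            for (i = 0; i < num_queues; i++)
                    consume_queue(&vif->queues[i]);
            rcu_read_unlock();
    }

    /* Teardown side (backend_disconnect): publish "no queues", wait out
     * any reader that may still hold the old count, then free. */
    static void teardown(struct vif_like *vif)
    {
            unsigned int num_queues = vif->num_queues;
            unsigned int i;

            vif->num_queues = 0;
            synchronize_net();              /* RCU grace period */
            for (i = 0; i < num_queues; i++)
                    deinit_queue(&vif->queues[i]);
            vfree(vif->queues);
            vif->queues = NULL;
    }
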
index 5be4783e40d4c9e9ca547d89d5faa6c1437f927a..dea98ffb6f606a6079f40607c8f5476c7a738f7d 100644 (file)
@@ -103,15 +103,6 @@ static struct quirk_entry quirk_asus_x200ca = {
        .wapf = 2,
 };
 
-static struct quirk_entry quirk_no_rfkill = {
-       .no_rfkill = true,
-};
-
-static struct quirk_entry quirk_no_rfkill_wapf4 = {
-       .wapf = 4,
-       .no_rfkill = true,
-};
-
 static struct quirk_entry quirk_asus_ux303ub = {
        .wmi_backlight_native = true,
 };
@@ -194,7 +185,7 @@ static const struct dmi_system_id asus_quirks[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
                        DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"),
                },
-               .driver_data = &quirk_no_rfkill_wapf4,
+               .driver_data = &quirk_asus_wapf4,
        },
        {
                .callback = dmi_matched,
@@ -203,7 +194,7 @@ static const struct dmi_system_id asus_quirks[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
                        DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"),
                },
-               .driver_data = &quirk_no_rfkill_wapf4,
+               .driver_data = &quirk_asus_wapf4,
        },
        {
                .callback = dmi_matched,
@@ -367,42 +358,6 @@ static const struct dmi_system_id asus_quirks[] = {
                },
                .driver_data = &quirk_asus_x200ca,
        },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. X555UB",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "X555UB"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. N552VW",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "N552VW"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. U303LB",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "U303LB"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
-       {
-               .callback = dmi_matched,
-               .ident = "ASUSTeK COMPUTER INC. Z550MA",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Z550MA"),
-               },
-               .driver_data = &quirk_no_rfkill,
-       },
        {
                .callback = dmi_matched,
                .ident = "ASUSTeK COMPUTER INC. UX303UB",
index 43cb680adbb42045aaeea332220460440180215a..8fe5890bf539f4f2eb722139f4c56a3065403e92 100644 (file)
@@ -159,6 +159,8 @@ MODULE_LICENSE("GPL");
 #define USB_INTEL_XUSB2PR              0xD0
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI  0x9c31
 
+static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
+
 struct bios_args {
        u32 arg0;
        u32 arg1;
@@ -2051,6 +2053,16 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
        return 0;
 }
 
+static bool ashs_present(void)
+{
+       int i = 0;
+       while (ashs_ids[i]) {
+               if (acpi_dev_found(ashs_ids[i++]))
+                       return true;
+       }
+       return false;
+}
+
 /*
  * WMI Driver
  */
@@ -2095,7 +2107,11 @@ static int asus_wmi_add(struct platform_device *pdev)
        if (err)
                goto fail_leds;
 
-       if (!asus->driver->quirks->no_rfkill) {
+       asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
+       if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
+               asus->driver->wlan_ctrl_by_user = 1;
+
+       if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) {
                err = asus_wmi_rfkill_init(asus);
                if (err)
                        goto fail_rfkill;
@@ -2134,10 +2150,6 @@ static int asus_wmi_add(struct platform_device *pdev)
        if (err)
                goto fail_debugfs;
 
-       asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
-       if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
-               asus->driver->wlan_ctrl_by_user = 1;
-
        return 0;
 
 fail_debugfs:
index fdff626c3b51b039f3b63473a6cf333d04fda819..c9589d9342bbf8f883c49abbcd7cebf9a23c608e 100644 (file)
@@ -39,7 +39,6 @@ struct key_entry;
 struct asus_wmi;
 
 struct quirk_entry {
-       bool no_rfkill;
        bool hotplug_wireless;
        bool scalar_panel_brightness;
        bool store_backlight_power;
index 2b218b1d13e55dc985a2ca27e44b6a6ddf905141..e12cc3504d48799b447e636e21c2c6c440fc5827 100644 (file)
 
 #define FUJITSU_LCD_N_LEVELS 8
 
-#define ACPI_FUJITSU_CLASS              "fujitsu"
-#define ACPI_FUJITSU_HID                "FUJ02B1"
-#define ACPI_FUJITSU_DRIVER_NAME       "Fujitsu laptop FUJ02B1 ACPI brightness driver"
-#define ACPI_FUJITSU_DEVICE_NAME        "Fujitsu FUJ02B1"
-#define ACPI_FUJITSU_HOTKEY_HID        "FUJ02E3"
-#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
-#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3"
+#define ACPI_FUJITSU_CLASS             "fujitsu"
+#define ACPI_FUJITSU_BL_HID            "FUJ02B1"
+#define ACPI_FUJITSU_BL_DRIVER_NAME    "Fujitsu laptop FUJ02B1 ACPI brightness driver"
+#define ACPI_FUJITSU_BL_DEVICE_NAME    "Fujitsu FUJ02B1"
+#define ACPI_FUJITSU_LAPTOP_HID                "FUJ02E3"
+#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME        "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
+#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME        "Fujitsu FUJ02E3"
 
 #define ACPI_FUJITSU_NOTIFY_CODE1     0x80
 
 /* FUNC interface - command values */
-#define FUNC_RFKILL    0x1000
+#define FUNC_FLAGS     0x1000
 #define FUNC_LEDS      0x1001
 #define FUNC_BUTTONS   0x1002
 #define FUNC_BACKLIGHT  0x1004
 /* FUNC interface - responses */
 #define UNSUPPORTED_CMD 0x80000000
 
+/* FUNC interface - status flags */
+#define FLAG_RFKILL    0x020
+#define FLAG_LID       0x100
+#define FLAG_DOCK      0x200
+
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
 /* FUNC interface - LED control */
 #define FUNC_LED_OFF   0x1
 #endif
 
 /* Device controlling the backlight and associated keys */
-struct fujitsu_t {
+struct fujitsu_bl {
        acpi_handle acpi_handle;
        struct acpi_device *dev;
        struct input_dev *input;
@@ -150,12 +155,12 @@ struct fujitsu_t {
        unsigned int brightness_level;
 };
 
-static struct fujitsu_t *fujitsu;
+static struct fujitsu_bl *fujitsu_bl;
 static int use_alt_lcd_levels = -1;
 static int disable_brightness_adjust = -1;
 
-/* Device used to access other hotkeys on the laptop */
-struct fujitsu_hotkey_t {
+/* Device used to access hotkeys and other features on the laptop */
+struct fujitsu_laptop {
        acpi_handle acpi_handle;
        struct acpi_device *dev;
        struct input_dev *input;
@@ -163,17 +168,15 @@ struct fujitsu_hotkey_t {
        struct platform_device *pf_device;
        struct kfifo fifo;
        spinlock_t fifo_lock;
-       int rfkill_supported;
-       int rfkill_state;
+       int flags_supported;
+       int flags_state;
        int logolamp_registered;
        int kblamps_registered;
        int radio_led_registered;
        int eco_led_registered;
 };
 
-static struct fujitsu_hotkey_t *fujitsu_hotkey;
-
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event);
+static struct fujitsu_laptop *fujitsu_laptop;
 
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
 static enum led_brightness logolamp_get(struct led_classdev *cdev);
@@ -222,8 +225,6 @@ static struct led_classdev eco_led = {
 static u32 dbg_level = 0x03;
 #endif
 
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event);
-
 /* Fujitsu ACPI interface function */
 
 static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
@@ -239,7 +240,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
        unsigned long long value;
        acpi_handle handle = NULL;
 
-       status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle);
+       status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle);
        if (ACPI_FAILURE(status)) {
                vdbg_printk(FUJLAPTOP_DBG_ERROR,
                                "FUNC interface is not present\n");
@@ -300,9 +301,9 @@ static int radio_led_set(struct led_classdev *cdev,
                                enum led_brightness brightness)
 {
        if (brightness >= LED_FULL)
-               return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON);
+               return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON);
        else
-               return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0);
+               return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0);
 }
 
 static int eco_led_set(struct led_classdev *cdev,
@@ -346,7 +347,7 @@ static enum led_brightness radio_led_get(struct led_classdev *cdev)
 {
        enum led_brightness brightness = LED_OFF;
 
-       if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON)
+       if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON)
                brightness = LED_FULL;
 
        return brightness;
@@ -373,10 +374,10 @@ static int set_lcd_level(int level)
        vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n",
                    level);
 
-       if (level < 0 || level >= fujitsu->max_brightness)
+       if (level < 0 || level >= fujitsu_bl->max_brightness)
                return -EINVAL;
 
-       status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
+       status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle);
        if (ACPI_FAILURE(status)) {
                vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n");
                return -ENODEV;
@@ -398,10 +399,10 @@ static int set_lcd_level_alt(int level)
        vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n",
                    level);
 
-       if (level < 0 || level >= fujitsu->max_brightness)
+       if (level < 0 || level >= fujitsu_bl->max_brightness)
                return -EINVAL;
 
-       status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle);
+       status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle);
        if (ACPI_FAILURE(status)) {
                vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n");
                return -ENODEV;
@@ -421,19 +422,19 @@ static int get_lcd_level(void)
 
        vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n");
 
-       status =
-           acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
+       status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL,
+                                      &state);
        if (ACPI_FAILURE(status))
                return 0;
 
-       fujitsu->brightness_level = state & 0x0fffffff;
+       fujitsu_bl->brightness_level = state & 0x0fffffff;
 
        if (state & 0x80000000)
-               fujitsu->brightness_changed = 1;
+               fujitsu_bl->brightness_changed = 1;
        else
-               fujitsu->brightness_changed = 0;
+               fujitsu_bl->brightness_changed = 0;
 
-       return fujitsu->brightness_level;
+       return fujitsu_bl->brightness_level;
 }
 
 static int get_max_brightness(void)
@@ -443,14 +444,14 @@ static int get_max_brightness(void)
 
        vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n");
 
-       status =
-           acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state);
+       status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL,
+                                      &state);
        if (ACPI_FAILURE(status))
                return -1;
 
-       fujitsu->max_brightness = state;
+       fujitsu_bl->max_brightness = state;
 
-       return fujitsu->max_brightness;
+       return fujitsu_bl->max_brightness;
 }
 
 /* Backlight device stuff */
@@ -483,7 +484,7 @@ static int bl_update_status(struct backlight_device *b)
        return ret;
 }
 
-static const struct backlight_ops fujitsubl_ops = {
+static const struct backlight_ops fujitsu_bl_ops = {
        .get_brightness = bl_get_brightness,
        .update_status = bl_update_status,
 };
@@ -511,7 +512,7 @@ show_brightness_changed(struct device *dev,
 
        int ret;
 
-       ret = fujitsu->brightness_changed;
+       ret = fujitsu_bl->brightness_changed;
        if (ret < 0)
                return ret;
 
@@ -539,7 +540,7 @@ static ssize_t store_lcd_level(struct device *dev,
        int level, ret;
 
        if (sscanf(buf, "%i", &level) != 1
-           || (level < 0 || level >= fujitsu->max_brightness))
+           || (level < 0 || level >= fujitsu_bl->max_brightness))
                return -EINVAL;
 
        if (use_alt_lcd_levels)
@@ -567,9 +568,9 @@ static ssize_t
 show_lid_state(struct device *dev,
                        struct device_attribute *attr, char *buf)
 {
-       if (!(fujitsu_hotkey->rfkill_supported & 0x100))
+       if (!(fujitsu_laptop->flags_supported & FLAG_LID))
                return sprintf(buf, "unknown\n");
-       if (fujitsu_hotkey->rfkill_state & 0x100)
+       if (fujitsu_laptop->flags_state & FLAG_LID)
                return sprintf(buf, "open\n");
        else
                return sprintf(buf, "closed\n");
@@ -579,9 +580,9 @@ static ssize_t
 show_dock_state(struct device *dev,
                        struct device_attribute *attr, char *buf)
 {
-       if (!(fujitsu_hotkey->rfkill_supported & 0x200))
+       if (!(fujitsu_laptop->flags_supported & FLAG_DOCK))
                return sprintf(buf, "unknown\n");
-       if (fujitsu_hotkey->rfkill_state & 0x200)
+       if (fujitsu_laptop->flags_state & FLAG_DOCK)
                return sprintf(buf, "docked\n");
        else
                return sprintf(buf, "undocked\n");
@@ -591,9 +592,9 @@ static ssize_t
 show_radios_state(struct device *dev,
                        struct device_attribute *attr, char *buf)
 {
-       if (!(fujitsu_hotkey->rfkill_supported & 0x20))
+       if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL))
                return sprintf(buf, "unknown\n");
-       if (fujitsu_hotkey->rfkill_state & 0x20)
+       if (fujitsu_laptop->flags_state & FLAG_RFKILL)
                return sprintf(buf, "on\n");
        else
                return sprintf(buf, "killed\n");
@@ -607,7 +608,7 @@ static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store);
 static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store);
 static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store);
 
-static struct attribute *fujitsupf_attributes[] = {
+static struct attribute *fujitsu_pf_attributes[] = {
        &dev_attr_brightness_changed.attr,
        &dev_attr_max_brightness.attr,
        &dev_attr_lcd_level.attr,
@@ -617,11 +618,11 @@ static struct attribute *fujitsupf_attributes[] = {
        NULL
 };
 
-static struct attribute_group fujitsupf_attribute_group = {
-       .attrs = fujitsupf_attributes
+static struct attribute_group fujitsu_pf_attribute_group = {
+       .attrs = fujitsu_pf_attributes
 };
 
-static struct platform_driver fujitsupf_driver = {
+static struct platform_driver fujitsu_pf_driver = {
        .driver = {
                   .name = "fujitsu-laptop",
                   }
@@ -630,39 +631,30 @@ static struct platform_driver fujitsupf_driver = {
 static void __init dmi_check_cb_common(const struct dmi_system_id *id)
 {
        pr_info("Identified laptop model '%s'\n", id->ident);
-       if (use_alt_lcd_levels == -1) {
-               if (acpi_has_method(NULL,
-                               "\\_SB.PCI0.LPCB.FJEX.SBL2"))
-                       use_alt_lcd_levels = 1;
-               else
-                       use_alt_lcd_levels = 0;
-               vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as "
-                       "%i\n", use_alt_lcd_levels);
-       }
 }
 
 static int __init dmi_check_cb_s6410(const struct dmi_system_id *id)
 {
        dmi_check_cb_common(id);
-       fujitsu->keycode1 = KEY_SCREENLOCK;     /* "Lock" */
-       fujitsu->keycode2 = KEY_HELP;   /* "Mobility Center" */
+       fujitsu_bl->keycode1 = KEY_SCREENLOCK;  /* "Lock" */
+       fujitsu_bl->keycode2 = KEY_HELP;        /* "Mobility Center" */
        return 1;
 }
 
 static int __init dmi_check_cb_s6420(const struct dmi_system_id *id)
 {
        dmi_check_cb_common(id);
-       fujitsu->keycode1 = KEY_SCREENLOCK;     /* "Lock" */
-       fujitsu->keycode2 = KEY_HELP;   /* "Mobility Center" */
+       fujitsu_bl->keycode1 = KEY_SCREENLOCK;  /* "Lock" */
+       fujitsu_bl->keycode2 = KEY_HELP;        /* "Mobility Center" */
        return 1;
 }
 
 static int __init dmi_check_cb_p8010(const struct dmi_system_id *id)
 {
        dmi_check_cb_common(id);
-       fujitsu->keycode1 = KEY_HELP;   /* "Support" */
-       fujitsu->keycode3 = KEY_SWITCHVIDEOMODE;        /* "Presentation" */
-       fujitsu->keycode4 = KEY_WWW;    /* "Internet" */
+       fujitsu_bl->keycode1 = KEY_HELP;                /* "Support" */
+       fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE;     /* "Presentation" */
+       fujitsu_bl->keycode4 = KEY_WWW;                 /* "Internet" */
        return 1;
 }
 
@@ -693,7 +685,7 @@ static const struct dmi_system_id fujitsu_dmi_table[] __initconst = {
 
 /* ACPI device for LCD brightness control */
 
-static int acpi_fujitsu_add(struct acpi_device *device)
+static int acpi_fujitsu_bl_add(struct acpi_device *device)
 {
        int state = 0;
        struct input_dev *input;
@@ -702,22 +694,22 @@ static int acpi_fujitsu_add(struct acpi_device *device)
        if (!device)
                return -EINVAL;
 
-       fujitsu->acpi_handle = device->handle;
-       sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME);
+       fujitsu_bl->acpi_handle = device->handle;
+       sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME);
        sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
-       device->driver_data = fujitsu;
+       device->driver_data = fujitsu_bl;
 
-       fujitsu->input = input = input_allocate_device();
+       fujitsu_bl->input = input = input_allocate_device();
        if (!input) {
                error = -ENOMEM;
                goto err_stop;
        }
 
-       snprintf(fujitsu->phys, sizeof(fujitsu->phys),
+       snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys),
                 "%s/video/input0", acpi_device_hid(device));
 
        input->name = acpi_device_name(device);
-       input->phys = fujitsu->phys;
+       input->phys = fujitsu_bl->phys;
        input->id.bustype = BUS_HOST;
        input->id.product = 0x06;
        input->dev.parent = &device->dev;
@@ -730,7 +722,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
        if (error)
                goto err_free_input_dev;
 
-       error = acpi_bus_update_power(fujitsu->acpi_handle, &state);
+       error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state);
        if (error) {
                pr_err("Error reading power state\n");
                goto err_unregister_input_dev;
@@ -740,7 +732,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
               acpi_device_name(device), acpi_device_bid(device),
               !device->power.state ? "on" : "off");
 
-       fujitsu->dev = device;
+       fujitsu_bl->dev = device;
 
        if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
                vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -750,6 +742,15 @@ static int acpi_fujitsu_add(struct acpi_device *device)
                        pr_err("_INI Method failed\n");
        }
 
+       if (use_alt_lcd_levels == -1) {
+               if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2"))
+                       use_alt_lcd_levels = 1;
+               else
+                       use_alt_lcd_levels = 0;
+               vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n",
+                           use_alt_lcd_levels);
+       }
+
        /* do config (detect defaults) */
        use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0;
        disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0;
@@ -758,7 +759,7 @@ static int acpi_fujitsu_add(struct acpi_device *device)
                    use_alt_lcd_levels, disable_brightness_adjust);
 
        if (get_max_brightness() <= 0)
-               fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS;
+               fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS;
        get_lcd_level();
 
        return 0;
@@ -772,38 +773,38 @@ err_stop:
        return error;
 }
 
-static int acpi_fujitsu_remove(struct acpi_device *device)
+static int acpi_fujitsu_bl_remove(struct acpi_device *device)
 {
-       struct fujitsu_t *fujitsu = acpi_driver_data(device);
-       struct input_dev *input = fujitsu->input;
+       struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device);
+       struct input_dev *input = fujitsu_bl->input;
 
        input_unregister_device(input);
 
-       fujitsu->acpi_handle = NULL;
+       fujitsu_bl->acpi_handle = NULL;
 
        return 0;
 }
 
 /* Brightness notify */
 
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
 {
        struct input_dev *input;
        int keycode;
        int oldb, newb;
 
-       input = fujitsu->input;
+       input = fujitsu_bl->input;
 
        switch (event) {
        case ACPI_FUJITSU_NOTIFY_CODE1:
                keycode = 0;
-               oldb = fujitsu->brightness_level;
+               oldb = fujitsu_bl->brightness_level;
                get_lcd_level();
-               newb = fujitsu->brightness_level;
+               newb = fujitsu_bl->brightness_level;
 
                vdbg_printk(FUJLAPTOP_DBG_TRACE,
                            "brightness button event [%i -> %i (%i)]\n",
-                           oldb, newb, fujitsu->brightness_changed);
+                           oldb, newb, fujitsu_bl->brightness_changed);
 
                if (oldb < newb) {
                        if (disable_brightness_adjust != 1) {
@@ -840,7 +841,7 @@ static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
 
 /* ACPI device for hotkey handling */
 
-static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
+static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 {
        int result = 0;
        int state = 0;
@@ -851,42 +852,42 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
        if (!device)
                return -EINVAL;
 
-       fujitsu_hotkey->acpi_handle = device->handle;
+       fujitsu_laptop->acpi_handle = device->handle;
        sprintf(acpi_device_name(device), "%s",
-               ACPI_FUJITSU_HOTKEY_DEVICE_NAME);
+               ACPI_FUJITSU_LAPTOP_DEVICE_NAME);
        sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
-       device->driver_data = fujitsu_hotkey;
+       device->driver_data = fujitsu_laptop;
 
        /* kfifo */
-       spin_lock_init(&fujitsu_hotkey->fifo_lock);
-       error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
+       spin_lock_init(&fujitsu_laptop->fifo_lock);
+       error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int),
                        GFP_KERNEL);
        if (error) {
                pr_err("kfifo_alloc failed\n");
                goto err_stop;
        }
 
-       fujitsu_hotkey->input = input = input_allocate_device();
+       fujitsu_laptop->input = input = input_allocate_device();
        if (!input) {
                error = -ENOMEM;
                goto err_free_fifo;
        }
 
-       snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys),
+       snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys),
                 "%s/video/input0", acpi_device_hid(device));
 
        input->name = acpi_device_name(device);
-       input->phys = fujitsu_hotkey->phys;
+       input->phys = fujitsu_laptop->phys;
        input->id.bustype = BUS_HOST;
        input->id.product = 0x06;
        input->dev.parent = &device->dev;
 
        set_bit(EV_KEY, input->evbit);
-       set_bit(fujitsu->keycode1, input->keybit);
-       set_bit(fujitsu->keycode2, input->keybit);
-       set_bit(fujitsu->keycode3, input->keybit);
-       set_bit(fujitsu->keycode4, input->keybit);
-       set_bit(fujitsu->keycode5, input->keybit);
+       set_bit(fujitsu_bl->keycode1, input->keybit);
+       set_bit(fujitsu_bl->keycode2, input->keybit);
+       set_bit(fujitsu_bl->keycode3, input->keybit);
+       set_bit(fujitsu_bl->keycode4, input->keybit);
+       set_bit(fujitsu_bl->keycode5, input->keybit);
        set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit);
        set_bit(KEY_UNKNOWN, input->keybit);
 
@@ -894,7 +895,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
        if (error)
                goto err_free_input_dev;
 
-       error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state);
+       error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state);
        if (error) {
                pr_err("Error reading power state\n");
                goto err_unregister_input_dev;
@@ -904,7 +905,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
                acpi_device_name(device), acpi_device_bid(device),
                !device->power.state ? "on" : "off");
 
-       fujitsu_hotkey->dev = device;
+       fujitsu_laptop->dev = device;
 
        if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
                vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
@@ -920,27 +921,27 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
                ; /* No action, result is discarded */
        vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
 
-       fujitsu_hotkey->rfkill_supported =
-               call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0);
+       fujitsu_laptop->flags_supported =
+               call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0);
 
        /* Make sure our bitmask of supported functions is cleared if the
           RFKILL function block is not implemented, like on the S7020. */
-       if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD)
-               fujitsu_hotkey->rfkill_supported = 0;
+       if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD)
+               fujitsu_laptop->flags_supported = 0;
 
-       if (fujitsu_hotkey->rfkill_supported)
-               fujitsu_hotkey->rfkill_state =
-                       call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+       if (fujitsu_laptop->flags_supported)
+               fujitsu_laptop->flags_state =
+                       call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
 
        /* Suspect this is a keymap of the application panel, print it */
        pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
 
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
        if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                &logolamp_led);
                if (result == 0) {
-                       fujitsu_hotkey->logolamp_registered = 1;
+                       fujitsu_laptop->logolamp_registered = 1;
                } else {
                        pr_err("Could not register LED handler for logo lamp, error %i\n",
                               result);
@@ -949,10 +950,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 
        if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
           (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                &kblamps_led);
                if (result == 0) {
-                       fujitsu_hotkey->kblamps_registered = 1;
+                       fujitsu_laptop->kblamps_registered = 1;
                } else {
                        pr_err("Could not register LED handler for keyboard lamps, error %i\n",
                               result);
@@ -966,10 +967,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
         * that an RF LED is present.
         */
        if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                &radio_led);
                if (result == 0) {
-                       fujitsu_hotkey->radio_led_registered = 1;
+                       fujitsu_laptop->radio_led_registered = 1;
                } else {
                        pr_err("Could not register LED handler for radio LED, error %i\n",
                               result);
@@ -983,10 +984,10 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
        */
        if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) &&
           (call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) {
-               result = led_classdev_register(&fujitsu->pf_device->dev,
+               result = led_classdev_register(&fujitsu_bl->pf_device->dev,
                                                &eco_led);
                if (result == 0) {
-                       fujitsu_hotkey->eco_led_registered = 1;
+                       fujitsu_laptop->eco_led_registered = 1;
                } else {
                        pr_err("Could not register LED handler for eco LED, error %i\n",
                               result);
@@ -1002,47 +1003,47 @@ err_unregister_input_dev:
 err_free_input_dev:
        input_free_device(input);
 err_free_fifo:
-       kfifo_free(&fujitsu_hotkey->fifo);
+       kfifo_free(&fujitsu_laptop->fifo);
 err_stop:
        return error;
 }
 
-static int acpi_fujitsu_hotkey_remove(struct acpi_device *device)
+static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
 {
-       struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device);
-       struct input_dev *input = fujitsu_hotkey->input;
+       struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device);
+       struct input_dev *input = fujitsu_laptop->input;
 
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
-       if (fujitsu_hotkey->logolamp_registered)
+       if (fujitsu_laptop->logolamp_registered)
                led_classdev_unregister(&logolamp_led);
 
-       if (fujitsu_hotkey->kblamps_registered)
+       if (fujitsu_laptop->kblamps_registered)
                led_classdev_unregister(&kblamps_led);
 
-       if (fujitsu_hotkey->radio_led_registered)
+       if (fujitsu_laptop->radio_led_registered)
                led_classdev_unregister(&radio_led);
 
-       if (fujitsu_hotkey->eco_led_registered)
+       if (fujitsu_laptop->eco_led_registered)
                led_classdev_unregister(&eco_led);
 #endif
 
        input_unregister_device(input);
 
-       kfifo_free(&fujitsu_hotkey->fifo);
+       kfifo_free(&fujitsu_laptop->fifo);
 
-       fujitsu_hotkey->acpi_handle = NULL;
+       fujitsu_laptop->acpi_handle = NULL;
 
        return 0;
 }
 
-static void acpi_fujitsu_hotkey_press(int keycode)
+static void acpi_fujitsu_laptop_press(int keycode)
 {
-       struct input_dev *input = fujitsu_hotkey->input;
+       struct input_dev *input = fujitsu_laptop->input;
        int status;
 
-       status = kfifo_in_locked(&fujitsu_hotkey->fifo,
+       status = kfifo_in_locked(&fujitsu_laptop->fifo,
                                 (unsigned char *)&keycode, sizeof(keycode),
-                                &fujitsu_hotkey->fifo_lock);
+                                &fujitsu_laptop->fifo_lock);
        if (status != sizeof(keycode)) {
                vdbg_printk(FUJLAPTOP_DBG_WARN,
                            "Could not push keycode [0x%x]\n", keycode);
@@ -1054,16 +1055,16 @@ static void acpi_fujitsu_hotkey_press(int keycode)
                    "Push keycode into ringbuffer [%d]\n", keycode);
 }
 
-static void acpi_fujitsu_hotkey_release(void)
+static void acpi_fujitsu_laptop_release(void)
 {
-       struct input_dev *input = fujitsu_hotkey->input;
+       struct input_dev *input = fujitsu_laptop->input;
        int keycode, status;
 
        while (true) {
-               status = kfifo_out_locked(&fujitsu_hotkey->fifo,
+               status = kfifo_out_locked(&fujitsu_laptop->fifo,
                                          (unsigned char *)&keycode,
                                          sizeof(keycode),
-                                         &fujitsu_hotkey->fifo_lock);
+                                         &fujitsu_laptop->fifo_lock);
                if (status != sizeof(keycode))
                        return;
                input_report_key(input, keycode, 0);
@@ -1073,14 +1074,14 @@ static void acpi_fujitsu_hotkey_release(void)
        }
 }
 
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 {
        struct input_dev *input;
        int keycode;
        unsigned int irb = 1;
        int i;
 
-       input = fujitsu_hotkey->input;
+       input = fujitsu_laptop->input;
 
        if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
                keycode = KEY_UNKNOWN;
@@ -1093,9 +1094,9 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
                return;
        }
 
-       if (fujitsu_hotkey->rfkill_supported)
-               fujitsu_hotkey->rfkill_state =
-                       call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+       if (fujitsu_laptop->flags_supported)
+               fujitsu_laptop->flags_state =
+                       call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
 
        i = 0;
        while ((irb =
@@ -1103,19 +1104,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
                        && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
                switch (irb & 0x4ff) {
                case KEY1_CODE:
-                       keycode = fujitsu->keycode1;
+                       keycode = fujitsu_bl->keycode1;
                        break;
                case KEY2_CODE:
-                       keycode = fujitsu->keycode2;
+                       keycode = fujitsu_bl->keycode2;
                        break;
                case KEY3_CODE:
-                       keycode = fujitsu->keycode3;
+                       keycode = fujitsu_bl->keycode3;
                        break;
                case KEY4_CODE:
-                       keycode = fujitsu->keycode4;
+                       keycode = fujitsu_bl->keycode4;
                        break;
                case KEY5_CODE:
-                       keycode = fujitsu->keycode5;
+                       keycode = fujitsu_bl->keycode5;
                        break;
                case 0:
                        keycode = 0;
@@ -1128,17 +1129,17 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
                }
 
                if (keycode > 0)
-                       acpi_fujitsu_hotkey_press(keycode);
+                       acpi_fujitsu_laptop_press(keycode);
                else if (keycode == 0)
-                       acpi_fujitsu_hotkey_release();
+                       acpi_fujitsu_laptop_release();
        }
 
        /* On some models (first seen on the Skylake-based Lifebook
         * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
-        * handled in software; its state is queried using FUNC_RFKILL
+        * handled in software; its state is queried using FUNC_FLAGS
         */
-       if ((fujitsu_hotkey->rfkill_supported & BIT(26)) &&
-           (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
+       if ((fujitsu_laptop->flags_supported & BIT(26)) &&
+           (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) {
                keycode = KEY_TOUCHPAD_TOGGLE;
                input_report_key(input, keycode, 1);
                input_sync(input);
@@ -1150,83 +1151,81 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 
 /* Initialization */
 
-static const struct acpi_device_id fujitsu_device_ids[] = {
-       {ACPI_FUJITSU_HID, 0},
+static const struct acpi_device_id fujitsu_bl_device_ids[] = {
+       {ACPI_FUJITSU_BL_HID, 0},
        {"", 0},
 };
 
-static struct acpi_driver acpi_fujitsu_driver = {
-       .name = ACPI_FUJITSU_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_bl_driver = {
+       .name = ACPI_FUJITSU_BL_DRIVER_NAME,
        .class = ACPI_FUJITSU_CLASS,
-       .ids = fujitsu_device_ids,
+       .ids = fujitsu_bl_device_ids,
        .ops = {
-               .add = acpi_fujitsu_add,
-               .remove = acpi_fujitsu_remove,
-               .notify = acpi_fujitsu_notify,
+               .add = acpi_fujitsu_bl_add,
+               .remove = acpi_fujitsu_bl_remove,
+               .notify = acpi_fujitsu_bl_notify,
                },
 };
 
-static const struct acpi_device_id fujitsu_hotkey_device_ids[] = {
-       {ACPI_FUJITSU_HOTKEY_HID, 0},
+static const struct acpi_device_id fujitsu_laptop_device_ids[] = {
+       {ACPI_FUJITSU_LAPTOP_HID, 0},
        {"", 0},
 };
 
-static struct acpi_driver acpi_fujitsu_hotkey_driver = {
-       .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_laptop_driver = {
+       .name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME,
        .class = ACPI_FUJITSU_CLASS,
-       .ids = fujitsu_hotkey_device_ids,
+       .ids = fujitsu_laptop_device_ids,
        .ops = {
-               .add = acpi_fujitsu_hotkey_add,
-               .remove = acpi_fujitsu_hotkey_remove,
-               .notify = acpi_fujitsu_hotkey_notify,
+               .add = acpi_fujitsu_laptop_add,
+               .remove = acpi_fujitsu_laptop_remove,
+               .notify = acpi_fujitsu_laptop_notify,
                },
 };
 
 static const struct acpi_device_id fujitsu_ids[] __used = {
-       {ACPI_FUJITSU_HID, 0},
-       {ACPI_FUJITSU_HOTKEY_HID, 0},
+       {ACPI_FUJITSU_BL_HID, 0},
+       {ACPI_FUJITSU_LAPTOP_HID, 0},
        {"", 0}
 };
 MODULE_DEVICE_TABLE(acpi, fujitsu_ids);
 
 static int __init fujitsu_init(void)
 {
-       int ret, result, max_brightness;
+       int ret, max_brightness;
 
        if (acpi_disabled)
                return -ENODEV;
 
-       fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL);
-       if (!fujitsu)
+       fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL);
+       if (!fujitsu_bl)
                return -ENOMEM;
-       fujitsu->keycode1 = KEY_PROG1;
-       fujitsu->keycode2 = KEY_PROG2;
-       fujitsu->keycode3 = KEY_PROG3;
-       fujitsu->keycode4 = KEY_PROG4;
-       fujitsu->keycode5 = KEY_RFKILL;
+       fujitsu_bl->keycode1 = KEY_PROG1;
+       fujitsu_bl->keycode2 = KEY_PROG2;
+       fujitsu_bl->keycode3 = KEY_PROG3;
+       fujitsu_bl->keycode4 = KEY_PROG4;
+       fujitsu_bl->keycode5 = KEY_RFKILL;
        dmi_check_system(fujitsu_dmi_table);
 
-       result = acpi_bus_register_driver(&acpi_fujitsu_driver);
-       if (result < 0) {
-               ret = -ENODEV;
+       ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver);
+       if (ret)
                goto fail_acpi;
-       }
 
        /* Register platform stuff */
 
-       fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
-       if (!fujitsu->pf_device) {
+       fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1);
+       if (!fujitsu_bl->pf_device) {
                ret = -ENOMEM;
                goto fail_platform_driver;
        }
 
-       ret = platform_device_add(fujitsu->pf_device);
+       ret = platform_device_add(fujitsu_bl->pf_device);
        if (ret)
                goto fail_platform_device1;
 
        ret =
-           sysfs_create_group(&fujitsu->pf_device->dev.kobj,
-                              &fujitsupf_attribute_group);
+           sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj,
+                              &fujitsu_pf_attribute_group);
        if (ret)
                goto fail_platform_device2;
 
@@ -1236,90 +1235,88 @@ static int __init fujitsu_init(void)
                struct backlight_properties props;
 
                memset(&props, 0, sizeof(struct backlight_properties));
-               max_brightness = fujitsu->max_brightness;
+               max_brightness = fujitsu_bl->max_brightness;
                props.type = BACKLIGHT_PLATFORM;
                props.max_brightness = max_brightness - 1;
-               fujitsu->bl_device = backlight_device_register("fujitsu-laptop",
-                                                              NULL, NULL,
-                                                              &fujitsubl_ops,
-                                                              &props);
-               if (IS_ERR(fujitsu->bl_device)) {
-                       ret = PTR_ERR(fujitsu->bl_device);
-                       fujitsu->bl_device = NULL;
+               fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop",
+                                                                 NULL, NULL,
+                                                                 &fujitsu_bl_ops,
+                                                                 &props);
+               if (IS_ERR(fujitsu_bl->bl_device)) {
+                       ret = PTR_ERR(fujitsu_bl->bl_device);
+                       fujitsu_bl->bl_device = NULL;
                        goto fail_sysfs_group;
                }
-               fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
+               fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level;
        }
 
-       ret = platform_driver_register(&fujitsupf_driver);
+       ret = platform_driver_register(&fujitsu_pf_driver);
        if (ret)
                goto fail_backlight;
 
-       /* Register hotkey driver */
+       /* Register laptop driver */
 
-       fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL);
-       if (!fujitsu_hotkey) {
+       fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL);
+       if (!fujitsu_laptop) {
                ret = -ENOMEM;
-               goto fail_hotkey;
+               goto fail_laptop;
        }
 
-       result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver);
-       if (result < 0) {
-               ret = -ENODEV;
-               goto fail_hotkey1;
-       }
+       ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver);
+       if (ret)
+               goto fail_laptop1;
 
        /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
        if (acpi_video_get_backlight_type() == acpi_backlight_vendor) {
                if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
-                       fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN;
+                       fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
                else
-                       fujitsu->bl_device->props.power = FB_BLANK_UNBLANK;
+                       fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
        }
 
        pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n");
 
        return 0;
 
-fail_hotkey1:
-       kfree(fujitsu_hotkey);
-fail_hotkey:
-       platform_driver_unregister(&fujitsupf_driver);
+fail_laptop1:
+       kfree(fujitsu_laptop);
+fail_laptop:
+       platform_driver_unregister(&fujitsu_pf_driver);
 fail_backlight:
-       backlight_device_unregister(fujitsu->bl_device);
+       backlight_device_unregister(fujitsu_bl->bl_device);
 fail_sysfs_group:
-       sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
-                          &fujitsupf_attribute_group);
+       sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+                          &fujitsu_pf_attribute_group);
 fail_platform_device2:
-       platform_device_del(fujitsu->pf_device);
+       platform_device_del(fujitsu_bl->pf_device);
 fail_platform_device1:
-       platform_device_put(fujitsu->pf_device);
+       platform_device_put(fujitsu_bl->pf_device);
 fail_platform_driver:
-       acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+       acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
 fail_acpi:
-       kfree(fujitsu);
+       kfree(fujitsu_bl);
 
        return ret;
 }
 
 static void __exit fujitsu_cleanup(void)
 {
-       acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
+       acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver);
 
-       kfree(fujitsu_hotkey);
+       kfree(fujitsu_laptop);
 
-       platform_driver_unregister(&fujitsupf_driver);
+       platform_driver_unregister(&fujitsu_pf_driver);
 
-       backlight_device_unregister(fujitsu->bl_device);
+       backlight_device_unregister(fujitsu_bl->bl_device);
 
-       sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
-                          &fujitsupf_attribute_group);
+       sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+                          &fujitsu_pf_attribute_group);
 
-       platform_device_unregister(fujitsu->pf_device);
+       platform_device_unregister(fujitsu_bl->pf_device);
 
-       acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+       acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
 
-       kfree(fujitsu);
+       kfree(fujitsu_bl);
 
        pr_info("driver unloaded\n");
 }
@@ -1341,7 +1338,3 @@ MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon");
 MODULE_DESCRIPTION("Fujitsu laptop extras support");
 MODULE_VERSION(FUJITSU_DRIVER_VERSION);
 MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*");
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*");
-MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*");
index 438d4c72c7b36c27982ed8e60b9a42cbff58dc8f..ff563db025b3eb6afa8b5f4345281573328695de 100644 (file)
@@ -54,7 +54,7 @@ __asm__(".text                        \n"
 
 #define Q2_SET_SEL(cpu, selname, address, size) \
 do { \
-       struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+       struct desc_struct *gdt = get_cpu_gdt_rw((cpu)); \
        set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
        set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
 } while(0)
@@ -95,8 +95,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
                return PNP_FUNCTION_NOT_SUPPORTED;
 
        cpu = get_cpu();
-       save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
-       get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+       save_desc_40 = get_cpu_gdt_rw(cpu)[0x40 / 8];
+       get_cpu_gdt_rw(cpu)[0x40 / 8] = bad_bios_desc;
 
        /* On some boxes IRQ's during PnP BIOS calls are deadly.  */
        spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -134,7 +134,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
                             :"memory");
        spin_unlock_irqrestore(&pnp_bios_lock, flags);
 
-       get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+       get_cpu_gdt_rw(cpu)[0x40 / 8] = save_desc_40;
        put_cpu();
 
        /* If we get here and this is set then the PnP BIOS faulted on us. */
@@ -477,7 +477,7 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
        pnp_bios_callpoint.segment = PNP_CS16;
 
        for_each_possible_cpu(i) {
-               struct desc_struct *gdt = get_cpu_gdt_table(i);
+               struct desc_struct *gdt = get_cpu_gdt_rw(i);
                if (!gdt)
                        continue;
                set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
index 230043c1c90ffcfe5781bcdea23cc52dc9bc6732..4bf55b5d78be53cd0aced1b7e5f2b59fcf93d186 100644 (file)
@@ -1241,19 +1241,32 @@ config SCSI_LPFC
        tristate "Emulex LightPulse Fibre Channel Support"
        depends on PCI && SCSI
        depends on SCSI_FC_ATTRS
-       depends on NVME_FC && NVME_TARGET_FC
        select CRC_T10DIF
-       help
+       ---help---
           This lpfc driver supports the Emulex LightPulse
           Family of Fibre Channel PCI host adapters.
 
 config SCSI_LPFC_DEBUG_FS
        bool "Emulex LightPulse Fibre Channel debugfs Support"
        depends on SCSI_LPFC && DEBUG_FS
-       help
+       ---help---
          This makes debugging information from the lpfc driver
          available via the debugfs filesystem.
 
+config LPFC_NVME_INITIATOR
+       bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
+       depends on SCSI_LPFC && NVME_FC
+       ---help---
+         This enables NVME Initiator support in the Emulex lpfc driver.
+
+config LPFC_NVME_TARGET
+       bool "Emulex LightPulse Fibre Channel NVME Target Support"
+       depends on SCSI_LPFC && NVME_TARGET_FC
+       ---help---
+         This enables NVME Target support in the Emulex lpfc driver.
+         Target mode must still be enabled on a per-adapter
+         basis via module parameters.
+
 config SCSI_SIM710
        tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
        depends on (EISA || MCA) && SCSI
index 2e5338dec621fbff89c8a68acc3ba241173f3239..7b0410e0f569481cc40101fab38a768c7e274c8e 100644 (file)
@@ -468,7 +468,7 @@ err_out:
        return -1;
 
 err_blink:
-       return (status > 16) & 0xFF;
+       return (status >> 16) & 0xFF;
 }
 
 static inline u32 aac_get_vector(struct aac_dev *dev)
index 07c08ce68d70af2fc09be51a3f4c8ba67c5a1d9d..894b1e3ebd56f4a141a8ed49a298819e3abbe8b2 100644 (file)
@@ -561,8 +561,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
        WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
        task->state = state;
 
-       if (!list_empty(&task->running))
+       spin_lock_bh(&conn->taskqueuelock);
+       if (!list_empty(&task->running)) {
+               pr_debug_once("%s while task on list", __func__);
                list_del_init(&task->running);
+       }
+       spin_unlock_bh(&conn->taskqueuelock);
 
        if (conn->task == task)
                conn->task = NULL;
@@ -784,7 +788,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                if (session->tt->xmit_task(task))
                        goto free_task;
        } else {
+               spin_lock_bh(&conn->taskqueuelock);
                list_add_tail(&task->running, &conn->mgmtqueue);
+               spin_unlock_bh(&conn->taskqueuelock);
                iscsi_conn_queue_work(conn);
        }
 
@@ -1475,8 +1481,10 @@ void iscsi_requeue_task(struct iscsi_task *task)
         * this may be on the requeue list already if the xmit_task callout
         * is handling the r2ts while we are adding new ones
         */
+       spin_lock_bh(&conn->taskqueuelock);
        if (list_empty(&task->running))
                list_add_tail(&task->running, &conn->requeue);
+       spin_unlock_bh(&conn->taskqueuelock);
        iscsi_conn_queue_work(conn);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1513,22 +1521,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
         * only have one nop-out as a ping from us and targets should not
         * overflow us with nop-ins
         */
+       spin_lock_bh(&conn->taskqueuelock);
 check_mgmt:
        while (!list_empty(&conn->mgmtqueue)) {
                conn->task = list_entry(conn->mgmtqueue.next,
                                         struct iscsi_task, running);
                list_del_init(&conn->task->running);
+               spin_unlock_bh(&conn->taskqueuelock);
                if (iscsi_prep_mgmt_task(conn, conn->task)) {
                        /* regular RX path uses back_lock */
                        spin_lock_bh(&conn->session->back_lock);
                        __iscsi_put_task(conn->task);
                        spin_unlock_bh(&conn->session->back_lock);
                        conn->task = NULL;
+                       spin_lock_bh(&conn->taskqueuelock);
                        continue;
                }
                rc = iscsi_xmit_task(conn);
                if (rc)
                        goto done;
+               spin_lock_bh(&conn->taskqueuelock);
        }
 
        /* process pending command queue */
@@ -1536,19 +1548,24 @@ check_mgmt:
                conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
                                        running);
                list_del_init(&conn->task->running);
+               spin_unlock_bh(&conn->taskqueuelock);
                if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
                        fail_scsi_task(conn->task, DID_IMM_RETRY);
+                       spin_lock_bh(&conn->taskqueuelock);
                        continue;
                }
                rc = iscsi_prep_scsi_cmd_pdu(conn->task);
                if (rc) {
                        if (rc == -ENOMEM || rc == -EACCES) {
+                               spin_lock_bh(&conn->taskqueuelock);
                                list_add_tail(&conn->task->running,
                                              &conn->cmdqueue);
                                conn->task = NULL;
+                               spin_unlock_bh(&conn->taskqueuelock);
                                goto done;
                        } else
                                fail_scsi_task(conn->task, DID_ABORT);
+                       spin_lock_bh(&conn->taskqueuelock);
                        continue;
                }
                rc = iscsi_xmit_task(conn);
@@ -1559,6 +1576,7 @@ check_mgmt:
                 * we need to check the mgmt queue for nops that need to
                 * be sent to avoid starvation
                 */
+               spin_lock_bh(&conn->taskqueuelock);
                if (!list_empty(&conn->mgmtqueue))
                        goto check_mgmt;
        }
@@ -1578,12 +1596,15 @@ check_mgmt:
                conn->task = task;
                list_del_init(&conn->task->running);
                conn->task->state = ISCSI_TASK_RUNNING;
+               spin_unlock_bh(&conn->taskqueuelock);
                rc = iscsi_xmit_task(conn);
                if (rc)
                        goto done;
+               spin_lock_bh(&conn->taskqueuelock);
                if (!list_empty(&conn->mgmtqueue))
                        goto check_mgmt;
        }
+       spin_unlock_bh(&conn->taskqueuelock);
        spin_unlock_bh(&conn->session->frwd_lock);
        return -ENODATA;
 
@@ -1739,7 +1760,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
                        goto prepd_reject;
                }
        } else {
+               spin_lock_bh(&conn->taskqueuelock);
                list_add_tail(&task->running, &conn->cmdqueue);
+               spin_unlock_bh(&conn->taskqueuelock);
                iscsi_conn_queue_work(conn);
        }
 
@@ -2897,6 +2920,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
        INIT_LIST_HEAD(&conn->mgmtqueue);
        INIT_LIST_HEAD(&conn->cmdqueue);
        INIT_LIST_HEAD(&conn->requeue);
+       spin_lock_init(&conn->taskqueuelock);
        INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
        /* allocate login_task used for the login/text sequences */
index 0bba2e30b4f09f62ef096ce73df629681a1646b6..257bbdd0f0b83a54d36d0378db18dedb93472737 100644 (file)
@@ -99,12 +99,13 @@ struct lpfc_sli2_slim;
 #define FC_MAX_ADPTMSG         64
 
 #define MAX_HBAEVT     32
+#define MAX_HBAS_NO_RESET 16
 
 /* Number of MSI-X vectors the driver uses */
 #define LPFC_MSIX_VECTORS      2
 
 /* lpfc wait event data ready flag */
-#define LPFC_DATA_READY                (1<<0)
+#define LPFC_DATA_READY                0       /* bit 0 */
 
 /* queue dump line buffer size */
 #define LPFC_LBUF_SZ           128
@@ -692,6 +693,7 @@ struct lpfc_hba {
                                         * capability
                                         */
 #define HBA_NVME_IOQ_FLUSH      0x80000 /* NVME IO queues flushed. */
+#define NVME_XRI_ABORT_EVENT   0x100000
 
        uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
        struct lpfc_dmabuf slim2p;
index 5c783ef7f260612e881dd199bccd767d9179a99d..5c3be3e6f5e2aebfa3da8d47a92775cb17d774a4 100644 (file)
@@ -3010,6 +3010,12 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:"
 static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
                   lpfc_poll_show, lpfc_poll_store);
 
+int lpfc_no_hba_reset_cnt;
+unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = {
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444);
+MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset");
+
 LPFC_ATTR(sli_mode, 0, 0, 3,
        "SLI mode selector:"
        " 0 - auto (SLI-3 if supported),"
@@ -4451,7 +4457,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr,
                return -EINVAL;
 
        phba->cfg_fcp_imax = (uint32_t)val;
-       for (i = 0; i < phba->io_channel_irqs; i++)
+
+       for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
                lpfc_modify_hba_eq_delay(phba, i);
 
        return strlen(buf);
index 843dd73004da0239089442a8591bd2c37e141f16..54e6ac42fbcd4269e2272c5b81271d607232da0a 100644 (file)
@@ -384,7 +384,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *);
 extern struct device_attribute *lpfc_hba_attrs[];
 extern struct device_attribute *lpfc_vport_attrs[];
 extern struct scsi_host_template lpfc_template;
-extern struct scsi_host_template lpfc_template_s3;
+extern struct scsi_host_template lpfc_template_no_hr;
 extern struct scsi_host_template lpfc_template_nvme;
 extern struct scsi_host_template lpfc_vport_template;
 extern struct fc_function_template lpfc_transport_functions;
@@ -554,3 +554,5 @@ void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
                                struct lpfc_wcqe_complete *abts_cmpl);
 extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
+extern int lpfc_no_hba_reset_cnt;
+extern unsigned long lpfc_no_hba_reset[];
index c22bb3f887e15b767c8cc53334389cba053c50a9..d3e9af983015ccfc1408a99b61ee190e9730098c 100644 (file)
@@ -939,8 +939,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                         "FC4 x%08x, Data: x%08x x%08x\n",
                                         ndlp, did, ndlp->nlp_fc4_type,
                                         FC_TYPE_FCP, FC_TYPE_NVME);
+                       ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
                }
-               ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
                lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
                lpfc_issue_els_prli(vport, ndlp, 0);
        } else
index 9f4798e9d9380dab26d1a506fcf5fe1c43dd7493..913eed822cb8ed85d4d726a2ee3150c7760a4f47 100644 (file)
@@ -3653,17 +3653,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
                        idiag.ptr_private = phba->sli4_hba.nvmels_cq;
                        goto pass_check;
                }
-               /* NVME LS complete queue */
-               if (phba->sli4_hba.nvmels_cq &&
-                   phba->sli4_hba.nvmels_cq->queue_id == queid) {
-                       /* Sanity check */
-                       rc = lpfc_idiag_que_param_check(
-                                       phba->sli4_hba.nvmels_cq, index, count);
-                       if (rc)
-                               goto error_out;
-                       idiag.ptr_private = phba->sli4_hba.nvmels_cq;
-                       goto pass_check;
-               }
                /* FCP complete queue */
                if (phba->sli4_hba.fcp_cq) {
                        for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
@@ -3738,17 +3727,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf,
                        idiag.ptr_private = phba->sli4_hba.nvmels_wq;
                        goto pass_check;
                }
-               /* NVME LS work queue */
-               if (phba->sli4_hba.nvmels_wq &&
-                   phba->sli4_hba.nvmels_wq->queue_id == queid) {
-                       /* Sanity check */
-                       rc = lpfc_idiag_que_param_check(
-                                       phba->sli4_hba.nvmels_wq, index, count);
-                       if (rc)
-                               goto error_out;
-                       idiag.ptr_private = phba->sli4_hba.nvmels_wq;
-                       goto pass_check;
-               }
                /* FCP work queue */
                if (phba->sli4_hba.fcp_wq) {
                        for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
index 2d26440e6f2fe6b3d9d2bc457c112397601409cf..d9c61d030034da95854d2524406c17c202a317b9 100644 (file)
@@ -5177,15 +5177,15 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba)
 
 static uint32_t
 lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc,
-               struct lpfc_hba *phba)
+               struct lpfc_vport *vport)
 {
 
        desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG);
 
-       memcpy(desc->port_names.wwnn, phba->wwnn,
+       memcpy(desc->port_names.wwnn, &vport->fc_nodename,
                        sizeof(desc->port_names.wwnn));
 
-       memcpy(desc->port_names.wwpn, phba->wwpn,
+       memcpy(desc->port_names.wwpn, &vport->fc_portname,
                        sizeof(desc->port_names.wwpn));
 
        desc->length = cpu_to_be32(sizeof(desc->port_names));
@@ -5279,7 +5279,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context,
        len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *)
                                       (len + pcmd), &rdp_context->link_stat);
        len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *)
-                                            (len + pcmd), phba);
+                                            (len + pcmd), vport);
        len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *)
                                        (len + pcmd), vport, ndlp);
        len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd),
@@ -8371,11 +8371,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                        spin_lock_irq(shost->host_lock);
                        vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
                        spin_unlock_irq(shost->host_lock);
-                       if (vport->port_type == LPFC_PHYSICAL_PORT
-                               && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
-                               lpfc_issue_init_vfi(vport);
-                       else
+                       if (mb->mbxStatus == MBX_NOT_FINISHED)
+                               break;
+                       if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
+                           !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+                               if (phba->sli_rev == LPFC_SLI_REV4)
+                                       lpfc_issue_init_vfi(vport);
+                               else
+                                       lpfc_initial_flogi(vport);
+                       } else {
                                lpfc_initial_fdisc(vport);
+                       }
                        break;
                }
        } else {
index 194a14d5f8a9821bf7c3ddc562049db294ce3e07..180b072beef6b079c756474d889e3a494ad75131 100644 (file)
@@ -313,8 +313,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                                 ndlp->nlp_state, ndlp->nlp_rpi);
        }
 
-       if (!(vport->load_flag & FC_UNLOADING) &&
-           !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+       if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
            !(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
            (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
            (ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) &&
@@ -641,6 +640,8 @@ lpfc_work_done(struct lpfc_hba *phba)
                        lpfc_handle_rrq_active(phba);
                if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
                        lpfc_sli4_fcp_xri_abort_event_proc(phba);
+               if (phba->hba_flag & NVME_XRI_ABORT_EVENT)
+                       lpfc_sli4_nvme_xri_abort_event_proc(phba);
                if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
                        lpfc_sli4_els_xri_abort_event_proc(phba);
                if (phba->hba_flag & ASYNC_EVENT)
@@ -2173,7 +2174,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
        uint32_t boot_flag, addr_mode;
        uint16_t fcf_index, next_fcf_index;
        struct lpfc_fcf_rec *fcf_rec = NULL;
-       uint16_t vlan_id;
+       uint16_t vlan_id = LPFC_FCOE_NULL_VID;
        bool select_new_fcf;
        int rc;
 
@@ -4020,9 +4021,11 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                rdata = rport->dd_data;
                /* break the link before dropping the ref */
                ndlp->rport = NULL;
-               if (rdata && rdata->pnode == ndlp)
-                       lpfc_nlp_put(ndlp);
-               rdata->pnode = NULL;
+               if (rdata) {
+                       if (rdata->pnode == ndlp)
+                               lpfc_nlp_put(ndlp);
+                       rdata->pnode = NULL;
+               }
                /* drop reference for earlier registration */
                put_device(&rport->dev);
        }
@@ -4344,9 +4347,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 {
        INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
        INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp);
-       init_timer(&ndlp->nlp_delayfunc);
-       ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
-       ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
+       setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay,
+                       (unsigned long)ndlp);
        ndlp->nlp_DID = did;
        ndlp->vport = vport;
        ndlp->phba = vport->phba;
@@ -4606,9 +4608,9 @@ lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba,
                pring = qp->pring;
                if (!pring)
                        continue;
-               spin_lock_irq(&pring->ring_lock);
+               spin_lock(&pring->ring_lock);
                __lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list);
-               spin_unlock_irq(&pring->ring_lock);
+               spin_unlock(&pring->ring_lock);
        }
        spin_unlock_irq(&phba->hbalock);
 }
index cfdb068a3bfccb76046fbc8d0cb000e80bc905ff..15277705cb6b8c650880572837954d91b6bc01eb 100644 (file)
@@ -1001,7 +1001,7 @@ struct eq_delay_info {
        uint32_t phase;
        uint32_t delay_multi;
 };
-#define        LPFC_MAX_EQ_DELAY       8
+#define        LPFC_MAX_EQ_DELAY_EQID_CNT      8
 
 struct sgl_page_pairs {
        uint32_t sgl_pg0_addr_lo;
@@ -1070,7 +1070,7 @@ struct lpfc_mbx_modify_eq_delay {
        union {
                struct {
                        uint32_t num_eq;
-                       struct eq_delay_info eq[LPFC_MAX_EQ_DELAY];
+                       struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT];
                } request;
                struct {
                        uint32_t word0;
index 0ee429d773f394e826074e6580bbd73c212e26ba..2697d49da4d7762d13430cfd9737463d909bf93f 100644 (file)
@@ -3555,6 +3555,44 @@ out_free_mem:
        return rc;
 }
 
+static uint64_t
+lpfc_get_wwpn(struct lpfc_hba *phba)
+{
+       uint64_t wwn;
+       int rc;
+       LPFC_MBOXQ_t *mboxq;
+       MAILBOX_t *mb;
+
+
+       mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+                                               GFP_KERNEL);
+       if (!mboxq)
+               return (uint64_t)-1;
+
+       /* First get WWN of HBA instance */
+       lpfc_read_nv(phba, mboxq);
+       rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+       if (rc != MBX_SUCCESS) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                               "6019 Mailbox failed , mbxCmd x%x "
+                               "READ_NV, mbxStatus x%x\n",
+                               bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+                               bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+               mempool_free(mboxq, phba->mbox_mem_pool);
+               return (uint64_t) -1;
+       }
+       mb = &mboxq->u.mb;
+       memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t));
+       /* wwn is WWPN of HBA instance */
+       mempool_free(mboxq, phba->mbox_mem_pool);
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               return be64_to_cpu(wwn);
+       else
+               return (((wwn & 0xffffffff00000000) >> 32) |
+                       ((wwn & 0x00000000ffffffff) << 32));
+
+}
+
 /**
  * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping
  * @phba: pointer to lpfc hba data structure.
@@ -3676,17 +3714,32 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
        struct lpfc_vport *vport;
        struct Scsi_Host  *shost = NULL;
        int error = 0;
+       int i;
+       uint64_t wwn;
+       bool use_no_reset_hba = false;
+
+       wwn = lpfc_get_wwpn(phba);
+
+       for (i = 0; i < lpfc_no_hba_reset_cnt; i++) {
+               if (wwn == lpfc_no_hba_reset[i]) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                                       "6020 Setting use_no_reset port=%llx\n",
+                                       wwn);
+                       use_no_reset_hba = true;
+                       break;
+               }
+       }
 
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
                if (dev != &phba->pcidev->dev) {
                        shost = scsi_host_alloc(&lpfc_vport_template,
                                                sizeof(struct lpfc_vport));
                } else {
-                       if (phba->sli_rev == LPFC_SLI_REV4)
+                       if (!use_no_reset_hba)
                                shost = scsi_host_alloc(&lpfc_template,
                                                sizeof(struct lpfc_vport));
                        else
-                               shost = scsi_host_alloc(&lpfc_template_s3,
+                               shost = scsi_host_alloc(&lpfc_template_no_hr,
                                                sizeof(struct lpfc_vport));
                }
        } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
@@ -3734,17 +3787,14 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
        INIT_LIST_HEAD(&vport->rcv_buffer_list);
        spin_lock_init(&vport->work_port_lock);
 
-       init_timer(&vport->fc_disctmo);
-       vport->fc_disctmo.function = lpfc_disc_timeout;
-       vport->fc_disctmo.data = (unsigned long)vport;
+       setup_timer(&vport->fc_disctmo, lpfc_disc_timeout,
+                       (unsigned long)vport);
 
-       init_timer(&vport->els_tmofunc);
-       vport->els_tmofunc.function = lpfc_els_timeout;
-       vport->els_tmofunc.data = (unsigned long)vport;
+       setup_timer(&vport->els_tmofunc, lpfc_els_timeout,
+                       (unsigned long)vport);
 
-       init_timer(&vport->delayed_disc_tmo);
-       vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo;
-       vport->delayed_disc_tmo.data = (unsigned long)vport;
+       setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo,
+                       (unsigned long)vport);
 
        error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
        if (error)
@@ -5406,21 +5456,15 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba)
        INIT_LIST_HEAD(&phba->luns);
 
        /* MBOX heartbeat timer */
-       init_timer(&psli->mbox_tmo);
-       psli->mbox_tmo.function = lpfc_mbox_timeout;
-       psli->mbox_tmo.data = (unsigned long) phba;
+       setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba);
        /* Fabric block timer */
-       init_timer(&phba->fabric_block_timer);
-       phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
-       phba->fabric_block_timer.data = (unsigned long) phba;
+       setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout,
+                       (unsigned long)phba);
        /* EA polling mode timer */
-       init_timer(&phba->eratt_poll);
-       phba->eratt_poll.function = lpfc_poll_eratt;
-       phba->eratt_poll.data = (unsigned long) phba;
+       setup_timer(&phba->eratt_poll, lpfc_poll_eratt,
+                       (unsigned long)phba);
        /* Heartbeat timer */
-       init_timer(&phba->hb_tmofunc);
-       phba->hb_tmofunc.function = lpfc_hb_timeout;
-       phba->hb_tmofunc.data = (unsigned long)phba;
+       setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba);
 
        return 0;
 }
@@ -5446,9 +5490,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
         */
 
        /* FCP polling mode timer */
-       init_timer(&phba->fcp_poll_timer);
-       phba->fcp_poll_timer.function = lpfc_poll_timeout;
-       phba->fcp_poll_timer.data = (unsigned long) phba;
+       setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout,
+                       (unsigned long)phba);
 
        /* Host attention work mask setup */
        phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
@@ -5482,7 +5525,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba)
 
        /* Initialize the host templates the configured values. */
        lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
-       lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt;
+       lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
+       lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
 
        /* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
        if (phba->cfg_enable_bg) {
@@ -5617,14 +5661,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
         * Initialize timers used by driver
         */
 
-       init_timer(&phba->rrq_tmr);
-       phba->rrq_tmr.function = lpfc_rrq_timeout;
-       phba->rrq_tmr.data = (unsigned long)phba;
+       setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba);
 
        /* FCF rediscover timer */
-       init_timer(&phba->fcf.redisc_wait);
-       phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo;
-       phba->fcf.redisc_wait.data = (unsigned long)phba;
+       setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo,
+                       (unsigned long)phba);
 
        /*
         * Control structure for handling external multi-buffer mailbox
@@ -5706,6 +5747,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
        /* Initialize the host templates with the updated values. */
        lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
        lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+       lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
 
        if (phba->cfg_sg_dma_buf_size  <= LPFC_MIN_SG_SLI4_BUF_SZ)
                phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
@@ -5736,6 +5778,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                /* Initialize the Abort nvme buffer list used by driver */
                spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
                INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+               /* Fast-path XRI aborted CQ Event work queue list */
+               INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
        }
 
        /* This abort list used by worker thread */
@@ -8712,12 +8756,9 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
                }
        }
 
-       /*
-        * Configure EQ delay multipier for interrupt coalescing using
-        * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time.
-        */
-       for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY)
+       for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
                lpfc_modify_hba_eq_delay(phba, qidx);
+
        return 0;
 
 out_destroy:
@@ -8973,6 +9014,11 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba)
        /* Pending ELS XRI abort events */
        list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
                         &cqelist);
+       if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+               /* Pending NVME XRI abort events */
+               list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+                                &cqelist);
+       }
        /* Pending async events */
        list_splice_init(&phba->sli4_hba.sp_asynce_work_queue,
                         &cqelist);
@@ -10400,12 +10446,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
        fc_remove_host(shost);
        scsi_remove_host(shost);
 
-       /* Perform ndlp cleanup on the physical port.  The nvme and nvmet
-        * localports are destroyed after to cleanup all transport memory.
-        */
        lpfc_cleanup(vport);
-       lpfc_nvmet_destroy_targetport(phba);
-       lpfc_nvme_destroy_localport(vport);
 
        /*
         * Bring down the SLI Layer. This step disable all interrupts,
@@ -12018,6 +12059,7 @@ static struct pci_driver lpfc_driver = {
        .id_table       = lpfc_id_table,
        .probe          = lpfc_pci_probe_one,
        .remove         = lpfc_pci_remove_one,
+       .shutdown       = lpfc_pci_remove_one,
        .suspend        = lpfc_pci_suspend_one,
        .resume         = lpfc_pci_resume_one,
        .err_handler    = &lpfc_err_handler,
index c61d8d692edeeeca94512d2eeb0c2fcbdff0b382..5986c7957199df6ef97343a3c0402931cbdeb7ad 100644 (file)
@@ -646,7 +646,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
        }
 
        dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
-       dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
        if (!dma_buf->iocbq) {
                kfree(dma_buf->context);
                pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
@@ -658,6 +657,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba)
                                "2621 Ran out of nvmet iocb/WQEs\n");
                return NULL;
        }
+       dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
        nvmewqe = dma_buf->iocbq;
        wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
        /* Initialize WQE */
index 609a908ea9db5ba291e678c296726d69cae784f9..0a4c1908140940cc116450cc5d5cb5a7a99de634 100644 (file)
@@ -316,7 +316,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
        bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0);
        bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1);
        bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1);
-       bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL);
+       bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ);
        bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME);
 
        /* Word 6 */
@@ -620,15 +620,15 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
         * Embed the payload in the last half of the WQE
         * WQE words 16-30 get the NVME CMD IU payload
         *
-        * WQE Word 16 is already setup with flags
-        * WQE words 17-19 get payload Words 2-4
+        * WQE words 16-19 get payload Words 1-4
         * WQE words 20-21 get payload Words 6-7
         * WQE words 22-29 get payload Words 16-23
         */
-       wptr = &wqe->words[17];  /* WQE ptr */
+       wptr = &wqe->words[16];  /* WQE ptr */
        dptr = (uint32_t *)nCmd->cmdaddr;  /* payload ptr */
-       dptr += 2;              /* Skip Words 0-1 in payload */
+       dptr++;                 /* Skip Word 0 in payload */
 
+       *wptr++ = *dptr++;      /* Word 1 */
        *wptr++ = *dptr++;      /* Word 2 */
        *wptr++ = *dptr++;      /* Word 3 */
        *wptr++ = *dptr++;      /* Word 4 */
@@ -978,9 +978,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                        bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
                               NVME_WRITE_CMD);
 
-                       /* Word 16 */
-                       wqe->words[16] = LPFC_NVME_EMBED_WRITE;
-
                        phba->fc4NvmeOutputRequests++;
                } else {
                        /* Word 7 */
@@ -1002,9 +999,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                        bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
                               NVME_READ_CMD);
 
-                       /* Word 16 */
-                       wqe->words[16] = LPFC_NVME_EMBED_READ;
-
                        phba->fc4NvmeInputRequests++;
                }
        } else {
@@ -1026,9 +1020,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
                /* Word 11 */
                bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD);
 
-               /* Word 16 */
-               wqe->words[16] = LPFC_NVME_EMBED_CMD;
-
                phba->fc4NvmeControlRequests++;
        }
        /*
@@ -1286,6 +1277,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
        pnvme_fcreq->private = (void *)lpfc_ncmd;
        lpfc_ncmd->nvmeCmd = pnvme_fcreq;
        lpfc_ncmd->nrport = rport;
+       lpfc_ncmd->ndlp = ndlp;
        lpfc_ncmd->start_time = jiffies;
 
        lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp);
@@ -1319,7 +1311,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
                                 "sid: x%x did: x%x oxid: x%x\n",
                                 ret, vport->fc_myDID, ndlp->nlp_DID,
                                 lpfc_ncmd->cur_iocbq.sli4_xritag);
-               ret = -EINVAL;
+               ret = -EBUSY;
                goto out_free_nvme_buf;
        }
 
@@ -1821,10 +1813,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
                                                pdma_phys_sgl1, cur_xritag);
                                if (status) {
                                        /* failure, put on abort nvme list */
-                                       lpfc_ncmd->exch_busy = 1;
+                                       lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
                                } else {
                                        /* success, put on NVME buffer list */
-                                       lpfc_ncmd->exch_busy = 0;
+                                       lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
                                        lpfc_ncmd->status = IOSTAT_SUCCESS;
                                        num_posted++;
                                }
@@ -1854,10 +1846,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba,
                                         struct lpfc_nvme_buf, list);
                        if (status) {
                                /* failure, put on abort nvme list */
-                               lpfc_ncmd->exch_busy = 1;
+                               lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
                        } else {
                                /* success, put on NVME buffer list */
-                               lpfc_ncmd->exch_busy = 0;
+                               lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
                                lpfc_ncmd->status = IOSTAT_SUCCESS;
                                num_posted++;
                        }
@@ -2099,7 +2091,7 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
        unsigned long iflag = 0;
 
        lpfc_ncmd->nonsg_phys = 0;
-       if (lpfc_ncmd->exch_busy) {
+       if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
                spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
                                        iflag);
                lpfc_ncmd->nvmeCmd = NULL;
@@ -2135,11 +2127,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd)
 int
 lpfc_nvme_create_localport(struct lpfc_vport *vport)
 {
+       int ret = 0;
        struct lpfc_hba  *phba = vport->phba;
        struct nvme_fc_port_info nfcp_info;
        struct nvme_fc_local_port *localport;
        struct lpfc_nvme_lport *lport;
-       int len, ret = 0;
+       int len;
 
        /* Initialize this localport instance.  The vport wwn usage ensures
         * that NPIV is accounted for.
@@ -2156,8 +2149,12 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
        /* localport is allocated from the stack, but the registration
         * call allocates heap memory as well as the private area.
         */
+#ifdef CONFIG_LPFC_NVME_INITIATOR
        ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
                                         &vport->phba->pcidev->dev, &localport);
+#else
+       ret = -ENOMEM;
+#endif
        if (!ret) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC,
                                 "6005 Successfully registered local "
@@ -2173,10 +2170,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
                lport->vport = vport;
                INIT_LIST_HEAD(&lport->rport_list);
                vport->nvmei_support = 1;
+               len  = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
+               vport->phba->total_nvme_bufs += len;
        }
 
-       len  = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
-       vport->phba->total_nvme_bufs += len;
        return ret;
 }
 
@@ -2193,6 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
 void
 lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
 {
+#ifdef CONFIG_LPFC_NVME_INITIATOR
        struct nvme_fc_local_port *localport;
        struct lpfc_nvme_lport *lport;
        struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
@@ -2208,7 +2206,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
                         "6011 Destroying NVME localport %p\n",
                         localport);
-
        list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) {
                /* The last node ref has to get released now before the rport
                 * private memory area is released by the transport.
@@ -2222,6 +2219,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
                                         "6008 rport fail destroy %x\n", ret);
                wait_for_completion_timeout(&rport->rport_unreg_done, 5);
        }
+
        /* lport's rport list is clear.  Unregister
         * lport and release resources.
         */
@@ -2245,6 +2243,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
                                 "Failed, status x%x\n",
                                 ret);
        }
+#endif
 }
 
 void
@@ -2275,6 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport)
 int
 lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
+#ifdef CONFIG_LPFC_NVME_INITIATOR
        int ret = 0;
        struct nvme_fc_local_port *localport;
        struct lpfc_nvme_lport *lport;
@@ -2348,7 +2348,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                        rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR;
                rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn);
                rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
-
                ret = nvme_fc_register_remoteport(localport, &rpinfo,
                                                  &remote_port);
                if (!ret) {
@@ -2384,6 +2383,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                                 ndlp->nlp_type, ndlp->nlp_DID, ndlp);
        }
        return ret;
+#else
+       return 0;
+#endif
 }
 
 /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport.
@@ -2401,6 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 void
 lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
+#ifdef CONFIG_LPFC_NVME_INITIATOR
        int ret;
        struct nvme_fc_local_port *localport;
        struct lpfc_nvme_lport *lport;
@@ -2458,7 +2461,61 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        return;
 
  input_err:
+#endif
        lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
                         "6168: State error: lport %p, rport%p FCID x%06x\n",
                         vport->localport, ndlp->rport, ndlp->nlp_DID);
 }
+
+/**
+ * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvme xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVME aborted xri.
+ **/
+void
+lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+                          struct sli4_wcqe_xri_aborted *axri)
+{
+       uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+       uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+       struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd;
+       struct lpfc_nodelist *ndlp;
+       unsigned long iflag = 0;
+       int rrq_empty = 0;
+
+       if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+               return;
+       spin_lock_irqsave(&phba->hbalock, iflag);
+       spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd,
+                                &phba->sli4_hba.lpfc_abts_nvme_buf_list,
+                                list) {
+               if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
+                       list_del(&lpfc_ncmd->list);
+                       lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
+                       lpfc_ncmd->status = IOSTAT_SUCCESS;
+                       spin_unlock(
+                               &phba->sli4_hba.abts_nvme_buf_list_lock);
+
+                       rrq_empty = list_empty(&phba->active_rrq_list);
+                       spin_unlock_irqrestore(&phba->hbalock, iflag);
+                       ndlp = lpfc_ncmd->ndlp;
+                       if (ndlp) {
+                               lpfc_set_rrq_active(
+                                       phba, ndlp,
+                                       lpfc_ncmd->cur_iocbq.sli4_lxritag,
+                                       rxid, 1);
+                               lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+                       }
+                       lpfc_release_nvme_buf(phba, lpfc_ncmd);
+                       if (rrq_empty)
+                               lpfc_worker_wake_up(phba);
+                       return;
+               }
+       }
+       spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+       spin_unlock_irqrestore(&phba->hbalock, iflag);
+}
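
The lpfc_nvme.c hunks above fold the old exch_busy integer into a bit (LPFC_SBUF_XBUSY) in the buffer's flags word: set on the failure path, cleared on success and on abort completion, and tested in lpfc_release_nvme_buf(). A minimal standalone sketch of that bit-flag pattern follows; the flag value and structure are illustrative, not the driver's definitions.

#include <stdio.h>

#define SBUF_XBUSY (1 << 0)     /* illustrative bit; not the real LPFC_SBUF_XBUSY value */

struct buf {
        unsigned int flags;
};

int main(void)
{
        struct buf b = { .flags = 0 };

        b.flags |= SBUF_XBUSY;                  /* failure path: mark the exchange busy */
        printf("busy=%d\n", !!(b.flags & SBUF_XBUSY));

        b.flags &= ~SBUF_XBUSY;                 /* success / abort completion: clear it */
        printf("busy=%d\n", !!(b.flags & SBUF_XBUSY));
        return 0;
}

Keeping per-buffer conditions in one flags word lets later changes add states without growing the structure by another integer each time.
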
index b2fae5e813f8a82b17b53a5cd5affd8f7e5d6bad..1347deb8dd6cbd3e56db9f615cbc55e8290fc443 100644 (file)
@@ -57,6 +57,7 @@ struct lpfc_nvme_buf {
        struct list_head list;
        struct nvmefc_fcp_req *nvmeCmd;
        struct lpfc_nvme_rport *nrport;
+       struct lpfc_nodelist *ndlp;
 
        uint32_t timeout;
 
index c421e1738ee989efcca922da9b145995cc164c72..b7739a554fe00505401bae97183ec3ce69559768 100644 (file)
@@ -571,6 +571,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
                lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
                                "6102 Bad state IO x%x aborted\n",
                                ctxp->oxid);
+               rc = -ENXIO;
                goto aerr;
        }
 
@@ -580,6 +581,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
                lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
                                "6152 FCP Drop IO x%x: Prep\n",
                                ctxp->oxid);
+               rc = -ENXIO;
                goto aerr;
        }
 
@@ -618,8 +620,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
        ctxp->wqeq->hba_wqidx = 0;
        nvmewqeq->context2 = NULL;
        nvmewqeq->context3 = NULL;
+       rc = -EBUSY;
 aerr:
-       return -ENXIO;
+       return rc;
 }
 
 static void
@@ -668,9 +671,13 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba)
        lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
                                           NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
 
+#ifdef CONFIG_LPFC_NVME_TARGET
        error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
                                             &phba->pcidev->dev,
                                             &phba->targetport);
+#else
+       error = -ENOMEM;
+#endif
        if (error) {
                lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
                                "6025 Cannot register NVME targetport "
@@ -731,9 +738,25 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba)
        return 0;
 }
 
+/**
+ * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvmet xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVMET aborted xri.
+ **/
+void
+lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+                           struct sli4_wcqe_xri_aborted *axri)
+{
+       /* TODO: work in progress */
+}
+
 void
 lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 {
+#ifdef CONFIG_LPFC_NVME_TARGET
        struct lpfc_nvmet_tgtport *tgtp;
 
        if (phba->nvmet_support == 0)
@@ -745,6 +768,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
                wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
        }
        phba->targetport = NULL;
+#endif
 }
 
 /**
@@ -764,6 +788,7 @@ static void
 lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                           struct hbq_dmabuf *nvmebuf)
 {
+#ifdef CONFIG_LPFC_NVME_TARGET
        struct lpfc_nvmet_tgtport *tgtp;
        struct fc_frame_header *fc_hdr;
        struct lpfc_nvmet_rcv_ctx *ctxp;
@@ -844,6 +869,7 @@ dropit:
 
        atomic_inc(&tgtp->xmt_ls_abort);
        lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid);
+#endif
 }
 
 /**
@@ -865,6 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
                            struct rqb_dmabuf *nvmebuf,
                            uint64_t isr_timestamp)
 {
+#ifdef CONFIG_LPFC_NVME_TARGET
        struct lpfc_nvmet_rcv_ctx *ctxp;
        struct lpfc_nvmet_tgtport *tgtp;
        struct fc_frame_header *fc_hdr;
@@ -955,7 +982,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
 
        atomic_inc(&tgtp->rcv_fcp_cmd_drop);
        lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
-                       "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n",
+                       "6159 FCP Drop IO x%x: err x%x\n",
                        ctxp->oxid, rc);
 dropit:
        lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
@@ -970,6 +997,7 @@ dropit:
                /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
                lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
        }
+#endif
 }
 
 /**
@@ -1114,7 +1142,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba,
        bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0);
        bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1);
        bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0);
-       bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL);
+       bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP);
        bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME);
 
        /* Word 6 */
@@ -1445,7 +1473,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
 
        case NVMET_FCOP_RSP:
                /* Words 0 - 2 */
-               sgel = &rsp->sg[0];
                physaddr = rsp->rspdma;
                wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
                wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen;
@@ -1681,8 +1708,8 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
        struct lpfc_nodelist *ndlp;
 
        lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
-                       "6067 %s: Entrypoint: sid %x xri %x\n", __func__,
-                       sid, xri);
+                       "6067 Abort: sid %x xri x%x/x%x\n",
+                       sid, xri, ctxp->wqeq->sli4_xritag);
 
        tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
 
@@ -1693,7 +1720,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
                atomic_inc(&tgtp->xmt_abort_rsp_error);
                lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
                                "6134 Drop ABTS - wrong NDLP state x%x.\n",
-                               ndlp->nlp_state);
+                               (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
 
                /* No failure to an ABTS request. */
                return 0;
@@ -1791,7 +1818,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
                atomic_inc(&tgtp->xmt_abort_rsp_error);
                lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
                                "6160 Drop ABTS - wrong NDLP state x%x.\n",
-                               ndlp->nlp_state);
+                               (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
 
                /* No failure to an ABTS request. */
                return 0;
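
Several routines above are wrapped in CONFIG_LPFC_NVME_INITIATOR / CONFIG_LPFC_NVME_TARGET so the driver still builds, and fails the setup path cleanly, when the corresponding role is not configured. A small sketch of that compile-time gating, using an invented CONFIG_EXAMPLE_NVME symbol in place of the real Kconfig options:

#include <errno.h>
#include <stdio.h>

/* Build with -DCONFIG_EXAMPLE_NVME to take the "enabled" branch; the macro
 * stands in for the CONFIG_LPFC_NVME_* symbols used by the patch. */
static int register_port(void)
{
#ifdef CONFIG_EXAMPLE_NVME
        return 0;               /* here the real code calls the transport's register routine */
#else
        return -ENOMEM;         /* role not configured: drive the caller's error path */
#endif
}

int main(void)
{
        int ret = register_port();

        if (ret)
                printf("registration skipped, err %d\n", ret);
        else
                printf("registered\n");
        return 0;
}
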
index 9d6384af9fce7e9b321e8031a421869095fbab55..54fd0c81ceaf69a7ceb475acb87e309df7e10de1 100644 (file)
@@ -5953,12 +5953,13 @@ struct scsi_host_template lpfc_template_nvme = {
        .track_queue_depth      = 0,
 };
 
-struct scsi_host_template lpfc_template_s3 = {
+struct scsi_host_template lpfc_template_no_hr = {
        .module                 = THIS_MODULE,
        .name                   = LPFC_DRIVER_NAME,
        .proc_name              = LPFC_DRIVER_NAME,
        .info                   = lpfc_info,
        .queuecommand           = lpfc_queuecommand,
+       .eh_timed_out           = fc_eh_timed_out,
        .eh_abort_handler       = lpfc_abort_handler,
        .eh_device_reset_handler = lpfc_device_reset_handler,
        .eh_target_reset_handler = lpfc_target_reset_handler,
@@ -6015,7 +6016,6 @@ struct scsi_host_template lpfc_vport_template = {
        .eh_abort_handler       = lpfc_abort_handler,
        .eh_device_reset_handler = lpfc_device_reset_handler,
        .eh_target_reset_handler = lpfc_target_reset_handler,
-       .eh_bus_reset_handler   = lpfc_bus_reset_handler,
        .slave_alloc            = lpfc_slave_alloc,
        .slave_configure        = lpfc_slave_configure,
        .slave_destroy          = lpfc_slave_destroy,
index e43e5e23c24b475f3f8930bcae9cb67e4685153e..1c9fa45df7eb5deec4c8988b18ffe36f4621b267 100644 (file)
@@ -1,3 +1,4 @@
+
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
@@ -952,7 +953,7 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq)
        start_sglq = sglq;
        while (!found) {
                if (!sglq)
-                       return NULL;
+                       break;
                if (ndlp && ndlp->active_rrqs_xri_bitmap &&
                    test_bit(sglq->sli4_lxritag,
                    ndlp->active_rrqs_xri_bitmap)) {
@@ -12212,6 +12213,41 @@ void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
        }
 }
 
+/**
+ * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 NVME abort XRI events.
+ **/
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+       struct lpfc_cq_event *cq_event;
+
+       /* First, mark the nvme xri abort event as handled */
+       spin_lock_irq(&phba->hbalock);
+       phba->hba_flag &= ~NVME_XRI_ABORT_EVENT;
+       spin_unlock_irq(&phba->hbalock);
+       /* Now, handle all the nvme xri abort events */
+       while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) {
+               /* Get the first event from the head of the event queue */
+               spin_lock_irq(&phba->hbalock);
+               list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+                                cq_event, struct lpfc_cq_event, list);
+               spin_unlock_irq(&phba->hbalock);
+               /* Notify aborted XRI for NVME work queue */
+               if (phba->nvmet_support) {
+                       lpfc_sli4_nvmet_xri_aborted(phba,
+                                                   &cq_event->cqe.wcqe_axri);
+               } else {
+                       lpfc_sli4_nvme_xri_aborted(phba,
+                                                  &cq_event->cqe.wcqe_axri);
+               }
+               /* Free the event processed back to the free pool */
+               lpfc_sli4_cq_event_release(phba, cq_event);
+       }
+}
+
 /**
  * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
  * @phba: pointer to lpfc hba data structure.
@@ -12709,10 +12745,22 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
                spin_unlock_irqrestore(&phba->hbalock, iflags);
                workposted = true;
                break;
+       case LPFC_NVME:
+               spin_lock_irqsave(&phba->hbalock, iflags);
+               list_add_tail(&cq_event->list,
+                             &phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
+               /* Set the nvme xri abort event flag */
+               phba->hba_flag |= NVME_XRI_ABORT_EVENT;
+               spin_unlock_irqrestore(&phba->hbalock, iflags);
+               workposted = true;
+               break;
        default:
                lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
-                               "0603 Invalid work queue CQE subtype (x%x)\n",
-                               cq->subtype);
+                               "0603 Invalid CQ subtype %d: "
+                               "%08x %08x %08x %08x\n",
+                               cq->subtype, wcqe->word0, wcqe->parameter,
+                               wcqe->word2, wcqe->word3);
+               lpfc_sli4_cq_event_release(phba, cq_event);
                workposted = false;
                break;
        }
@@ -13827,6 +13875,8 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
  * @startq: The starting FCP EQ to modify
  *
  * This function sends a MODIFY_EQ_DELAY mailbox command to the HBA.
+ * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ IDs to be
+ * updated in one mailbox command.
  *
  * The @phba struct is used to send mailbox command to HBA. The @startq
  * is used to get the starting FCP EQ to change.
@@ -13879,7 +13929,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq)
                eq_delay->u.request.eq[cnt].phase = 0;
                eq_delay->u.request.eq[cnt].delay_multi = dmult;
                cnt++;
-               if (cnt >= LPFC_MAX_EQ_DELAY)
+               if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT)
                        break;
        }
        eq_delay->u.request.num_eq = cnt;
@@ -15185,17 +15235,17 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
                drq = drqp[idx];
                cq  = cqp[idx];
 
-               if (hrq->entry_count != drq->entry_count) {
-                       status = -EINVAL;
-                       goto out;
-               }
-
                /* sanity check on queue memory */
                if (!hrq || !drq || !cq) {
                        status = -ENODEV;
                        goto out;
                }
 
+               if (hrq->entry_count != drq->entry_count) {
+                       status = -EINVAL;
+                       goto out;
+               }
+
                if (idx == 0) {
                        bf_set(lpfc_mbx_rq_create_num_pages,
                               &rq_create->u.request,
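
lpfc_sli4_nvme_xri_abort_event_proc() above drains a spinlock-protected work queue: clear the pending flag, then repeatedly remove one event under the lock and process it with the lock dropped. A userspace sketch of the same drain pattern (a pthread mutex standing in for the hba lock, names invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct event {
        int xri;                        /* stand-in for the aborted exchange id */
        struct event *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct event *queue;

static void queue_event(int xri)
{
        struct event *ev = malloc(sizeof(*ev));

        if (!ev)
                return;
        ev->xri = xri;
        pthread_mutex_lock(&lock);
        ev->next = queue;               /* LIFO for brevity; the driver adds at the tail */
        queue = ev;
        pthread_mutex_unlock(&lock);
}

static void drain_events(void)
{
        for (;;) {
                struct event *ev;

                pthread_mutex_lock(&lock);
                ev = queue;
                if (ev)
                        queue = ev->next;
                pthread_mutex_unlock(&lock);
                if (!ev)
                        break;
                printf("handling aborted xri %d\n", ev->xri);   /* work done outside the lock */
                free(ev);
        }
}

int main(void)
{
        queue_event(1);
        queue_event(2);
        drain_events();
        return 0;
}
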
index 91153c9f6d18259b3978b7f3218ad76198170c34..710458cf11d62f77c7a48973aded3f255b233277 100644 (file)
@@ -642,6 +642,7 @@ struct lpfc_sli4_hba {
        struct list_head sp_asynce_work_queue;
        struct list_head sp_fcp_xri_aborted_work_queue;
        struct list_head sp_els_xri_aborted_work_queue;
+       struct list_head sp_nvme_xri_aborted_work_queue;
        struct list_head sp_unsol_work_queue;
        struct lpfc_sli4_link link_state;
        struct lpfc_sli4_lnk_info lnk_info;
@@ -794,9 +795,14 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
 int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
                        void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
 void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba);
 void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
 void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
                               struct sli4_wcqe_xri_aborted *);
+void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+                               struct sli4_wcqe_xri_aborted *axri);
+void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+                                struct sli4_wcqe_xri_aborted *axri);
 void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
                               struct sli4_wcqe_xri_aborted *);
 void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *);
index 86c6c9b26b823a04dd5afbdff4688901d577c82d..d4e95e28f4e3d55a6ec97162cfdb1e26998e4fe1 100644 (file)
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "11.2.0.7"
+#define LPFC_DRIVER_VERSION "11.2.0.10"
 #define LPFC_DRIVER_NAME               "lpfc"
 
 /* Used for SLI 2/3 */
index 7fe7e6ed595b79e8831bfbeb55767e5d65ff7e5c..8981806fb13fa7792e2b8d45f4fc6880b362da25 100644 (file)
@@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc,
        u64 sas_address, u16 handle, u8 phy_number, u8 link_rate);
 extern struct sas_function_template mpt3sas_transport_functions;
 extern struct scsi_transport_template *mpt3sas_transport_template;
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
-                               enum scsi_device_state new_state);
 /* trigger data externs */
 void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc,
        struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data);
index 46e866c36c8a884a98588a8d7a81272aed0ce400..919ba2bb15f110f4619a72646d170f87ad985237 100644 (file)
@@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev,
            sas_device_priv_data->sas_target->handle);
        sas_device_priv_data->block = 1;
 
-       r = scsi_internal_device_block(sdev);
+       r = scsi_internal_device_block(sdev, false);
        if (r == -EINVAL)
                sdev_printk(KERN_WARNING, sdev,
                    "device_block failed with return(%d) for handle(0x%04x)\n",
@@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev,
                    "performing a block followed by an unblock\n",
                    r, sas_device_priv_data->sas_target->handle);
                sas_device_priv_data->block = 1;
-               r = scsi_internal_device_block(sdev);
+               r = scsi_internal_device_block(sdev, false);
                if (r)
                        sdev_printk(KERN_WARNING, sdev, "retried device_block "
                            "failed with return(%d) for handle(0x%04x)\n",
@@ -4677,7 +4677,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
        struct MPT3SAS_DEVICE *sas_device_priv_data;
        u32 response_code = 0;
        unsigned long flags;
-       unsigned int sector_sz;
 
        mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
 
@@ -4742,20 +4741,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
        }
 
        xfer_cnt = le32_to_cpu(mpi_reply->TransferCount);
-
-       /* In case of bogus fw or device, we could end up having
-        * unaligned partial completion. We can force alignment here,
-        * then scsi-ml does not need to handle this misbehavior.
-        */
-       sector_sz = scmd->device->sector_size;
-       if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz &&
-                    xfer_cnt % sector_sz)) {
-               sdev_printk(KERN_INFO, scmd->device,
-                   "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n",
-                           xfer_cnt, sector_sz);
-               xfer_cnt = round_down(xfer_cnt, sector_sz);
-       }
-
        scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt);
        if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
                log_info =  le32_to_cpu(mpi_reply->IOCLogInfo);
index 23bd70628a2f05b9c5a5c2dbd6184e4ad6d076b2..7d173f48a81e8d240cd5d957b2997a3c0c3f0223 100644 (file)
@@ -81,14 +81,17 @@ struct qedf_dbg_ctx {
 #define QEDF_INFO(pdev, level, fmt, ...)       \
                qedf_dbg_info(pdev, __func__, __LINE__, level, fmt,     \
                              ## __VA_ARGS__)
-
-extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
                          const char *fmt, ...);
-extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
                           const char *, ...);
-extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
+__printf(4, 5)
+void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
                            u32 line, const char *, ...);
-extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(5, 6)
+void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
                          u32 info, const char *fmt, ...);
 
 /* GRC Dump related defines */
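
The qedf_dbg.h hunk adds __printf(m, n) annotations so the compiler checks the format string against its arguments; the follow-on qedf hunks fix exactly the kind of mismatches such checking flags (a stray __func__ argument, %d used for a 64-bit LUN). A userspace sketch of the underlying GCC attribute, with an invented my_printf() macro in place of the kernel's __printf():

#include <stdarg.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's __printf(): argument fmt_idx is a printf
 * format string consumed by the arguments starting at arg_idx. */
#define my_printf(fmt_idx, arg_idx) __attribute__((format(printf, fmt_idx, arg_idx)))

my_printf(2, 3)
static void dbg_info(const char *func, const char *fmt, ...)
{
        va_list ap;

        fprintf(stderr, "%s: ", func);
        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);
}

int main(void)
{
        dbg_info(__func__, "value=%d\n", 42);
        /* dbg_info(__func__, "value=%d\n", "oops");  -- now caught at compile time */
        return 0;
}
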
index 868d423380d120ca82c135a8e545fbd98e1ba935..ed58b9104f58b8894b3dfd924c6facfaba29be67 100644 (file)
@@ -203,7 +203,7 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb)
                        case FIP_DT_MAC:
                                mp = (struct fip_mac_desc *)desc;
                                QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2,
-                                   "fd_mac=%pM.\n", __func__, mp->fd_mac);
+                                   "fd_mac=%pM\n", mp->fd_mac);
                                ether_addr_copy(cvl_mac, mp->fd_mac);
                                break;
                        case FIP_DT_NAME:
index ee0dcf9d3aba7847eaa673ab3542a1c0d6d375c3..46debe5034af102710a574a80254433b94265271 100644 (file)
@@ -1342,7 +1342,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
                } else {
                        refcount = kref_read(&io_req->refcount);
                        QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO,
-                           "%d:0:%d:%d xid=0x%0x op=0x%02x "
+                           "%d:0:%d:%lld xid=0x%0x op=0x%02x "
                            "lba=%02x%02x%02x%02x cdb_status=%d "
                            "fcp_resid=0x%x refcount=%d.\n",
                            qedf->lport->host->host_no, sc_cmd->device->id,
@@ -1426,7 +1426,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req,
 
        sc_cmd->result = result << 16;
        refcount = kref_read(&io_req->refcount);
-       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing "
+       QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing "
            "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, "
            "allowed=%d retries=%d refcount=%d.\n",
            qedf->lport->host->host_no, sc_cmd->device->id,
index d9d7a86b5f8baf13038cfc1bd17885945d1fdf4e..8e2a160490e66a747e75bf9c0c1a149e34d474a3 100644 (file)
@@ -2456,8 +2456,8 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf)
        }
 
        QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
-           "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl,
-           qedf->bdq_pbl_dma);
+                 "BDQ PBL addr=0x%p dma=%pad\n",
+                 qedf->bdq_pbl, &qedf->bdq_pbl_dma);
 
        /*
         * Populate BDQ PBL with physical and virtual address of individual
index 955936274241406e2c8df92a7f3a5fcd530d7f05..59417199bf363ae956bb2832f10fe22307a978d1 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/debugfs.h>
 #include <linux/module.h>
 
-int do_not_recover;
+int qedi_do_not_recover;
 static struct dentry *qedi_dbg_root;
 
 void
@@ -74,22 +74,22 @@ qedi_dbg_exit(void)
 static ssize_t
 qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg)
 {
-       if (!do_not_recover)
-               do_not_recover = 1;
+       if (!qedi_do_not_recover)
+               qedi_do_not_recover = 1;
 
        QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
-                 do_not_recover);
+                 qedi_do_not_recover);
        return 0;
 }
 
 static ssize_t
 qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg)
 {
-       if (do_not_recover)
-               do_not_recover = 0;
+       if (qedi_do_not_recover)
+               qedi_do_not_recover = 0;
 
        QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
-                 do_not_recover);
+                 qedi_do_not_recover);
        return 0;
 }
 
@@ -141,7 +141,7 @@ qedi_dbg_do_not_recover_cmd_read(struct file *filp, char __user *buffer,
        if (*ppos)
                return 0;
 
-       cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover);
+       cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover);
        cnt = min_t(int, count, cnt - *ppos);
        *ppos += cnt;
        return cnt;
index c9f0ef4e11b33ce9ca2a707645b1b2088b6a1f63..2bce3efc66a4b4bda8bae40768ca3e961a6d33b0 100644 (file)
@@ -1461,9 +1461,9 @@ static void qedi_tmf_work(struct work_struct *work)
                  get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id,
                  qedi_conn->iscsi_conn_id);
 
-       if (do_not_recover) {
+       if (qedi_do_not_recover) {
                QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n",
-                        do_not_recover);
+                        qedi_do_not_recover);
                goto abort_ret;
        }
 
index 8e488de88ece9fb8fc49b991935cf5c24a87b5b4..63d793f460645d44c4ae29638145cbcbcbc9be78 100644 (file)
 
 #include "qedi_iscsi.h"
 
+#ifdef CONFIG_DEBUG_FS
+extern int qedi_do_not_recover;
+#else
+#define qedi_do_not_recover (0)
+#endif
+
 extern uint qedi_io_tracing;
-extern int do_not_recover;
+
 extern struct scsi_host_template qedi_host_template;
 extern struct iscsi_transport qedi_iscsi_transport;
 extern const struct qed_iscsi_ops *qedi_ops;
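
The qedi change renames the overly generic do_not_recover global to qedi_do_not_recover and, when CONFIG_DEBUG_FS is off, defines it as the constant 0 so call sites compile unchanged and the compiler can drop the dead branches. A sketch of that header pattern with invented names (CONFIG_EXAMPLE_DEBUG stands in for CONFIG_DEBUG_FS):

#include <stdio.h>

/* CONFIG_EXAMPLE_DEBUG stands in for CONFIG_DEBUG_FS; all names are invented. */
#ifdef CONFIG_EXAMPLE_DEBUG
extern int example_do_not_recover;
int example_do_not_recover;             /* normally defined next to the debugfs code */
#else
#define example_do_not_recover (0)
#endif

static int try_recover(void)
{
        if (example_do_not_recover)     /* folds to if (0) when the debug option is off */
                return -1;
        return 0;
}

int main(void)
{
        printf("recover rc=%d\n", try_recover());
        return 0;
}
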
index b9f79d36142d5e85182d39d16ba34a7a1a6f6521..4cc474364c50568806b16520ddd66239b9f3ebfd 100644 (file)
@@ -833,7 +833,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
                return ERR_PTR(ret);
        }
 
-       if (do_not_recover) {
+       if (qedi_do_not_recover) {
                ret = -ENOMEM;
                return ERR_PTR(ret);
        }
@@ -957,7 +957,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
        struct qedi_endpoint *qedi_ep;
        int ret = 0;
 
-       if (do_not_recover)
+       if (qedi_do_not_recover)
                return 1;
 
        qedi_ep = ep->dd_data;
@@ -1025,7 +1025,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
                }
 
                if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
-                       if (do_not_recover) {
+                       if (qedi_do_not_recover) {
                                QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
                                          "Do not recover cid=0x%x\n",
                                          qedi_ep->iscsi_cid);
@@ -1039,7 +1039,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
                }
        }
 
-       if (do_not_recover)
+       if (qedi_do_not_recover)
                goto ep_exit_recover;
 
        switch (qedi_ep->state) {
index 5eda21d903e93dfc96702552d2de9deb7f01c734..8e3d92807cb8033acea1bc3c9e990b05f521f8a8 100644 (file)
@@ -1805,7 +1805,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
         */
        qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params);
 
-       qedi_setup_int(qedi);
+       rc = qedi_setup_int(qedi);
        if (rc)
                goto stop_iscsi_func;
 
index 21d9fb7fc88796cbaa09fbfa160b9b20c17e2015..51b4179469d1851be96872ee39b24739fc34135e 100644 (file)
@@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id,
            "%-+5d  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F\n", size);
        ql_dbg(level, vha, id,
            "----- -----------------------------------------------\n");
-       for (cnt = 0; cnt < size; cnt++, buf++) {
-               if (cnt % 16 == 0)
-                       ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU);
-               printk(" %02x", *buf);
-               if (cnt % 16 == 15)
-                       printk("\n");
+       for (cnt = 0; cnt < size; cnt += 16) {
+               ql_dbg(level, vha, id, "%04x: ", cnt);
+               print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1,
+                              buf + cnt, min(16U, size - cnt), false);
        }
-       if (cnt % 16 != 0)
-               printk("\n");
 }
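
ql_dump_buffer() above stops emitting one printk per byte and instead walks the buffer in 16-byte chunks, delegating the byte formatting to print_hex_dump(). A standalone sketch of the same 16-bytes-per-line layout (plain printf in place of the kernel helpers):

#include <stdio.h>
#include <stddef.h>

static void dump_buffer(const unsigned char *buf, size_t size)
{
        for (size_t off = 0; off < size; off += 16) {
                size_t n = size - off < 16 ? size - off : 16;

                printf("%04zx:", off);
                for (size_t i = 0; i < n; i++)
                        printf(" %02x", buf[off + i]);
                putchar('\n');
        }
}

int main(void)
{
        unsigned char data[40];

        for (size_t i = 0; i < sizeof(data); i++)
                data[i] = (unsigned char)i;
        dump_buffer(data, sizeof(data));        /* three lines: 16 + 16 + 8 bytes */
        return 0;
}
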
index ba2286652ff647f285761e046f3699bed5c39eba..19125d72f322c934d84d2d71c748ca9c08418846 100644 (file)
@@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume);
 /**
  * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state
  * @sdev:      device to block
+ * @wait:      Whether or not to wait until ongoing .queuecommand() /
+ *             .queue_rq() calls have finished.
  *
  * Block request made by scsi lld's to temporarily stop all
  * scsi commands on the specified device. May sleep.
@@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume);
  * remove the rport mutex lock and unlock calls from srp_queuecommand().
  */
 int
-scsi_internal_device_block(struct scsi_device *sdev)
+scsi_internal_device_block(struct scsi_device *sdev, bool wait)
 {
        struct request_queue *q = sdev->request_queue;
        unsigned long flags;
@@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev)
         * request queue. 
         */
        if (q->mq_ops) {
-               blk_mq_quiesce_queue(q);
+               if (wait)
+                       blk_mq_quiesce_queue(q);
+               else
+                       blk_mq_stop_hw_queues(q);
        } else {
                spin_lock_irqsave(q->queue_lock, flags);
                blk_stop_queue(q);
                spin_unlock_irqrestore(q->queue_lock, flags);
-               scsi_wait_for_queuecommand(sdev);
+               if (wait)
+                       scsi_wait_for_queuecommand(sdev);
        }
 
        return 0;
@@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
 static void
 device_block(struct scsi_device *sdev, void *data)
 {
-       scsi_internal_device_block(sdev);
+       scsi_internal_device_block(sdev, true);
 }
 
 static int
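
scsi_internal_device_block() now takes a wait argument: true quiesces the queue and waits for outstanding ->queuecommand() / ->queue_rq() calls, false only stops the hardware queues, which is what the mpt3sas call sites need since they cannot wait there. A toy sketch of how a caller chooses between the two (names and bodies are illustrative, not the midlayer's):

#include <stdbool.h>
#include <stdio.h>

static int device_block(bool wait)
{
        puts("hardware queues stopped");
        if (wait)
                puts("draining commands already handed to the LLD");
        return 0;
}

int main(void)
{
        device_block(true);     /* e.g. the sysfs state change: sleeping is allowed */
        device_block(false);    /* e.g. the mpt3sas event path: must not wait here */
        return 0;
}
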
index 99bfc985e1903bcffd10762d52f1a72448aaed96..f11bd102d6d5d6b5a390762448002cd5b0f357d7 100644 (file)
@@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { }
  */
 
 #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT  600     /* units in seconds */
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
-                                       enum scsi_device_state new_state);
 
 #endif /* _SCSI_PRIV_H */
index d277e8620e3e39794584ac29dc55cc3ce476a03a..fcfeddc79331bbf32a71e296cf606513ae5b3d78 100644 (file)
@@ -1783,6 +1783,8 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 {
        int result = SCpnt->result;
        unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
+       unsigned int sector_size = SCpnt->device->sector_size;
+       unsigned int resid;
        struct scsi_sense_hdr sshdr;
        struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
        struct request *req = SCpnt->request;
@@ -1813,6 +1815,21 @@ static int sd_done(struct scsi_cmnd *SCpnt)
                        scsi_set_resid(SCpnt, blk_rq_bytes(req));
                }
                break;
+       default:
+               /*
+                * In case of bogus fw or device, we could end up having
+                * an unaligned partial completion. Check this here and force
+                * alignment.
+                */
+               resid = scsi_get_resid(SCpnt);
+               if (resid & (sector_size - 1)) {
+                       sd_printk(KERN_INFO, sdkp,
+                               "Unaligned partial completion (resid=%u, sector_sz=%u)\n",
+                               resid, sector_size);
+                       resid = min(scsi_bufflen(SCpnt),
+                                   round_up(resid, sector_size));
+                       scsi_set_resid(SCpnt, resid);
+               }
        }
 
        if (result) {
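
The unaligned-partial-completion workaround moves out of mpt3sas and into sd_done(): if the reported residual is not sector aligned, round it up to a whole sector and cap it at the request length. A worked example of that arithmetic (assuming, as the kernel's round_up() does, a power-of-two sector size):

#include <stdio.h>

static unsigned int fix_resid(unsigned int resid, unsigned int sector_size,
                              unsigned int bufflen)
{
        if (resid & (sector_size - 1)) {
                unsigned int rounded = (resid + sector_size - 1) & ~(sector_size - 1);

                resid = rounded < bufflen ? rounded : bufflen;
        }
        return resid;
}

int main(void)
{
        /* 4096-byte request, 512-byte sectors, device reported 700 bytes of
         * residue: report 1024 bytes (two whole sectors) instead. */
        printf("resid=%u\n", fix_resid(700, 512, 4096));
        return 0;
}
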
index 638e5f427c901fddee96a53328cd8cc76f1368be..016639d7fef176da5e54ddc5d4d32e110ea0656a 100644 (file)
@@ -400,8 +400,6 @@ MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels")
  */
 static int storvsc_timeout = 180;
 
-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
-
 #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
 static struct scsi_transport_template *fc_transport_template;
 #endif
@@ -1383,6 +1381,22 @@ static int storvsc_do_io(struct hv_device *device,
        return ret;
 }
 
+static int storvsc_device_alloc(struct scsi_device *sdevice)
+{
+       /*
+        * Set blist flag to permit the reading of the VPD pages even when
+        * the target may claim SPC-2 compliance. MSFT targets currently
+        * claim SPC-2 compliance while they implement post SPC-2 features.
+        * With this flag we can correctly handle WRITE_SAME_16 issues.
+        *
+        * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
+        * still supports REPORT LUN.
+        */
+       sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
+
+       return 0;
+}
+
 static int storvsc_device_configure(struct scsi_device *sdevice)
 {
 
@@ -1395,14 +1409,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
 
        sdevice->no_write_same = 1;
 
-       /*
-        * Add blist flags to permit the reading of the VPD pages even when
-        * the target may claim SPC-2 compliance. MSFT targets currently
-        * claim SPC-2 compliance while they implement post SPC-2 features.
-        * With this patch we can correctly handle WRITE_SAME_16 issues.
-        */
-       sdevice->sdev_bflags |= msft_blist_flags;
-
        /*
         * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
         * if the device is a MSFT virtual device.  If the host is
@@ -1661,6 +1667,7 @@ static struct scsi_host_template scsi_driver = {
        .eh_host_reset_handler =        storvsc_host_reset_handler,
        .proc_name =            "storvsc_host",
        .eh_timed_out =         storvsc_eh_timed_out,
+       .slave_alloc =          storvsc_device_alloc,
        .slave_configure =      storvsc_device_configure,
        .cmd_per_lun =          255,
        .this_id =              -1,
index 318e4a1f76c92bab27954b5cc29a3d374c8eb6e7..54deeb754db5fccf7d918b604916b30d8defa8e5 100644 (file)
@@ -146,7 +146,7 @@ enum attr_idn {
 /* Descriptor idn for Query requests */
 enum desc_idn {
        QUERY_DESC_IDN_DEVICE           = 0x0,
-       QUERY_DESC_IDN_CONFIGURAION     = 0x1,
+       QUERY_DESC_IDN_CONFIGURATION    = 0x1,
        QUERY_DESC_IDN_UNIT             = 0x2,
        QUERY_DESC_IDN_RFU_0            = 0x3,
        QUERY_DESC_IDN_INTERCONNECT     = 0x4,
@@ -162,19 +162,13 @@ enum desc_header_offset {
        QUERY_DESC_DESC_TYPE_OFFSET     = 0x01,
 };
 
-enum ufs_desc_max_size {
-       QUERY_DESC_DEVICE_MAX_SIZE              = 0x40,
-       QUERY_DESC_CONFIGURAION_MAX_SIZE        = 0x90,
-       QUERY_DESC_UNIT_MAX_SIZE                = 0x23,
-       QUERY_DESC_INTERCONNECT_MAX_SIZE        = 0x06,
-       /*
-        * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes
-        * of descriptor header.
-        */
-       QUERY_DESC_STRING_MAX_SIZE              = 0xFE,
-       QUERY_DESC_GEOMETRY_MAX_SIZE            = 0x44,
-       QUERY_DESC_POWER_MAX_SIZE               = 0x62,
-       QUERY_DESC_RFU_MAX_SIZE                 = 0x00,
+enum ufs_desc_def_size {
+       QUERY_DESC_DEVICE_DEF_SIZE              = 0x40,
+       QUERY_DESC_CONFIGURATION_DEF_SIZE       = 0x90,
+       QUERY_DESC_UNIT_DEF_SIZE                = 0x23,
+       QUERY_DESC_INTERCONNECT_DEF_SIZE        = 0x06,
+       QUERY_DESC_GEOMETRY_DEF_SIZE            = 0x44,
+       QUERY_DESC_POWER_DEF_SIZE               = 0x62,
 };
 
 /* Unit descriptor parameters offsets in bytes*/
index dc6efbd1be8ef344bb994589054c24843e442230..1359913bf840ce0522e09fe72b5c93bd210e4db7 100644 (file)
 #define ufshcd_hex_dump(prefix_str, buf, len) \
 print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false)
 
-static u32 ufs_query_desc_max_size[] = {
-       QUERY_DESC_DEVICE_MAX_SIZE,
-       QUERY_DESC_CONFIGURAION_MAX_SIZE,
-       QUERY_DESC_UNIT_MAX_SIZE,
-       QUERY_DESC_RFU_MAX_SIZE,
-       QUERY_DESC_INTERCONNECT_MAX_SIZE,
-       QUERY_DESC_STRING_MAX_SIZE,
-       QUERY_DESC_RFU_MAX_SIZE,
-       QUERY_DESC_GEOMETRY_MAX_SIZE,
-       QUERY_DESC_POWER_MAX_SIZE,
-       QUERY_DESC_RFU_MAX_SIZE,
-};
-
 enum {
        UFSHCD_MAX_CHANNEL      = 0,
        UFSHCD_MAX_ID           = 1,
@@ -2857,7 +2844,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba,
                goto out;
        }
 
-       if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
+       if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
                dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n",
                                __func__, *buf_len);
                err = -EINVAL;
@@ -2937,6 +2924,92 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
        return err;
 }
 
+/**
+ * ufshcd_read_desc_length - read the specified descriptor length from header
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_index: descriptor index
+ * @desc_length: pointer to variable to read the length of descriptor
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+static int ufshcd_read_desc_length(struct ufs_hba *hba,
+       enum desc_idn desc_id,
+       int desc_index,
+       int *desc_length)
+{
+       int ret;
+       u8 header[QUERY_DESC_HDR_SIZE];
+       int header_len = QUERY_DESC_HDR_SIZE;
+
+       if (desc_id >= QUERY_DESC_IDN_MAX)
+               return -EINVAL;
+
+       ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+                                       desc_id, desc_index, 0, header,
+                                       &header_len);
+
+       if (ret) {
+               dev_err(hba->dev, "%s: Failed to get descriptor header id %d",
+                       __func__, desc_id);
+               return ret;
+       } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) {
+               dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch",
+                       __func__, header[QUERY_DESC_DESC_TYPE_OFFSET],
+                       desc_id);
+               ret = -EINVAL;
+       }
+
+       *desc_length = header[QUERY_DESC_LENGTH_OFFSET];
+       return ret;
+
+}
+
+/**
+ * ufshcd_map_desc_id_to_length - map descriptor IDN to its length
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_len: mapped desc length (out)
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba,
+       enum desc_idn desc_id, int *desc_len)
+{
+       switch (desc_id) {
+       case QUERY_DESC_IDN_DEVICE:
+               *desc_len = hba->desc_size.dev_desc;
+               break;
+       case QUERY_DESC_IDN_POWER:
+               *desc_len = hba->desc_size.pwr_desc;
+               break;
+       case QUERY_DESC_IDN_GEOMETRY:
+               *desc_len = hba->desc_size.geom_desc;
+               break;
+       case QUERY_DESC_IDN_CONFIGURATION:
+               *desc_len = hba->desc_size.conf_desc;
+               break;
+       case QUERY_DESC_IDN_UNIT:
+               *desc_len = hba->desc_size.unit_desc;
+               break;
+       case QUERY_DESC_IDN_INTERCONNECT:
+               *desc_len = hba->desc_size.interc_desc;
+               break;
+       case QUERY_DESC_IDN_STRING:
+               *desc_len = QUERY_DESC_MAX_SIZE;
+               break;
+       case QUERY_DESC_IDN_RFU_0:
+       case QUERY_DESC_IDN_RFU_1:
+               *desc_len = 0;
+               break;
+       default:
+               *desc_len = 0;
+               return -EINVAL;
+       }
+       return 0;
+}
+EXPORT_SYMBOL(ufshcd_map_desc_id_to_length);
+
 /**
  * ufshcd_read_desc_param - read the specified descriptor parameter
  * @hba: Pointer to adapter instance
@@ -2951,42 +3024,49 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
 static int ufshcd_read_desc_param(struct ufs_hba *hba,
                                  enum desc_idn desc_id,
                                  int desc_index,
-                                 u32 param_offset,
+                                 u8 param_offset,
                                  u8 *param_read_buf,
-                                 u32 param_size)
+                                 u8 param_size)
 {
        int ret;
        u8 *desc_buf;
-       u32 buff_len;
+       int buff_len;
        bool is_kmalloc = true;
 
-       /* safety checks */
-       if (desc_id >= QUERY_DESC_IDN_MAX)
+       /* Safety check */
+       if (desc_id >= QUERY_DESC_IDN_MAX || !param_size)
                return -EINVAL;
 
-       buff_len = ufs_query_desc_max_size[desc_id];
-       if ((param_offset + param_size) > buff_len)
-               return -EINVAL;
+       /* Get the maximum descriptor length from the structure filled in at
+        * probe time.
+        */
+       ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len);
 
-       if (!param_offset && (param_size == buff_len)) {
-               /* memory space already available to hold full descriptor */
-               desc_buf = param_read_buf;
-               is_kmalloc = false;
-       } else {
-               /* allocate memory to hold full descriptor */
+       /* Sanity checks */
+       if (ret || !buff_len) {
+               dev_err(hba->dev, "%s: Failed to get full descriptor length",
+                       __func__);
+               return ret;
+       }
+
+       /* Check whether we need temp memory */
+       if (param_offset != 0 || param_size < buff_len) {
                desc_buf = kmalloc(buff_len, GFP_KERNEL);
                if (!desc_buf)
                        return -ENOMEM;
+       } else {
+               desc_buf = param_read_buf;
+               is_kmalloc = false;
        }
 
+       /* Request for full descriptor */
        ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
-                                       desc_id, desc_index, 0, desc_buf,
-                                       &buff_len);
+                                       desc_id, desc_index, 0,
+                                       desc_buf, &buff_len);
 
        if (ret) {
                dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
                        __func__, desc_id, desc_index, param_offset, ret);
-
                goto out;
        }
 
@@ -2998,25 +3078,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba,
                goto out;
        }
 
-       /*
-        * While reading variable size descriptors (like string descriptor),
-        * some UFS devices may report the "LENGTH" (field in "Transaction
-        * Specific fields" of Query Response UPIU) same as what was requested
-        * in Query Request UPIU instead of reporting the actual size of the
-        * variable size descriptor.
-        * Although it's safe to ignore the "LENGTH" field for variable size
-        * descriptors as we can always derive the length of the descriptor from
-        * the descriptor header fields. Hence this change impose the length
-        * match check only for fixed size descriptors (for which we always
-        * request the correct size as part of Query Request UPIU).
-        */
-       if ((desc_id != QUERY_DESC_IDN_STRING) &&
-           (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) {
-               dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d",
-                       __func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]);
-               ret = -EINVAL;
-               goto out;
-       }
+       /* Make sure we do not copy more data than is available */
+       if (is_kmalloc && param_size > buff_len)
+               param_size = buff_len;
 
        if (is_kmalloc)
                memcpy(param_read_buf, &desc_buf[param_offset], param_size);
@@ -5919,8 +5983,8 @@ static int ufshcd_set_icc_levels_attr(struct ufs_hba *hba, u32 icc_level)
 static void ufshcd_init_icc_levels(struct ufs_hba *hba)
 {
        int ret;
-       int buff_len = QUERY_DESC_POWER_MAX_SIZE;
-       u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE];
+       int buff_len = hba->desc_size.pwr_desc;
+       u8 desc_buf[hba->desc_size.pwr_desc];
 
        ret = ufshcd_read_power_desc(hba, desc_buf, buff_len);
        if (ret) {
@@ -6017,11 +6081,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
 {
        int err;
        u8 model_index;
-       u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
-       u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+       u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0};
+       u8 desc_buf[hba->desc_size.dev_desc];
 
-       err = ufshcd_read_device_desc(hba, desc_buf,
-                                       QUERY_DESC_DEVICE_MAX_SIZE);
+       err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc);
        if (err) {
                dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
                        __func__, err);
@@ -6038,14 +6101,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba,
        model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
 
        err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
-                                       QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+                               QUERY_DESC_MAX_SIZE, ASCII_STD);
        if (err) {
                dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
                        __func__, err);
                goto out;
        }
 
-       str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+       str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0';
        strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
                min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
                      MAX_MODEL_LEN));
@@ -6251,6 +6314,51 @@ static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba)
        hba->req_abort_count = 0;
 }
 
+static void ufshcd_init_desc_sizes(struct ufs_hba *hba)
+{
+       int err;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0,
+               &hba->desc_size.dev_desc);
+       if (err)
+               hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0,
+               &hba->desc_size.pwr_desc);
+       if (err)
+               hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0,
+               &hba->desc_size.interc_desc);
+       if (err)
+               hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0,
+               &hba->desc_size.conf_desc);
+       if (err)
+               hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0,
+               &hba->desc_size.unit_desc);
+       if (err)
+               hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+
+       err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0,
+               &hba->desc_size.geom_desc);
+       if (err)
+               hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
+static void ufshcd_def_desc_sizes(struct ufs_hba *hba)
+{
+       hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+       hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+       hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+       hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+       hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+       hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
 /**
  * ufshcd_probe_hba - probe hba to detect device and initialize
  * @hba: per-adapter instance
@@ -6285,6 +6393,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
        if (ret)
                goto out;
 
+       /* Init check for device descriptor sizes */
+       ufshcd_init_desc_sizes(hba);
+
        ret = ufs_get_device_desc(hba, &card);
        if (ret) {
                dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
@@ -6320,6 +6431,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 
        /* set the state as operational after switching to desired gear */
        hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
        /*
         * If we are in error handling context or in power management callbacks
         * context, no need to scan the host
@@ -7774,6 +7886,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
        hba->mmio_base = mmio_base;
        hba->irq = irq;
 
+       /* Set descriptor lengths to specification defaults */
+       ufshcd_def_desc_sizes(hba);
+
        err = ufshcd_hba_init(hba);
        if (err)
                goto out_error;
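
Instead of the static ufs_query_desc_max_size[] table, the UFS hunks read each descriptor's actual length from its header at probe time (ufshcd_init_desc_sizes) and fall back to the specification defaults on error (ufshcd_def_desc_sizes). A sketch of that probe-then-fallback scheme with a stubbed device read; the constants are illustrative, only DEVICE_DESC_DEF_SIZE mirrors the 0x40 default above:

#include <stdio.h>

#define DESC_LENGTH_OFFSET   0
#define DEVICE_DESC_DEF_SIZE 0x40       /* matches QUERY_DESC_DEVICE_DEF_SIZE above */

static int read_desc_header(unsigned char *header)
{
        header[DESC_LENGTH_OFFSET] = 0x59;      /* pretend the device reports 0x59 bytes */
        return 0;                               /* return non-zero to simulate a failed query */
}

static int probe_device_desc_size(void)
{
        unsigned char header[2];

        if (read_desc_header(header))
                return DEVICE_DESC_DEF_SIZE;    /* query failed: use the spec default */
        return header[DESC_LENGTH_OFFSET];
}

int main(void)
{
        printf("device descriptor size: %#x\n", probe_device_desc_size());
        return 0;
}
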
index 7630600217a2ef91e7d3629e5a4d2a0716c24c2a..cdc8bd05f7dfcf7189a4a616bb90c2ea3a8e2ece 100644 (file)
@@ -220,6 +220,15 @@ struct ufs_dev_cmd {
        struct ufs_query query;
 };
 
+struct ufs_desc_size {
+       int dev_desc;
+       int pwr_desc;
+       int geom_desc;
+       int interc_desc;
+       int unit_desc;
+       int conf_desc;
+};
+
 /**
  * struct ufs_clk_info - UFS clock related info
  * @list: list headed by hba->clk_list_head
@@ -483,6 +492,7 @@ struct ufs_stats {
  * @clk_list_head: UFS host controller clocks list node head
  * @pwr_info: holds current power mode
  * @max_pwr_info: keeps the device max valid pwm
+ * @desc_size: descriptor sizes reported by device
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
  *  device is known or not.
@@ -666,6 +676,7 @@ struct ufs_hba {
        bool is_urgent_bkops_lvl_checked;
 
        struct rw_semaphore clk_scaling_lock;
+       struct ufs_desc_size desc_size;
 };
 
 /* Returns true if clocks can be gated. Otherwise false */
@@ -832,6 +843,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
        enum flag_idn idn, bool *flag_res);
 int ufshcd_hold(struct ufs_hba *hba, bool async);
 void ufshcd_release(struct ufs_hba *hba);
+
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id,
+       int *desc_length);
+
 u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
 
 /* Wrapper functions for safely calling variant operations */
index ef474a7487449b4c1d51f82643988eb08fc1ed86..c374e3b5c678d215bfa9e7ed33e2d033e5d4bfb3 100644 (file)
@@ -1487,7 +1487,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                irq_flag &= ~PCI_IRQ_MSI;
 
        error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
-       if (error)
+       if (error < 0)
                goto out_reset_adapter;
 
        adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
index b7b87ecefcdfc712b74a79298594cdaa94ab68bc..9fca8d225ee092e92e1fb71a7a6faff5ee1a831c 100644 (file)
@@ -532,7 +532,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
 
        newsock->ops = sock->ops;
 
-       rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+       rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
        if (rc == -EAGAIN) {
                /* Nothing ready, so wait for activity */
                init_waitqueue_entry(&wait, current);
@@ -540,7 +540,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock)
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
                remove_wait_queue(sk_sleep(sock->sk), &wait);
-               rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+               rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
        }
 
        if (rc)
index 7d398d300e972c3604727ac8d6d4fbf7302318a6..9382db998ec9549319f47b55ccd561abf1169874 100644 (file)
@@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con)
        newsock->type = con->sock->type;
        newsock->ops = con->sock->ops;
 
-       result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+       result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
        if (result < 0)
                goto accept_err;
 
index 65145a3df065192345c66ebc311d464fe09a6f29..72934df6847150ba50dfbadad78fe10e01d2eadd 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1320,6 +1320,7 @@ void setup_new_exec(struct linux_binprm * bprm)
        else
                set_dumpable(current->mm, suid_dumpable);
 
+       arch_setup_new_exec();
        perf_event_exec();
        __set_task_comm(current, kbasename(bprm->filename), true);
 
index ef600591d96f9a42be98699025f4cf94ef8e7762..63ee2940775ce9c16daca5c2f7590e0c6e57bc07 100644 (file)
@@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb)
        spin_unlock_bh(&wb->work_lock);
 }
 
+static void finish_writeback_work(struct bdi_writeback *wb,
+                                 struct wb_writeback_work *work)
+{
+       struct wb_completion *done = work->done;
+
+       if (work->auto_free)
+               kfree(work);
+       if (done && atomic_dec_and_test(&done->cnt))
+               wake_up_all(&wb->bdi->wb_waitq);
+}
+
 static void wb_queue_work(struct bdi_writeback *wb,
                          struct wb_writeback_work *work)
 {
        trace_writeback_queue(wb, work);
 
-       spin_lock_bh(&wb->work_lock);
-       if (!test_bit(WB_registered, &wb->state))
-               goto out_unlock;
        if (work->done)
                atomic_inc(&work->done->cnt);
-       list_add_tail(&work->list, &wb->work_list);
-       mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+       spin_lock_bh(&wb->work_lock);
+
+       if (test_bit(WB_registered, &wb->state)) {
+               list_add_tail(&work->list, &wb->work_list);
+               mod_delayed_work(bdi_wq, &wb->dwork, 0);
+       } else
+               finish_writeback_work(wb, work);
+
        spin_unlock_bh(&wb->work_lock);
 }
 
@@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb)
 
        set_bit(WB_writeback_running, &wb->state);
        while ((work = get_next_work_item(wb)) != NULL) {
-               struct wb_completion *done = work->done;
-
                trace_writeback_exec(wb, work);
-
                wrote += wb_writeback(wb, work);
-
-               if (work->auto_free)
-                       kfree(work);
-               if (done && atomic_dec_and_test(&done->cnt))
-                       wake_up_all(&wb->bdi->wb_waitq);
+               finish_writeback_work(wb, work);
        }
 
        /*
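
Note: the refactor above makes finish_writeback_work() the single place that
frees auto_free items and drops the completion count, so work queued against an
unregistered bdi_writeback is completed immediately instead of stranding its
waiter. wb_queue_work() takes one done->cnt reference per queued work,
finish_writeback_work() drops it and wakes the bdi wait queue on the last one,
and the submitter then only has to sleep on the count, which is essentially
what the existing wb_wait_for_completion() helper in this file does. A minimal
sketch of that waiter side:

	/* Sketch of the waiter side of the wb_completion counting pattern. */
	static void example_wait_for_writeback(struct backing_dev_info *bdi,
					       struct wb_completion *done)
	{
		wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
	}
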
index c45084ac642d1929058ea5d903ad796d574a45cc..511e1ed7e2ded7b0a9dc9882cffe0b66c37c96a2 100644 (file)
@@ -207,7 +207,7 @@ struct lm_lockname {
        struct gfs2_sbd *ln_sbd;
        u64 ln_number;
        unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
 
 #define lm_name_equal(name1, name2) \
         (((name1)->ln_number == (name2)->ln_number) && \
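
Note: __packed removes the compiler's trailing pad after ln_type (a pointer, a
u64 and an unsigned int would otherwise round up to the next 8-byte multiple on
64-bit), so the key can be hashed and compared as raw bytes without pulling
uninitialized padding into the result, while __aligned(sizeof(int)) keeps the
members naturally aligned. A sketch of why that matters for a byte-wise key,
with illustrative values:

	struct lm_lockname name = {
		.ln_sbd    = sdp,
		.ln_number = no_addr,
		.ln_type   = LM_TYPE_INODE,
	};

	/* safe only once the struct has no padding: every byte is initialized */
	u32 h = jhash(&name, sizeof(name), 0);
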
index 4348027384f5edf06a66dd417214b9bbd3dd05cd..d0ab7e56d0b41a7a97f3640eb0ab4801f747dffc 100644 (file)
@@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
 
        new_sock->type = sock->type;
        new_sock->ops = sock->ops;
-       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
        if (ret < 0)
                goto out;
 
index d04547fcf274af0eaee18096c94b22652551b9f7..eb00bc133bca673c556eb85a18385bbc3748dfcf 100644 (file)
@@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+               int size);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
index c6809ff41197d934c068e84b19eb77986bc7dccf..96b45cd6c63f0686d3c1cce5c41b232f0ab82080 100644 (file)
@@ -629,6 +629,93 @@ xfs_dir2_sf_check(
 }
 #endif /* DEBUG */
 
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+       struct xfs_mount                *mp,
+       struct xfs_dir2_sf_hdr          *sfp,
+       int                             size)
+{
+       struct xfs_dir2_sf_entry        *sfep;
+       struct xfs_dir2_sf_entry        *next_sfep;
+       char                            *endp;
+       const struct xfs_dir_ops        *dops;
+       xfs_ino_t                       ino;
+       int                             i;
+       int                             i8count;
+       int                             offset;
+       __uint8_t                       filetype;
+
+       dops = xfs_dir_get_ops(mp, NULL);
+
+       /*
+        * Give up if the directory is way too short.
+        */
+       XFS_WANT_CORRUPTED_RETURN(mp, size >
+                       offsetof(struct xfs_dir2_sf_hdr, parent));
+       XFS_WANT_CORRUPTED_RETURN(mp, size >=
+                       xfs_dir2_sf_hdr_size(sfp->i8count));
+
+       endp = (char *)sfp + size;
+
+       /* Check .. entry */
+       ino = dops->sf_get_parent_ino(sfp);
+       i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+       XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+       offset = dops->data_first_offset;
+
+       /* Check all reported entries */
+       sfep = xfs_dir2_sf_firstentry(sfp);
+       for (i = 0; i < sfp->count; i++) {
+               /*
+                * struct xfs_dir2_sf_entry has a variable length.
+                * Check the fixed-offset parts of the structure are
+                * within the data buffer.
+                */
+               XFS_WANT_CORRUPTED_RETURN(mp,
+                               ((char *)sfep + sizeof(*sfep)) < endp);
+
+               /* Don't allow names with known bad length. */
+               XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+
+               /*
+                * Check that the variable-length part of the structure is
+                * within the data buffer.  The next entry starts after the
+                * name component, so nextentry is an acceptable test.
+                */
+               next_sfep = dops->sf_nextentry(sfp, sfep);
+               XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+
+               /* Check that the offsets always increase. */
+               XFS_WANT_CORRUPTED_RETURN(mp,
+                               xfs_dir2_sf_get_offset(sfep) >= offset);
+
+               /* Check the inode number. */
+               ino = dops->sf_get_ino(sfp, sfep);
+               i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+               XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+
+               /* Check the file type. */
+               filetype = dops->sf_get_ftype(sfep);
+               XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+
+               offset = xfs_dir2_sf_get_offset(sfep) +
+                               dops->data_entsize(sfep->namelen);
+
+               sfep = next_sfep;
+       }
+       XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+       XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+
+       /* Make sure this whole thing ought to be in local format. */
+       XFS_WANT_CORRUPTED_RETURN(mp, offset +
+              (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+              (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+
+       return 0;
+}
+
 /*
  * Create a new (shortform) directory.
  */
index 25c1e078aef6a5925c12f2cc91b0d18b8b38711b..9653e964eda4f99ca611bb2cb6449a470be45d48 100644 (file)
@@ -33,6 +33,8 @@
 #include "xfs_trace.h"
 #include "xfs_attr_sf.h"
 #include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
@@ -320,6 +322,7 @@ xfs_iformat_local(
        int             whichfork,
        int             size)
 {
+       int             error;
 
        /*
         * If the size is unreasonable, then something
@@ -336,6 +339,14 @@ xfs_iformat_local(
                return -EFSCORRUPTED;
        }
 
+       if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+               error = xfs_dir2_sf_verify(ip->i_mount,
+                               (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+                               size);
+               if (error)
+                       return error;
+       }
+
        xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
        return 0;
 }
@@ -856,7 +867,7 @@ xfs_iextents_copy(
  * In these cases, the format always takes precedence, because the
  * format indicates the current state of the fork.
  */
-void
+int
 xfs_iflush_fork(
        xfs_inode_t             *ip,
        xfs_dinode_t            *dip,
@@ -866,6 +877,7 @@ xfs_iflush_fork(
        char                    *cp;
        xfs_ifork_t             *ifp;
        xfs_mount_t             *mp;
+       int                     error;
        static const short      brootflag[2] =
                { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
        static const short      dataflag[2] =
@@ -874,7 +886,7 @@ xfs_iflush_fork(
                { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
 
        if (!iip)
-               return;
+               return 0;
        ifp = XFS_IFORK_PTR(ip, whichfork);
        /*
         * This can happen if we gave up in iformat in an error path,
@@ -882,12 +894,19 @@ xfs_iflush_fork(
         */
        if (!ifp) {
                ASSERT(whichfork == XFS_ATTR_FORK);
-               return;
+               return 0;
        }
        cp = XFS_DFORK_PTR(dip, whichfork);
        mp = ip->i_mount;
        switch (XFS_IFORK_FORMAT(ip, whichfork)) {
        case XFS_DINODE_FMT_LOCAL:
+               if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+                       error = xfs_dir2_sf_verify(mp,
+                                       (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+                                       ifp->if_bytes);
+                       if (error)
+                               return error;
+               }
                if ((iip->ili_fields & dataflag[whichfork]) &&
                    (ifp->if_bytes > 0)) {
                        ASSERT(ifp->if_u1.if_data != NULL);
@@ -940,6 +959,7 @@ xfs_iflush_fork(
                ASSERT(0);
                break;
        }
+       return 0;
 }
 
 /*
index 7fb8365326d1a745583c4f133bc5a63668316b33..132dc59fdde6942cd22fca4ae11b8adbc193f051 100644 (file)
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
 struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
 
 int            xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-void           xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+int            xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
                                struct xfs_inode_log_item *, int);
 void           xfs_idestroy_fork(struct xfs_inode *, int);
 void           xfs_idata_realloc(struct xfs_inode *, int, int);
index 003a99b83bd8845e22d6311be1d474679521242d..ad9396e516f6e389b88bca5dc2dc41d3372ed714 100644 (file)
@@ -71,22 +71,11 @@ xfs_dir2_sf_getdents(
        struct xfs_da_geometry  *geo = args->geo;
 
        ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-       /*
-        * Give up if the directory is way too short.
-        */
-       if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-               ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
-               return -EIO;
-       }
-
        ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
        ASSERT(dp->i_df.if_u1.if_data != NULL);
 
        sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
 
-       if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
-               return -EFSCORRUPTED;
-
        /*
         * If the block number in the offset is out of range, we're done.
         */
index 7eaf1ef74e3c63ebb3c640e32d2db87864984a4a..c7fe2c2123ab8375caf0e0349a454ed8b2762095 100644 (file)
@@ -3475,6 +3475,7 @@ xfs_iflush_int(
        struct xfs_inode_log_item *iip = ip->i_itemp;
        struct xfs_dinode       *dip;
        struct xfs_mount        *mp = ip->i_mount;
+       int                     error;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
        ASSERT(xfs_isiflocked(ip));
@@ -3557,9 +3558,14 @@ xfs_iflush_int(
        if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
                ip->i_d.di_flushiter = 0;
 
-       xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
-       if (XFS_IFORK_Q(ip))
-               xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+       error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+       if (error)
+               return error;
+       if (XFS_IFORK_Q(ip)) {
+               error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+               if (error)
+                       return error;
+       }
        xfs_inobp_check(mp, bp);
 
        /*
index cc5d9a1405df925c43ec62a03be03c30ae921f56..41e5b6784b975d1974604c6265486ab858c3865f 100644 (file)
@@ -32,10 +32,4 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        /* by default, allow everything */
        return true;
 }
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
-       /* by default, allow everything */
-       return true;
-}
 #endif /* _ASM_GENERIC_MM_HOOKS_H */
index 1fad160f35de8e89953af075173a2ad219c9693b..7dfa767dc68012ac52ac81d48e92b3ec79c97311 100644 (file)
@@ -341,6 +341,31 @@ static inline int pte_unused(pte_t pte)
 }
 #endif
 
+#ifndef pte_access_permitted
+#define pte_access_permitted(pte, write) \
+       (pte_present(pte) && (!(write) || pte_write(pte)))
+#endif
+
+#ifndef pmd_access_permitted
+#define pmd_access_permitted(pmd, write) \
+       (pmd_present(pmd) && (!(write) || pmd_write(pmd)))
+#endif
+
+#ifndef pud_access_permitted
+#define pud_access_permitted(pud, write) \
+       (pud_present(pud) && (!(write) || pud_write(pud)))
+#endif
+
+#ifndef p4d_access_permitted
+#define p4d_access_permitted(p4d, write) \
+       (p4d_present(p4d) && (!(write) || p4d_write(p4d)))
+#endif
+
+#ifndef pgd_access_permitted
+#define pgd_access_permitted(pgd, write) \
+       (pgd_present(pgd) && (!(write) || pgd_write(pgd)))
+#endif
+
 #ifndef __HAVE_ARCH_PMD_SAME
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
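
Note: these #ifndef fallbacks give mm/gup.c (further down) one predicate per
page-table level for "is this entry present and, if required, writable",
replacing the removed arch_pte_access_permitted(). An architecture that tracks
extra access state can supply its own definition before this header is
included; purely as an illustration, not any real architecture's code:

	/* hypothetical override in arch/foo/include/asm/pgtable.h */
	static inline bool foo_pte_access_permitted(pte_t pte, bool write)
	{
		if (!pte_present(pte) || (write && !pte_write(pte)))
			return false;
		/* hypothetical extra gate, e.g. a protection-key style check */
		return foo_key_allows_access(pte, write);
	}
	#define pte_access_permitted(pte, write) foo_pte_access_permitted(pte, write)
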
index a2bfd7843f18f6e79d8bc7e5743b031345153053..e2b9c6fe271496e45dca5abb94d814dd8a9c3c04 100644 (file)
@@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type);
 
 int af_alg_release(struct socket *sock);
 void af_alg_release_parent(struct sock *sk);
-int af_alg_accept(struct sock *sk, struct socket *newsock);
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
 
 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
 void af_alg_free_sg(struct af_alg_sgl *sgl);
index aef47be2a5c1a3fd3ea75161f5bc93627772515c..af9dbc44fd9212f42b1fd07212f8dda7d059ed53 100644 (file)
@@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
                                            int, const char __user *);
 
+asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
+
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
  * "am I a compat task?" are the same question.  For architectures on which
index 61d042bbbf607253033d9948b291cab2322814ba..68449293c4b6233c1a1d4133b1819376a9310225 100644 (file)
@@ -163,6 +163,7 @@ struct dccp_request_sock {
        __u64                    dreq_isr;
        __u64                    dreq_gsr;
        __be32                   dreq_service;
+       spinlock_t               dreq_lock;
        struct list_head         dreq_featneg;
        __u32                    dreq_timestamp_echo;
        __u32                    dreq_timestamp_time;
index 0c167fdee5f7d126ed4de7e1201d514d1402a5ca..fbf7b39e81035506b73ddc860e3029119104b255 100644 (file)
@@ -409,6 +409,7 @@ struct bpf_prog {
        u16                     pages;          /* Number of allocated pages */
        kmemcheck_bitfield_begin(meta);
        u16                     jited:1,        /* Is our filter JIT'ed? */
+                               locked:1,       /* Program image locked? */
                                gpl_compatible:1, /* Is filter GPL compatible? */
                                cb_access:1,    /* Is control block accessed? */
                                dst_needed:1,   /* Do we need dst entry? */
@@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 #ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-       set_memory_ro((unsigned long)fp, fp->pages);
+       fp->locked = 1;
+       WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-       set_memory_rw((unsigned long)fp, fp->pages);
+       if (fp->locked) {
+               WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
+               /* In case set_memory_rw() fails, we want to be the first
+                * to crash here instead of some random place later on.
+                */
+               fp->locked = 0;
+       }
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-       set_memory_ro((unsigned long)hdr, hdr->pages);
+       WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-       set_memory_rw((unsigned long)hdr, hdr->pages);
+       WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
 }
 #else
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
index 672cfef72fc85da6a30579b00bf7e1349758a3ac..97cbca19430d82aa2b4c835db1edf2d6620517ba 100644 (file)
 #define ICC_IGRPEN0_EL1_MASK           (1 << ICC_IGRPEN0_EL1_SHIFT)
 #define ICC_IGRPEN1_EL1_SHIFT          0
 #define ICC_IGRPEN1_EL1_MASK           (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB                        (1U << 2)
+#define ICC_SRE_EL1_DFB                        (1U << 1)
 #define ICC_SRE_EL1_SRE                        (1U << 0)
 
 /*
index 188eced6813eddb9c313fdb59016b972835e7674..9f3616085423cfca654264a4f5b9fed022431997 100644 (file)
@@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode(
 {
        return NULL;
 }
+static inline bool irq_domain_check_msi_remap(void)
+{
+       return false;
+}
 #endif /* !CONFIG_IRQ_DOMAIN */
 
 #endif /* _LINUX_IRQDOMAIN_H */
index b01fe100908430708df0df5162594b497ffdad62..87ff4f58a2f0182ec0586c0dee923bc30e004149 100644 (file)
@@ -29,6 +29,11 @@ struct hlist_nulls_node {
        ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
 
 #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+       ({ typeof(ptr) ____ptr = (ptr); \
+          !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+       })
 /**
  * ptr_is_a_nulls - Test if a ptr is a nulls
  * @ptr: ptr to be tested
index 5f01c88f0800daaacca6f9b2a272e657552af4c9..e197d3ca3e8a4119dc2c70ddaf8151b19aabe12a 100644 (file)
@@ -430,6 +430,10 @@ static inline int pud_devmap(pud_t pud)
 {
        return 0;
 }
+static inline int pgd_devmap(pgd_t pgd)
+{
+       return 0;
+}
 #endif
 
 /*
index f60f45fe226fcad85590b1eda3fafb1d170e943a..45cdb27791a33ff8d494549ce92f080a93434889 100644 (file)
@@ -367,6 +367,11 @@ struct mm_struct {
 #endif
        unsigned long mmap_base;                /* base of mmap area */
        unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+       /* Base adresses for compatible mmap() */
+       /* Base addresses for compatible mmap() */
+       unsigned long mmap_compat_base;
+       unsigned long mmap_compat_legacy_base;
+#endif
        unsigned long task_size;                /* size of task vm space */
        unsigned long highest_vm_end;           /* highest vma end address */
        pgd_t * pgd;
index cd0c8bd0a1dec0d2047509177ee8d33de1f185d4..0620f5e18c96b706b70fb71005b29008a11fffb1 100644 (file)
@@ -146,7 +146,7 @@ struct proto_ops {
        int             (*socketpair)(struct socket *sock1,
                                      struct socket *sock2);
        int             (*accept)    (struct socket *sock,
-                                     struct socket *newsock, int flags);
+                                     struct socket *newsock, int flags, bool kern);
        int             (*getname)   (struct socket *sock,
                                      struct sockaddr *addr,
                                      int *sockaddr_len, int peer);
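
Note: every ->accept() signature touched in this series (proto_ops here, struct
proto and the callers above and below) gains a trailing bool kern, so that
sockets accepted on behalf of the kernel can be given kernel lock classes via
the new sk_kern_sock bit, while userspace-bound sockets keep the normal
classes. Roughly how the two kinds of caller differ, as a sketch:

	/* in-kernel consumer (e.g. fs/dlm above): the new socket stays in
	 * the kernel, so pass kern = true */
	err = sock->ops->accept(sock, newsock, O_NONBLOCK, true);

	/* userspace-facing path (the accept4() syscall, lnet, ocfs2): the
	 * socket will be handed to userspace, so pass kern = false */
	err = sock->ops->accept(sock, newsock, flags, false);
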
index 84943e8057ef85a646a8f43155805fd8fa2273d8..316a19f6b635db760570a7e7b66af2a03b697e5e 100644 (file)
@@ -148,7 +148,7 @@ static inline int page_cache_get_speculative(struct page *page)
 
 #ifdef CONFIG_TINY_RCU
 # ifdef CONFIG_PREEMPT_COUNT
-       VM_BUG_ON(!in_atomic());
+       VM_BUG_ON(!in_atomic() && !irqs_disabled());
 # endif
        /*
         * Preempt must be disabled here - we rely on rcu_read_lock doing
@@ -186,7 +186,7 @@ static inline int page_cache_add_speculative(struct page *page, int count)
 
 #if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
 # ifdef CONFIG_PREEMPT_COUNT
-       VM_BUG_ON(!in_atomic());
+       VM_BUG_ON(!in_atomic() && !irqs_disabled());
 # endif
        VM_BUG_ON_PAGE(page_count(page) == 0, page);
        page_ref_add(page, count);
index 772476028a6507f356fe3946372a024588679c3f..43a774873aa96d4af64d0cdebb579be572a6658a 100644 (file)
@@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device *phydev);
 int genphy_soft_reset(struct phy_device *phydev);
+static inline int genphy_no_soft_reset(struct phy_device *phydev)
+{
+       return 0;
+}
 void phy_driver_unregister(struct phy_driver *drv);
 void phy_drivers_unregister(struct phy_driver *drv, int n);
 int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h
new file mode 100644 (file)
index 0000000..d60d4e2
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PURGATORY_H
+#define _LINUX_PURGATORY_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <uapi/linux/kexec.h>
+
+struct kexec_sha_region {
+       unsigned long start;
+       unsigned long len;
+};
+
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ *    lookup in kexec works
+ */
+extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
+extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
+
+#endif
index 7bd2403e4fef1ad7fb0a5f03b4e104e96234d26b..ed5c3838780de5ba9509071bef56e8d521dc5782 100644 (file)
@@ -37,14 +37,26 @@ extern void get_random_bytes(void *buf, int nbytes);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern void get_random_bytes_arch(void *buf, int nbytes);
-extern int random_int_secret_init(void);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
 #endif
 
-unsigned int get_random_int(void);
-unsigned long get_random_long(void);
+u32 get_random_u32(void);
+u64 get_random_u64(void);
+static inline unsigned int get_random_int(void)
+{
+       return get_random_u32();
+}
+static inline unsigned long get_random_long(void)
+{
+#if BITS_PER_LONG == 64
+       return get_random_u64();
+#else
+       return get_random_u32();
+#endif
+}
+
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
 u32 prandom_u32(void);
index 4ae95f7e8597b0b43575d04aaf524cf252761e6e..a23a3315318048eec1fc18678e17967f03eaa89b 100644 (file)
@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
                ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
                pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
 
+/**
+ * hlist_nulls_for_each_entry_safe -
+ *   iterate over list of given type safe against removal of list entry
+ * @tpos:      the type * to use as a loop cursor.
+ * @pos:       the &struct hlist_nulls_node to use as a loop cursor.
+ * @head:      the head for your list.
+ * @member:    the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)               \
+       for (({barrier();}),                                                    \
+            pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));            \
+               (!is_a_nulls(pos)) &&                                           \
+               ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);        \
+                  pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
 #endif
 #endif
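
Note: as with hlist_for_each_entry_safe(), the iterator above advances pos
before the loop body runs, so the body may unlink the current entry. A minimal
usage sketch with illustrative names, mirroring the delete_all_elements()
conversion in kernel/bpf/hashtab.c further down:

	struct htab_elem *l;
	struct hlist_nulls_node *n;

	hlist_nulls_for_each_entry_safe(l, n, &b->head, hash_node) {
		hlist_nulls_del_rcu(&l->hash_node);  /* safe: n already points past l */
		free_elem(l);                        /* illustrative free routine */
	}
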
index 58373875e8eec2aee668e5fdac4526796ebe78c8..55125d67433851df5fffba9f641e907735bae772 100644 (file)
@@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n,
 { }
 #endif /* CONFIG_HARDENED_USERCOPY */
 
+#ifndef arch_setup_new_exec
+static inline void arch_setup_new_exec(void) { }
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
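
Note: this is the generic stub for the arch_setup_new_exec() call added to
setup_new_exec() in the fs/exec.c hunk above; if an architecture does not
define the symbol in its asm/thread_info.h, exec proceeds with a no-op. The
opt-in pattern, sketched with a hypothetical architecture:

	/* arch/foo/include/asm/thread_info.h (illustrative) */
	void arch_setup_new_exec(void);
	#define arch_setup_new_exec arch_setup_new_exec

	/* arch/foo/kernel/process.c (illustrative) */
	void arch_setup_new_exec(void)
	{
		/* re-derive per-process layout state for the fresh image;
		 * the body here is hypothetical, not taken from any arch */
	}
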
index b7952d55b9c00039a9eca46544997c10722682b6..f39ae697347f6590459ee4178de84160b43841e2 100644 (file)
@@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                          int addr_len, int flags, int is_sendmsg);
 int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
                       int addr_len, int flags);
-int inet_accept(struct socket *sock, struct socket *newsock, int flags);
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+               bool kern);
 int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
 ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
                      size_t size, int flags);
index 826f198374f809a4b7ca23ada4a46433b972ef35..c7a577976bec0887218a969bc8197dc1c8eb13f0 100644 (file)
@@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
         return (unsigned long)min_t(u64, when, max_when);
 }
 
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
 
 int inet_csk_get_port(struct sock *sk, unsigned short snum);
 
index a244db5e5ff7fa65bcf0a4124cbbe5f4682e6d9d..07a0b128625a4e24f9aa83019eff6eb17308eda3 100644 (file)
@@ -476,7 +476,8 @@ struct sctp_pf {
        int  (*send_verify) (struct sctp_sock *, union sctp_addr *);
        int  (*supported_addrs)(const struct sctp_sock *, __be16 *);
        struct sock *(*create_accept_sk) (struct sock *sk,
-                                         struct sctp_association *asoc);
+                                         struct sctp_association *asoc,
+                                         bool kern);
        int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
        void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
        void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
index 5e5997654db6454f82179cc35c4bc22e89d0c06f..03252d53975de7ad0da66d35802738830b0e3367 100644 (file)
@@ -236,6 +236,7 @@ struct sock_common {
   *    @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
   *    @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
   *    @sk_lock:       synchronizer
+  *    @sk_kern_sock: True if sock is using kernel lock classes
   *    @sk_rcvbuf: size of receive buffer in bytes
   *    @sk_wq: sock wait queue and async head
   *    @sk_rx_dst: receive input route used by early demux
@@ -430,7 +431,8 @@ struct sock {
 #endif
 
        kmemcheck_bitfield_begin(flags);
-       unsigned int            sk_padding : 2,
+       unsigned int            sk_padding : 1,
+                               sk_kern_sock : 1,
                                sk_no_check_tx : 1,
                                sk_no_check_rx : 1,
                                sk_userlocks : 4,
@@ -1015,7 +1017,8 @@ struct proto {
                                        int addr_len);
        int                     (*disconnect)(struct sock *sk, int flags);
 
-       struct sock *           (*accept)(struct sock *sk, int flags, int *err);
+       struct sock *           (*accept)(struct sock *sk, int flags, int *err,
+                                         bool kern);
 
        int                     (*ioctl)(struct sock *sk, int cmd,
                                         unsigned long arg);
@@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
 int sock_no_bind(struct socket *, struct sockaddr *, int);
 int sock_no_connect(struct socket *, struct sockaddr *, int, int);
 int sock_no_socketpair(struct socket *, struct socket *);
-int sock_no_accept(struct socket *, struct socket *, int);
+int sock_no_accept(struct socket *, struct socket *, int, bool);
 int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
 unsigned int sock_no_poll(struct file *, struct socket *,
                          struct poll_table_struct *);
index b0e275de6dec0d2be9adf09810e889c89d7ad06c..583875ea136ab228ea14a727581c384f50527211 100644 (file)
@@ -196,6 +196,7 @@ struct iscsi_conn {
        struct iscsi_task       *task;          /* xmit task in progress */
 
        /* xmit */
+       spinlock_t              taskqueuelock;  /* protects the next three lists */
        struct list_head        mgmtqueue;      /* mgmt (control) xmit queue */
        struct list_head        cmdqueue;       /* data-path cmd queue */
        struct list_head        requeue;        /* tasks needing another run */
index 6f22b39f1b0c3bc8bb1812a631a638ef1987a784..080c7ce9bae8892a43838043d986282a1385283a 100644 (file)
@@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev)
                sdev->sdev_state == SDEV_CREATED_BLOCK;
 }
 
+int scsi_internal_device_block(struct scsi_device *sdev, bool wait);
+int scsi_internal_device_unblock(struct scsi_device *sdev,
+                                enum scsi_device_state new_state);
+
 /* accessor functions for the SCSI parameters */
 static inline int scsi_device_sync(struct scsi_device *sdev)
 {
index bce990f5a35d77615bb536009940f6146d1e7d82..31acce9019a63e7b4123dd89d9daa7f31111f74a 100644 (file)
@@ -241,21 +241,21 @@ TRACE_EVENT(xen_mmu_set_pud,
                      (int)sizeof(pudval_t) * 2, (unsigned long long)__entry->pudval)
        );
 
-TRACE_EVENT(xen_mmu_set_pgd,
-           TP_PROTO(pgd_t *pgdp, pgd_t *user_pgdp, pgd_t pgdval),
-           TP_ARGS(pgdp, user_pgdp, pgdval),
+TRACE_EVENT(xen_mmu_set_p4d,
+           TP_PROTO(p4d_t *p4dp, p4d_t *user_p4dp, p4d_t p4dval),
+           TP_ARGS(p4dp, user_p4dp, p4dval),
            TP_STRUCT__entry(
-                   __field(pgd_t *, pgdp)
-                   __field(pgd_t *, user_pgdp)
-                   __field(pgdval_t, pgdval)
-                   ),
-           TP_fast_assign(__entry->pgdp = pgdp;
-                          __entry->user_pgdp = user_pgdp;
-                          __entry->pgdval = pgdval.pgd),
-           TP_printk("pgdp %p user_pgdp %p pgdval %0*llx (raw %0*llx)",
-                     __entry->pgdp, __entry->user_pgdp,
-                     (int)sizeof(pgdval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->pgdval)),
-                     (int)sizeof(pgdval_t) * 2, (unsigned long long)__entry->pgdval)
+                   __field(p4d_t *, p4dp)
+                   __field(p4d_t *, user_p4dp)
+                   __field(p4dval_t, p4dval)
+                   ),
+           TP_fast_assign(__entry->p4dp = p4dp;
+                          __entry->user_p4dp = user_p4dp;
+                          __entry->p4dval = p4d_val(p4dval)),
+           TP_printk("p4dp %p user_p4dp %p p4dval %0*llx (raw %0*llx)",
+                     __entry->p4dp, __entry->user_p4dp,
+                     (int)sizeof(p4dval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->p4dval)),
+                     (int)sizeof(p4dval_t) * 2, (unsigned long long)__entry->p4dval)
        );
 
 TRACE_EVENT(xen_mmu_pud_clear,
index d08c63f3dd6ff47c7cf090927e91f27cfc0d767f..0c5d5dd61b6ab1d2039686d25683e6dffa1f634e 100644 (file)
@@ -64,7 +64,7 @@ struct packet_diag_mclist {
        __u32   pdmc_count;
        __u16   pdmc_type;
        __u16   pdmc_alen;
-       __u8    pdmc_addr[MAX_ADDR_LEN];
+       __u8    pdmc_addr[32]; /* MAX_ADDR_LEN */
 };
 
 struct packet_diag_ring {
index eae2f15657c62c31f66353f4bd3c4af1e03451cf..f9c9d994820327a3c23db348886fd535190b43e8 100644 (file)
@@ -882,7 +882,6 @@ static void __init do_basic_setup(void)
        do_ctors();
        usermodehelper_enable();
        do_initcalls();
-       random_int_secret_init();
 }
 
 static void __init do_pre_smp_initcalls(void)
index 3ea87fb19a9416771985d9236f148ffb927ad19b..afe5bab376c9811c7b82f56bc0b93ce69b6a579b 100644 (file)
 #include <linux/bpf.h>
 #include <linux/jhash.h>
 #include <linux/filter.h>
+#include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
 
 struct bucket {
-       struct hlist_head head;
+       struct hlist_nulls_head head;
        raw_spinlock_t lock;
 };
 
@@ -44,9 +45,14 @@ enum extra_elem_state {
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
        union {
-               struct hlist_node hash_node;
-               struct bpf_htab *htab;
-               struct pcpu_freelist_node fnode;
+               struct hlist_nulls_node hash_node;
+               struct {
+                       void *padding;
+                       union {
+                               struct bpf_htab *htab;
+                               struct pcpu_freelist_node fnode;
+                       };
+               };
        };
        union {
                struct rcu_head rcu;
@@ -162,7 +168,8 @@ skip_percpu_elems:
                                 offsetof(struct htab_elem, lru_node),
                                 htab->elem_size, htab->map.max_entries);
        else
-               pcpu_freelist_populate(&htab->freelist, htab->elems,
+               pcpu_freelist_populate(&htab->freelist,
+                                      htab->elems + offsetof(struct htab_elem, fnode),
                                       htab->elem_size, htab->map.max_entries);
 
        return 0;
@@ -217,6 +224,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
        int err, i;
        u64 cost;
 
+       BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+                    offsetof(struct htab_elem, hash_node.pprev));
+       BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+                    offsetof(struct htab_elem, hash_node.pprev));
+
        if (lru && !capable(CAP_SYS_ADMIN))
                /* LRU implementation is much complicated than other
                 * maps.  Hence, limit to CAP_SYS_ADMIN for now.
@@ -326,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                goto free_htab;
 
        for (i = 0; i < htab->n_buckets; i++) {
-               INIT_HLIST_HEAD(&htab->buckets[i].head);
+               INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
                raw_spin_lock_init(&htab->buckets[i].lock);
        }
 
@@ -366,20 +378,44 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
        return &htab->buckets[hash & (htab->n_buckets - 1)];
 }
 
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
 {
        return &__select_bucket(htab, hash)->head;
 }
 
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
                                         void *key, u32 key_size)
 {
+       struct hlist_nulls_node *n;
+       struct htab_elem *l;
+
+       hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+               if (l->hash == hash && !memcmp(&l->key, key, key_size))
+                       return l;
+
+       return NULL;
+}
+
+/* This can be called without the bucket lock. It will repeat the loop in
+ * the unlikely event that elements moved from one bucket into another
+ * while the linked list was being walked.
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+                                              u32 hash, void *key,
+                                              u32 key_size, u32 n_buckets)
+{
+       struct hlist_nulls_node *n;
        struct htab_elem *l;
 
-       hlist_for_each_entry_rcu(l, head, hash_node)
+again:
+       hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                if (l->hash == hash && !memcmp(&l->key, key, key_size))
                        return l;
 
+       if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+               goto again;
+
        return NULL;
 }
 
@@ -387,7 +423,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        struct htab_elem *l;
        u32 hash, key_size;
 
@@ -400,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
 
        head = select_bucket(htab, hash);
 
-       l = lookup_elem_raw(head, hash, key, key_size);
+       l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
        return l;
 }
@@ -433,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 {
        struct bpf_htab *htab = (struct bpf_htab *)arg;
-       struct htab_elem *l, *tgt_l;
-       struct hlist_head *head;
+       struct htab_elem *l = NULL, *tgt_l;
+       struct hlist_nulls_head *head;
+       struct hlist_nulls_node *n;
        unsigned long flags;
        struct bucket *b;
 
@@ -444,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 
        raw_spin_lock_irqsave(&b->lock, flags);
 
-       hlist_for_each_entry_rcu(l, head, hash_node)
+       hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                if (l == tgt_l) {
-                       hlist_del_rcu(&l->hash_node);
+                       hlist_nulls_del_rcu(&l->hash_node);
                        break;
                }
 
@@ -459,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        struct htab_elem *l, *next_l;
        u32 hash, key_size;
        int i;
@@ -473,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
        head = select_bucket(htab, hash);
 
        /* lookup the key */
-       l = lookup_elem_raw(head, hash, key, key_size);
+       l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
 
        if (!l) {
                i = 0;
@@ -481,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
        }
 
        /* key was found, get next key in the same bucket */
-       next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+       next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
                                  struct htab_elem, hash_node);
 
        if (next_l) {
@@ -500,7 +537,7 @@ find_first_elem:
                head = select_bucket(htab, i);
 
                /* pick first element in the bucket */
-               next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+               next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
                                          struct htab_elem, hash_node);
                if (next_l) {
                        /* if it's not empty, just return it */
@@ -582,9 +619,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
        int err = 0;
 
        if (prealloc) {
-               l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
-               if (!l_new)
+               struct pcpu_freelist_node *l;
+
+               l = pcpu_freelist_pop(&htab->freelist);
+               if (!l)
                        err = -E2BIG;
+               else
+                       l_new = container_of(l, struct htab_elem, fnode);
        } else {
                if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
                        atomic_dec(&htab->count);
@@ -661,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct htab_elem *l_new = NULL, *l_old;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        unsigned long flags;
        struct bucket *b;
        u32 key_size, hash;
@@ -700,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
        /* add new element to the head of the list, so that
         * concurrent search will find it before old elem
         */
-       hlist_add_head_rcu(&l_new->hash_node, head);
+       hlist_nulls_add_head_rcu(&l_new->hash_node, head);
        if (l_old) {
-               hlist_del_rcu(&l_old->hash_node);
+               hlist_nulls_del_rcu(&l_old->hash_node);
                free_htab_elem(htab, l_old);
        }
        ret = 0;
@@ -716,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct htab_elem *l_new, *l_old = NULL;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        unsigned long flags;
        struct bucket *b;
        u32 key_size, hash;
@@ -757,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
        /* add new element to the head of the list, so that
         * concurrent search will find it before old elem
         */
-       hlist_add_head_rcu(&l_new->hash_node, head);
+       hlist_nulls_add_head_rcu(&l_new->hash_node, head);
        if (l_old) {
                bpf_lru_node_set_ref(&l_new->lru_node);
-               hlist_del_rcu(&l_old->hash_node);
+               hlist_nulls_del_rcu(&l_old->hash_node);
        }
        ret = 0;
 
@@ -781,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct htab_elem *l_new = NULL, *l_old;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        unsigned long flags;
        struct bucket *b;
        u32 key_size, hash;
@@ -820,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                        ret = PTR_ERR(l_new);
                        goto err;
                }
-               hlist_add_head_rcu(&l_new->hash_node, head);
+               hlist_nulls_add_head_rcu(&l_new->hash_node, head);
        }
        ret = 0;
 err:
@@ -834,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct htab_elem *l_new = NULL, *l_old;
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        unsigned long flags;
        struct bucket *b;
        u32 key_size, hash;
@@ -882,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
        } else {
                pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
                                value, onallcpus);
-               hlist_add_head_rcu(&l_new->hash_node, head);
+               hlist_nulls_add_head_rcu(&l_new->hash_node, head);
                l_new = NULL;
        }
        ret = 0;
@@ -910,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 static int htab_map_delete_elem(struct bpf_map *map, void *key)
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        struct bucket *b;
        struct htab_elem *l;
        unsigned long flags;
@@ -930,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
        l = lookup_elem_raw(head, hash, key, key_size);
 
        if (l) {
-               hlist_del_rcu(&l->hash_node);
+               hlist_nulls_del_rcu(&l->hash_node);
                free_htab_elem(htab, l);
                ret = 0;
        }
@@ -942,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
 static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 {
        struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-       struct hlist_head *head;
+       struct hlist_nulls_head *head;
        struct bucket *b;
        struct htab_elem *l;
        unsigned long flags;
@@ -962,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
        l = lookup_elem_raw(head, hash, key, key_size);
 
        if (l) {
-               hlist_del_rcu(&l->hash_node);
+               hlist_nulls_del_rcu(&l->hash_node);
                ret = 0;
        }
 
@@ -977,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
        int i;
 
        for (i = 0; i < htab->n_buckets; i++) {
-               struct hlist_head *head = select_bucket(htab, i);
-               struct hlist_node *n;
+               struct hlist_nulls_head *head = select_bucket(htab, i);
+               struct hlist_nulls_node *n;
                struct htab_elem *l;
 
-               hlist_for_each_entry_safe(l, n, head, hash_node) {
-                       hlist_del_rcu(&l->hash_node);
+               hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+                       hlist_nulls_del_rcu(&l->hash_node);
                        if (l->state != HTAB_EXTRA_ELEM_USED)
                                htab_elem_free(htab, l);
                }
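
Note: the hashtab.c conversion above rests on one invariant: preallocated
elements may be freed and immediately reused in a different bucket while an RCU
reader is still walking the old chain, so each chain's terminating nulls marker
encodes the bucket index it belongs to, and lookup_nulls_elem_raw() restarts
whenever the marker it reaches does not match the bucket it started from.
Distilled to its two code points, as a sketch:

	/* setup: every bucket's nulls marker carries that bucket's index */
	INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);

	/* lockless reader: landing on another bucket's marker means the
	 * element we followed was recycled mid-walk, so walk the chain again */
	if (unlikely(get_nulls_value(n) != (hash & (htab->n_buckets - 1))))
		goto again;
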
index 8bfe0afaee1082186f805a2a37a556247cbfa608..b37bd9ab7f574242722c1d9b0503b896019488b2 100644 (file)
@@ -500,9 +500,15 @@ unlock:
        raw_spin_unlock(&trie->lock);
 }
 
+static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+       return -ENOTSUPP;
+}
+
 static const struct bpf_map_ops trie_ops = {
        .map_alloc = trie_alloc,
        .map_free = trie_free,
+       .map_get_next_key = trie_get_next_key,
        .map_lookup_elem = trie_lookup_elem,
        .map_update_elem = trie_update_elem,
        .map_delete_elem = trie_delete_elem,
index 56eba9caa632adcc118114d8aa55cbab00895495..1dc22f6b49f5e06c4af22222dfb1b32c885ce16a 100644 (file)
@@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
                struct task_struct *task;
                int count = 0;
 
-               seq_printf(seq, "css_set %p\n", cset);
+               seq_printf(seq, "css_set %pK\n", cset);
 
                list_for_each_entry(task, &cset->tasks, cg_list) {
                        if (count++ > MAX_TASKS_SHOWN_PER_CSS)
index e756dae493008e4bc4bf9f87846f818d7349ede8..2237201d66d5dacf1fe952a15d9acc54b1e90b50 100644 (file)
@@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task)
                /* Only log the first time events_limit is incremented. */
                if (atomic64_inc_return(&pids->events_limit) == 1) {
                        pr_info("cgroup: fork rejected by pids controller in ");
-                       pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
+                       pr_cont_cgroup_path(css->cgroup);
                        pr_cont("\n");
                }
                cgroup_file_notify(&pids->events_file);
index b56a558e406db6375bea4b07e5873a4c6f0b401e..b118735fea9da471a15ba627c87af523b891bafa 100644 (file)
@@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image)
                ret = crypto_shash_final(desc, digest);
                if (ret)
                        goto out_free_digest;
-               ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
-                                               sha_regions, sha_region_sz, 0);
+               ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
+                                                    sha_regions, sha_region_sz, 0);
                if (ret)
                        goto out_free_digest;
 
-               ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
-                                               digest, SHA256_DIGEST_SIZE, 0);
+               ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
+                                                    digest, SHA256_DIGEST_SIZE, 0);
                if (ret)
                        goto out_free_digest;
        }
index 4cef7e4706b098d7918b53ff1e1b931d1a5ec8dc..799a8a4521870a6444818fef64c0ae1e2dfad671 100644 (file)
@@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image,
 extern struct mutex kexec_mutex;
 
 #ifdef CONFIG_KEXEC_FILE
-struct kexec_sha_region {
-       unsigned long start;
-       unsigned long len;
-};
-
+#include <linux/purgatory.h>
 void kimage_file_post_load_cleanup(struct kimage *image);
 #else /* CONFIG_KEXEC_FILE */
 static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
index 072cbc9b175dc1efbe95c14858f810f92db12130..c0168b7da1eaf22c216147ca5ebd03ef7311dca8 100644 (file)
@@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
 
+       WARN_ON_ONCE(!wq);
        WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
                     timer->data != (unsigned long)dwork);
        WARN_ON_ONCE(timer_pending(timer));
index 9b8fccb969dc0fee5f5e0a623bc5b97a99bbc602..c89f472b658ceaa221de242680927032c020a393 100644 (file)
@@ -137,7 +137,7 @@ config HAVE_MEMBLOCK_NODE_MAP
 config HAVE_MEMBLOCK_PHYS_MAP
        bool
 
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
        bool
 
 config ARCH_DISCARD_MEMBLOCK
index c74bad1bf6e8f2423c19cb8b6c3a386272ef8252..2559a3987de71a00926457e0336755ce797832ef 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1155,7 +1155,7 @@ struct page *get_dump_page(unsigned long addr)
 #endif /* CONFIG_ELF_CORE */
 
 /*
- * Generic RCU Fast GUP
+ * Generic Fast GUP
  *
  * get_user_pages_fast attempts to pin user pages by walking the page
  * tables directly and avoids taking locks. Thus the walker needs to be
@@ -1176,8 +1176,8 @@ struct page *get_dump_page(unsigned long addr)
  * Before activating this code, please be aware that the following assumptions
  * are currently made:
  *
- *  *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
- *      pages containing page tables.
+ *  *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
+ *  free pages containing page tables or TLB flushing requires IPI broadcast.
  *
  *  *) ptes can be read atomically by the architecture.
  *
@@ -1187,36 +1187,59 @@ struct page *get_dump_page(unsigned long addr)
  *
  * This code is based heavily on the PowerPC implementation by Nick Piggin.
  */
-#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+#ifdef CONFIG_HAVE_GENERIC_GUP
+
+#ifndef gup_get_pte
+/*
+ * We assume that the PTE can be read atomically. If this is not the case for
+ * your architecture, please provide the helper.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+       return READ_ONCE(*ptep);
+}
+#endif
+
+static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+{
+       while ((*nr) - nr_start) {
+               struct page *page = pages[--(*nr)];
+
+               ClearPageReferenced(page);
+               put_page(page);
+       }
+}
 
 #ifdef __HAVE_ARCH_PTE_SPECIAL
 static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                         int write, struct page **pages, int *nr)
 {
+       struct dev_pagemap *pgmap = NULL;
+       int nr_start = *nr, ret = 0;
        pte_t *ptep, *ptem;
-       int ret = 0;
 
        ptem = ptep = pte_offset_map(&pmd, addr);
        do {
-               /*
-                * In the line below we are assuming that the pte can be read
-                * atomically. If this is not the case for your architecture,
-                * please wrap this in a helper function!
-                *
-                * for an example see gup_get_pte in arch/x86/mm/gup.c
-                */
-               pte_t pte = READ_ONCE(*ptep);
+               pte_t pte = gup_get_pte(ptep);
                struct page *head, *page;
 
                /*
                 * Similar to the PMD case below, NUMA hinting must take slow
                 * path using the pte_protnone check.
                 */
-               if (!pte_present(pte) || pte_special(pte) ||
-                       pte_protnone(pte) || (write && !pte_write(pte)))
+               if (pte_protnone(pte))
                        goto pte_unmap;
 
-               if (!arch_pte_access_permitted(pte, write))
+               if (!pte_access_permitted(pte, write))
+                       goto pte_unmap;
+
+               if (pte_devmap(pte)) {
+                       pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
+                       if (unlikely(!pgmap)) {
+                               undo_dev_pagemap(nr, nr_start, pages);
+                               goto pte_unmap;
+                       }
+               } else if (pte_special(pte))
                        goto pte_unmap;
 
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -1232,6 +1255,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                }
 
                VM_BUG_ON_PAGE(compound_head(page) != head, page);
+
+               put_dev_pagemap(pgmap);
+               SetPageReferenced(page);
                pages[*nr] = page;
                (*nr)++;
 
@@ -1261,15 +1287,76 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 }
 #endif /* __HAVE_ARCH_PTE_SPECIAL */
 
+#ifdef __HAVE_ARCH_PTE_DEVMAP
+static int __gup_device_huge(unsigned long pfn, unsigned long addr,
+               unsigned long end, struct page **pages, int *nr)
+{
+       int nr_start = *nr;
+       struct dev_pagemap *pgmap = NULL;
+
+       do {
+               struct page *page = pfn_to_page(pfn);
+
+               pgmap = get_dev_pagemap(pfn, pgmap);
+               if (unlikely(!pgmap)) {
+                       undo_dev_pagemap(nr, nr_start, pages);
+                       return 0;
+               }
+               SetPageReferenced(page);
+               pages[*nr] = page;
+               get_page(page);
+               put_dev_pagemap(pgmap);
+               (*nr)++;
+               pfn++;
+       } while (addr += PAGE_SIZE, addr != end);
+       return 1;
+}
+
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+               unsigned long end, struct page **pages, int *nr)
+{
+       unsigned long fault_pfn;
+
+       fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+               unsigned long end, struct page **pages, int *nr)
+{
+       unsigned long fault_pfn;
+
+       fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+       return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+#else
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+               unsigned long end, struct page **pages, int *nr)
+{
+       BUILD_BUG();
+       return 0;
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+               unsigned long end, struct page **pages, int *nr)
+{
+       BUILD_BUG();
+       return 0;
+}
+#endif
+
 static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
 {
        struct page *head, *page;
        int refs;
 
-       if (write && !pmd_write(orig))
+       if (!pmd_access_permitted(orig, write))
                return 0;
 
+       if (pmd_devmap(orig))
+               return __gup_device_huge_pmd(orig, addr, end, pages, nr);
+
        refs = 0;
        head = pmd_page(orig);
        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1293,6 +1380,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                return 0;
        }
 
+       SetPageReferenced(head);
        return 1;
 }
 
@@ -1302,9 +1390,12 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
        struct page *head, *page;
        int refs;
 
-       if (write && !pud_write(orig))
+       if (!pud_access_permitted(orig, write))
                return 0;
 
+       if (pud_devmap(orig))
+               return __gup_device_huge_pud(orig, addr, end, pages, nr);
+
        refs = 0;
        head = pud_page(orig);
        page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
@@ -1328,6 +1419,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                return 0;
        }
 
+       SetPageReferenced(head);
        return 1;
 }
 
@@ -1338,9 +1430,10 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
        int refs;
        struct page *head, *page;
 
-       if (write && !pgd_write(orig))
+       if (!pgd_access_permitted(orig, write))
                return 0;
 
+       BUILD_BUG_ON(pgd_devmap(orig));
        refs = 0;
        head = pgd_page(orig);
        page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
@@ -1364,6 +1457,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
                return 0;
        }
 
+       SetPageReferenced(head);
        return 1;
 }
 
@@ -1455,7 +1549,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
                        if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
                                         P4D_SHIFT, next, write, pages, nr))
                                return 0;
-               } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr))
+               } else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
                        return 0;
        } while (p4dp++, addr = next, addr != end);
 
@@ -1520,6 +1614,21 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
        return nr;
 }
 
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+       unsigned long len, end;
+
+       len = (unsigned long) nr_pages << PAGE_SHIFT;
+       end = start + len;
+       return end >= start;
+}
+#endif
+
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:     starting user address
@@ -1539,11 +1648,14 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
 {
-       int nr, ret;
+       int nr = 0, ret = 0;
 
        start &= PAGE_MASK;
-       nr = __get_user_pages_fast(start, nr_pages, write, pages);
-       ret = nr;
+
+       if (gup_fast_permitted(start, nr_pages, write)) {
+               nr = __get_user_pages_fast(start, nr_pages, write, pages);
+               ret = nr;
+       }
 
        if (nr < nr_pages) {
                /* Try to get the remaining pages with get_user_pages */
@@ -1565,4 +1677,4 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
        return ret;
 }
 
-#endif /* CONFIG_HAVE_GENERIC_RCU_GUP */
+#endif /* CONFIG_HAVE_GENERIC_GUP */
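
The gup_fast_permitted() hook added above lets an architecture veto the lockless fast path before get_user_pages_fast() tries it; the generic fallback only rejects ranges that wrap the top of the address space. A minimal userspace sketch of that overflow check (PAGE_SHIFT value and function name here are illustrative, not taken from any particular arch):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* sketch of the generic check: reject a range that wraps around 0 */
static bool range_permitted(unsigned long start, int nr_pages)
{
        unsigned long len = (unsigned long)nr_pages << PAGE_SHIFT;
        unsigned long end = start + len;

        return end >= start;
}

int main(void)
{
        printf("%d\n", range_permitted(0x1000, 4));        /* 1: fits  */
        printf("%d\n", range_permitted(~0UL - 0x1000, 4)); /* 0: wraps */
        return 0;
}

An architecture that needs a stricter test simply defines gup_fast_permitted itself, which is why the generic version sits under #ifndef.
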
index 538998a137d24e069969dcc3ed00cedc6c25616f..9ac639499bd1146347557141b10f1135ee2c0048 100644 (file)
@@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
 
 /**
  * pcpu_get_pages - get temp pages array
- * @chunk: chunk of interest
  *
  * Returns pointer to array of pointers to struct page which can be indexed
  * with pcpu_page_idx().  Note that there is only one array and accesses
@@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to temp pages array on success.
  */
-static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc)
+static struct page **pcpu_get_pages(void)
 {
        static struct page **pages;
        size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
@@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
 {
        struct page **pages;
 
-       pages = pcpu_get_pages(chunk);
+       pages = pcpu_get_pages();
        if (!pages)
                return -ENOMEM;
 
@@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
         * successful population attempt so the temp pages array must
         * be available now.
         */
-       pages = pcpu_get_pages(chunk);
+       pages = pcpu_get_pages();
        BUG_ON(!pages);
 
        /* unmap and free */
index 5696039b5c0707eddcb652bc120a8556ca3cc58b..60a6488e9e6d49d5e9c5d4b611a5f5b037342316 100644 (file)
@@ -1011,8 +1011,11 @@ area_found:
                mutex_unlock(&pcpu_alloc_mutex);
        }
 
-       if (chunk != pcpu_reserved_chunk)
+       if (chunk != pcpu_reserved_chunk) {
+               spin_lock_irqsave(&pcpu_lock, flags);
                pcpu_nr_empty_pop_pages -= occ_pages;
+               spin_unlock_irqrestore(&pcpu_lock, flags);
+       }
 
        if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
                pcpu_schedule_balance_work();
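
The hunk above moves the pcpu_nr_empty_pop_pages adjustment under pcpu_lock so it cannot race with the other sites that update the counter while holding that lock. The same shape in plain userspace C, with a pthread mutex standing in for the kernel spinlock (all names illustrative):

#include <pthread.h>
#include <stdio.h>

/* sketch: every update of the shared counter goes through one lock */
static pthread_mutex_t counter_lock = PTHREAD_MUTEX_INITIALIZER;
static long nr_empty_pages;

static void account_alloc(long occupied)
{
        pthread_mutex_lock(&counter_lock);
        nr_empty_pages -= occupied;
        pthread_mutex_unlock(&counter_lock);
}

int main(void)
{
        nr_empty_pages = 8;
        account_alloc(3);
        printf("%ld\n", nr_empty_pages);        /* 5 */
        return 0;
}
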
index db9794ec61d88efe16419a6c4534daf7c8770bc1..5589de7086af4eca7634e786918600a81cf6b09c 100644 (file)
@@ -318,7 +318,8 @@ out:
        return error;
 }
 
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
+static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
+                     bool kern)
 {
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
@@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 
        lock_sock(sk);
 
-       error = svc_create(sock_net(sk), newsock, 0, 0);
+       error = svc_create(sock_net(sk), newsock, 0, kern);
        if (error)
                goto out;
 
index a8e42cedf1dbc7e11a5803a3dbe857e1e4cd54e1..b7c486752b3acf64b821ccb8b0e1a9bc25c945da 100644 (file)
@@ -1320,7 +1320,8 @@ out_release:
        return err;
 }
 
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
 {
        struct sk_buff *skb;
        struct sock *newsk;
index f307b145ea5405482434a9c37cafeb6d3f32dee4..507b80d59dec4fd3b0eb3c50ed1cd95a78adfcb7 100644 (file)
@@ -301,7 +301,7 @@ done:
 }
 
 static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
-                            int flags)
+                            int flags, bool kern)
 {
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct sock *sk = sock->sk, *nsk;
index aa1a814ceddca77f790f0c570e9c89ef08ebe186..ac3c650cb234f9985ddf0b54924db9000c4586c3 100644 (file)
@@ -471,7 +471,8 @@ done:
        return err;
 }
 
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
+                             bool kern)
 {
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct sock *sk = sock->sk, *nsk;
index e4e9a2da1e7e7a0e4b9764fe6de8819b9908ef3f..728e0c8dc8e74ccb134b8ed1d493ea8ee49bf49b 100644 (file)
@@ -627,7 +627,7 @@ done:
 }
 
 static int sco_sock_accept(struct socket *sock, struct socket *newsock,
-                          int flags)
+                          int flags, bool kern)
 {
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct sock *sk = sock->sk, *ch;
index 236f34244dbe1f2cd2bdfaf9d4eceb0765276882..013f2290bfa56df90708879437a762c812dec101 100644 (file)
@@ -30,6 +30,7 @@ EXPORT_SYMBOL(br_should_route_hook);
 static int
 br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+       br_drop_fake_rtable(skb);
        return netif_receive_skb(skb);
 }
 
index 95087e6e8258366af95579bb308d1a6e18266f0e..fa87fbd62bb788a27b8fd749d59c292aedd4fb69 100644 (file)
@@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv,
 }
 
 
-/* PF_BRIDGE/LOCAL_IN ************************************************/
-/* The packet is locally destined, which requires a real
- * dst_entry, so detach the fake one.  On the way up, the
- * packet would pass through PRE_ROUTING again (which already
- * took place when the packet entered the bridge), but we
- * register an IPv4 PRE_ROUTING 'sabotage' hook that will
- * prevent this from happening. */
-static unsigned int br_nf_local_in(void *priv,
-                                  struct sk_buff *skb,
-                                  const struct nf_hook_state *state)
-{
-       br_drop_fake_rtable(skb);
-       return NF_ACCEPT;
-}
-
 /* PF_BRIDGE/FORWARD *************************************************/
 static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -907,12 +892,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
                .hooknum = NF_BR_PRE_ROUTING,
                .priority = NF_BR_PRI_BRNF,
        },
-       {
-               .hook = br_nf_local_in,
-               .pf = NFPROTO_BRIDGE,
-               .hooknum = NF_BR_LOCAL_IN,
-               .priority = NF_BR_PRI_BRNF,
-       },
        {
                .hook = br_nf_forward_ip,
                .pf = NFPROTO_BRIDGE,
index 8637b2b71f3d4751366a2ca5ba46579e6a5fa953..7869ae3837ca741e344b1731dc50d8408d8bcb6c 100644 (file)
@@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev)
 {
        rtnl_lock();
        call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+       call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
        rtnl_unlock();
 }
 EXPORT_SYMBOL(netdev_notify_peers);
index 3945821e9c1f8f8c33290e55d33aba28ff68a9cd..65ea0ff4017c166fea648f3ef3db57966f44aa66 100644 (file)
@@ -953,7 +953,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
        while (--i >= new_num) {
                struct kobject *kobj = &dev->_rx[i].kobj;
 
-               if (!list_empty(&dev_net(dev)->exit_list))
+               if (!atomic_read(&dev_net(dev)->count))
                        kobj->uevent_suppress = 1;
                if (dev->sysfs_rx_queue_group)
                        sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
@@ -1371,7 +1371,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
        while (--i >= new_num) {
                struct netdev_queue *queue = dev->_tx + i;
 
-               if (!list_empty(&dev_net(dev)->exit_list))
+               if (!atomic_read(&dev_net(dev)->count))
                        queue->kobj.uevent_suppress = 1;
 #ifdef CONFIG_BQL
                sysfs_remove_group(&queue->kobj, &dql_group);
@@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
 {
        struct device *dev = &(ndev->dev);
 
-       if (!list_empty(&dev_net(ndev)->exit_list))
+       if (!atomic_read(&dev_net(ndev)->count))
                dev_set_uevent_suppress(dev, 1);
 
        kobject_get(&dev->kobj);
index f3557958e9bf147631a90b51fef0630920acd97b..cd4ba8c6b6091651403cf74de8c60ccf69aa3e7b 100644 (file)
@@ -3828,13 +3828,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
        if (!skb_may_tx_timestamp(sk, false))
                return;
 
-       /* take a reference to prevent skb_orphan() from freeing the socket */
-       sock_hold(sk);
-
-       *skb_hwtstamps(skb) = *hwtstamps;
-       __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-
-       sock_put(sk);
+       /* Take a reference to prevent skb_orphan() from freeing the socket,
+        * but only if the socket refcount is not zero.
+        */
+       if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+               *skb_hwtstamps(skb) = *hwtstamps;
+               __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+               sock_put(sk);
+       }
 }
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
@@ -3893,7 +3894,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
 {
        struct sock *sk = skb->sk;
        struct sock_exterr_skb *serr;
-       int err;
+       int err = 1;
 
        skb->wifi_acked_valid = 1;
        skb->wifi_acked = acked;
@@ -3903,14 +3904,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
        serr->ee.ee_errno = ENOMSG;
        serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
 
-       /* take a reference to prevent skb_orphan() from freeing the socket */
-       sock_hold(sk);
-
-       err = sock_queue_err_skb(sk, skb);
+       /* Take a reference to prevent skb_orphan() from freeing the socket,
+        * but only if the socket refcount is not zero.
+        */
+       if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+               err = sock_queue_err_skb(sk, skb);
+               sock_put(sk);
+       }
        if (err)
                kfree_skb(skb);
-
-       sock_put(sk);
 }
 EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
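
Both completion paths above stop taking an unconditional sock_hold() and instead bump sk_refcnt only if it is still non-zero, so a socket whose last reference has already been dropped is not resurrected. A userspace sketch of that take-a-reference-only-if-live pattern using C11 atomics (the kernel primitive is atomic_inc_not_zero(); this is only an analogue):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* grab a reference only while the count is still non-zero */
static bool get_ref_not_zero(atomic_int *refcnt)
{
        int old = atomic_load(refcnt);

        while (old != 0) {
                if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
                        return true;    /* reference taken */
        }
        return false;                   /* object already dying */
}

int main(void)
{
        atomic_int live = 2, dead = 0;

        printf("%d %d\n", get_ref_not_zero(&live), get_ref_not_zero(&dead)); /* 1 0 */
        return 0;
}
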
 
index f6fd79f33097f3fa279fcb0b610286259af9b111..a96d5f7a5734a52dfd6a2df8490c7bd7f5f6599a 100644 (file)
@@ -197,66 +197,55 @@ EXPORT_SYMBOL(sk_net_capable);
 
 /*
  * Each address family might have different locking rules, so we have
- * one slock key per address family:
+ * one slock key per address family and separate keys for internal and
+ * userspace sockets.
  */
 static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_kern_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
+static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
 
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
  * locks is fast):
  */
+
+#define _sock_locks(x)                                           \
+  x "AF_UNSPEC",       x "AF_UNIX"     ,       x "AF_INET"     , \
+  x "AF_AX25"  ,       x "AF_IPX"      ,       x "AF_APPLETALK", \
+  x "AF_NETROM",       x "AF_BRIDGE"   ,       x "AF_ATMPVC"   , \
+  x "AF_X25"   ,       x "AF_INET6"    ,       x "AF_ROSE"     , \
+  x "AF_DECnet",       x "AF_NETBEUI"  ,       x "AF_SECURITY" , \
+  x "AF_KEY"   ,       x "AF_NETLINK"  ,       x "AF_PACKET"   , \
+  x "AF_ASH"   ,       x "AF_ECONET"   ,       x "AF_ATMSVC"   , \
+  x "AF_RDS"   ,       x "AF_SNA"      ,       x "AF_IRDA"     , \
+  x "AF_PPPOX" ,       x "AF_WANPIPE"  ,       x "AF_LLC"      , \
+  x "27"       ,       x "28"          ,       x "AF_CAN"      , \
+  x "AF_TIPC"  ,       x "AF_BLUETOOTH",       x "IUCV"        , \
+  x "AF_RXRPC" ,       x "AF_ISDN"     ,       x "AF_PHONET"   , \
+  x "AF_IEEE802154",   x "AF_CAIF"     ,       x "AF_ALG"      , \
+  x "AF_NFC"   ,       x "AF_VSOCK"    ,       x "AF_KCM"      , \
+  x "AF_QIPCRTR",      x "AF_SMC"      ,       x "AF_MAX"
+
 static const char *const af_family_key_strings[AF_MAX+1] = {
-  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
-  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
-  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
-  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
-  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
-  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
-  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
-  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
-  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
-  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
-  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
-  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
-  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_KCM"      ,
-  "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC"     , "sk_lock-AF_MAX"
+       _sock_locks("sk_lock-")
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
-  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
-  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
-  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
-  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
-  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
-  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
-  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
-  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
-  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
-  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
-  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
-  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
-  "slock-AF_NFC"   , "slock-AF_VSOCK"    ,"slock-AF_KCM"       ,
-  "slock-AF_QIPCRTR", "slock-AF_SMC"     , "slock-AF_MAX"
+       _sock_locks("slock-")
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
-  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
-  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
-  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
-  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
-  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
-  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
-  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
-  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
-  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
-  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
-  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
-  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
-  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_KCM"      ,
-  "clock-AF_QIPCRTR", "clock-AF_SMC"     , "clock-AF_MAX"
+       _sock_locks("clock-")
+};
+
+static const char *const af_family_kern_key_strings[AF_MAX+1] = {
+       _sock_locks("k-sk_lock-")
+};
+static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
+       _sock_locks("k-slock-")
+};
+static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
+       _sock_locks("k-clock-")
 };
 
 /*
@@ -264,6 +253,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  * so split the lock classes by using a per-AF key:
  */
 static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_kern_callback_keys[AF_MAX];
 
 /* Take into consideration the size of the struct sk_buff overhead in the
  * determination of these values, since that is non-constant across
@@ -1293,7 +1283,16 @@ lenout:
  */
 static inline void sock_lock_init(struct sock *sk)
 {
-       sock_lock_init_class_and_name(sk,
+       if (sk->sk_kern_sock)
+               sock_lock_init_class_and_name(
+                       sk,
+                       af_family_kern_slock_key_strings[sk->sk_family],
+                       af_family_kern_slock_keys + sk->sk_family,
+                       af_family_kern_key_strings[sk->sk_family],
+                       af_family_kern_keys + sk->sk_family);
+       else
+               sock_lock_init_class_and_name(
+                       sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
@@ -1399,6 +1398,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                 * why we need sk_prot_creator -acme
                 */
                sk->sk_prot = sk->sk_prot_creator = prot;
+               sk->sk_kern_sock = kern;
                sock_lock_init(sk);
                sk->sk_net_refcnt = kern ? 0 : 1;
                if (likely(sk->sk_net_refcnt))
@@ -2277,7 +2277,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
 }
 EXPORT_SYMBOL(sock_no_socketpair);
 
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
+                  bool kern)
 {
        return -EOPNOTSUPP;
 }
@@ -2481,7 +2482,14 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        }
 
        rwlock_init(&sk->sk_callback_lock);
-       lockdep_set_class_and_name(&sk->sk_callback_lock,
+       if (sk->sk_kern_sock)
+               lockdep_set_class_and_name(
+                       &sk->sk_callback_lock,
+                       af_kern_callback_keys + sk->sk_family,
+                       af_family_kern_clock_key_strings[sk->sk_family]);
+       else
+               lockdep_set_class_and_name(
+                       &sk->sk_callback_lock,
                        af_callback_keys + sk->sk_family,
                        af_family_clock_key_strings[sk->sk_family]);
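
The _sock_locks() macro above collapses three hand-maintained string tables into one name list stamped with a per-table prefix, and the same trick then produces the new k- variants used for kernel-internal sockets. A compilable miniature of the technique (shortened name list, illustrative prefixes):

#include <stdio.h>

/* one list of names, expanded several times with different prefixes;
 * adjacent string literals are concatenated by the compiler */
#define _lock_names(x)  x "AF_UNSPEC", x "AF_UNIX", x "AF_INET"

static const char *const user_keys[] = { _lock_names("sk_lock-") };
static const char *const kern_keys[] = { _lock_names("k-sk_lock-") };

int main(void)
{
        printf("%s / %s\n", user_keys[2], kern_keys[2]);
        /* sk_lock-AF_INET / k-sk_lock-AF_INET */
        return 0;
}

Keeping the lists in a single macro also removes the risk of the tables drifting out of step, which is exactly what had happened to a couple of the hand-written entries being deleted above.
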
 
index f053198e730c48c7ea8114706c3d4904228f41fb..5e3a7302f7747e4c4f3134eacab2f2c65b13402f 100644 (file)
@@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
        for (i = 0; i < hc->tx_seqbufc; i++)
                kfree(hc->tx_seqbuf[i]);
        hc->tx_seqbufc = 0;
+       dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
index 409d0cfd34474812c3bf74f26cd423a3d65ee441..b99168b0fabf2a8c65defdd0b93d362630774e1a 100644 (file)
@@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 
        switch (type) {
        case ICMP_REDIRECT:
-               dccp_do_redirect(skb, sk);
+               if (!sock_owned_by_user(sk))
+                       dccp_do_redirect(skb, sk);
                goto out;
        case ICMP_SOURCE_QUENCH:
                /* Just silently ignore these. */
index 233b57367758c64c09ed40f7359cb8fcb1918d93..d9b6a4e403e701fd9b9ecf92bac496e45570054e 100644 (file)
@@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        np = inet6_sk(sk);
 
        if (type == NDISC_REDIRECT) {
-               struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+               if (!sock_owned_by_user(sk)) {
+                       struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
-               if (dst)
-                       dst->ops->redirect(dst, sk, skb);
+                       if (dst)
+                               dst->ops->redirect(dst, sk, skb);
+               }
                goto out;
        }
 
index e267e6f4c9a5566b369a03a600a408e5bd41cbad..abd07a443219853b022bef41cb072e90ff8f07f0 100644 (file)
@@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
        struct dccp_request_sock *dreq = dccp_rsk(req);
        bool own_req;
 
+       /* TCP/DCCP listeners became lockless.
+        * DCCP stores complex state in its request_sock, so we need
+        * a protection for them, now this code runs without being protected
+        * by the parent (listener) lock.
+        */
+       spin_lock_bh(&dreq->dreq_lock);
+
        /* Check for retransmitted REQUEST */
        if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
 
@@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
                        inet_rtx_syn_ack(sk, req);
                }
                /* Network Duplicate, discard packet */
-               return NULL;
+               goto out;
        }
 
        DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
@@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
 
        child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
                                                         req, &own_req);
-       if (!child)
-               goto listen_overflow;
-
-       return inet_csk_complete_hashdance(sk, child, req, own_req);
+       if (child) {
+               child = inet_csk_complete_hashdance(sk, child, req, own_req);
+               goto out;
+       }
 
-listen_overflow:
-       dccp_pr_debug("listen_overflow!\n");
        DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
 drop:
        if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
                req->rsk_ops->send_reset(sk, skb);
 
        inet_csk_reqsk_queue_drop(sk, req);
-       return NULL;
+out:
+       spin_unlock_bh(&dreq->dreq_lock);
+       return child;
 }
 
 EXPORT_SYMBOL_GPL(dccp_check_req);
@@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req,
 {
        struct dccp_request_sock *dreq = dccp_rsk(req);
 
+       spin_lock_init(&dreq->dreq_lock);
        inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
        inet_rsk(req)->ir_num      = ntohs(dccp_hdr(skb)->dccph_dport);
        inet_rsk(req)->acked       = 0;
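
dccp_check_req() above now runs under a per-request dreq_lock, because the parent listener no longer serializes it, and every early return is rewritten to funnel through a single unlock site. A small sketch of that lock-on-entry / one-exit shape (a pthread mutex stands in for the spinlock; the return values are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dreq_lock = PTHREAD_MUTEX_INITIALIZER;

static int check_req(int retransmit)
{
        int child = 0;

        pthread_mutex_lock(&dreq_lock);

        if (retransmit)
                goto out;       /* duplicate request: nothing to hand back */

        child = 42;             /* stand-in for the accepted child socket */
out:
        pthread_mutex_unlock(&dreq_lock);
        return child;
}

int main(void)
{
        printf("%d %d\n", check_req(1), check_req(0));  /* 0 42 */
        return 0;
}
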
index e6e79eda97636df6f6e0e6e914405381c3efeaaa..7de5b40a5d0d1245ad995877f779e0d87d1cf398 100644 (file)
@@ -1070,7 +1070,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
        return skb == NULL ? ERR_PTR(err) : skb;
 }
 
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
+static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
+                    bool kern)
 {
        struct sock *sk = sock->sk, *newsk;
        struct sk_buff *skb = NULL;
@@ -1099,7 +1100,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
 
        cb = DN_SKB_CB(skb);
        sk->sk_ack_backlog--;
-       newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
+       newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
        if (newsk == NULL) {
                release_sock(sk);
                kfree_skb(skb);
index 602d40f43687c91db7250822439bacbe85318fa3..6b1fc6e4278ef4f1cba58412977918af31d73e62 100644 (file)
@@ -689,11 +689,12 @@ EXPORT_SYMBOL(inet_stream_connect);
  *     Accept a pending connection. The TCP layer now gives BSD semantics.
  */
 
-int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+               bool kern)
 {
        struct sock *sk1 = sock->sk;
        int err = -EINVAL;
-       struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
+       struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
 
        if (!sk2)
                goto do_err;
@@ -1487,8 +1488,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
        int proto = iph->protocol;
        int err = -ENOSYS;
 
-       if (skb->encapsulation)
+       if (skb->encapsulation) {
+               skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
                skb_set_inner_network_header(skb, nhoff);
+       }
 
        csum_replace2(&iph->check, iph->tot_len, newlen);
        iph->tot_len = newlen;
index b4d5980ade3b584c444d0f0c6523f03a2f71f884..5e313c1ac94fc88eca5fe3a0e9e46e551e955ff0 100644 (file)
@@ -424,7 +424,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 /*
  * This will accept the next outstanding connection.
  */
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
index 737ce826d7ecfa040d07d7f8e8d6dedd01ca7330..7a3fd25e8913a99d0fcbb256bc9001f6f1d4dd6f 100644 (file)
@@ -966,7 +966,7 @@ static int __ip_append_data(struct sock *sk,
        cork->length += length;
        if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
            (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
            (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
                err = ip_ufo_append_data(sk, queue, getfrag, from, length,
                                         hh_len, fragheaderlen, transhdrlen,
index 9a89b8deafae1e9b2e8d1d9bc211c9c30b8dd8ec..575e19dcc01763ef3fa938dea3ea51995b573163 100644 (file)
@@ -279,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect);
  */
 void tcp_v4_mtu_reduced(struct sock *sk)
 {
-       struct dst_entry *dst;
        struct inet_sock *inet = inet_sk(sk);
-       u32 mtu = tcp_sk(sk)->mtu_info;
+       struct dst_entry *dst;
+       u32 mtu;
 
+       if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+               return;
+       mtu = tcp_sk(sk)->mtu_info;
        dst = inet_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;
@@ -428,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 
        switch (type) {
        case ICMP_REDIRECT:
-               do_redirect(icmp_skb, sk);
+               if (!sock_owned_by_user(sk))
+                       do_redirect(icmp_skb, sk);
                goto out;
        case ICMP_SOURCE_QUENCH:
                /* Just silently ignore these. */
index 40d893556e6701ace6a02903e53c45822d6fa56d..b2ab411c6d3728fa7dbdebde045532a7317f5166 100644 (file)
@@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk)
 
        sk_mem_reclaim_partial(sk);
 
-       if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+           !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
                goto out;
 
        if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk)
        struct inet_connection_sock *icsk = inet_csk(sk);
        int event;
 
-       if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+       if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+           !icsk->icsk_pending)
                goto out;
 
        if (time_after(icsk->icsk_timeout, jiffies)) {
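
The timer handlers above switch from testing one state to testing a set: (1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN) covers both states in a single comparison. A minimal sketch of the bitmask membership idiom (the state numbers below are arbitrary, not the kernel's):

#include <stdio.h>

enum st { ST_ESTABLISHED = 1, ST_CLOSE = 7, ST_LISTEN = 10 };
#define STF(s)  (1u << (s))

/* true when the state is any member of the skip set */
static int should_skip(enum st state)
{
        return (STF(state) & (STF(ST_CLOSE) | STF(ST_LISTEN))) != 0;
}

int main(void)
{
        printf("%d %d %d\n", should_skip(ST_CLOSE), should_skip(ST_LISTEN),
               should_skip(ST_ESTABLISHED));    /* 1 1 0 */
        return 0;
}
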
index 04db40620ea65c1f369ef63490383e92def722ff..a9a9553ee63df8eb6e16e00d5da8c29406435350 100644 (file)
@@ -920,12 +920,12 @@ static int __init inet6_init(void)
        err = register_pernet_subsys(&inet6_net_ops);
        if (err)
                goto register_pernet_fail;
-       err = icmpv6_init();
-       if (err)
-               goto icmp_fail;
        err = ip6_mr_init();
        if (err)
                goto ipmr_fail;
+       err = icmpv6_init();
+       if (err)
+               goto icmp_fail;
        err = ndisc_init();
        if (err)
                goto ndisc_fail;
@@ -1061,10 +1061,10 @@ igmp_fail:
        ndisc_cleanup();
 ndisc_fail:
        ip6_mr_cleanup();
-ipmr_fail:
-       icmpv6_cleanup();
 icmp_fail:
        unregister_pernet_subsys(&inet6_net_ops);
+ipmr_fail:
+       icmpv6_cleanup();
 register_pernet_fail:
        sock_unregister(PF_INET6);
        rtnl_unregister_all(PF_INET6);
index e4266746e4a2af67562bb05dd50ace54e55d3edd..d4bf2c68a545b44873e433930e4e999920de78c9 100644 (file)
@@ -923,6 +923,8 @@ add:
                        ins = &rt->dst.rt6_next;
                        iter = *ins;
                        while (iter) {
+                               if (iter->rt6i_metric > rt->rt6i_metric)
+                                       break;
                                if (rt6_qualify_for_ecmp(iter)) {
                                        *ins = iter->dst.rt6_next;
                                        fib6_purge_rt(iter, fn, info->nl_net);
index 0838e6d01d2e4979559cae63a20ca339a3e2c22c..93e58a5e18374bee41f5a17f0c5911e381acb142 100644 (file)
@@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
        struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
        int err = -ENOSYS;
 
-       if (skb->encapsulation)
+       if (skb->encapsulation) {
+               skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
                skb_set_inner_network_header(skb, nhoff);
+       }
 
        iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
 
index 528b3c1f3fdee4314e1c23007ae76333b4af7505..58f6288e9ba53e6964b74d71dde7615ead695c06 100644 (file)
@@ -768,13 +768,14 @@ slow_path:
         *      Fragment the datagram.
         */
 
-       *prevhdr = NEXTHDR_FRAGMENT;
        troom = rt->dst.dev->needed_tailroom;
 
        /*
         *      Keep copying data until we run out.
         */
        while (left > 0)        {
+               u8 *fragnexthdr_offset;
+
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
@@ -819,6 +820,10 @@ slow_path:
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 
+               fragnexthdr_offset = skb_network_header(frag);
+               fragnexthdr_offset += prevhdr - skb_network_header(skb);
+               *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
                /*
                 *      Build fragment header.
                 */
@@ -1385,7 +1390,7 @@ emsgsize:
        if ((((length + fragheaderlen) > mtu) ||
             (skb && skb_is_gso(skb))) &&
            (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
            (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
                err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
                                          hh_len, fragheaderlen, exthdrlen,
index 644ba59fbd9d5ed8d6ba4a8082dd327589c9bb68..3d8a3b63b4fdbec7d488194e21e0c9013f0ff6da 100644 (file)
@@ -485,11 +485,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
        if (!skb->ignore_df && skb->len > mtu) {
                skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
 
-               if (skb->protocol == htons(ETH_P_IPV6))
+               if (skb->protocol == htons(ETH_P_IPV6)) {
+                       if (mtu < IPV6_MIN_MTU)
+                               mtu = IPV6_MIN_MTU;
+
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-               else
+               } else {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                  htonl(mtu));
+               }
 
                return -EMSGSIZE;
        }
index 229bfcc451ef5004e9e9d14c071937c1b9658711..35c58b669ebdfd73db32035bf4d6ccfdd01bbaaf 100644 (file)
@@ -3299,7 +3299,6 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
                nexthop_len = nla_total_size(0)  /* RTA_MULTIPATH */
                            + NLA_ALIGN(sizeof(struct rtnexthop))
                            + nla_total_size(16) /* RTA_GATEWAY */
-                           + nla_total_size(4)  /* RTA_OIF */
                            + lwtunnel_get_encap_size(rt->dst.lwtstate);
 
                nexthop_len *= rt->rt6i_nsiblings;
@@ -3323,7 +3322,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
 }
 
 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
-                           unsigned int *flags)
+                           unsigned int *flags, bool skip_oif)
 {
        if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
                *flags |= RTNH_F_LINKDOWN;
@@ -3336,7 +3335,8 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
                        goto nla_put_failure;
        }
 
-       if (rt->dst.dev &&
+       /* not needed for multipath encoding b/c it has a rtnexthop struct */
+       if (!skip_oif && rt->dst.dev &&
            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
                goto nla_put_failure;
 
@@ -3350,6 +3350,7 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+/* add multipath next hop */
 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
 {
        struct rtnexthop *rtnh;
@@ -3362,7 +3363,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
        rtnh->rtnh_hops = 0;
        rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
 
-       if (rt6_nexthop_info(skb, rt, &flags) < 0)
+       if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
                goto nla_put_failure;
 
        rtnh->rtnh_flags = flags;
@@ -3515,7 +3516,7 @@ static int rt6_fill_node(struct net *net,
 
                nla_nest_end(skb, mp);
        } else {
-               if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
+               if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
                        goto nla_put_failure;
        }
 
index 60a5295a7de6e877f5ab80ef32314c573c289d81..49fa2e8c3fa9212eef1198a1077a6726f0f1b6fc 100644 (file)
@@ -391,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        np = inet6_sk(sk);
 
        if (type == NDISC_REDIRECT) {
-               struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+               if (!sock_owned_by_user(sk)) {
+                       struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
 
-               if (dst)
-                       dst->ops->redirect(dst, sk, skb);
+                       if (dst)
+                               dst->ops->redirect(dst, sk, skb);
+               }
                goto out;
        }
 
index 81adc29a448dc5be56b96ddd5c42321417371d37..8d77ad5cadaff3aa1feb18f168e779c5a6e7f917 100644 (file)
@@ -828,7 +828,8 @@ out:
  *    Wait for incoming connection
  *
  */
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
+static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
 {
        struct sock *sk = sock->sk;
        struct irda_sock *new, *self = irda_sk(sk);
@@ -836,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
        struct sk_buff *skb = NULL;
        int err;
 
-       err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
+       err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
        if (err)
                return err;
 
index 89bbde1081ce5eb56c0c6a1c7c18b030f3de1198..84de7b6326dcdf7fcf0d8cb73f738d9c21c2f9fe 100644 (file)
@@ -938,7 +938,7 @@ done:
 
 /* Accept a pending connection */
 static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
-                           int flags)
+                           int flags, bool kern)
 {
        DECLARE_WAITQUEUE(wait, current);
        struct sock *sk = sock->sk, *nsk;
index 06186d608a274eb46cd768610c67e8a5a8e84c15..cb4fff785cbf5aaad520442dc243ae62dc5750ea 100644 (file)
@@ -641,11 +641,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
  *     @sock: Socket which connections arrive on.
  *     @newsock: Socket to move incoming connection to.
  *     @flags: User specified operational flags.
+ *     @kern: If the socket is kernel internal
  *
  *     Accept a new incoming connection.
  *     Returns 0 upon success, negative otherwise.
  */
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
+                        bool kern)
 {
        struct sock *sk = sock->sk, *newsk;
        struct llc_sock *llc, *newllc;
index 3818686182b210be11025ff69d82f58e4e08e401..33211f9a265608c378848c97b4be36a1cec9736d 100644 (file)
@@ -1288,7 +1288,8 @@ static void mpls_ifdown(struct net_device *dev, int event)
                                /* fall through */
                        case NETDEV_CHANGE:
                                nh->nh_flags |= RTNH_F_LINKDOWN;
-                               ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+                               if (event != NETDEV_UNREGISTER)
+                                       ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
                                break;
                        }
                        if (event == NETDEV_UNREGISTER)
@@ -2028,6 +2029,7 @@ static void mpls_net_exit(struct net *net)
        for (index = 0; index < platform_labels; index++) {
                struct mpls_route *rt = rtnl_dereference(platform_label[index]);
                RCU_INIT_POINTER(platform_label[index], NULL);
+               mpls_notify_route(net, index, rt, NULL, NULL);
                mpls_rt_free(rt);
        }
        rtnl_unlock();
index 4bbf4526b88566d7c3f14e602f279b7e2570113c..ebf16f7f90892dd3029e643835859459baec4507 100644 (file)
@@ -765,7 +765,8 @@ out_release:
        return err;
 }
 
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
+static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
+                    bool kern)
 {
        struct sk_buff *skb;
        struct sock *newsk;
index 879885b31cce5ff2461c3a1524612527b9383bbd..2ffb18e73df6c03072fffeb68b660fb2f884eb45 100644 (file)
@@ -441,7 +441,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
 }
 
 static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
-                           int flags)
+                           int flags, bool kern)
 {
        DECLARE_WAITQUEUE(wait, current);
        struct sock *sk = sock->sk, *new_sk;
index 222bedcd95754c80644748daba365dc00b10fd8c..e81537991ddf0d67e6eca19fc9eb6f442d3c06a4 100644 (file)
@@ -772,7 +772,8 @@ static void pep_sock_close(struct sock *sk, long timeout)
        sock_put(sk);
 }
 
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
+                                   bool kern)
 {
        struct pep_sock *pn = pep_sk(sk), *newpn;
        struct sock *newsk = NULL;
@@ -846,7 +847,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
        }
 
        /* Create a new to-be-accepted sock */
-       newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
+       newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
+                        kern);
        if (!newsk) {
                pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
                err = -ENOBUFS;
index a6c8da3ee89349989a9f23e095b98293050da73a..64634e3ec2fc78ebb84ad8873f6e446d06844493 100644 (file)
@@ -305,7 +305,7 @@ out:
 }
 
 static int pn_socket_accept(struct socket *sock, struct socket *newsock,
-                               int flags)
+                           int flags, bool kern)
 {
        struct sock *sk = sock->sk;
        struct sock *newsk;
@@ -314,7 +314,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock,
        if (unlikely(sk->sk_state != TCP_LISTEN))
                return -EINVAL;
 
-       newsk = sk->sk_prot->accept(sk, flags, &err);
+       newsk = sk->sk_prot->accept(sk, flags, &err, kern);
        if (!newsk)
                return err;
 
index 0e04dcceb1d416438be8bb40fc68253f336f631d..1fa75ab7b733230585666abcb7279ba691365256 100644 (file)
@@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn)
         */
        rds_cong_remove_conn(conn);
 
+       put_net(conn->c_net);
        kmem_cache_free(rds_conn_slab, conn);
 
        spin_lock_irqsave(&rds_conn_lock, flags);
index ce3775abc6e7a1d30e335aaea749a9840e949786..1c38d2c7caa8e955585b45f0c9218a0775013b4d 100644 (file)
@@ -442,7 +442,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
                ic->i_send_cq = NULL;
                ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
                rdsdebug("ib_create_cq send failed: %d\n", ret);
-               goto out;
+               goto rds_ibdev_out;
        }
 
        ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
@@ -456,19 +456,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
                ic->i_recv_cq = NULL;
                ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
                rdsdebug("ib_create_cq recv failed: %d\n", ret);
-               goto out;
+               goto send_cq_out;
        }
 
        ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
        if (ret) {
                rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
-               goto out;
+               goto recv_cq_out;
        }
 
        ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
        if (ret) {
                rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
-               goto out;
+               goto recv_cq_out;
        }
 
        /* XXX negotiate max send/recv with remote? */
@@ -494,7 +494,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
        if (ret) {
                rdsdebug("rdma_create_qp failed: %d\n", ret);
-               goto out;
+               goto recv_cq_out;
        }
 
        ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
@@ -504,7 +504,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        if (!ic->i_send_hdrs) {
                ret = -ENOMEM;
                rdsdebug("ib_dma_alloc_coherent send failed\n");
-               goto out;
+               goto qp_out;
        }
 
        ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
@@ -514,7 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        if (!ic->i_recv_hdrs) {
                ret = -ENOMEM;
                rdsdebug("ib_dma_alloc_coherent recv failed\n");
-               goto out;
+               goto send_hdrs_dma_out;
        }
 
        ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
@@ -522,7 +522,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        if (!ic->i_ack) {
                ret = -ENOMEM;
                rdsdebug("ib_dma_alloc_coherent ack failed\n");
-               goto out;
+               goto recv_hdrs_dma_out;
        }
 
        ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
@@ -530,7 +530,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        if (!ic->i_sends) {
                ret = -ENOMEM;
                rdsdebug("send allocation failed\n");
-               goto out;
+               goto ack_dma_out;
        }
 
        ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
@@ -538,7 +538,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        if (!ic->i_recvs) {
                ret = -ENOMEM;
                rdsdebug("recv allocation failed\n");
-               goto out;
+               goto sends_out;
        }
 
        rds_ib_recv_init_ack(ic);
@@ -546,8 +546,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
                 ic->i_send_cq, ic->i_recv_cq);
 
-out:
+       return ret;
+
+sends_out:
+       vfree(ic->i_sends);
+ack_dma_out:
+       ib_dma_free_coherent(dev, sizeof(struct rds_header),
+                            ic->i_ack, ic->i_ack_dma);
+recv_hdrs_dma_out:
+       ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
+                                       sizeof(struct rds_header),
+                                       ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+send_hdrs_dma_out:
+       ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
+                                       sizeof(struct rds_header),
+                                       ic->i_send_hdrs, ic->i_send_hdrs_dma);
+qp_out:
+       rdma_destroy_qp(ic->i_cm_id);
+recv_cq_out:
+       if (!ib_destroy_cq(ic->i_recv_cq))
+               ic->i_recv_cq = NULL;
+send_cq_out:
+       if (!ib_destroy_cq(ic->i_send_cq))
+               ic->i_send_cq = NULL;
+rds_ibdev_out:
+       rds_ib_remove_conn(rds_ibdev, conn);
        rds_ib_dev_put(rds_ibdev);
+
        return ret;
 }
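
The rds_ib_setup_qp() rework above replaces one catch-all out: label with a ladder of unwind labels, so each failure point releases exactly the resources that were set up before it. The idiom in a self-contained sketch (malloc/free stand in for the CQ, QP and DMA teardown calls used in the driver):

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

/* each failure jumps to the label that frees only what already exists */
static int ctx_setup(struct ctx *ctx)
{
        ctx->a = malloc(16);
        if (!ctx->a)
                goto err;
        ctx->b = malloc(16);
        if (!ctx->b)
                goto free_a;
        ctx->c = malloc(16);
        if (!ctx->c)
                goto free_b;
        return 0;               /* success: caller owns a, b and c */

free_b:
        free(ctx->b);
free_a:
        free(ctx->a);
err:
        return -1;
}

int main(void)
{
        struct ctx ctx;

        if (ctx_setup(&ctx) == 0) {
                free(ctx.c);
                free(ctx.b);
                free(ctx.a);
        }
        return 0;
}
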
 
index 39518ef7af4dfbada74af4a685cd8fe8dbaf9e40..82d38ccf5e8bcf99eefd20934744cbeb7410406b 100644 (file)
@@ -147,7 +147,7 @@ struct rds_connection {
 
        /* Protocol version */
        unsigned int            c_version;
-       possible_net_t          c_net;
+       struct net              *c_net;
 
        struct list_head        c_map_item;
        unsigned long           c_map_queued;
@@ -162,13 +162,13 @@ struct rds_connection {
 static inline
 struct net *rds_conn_net(struct rds_connection *conn)
 {
-       return read_pnet(&conn->c_net);
+       return conn->c_net;
 }
 
 static inline
 void rds_conn_net_set(struct rds_connection *conn, struct net *net)
 {
-       write_pnet(&conn->c_net, net);
+       conn->c_net = get_net(net);
 }
 
 #define RDS_FLAG_CONG_BITMAP   0x01
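
Turning c_net into a plain struct net * is safe because rds_conn_net_set() above now takes its own get_net() reference, and the matching put_net() appears in rds_conn_destroy() earlier in this diff. A toy version of the own-a-reference-while-you-cache-a-pointer rule (plain int refcount and invented names, purely illustrative):

#include <stdio.h>

struct ns   { int refcount; };
struct conn { struct ns *net; };

static struct ns *ns_get(struct ns *ns) { ns->refcount++; return ns; }
static void ns_put(struct ns *ns)       { ns->refcount--; }

/* the connection pins the namespace for as long as it caches the pointer */
static void conn_init(struct conn *c, struct ns *ns) { c->net = ns_get(ns); }
static void conn_destroy(struct conn *c)             { ns_put(c->net); }

int main(void)
{
        struct ns ns = { .refcount = 1 };
        struct conn c;

        conn_init(&c, &ns);
        printf("%d\n", ns.refcount);    /* 2 while the connection holds it */
        conn_destroy(&c);
        printf("%d\n", ns.refcount);    /* back to 1 */
        return 0;
}
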
index a973d3b4dff0b2216bf3698cfbfeeb4b227dfc37..22569007677357ba40347ee46e6584c683de2597 100644 (file)
@@ -484,9 +484,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net)
         * we do need to clean up the listen socket here.
         */
        if (rtn->rds_tcp_listen_sock) {
-               rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+               struct socket *lsock = rtn->rds_tcp_listen_sock;
+
                rtn->rds_tcp_listen_sock = NULL;
-               flush_work(&rtn->rds_tcp_accept_w);
+               rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
        }
 }
 
@@ -523,13 +524,13 @@ static void rds_tcp_kill_sock(struct net *net)
        struct rds_tcp_connection *tc, *_tc;
        LIST_HEAD(tmp_list);
        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+       struct socket *lsock = rtn->rds_tcp_listen_sock;
 
-       rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
        rtn->rds_tcp_listen_sock = NULL;
-       flush_work(&rtn->rds_tcp_accept_w);
+       rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
        spin_lock_irq(&rds_tcp_conn_lock);
        list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-               struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+               struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
                if (net != c_net || !tc->t_sock)
                        continue;
@@ -546,8 +547,12 @@ static void rds_tcp_kill_sock(struct net *net)
 void *rds_tcp_listen_sock_def_readable(struct net *net)
 {
        struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+       struct socket *lsock = rtn->rds_tcp_listen_sock;
+
+       if (!lsock)
+               return NULL;
 
-       return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+       return lsock->sk->sk_user_data;
 }
 
 static int rds_tcp_dev_event(struct notifier_block *this,
@@ -584,7 +589,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 
        spin_lock_irq(&rds_tcp_conn_lock);
        list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-               struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+               struct net *c_net = tc->t_cpath->cp_conn->c_net;
 
                if (net != c_net || !tc->t_sock)
                        continue;
@@ -638,19 +643,19 @@ static int rds_tcp_init(void)
                goto out;
        }
 
-       ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
-       if (ret) {
-               pr_warn("could not register rds_tcp_dev_notifier\n");
+       ret = rds_tcp_recv_init();
+       if (ret)
                goto out_slab;
-       }
 
        ret = register_pernet_subsys(&rds_tcp_net_ops);
        if (ret)
-               goto out_notifier;
+               goto out_recv;
 
-       ret = rds_tcp_recv_init();
-       if (ret)
+       ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
+       if (ret) {
+               pr_warn("could not register rds_tcp_dev_notifier\n");
                goto out_pernet;
+       }
 
        rds_trans_register(&rds_tcp_transport);
 
@@ -660,9 +665,8 @@ static int rds_tcp_init(void)
 
 out_pernet:
        unregister_pernet_subsys(&rds_tcp_net_ops);
-out_notifier:
-       if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
-               pr_warn("could not unregister rds_tcp_dev_notifier\n");
+out_recv:
+       rds_tcp_recv_exit();
 out_slab:
        kmem_cache_destroy(rds_tcp_conn_slab);
 out:
index 9a1cc890657679798cf58888c42d5bb2372f0fef..56ea6620fcf97ce40d0926089b5e5b188ea1a1fe 100644 (file)
@@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk);
 
 /* tcp_listen.c */
 struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
 int rds_tcp_keepalive(struct socket *sock);
index 67d0929c7d3d0c97ed209af9a67b4d83343c3de1..507678853e6cb3bb769711d22d75f5099228faab 100644 (file)
@@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock)
 
        new_sock->type = sock->type;
        new_sock->ops = sock->ops;
-       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+       ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
        if (ret < 0)
                goto out;
 
@@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk)
         * before it has been accepted and the accepter has set up their
         * data_ready.. we only want to queue listen work for our listening
         * socket
+        *
+        * (*ready)() may be null if we are racing with netns delete, and
+        * the listen socket is being torn down.
         */
        if (sk->sk_state == TCP_LISTEN)
                rds_tcp_accept_work(sk);
@@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk)
 
 out:
        read_unlock_bh(&sk->sk_callback_lock);
-       ready(sk);
+       if (ready)
+               ready(sk);
 }
 
 struct socket *rds_tcp_listen_init(struct net *net)
@@ -271,7 +275,7 @@ out:
        return NULL;
 }
 
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
 {
        struct sock *sk;
 
@@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock)
 
        /* wait for accepts to stop and close the socket */
        flush_workqueue(rds_wq);
+       flush_work(acceptor);
        sock_release(sock);
 }
index b8a1df2c97853246b2485d9d30caa0e19b61278a..4a9729257023676565a0ff8c140ef56823b6d374 100644 (file)
@@ -871,7 +871,8 @@ out_release:
        return err;
 }
 
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
 {
        struct sk_buff *skb;
        struct sock *newsk;
index 9f4cfa25af7c92c406e81d8003b8aa07c7892a04..18b2ad8be8e2b57dd57ef846287add68b027b08e 100644 (file)
@@ -420,6 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
                             u16 skew)
 {
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+       enum rxrpc_call_state state;
        unsigned int offset = sizeof(struct rxrpc_wire_header);
        unsigned int ix;
        rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
@@ -434,14 +435,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
        _proto("Rx DATA %%%u { #%u f=%02x }",
               sp->hdr.serial, seq, sp->hdr.flags);
 
-       if (call->state >= RXRPC_CALL_COMPLETE)
+       state = READ_ONCE(call->state);
+       if (state >= RXRPC_CALL_COMPLETE)
                return;
 
        /* Received data implicitly ACKs all of the request packets we sent
         * when we're acting as a client.
         */
-       if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
-            call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+       if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+            state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
            !rxrpc_receiving_reply(call))
                return;
 
@@ -650,6 +652,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        struct rxrpc_peer *peer;
        unsigned int mtu;
+       bool wake = false;
        u32 rwind = ntohl(ackinfo->rwind);
 
        _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
@@ -657,9 +660,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
               ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
               rwind, ntohl(ackinfo->jumbo_max));
 
-       if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
-               rwind = RXRPC_RXTX_BUFF_SIZE - 1;
-       call->tx_winsize = rwind;
+       if (call->tx_winsize != rwind) {
+               if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+                       rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+               if (rwind > call->tx_winsize)
+                       wake = true;
+               call->tx_winsize = rwind;
+       }
+
        if (call->cong_ssthresh > rwind)
                call->cong_ssthresh = rwind;
 
@@ -673,6 +681,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
                spin_unlock_bh(&peer->lock);
                _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
        }
+
+       if (wake)
+               wake_up(&call->waitq);
 }
 
 /*
@@ -799,7 +810,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
                return rxrpc_proto_abort("AK0", call, 0);
 
        /* Ignore ACKs unless we are or have just been transmitting. */
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
        case RXRPC_CALL_CLIENT_SEND_REQUEST:
        case RXRPC_CALL_CLIENT_AWAIT_REPLY:
        case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -940,7 +951,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
 static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
                                          struct rxrpc_call *call)
 {
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
        case RXRPC_CALL_SERVER_AWAIT_ACK:
                rxrpc_call_completed(call);
                break;
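
Several hunks in this file replace repeated reads of call->state within one code path by a single READ_ONCE() into a local variable. call->state can change concurrently (it is only stable under call->state_lock), so two adjacent reads inside one condition may observe two different states, and the compiler is otherwise free to reload the field between checks; snapshotting it once keeps the related tests mutually consistent. A generic sketch of the idiom with made-up names (example_* and the EX_* states are illustrative, not the rxrpc definitions):

    #include <linux/compiler.h>
    #include <linux/types.h>

    enum example_state { EX_SEND_REQUEST, EX_AWAIT_REPLY, EX_COMPLETE };

    struct example_call {
            enum example_state state;       /* updated concurrently elsewhere */
    };

    static bool example_can_accept_data(struct example_call *call)
    {
            enum example_state state = READ_ONCE(call->state);

            if (state >= EX_COMPLETE)
                    return false;

            /* Both tests below see the same snapshot even if call->state
             * changes underneath us while they are evaluated. */
            return state == EX_SEND_REQUEST || state == EX_AWAIT_REPLY;
    }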
index 6491ca46a03fda6dc66e02e887ad08012acca14b..3e2f1a8e9c5b51bf90ce8679c06b6ec8a8958ea9 100644 (file)
@@ -527,7 +527,7 @@ try_again:
                msg->msg_namelen = len;
        }
 
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
        case RXRPC_CALL_SERVER_ACCEPTING:
                ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
                break;
@@ -640,7 +640,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 
        mutex_lock(&call->user_mutex);
 
-       switch (call->state) {
+       switch (READ_ONCE(call->state)) {
        case RXRPC_CALL_CLIENT_RECV_REPLY:
        case RXRPC_CALL_SERVER_RECV_REQUEST:
        case RXRPC_CALL_SERVER_ACK_REQUEST:
index bc2d3dcff9de76fcc42a20a3aeaec2305ebd2d6c..97ab214ca4118d7a451a4e56a916bd5809ae81f3 100644 (file)
@@ -488,6 +488,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
        __releases(&rx->sk.sk_lock.slock)
 {
+       enum rxrpc_call_state state;
        enum rxrpc_command cmd;
        struct rxrpc_call *call;
        unsigned long user_call_ID = 0;
@@ -526,13 +527,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
                        return PTR_ERR(call);
                /* ... and we have the call lock. */
        } else {
-               ret = -EBUSY;
-               if (call->state == RXRPC_CALL_UNINITIALISED ||
-                   call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
-                   call->state == RXRPC_CALL_SERVER_PREALLOC ||
-                   call->state == RXRPC_CALL_SERVER_SECURING ||
-                   call->state == RXRPC_CALL_SERVER_ACCEPTING)
+               switch (READ_ONCE(call->state)) {
+               case RXRPC_CALL_UNINITIALISED:
+               case RXRPC_CALL_CLIENT_AWAIT_CONN:
+               case RXRPC_CALL_SERVER_PREALLOC:
+               case RXRPC_CALL_SERVER_SECURING:
+               case RXRPC_CALL_SERVER_ACCEPTING:
+                       ret = -EBUSY;
                        goto error_release_sock;
+               default:
+                       break;
+               }
 
                ret = mutex_lock_interruptible(&call->user_mutex);
                release_sock(&rx->sk);
@@ -542,10 +547,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
                }
        }
 
+       state = READ_ONCE(call->state);
        _debug("CALL %d USR %lx ST %d on CONN %p",
-              call->debug_id, call->user_call_ID, call->state, call->conn);
+              call->debug_id, call->user_call_ID, state, call->conn);
 
-       if (call->state >= RXRPC_CALL_COMPLETE) {
+       if (state >= RXRPC_CALL_COMPLETE) {
                /* it's too late for this call */
                ret = -ESHUTDOWN;
        } else if (cmd == RXRPC_CMD_SEND_ABORT) {
@@ -555,12 +561,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
        } else if (cmd != RXRPC_CMD_SEND_DATA) {
                ret = -EINVAL;
        } else if (rxrpc_is_client_call(call) &&
-                  call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+                  state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
                /* request phase complete for this client call */
                ret = -EPROTO;
        } else if (rxrpc_is_service_call(call) &&
-                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+                  state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+                  state != RXRPC_CALL_SERVER_SEND_REPLY) {
                /* Reply phase not begun or not complete for service call. */
                ret = -EPROTO;
        } else {
@@ -605,14 +611,21 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
        _debug("CALL %d USR %lx ST %d on CONN %p",
               call->debug_id, call->user_call_ID, call->state, call->conn);
 
-       if (call->state >= RXRPC_CALL_COMPLETE) {
-               ret = -ESHUTDOWN; /* it's too late for this call */
-       } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-                  call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-               ret = -EPROTO; /* request phase complete for this client call */
-       } else {
+       switch (READ_ONCE(call->state)) {
+       case RXRPC_CALL_CLIENT_SEND_REQUEST:
+       case RXRPC_CALL_SERVER_ACK_REQUEST:
+       case RXRPC_CALL_SERVER_SEND_REPLY:
                ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+               break;
+       case RXRPC_CALL_COMPLETE:
+               read_lock_bh(&call->state_lock);
+               ret = -call->error;
+               read_unlock_bh(&call->state_lock);
+               break;
+       default:
+                /* Request phase complete for this client call */
+               ret = -EPROTO;
+               break;
        }
 
        mutex_unlock(&call->user_mutex);
index ab80629099622c47933efb36662a323f98f66773..f9bb43c25697e70d18fe9bbba90f6e98dfe05759 100644 (file)
@@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
        if (ret < 0)
                return ret;
 
+       if (!tb[TCA_CONNMARK_PARMS])
+               return -EINVAL;
+
        parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
        if (!tcf_hash_check(tn, parm->index, a, bind)) {
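
The added check matters because nla_parse_nested() leaves any attribute that was absent from the message as a NULL entry in tb[], so a minimal or malformed request without TCA_CONNMARK_PARMS would otherwise hand an invalid pointer to the parm->index dereference just below. A sketch of the general pattern for any parsed attribute table; the EXAMPLE_ATTR_* names and struct example_parm are hypothetical, only the nla_* calls are the real netlink API:

    #include <net/netlink.h>

    struct example_parm { __u32 index; };          /* hypothetical payload */

    enum { EXAMPLE_ATTR_UNSPEC, EXAMPLE_ATTR_PARMS, __EXAMPLE_ATTR_MAX };
    #define EXAMPLE_ATTR_MAX (__EXAMPLE_ATTR_MAX - 1)

    static int example_parse(const struct nlattr *nla, struct example_parm **parmp)
    {
            struct nlattr *tb[EXAMPLE_ATTR_MAX + 1];
            int err;

            err = nla_parse_nested(tb, EXAMPLE_ATTR_MAX, nla, NULL);
            if (err < 0)
                    return err;

            /* Absent attributes are left NULL in tb[]; check before use. */
            if (!tb[EXAMPLE_ATTR_PARMS])
                    return -EINVAL;

            *parmp = nla_data(tb[EXAMPLE_ATTR_PARMS]);
            return 0;
    }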
index 3b7074e2302487808dc1d16b01143d0b292ebe4e..c736627f8f4a0e0ff86db535ec95459a417e4ada 100644 (file)
@@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
 
        return skb->len;
 nla_put_failure:
-       rcu_read_unlock();
        nlmsg_trim(skb, b);
        return -1;
 }
index 063baac5b9fe4048e9d7b41e848a33f0f73c61d4..961ee59f696a0b0a8b6c2bade0031a073dff53ad 100644 (file)
@@ -640,14 +640,15 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
 
 /* Create and initialize a new sk for the socket to be returned by accept(). */
 static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
-                                            struct sctp_association *asoc)
+                                            struct sctp_association *asoc,
+                                            bool kern)
 {
        struct sock *newsk;
        struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
        struct sctp6_sock *newsctp6sk;
        struct ipv6_txoptions *opt;
 
-       newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
+       newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
        if (!newsk)
                goto out;
 
index 1b6d4574d2b02a2877caba604bb549352a0f0470..989a900383b57c57590bff37e3aee7426fb0b156 100644 (file)
@@ -575,10 +575,11 @@ static int sctp_v4_is_ce(const struct sk_buff *skb)
 
 /* Create and initialize a new sk for the socket returned by accept(). */
 static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
-                                            struct sctp_association *asoc)
+                                            struct sctp_association *asoc,
+                                            bool kern)
 {
        struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
-                       sk->sk_prot, 0);
+                       sk->sk_prot, kern);
        struct inet_sock *newinet;
 
        if (!newsk)
index 6f0a9be50f5055fd7efa29bb8b183cc37b23b25f..0f378ea2ae38828d75dc215abdbc258e75cec431 100644 (file)
@@ -4116,7 +4116,7 @@ static int sctp_disconnect(struct sock *sk, int flags)
  * descriptor will be returned from accept() to represent the newly
  * formed association.
  */
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
+static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
 {
        struct sctp_sock *sp;
        struct sctp_endpoint *ep;
@@ -4151,7 +4151,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
         */
        asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
 
-       newsk = sp->pf->create_accept_sk(sk, asoc);
+       newsk = sp->pf->create_accept_sk(sk, asoc, kern);
        if (!newsk) {
                error = -ENOMEM;
                goto out;
index 85837ab90e8916e612d5dd0a21ef48c5e2c9e544..093803786eacf3388dc470a04c02293f60e4102e 100644 (file)
@@ -944,7 +944,7 @@ out:
 }
 
 static int smc_accept(struct socket *sock, struct socket *new_sock,
-                     int flags)
+                     int flags, bool kern)
 {
        struct sock *sk = sock->sk, *nsk;
        DECLARE_WAITQUEUE(wait, current);
index 2c1e8677ff2d4fdb2f29eaa6e06a7c323d27d981..e034fe4164beec7731c68ba2bc6920627741561b 100644 (file)
@@ -1506,7 +1506,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
        if (err)
                goto out_fd;
 
-       err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+       err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
        if (err < 0)
                goto out_fd;
 
@@ -1731,6 +1731,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
        /* We assume all kernel code knows the size of sockaddr_storage */
        msg.msg_namelen = 0;
        msg.msg_iocb = NULL;
+       msg.msg_flags = 0;
        if (sock->file->f_flags & O_NONBLOCK)
                flags |= MSG_DONTWAIT;
        err = sock_recvmsg(sock, &msg, flags);
@@ -3238,7 +3239,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
        if (err < 0)
                goto done;
 
-       err = sock->ops->accept(sock, *newsock, flags);
+       err = sock->ops->accept(sock, *newsock, flags, true);
        if (err < 0) {
                sock_release(*newsock);
                *newsock = NULL;
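
The bool argument gained by every ->accept() implementation in this series is distinguished at exactly these two call sites: the accept4() syscall path passes false (the new socket belongs to userspace) while kernel_accept() passes true, and the protocol forwards the flag to its socket-allocation routine (as the sctp and tipc hunks show) so the child sock is classified as a kernel or user socket like its creator. A sketch of a protocol ->accept() forwarding the flag; example_accept is hypothetical, the signature is the one introduced here:

    #include <net/sock.h>

    static int example_accept(struct socket *sock, struct socket *newsock,
                              int flags, bool kern)
    {
            struct sock *newsk;

            /* 'kern' feeds straight into sk_alloc(), which classifies the
             * new sock the same way as other kernel- or user-created ones. */
            newsk = sk_alloc(sock_net(sock->sk), sock->sk->sk_family,
                             GFP_KERNEL, sock->sk->sk_prot, kern);
            if (!newsk)
                    return -ENOMEM;

            sock_init_data(newsock, newsk);
            return 0;
    }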
index 43e4045e72bc00cfbc9db6c1bf987a46e272969b..7130e73bd42c21758e88b24b875da4bd97b3c4d2 100644 (file)
@@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk);
 static void tipc_write_space(struct sock *sk);
 static void tipc_sock_destruct(struct sock *sk);
 static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+                      bool kern);
 static void tipc_sk_timeout(unsigned long data);
 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
                           struct tipc_name_seq const *seq);
@@ -2029,7 +2030,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
  *
  * Returns 0 on success, errno otherwise
  */
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+                      bool kern)
 {
        struct sock *new_sk, *sk = sock->sk;
        struct sk_buff *buf;
@@ -2051,7 +2053,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 
        buf = skb_peek(&sk->sk_receive_queue);
 
-       res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
+       res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
        if (res)
                goto exit;
        security_sk_clone(sock->sk, new_sock->sk);
index ee37b390260a62f026f08e3da827ae45666bc2a6..928691c434087e8ff72b84fc6515539115c0509b 100644 (file)
@@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int);
 static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
 static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int);
+static int unix_accept(struct socket *, struct socket *, int, bool);
 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 static unsigned int unix_dgram_poll(struct file *, struct socket *,
@@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old,
                set_bit(SOCK_PASSSEC, &new->flags);
 }
 
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
+static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
+                      bool kern)
 {
        struct sock *sk = sock->sk;
        struct sock *tsk;
index 9192ead6675114128817267926befe23f7cc1111..9f770f33c10098fd3fcccfd9c739ab9a28a6b6f5 100644 (file)
@@ -1250,7 +1250,8 @@ out:
        return err;
 }
 
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
+                       bool kern)
 {
        struct sock *listener;
        int err;
index fd28a49dbe8f0c99bb798acec314c63084fc22c6..8b911c29860e79f21b0ac8e1d3a80ed373fd537e 100644 (file)
@@ -852,7 +852,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
        return rc;
 }
 
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
+                     bool kern)
 {
        struct sock *sk = sock->sk;
        struct sock *newsk;
index 0806dccdf5078451e0dd9c5b5573d040ab21c831..236cbbc0ab9cfff05cd027ffb0dc56aa15e61033 100644 (file)
@@ -1243,7 +1243,7 @@ static inline int policy_to_flow_dir(int dir)
 }
 
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
-                                                const struct flowi *fl)
+                                                const struct flowi *fl, u16 family)
 {
        struct xfrm_policy *pol;
 
@@ -1251,8 +1251,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
  again:
        pol = rcu_dereference(sk->sk_policy[dir]);
        if (pol != NULL) {
-               bool match = xfrm_selector_match(&pol->selector, fl,
-                                                sk->sk_family);
+               bool match = xfrm_selector_match(&pol->selector, fl, family);
                int err = 0;
 
                if (match) {
@@ -2239,7 +2238,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
        sk = sk_const_to_full_sk(sk);
        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
                num_pols = 1;
-               pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+               pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
                err = xfrm_expand_policies(fl, family, pols,
                                           &num_pols, &num_xfrms);
                if (err < 0)
@@ -2518,7 +2517,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        pol = NULL;
        sk = sk_to_full_sk(sk);
        if (sk && sk->sk_policy[dir]) {
-               pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+               pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
                if (IS_ERR(pol)) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
                        return 0;
@@ -3069,6 +3068,11 @@ static int __net_init xfrm_net_init(struct net *net)
 {
        int rv;
 
+       /* Initialize the per-net locks here */
+       spin_lock_init(&net->xfrm.xfrm_state_lock);
+       spin_lock_init(&net->xfrm.xfrm_policy_lock);
+       mutex_init(&net->xfrm.xfrm_cfg_mutex);
+
        rv = xfrm_statistics_init(net);
        if (rv < 0)
                goto out_statistics;
@@ -3085,11 +3089,6 @@ static int __net_init xfrm_net_init(struct net *net)
        if (rv < 0)
                goto out;
 
-       /* Initialize the per-net locks here */
-       spin_lock_init(&net->xfrm.xfrm_state_lock);
-       spin_lock_init(&net->xfrm.xfrm_policy_lock);
-       mutex_init(&net->xfrm.xfrm_cfg_mutex);
-
        return 0;
 
 out:
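
Hoisting the spin_lock_init()/mutex_init() calls to the top of xfrm_net_init() follows the general rule that synchronization primitives must be fully initialized before any code that might take them can run, including the error-unwinding paths of the init steps that follow; with the old ordering the locks were only set up after those steps. A minimal sketch of the rule with hypothetical names (example_pernet, example_populate):

    #include <linux/spinlock.h>
    #include <linux/list.h>

    struct example_pernet {
            spinlock_t lock;                /* guards 'items' */
            struct list_head items;
    };

    /* May take p->lock, both on success and in its own failure cleanup. */
    static int example_populate(struct example_pernet *p);

    static int example_net_init(struct example_pernet *p)
    {
            /* Initialize primitives first ... */
            spin_lock_init(&p->lock);
            INIT_LIST_HEAD(&p->items);

            /* ... only then run steps that may take them. */
            return example_populate(p);
    }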
diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h
new file mode 100644 (file)
index 0000000..0674272
--- /dev/null
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+       struct pt_regs regs;
+       __u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
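
The new file above mirrors the kernel's include/uapi/linux/bpf_perf_event.h under tools/include so the BPF selftests can build against in-tree headers. A program of type BPF_PROG_TYPE_PERF_EVENT receives a pointer to this struct as its context; below is a minimal sketch of such a program, assuming the SEC() and PT_REGS_IP() helpers from the kernel's BPF samples/selftests headers (count_samples itself is hypothetical):

    #include <uapi/linux/bpf.h>
    #include <uapi/linux/bpf_perf_event.h>
    #include "bpf_helpers.h"                /* SEC(), PT_REGS_IP(); assumed available */

    SEC("perf_event")
    int count_samples(struct bpf_perf_event_data *ctx)
    {
            __u64 ip = PT_REGS_IP(&ctx->regs);      /* sampled instruction pointer */

            /* A real program would key a BPF map on 'ip' and accumulate
             * ctx->sample_period; this sketch only shows the context layout. */
            (void)ip;
            return 0;
    }

    char _license[] SEC("license") = "GPL";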
index 4b498265dae6dc3b52b35818453f2c895809b323..67531f47781b4069a33557f2f5a832b3aa57492a 100644 (file)
@@ -1,12 +1,14 @@
 LIBDIR := ../../../lib
 BPFOBJ := $(LIBDIR)/bpf/bpf.o
 
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
 TEST_PROGS := test_kmod.sh
 
+all: $(TEST_GEN_PROGS)
+
 .PHONY: all clean force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
index e1f5b9eea1e874ab7f4698a1e20abc5588fe00bb..d1555e4240c0fb3066c459e05f76a6f91f3b66af 100644 (file)
@@ -8,6 +8,8 @@
  * License as published by the Free Software Foundation.
  */
 
+#include <asm/types.h>
+#include <linux/types.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -4583,10 +4585,12 @@ static bool is_admin(void)
        cap_flag_value_t sysadmin = CAP_CLEAR;
        const cap_value_t cap_val = CAP_SYS_ADMIN;
 
+#ifdef CAP_IS_SUPPORTED
        if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
                perror("cap_get_flag");
                return false;
        }
+#endif
        caps = cap_get_proc();
        if (!caps) {
                perror("cap_get_proc");
index d828bfb6ef2d9a55f5752352458bca0ab1958549..54064ced9e95b3c66e57748f82ddf65aca91e94b 100644 (file)
  */
 FUNC_START(load_vsx)
        li      r5,0
-       lxvx    vs20,r5,r3
+       lxvd2x  vs20,r5,r3
        addi    r5,r5,16
-       lxvx    vs21,r5,r3
+       lxvd2x  vs21,r5,r3
        addi    r5,r5,16
-       lxvx    vs22,r5,r3
+       lxvd2x  vs22,r5,r3
        addi    r5,r5,16
-       lxvx    vs23,r5,r3
+       lxvd2x  vs23,r5,r3
        addi    r5,r5,16
-       lxvx    vs24,r5,r3
+       lxvd2x  vs24,r5,r3
        addi    r5,r5,16
-       lxvx    vs25,r5,r3
+       lxvd2x  vs25,r5,r3
        addi    r5,r5,16
-       lxvx    vs26,r5,r3
+       lxvd2x  vs26,r5,r3
        addi    r5,r5,16
-       lxvx    vs27,r5,r3
+       lxvd2x  vs27,r5,r3
        addi    r5,r5,16
-       lxvx    vs28,r5,r3
+       lxvd2x  vs28,r5,r3
        addi    r5,r5,16
-       lxvx    vs29,r5,r3
+       lxvd2x  vs29,r5,r3
        addi    r5,r5,16
-       lxvx    vs30,r5,r3
+       lxvd2x  vs30,r5,r3
        addi    r5,r5,16
-       lxvx    vs31,r5,r3
+       lxvd2x  vs31,r5,r3
        blr
 FUNC_END(load_vsx)
 
 FUNC_START(store_vsx)
        li      r5,0
-       stxvx   vs20,r5,r3
+       stxvd2x vs20,r5,r3
        addi    r5,r5,16
-       stxvx   vs21,r5,r3
+       stxvd2x vs21,r5,r3
        addi    r5,r5,16
-       stxvx   vs22,r5,r3
+       stxvd2x vs22,r5,r3
        addi    r5,r5,16
-       stxvx   vs23,r5,r3
+       stxvd2x vs23,r5,r3
        addi    r5,r5,16
-       stxvx   vs24,r5,r3
+       stxvd2x vs24,r5,r3
        addi    r5,r5,16
-       stxvx   vs25,r5,r3
+       stxvd2x vs25,r5,r3
        addi    r5,r5,16
-       stxvx   vs26,r5,r3
+       stxvd2x vs26,r5,r3
        addi    r5,r5,16
-       stxvx   vs27,r5,r3
+       stxvd2x vs27,r5,r3
        addi    r5,r5,16
-       stxvx   vs28,r5,r3
+       stxvd2x vs28,r5,r3
        addi    r5,r5,16
-       stxvx   vs29,r5,r3
+       stxvd2x vs29,r5,r3
        addi    r5,r5,16
-       stxvx   vs30,r5,r3
+       stxvd2x vs30,r5,r3
        addi    r5,r5,16
-       stxvx   vs31,r5,r3
+       stxvd2x vs31,r5,r3
        blr
 FUNC_END(store_vsx)
index f6121612e769f5600d1cc0920037ee4c6ee0bf92..b9a22f18566ae27eef76b22590448e2725b15e12 100644 (file)
@@ -409,6 +409,51 @@ static void *threadproc(void *ctx)
        }
 }
 
+#ifdef __i386__
+
+#ifndef SA_RESTORER
+#define SA_RESTORER 0x04000000
+#endif
+
+/*
+ * The UAPI header calls this 'struct sigaction', which conflicts with
+ * glibc.  Sigh.
+ */
+struct fake_ksigaction {
+       void *handler;  /* the real type is nasty */
+       unsigned long sa_flags;
+       void (*sa_restorer)(void);
+       unsigned char sigset[8];
+};
+
+static void fix_sa_restorer(int sig)
+{
+       struct fake_ksigaction ksa;
+
+       if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) {
+               /*
+                * glibc has a nasty bug: it sometimes writes garbage to
+                * sa_restorer.  This interacts quite badly with anything
+                * that fiddles with SS because it can trigger legacy
+                * stack switching.  Patch it up.  See:
+                *
+                * https://sourceware.org/bugzilla/show_bug.cgi?id=21269
+                */
+               if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) {
+                       ksa.sa_restorer = NULL;
+                       if (syscall(SYS_rt_sigaction, sig, &ksa, NULL,
+                                   sizeof(ksa.sigset)) != 0)
+                               err(1, "rt_sigaction");
+               }
+       }
+}
+#else
+static void fix_sa_restorer(int sig)
+{
+       /* 64-bit glibc works fine. */
+}
+#endif
+
 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
                       int flags)
 {
@@ -420,6 +465,7 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
        if (sigaction(sig, &sa, 0))
                err(1, "sigaction");
 
+       fix_sa_restorer(sig);
 }
 
 static jmp_buf jmpbuf;
index 571b64a01c509741146e5e2263d5042457a2e14c..8d1da1af4b09e47c174cf7151b37b98666f03953 100644 (file)
@@ -360,29 +360,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
        return ret;
 }
 
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
-                                            struct vgic_its *its,
-                                            gpa_t addr, unsigned int len)
-{
-       u32 reg = 0;
-
-       mutex_lock(&its->cmd_lock);
-       if (its->creadr == its->cwriter)
-               reg |= GITS_CTLR_QUIESCENT;
-       if (its->enabled)
-               reg |= GITS_CTLR_ENABLE;
-       mutex_unlock(&its->cmd_lock);
-
-       return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
-                                    gpa_t addr, unsigned int len,
-                                    unsigned long val)
-{
-       its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
 static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
                                              struct vgic_its *its,
                                              gpa_t addr, unsigned int len)
@@ -1161,33 +1138,16 @@ static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its,
 #define ITS_CMD_SIZE                   32
 #define ITS_CMD_OFFSET(reg)            ((reg) & GENMASK(19, 5))
 
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
-                                       gpa_t addr, unsigned int len,
-                                       unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
 {
        gpa_t cbaser;
        u64 cmd_buf[4];
-       u32 reg;
 
-       if (!its)
-               return;
-
-       mutex_lock(&its->cmd_lock);
-
-       reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
-       reg = ITS_CMD_OFFSET(reg);
-       if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
-               mutex_unlock(&its->cmd_lock);
+       /* Commands are only processed when the ITS is enabled. */
+       if (!its->enabled)
                return;
-       }
 
-       its->cwriter = reg;
        cbaser = CBASER_ADDRESS(its->cbaser);
 
        while (its->cwriter != its->creadr) {
@@ -1207,6 +1167,34 @@ static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
                if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
                        its->creadr = 0;
        }
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+                                       gpa_t addr, unsigned int len,
+                                       unsigned long val)
+{
+       u64 reg;
+
+       if (!its)
+               return;
+
+       mutex_lock(&its->cmd_lock);
+
+       reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+       reg = ITS_CMD_OFFSET(reg);
+       if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+               mutex_unlock(&its->cmd_lock);
+               return;
+       }
+       its->cwriter = reg;
+
+       vgic_its_process_commands(kvm, its);
 
        mutex_unlock(&its->cmd_lock);
 }
@@ -1287,6 +1275,39 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
        *regptr = reg;
 }
 
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+                                            struct vgic_its *its,
+                                            gpa_t addr, unsigned int len)
+{
+       u32 reg = 0;
+
+       mutex_lock(&its->cmd_lock);
+       if (its->creadr == its->cwriter)
+               reg |= GITS_CTLR_QUIESCENT;
+       if (its->enabled)
+               reg |= GITS_CTLR_ENABLE;
+       mutex_unlock(&its->cmd_lock);
+
+       return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+                                    gpa_t addr, unsigned int len,
+                                    unsigned long val)
+{
+       mutex_lock(&its->cmd_lock);
+
+       its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+       /*
+        * Try to process any pending commands. This function bails out early
+        * if the ITS is disabled or no commands have been queued.
+        */
+       vgic_its_process_commands(kvm, its);
+
+       mutex_unlock(&its->cmd_lock);
+}
+
 #define REGISTER_ITS_DESC(off, rd, wr, length, acc)            \
 {                                                              \
        .reg_offset = off,                                      \
index 3654b4c835ef733c8f1255137849b253b71c1659..2a5db135272215d5c9d4bfa544b7d3ed11a9b9c3 100644 (file)
@@ -180,21 +180,37 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
 static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                                    bool new_active_state)
 {
+       struct kvm_vcpu *requester_vcpu;
        spin_lock(&irq->irq_lock);
+
+       /*
+        * The vcpu parameter here can mean multiple things depending on how
+        * this function is called; when handling a trap from the kernel it
+        * depends on the GIC version, and these functions are also called as
+        * part of save/restore from userspace.
+        *
+        * Therefore, we have to figure out the requester in a reliable way.
+        *
+        * When accessing VGIC state from user space, the requester_vcpu is
+        * NULL, which is fine, because we guarantee that no VCPUs are running
+        * when accessing VGIC state from user space so irq->vcpu->cpu is
+        * always -1.
+        */
+       requester_vcpu = kvm_arm_get_running_vcpu();
+
        /*
         * If this virtual IRQ was written into a list register, we
         * have to make sure the CPU that runs the VCPU thread has
-        * synced back LR state to the struct vgic_irq.  We can only
-        * know this for sure, when either this irq is not assigned to
-        * anyone's AP list anymore, or the VCPU thread is not
-        * running on any CPUs.
+        * synced back the LR state to the struct vgic_irq.
         *
-        * In the opposite case, we know the VCPU thread may be on its
-        * way back from the guest and still has to sync back this
-        * IRQ, so we release and re-acquire the spin_lock to let the
-        * other thread sync back the IRQ.
+        * As long as the conditions below are true, we know the VCPU thread
+        * may be on its way back from the guest (we kicked the VCPU thread in
+        * vgic_change_active_prepare)  and still has to sync back this IRQ,
+        * so we release and re-acquire the spin_lock to let the other thread
+        * sync back the IRQ.
         */
        while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+              irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
               irq->vcpu->cpu != -1) /* VCPU thread is running */
                cond_resched_lock(&irq->irq_lock);
 
index edc6ee2dc852e9fb0f425e44e741434a71983731..be0f4c3e0142e04216cb28e1f965487d52d0b4c9 100644 (file)
@@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
        /*
         * If we are emulating a GICv3, we do it in an non-GICv2-compatible
         * way, so we force SRE to 1 to demonstrate this to the guest.
+        * Also, we don't support any form of IRQ/FIQ bypass.
         * This goes with the spec allowing the value to be RAO/WI.
         */
        if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
-               vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+               vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+                                    ICC_SRE_EL1_DFB |
+                                    ICC_SRE_EL1_SRE);
                vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
        } else {
                vgic_v3->vgic_sre = 0;