| | | | |
| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
+| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
pids.max
- A read-write single value file which exists on non-root cgroups. The
- default is "max".
+ A read-write single value file which exists on non-root
+ cgroups. The default is "max".
- Hard limit of number of processes.
+ Hard limit of number of processes.
pids.current
- A read-only single value file which exists on all cgroups.
+ A read-only single value file which exists on all cgroups.
- The number of processes currently in the cgroup and its descendants.
+ The number of processes currently in the cgroup and its
+ descendants.
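A minimal user-space sketch of exercising these two files (not part of the
document above; it assumes cgroup2 is mounted at /sys/fs/cgroup and that a
child group named "test" already exists, both of which are illustrative):

	#include <stdio.h>

	int main(void)
	{
		char buf[32];
		FILE *f;

		/* Cap the hierarchy at 64 processes (illustrative limit). */
		f = fopen("/sys/fs/cgroup/test/pids.max", "w");
		if (f) {
			fputs("64\n", f);
			fclose(f);
		}

		/* Read how many processes the cgroup and its descendants hold. */
		f = fopen("/sys/fs/cgroup/test/pids.current", "r");
		if (f) {
			if (fgets(buf, sizeof(buf), f))
				printf("pids.current = %s", buf);
			fclose(f);
		}
		return 0;
	}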
Organisational operations are not blocked by cgroup policies, so it is
possible to have pids.current > pids.max. This can be done by either
For Axon it can be absent, though my current driver
doesn't handle phy-address yet so for now, keep
0x00ffffff in it.
+ - phy-handle : Used to describe configurations where an external PHY
+ is used. Please refer to:
+ Documentation/devicetree/bindings/net/ethernet.txt
- rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
operations (if absent the value is the same as
rx-fifo-size). For Axon, either absent or 2048.
offload, phandle of the TAH device node.
- tah-channel : 1 cell, optional. If appropriate, channel used on the
TAH engine.
+ - fixed-link : Fixed-link subnode describing a link to a non-MDIO
+ managed entity. See
+ Documentation/devicetree/bindings/net/fixed-link.txt
+ for details.
+ - mdio subnode : When the EMAC has a PHY connected to its local
+ mdio, which is supported by the kernel's network
+ PHY library in drivers/net/phy, there must be a
+ device tree subnode with the following required properties:
+ - #address-cells: Must be <1>.
+ - #size-cells: Must be <0>.
- Example:
+ For PHY definitions: Please refer to
+ Documentation/devicetree/bindings/net/phy.txt and
+ Documentation/devicetree/bindings/net/ethernet.txt
+
+ Examples:
EMAC0: ethernet@40000800 {
device_type = "network";
zmii-channel = <0>;
};
+ EMAC1: ethernet@ef600c00 {
+ device_type = "network";
+ compatible = "ibm,emac-apm821xx", "ibm,emac4sync";
+ interrupt-parent = <&EMAC1>;
+ interrupts = <0 1>;
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */
+ 1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>;
+ reg = <0xef600c00 0x000000c4>;
+ local-mac-address = [000000000000]; /* Filled in by U-Boot */
+ mal-device = <&MAL0>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <0>;
+ max-frame-size = <9000>;
+ rx-fifo-size = <16384>;
+ tx-fifo-size = <2048>;
+ fifo-entry-size = <10>;
+ phy-mode = "rgmii";
+ phy-handle = <&phy0>;
+ phy-map = <0x00000000>;
+ rgmii-device = <&RGMII0>;
+ rgmii-channel = <0>;
+ tah-device = <&TAH0>;
+ tah-channel = <0>;
+ has-inverted-stacr-oc;
+ has-new-stacr-staopc;
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ };
+ };
+ };
+
+
ii) McMAL node
Required properties:
- revision : as provided by the RGMII new version register if
available.
For Axon: 0x0000012a
-
FALSE (router)
forwarding - BOOLEAN
- Enable IP forwarding on this interface.
+ Enable IP forwarding on this interface. This controls whether packets
+ received _on_ this interface can be forwarded.
mc_forwarding - BOOLEAN
Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
slot. When changing an existing slot, it may be moved in the guest
physical memory space, or its flags may be modified. It may not be
resized. Slots may not overlap in guest physical address space.
+Bits 0-15 of "slot" specify the slot id and this value should be
+less than the maximum number of user memory slots supported per VM.
+The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS,
+if this capability is supported by the architecture.
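As a hedged illustration of honouring this limit from user space (not part of
this patch; it assumes a /dev/kvm system fd kvm_fd and a VM fd vm_fd, with
error handling omitted):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int set_slot(int kvm_fd, int vm_fd, __u16 slot,
			    void *host_mem, __u64 gpa, __u64 size)
	{
		struct kvm_userspace_memory_region region = {
			.slot = slot,			/* bits 0-15: slot id */
			.guest_phys_addr = gpa,
			.memory_size = size,
			.userspace_addr = (__u64)(unsigned long)host_mem,
		};
		/* Returns 0 when the capability is not supported. */
		int max_slots = ioctl(kvm_fd, KVM_CHECK_EXTENSION,
				      KVM_CAP_NR_MEMSLOTS);

		if (max_slots > 0 && slot >= max_slots)
			return -1;
		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
	}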
If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
specify the address space which is being modified. They must be
ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
+ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.
+The module mapping space size changes based on the CONFIG requirements for the
+following fixmap section.
+
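As a quick check against the table above: ffffffffff5fffff - ffffffffa0000000 + 1
= 0x5f600000 bytes = 1526 MB, matching the figure shown; configurations that
enlarge the following fixmap section shrink this module area accordingly.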
Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.
VERSION = 4
PATCHLEVEL = 11
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Fearless Coyote
# *DOCUMENTATION*
This value can be changed after boot using the
/proc/sys/vm/mmap_rnd_compat_bits tunable
+config HAVE_ARCH_COMPAT_MMAP_BASES
+ bool
+ help
+ This allows 64-bit applications to invoke the 32-bit mmap() syscall
+ and, vice versa, 32-bit applications to call the 64-bit mmap().
+ Required for applications doing different bitness syscalls.
+
config HAVE_COPY_THREAD_TLS
bool
help
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
def_bool y
depends on ARM_LPAE
#define HSR_EC_IABT_HYP (0x21)
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
+#define HSR_EC_MAX (0x3f)
#define HSR_WFI_IS_WFE (_AC(1, UL) << 0)
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HAVE_ONE_REG
#define KVM_HALT_POLL_NS_DEFAULT 500000
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
case KVM_CAP_MSI_DEVID:
if (!kvm)
r = -EINVAL;
return 1;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n",
+ hsr);
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec,
[HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
{
u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x\n",
- (unsigned int)kvm_vcpu_get_hsr(vcpu));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
config ZONE_DMA
def_bool y
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
def_bool y
config ARCH_DMA_ADDR_T_64BIT
If unsure, say Y.
+config QCOM_QDF2400_ERRATUM_0065
+ bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size"
+ default y
+ help
+ On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports
+ ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have
+ been indicated as 16 bytes (0xf), not 8 bytes (0x7).
+
+ If unsure, say Y.
+
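The driver-side workaround is not part of this excerpt; a rough sketch of what
honouring the erratum could look like, with the field decode taken from the
GICv3 ITS register layout and all names illustrative:

	/* GITS_TYPER.ITT_Entry_size lives in bits [7:4] and encodes size - 1. */
	unsigned int qdf2400_ite_size(unsigned long long gits_typer, int has_e0065)
	{
		unsigned int ite_size = ((gits_typer >> 4) & 0xf) + 1;

		if (has_e0065)
			ite_size = 16;	/* hardware reports 0x7, i.e. 8 bytes */

		return ite_size;
	}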
endmenu
def_bool y
depends on COMPAT && SYSVIPC
+config KEYS_COMPAT
+ def_bool y
+ depends on COMPAT && KEYS
+
endmenu
menu "Power management options"
static inline bool system_uses_ttbr0_pan(void)
{
return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
- !cpus_have_cap(ARM64_HAS_PAN);
+ !cpus_have_const_cap(ARM64_HAS_PAN);
}
#endif /* __ASSEMBLY__ */
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_USER_MEM_SLOTS 512
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HALT_POLL_NS_DEFAULT 500000
}
/**
- * cpu_suspend() - function to enter a low-power idle state
+ * arm_cpuidle_suspend() - function to enter a low-power idle state
* @arg: argument to pass to CPU suspend operations
*
* Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
return 0;
}
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- return NOTIFY_DONE;
-}
-
static void __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p, *cur_kprobe;
return ret;
}
+static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+ kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n",
+ hsr, esr_get_class_string(hsr));
+
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
static exit_handle_fn arm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec,
[ESR_ELx_EC_WFx] = kvm_handle_wfx,
[ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32,
[ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64,
u32 hsr = kvm_vcpu_get_hsr(vcpu);
u8 hsr_ec = ESR_ELx_EC(hsr);
- if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
- !arm_exit_handlers[hsr_ec]) {
- kvm_err("Unknown exception class: hsr: %#08x -- %s\n",
- hsr, esr_get_class_string(hsr));
- BUG();
- }
-
return arm_exit_handlers[hsr_ec];
}
#include <asm/kvm_hyp.h>
#include <asm/tlbflush.h>
+static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
+{
+ u64 val;
+
+ /*
+ * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and
+ * most TLB operations target EL2/EL0. In order to affect the
+ * guest TLBs (EL1/EL0), we need to change one of these two
+ * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so
+ * let's flip TGE before executing the TLB operation.
+ */
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ val = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ write_sysreg(val, hcr_el2);
+ isb();
+}
+
+static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm)
+{
+ write_sysreg(kvm->arch.vttbr, vttbr_el2);
+ isb();
+}
+
+static hyp_alternate_select(__tlb_switch_to_guest,
+ __tlb_switch_to_guest_nvhe,
+ __tlb_switch_to_guest_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
+static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm)
+{
+ /*
+ * We're done with the TLB operation, let's restore the host's
+ * view of HCR_EL2.
+ */
+ write_sysreg(0, vttbr_el2);
+ write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+}
+
+static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm)
+{
+ write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__tlb_switch_to_host,
+ __tlb_switch_to_host_nvhe,
+ __tlb_switch_to_host_vhe,
+ ARM64_HAS_VIRT_HOST_EXTN);
+
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
- isb();
+ __tlb_switch_to_guest()(kvm);
/*
* We could do so much better if we had the VA as well.
dsb(ish);
isb();
- write_sysreg(0, vttbr_el2);
+ __tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
- isb();
+ __tlb_switch_to_guest()(kvm);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- write_sysreg(0, vttbr_el2);
+ __tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
/* Switch to requested VMID */
- write_sysreg(kvm->arch.vttbr, vttbr_el2);
- isb();
+ __tlb_switch_to_guest()(kvm);
__tlbi(vmalle1);
dsb(nsh);
isb();
- write_sysreg(0, vttbr_el2);
+ __tlb_switch_to_host()(kvm);
}
void __hyp_text __kvm_flush_vm_context(void)
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
- pfn_to_nid(virt_to_pfn(_text)));
+ pfn_to_nid(virt_to_pfn(lm_alias(_text))));
/*
* vmemmap_populate() has populated the shadow region that covers the
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
- select HAVE_GENERIC_RCU_GUP
+ select HAVE_GENERIC_GUP
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
}
#ifdef CONFIG_PPC64_BOOT_WRAPPER
+ . = ALIGN(256);
.got :
{
__toc_start = .;
{
u32 *key = crypto_tfm_ctx(tfm);
- *key = 0;
+ *key = ~0;
return 0;
}
#define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit))
#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \
+ ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
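A short worked example of the MSB-0 numbering these macros implement
(illustrative only, assuming the usual PPC_BITLSHIFT(be) = 63 - be definition
on 64-bit):

	/*
	 * PPC_BITLSHIFT(43) == 63 - 43 == 20, so PPC_BIT(43) == 1UL << 20, and
	 * PPC_BITEXTRACT(srr1, 43, 2) == ((srr1 >> 20) & 1) << 2: IBM-numbered
	 * bit 43 of SRR1 lands in conventional bit position 2, which is how
	 * P9_SRR1_MC_IFETCH() below assembles its 4-bit ifetch error code.
	 */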
#include <asm/barrier.h>
/* Macro for generating the ***_bits() functions */
#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
+/*
+ * Machine Check bits on power9
+ */
+#define P9_SRR1_MC_LOADSTORE(srr1) (((srr1) >> PPC_BITLSHIFT(42)) & 1)
+
+#define P9_SRR1_MC_IFETCH(srr1) ( \
+ PPC_BITEXTRACT(srr1, 45, 0) | \
+ PPC_BITEXTRACT(srr1, 44, 1) | \
+ PPC_BITEXTRACT(srr1, 43, 2) | \
+ PPC_BITEXTRACT(srr1, 36, 3) )
+
+/* 0 is reserved */
+#define P9_SRR1_MC_IFETCH_UE 1
+#define P9_SRR1_MC_IFETCH_SLB_PARITY 2
+#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT 3
+#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT 4
+#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT 5
+#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD 6
+/* 7 is reserved */
+#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT 8
+#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT 9
+/* 10 ? */
+#define P9_SRR1_MC_IFETCH_RA 11
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK 12
+#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE 13
+#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT 14
+#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15
+
+/* DSISR bits for machine check (On Power9) */
+#define P9_DSISR_MC_UE (PPC_BIT(48))
+#define P9_DSISR_MC_UE_TABLEWALK (PPC_BIT(49))
+#define P9_DSISR_MC_LINK_LOAD_TIMEOUT (PPC_BIT(50))
+#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT (PPC_BIT(51))
+#define P9_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52))
+#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53))
+#define P9_DSISR_MC_USER_TLBIE (PPC_BIT(54))
+#define P9_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55))
+#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB (PPC_BIT(56))
+#define P9_DSISR_MC_RA_LOAD (PPC_BIT(57))
+#define P9_DSISR_MC_RA_TABLEWALK (PPC_BIT(58))
+#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN (PPC_BIT(59))
+#define P9_DSISR_MC_RA_FOREIGN (PPC_BIT(60))
+
+/* SLB error bits */
+#define P9_DSISR_MC_SLB_ERRORS (P9_DSISR_MC_ERAT_MULTIHIT | \
+ P9_DSISR_MC_SLB_PARITY_MFSLB | \
+ P9_DSISR_MC_SLB_MULTIHIT_MFSLB)
+
enum MCE_Version {
MCE_V1 = 1,
};
MCE_ERROR_TYPE_SLB = 2,
MCE_ERROR_TYPE_ERAT = 3,
MCE_ERROR_TYPE_TLB = 4,
+ MCE_ERROR_TYPE_USER = 5,
+ MCE_ERROR_TYPE_RA = 6,
+ MCE_ERROR_TYPE_LINK = 7,
};
enum MCE_UeErrorType {
MCE_TLB_ERROR_MULTIHIT = 2,
};
+enum MCE_UserErrorType {
+ MCE_USER_ERROR_INDETERMINATE = 0,
+ MCE_USER_ERROR_TLBIE = 1,
+};
+
+enum MCE_RaErrorType {
+ MCE_RA_ERROR_INDETERMINATE = 0,
+ MCE_RA_ERROR_IFETCH = 1,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
+ MCE_RA_ERROR_LOAD = 4,
+ MCE_RA_ERROR_STORE = 5,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+};
+
+enum MCE_LinkErrorType {
+ MCE_LINK_ERROR_INDETERMINATE = 0,
+ MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+ MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+ MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+ MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+ MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
struct machine_check_event {
enum MCE_Version version:8; /* 0x00 */
uint8_t in_use; /* 0x01 */
uint64_t effective_address;
uint8_t reserved_2[16];
} tlb_error;
+
+ struct {
+ enum MCE_UserErrorType user_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } user_error;
+
+ struct {
+ enum MCE_RaErrorType ra_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } ra_error;
+
+ struct {
+ enum MCE_LinkErrorType link_error_type:8;
+ uint8_t effective_address_provided;
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint8_t reserved_2[16];
+ } link_error;
} u;
};
enum MCE_SlbErrorType slb_error_type:8;
enum MCE_EratErrorType erat_error_type:8;
enum MCE_TlbErrorType tlb_error_type:8;
+ enum MCE_UserErrorType user_error_type:8;
+ enum MCE_RaErrorType ra_error_type:8;
+ enum MCE_LinkErrorType link_error_type:8;
} u;
- uint8_t reserved[2];
+ enum MCE_Severity severity:8;
+ enum MCE_Initiator initiator:8;
};
#define MAX_MC_EVT 100
/* by default, allow everything */
return true;
}
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- /* by default, allow everything */
- return true;
-}
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
extern void __flush_tlb_power9(unsigned int action);
extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
{ /* Power9 */
.cpu_setup = __setup_cpu_power9,
.cpu_restore = __restore_cpu_power9,
.flush_tlb = __flush_tlb_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
{ /* Cell Broadband Engine */
case MCE_ERROR_TYPE_TLB:
mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
break;
+ case MCE_ERROR_TYPE_USER:
+ mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+ break;
case MCE_ERROR_TYPE_UNKNOWN:
default:
break;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
- mce->initiator = MCE_INITIATOR_CPU;
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
mce->disposition = MCE_DISPOSITION_RECOVERED;
else
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
- mce->severity = MCE_SEV_ERROR_SYNC;
+
+ mce->initiator = mce_err->initiator;
+ mce->severity = mce_err->severity;
/*
* Populate the mce error_type and type-specific error_type.
} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
mce->u.erat_error.effective_address_provided = true;
mce->u.erat_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+ mce->u.user_error.effective_address_provided = true;
+ mce->u.user_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+ mce->u.ra_error.effective_address_provided = true;
+ mce->u.ra_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+ mce->u.link_error.effective_address_provided = true;
+ mce->u.link_error.effective_address = addr;
} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
"Parity",
"Multihit",
};
+ static const char *mc_user_types[] = {
+ "Indeterminate",
+ "tlbie(l) invalid",
+ };
+ static const char *mc_ra_types[] = {
+ "Indeterminate",
+ "Instruction fetch (bad)",
+ "Page table walk ifetch (bad)",
+ "Page table walk ifetch (foreign)",
+ "Load (bad)",
+ "Store (bad)",
+ "Page table walk Load/Store (bad)",
+ "Page table walk Load/Store (foreign)",
+ "Load/Store (foreign)",
+ };
+ static const char *mc_link_types[] = {
+ "Indeterminate",
+ "Instruction fetch (timeout)",
+ "Page table walk ifetch (timeout)",
+ "Load (timeout)",
+ "Store (timeout)",
+ "Page table walk Load/Store (timeout)",
+ };
/* Print things out */
if (evt->version != MCE_V1) {
printk("%s Effective address: %016llx\n",
level, evt->u.tlb_error.effective_address);
break;
+ case MCE_ERROR_TYPE_USER:
+ subtype = evt->u.user_error.user_error_type <
+ ARRAY_SIZE(mc_user_types) ?
+ mc_user_types[evt->u.user_error.user_error_type]
+ : "Unknown";
+ printk("%s Error type: User [%s]\n", level, subtype);
+ if (evt->u.user_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.user_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_RA:
+ subtype = evt->u.ra_error.ra_error_type <
+ ARRAY_SIZE(mc_ra_types) ?
+ mc_ra_types[evt->u.ra_error.ra_error_type]
+ : "Unknown";
+ printk("%s Error type: Real address [%s]\n", level, subtype);
+ if (evt->u.ra_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.ra_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ subtype = evt->u.link_error.link_error_type <
+ ARRAY_SIZE(mc_link_types) ?
+ mc_link_types[evt->u.link_error.link_error_type]
+ : "Unknown";
+ printk("%s Error type: Link [%s]\n", level, subtype);
+ if (evt->u.link_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.link_error.effective_address);
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
printk("%s Error type: Unknown\n", level);
if (evt->u.tlb_error.effective_address_provided)
return evt->u.tlb_error.effective_address;
break;
+ case MCE_ERROR_TYPE_USER:
+ if (evt->u.user_error.effective_address_provided)
+ return evt->u.user_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ if (evt->u.ra_error.effective_address_provided)
+ return evt->u.ra_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ if (evt->u.link_error.effective_address_provided)
+ return evt->u.link_error.effective_address;
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
break;
}
#endif
+static void flush_erat(void)
+{
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+}
+
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
+
+static int mce_flush(int what)
+{
+#ifdef CONFIG_PPC_STD_MMU_64
+ if (what == MCE_FLUSH_SLB) {
+ flush_and_reload_slb();
+ return 1;
+ }
+#endif
+ if (what == MCE_FLUSH_ERAT) {
+ flush_erat();
+ return 1;
+ }
+ if (what == MCE_FLUSH_TLB) {
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+ cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat)
+{
+ if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB))
+ dsisr &= ~slb;
+ if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT))
+ dsisr &= ~erat;
+ if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB))
+ dsisr &= ~tlb;
+ /* Any other errors we don't understand? */
+ if (dsisr)
+ return 0;
+ return 1;
+}
+
static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
{
long handled = 1;
long handled = 1;
struct mce_error_info mce_error_info = { 0 };
+ mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+ mce_error_info.initiator = MCE_INITIATOR_CPU;
+
srr1 = regs->msr;
nip = regs->nip;
long handled = 1;
struct mce_error_info mce_error_info = { 0 };
+ mce_error_info.severity = MCE_SEV_ERROR_SYNC;
+ mce_error_info.initiator = MCE_INITIATOR_CPU;
+
srr1 = regs->msr;
nip = regs->nip;
save_mce_event(regs, handled, &mce_error_info, nip, addr);
return handled;
}
+
+static int mce_handle_derror_p9(struct pt_regs *regs)
+{
+ uint64_t dsisr = regs->dsisr;
+
+ return mce_handle_flush_derrors(dsisr,
+ P9_DSISR_MC_SLB_PARITY_MFSLB |
+ P9_DSISR_MC_SLB_MULTIHIT_MFSLB,
+
+ P9_DSISR_MC_TLB_MULTIHIT_MFTLB,
+
+ P9_DSISR_MC_ERAT_MULTIHIT);
+}
+
+static int mce_handle_ierror_p9(struct pt_regs *regs)
+{
+ uint64_t srr1 = regs->msr;
+
+ switch (P9_SRR1_MC_IFETCH(srr1)) {
+ case P9_SRR1_MC_IFETCH_SLB_PARITY:
+ case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ return mce_flush(MCE_FLUSH_SLB);
+ case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ return mce_flush(MCE_FLUSH_TLB);
+ case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+ return mce_flush(MCE_FLUSH_ERAT);
+ default:
+ return 0;
+ }
+}
+
+static void mce_get_derror_p9(struct pt_regs *regs,
+ struct mce_error_info *mce_err, uint64_t *addr)
+{
+ uint64_t dsisr = regs->dsisr;
+
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->initiator = MCE_INITIATOR_CPU;
+
+ if (dsisr & P9_DSISR_MC_USER_TLBIE)
+ *addr = regs->nip;
+ else
+ *addr = regs->dar;
+
+ if (dsisr & P9_DSISR_MC_UE) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) {
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT;
+ } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) {
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT;
+ } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ } else if (dsisr & P9_DSISR_MC_USER_TLBIE) {
+ mce_err->error_type = MCE_ERROR_TYPE_USER;
+ mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE;
+ } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ } else if (dsisr & P9_DSISR_MC_RA_LOAD) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD;
+ } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+ } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) {
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+ }
+}
+
+static void mce_get_ierror_p9(struct pt_regs *regs,
+ struct mce_error_info *mce_err, uint64_t *addr)
+{
+ uint64_t srr1 = regs->msr;
+
+ switch (P9_SRR1_MC_IFETCH(srr1)) {
+ case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+ case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+ mce_err->severity = MCE_SEV_FATAL;
+ break;
+ default:
+ mce_err->severity = MCE_SEV_ERROR_SYNC;
+ break;
+ }
+
+ mce_err->initiator = MCE_INITIATOR_CPU;
+
+ *addr = regs->nip;
+
+ switch (P9_SRR1_MC_IFETCH(srr1)) {
+ case P9_SRR1_MC_IFETCH_UE:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_SLB_PARITY:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case P9_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ break;
+ case P9_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_LINK_TIMEOUT:
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT;
+ break;
+ case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT:
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT;
+ break;
+ case P9_SRR1_MC_IFETCH_RA:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_RA_TABLEWALK:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_STORE;
+ break;
+ case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT:
+ mce_err->error_type = MCE_ERROR_TYPE_LINK;
+ mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT;
+ break;
+ case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN:
+ mce_err->error_type = MCE_ERROR_TYPE_RA;
+ mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN;
+ break;
+ default:
+ break;
+ }
+}
+
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
+{
+ uint64_t nip, addr;
+ long handled;
+ struct mce_error_info mce_error_info = { 0 };
+
+ nip = regs->nip;
+
+ if (P9_SRR1_MC_LOADSTORE(regs->msr)) {
+ handled = mce_handle_derror_p9(regs);
+ mce_get_derror_p9(regs, &mce_error_info, &addr);
+ } else {
+ handled = mce_handle_ierror_p9(regs);
+ mce_get_ierror_p9(regs, &mce_error_info, &addr);
+ }
+
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
+ return handled;
+}
sdsync = POWER7P_MMCRA_SDAR_VALID;
else if (ppmu->flags & PPMU_ALT_SIPR)
sdsync = POWER6_MMCRA_SDSYNC;
+ else if (ppmu->flags & PPMU_NO_SIAR)
+ sdsync = MMCRA_SAMPLE_ENABLE;
else
sdsync = MMCRA_SDSYNC;
return !(event & ~valid_mask);
}
-static u64 mmcra_sdar_mode(u64 event)
+static inline bool is_event_marked(u64 event)
{
- if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1))
- return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+ if (event & EVENT_IS_MARKED)
+ return true;
+
+ return false;
+}
- return MMCRA_SDAR_MODE_TLB;
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+ /*
+ * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
+ * continuous sampling mode.
+ *
+ * In case of Power8:
+ * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
+ * mode and will be unchanged when setting MMCRA[63] (Marked events).
+ *
+ * In case of Power9:
+ * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
+ * or if the group already has any marked events.
+ * Non-marked events (for DD1):
+ * MMCRA[SDAR_MODE] will be set to 0b01.
+ * For the rest:
+ * MMCRA[SDAR_MODE] will be set from the event code.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
+ *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+ else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+ *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+ else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ *mmcra |= MMCRA_SDAR_MODE_TLB;
+ } else
+ *mmcra |= MMCRA_SDAR_MODE_TLB;
}
static u64 thresh_cmp_val(u64 value)
value |= CNST_L1_QUAL_VAL(cache);
}
- if (event & EVENT_IS_MARKED) {
+ if (is_event_marked(event)) {
mask |= CNST_SAMPLE_MASK;
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
}
}
/* In continuous sampling mode, update SDAR on TLB miss */
- mmcra |= mmcra_sdar_mode(event[i]);
+ mmcra_sdar_mode(event[i], &mmcra);
if (event[i] & EVENT_IS_L1) {
cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
}
- if (event[i] & EVENT_IS_MARKED) {
+ if (is_event_marked(event[i])) {
mmcra |= MMCRA_SAMPLE_ENABLE;
val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
#define MMCRA_THR_CMP_SHIFT 32
#define MMCRA_SDAR_MODE_SHIFT 42
#define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
#define MMCRA_IFM_SHIFT 30
/* MMCR1 Threshold Compare bit constant for power9 */
struct machine_check_event *evt)
{
int recovered = 0;
- uint64_t ea = get_mce_fault_addr(evt);
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
/* Platform corrected itself */
recovered = 1;
- } else if (ea && !is_kernel_addr(ea)) {
+ } else if (evt->severity == MCE_SEV_FATAL) {
+ /* Fatal machine check */
+ pr_err("Machine check interrupt is fatal\n");
+ recovered = 0;
+ } else if ((evt->severity == MCE_SEV_ERROR_SYNC) &&
+ (user_mode(regs) && !is_global_init(current))) {
/*
- * Faulting address is not in kernel text. We should be fine.
- * We need to find which process uses this address.
* For now, kill the task if we have received exception when
* in userspace.
*
* TODO: Queue up this address for hwpoisoning later.
*/
- if (user_mode(regs) && !is_global_init(current)) {
- _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
- recovered = 1;
- } else
- recovered = 0;
- } else if (user_mode(regs) && !is_global_init(current) &&
- evt->severity == MCE_SEV_ERROR_SYNC) {
- /*
- * If we have received a synchronous error when in userspace
- * kill the task.
- */
_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
recovered = 1;
}
}
static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- struct pci_bus *bus)
+ struct pci_bus *bus,
+ bool add_to_group)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
set_dma_offset(&dev->dev, pe->tce_bypass_base);
- iommu_add_device(&dev->dev);
+ if (add_to_group)
+ iommu_add_device(&dev->dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate,
+ add_to_group);
}
}
set_iommu_table_base(&pe->pdev->dev, tbl);
iommu_add_device(&pe->pdev->dev);
} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
return;
fail:
pnv_pci_ioda2_set_bypass(pe, false);
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+ if (pe->pbus)
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
pnv_ioda2_table_free(tbl);
}
table_group);
pnv_pci_ioda2_setup_default_config(pe);
+ if (pe->pbus)
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
level_shift = entries_shift + 3;
level_shift = max_t(unsigned, level_shift, PAGE_SHIFT);
+ if ((level_shift - 3) * levels + page_shift >= 60)
+ return -EINVAL;
+
/* Allocate TCE table */
addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
levels, tce_table_size, &offset, &total_allocated);
if (pe->flags & PNV_IODA_PE_DEV)
iommu_add_device(&pe->pdev->dev);
else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
}
#ifdef CONFIG_PCI_MSI
.data
.balign 8
-.globl sha256_digest
-sha256_digest:
+.globl purgatory_sha256_digest
+purgatory_sha256_digest:
.skip 32
- .size sha256_digest, . - sha256_digest
+ .size purgatory_sha256_digest, . - purgatory_sha256_digest
.balign 8
-.globl sha_regions
-sha_regions:
+.globl purgatory_sha_regions
+purgatory_sha_regions:
.skip 8 * 2 * 16
- .size sha_regions, . - sha_regions
+ .size purgatory_sha_regions, . - purgatory_sha_regions
ret = blkcipher_walk_done(desc, walk, nbytes - n);
}
if (k < n) {
- if (__ctr_paes_set_key(ctx) != 0)
+ if (__ctr_paes_set_key(ctx) != 0) {
+ if (locked)
+ spin_unlock(&ctrblk_lock);
return blkcipher_walk_done(desc, walk, -EIO);
+ }
}
}
if (locked)
#define _S390_CPUTIME_H
#include <linux/types.h>
-#include <asm/div64.h>
+#include <asm/timex.h>
#define CPUTIME_PER_USEC 4096ULL
#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
-typedef unsigned long long __nocast cputime_t;
-typedef unsigned long long __nocast cputime64_t;
-
#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
-static inline unsigned long __div(unsigned long long n, unsigned long base)
-{
- return n / base;
-}
-
/*
- * Convert cputime to microseconds and back.
+ * Convert cputime to microseconds.
*/
-static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+static inline u64 cputime_to_usecs(const u64 cputime)
{
- return (__force unsigned long long) cputime >> 12;
+ return cputime >> 12;
}
+/*
+ * Convert cputime to nanoseconds.
+ */
+#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
u64 arch_cpu_idle_time(int cpu);
/* by default, allow everything */
return true;
}
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- /* by default, allow everything */
- return true;
-}
#endif /* __S390_MMU_CONTEXT_H */
* ns = (todval * 125) >> 9;
*
* In order to avoid an overflow with the multiplication we can rewrite this.
- * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits)
+ * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits)
* we end up with
*
- * ns = ((2^32 * th + tl) * 125 ) >> 9;
- * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ * ns = ((2^9 * th + tl) * 125 ) >> 9;
+ * -> ns = (th * 125) + ((tl * 125) >> 9);
*
*/
static inline unsigned long long tod_to_ns(unsigned long long todval)
{
- unsigned long long ns;
-
- ns = ((todval >> 32) << 23) * 125;
- ns += ((todval & 0xffffffff) * 125) >> 9;
- return ns;
+ return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}
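A quick arithmetic check of the rewritten form (illustrative; per the cputime
definitions above, the TOD clock resolution is 2**-12 microseconds, so one
microsecond is todval = 4096):

	/*
	 * todval = 4096:  (4096 >> 9) * 125           = 8 * 125 = 1000
	 *                 ((4096 & 0x1ff) * 125) >> 9 = 0
	 * i.e. 1000 ns as expected, without forming the full todval * 125
	 * product that the original comment warns can overflow.
	 */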
#endif
#define __NR_copy_file_range 375
#define __NR_preadv2 376
#define __NR_pwritev2 377
-#define NR_syscalls 378
+/* Number 378 is reserved for guarded storage */
+#define __NR_statx 379
+#define NR_syscalls 380
/*
* There are some system calls that are not present on 64 bit, some
COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
jnz .Lpgm_svcper # -> single stepped svc
1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 3f
+ j 4f
2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
lg %r15,__LC_KERNEL_STACK
lgr %r14,%r12
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
jz 3f
mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-3: la %r11,STACK_FRAME_OVERHEAD(%r15)
- stg %r10,__THREAD_last_break(%r14)
+3: stg %r10,__THREAD_last_break(%r14)
+4: la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 4f
+ jz 5f
tmhh %r8,0x0001 # kernel per event ?
jz .Lpgm_kprobe
oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-4: REENABLE_IRQS
+5: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
static void __ipl_run(void *unused)
{
+ if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW)
+ diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
diag308(DIAG308_LOAD_CLEAR, NULL);
if (MACHINE_IS_VM)
__cpcmd("IPL", NULL, 0, NULL);
clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
/* Initialize per thread user and system timer values */
p->thread.user_timer = 0;
+ p->thread.guest_timer = 0;
p->thread.system_timer = 0;
+ p->thread.hardirq_timer = 0;
+ p->thread.softirq_timer = 0;
frame->sf.back_chain = 0;
/* new return point is ret_from_fork */
SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
SYSCALL(sys_preadv2,compat_sys_preadv2)
SYSCALL(sys_pwritev2,compat_sys_pwritev2)
+NI_SYSCALL
+SYSCALL(sys_statx,compat_sys_statx)
}
static void account_system_index_scaled(struct task_struct *p,
- cputime_t cputime, cputime_t scaled,
+ u64 cputime, u64 scaled,
enum cpu_usage_stat index)
{
p->stimescaled += cputime_to_nsecs(scaled);
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
pgste_t pgste;
pte_t *ptep;
pte_t pte;
bool dirty;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pgd = pgd_offset(mm, addr);
+ pud = pud_alloc(mm, pgd, addr);
+ if (!pud)
+ return false;
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return false;
+ /* We can't run guests backed by huge pages, but userspace can
+ * still set them up and then try to migrate them without any
+ * migration support.
+ */
+ if (pmd_large(*pmd))
+ return true;
+
+ ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (unlikely(!ptep))
return false;
*/
#include <linux/extable.h>
+#include <linux/ptrace.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
*/
#include <linux/extable.h>
+#include <linux/ptrace.h>
+#include <asm/extable.h>
int fixup_exception(struct pt_regs *regs)
{
return true;
}
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- /* by default, allow everything */
- return true;
-}
-
/*
* end asm-generic/mm_hooks.h functions
*/
extern void maybe_sigio_broken(int fd, int read);
extern void sigio_broken(int fd, int read);
-/* sys-x86_64/prctl.c */
-extern int os_arch_prctl(int pid, int code, unsigned long *addr);
+/* prctl.c */
+extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
/* tty.c */
extern int get_pty(void);
/* by default, allow everything */
return true;
}
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- /* by default, allow everything */
- return true;
-}
#endif
select HAVE_ARCH_KMEMCHECK
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
+ select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
bool
depends on STA2X11
+config HAVE_GENERIC_GUP
+ def_bool y
+
source "net/Kconfig"
source "drivers/Kconfig"
381 i386 pkey_alloc sys_pkey_alloc
382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx
+384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
d.p = 1; /* Present */
d.d = 1; /* 32-bit */
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
static int vgetcpu_online(unsigned int cpu)
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
+#include <asm/fixmap.h>
#include <linux/smp.h>
#include <linux/percpu.h>
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
{
return per_cpu(gdt_page, cpu).gdt;
}
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_gdt_rw(void)
+{
+ return this_cpu_ptr(&gdt_page)->gdt;
+}
+
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+{
+ return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+{
+ unsigned int idx = get_cpu_gdt_ro_index(cpu);
+ return (struct desc_struct *)__fix_to_virt(idx);
+}
+
+/* Provide the current read-only GDT */
+static inline struct desc_struct *get_current_gdt_ro(void)
+{
+ return get_cpu_gdt_ro(smp_processor_id());
+}
+
+/* Provide the physical address of the GDT page. */
+static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
+{
+ return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
+}
+
#ifdef CONFIG_X86_64
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
- struct desc_struct *d = get_cpu_gdt_table(cpu);
+ struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
entries * LDT_ENTRY_SIZE - 1);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
&ldt, DESC_LDT);
asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
}
}
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+ asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+ asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+ asm volatile("sidt %0":"=m" (*dtr));
+}
+
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+ struct desc_ptr gdt;
+ int cpu = raw_smp_processor_id();
+ bool restore = false;
+ struct desc_struct *fixmap_gdt;
+
+ native_store_gdt(&gdt);
+ fixmap_gdt = get_cpu_gdt_ro(cpu);
+
+ /*
+ * If the current GDT is the read-only fixmap, swap to the original
+ * writeable version. Swap back at the end.
+ */
+ if (gdt.address == (unsigned long)fixmap_gdt) {
+ load_direct_gdt(cpu);
+ restore = true;
+ }
+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+ if (restore)
+ load_fixmap_gdt(cpu);
+}
+#else
static inline void native_load_tr_desc(void)
{
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}
+#endif
+
+static inline unsigned long native_store_tr(void)
+{
+ unsigned long tr;
+
+ asm volatile("str %0":"=r" (tr));
+
+ return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+ struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+ unsigned int i;
+
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
DECLARE_PER_CPU(bool, __tss_limit_invalid);
static inline void force_reload_TR(void)
{
- struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
+ struct desc_struct *d = get_current_gdt_rw();
tss_desc tss;
memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
this_cpu_write(__tss_limit_invalid, true);
}
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
- asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
- asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
- asm volatile("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
- asm volatile("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
- unsigned long tr;
-
- asm volatile("str %0":"=r" (tr));
-
- return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- unsigned int i;
-
- for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
/* This intentionally ignores lm, since 32-bit apps don't have that field. */
#define LDT_empty(info) \
((info)->base_addr == 0 && \
} \
} while (0)
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+ return IS_ENABLED(CONFIG_X86_32) ||
+ (IS_ENABLED(CONFIG_COMPAT) &&
+ test_thread_flag(TIF_ADDR32));
+}
+
+extern unsigned long tasksize_32bit(void);
+extern unsigned long tasksize_64bit(void);
+extern unsigned long get_mmap_base(int is_legacy);
+
#ifdef CONFIG_X86_32
+#define __STACK_RND_MASK(is32bit) (0x7ff)
#define STACK_RND_MASK (0x7ff)
#define ARCH_DLINFO ARCH_DLINFO_IA32
#else /* CONFIG_X86_32 */
/* 1GB for 64bit, 8MB for 32bit */
-#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
+#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
#define ARCH_DLINFO \
do { \
int uses_interp);
#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static inline int mmap_is_ia32(void)
-{
- return IS_ENABLED(CONFIG_X86_32) ||
- (IS_ENABLED(CONFIG_COMPAT) &&
- test_thread_flag(TIF_ADDR32));
-}
-
/* Do not change the values. See get_align_mask() */
enum align_flags {
ALIGN_VA_32 = BIT(0),
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
+ /* Fixmap entries to remap the GDTs, one per processor. */
+ FIX_GDT_REMAP_BEGIN,
+ FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
__end_of_permanent_fixed_addresses,
/*
};
#else
struct kimage_arch {
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
}
#endif
-static inline bool __pkru_allows_pkey(u16 pkey, bool write)
-{
- u32 pkru = read_pkru();
-
- if (!__pkru_allows_read(pkru, pkey))
- return false;
- if (write && !__pkru_allows_write(pkru, pkey))
- return false;
-
- return true;
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
return __pkru_allows_pkey(vma_pkey(vma), write);
}
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
-}
#endif /* _ASM_X86_MMU_CONTEXT_H */
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
-/* MISC_FEATURE_ENABLES non-architectural features */
-#define MSR_MISC_FEATURE_ENABLES 0x00000140
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
-#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
#define MSR_IA32_TSC_DEADLINE 0x000006E0
PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
val);
}
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
static inline pud_t __pud(pudval_t val)
{
pudval_t ret;
return ret;
}
+static inline void pud_clear(pud_t *pudp)
+{
+ set_pud(pudp, __pud(0));
+}
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ p4dval_t val = native_p4d_val(p4d);
+
+ if (sizeof(p4dval_t) > sizeof(long))
+ PVOP_VCALL3(pv_mmu_ops.set_p4d, p4dp,
+ val, (u64)val >> 32);
+ else
+ PVOP_VCALL2(pv_mmu_ops.set_p4d, p4dp,
+ val);
+}
+
+static inline void p4d_clear(p4d_t *p4dp)
+{
+ set_p4d(p4dp, __p4d(0));
+}
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+
+#error FIXME
+
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
{
pgdval_t val = native_pgd_val(pgd);
set_pgd(pgdp, __pgd(0));
}
-static inline void pud_clear(pud_t *pudp)
-{
- set_pud(pudp, __pud(0));
-}
+#endif /* CONFIG_PGTABLE_LEVELS == 5 */
#endif /* CONFIG_PGTABLE_LEVELS == 4 */
struct paravirt_callee_save pmd_val;
struct paravirt_callee_save make_pmd;
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
struct paravirt_callee_save pud_val;
struct paravirt_callee_save make_pud;
- void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
-#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+ void (*set_p4d)(p4d_t *p4dp, p4d_t p4dval);
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#error FIXME
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
+
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
+
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
struct pv_lazy_ops lazy_mode;
#endif /* CONFIG_X86_PAE */
#if CONFIG_PGTABLE_LEVELS > 3
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
- set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+ set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
___pud_free_tlb(tlb, pud);
}
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
+ set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+}
+
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ gfp_t gfp = GFP_KERNEL_ACCOUNT;
+
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT;
+ return (p4d_t *)get_zeroed_page(gfp);
+}
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
+
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+ unsigned long address)
+{
+ ___p4d_free_tlb(tlb, p4d);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
+#define gup_get_pte gup_get_pte
+/*
+ * WARNING: only to be used in the get_user_pages_fast() implementation.
+ *
+ * With get_user_pages_fast(), we walk down the pagetables without taking
+ * any locks. For this we would like to load the pointers atomically,
+ * but that is not possible (without expensive cmpxchg8b) on PAE. What
+ * we do have is the guarantee that a PTE will only either go from not
+ * present to present, or present to not present or both -- it will not
+ * switch to a completely different present page without a TLB flush in
+ * between; something that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ *
+ * ptep->pte_high = h;
+ * smp_wmb();
+ * ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ *
+ * ptep->pte_low = 0;
+ * smp_wmb();
+ * ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees 'l' iff pte_high
+ * sees 'h'. We load pte_high *after* loading pte_low, which ensures we
+ * don't see an older value of pte_high. *Then* we recheck pte_low,
+ * which ensures that we haven't picked up a changed pte high. We might
+ * have gotten rubbish values from pte_low and pte_high, but we are
+ * guaranteed that pte_low will not have the present bit set *unless*
+ * it is 'l'. Because get_user_pages_fast() only operates on present ptes
+ * we're safe.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+ pte_t pte;
+
+ do {
+ pte.pte_low = ptep->pte_low;
+ smp_rmb();
+ pte.pte_high = ptep->pte_high;
+ smp_rmb();
+ } while (unlikely(pte.pte_low != ptep->pte_low));
+
+ return pte;
+}
+
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
+typedef u64 p4dval_t;
typedef u64 pgdval_t;
typedef u64 pgprotval_t;
#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
-#ifndef __PAGETABLE_PUD_FOLDED
+#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
#define pgd_clear(pgd) native_pgd_clear(pgd)
#endif
+#ifndef set_p4d
+# define set_p4d(p4dp, p4d) native_set_p4d(p4dp, p4d)
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define p4d_clear(p4d) native_p4d_clear(p4d)
+#endif
+
#ifndef set_pud
# define set_pud(pudp, pud) native_set_pud(pudp, pud)
#endif
#define pgd_val(x) native_pgd_val(x)
#define __pgd(x) native_make_pgd(x)
+#ifndef __PAGETABLE_P4D_FOLDED
+#define p4d_val(x) native_p4d_val(x)
+#define __p4d(x) native_make_p4d(x)
+#endif
+
#ifndef __PAGETABLE_PUD_FOLDED
#define pud_val(x) native_pud_val(x)
#define __pud(x) native_make_pud(x)
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+ return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
+}
+
+static inline int p4d_large(p4d_t p4d)
+{
+ /* No 512 GiB pages yet */
+ return 0;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
return 0;
}
#endif
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+ return 0;
+}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#define pte_pgprot(x) __pgprot(pte_flags(x))
#define pmd_pgprot(x) __pgprot(pmd_flags(x))
#define pud_pgprot(x) __pgprot(pud_flags(x))
+#define p4d_pgprot(x) __pgprot(p4d_flags(x))
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/log2.h>
+#include <asm/fixmap.h>
static inline int pte_none(pte_t pte)
{
}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+static inline unsigned long pud_index(unsigned long address)
+{
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
#if CONFIG_PGTABLE_LEVELS > 3
+static inline int p4d_none(p4d_t p4d)
+{
+ return (native_p4d_val(p4d) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ return p4d_flags(p4d) & _PAGE_PRESENT;
+}
+
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+ return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d));
+}
+
+/*
+ * Currently stuck as a macro due to indirect forward reference to
+ * linux/mmzone.h's __section_mem_map_addr() definition:
+ */
+#define p4d_page(p4d) \
+ pfn_to_page((p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT)
+
+/* Find an entry in the third-level page table.. */
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+}
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
+static inline unsigned long p4d_index(unsigned long address)
+{
+ return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
+}
+
+#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
return pgd_flags(pgd) & _PAGE_PRESENT;
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
/* to find an entry in a page-table-directory. */
-static inline unsigned long pud_index(unsigned long address)
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
-}
-
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
-{
- return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+ return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
static inline int pgd_bad(pgd_t pgd)
*/
return !native_pgd_val(pgd);
}
-#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* __ASSEMBLY__ */
#endif
}
+static inline bool __pkru_allows_pkey(u16 pkey, bool write)
+{
+ u32 pkru = read_pkru();
+
+ if (!__pkru_allows_read(pkru, pkey))
+ return false;
+ if (write && !__pkru_allows_write(pkru, pkey))
+ return false;
+
+ return true;
+}
+
+/*
+ * 'pteval' can come from a PTE, PMD or PUD. We only check
+ * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
+ * same value on all 3 types.
+ */
+static inline bool __pte_access_permitted(unsigned long pteval, bool write)
+{
+ unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
+
+ if (write)
+ need_pte_bits |= _PAGE_RW;
+
+ if ((pteval & need_pte_bits) != need_pte_bits)
+ return 0;
+
+ return __pkru_allows_pkey(pte_flags_pkey(pteval), write);
+}
+
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ return __pte_access_permitted(pte_val(pte), write);
+}
+
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+ return __pte_access_permitted(pmd_val(pmd), write);
+}
+
+#define pud_access_permitted pud_access_permitted
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+ return __pte_access_permitted(pud_val(pud), write);
+}
+
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
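For background on the __pkru_allows_pkey() check added above: PKRU packs two bits per protection key, an access-disable bit and a write-disable bit, and a key permits writes only when both bits are clear. A hedged, standalone sketch of that layout (editor illustration; the example_/EXAMPLE_ names and user-space types are not from this series):

    #include <stdbool.h>
    #include <stdint.h>

    #define EXAMPLE_PKRU_AD_BIT         0x1  /* access (read) disable */
    #define EXAMPLE_PKRU_WD_BIT         0x2  /* write disable */
    #define EXAMPLE_PKRU_BITS_PER_PKEY  2    /* two bits per key */

    static bool example_pkru_allows_read(uint32_t pkru, uint16_t pkey)
    {
            return !(pkru & (EXAMPLE_PKRU_AD_BIT <<
                             (pkey * EXAMPLE_PKRU_BITS_PER_PKEY)));
    }

    static bool example_pkru_allows_write(uint32_t pkru, uint16_t pkey)
    {
            /* Access-disable also forbids writes, so test both bits. */
            return !(pkru & ((EXAMPLE_PKRU_AD_BIT | EXAMPLE_PKRU_WD_BIT) <<
                             (pkey * EXAMPLE_PKRU_BITS_PER_PKEY)));
    }

pte/pmd/pud_access_permitted() then combine this per-key decision with the _PAGE_PRESENT/_PAGE_USER/_PAGE_RW test shown above.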
*/
#ifndef __ASSEMBLY__
#include <asm/processor.h>
-#include <asm/fixmap.h>
#include <linux/threads.h>
#include <asm/paravirt.h>
struct mm_struct;
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
-
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
#endif
}
+static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ *p4dp = p4d;
+}
+
+static inline void native_p4d_clear(p4d_t *p4d)
+{
+ native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
+}
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
-#endif /* !__ASSEMBLY__ */
+#define gup_fast_permitted gup_fast_permitted
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
+ int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long)nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if (end < start)
+ return false;
+ if (end >> __VIRTUAL_MASK_SHIFT)
+ return false;
+ return true;
+}
+#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END _AC(0xffffffffff000000, UL)
+/* The module area ends at the start of the fixmap */
+#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
return native_pgd_val(pgd) & PTE_FLAGS_MASK;
}
-#if CONFIG_PGTABLE_LEVELS > 3
-#include <asm-generic/5level-fixup.h>
+#if CONFIG_PGTABLE_LEVELS > 4
+
+#error FIXME
+#else
+#include <asm-generic/pgtable-nop4d.h>
+
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+ return native_pgd_val(p4d.pgd);
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
typedef struct { pudval_t pud; } pud_t;
static inline pud_t native_make_pud(pmdval_t val)
return pud.pud;
}
#else
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
static inline pudval_t native_pud_val(pud_t pud)
{
- return native_pgd_val(pud.pgd);
+ return native_pgd_val(pud.p4d.pgd);
}
#endif
return pmd.pmd;
}
#else
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
- return native_pgd_val(pmd.pud.pgd);
+ return native_pgd_val(pmd.pud.p4d.pgd);
}
#endif
+static inline p4dval_t p4d_pfn_mask(p4d_t p4d)
+{
+ /* No 512 GiB huge pages yet */
+ return PTE_PFN_MASK;
+}
+
+static inline p4dval_t p4d_flags_mask(p4d_t p4d)
+{
+ return ~p4d_pfn_mask(p4d);
+}
+
+static inline p4dval_t p4d_flags(p4d_t p4d)
+{
+ return native_p4d_val(p4d) & p4d_flags_mask(p4d);
+}
+
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
+ PG_LEVEL_512G,
PG_LEVEL_NUM
};
extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
+extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
/*
* User space process size: 3GB (default).
*/
+#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
+#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
+DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+
/* Register/unregister a process' MPX related resource */
#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
#ifdef CONFIG_X86_32
extern int reboot_force;
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int option,
+ unsigned long cpuid_enabled);
#endif /* _ASM_X86_PROTO_H */
--- /dev/null
+#ifndef _ASM_X86_PURGATORY_H
+#define _ASM_X86_PURGATORY_H
+
+#ifndef __ASSEMBLY__
+#include <linux/purgatory.h>
+
+extern void purgatory(void);
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ * lookup in kexec works
+ */
+extern unsigned long purgatory_backup_dest;
+extern unsigned long purgatory_backup_src;
+extern unsigned long purgatory_backup_sz;
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_PURGATORY_H */
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
- struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+ struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
+#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_NOHZ (1 << TIF_NOHZ)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_THREAD_INFO_H */
}
}
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ cr4 ^= mask;
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
static inline void __flush_tlb_all(void)
{
- if (static_cpu_has(X86_FEATURE_PGE))
+ if (boot_cpu_has(X86_FEATURE_PGE))
__flush_tlb_global();
else
__flush_tlb();
#define pmd_val_ma(v) ((v).pmd)
#ifdef __PAGETABLE_PUD_FOLDED
-#define pud_val_ma(v) ((v).pgd.pgd)
+#define pud_val_ma(v) ((v).p4d.pgd.pgd)
#else
#define pud_val_ma(v) ((v).pud)
#endif
#define __pmd_ma(x) ((pmd_t) { (x) } )
-#define pgd_val_ma(x) ((x).pgd)
+#ifdef __PAGETABLE_P4D_FOLDED
+#define p4d_val_ma(x) ((x).pgd.pgd)
+#else
+#define p4d_val_ma(x) ((x).p4d)
+#endif
void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid);
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
+
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
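The two new options give each task control over CPUID faulting: ARCH_SET_CPUID with arg2 == 0 makes a later CPUID instruction fault (delivered as SIGSEGV) until it is re-enabled or the task execs, and ARCH_GET_CPUID reports the current mode. A hedged user-space sketch (editor illustration, not part of the series; assumes an x86-64 glibc exposing SYS_arch_prctl):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef ARCH_GET_CPUID
    # define ARCH_GET_CPUID 0x1011
    # define ARCH_SET_CPUID 0x1012
    #endif

    int main(void)
    {
            /* Returns 1 if CPUID currently works, 0 if it faults. */
            long mode = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);
            printf("CPUID is %s\n", mode == 1 ? "enabled" : "disabled");

            /* Fails with ENODEV on CPUs without CPUID faulting. */
            if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) != 0)
                    perror("ARCH_SET_CPUID");
            return 0;
    }

On exec the flag is cleared again, which is what the new arch_setup_new_exec() hook below takes care of.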
#ifdef CONFIG_SMP
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
- (unsigned long)get_cpu_gdt_table(smp_processor_id());
+ (unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
cpu = get_cpu();
BUG_ON(cpu != 0);
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
cpu = get_cpu();
BUG_ON(cpu != 0);
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
* Note we only set APM segments on CPU zero, since we pin the APM
* code to that CPU.
*/
- gdt = get_cpu_gdt_table(0);
+ gdt = get_cpu_gdt_rw(0);
set_desc_base(&gdt[APM_CS >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
set_desc_base(&gdt[APM_CS_16 >> 3],
load_stack_canary_segment();
}
+/* Set up the fixmap mapping only once per processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+#ifdef CONFIG_X86_64
+ /* On 64-bit systems, we use a read-only fixmap GDT. */
+ pgprot_t prot = PAGE_KERNEL_RO;
+#else
+ /*
+ * On native 32-bit systems, the GDT cannot be read-only because
+ * our double fault handler uses a task gate, and entering through
+ * a task gate needs to change an available TSS to busy. If the GDT
+ * is read-only, that will triple fault.
+ *
+ * On Xen PV, the GDT must be read-only because the hypervisor requires
+ * it.
+ */
+ pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+ PAGE_KERNEL_RO : PAGE_KERNEL;
+#endif
+
+ __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
+}
+
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
+{
+ struct desc_ptr gdt_descr;
+
+ gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL_GPL(load_direct_gdt);
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+ struct desc_ptr gdt_descr;
+
+ gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL_GPL(load_fixmap_gdt);
+
/*
* Current gdt points %fs at the "master" per-cpu area: after this,
* it's on the real one.
*/
void switch_to_new_gdt(int cpu)
{
- struct desc_ptr gdt_descr;
-
- gdt_descr.address = (long)get_cpu_gdt_table(cpu);
- gdt_descr.size = GDT_SIZE - 1;
- load_gdt(&gdt_descr);
+ /* Load the original GDT */
+ load_direct_gdt(cpu);
/* Reload the per-cpu base */
-
load_percpu_segment(cpu);
}
if (is_uv_system())
uv_cpu_init();
+
+ setup_fixmap_gdt(cpu);
+ load_fixmap_gdt(cpu);
}
#else
dbg_restore_debug_regs();
fpu__init_cpu();
+
+ setup_fixmap_gdt(cpu);
+ load_fixmap_gdt(cpu);
}
#endif
return;
}
- if (ring3mwait_disabled) {
- msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
- MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+ if (ring3mwait_disabled)
return;
- }
-
- msr_set_bit(MSR_MISC_FEATURE_ENABLES,
- MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+ this_cpu_or(msr_misc_features_shadow,
+ 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
if (c == &boot_cpu_data)
ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
init_intel_energy_perf(c);
}
+static void init_cpuid_fault(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
+ if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
+ set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
+ }
+}
+
+static void init_intel_misc_features(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
+ return;
+
+ /* Clear all MISC features */
+ this_cpu_write(msr_misc_features_shadow, 0);
+
+ /* Check features and update capabilities and shadow control bits */
+ init_cpuid_fault(c);
+ probe_xeon_phi_r3mwait(c);
+
+ msr = this_cpu_read(msr_misc_features_shadow);
+ wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
+}
+
static void init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
init_intel_energy_perf(c);
- probe_xeon_phi_r3mwait(c);
+ init_intel_misc_features(c);
}
#ifdef CONFIG_X86_32
pgd_t *pgd, pmd_t *pmd, pte_t *pte,
unsigned long vaddr, unsigned long paddr)
{
+ p4d_t *p4d;
pud_t *pud;
pgd += pgd_index(vaddr);
if (!(pgd_val(*pgd) & _PAGE_PRESENT))
set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
#endif
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
if (!(pmd_val(*pmd) & _PAGE_PRESENT))
set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
static void free_transition_pgtable(struct kimage *image)
{
+ free_page((unsigned long)image->arch.p4d);
free_page((unsigned long)image->arch.pud);
free_page((unsigned long)image->arch.pmd);
free_page((unsigned long)image->arch.pte);
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
pgd += pgd_index(vaddr);
if (!pgd_present(*pgd)) {
+ p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
+ if (!p4d)
+ goto err;
+ image->arch.p4d = p4d;
+ set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+ }
+ p4d = p4d_offset(pgd, vaddr);
+ if (!p4d_present(*p4d)) {
pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
if (!pud)
goto err;
image->arch.pud = pud;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
}
- pud = pud_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
if (!pud_present(*pud)) {
pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
if (!pmd)
/* Setup copying of backup region */
if (image->type == KEXEC_TYPE_CRASH) {
- ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+ ret = kexec_purgatory_get_set_symbol(image,
+ "purgatory_backup_dest",
&image->arch.backup_load_addr,
sizeof(image->arch.backup_load_addr), 0);
if (ret)
return ret;
- ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+ ret = kexec_purgatory_get_set_symbol(image,
+ "purgatory_backup_src",
&image->arch.backup_src_start,
sizeof(image->arch.backup_src_start), 0);
if (ret)
return ret;
- ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+ ret = kexec_purgatory_get_set_symbol(image,
+ "purgatory_backup_sz",
&image->arch.backup_src_sz,
sizeof(image->arch.backup_src_sz), 0);
if (ret)
.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
.pud_val = PTE_IDENT,
.make_pud = PTE_IDENT,
- .set_pgd = native_set_pgd,
-#endif
+ .set_p4d = native_set_p4d,
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+#error FIXME
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
+#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
.pte_val = PTE_IDENT,
#include <asm/vm86.h>
#include <asm/switch_to.h>
#include <asm/desc.h>
+#include <asm/prctl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
fpu__clear(&tsk->thread.fpu);
}
-static void hard_disable_TSC(void)
-{
- cr4_set_bits(X86_CR4_TSD);
-}
-
void disable_TSC(void)
{
preempt_disable();
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_disable_TSC();
+ cr4_set_bits(X86_CR4_TSD);
preempt_enable();
}
-static void hard_enable_TSC(void)
-{
- cr4_clear_bits(X86_CR4_TSD);
-}
-
static void enable_TSC(void)
{
preempt_disable();
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_enable_TSC();
+ cr4_clear_bits(X86_CR4_TSD);
preempt_enable();
}
return 0;
}
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss)
-{
- struct thread_struct *prev, *next;
-
- prev = &prev_p->thread;
- next = &next_p->thread;
+DEFINE_PER_CPU(u64, msr_misc_features_shadow);
- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
+static void set_cpuid_faulting(bool on)
+{
+ u64 msrval;
- debugctl &= ~DEBUGCTLMSR_BTF;
- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
- debugctl |= DEBUGCTLMSR_BTF;
+ msrval = this_cpu_read(msr_misc_features_shadow);
+ msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+ msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
+ this_cpu_write(msr_misc_features_shadow, msrval);
+ wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+}
- update_debugctlmsr(debugctl);
+static void disable_cpuid(void)
+{
+ preempt_disable();
+ if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOCPUID in the current running context.
+ */
+ set_cpuid_faulting(true);
}
+ preempt_enable();
+}
- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
- hard_disable_TSC();
- else
- hard_enable_TSC();
+static void enable_cpuid(void)
+{
+ preempt_disable();
+ if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOCPUID in the current running context.
+ */
+ set_cpuid_faulting(false);
}
+ preempt_enable();
+}
+
+static int get_cpuid_mode(void)
+{
+ return !test_thread_flag(TIF_NOCPUID);
+}
+
+static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+{
+ if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+ return -ENODEV;
+
+ if (cpuid_enabled)
+ enable_cpuid();
+ else
+ disable_cpuid();
+
+ return 0;
+}
+
+/*
+ * Called immediately after a successful exec.
+ */
+void arch_setup_new_exec(void)
+{
+ /* If cpuid was previously disabled for this task, re-enable it. */
+ if (test_thread_flag(TIF_NOCPUID))
+ enable_cpuid();
+}
- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+static inline void switch_to_bitmap(struct tss_struct *tss,
+ struct thread_struct *prev,
+ struct thread_struct *next,
+ unsigned long tifp, unsigned long tifn)
+{
+ if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
-
/*
* Make sure that the TSS limit is correct for the CPU
* to notice the IO bitmap.
*/
refresh_tss_limit();
- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+ } else if (tifp & _TIF_IO_BITMAP) {
/*
* Clear any possible leftover bits:
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss)
+{
+ struct thread_struct *prev, *next;
+ unsigned long tifp, tifn;
+
+ prev = &prev_p->thread;
+ next = &next_p->thread;
+
+ tifn = READ_ONCE(task_thread_info(next_p)->flags);
+ tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+ switch_to_bitmap(tss, prev, next, tifp, tifn);
+
propagate_user_return_notify(prev_p, next_p);
+
+ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+ arch_has_block_step()) {
+ unsigned long debugctl, msk;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ debugctl &= ~DEBUGCTLMSR_BTF;
+ msk = tifn & _TIF_BLOCKSTEP;
+ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ }
+
+ if ((tifp ^ tifn) & _TIF_NOTSC)
+ cr4_toggle_bits(X86_CR4_TSD);
+
+ if ((tifp ^ tifn) & _TIF_NOCPUID)
+ set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
}
/*
put_task_stack(p);
return ret;
}
+
+long do_arch_prctl_common(struct task_struct *task, int option,
+ unsigned long cpuid_enabled)
+{
+ switch (option) {
+ case ARCH_GET_CPUID:
+ return get_cpuid_mode();
+ case ARCH_SET_CPUID:
+ return set_cpuid_mode(task, cpuid_enabled);
+ }
+
+ return -EINVAL;
+}
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/syscalls.h>
#include <asm/pgtable.h>
#include <asm/ldt.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>
#include <asm/intel_rdt.h>
+#include <asm/proto.h>
void __show_regs(struct pt_regs *regs, int all)
{
return prev_p;
}
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ return do_arch_prctl_common(current, option, arg2);
+}
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/syscalls.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
(struct user_desc __user *)tls, 0);
else
#endif
- err = do_arch_prctl(p, ARCH_SET_FS, tls);
+ err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err)
goto out;
}
}
#endif
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
int doit = task == current;
int cpu;
- switch (code) {
+ switch (option) {
case ARCH_SET_GS:
- if (addr >= TASK_SIZE_MAX)
+ if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
- task->thread.gsbase = addr;
+ task->thread.gsbase = arg2;
if (doit) {
load_gs_index(0);
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
}
put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
with gs */
- if (addr >= TASK_SIZE_MAX)
+ if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
- task->thread.fsbase = addr;
+ task->thread.fsbase = arg2;
if (doit) {
/* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0);
- ret = wrmsrl_safe(MSR_FS_BASE, addr);
+ ret = wrmsrl_safe(MSR_FS_BASE, arg2);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_FS_BASE, base);
else
base = task->thread.fsbase;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)arg2);
break;
}
case ARCH_GET_GS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gsbase;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)arg2);
break;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
case ARCH_MAP_VDSO_X32:
- return prctl_map_vdso(&vdso_image_x32, addr);
+ return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
case ARCH_MAP_VDSO_32:
- return prctl_map_vdso(&vdso_image_32, addr);
+ return prctl_map_vdso(&vdso_image_32, arg2);
# endif
case ARCH_MAP_VDSO_64:
- return prctl_map_vdso(&vdso_image_64, addr);
+ return prctl_map_vdso(&vdso_image_64, arg2);
#endif
default:
return ret;
}
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ long ret;
+
+ ret = do_arch_prctl_64(current, option, arg2);
+ if (ret == -EINVAL)
+ ret = do_arch_prctl_common(current, option, arg2);
+
+ return ret;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return do_arch_prctl(current, code, addr);
+ return do_arch_prctl_common(current, option, arg2);
}
+#endif
unsigned long KSTK_ESP(struct task_struct *task)
{
if (value >= TASK_SIZE_MAX)
return -EIO;
/*
- * When changing the segment base, use do_arch_prctl
+ * When changing the segment base, use do_arch_prctl_64
* to set either thread.fs or thread.fsindex and the
* corresponding GDT slot.
*/
if (child->thread.fsbase != value)
- return do_arch_prctl(child, ARCH_SET_FS, value);
+ return do_arch_prctl_64(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
/*
if (value >= TASK_SIZE_MAX)
return -EIO;
if (child->thread.gsbase != value)
- return do_arch_prctl(child, ARCH_SET_GS, value);
+ return do_arch_prctl_64(child, ARCH_SET_GS, value);
return 0;
#endif
}
Works just like arch_prctl, except that the arguments
are reversed. */
case PTRACE_ARCH_PRCTL:
- ret = do_arch_prctl(child, data, addr);
+ ret = do_arch_prctl_64(child, data, addr);
break;
#endif
.ident = "ASUS EeeBook X205TA",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"),
},
},
{ /* Handle problems with rebooting on ASUS EeeBook X205TAW */
kasan_init();
-#ifdef CONFIG_X86_32
- /* sync back kernel address range */
- clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
- /*
- * sync back low identity map too. It is used for example
- * in the 32-bit EFI stub.
- */
- clone_pgd_range(initial_page_table,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
-
tboot_probe();
map_vsyscall();
pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
0x2 | DESCTYPE_S, 0x8);
gdt.s = 1;
- write_gdt_entry(get_cpu_gdt_table(cpu),
+ write_gdt_entry(get_cpu_gdt_rw(cpu),
GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
#endif
}
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
+
+#ifdef CONFIG_X86_32
+ /*
+ * Sync back kernel address range. We want to make sure that
+ * all kernel mappings, including percpu mappings, are available
+ * in the smpboot asm. We can't reliably pick up percpu
+ * mappings using vmalloc_fault(), because exception dispatch
+ * needs percpu data.
+ */
+ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ /*
+ * sync back low identity map too. It is used for example
+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+#endif
}
unsigned long timeout;
idle->thread.sp = (unsigned long)task_pt_regs(idle);
- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
#include <linux/uaccess.h>
#include <linux/elf.h>
+#include <asm/elf.h>
+#include <asm/compat.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
static void find_start_end(unsigned long flags, unsigned long *begin,
unsigned long *end)
{
- if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
+ if (!in_compat_syscall() && (flags & MAP_32BIT)) {
/* This is usually used needed to map code in small
model, so it needs to be in the first 31bit. Limit
it to that. This means we need to move the
if (current->flags & PF_RANDOMIZE) {
*begin = randomize_page(*begin, 0x02000000);
}
- } else {
- *begin = current->mm->mmap_legacy_base;
- *end = TASK_SIZE;
+ return;
}
+
+ *begin = get_mmap_base(1);
+ *end = in_compat_syscall() ? tasksize_32bit() : tasksize_64bit();
}
unsigned long
return addr;
/* for MAP_32BIT mappings we force the legacy mmap base */
- if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
+ if (!in_compat_syscall() && (flags & MAP_32BIT))
goto bottomup;
/* requesting a specific address */
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = mm->mmap_base;
+ info.high_limit = get_mmap_base(0);
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
pgprot_t prot)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(&tboot_mm, vaddr);
- pud = pud_alloc(&tboot_mm, pgd, vaddr);
+ p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+ if (!p4d)
+ return -1;
+ pud = pud_alloc(&tboot_mm, p4d, vaddr);
if (!pud)
return -1;
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
cpu = get_cpu();
while (n-- > 0) {
- if (LDT_empty(info) || LDT_zero(info))
+ if (LDT_empty(info) || LDT_zero(info)) {
desc->a = desc->b = 0;
- else
+ } else {
fill_ldt(desc, info);
+
+ /*
+ * Always set the accessed bit so that the CPU
+ * doesn't try to write to the (read-only) GDT.
+ */
+ desc->type |= 1;
+ }
++info;
++desc;
}
struct vm_area_struct *vma;
spinlock_t *ptl;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
- pud = pud_offset(pgd, 0xA0000);
+ p4d = p4d_offset(pgd, 0xA0000);
+ if (p4d_none_or_clear_bad(p4d))
+ goto out;
+ pud = pud_offset(p4d, 0xA0000);
if (pud_none_or_clear_bad(pud))
goto out;
pmd = pmd_offset(pud, 0xA0000);
struct svm_cpu_data *sd;
uint64_t efer;
- struct desc_ptr gdt_descr;
struct desc_struct *gdt;
int me = raw_smp_processor_id();
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
- native_store_gdt(&gdt_descr);
- gdt = (struct desc_struct *)gdt_descr.address;
+ gdt = get_current_gdt_rw();
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
wrmsrl(MSR_EFER, efer | EFER_SVME);
* when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
*/
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
/*
* We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
*/
static unsigned long segment_base(u16 selector)
{
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
struct desc_struct *table;
unsigned long v;
if (!(selector & ~SEGMENT_RPL_MASK))
return 0;
- table = (struct desc_struct *)gdt->address;
+ table = get_current_gdt_ro();
if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
u16 ldt_selector = kvm_read_ldt();
#endif
if (vmx->host_state.msr_host_bndcfgs)
wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
- load_gdt(this_cpu_ptr(&host_gdt));
+ load_fixmap_gdt(raw_smp_processor_id());
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
}
if (!already_loaded) {
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+ void *gdt = get_current_gdt_ro();
unsigned long sysenter_esp;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
*/
vmcs_writel(HOST_TR_BASE,
(unsigned long)this_cpu_ptr(&cpu_tss));
- vmcs_writel(HOST_GDTR_BASE, gdt->address);
+ vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
/*
* VM exits change the host TR limit to 0x67 after a VM
ept_sync_global();
}
- native_store_gdt(this_cpu_ptr(&host_gdt));
-
return 0;
}
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 zero = 0;
gpa_t vmptr;
- struct vmcs12 *vmcs12;
- struct page *page;
if (!nested_vmx_check_permission(vcpu))
return 1;
if (vmptr == vmx->nested.current_vmptr)
nested_release_vmcs12(vmx);
- page = nested_get_page(vcpu, vmptr);
- if (page == NULL) {
- /*
- * For accurate processor emulation, VMCLEAR beyond available
- * physical memory should do nothing at all. However, it is
- * possible that a nested vmx bug, not a guest hypervisor bug,
- * resulted in this case, so let's shut down before doing any
- * more damage:
- */
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- return 1;
- }
- vmcs12 = kmap(page);
- vmcs12->launch_state = 0;
- kunmap(page);
- nested_release_page(page);
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12, launch_state),
+ &zero, sizeof(zero));
nested_free_vmcs02(vmx, vmptr);
return false;
page = nested_get_page(vcpu, vmcs12->msr_bitmap);
- if (!page) {
- WARN_ON(1);
+ if (!page)
return false;
- }
msr_bitmap_l1 = (unsigned long *)kmap(page);
memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
*/
static void vmx_leave_nested(struct kvm_vcpu *vcpu)
{
- if (is_guest_mode(vcpu))
+ if (is_guest_mode(vcpu)) {
+ to_vmx(vcpu)->nested.nested_run_pending = 0;
nested_vmx_vmexit(vcpu, -1, 0, 0);
+ }
free_nested(to_vmx(vcpu));
}
KCOV_INSTRUMENT_tlb.o := n
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
-#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+#define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+#define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT)
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...) \
({ \
}
}
-static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
- unsigned long P)
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
{
int i;
pte_t *start;
pgprotval_t prot;
- start = (pte_t *) pmd_page_vaddr(addr);
+ start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
prot = pte_flags(*start);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
#if PTRS_PER_PMD > 1
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
- unsigned long P)
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
{
int i;
pmd_t *start;
pgprotval_t prot;
- start = (pmd_t *) pud_page_vaddr(addr);
+ start = (pmd_t *)pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
}
-static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
- unsigned long P)
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
{
int i;
pud_t *start;
pgprotval_t prot;
pud_t *prev_pud = NULL;
- start = (pud_t *) pgd_page_vaddr(addr);
+ start = (pud_t *)p4d_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
}
#else
-#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
-#define pgd_large(a) pud_large(__pud(pgd_val(a)))
-#define pgd_none(a) pud_none(__pud(pgd_val(a)))
+#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
+#define p4d_large(a) pud_large(__pud(p4d_val(a)))
+#define p4d_none(a) pud_none(__pud(p4d_val(a)))
+#endif
+
+#if PTRS_PER_P4D > 1
+
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
+{
+ int i;
+ p4d_t *start;
+ pgprotval_t prot;
+
+ start = (p4d_t *)pgd_page_vaddr(addr);
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
+ if (!p4d_none(*start)) {
+ if (p4d_large(*start) || !p4d_present(*start)) {
+ prot = p4d_flags(*start);
+ note_page(m, st, __pgprot(prot), 2);
+ } else {
+ walk_pud_level(m, st, *start,
+ P + i * P4D_LEVEL_MULT);
+ }
+ } else
+ note_page(m, st, __pgprot(0), 2);
+
+ start++;
+ }
+}
+
+#else
+#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
+#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
+#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
#endif
static inline bool is_hypervisor_range(int idx)
prot = pgd_flags(*start);
note_page(m, &st, __pgprot(prot), 1);
} else {
- walk_pud_level(m, &st, *start,
+ walk_p4d_level(m, &st, *start,
i * PGD_LEVEL_MULT);
}
} else
{
unsigned index = pgd_index(address);
pgd_t *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
/*
* set_pgd(pgd, *pgd_k); here would be useless on PAE
* and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
+ * set_p4d/set_pud.
*/
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
+ p4d = p4d_offset(pgd, address);
+ p4d_k = p4d_offset(pgd_k, address);
+ if (!p4d_present(*p4d_k))
+ return NULL;
+
+ pud = pud_offset(p4d, address);
+ pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k))
return NULL;
{
pgd_t *base = __va(read_cr3());
pgd_t *pgd = &base[pgd_index(address)];
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
goto out;
#endif
- pmd = pmd_offset(pud_offset(pgd, address), address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
+ pmd = pmd_offset(pud, address);
printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
/*
static noinline int vmalloc_fault(unsigned long address)
{
pgd_t *pgd, *pgd_ref;
+ p4d_t *p4d, *p4d_ref;
pud_t *pud, *pud_ref;
pmd_t *pmd, *pmd_ref;
pte_t *pte, *pte_ref;
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
arch_flush_lazy_mmu_mode();
- } else {
+ } else if (CONFIG_PGTABLE_LEVELS > 4) {
+ /*
+ * With folded p4d, pgd_none() is always false, so the pgd may
+ * point to an empty page table entry and pgd_page_vaddr()
+ * will return garbage.
+ *
+ * We will do the correct sanity check on the p4d level.
+ */
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
+ /* With 4-level paging, copying happens on the p4d level. */
+ p4d = p4d_offset(pgd, address);
+ p4d_ref = p4d_offset(pgd_ref, address);
+ if (p4d_none(*p4d_ref))
+ return -1;
+
+ if (p4d_none(*p4d)) {
+ set_p4d(p4d, *p4d_ref);
+ arch_flush_lazy_mmu_mode();
+ } else {
+ BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
+ }
+
/*
* Below here mismatches are bugs because these lower tables
* are shared:
*/
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
+ pud = pud_offset(p4d, address);
+ pud_ref = pud_offset(p4d_ref, address);
if (pud_none(*pud_ref))
return -1;
{
pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
pgd_t *pgd = base + pgd_index(address);
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!pgd_present(*pgd))
goto out;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (bad_address(p4d))
+ goto bad;
+
+ printk("P4D %lx ", p4d_val(*p4d));
+ if (!p4d_present(*p4d) || p4d_large(*p4d))
+ goto out;
+
+ pud = pud_offset(p4d, address);
if (bad_address(pud))
goto bad;
spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!pgd_present(*pgd))
return 0;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ return 0;
+
+ if (p4d_large(*p4d))
+ return spurious_fault_check(error_code, (pte_t *) p4d);
+
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
return 0;
+++ /dev/null
-/*
- * Lockless get_user_pages_fast for x86
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmstat.h>
-#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/memremap.h>
-
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-
-static inline pte_t gup_get_pte(pte_t *ptep)
-{
-#ifndef CONFIG_X86_PAE
- return READ_ONCE(*ptep);
-#else
- /*
- * With get_user_pages_fast, we walk down the pagetables without taking
- * any locks. For this we would like to load the pointers atomically,
- * but that is not possible (without expensive cmpxchg8b) on PAE. What
- * we do have is the guarantee that a pte will only either go from not
- * present to present, or present to not present or both -- it will not
- * switch to a completely different present page without a TLB flush in
- * between; something that we are blocking by holding interrupts off.
- *
- * Setting ptes from not present to present goes:
- * ptep->pte_high = h;
- * smp_wmb();
- * ptep->pte_low = l;
- *
- * And present to not present goes:
- * ptep->pte_low = 0;
- * smp_wmb();
- * ptep->pte_high = 0;
- *
- * We must ensure here that the load of pte_low sees l iff pte_high
- * sees h. We load pte_high *after* loading pte_low, which ensures we
- * don't see an older value of pte_high. *Then* we recheck pte_low,
- * which ensures that we haven't picked up a changed pte high. We might
- * have got rubbish values from pte_low and pte_high, but we are
- * guaranteed that pte_low will not have the present bit set *unless*
- * it is 'l'. And get_user_pages_fast only operates on present ptes, so
- * we're safe.
- *
- * gup_get_pte should not be used or copied outside gup.c without being
- * very careful -- it does not atomically load the pte or anything that
- * is likely to be useful for you.
- */
- pte_t pte;
-
-retry:
- pte.pte_low = ptep->pte_low;
- smp_rmb();
- pte.pte_high = ptep->pte_high;
- smp_rmb();
- if (unlikely(pte.pte_low != ptep->pte_low))
- goto retry;
-
- return pte;
-#endif
-}
-
-static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
-{
- while ((*nr) - nr_start) {
- struct page *page = pages[--(*nr)];
-
- ClearPageReferenced(page);
- put_page(page);
- }
-}
-
-/*
- * 'pteval' can come from a pte, pmd or pud. We only check
- * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
- * same value on all 3 types.
- */
-static inline int pte_allows_gup(unsigned long pteval, int write)
-{
- unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;
-
- if (write)
- need_pte_bits |= _PAGE_RW;
-
- if ((pteval & need_pte_bits) != need_pte_bits)
- return 0;
-
- /* Check memory protection keys permissions. */
- if (!__pkru_allows_pkey(pte_flags_pkey(pteval), write))
- return 0;
-
- return 1;
-}
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct dev_pagemap *pgmap = NULL;
- int nr_start = *nr, ret = 0;
- pte_t *ptep, *ptem;
-
- /*
- * Keep the original mapped PTE value (ptem) around since we
- * might increment ptep off the end of the page when finishing
- * our loop iteration.
- */
- ptem = ptep = pte_offset_map(&pmd, addr);
- do {
- pte_t pte = gup_get_pte(ptep);
- struct page *page;
-
- /* Similar to the PMD case, NUMA hinting must take slow path */
- if (pte_protnone(pte))
- break;
-
- if (!pte_allows_gup(pte_val(pte), write))
- break;
-
- if (pte_devmap(pte)) {
- pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
- if (unlikely(!pgmap)) {
- undo_dev_pagemap(nr, nr_start, pages);
- break;
- }
- } else if (pte_special(pte))
- break;
-
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- get_page(page);
- put_dev_pagemap(pgmap);
- SetPageReferenced(page);
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
- if (addr == end)
- ret = 1;
- pte_unmap(ptem);
-
- return ret;
-}
-
-static inline void get_head_page_multiple(struct page *page, int nr)
-{
- VM_BUG_ON_PAGE(page != compound_head(page), page);
- VM_BUG_ON_PAGE(page_count(page) == 0, page);
- page_ref_add(page, nr);
- SetPageReferenced(page);
-}
-
-static int __gup_device_huge(unsigned long pfn, unsigned long addr,
- unsigned long end, struct page **pages, int *nr)
-{
- int nr_start = *nr;
- struct dev_pagemap *pgmap = NULL;
-
- do {
- struct page *page = pfn_to_page(pfn);
-
- pgmap = get_dev_pagemap(pfn, pgmap);
- if (unlikely(!pgmap)) {
- undo_dev_pagemap(nr, nr_start, pages);
- return 0;
- }
- SetPageReferenced(page);
- pages[*nr] = page;
- get_page(page);
- put_dev_pagemap(pgmap);
- (*nr)++;
- pfn++;
- } while (addr += PAGE_SIZE, addr != end);
- return 1;
-}
-
-static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
- unsigned long end, struct page **pages, int *nr)
-{
- unsigned long fault_pfn;
-
- fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
- unsigned long end, struct page **pages, int *nr)
-{
- unsigned long fault_pfn;
-
- fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- return __gup_device_huge(fault_pfn, addr, end, pages, nr);
-}
-
-static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- int refs;
-
- if (!pte_allows_gup(pmd_val(pmd), write))
- return 0;
-
- VM_BUG_ON(!pfn_valid(pmd_pfn(pmd)));
- if (pmd_devmap(pmd))
- return __gup_device_huge_pmd(pmd, addr, end, pages, nr);
-
- /* hugepages are never "special" */
- VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL);
-
- refs = 0;
- head = pmd_page(pmd);
- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
- get_head_page_multiple(head, refs);
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = *pmdp;
-
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- return 0;
- if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_protnone(pmd))
- return 0;
- if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
- return 0;
- } else {
- if (!gup_pte_range(pmd, addr, next, write, pages, nr))
- return 0;
- }
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- int refs;
-
- if (!pte_allows_gup(pud_val(pud), write))
- return 0;
-
- VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
- if (pud_devmap(pud))
- return __gup_device_huge_pud(pud, addr, end, pages, nr);
-
- /* hugepages are never "special" */
- VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL);
-
- refs = 0;
- head = pud_page(pud);
- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
- get_head_page_multiple(head, refs);
-
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = *pudp;
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (unlikely(pud_large(pud))) {
- if (!gup_huge_pud(pud, addr, next, write, pages, nr))
- return 0;
- } else {
- if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- }
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- (void __user *)start, len)))
- return 0;
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables and pages from being freed on x86.
- *
- * So long as we atomically load page table pointers versus teardown
- * (which we do on x86, with the above PAE exception), we can follow the
- * address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
-
- return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- pgd_t *pgdp;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
-
- end = start + len;
- if (end < start)
- goto slow_irqon;
-
-#ifdef CONFIG_X86_64
- if (end >> __VIRTUAL_MASK_SHIFT)
- goto slow_irqon;
-#endif
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables and pages from being freed on x86.
- *
- * So long as we atomically load page table pointers versus teardown
- * (which we do on x86, with the above PAE exception), we can follow the
- * address down to the the page and take a ref on it.
- */
- local_irq_disable();
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- goto slow;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- goto slow;
- } while (pgdp++, addr = next, addr != end);
- local_irq_enable();
-
- VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
- return nr;
-
- {
- int ret;
-
-slow:
- local_irq_enable();
-slow_irqon:
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- ret = get_user_pages_unlocked(start,
- (end - start) >> PAGE_SHIFT,
- pages, write ? FOLL_WRITE : 0);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
-
- return ret;
- }
-}
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
+#include <linux/compat.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
+#include <asm/elf.h>
#if 0 /* This is just for testing */
struct page *
info.flags = 0;
info.length = len;
- info.low_limit = current->mm->mmap_legacy_base;
- info.high_limit = TASK_SIZE;
+ info.low_limit = get_mmap_base(1);
+ info.high_limit = in_compat_syscall() ?
+ tasksize_32bit() : tasksize_64bit();
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = current->mm->mmap_base;
+ info.high_limit = get_mmap_base(0);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
return 0;
}
+static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ p4d_t *p4d = p4d_page + p4d_index(addr);
+ pud_t *pud;
+
+ next = (addr & P4D_MASK) + P4D_SIZE;
+ if (next > end)
+ next = end;
+
+ if (p4d_present(*p4d)) {
+ pud = pud_offset(p4d, 0);
+ ident_pud_init(info, pud, addr, next);
+ continue;
+ }
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ ident_pud_init(info, pud, addr, next);
+ set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long pstart, unsigned long pend)
{
for (; addr < end; addr = next) {
pgd_t *pgd = pgd_page + pgd_index(addr);
- pud_t *pud;
+ p4d_t *p4d;
next = (addr & PGDIR_MASK) + PGDIR_SIZE;
if (next > end)
next = end;
if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, 0);
- result = ident_pud_init(info, pud, addr, next);
+ p4d = p4d_offset(pgd, 0);
+ result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
continue;
}
- pud = (pud_t *)info->alloc_pgt_page(info->context);
- if (!pud)
+ p4d = (p4d_t *)info->alloc_pgt_page(info->context);
+ if (!p4d)
return -ENOMEM;
- result = ident_pud_init(info, pud, addr, next);
+ result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+ } else {
+ /*
+ * With p4d folded, pgd is equal to p4d.
+ * The pgd entry has to point to the pud page table in this case.
+ */
+ pud_t *pud = pud_offset(p4d, 0);
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
}
return 0;
*/
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd_table;
pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
return pmd_table;
}
#endif
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
pmd_table = pmd_offset(pud, 0);
return pmd_table;
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
- return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
- vaddr), vaddr), vaddr);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
+ return pte_offset_kernel(pmd, vaddr);
}
static void __init kmap_init(void)
{
unsigned long vaddr;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pkmap_page_table = pte;
{
unsigned long pfn, va;
pgd_t *pgd, *base = swapper_pg_dir;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (!pgd_present(*pgd))
break;
- pud = pud_offset(pgd, va);
+ p4d = p4d_offset(pgd, va);
+ pud = pud_offset(p4d, va);
pmd = pmd_offset(pud, va);
if (!pmd_present(*pmd))
break;
unsigned long address;
for (address = start; address <= end; address += PGDIR_SIZE) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
+ pgd_t *pgd_ref = pgd_offset_k(address);
+ const p4d_t *p4d_ref;
struct page *page;
- if (pgd_none(*pgd_ref))
+ /*
+	 * With folded p4d, pgd_none() is always false, so we need to
+	 * handle synchronization at the p4d level.
+ */
+ BUILD_BUG_ON(pgd_none(*pgd_ref));
+ p4d_ref = p4d_offset(pgd_ref, address);
+
+ if (p4d_none(*p4d_ref))
continue;
spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
+ p4d_t *p4d;
spinlock_t *pgt_lock;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ p4d = p4d_offset(pgd, address);
/* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
- BUG_ON(pgd_page_vaddr(*pgd)
- != pgd_page_vaddr(*pgd_ref));
+ if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
+ BUG_ON(p4d_page_vaddr(*p4d)
+ != p4d_page_vaddr(*p4d_ref));
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
+ if (p4d_none(*p4d))
+ set_p4d(p4d, *p4d_ref);
spin_unlock(pgt_lock);
}
return ptr;
}
-static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
+static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
{
if (pgd_none(*pgd)) {
- pud_t *pud = (pud_t *)spp_getpage();
- pgd_populate(&init_mm, pgd, pud);
- if (pud != pud_offset(pgd, 0))
+ p4d_t *p4d = (p4d_t *)spp_getpage();
+ pgd_populate(&init_mm, pgd, p4d);
+ if (p4d != p4d_offset(pgd, 0))
printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
- pud, pud_offset(pgd, 0));
+ p4d, p4d_offset(pgd, 0));
+ }
+ return p4d_offset(pgd, vaddr);
+}
+
+static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
+{
+ if (p4d_none(*p4d)) {
+ pud_t *pud = (pud_t *)spp_getpage();
+ p4d_populate(&init_mm, p4d, pud);
+ if (pud != pud_offset(p4d, 0))
+ printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+ pud, pud_offset(p4d, 0));
}
- return pud_offset(pgd, vaddr);
+ return pud_offset(p4d, vaddr);
}
static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
pmd_t *pmd = (pmd_t *) spp_getpage();
pud_populate(&init_mm, pud, pmd);
if (pmd != pmd_offset(pud, 0))
- printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
+ printk(KERN_ERR "PAGETABLE BUG #02! %p <-> %p\n",
pmd, pmd_offset(pud, 0));
}
return pmd_offset(pud, vaddr);
pte_t *pte = (pte_t *) spp_getpage();
pmd_populate_kernel(&init_mm, pmd, pte);
if (pte != pte_offset_kernel(pmd, 0))
- printk(KERN_ERR "PAGETABLE BUG #02!\n");
+ printk(KERN_ERR "PAGETABLE BUG #03!\n");
}
return pte_offset_kernel(pmd, vaddr);
}
-void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+static void __set_pte_vaddr(pud_t *pud, unsigned long vaddr, pte_t new_pte)
{
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pud = pud_page + pud_index(vaddr);
- pmd = fill_pmd(pud, vaddr);
- pte = fill_pte(pmd, vaddr);
+ pmd_t *pmd = fill_pmd(pud, vaddr);
+ pte_t *pte = fill_pte(pmd, vaddr);
set_pte(pte, new_pte);
__flush_tlb_one(vaddr);
}
+void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte)
+{
+ p4d_t *p4d = p4d_page + p4d_index(vaddr);
+ pud_t *pud = fill_pud(p4d, vaddr);
+
+ __set_pte_vaddr(pud, vaddr, new_pte);
+}
+
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
+{
+ pud_t *pud = pud_page + pud_index(vaddr);
+
+ __set_pte_vaddr(pud, vaddr, new_pte);
+}
+
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
pgd_t *pgd;
- pud_t *pud_page;
+ p4d_t *p4d_page;
pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, native_pte_val(pteval));
"PGD FIXMAP MISSING, it should be setup in head.S!\n");
return;
}
- pud_page = (pud_t*)pgd_page_vaddr(*pgd);
- set_pte_vaddr_pud(pud_page, vaddr, pteval);
+
+ p4d_page = p4d_offset(pgd, 0);
+ set_pte_vaddr_p4d(p4d_page, vaddr, pteval);
}
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pgd = pgd_offset_k(vaddr);
- pud = fill_pud(pgd, vaddr);
+ p4d = fill_p4d(pgd, vaddr);
+ pud = fill_pud(p4d, vaddr);
return fill_pmd(pud, vaddr);
}
enum page_cache_mode cache)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgprot_t prot;
for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
pgd = pgd_offset_k((unsigned long)__va(phys));
if (pgd_none(*pgd)) {
+ p4d = (p4d_t *) spp_getpage();
+ set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE |
+ _PAGE_USER));
+ }
+ p4d = p4d_offset(pgd, (unsigned long)__va(phys));
+ if (p4d_none(*p4d)) {
pud = (pud_t *) spp_getpage();
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+ set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE |
_PAGE_USER));
}
- pud = pud_offset(pgd, (unsigned long)__va(phys));
+ pud = pud_offset(p4d, (unsigned long)__va(phys));
if (pud_none(*pud)) {
pmd = (pmd_t *) spp_getpage();
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
pgd_t *pgd = pgd_offset_k(vaddr);
+ p4d_t *p4d;
pud_t *pud;
vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE;
- if (pgd_val(*pgd)) {
- pud = (pud_t *)pgd_page_vaddr(*pgd);
+ BUILD_BUG_ON(pgd_none(*pgd));
+ p4d = p4d_offset(pgd, vaddr);
+ if (p4d_val(*p4d)) {
+ pud = (pud_t *)p4d_page_vaddr(*p4d);
paddr_last = phys_pud_init(pud, __pa(vaddr),
__pa(vaddr_end),
page_size_mask);
page_size_mask);
spin_lock(&init_mm.page_table_lock);
- pgd_populate(&init_mm, pgd, pud);
+ p4d_populate(&init_mm, p4d, pud);
spin_unlock(&init_mm.page_table_lock);
pgd_changed = true;
}
spin_unlock(&init_mm.page_table_lock);
}
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+ pud_t *pud;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ pud = pud_start + i;
+ if (!pud_none(*pud))
+ return;
+ }
+
+	/* free a pud table */
+ free_pagetable(p4d_page(*p4d), 0);
+ spin_lock(&init_mm.page_table_lock);
+ p4d_clear(p4d);
+ spin_unlock(&init_mm.page_table_lock);
+}
+
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
bool direct)
update_page_count(PG_LEVEL_1G, -pages);
}
+static void __meminit
+remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
+ bool direct)
+{
+ unsigned long next, pages = 0;
+ pud_t *pud_base;
+ p4d_t *p4d;
+
+ p4d = p4d_start + p4d_index(addr);
+ for (; addr < end; addr = next, p4d++) {
+ next = p4d_addr_end(addr, end);
+
+ if (!p4d_present(*p4d))
+ continue;
+
+ BUILD_BUG_ON(p4d_large(*p4d));
+
+ pud_base = (pud_t *)p4d_page_vaddr(*p4d);
+ remove_pud_table(pud_base, addr, next, direct);
+ free_pud_table(pud_base, p4d);
+ }
+
+ if (direct)
+ update_page_count(PG_LEVEL_512G, -pages);
+}
+
/* start and end are both virtual address. */
static void __meminit
remove_pagetable(unsigned long start, unsigned long end, bool direct)
unsigned long next;
unsigned long addr;
pgd_t *pgd;
- pud_t *pud;
+ p4d_t *p4d;
for (addr = start; addr < end; addr = next) {
next = pgd_addr_end(addr, end);
if (!pgd_present(*pgd))
continue;
- pud = (pud_t *)pgd_page_vaddr(*pgd);
- remove_pud_table(pud, addr, next, direct);
+ p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ remove_p4d_table(p4d, addr, next, direct);
}
flush_tlb_all();
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (pgd_none(*pgd))
return 0;
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
+ return 0;
+
+ pud = pud_offset(p4d, addr);
if (pud_none(*pud))
return 0;
unsigned long addr;
unsigned long next;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
if (!pgd)
return -ENOMEM;
- pud = vmemmap_pud_populate(pgd, addr, node);
+ p4d = vmemmap_p4d_populate(pgd, addr, node);
+ if (!p4d)
+ return -ENOMEM;
+
+ pud = vmemmap_pud_populate(p4d, addr, node);
if (!pud)
return -ENOMEM;
unsigned long end = (unsigned long)(start_page + size);
unsigned long next;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
unsigned int nr_pages;
}
get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d)) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ continue;
+ }
+ get_page_bootmem(section_nr, p4d_page(*p4d), MIX_SECTION_INFO);
+
+ pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
next = (addr + PAGE_SIZE) & PAGE_MASK;
continue;
/* Don't assume we're using swapper_pg_dir at this point */
pgd_t *base = __va(read_cr3());
pgd_t *pgd = &base[pgd_index(addr)];
- pud_t *pud = pud_offset(pgd, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
pmd_t *pmd = pmd_offset(pud, addr);
return pmd;
static void __init clear_pgds(unsigned long start,
unsigned long end)
{
- for (; start < end; start += PGDIR_SIZE)
- pgd_clear(pgd_offset_k(start));
+ pgd_t *pgd;
+
+ for (; start < end; start += PGDIR_SIZE) {
+ pgd = pgd_offset_k(start);
+ /*
+		 * With folded p4d, pgd_clear() is a no-op; use p4d_clear()
+ * instead.
+ */
+ if (CONFIG_PGTABLE_LEVELS < 5)
+ p4d_clear(p4d_offset(pgd, start));
+ else
+ pgd_clear(pgd);
+ }
}
static void __init kasan_map_early_shadow(pgd_t *pgd)
#include <linux/limits.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
+#include <linux/compat.h>
#include <asm/elf.h>
struct va_alignment __read_mostly va_align = {
.flags = -1,
};
-static unsigned long stack_maxrandom_size(void)
+unsigned long tasksize_32bit(void)
+{
+ return IA32_PAGE_OFFSET;
+}
+
+unsigned long tasksize_64bit(void)
+{
+ return TASK_SIZE_MAX;
+}
+
+static unsigned long stack_maxrandom_size(unsigned long task_size)
{
unsigned long max = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
+ max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit());
+ max <<= PAGE_SHIFT;
}
return max;
}
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole with possible stack randomization.
- */
-#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
-#define MAX_GAP (TASK_SIZE/6*5)
+#ifdef CONFIG_COMPAT
+# define mmap32_rnd_bits mmap_rnd_compat_bits
+# define mmap64_rnd_bits mmap_rnd_bits
+#else
+# define mmap32_rnd_bits mmap_rnd_bits
+# define mmap64_rnd_bits mmap_rnd_bits
+#endif
+
+#define SIZE_128M (128 * 1024 * 1024UL)
static int mmap_is_legacy(void)
{
return sysctl_legacy_va_layout;
}
-unsigned long arch_mmap_rnd(void)
+static unsigned long arch_rnd(unsigned int rndbits)
{
- unsigned long rnd;
-
- if (mmap_is_ia32())
-#ifdef CONFIG_COMPAT
- rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-#else
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-#endif
- else
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+ return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
+}
- return rnd << PAGE_SHIFT;
+unsigned long arch_mmap_rnd(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap_min, gap_max;
+
+ /*
+ * Top of mmap area (just below the process stack).
+	 * Leave at least a ~128 MB hole, allowing for possible stack randomization.
+ */
+ gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+ gap_max = (task_size / 6) * 5;
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
+ if (gap < gap_min)
+ gap = gap_min;
+ else if (gap > gap_max)
+ gap = gap_max;
+
+ return PAGE_ALIGN(task_size - gap - rnd);
+}
- return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+static unsigned long mmap_legacy_base(unsigned long rnd,
+ unsigned long task_size)
+{
+ return __TASK_UNMAPPED_BASE(task_size) + rnd;
}
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
+static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
+ unsigned long random_factor, unsigned long task_size)
+{
+ *legacy_base = mmap_legacy_base(random_factor, task_size);
+ if (mmap_is_legacy())
+ *base = *legacy_base;
+ else
+ *base = mmap_base(random_factor, task_size);
+}
+
void arch_pick_mmap_layout(struct mm_struct *mm)
{
- unsigned long random_factor = 0UL;
+ if (mmap_is_legacy())
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ else
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- if (current->flags & PF_RANDOMIZE)
- random_factor = arch_mmap_rnd();
+ arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
+ arch_rnd(mmap64_rnd_bits), tasksize_64bit());
+
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+ /*
+ * The mmap syscall mapping base decision depends solely on the
+	 * syscall type (64-bit or compat). This applies to both 64-bit
+	 * and 32-bit applications. The 64-bit syscall uses
+ * mmap_base, the compat syscall uses mmap_compat_base.
+ */
+ arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
+ arch_rnd(mmap32_rnd_bits), tasksize_32bit());
+#endif
+}
- mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
+unsigned long get_mmap_base(int is_legacy)
+{
+ struct mm_struct *mm = current->mm;
- if (mmap_is_legacy()) {
- mm->mmap_base = mm->mmap_legacy_base;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+ if (in_compat_syscall()) {
+ return is_legacy ? mm->mmap_compat_legacy_base
+ : mm->mmap_compat_base;
}
+#endif
+ return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
}
const char *arch_vma_name(struct vm_area_struct *vma)
pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
if (pgd_none(*pgd))
return NULL;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (p4d_none(*p4d))
+ return NULL;
+
+ *level = PG_LEVEL_512G;
+ if (p4d_large(*p4d) || !p4d_present(*p4d))
+ return (pte_t *)p4d;
+
+ pud = pud_offset(p4d, address);
if (pud_none(*pud))
return NULL;
pmd_t *lookup_pmd_address(unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pgd = pgd_offset_k(address);
if (pgd_none(*pgd))
return NULL;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, address);
if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
return NULL;
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
set_pte_atomic((pte_t *)pmd, pte);
}
pud_clear(pud);
}
-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
{
- pud_t *pud = pud_offset(pgd, start);
+ pud_t *pud = pud_offset(p4d, start);
/*
* Not on a GB page boundary?
return num_pages;
}
-static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
- pgprot_t pgprot)
+static int populate_pud(struct cpa_data *cpa, unsigned long start, p4d_t *p4d,
+ pgprot_t pgprot)
{
pud_t *pud;
unsigned long end;
cur_pages = (pre_end - start) >> PAGE_SHIFT;
cur_pages = min_t(int, (int)cpa->numpages, cur_pages);
- pud = pud_offset(pgd, start);
+ pud = pud_offset(p4d, start);
/*
* Need a PMD page?
if (cpa->numpages == cur_pages)
return cur_pages;
- pud = pud_offset(pgd, start);
+ pud = pud_offset(p4d, start);
pud_pgprot = pgprot_4k_2_large(pgprot);
/*
if (start < end) {
long tmp;
- pud = pud_offset(pgd, start);
+ pud = pud_offset(p4d, start);
if (pud_none(*pud))
if (alloc_pmd_page(pud))
return -1;
{
pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
pud_t *pud = NULL; /* shut up gcc */
+ p4d_t *p4d;
pgd_t *pgd_entry;
long ret;
pgd_entry = cpa->pgd + pgd_index(addr);
+ if (pgd_none(*pgd_entry)) {
+ p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+ if (!p4d)
+ return -1;
+
+ set_pgd(pgd_entry, __pgd(__pa(p4d) | _KERNPG_TABLE));
+ }
+
/*
* Allocate a PUD page and hand it down for mapping.
*/
- if (pgd_none(*pgd_entry)) {
+ p4d = p4d_offset(pgd_entry, addr);
+ if (p4d_none(*p4d)) {
pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
if (!pud)
return -1;
- set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+ set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
}
pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(pgprot) |= pgprot_val(cpa->mask_set);
- ret = populate_pud(cpa, addr, pgd_entry, pgprot);
+ ret = populate_pud(cpa, addr, p4d, pgprot);
if (ret < 0) {
/*
* Leave the PUD page in place in case some other CPU or thread
* already found it, but remove any useless entries we just
* added to it.
*/
- unmap_pud_range(pgd_entry, addr,
+ unmap_pud_range(p4d, addr,
addr + (cpa->numpages << PAGE_SHIFT));
return ret;
}
static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
+ p4d_t *p4d;
pud_t *pud;
int i;
if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
return;
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
pmd_t *pmd = pmds[i];
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
BUG();
return;
}
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ if (p4d_none(*p4d)) {
+ BUG();
+ return;
+ }
+ pud = pud_offset(p4d, vaddr);
if (pud_none(*pud)) {
BUG();
return;
load_cr3(initial_page_table);
__flush_tlb_all();
- gdt_descr.address = __pa(get_cpu_gdt_table(0));
+ gdt_descr.address = get_cpu_gdt_paddr(0);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
{
struct desc_ptr gdt_descr;
- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+ gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
int __init efi_alloc_page_tables(void)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
gfp_t gfp_mask;
return -ENOMEM;
pgd = efi_pgd + pgd_index(EFI_VA_END);
+ p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
+ if (!p4d) {
+ free_page((unsigned long)efi_pgd);
+ return -ENOMEM;
+ }
- pud = pud_alloc_one(NULL, 0);
+ pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud) {
+ if (CONFIG_PGTABLE_LEVELS > 4)
+ free_page((unsigned long) pgd_page_vaddr(*pgd));
free_page((unsigned long)efi_pgd);
return -ENOMEM;
}
- pgd_populate(NULL, pgd, pud);
-
return 0;
}
{
unsigned num_entries;
pgd_t *pgd_k, *pgd_efi;
+ p4d_t *p4d_k, *p4d_efi;
pud_t *pud_k, *pud_efi;
if (efi_enabled(EFI_OLD_MEMMAP))
num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
+ /*
+ * As with PGDs, we share all P4D entries apart from the one entry
+ * that covers the EFI runtime mapping space.
+ */
+ BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
+ BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
+
+ pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
+ pgd_k = pgd_offset_k(EFI_VA_END);
+ p4d_efi = p4d_offset(pgd_efi, 0);
+ p4d_k = p4d_offset(pgd_k, 0);
+
+ num_entries = p4d_index(EFI_VA_END);
+ memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries);
+
/*
* We share all the PUD entries apart from those that map the
* EFI regions. Copy around them.
BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
- pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
- pud_efi = pud_offset(pgd_efi, 0);
-
- pgd_k = pgd_offset_k(EFI_VA_END);
- pud_k = pud_offset(pgd_k, 0);
+ p4d_efi = p4d_offset(pgd_efi, EFI_VA_END);
+ p4d_k = p4d_offset(pgd_k, EFI_VA_END);
+ pud_efi = pud_offset(p4d_efi, 0);
+ pud_k = pud_offset(p4d_k, 0);
num_entries = pud_index(EFI_VA_END);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
- pud_efi = pud_offset(pgd_efi, EFI_VA_START);
- pud_k = pud_offset(pgd_k, EFI_VA_START);
+ pud_efi = pud_offset(p4d_efi, EFI_VA_START);
+ pud_k = pud_offset(p4d_k, EFI_VA_START);
num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
* 'pmode_gdt' in wakeup_start.
*/
ctxt->gdt_desc.size = GDT_SIZE - 1;
- ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+ ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
store_tr(ctxt->tr);
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64
- struct desc_struct *desc = get_cpu_gdt_table(cpu);
+ struct desc_struct *desc = get_cpu_gdt_rw(cpu);
tss_desc tss;
#endif
set_tss_desc(cpu, t); /*
load_mm_ldt(current->active_mm); /* This does lldt */
fpu__resume_cpu();
+
+ /* The processor is back on the direct GDT, load back the fixmap */
+ load_fixmap_gdt(cpu);
}
/**
*/
static pmd_t *resume_one_md_table_init(pgd_t *pgd)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd_table;
return NULL;
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
#else
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
pmd_table = pmd_offset(pud, 0);
#endif
{
pmd_t *pmd;
pud_t *pud;
+ p4d_t *p4d;
/*
* The new mapping only has to cover the page containing the image
* the virtual address space after switching over to the original page
* tables used by the image kernel.
*/
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
+ if (!p4d)
+ return -ENOMEM;
+ }
+
pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- set_pgd(pgd + pgd_index(restore_jump_address),
- __pgd(__pa(pud) | _KERNPG_TABLE));
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
+ set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
+ } else {
+ /* No p4d for 4-level paging: point the pgd to the pud page table */
+ set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
return 0;
}
static int relocate_restore_code(void)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
relocated_restore_code = get_safe_page(GFP_ATOMIC);
if (!relocated_restore_code)
/* Make the page containing the relocated code executable */
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
- pud = pud_offset(pgd, relocated_restore_code);
+ p4d = p4d_offset(pgd, relocated_restore_code);
+ if (p4d_large(*p4d)) {
+ set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
+ goto out;
+ }
+ pud = pud_offset(p4d, relocated_restore_code);
if (pud_large(*pud)) {
set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
- } else {
- pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
-
- if (pmd_large(*pmd)) {
- set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
- } else {
- pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
-
- set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
- }
+ goto out;
+ }
+ pmd = pmd_offset(pud, relocated_restore_code);
+ if (pmd_large(*pmd)) {
+ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+ goto out;
}
+ pte = pte_offset_kernel(pmd, relocated_restore_code);
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+out:
__flush_tlb_all();
-
return 0;
}
* Version 2. See the file COPYING for more details.
*/
+#include <linux/bug.h>
+#include <asm/purgatory.h>
+
#include "sha256.h"
-#include "purgatory.h"
#include "../boot/string.h"
-struct sha_region {
- unsigned long start;
- unsigned long len;
-};
-
-static unsigned long backup_dest;
-static unsigned long backup_src;
-static unsigned long backup_sz;
+unsigned long purgatory_backup_dest __section(.kexec-purgatory);
+unsigned long purgatory_backup_src __section(.kexec-purgatory);
+unsigned long purgatory_backup_sz __section(.kexec-purgatory);
-static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
-struct sha_region sha_regions[16] = {};
+struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
/*
* On x86, second kernel requries first 640K of memory to boot. Copy
*/
static int copy_backup_region(void)
{
- if (backup_dest)
- memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
-
+ if (purgatory_backup_dest) {
+ memcpy((void *)purgatory_backup_dest,
+ (void *)purgatory_backup_src, purgatory_backup_sz);
+ }
return 0;
}
static int verify_sha256_digest(void)
{
- struct sha_region *ptr, *end;
+ struct kexec_sha_region *ptr, *end;
u8 digest[SHA256_DIGEST_SIZE];
struct sha256_state sctx;
sha256_init(&sctx);
- end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
- for (ptr = sha_regions; ptr < end; ptr++)
+ end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions);
+
+ for (ptr = purgatory_sha_regions; ptr < end; ptr++)
sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
sha256_final(&sctx, digest);
- if (memcmp(digest, sha256_digest, sizeof(digest)))
+ if (memcmp(digest, purgatory_sha256_digest, sizeof(digest)))
return 1;
return 0;
+++ /dev/null
-#ifndef PURGATORY_H
-#define PURGATORY_H
-
-#ifndef __ASSEMBLY__
-extern void purgatory(void);
-#endif /* __ASSEMBLY__ */
-
-#endif /* PURGATORY_H */
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
*/
-#include "purgatory.h"
+#include <asm/purgatory.h>
.text
.globl purgatory_start
#ifndef SHA256_H
#define SHA256_H
-
#include <linux/types.h>
#include <crypto/sha.h>
ifeq ($(CONFIG_X86_32),y)
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
return -ENOSYS;
}
-extern long arch_prctl(struct task_struct *task, int code,
+extern long arch_prctl(struct task_struct *task, int option,
unsigned long __user *addr);
#endif
#include <sys/ptrace.h>
#include <asm/ptrace.h>
-int os_arch_prctl(int pid, int code, unsigned long *addr)
+int os_arch_prctl(int pid, int option, unsigned long *arg2)
{
- return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
+ return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
}
--- /dev/null
+#include <linux/syscalls.h>
+#include <os.h>
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ return -EINVAL;
+}
#include <linux/sched.h>
#include <linux/sched/mm.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/prctl.h> /* XXX This should get the constants from libc */
#include <os.h>
-long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
+long arch_prctl(struct task_struct *task, int option,
+ unsigned long __user *arg2)
{
- unsigned long *ptr = addr, tmp;
+ unsigned long *ptr = arg2, tmp;
long ret;
int pid = task->mm->context.id.u.pid;
* arch_prctl is run on the host, then the registers are read
* back.
*/
- switch (code) {
+ switch (option) {
case ARCH_SET_FS:
case ARCH_SET_GS:
		ret = restore_registers(pid, &current->thread.regs.regs);
ptr = &tmp;
}
- ret = os_arch_prctl(pid, code, ptr);
+ ret = os_arch_prctl(pid, option, ptr);
if (ret)
return ret;
- switch (code) {
+ switch (option) {
case ARCH_SET_FS:
current->thread.arch.fs = (unsigned long) ptr;
		ret = save_registers(pid, &current->thread.regs.regs);
		ret = save_registers(pid, &current->thread.regs.regs);
break;
case ARCH_GET_FS:
- ret = put_user(tmp, addr);
+ ret = put_user(tmp, arg2);
break;
case ARCH_GET_GS:
- ret = put_user(tmp, addr);
+ ret = put_user(tmp, arg2);
break;
}
return ret;
}
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return arch_prctl(current, code, (unsigned long __user *) addr);
+ return arch_prctl(current, option, (unsigned long __user *) arg2);
}
void arch_switch_to(struct task_struct *to)
*shadow = t->tls_array[i];
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
mc = __xen_mc_entry(0);
return user_ptr;
}
-static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
{
struct mmu_update u;
u.ptr = virt_to_machine(ptr).maddr;
- u.val = pgd_val_ma(val);
+ u.val = p4d_val_ma(val);
xen_extend_mmu_update(&u);
}
/*
- * Raw hypercall-based set_pgd, intended for in early boot before
+ * Raw hypercall-based set_p4d, intended for use in early boot before
* there's a page structure. This implies:
* 1. The only existing pagetable is the kernel's
* 2. It is always pinned
* 3. It has no user pagetable attached to it
*/
-static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
+static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
{
preempt_disable();
xen_mc_batch();
- __xen_set_pgd_hyper(ptr, val);
+ __xen_set_p4d_hyper(ptr, val);
xen_mc_issue(PARAVIRT_LAZY_MMU);
preempt_enable();
}
-static void xen_set_pgd(pgd_t *ptr, pgd_t val)
+static void xen_set_p4d(p4d_t *ptr, p4d_t val)
{
- pgd_t *user_ptr = xen_get_user_pgd(ptr);
+ pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr);
+ pgd_t pgd_val;
- trace_xen_mmu_set_pgd(ptr, user_ptr, val);
+ trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val);
/* If page is not pinned, we can just update the entry
directly */
*ptr = val;
if (user_ptr) {
WARN_ON(xen_page_pinned(user_ptr));
- *user_ptr = val;
+ pgd_val.pgd = p4d_val_ma(val);
+ *user_ptr = pgd_val;
}
return;
}
user updates together. */
xen_mc_batch();
- __xen_set_pgd_hyper(ptr, val);
+ __xen_set_p4d_hyper(ptr, val);
if (user_ptr)
- __xen_set_pgd_hyper(user_ptr, val);
+ __xen_set_p4d_hyper((p4d_t *)user_ptr, val);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
#endif /* CONFIG_PGTABLE_LEVELS == 4 */
+static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+ int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+ bool last, unsigned long limit)
+{
+ int i, nr, flush = 0;
+
+ nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
+ for (i = 0; i < nr; i++) {
+ if (!pmd_none(pmd[i]))
+ flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+ }
+ return flush;
+}
+
+static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+ int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+ bool last, unsigned long limit)
+{
+ int i, nr, flush = 0;
+
+ nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
+ for (i = 0; i < nr; i++) {
+ pmd_t *pmd;
+
+ if (pud_none(pud[i]))
+ continue;
+
+ pmd = pmd_offset(&pud[i], 0);
+ if (PTRS_PER_PMD > 1)
+ flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
+ flush |= xen_pmd_walk(mm, pmd, func,
+ last && i == nr - 1, limit);
+ }
+ return flush;
+}
+
+static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+ int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+ bool last, unsigned long limit)
+{
+ int i, nr, flush = 0;
+
+ nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
+ for (i = 0; i < nr; i++) {
+ pud_t *pud;
+
+ if (p4d_none(p4d[i]))
+ continue;
+
+ pud = pud_offset(&p4d[i], 0);
+ if (PTRS_PER_PUD > 1)
+ flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+ flush |= xen_pud_walk(mm, pud, func,
+ last && i == nr - 1, limit);
+ }
+ return flush;
+}
+
/*
* (Yet another) pagetable walker. This one is intended for pinning a
* pagetable. This means that it walks a pagetable and calls the
enum pt_level),
unsigned long limit)
{
- int flush = 0;
+ int i, nr, flush = 0;
unsigned hole_low, hole_high;
- unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
- unsigned pgdidx, pudidx, pmdidx;
/* The limit is the last byte to be touched */
limit--;
hole_low = pgd_index(USER_LIMIT);
hole_high = pgd_index(PAGE_OFFSET);
- pgdidx_limit = pgd_index(limit);
-#if PTRS_PER_PUD > 1
- pudidx_limit = pud_index(limit);
-#else
- pudidx_limit = 0;
-#endif
-#if PTRS_PER_PMD > 1
- pmdidx_limit = pmd_index(limit);
-#else
- pmdidx_limit = 0;
-#endif
-
- for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
- pud_t *pud;
+ nr = pgd_index(limit) + 1;
+ for (i = 0; i < nr; i++) {
+ p4d_t *p4d;
- if (pgdidx >= hole_low && pgdidx < hole_high)
+ if (i >= hole_low && i < hole_high)
continue;
- if (!pgd_val(pgd[pgdidx]))
+ if (pgd_none(pgd[i]))
continue;
- pud = pud_offset(&pgd[pgdidx], 0);
-
- if (PTRS_PER_PUD > 1) /* not folded */
- flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-
- for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
- pmd_t *pmd;
-
- if (pgdidx == pgdidx_limit &&
- pudidx > pudidx_limit)
- goto out;
-
- if (pud_none(pud[pudidx]))
- continue;
-
- pmd = pmd_offset(&pud[pudidx], 0);
-
- if (PTRS_PER_PMD > 1) /* not folded */
- flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
-
- for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
- struct page *pte;
-
- if (pgdidx == pgdidx_limit &&
- pudidx == pudidx_limit &&
- pmdidx > pmdidx_limit)
- goto out;
-
- if (pmd_none(pmd[pmdidx]))
- continue;
-
- pte = pmd_page(pmd[pmdidx]);
- flush |= (*func)(mm, pte, PT_PTE);
- }
- }
+ p4d = p4d_offset(&pgd[i], 0);
+ if (PTRS_PER_P4D > 1)
+ flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
+ flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
}
-out:
/* Do the top level last, so that the callbacks can use it as
a cue to do final things like tlb flushes. */
flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
xen_free_ro_pages(pa, PAGE_SIZE);
}
+static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
+{
+ unsigned long pa;
+ pte_t *pte_tbl;
+ int i;
+
+ if (pmd_large(*pmd)) {
+ pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+ xen_free_ro_pages(pa, PMD_SIZE);
+ return;
+ }
+
+ pte_tbl = pte_offset_kernel(pmd, 0);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ if (pte_none(pte_tbl[i]))
+ continue;
+ pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
+ xen_free_ro_pages(pa, PAGE_SIZE);
+ }
+ set_pmd(pmd, __pmd(0));
+ xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
+{
+ unsigned long pa;
+ pmd_t *pmd_tbl;
+ int i;
+
+ if (pud_large(*pud)) {
+ pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+ xen_free_ro_pages(pa, PUD_SIZE);
+ return;
+ }
+
+ pmd_tbl = pmd_offset(pud, 0);
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (pmd_none(pmd_tbl[i]))
+ continue;
+ xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
+ }
+ set_pud(pud, __pud(0));
+ xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
+{
+ unsigned long pa;
+ pud_t *pud_tbl;
+ int i;
+
+ if (p4d_large(*p4d)) {
+ pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
+ xen_free_ro_pages(pa, P4D_SIZE);
+ return;
+ }
+
+ pud_tbl = pud_offset(p4d, 0);
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ if (pud_none(pud_tbl[i]))
+ continue;
+ xen_cleanmfnmap_pud(pud_tbl + i, unpin);
+ }
+ set_p4d(p4d, __p4d(0));
+ xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
+}
+
/*
* Since it is well isolated we can (and since it is perhaps large we should)
* also free the page tables mapping the initial P->M table.
*/
static void __init xen_cleanmfnmap(unsigned long vaddr)
{
- unsigned long va = vaddr & PMD_MASK;
- unsigned long pa;
- pgd_t *pgd = pgd_offset_k(va);
- pud_t *pud_page = pud_offset(pgd, 0);
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
+ pgd_t *pgd;
+ p4d_t *p4d;
unsigned int i;
bool unpin;
unpin = (vaddr == 2 * PGDIR_SIZE);
- set_pgd(pgd, __pgd(0));
- do {
- pud = pud_page + pud_index(va);
- if (pud_none(*pud)) {
- va += PUD_SIZE;
- } else if (pud_large(*pud)) {
- pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
- xen_free_ro_pages(pa, PUD_SIZE);
- va += PUD_SIZE;
- } else {
- pmd = pmd_offset(pud, va);
- if (pmd_large(*pmd)) {
- pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
- xen_free_ro_pages(pa, PMD_SIZE);
- } else if (!pmd_none(*pmd)) {
- pte = pte_offset_kernel(pmd, va);
- set_pmd(pmd, __pmd(0));
- for (i = 0; i < PTRS_PER_PTE; ++i) {
- if (pte_none(pte[i]))
- break;
- pa = pte_pfn(pte[i]) << PAGE_SHIFT;
- xen_free_ro_pages(pa, PAGE_SIZE);
- }
- xen_cleanmfnmap_free_pgtbl(pte, unpin);
- }
- va += PMD_SIZE;
- if (pmd_index(va))
- continue;
- set_pud(pud, __pud(0));
- xen_cleanmfnmap_free_pgtbl(pmd, unpin);
- }
-
- } while (pud_index(va) || pmd_index(va));
- xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
+ vaddr &= PMD_MASK;
+ pgd = pgd_offset_k(vaddr);
+ p4d = p4d_offset(pgd, 0);
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ if (p4d_none(p4d[i]))
+ continue;
+ xen_cleanmfnmap_p4d(p4d + i, unpin);
+ }
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ set_pgd(pgd, __pgd(0));
+ xen_cleanmfnmap_free_pgtbl(p4d, unpin);
+ }
}
static void __init xen_pagetable_p2m_free(void)
BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
}
#endif
-
return ret;
}
xen_release_ptpage(pfn, PT_PMD);
}
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
{
xen_alloc_ptpage(mm, pfn, PT_PUD);
*/
void __init xen_relocate_p2m(void)
{
- phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
+ phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
- int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
+ int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
pte_t *pt;
pmd_t *pmd;
pud_t *pud;
+ p4d_t *p4d = NULL;
pgd_t *pgd;
unsigned long *new_p2m;
+ int save_pud;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
- n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
- n_frames = n_pte + n_pt + n_pmd + n_pud;
+ n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
+ if (PTRS_PER_P4D > 1)
+ n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
+ else
+ n_p4d = 0;
+ n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
new_area = xen_find_free_area(PFN_PHYS(n_frames));
if (!new_area) {
* To avoid any possible virtual address collision, just use
* 2 * PUD_SIZE for the new area.
*/
- pud_phys = new_area;
+ p4d_phys = new_area;
+ pud_phys = p4d_phys + PFN_PHYS(n_p4d);
pmd_phys = pud_phys + PFN_PHYS(n_pud);
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
pgd = __va(read_cr3());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
- for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
- pud = early_memremap(pud_phys, PAGE_SIZE);
- clear_page(pud);
- for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
- idx_pmd++) {
- pmd = early_memremap(pmd_phys, PAGE_SIZE);
- clear_page(pmd);
- for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
- idx_pt++) {
- pt = early_memremap(pt_phys, PAGE_SIZE);
- clear_page(pt);
- for (idx_pte = 0;
- idx_pte < min(n_pte, PTRS_PER_PTE);
- idx_pte++) {
- set_pte(pt + idx_pte,
- pfn_pte(p2m_pfn, PAGE_KERNEL));
- p2m_pfn++;
+ idx_p4d = 0;
+ save_pud = n_pud;
+ do {
+ if (n_p4d > 0) {
+ p4d = early_memremap(p4d_phys, PAGE_SIZE);
+ clear_page(p4d);
+ n_pud = min(save_pud, PTRS_PER_P4D);
+ }
+ for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+ pud = early_memremap(pud_phys, PAGE_SIZE);
+ clear_page(pud);
+ for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+ idx_pmd++) {
+ pmd = early_memremap(pmd_phys, PAGE_SIZE);
+ clear_page(pmd);
+ for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+ idx_pt++) {
+ pt = early_memremap(pt_phys, PAGE_SIZE);
+ clear_page(pt);
+ for (idx_pte = 0;
+ idx_pte < min(n_pte, PTRS_PER_PTE);
+ idx_pte++) {
+ set_pte(pt + idx_pte,
+ pfn_pte(p2m_pfn, PAGE_KERNEL));
+ p2m_pfn++;
+ }
+ n_pte -= PTRS_PER_PTE;
+ early_memunmap(pt, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pt_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+ PFN_DOWN(pt_phys));
+ set_pmd(pmd + idx_pt,
+ __pmd(_PAGE_TABLE | pt_phys));
+ pt_phys += PAGE_SIZE;
}
- n_pte -= PTRS_PER_PTE;
- early_memunmap(pt, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pt_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
- PFN_DOWN(pt_phys));
- set_pmd(pmd + idx_pt,
- __pmd(_PAGE_TABLE | pt_phys));
- pt_phys += PAGE_SIZE;
+ n_pt -= PTRS_PER_PMD;
+ early_memunmap(pmd, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pmd_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+ PFN_DOWN(pmd_phys));
+ set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+ pmd_phys += PAGE_SIZE;
}
- n_pt -= PTRS_PER_PMD;
- early_memunmap(pmd, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pmd_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
- PFN_DOWN(pmd_phys));
- set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
- pmd_phys += PAGE_SIZE;
+ n_pmd -= PTRS_PER_PUD;
+ early_memunmap(pud, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(pud_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+ if (n_p4d > 0)
+ set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
+ else
+ set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+ pud_phys += PAGE_SIZE;
}
- n_pmd -= PTRS_PER_PUD;
- early_memunmap(pud, PAGE_SIZE);
- make_lowmem_page_readonly(__va(pud_phys));
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
- set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
- pud_phys += PAGE_SIZE;
- }
+ if (n_p4d > 0) {
+ save_pud -= PTRS_PER_P4D;
+ early_memunmap(p4d, PAGE_SIZE);
+ make_lowmem_page_readonly(__va(p4d_phys));
+ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
+ set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
+ p4d_phys += PAGE_SIZE;
+ }
+ } while (++idx_p4d < n_p4d);
/* Now copy the old p2m info to the new area. */
memcpy(new_p2m, xen_p2m_addr, size);
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
+ case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS == 4
- pv_mmu_ops.set_pgd = xen_set_pgd;
+#if CONFIG_PGTABLE_LEVELS >= 4
+ pv_mmu_ops.set_p4d = xen_set_p4d;
#endif
/* This will work as long as patching hasn't happened yet
pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
pv_mmu_ops.release_pte = xen_release_pte;
pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
pv_mmu_ops.alloc_pud = xen_alloc_pud;
pv_mmu_ops.release_pud = xen_release_pud;
#endif
.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
-#if CONFIG_PGTABLE_LEVELS == 4
+#if CONFIG_PGTABLE_LEVELS >= 4
.pud_val = PV_CALLEE_SAVE(xen_pud_val),
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
- .set_pgd = xen_set_pgd_hyper,
+ .set_p4d = xen_set_p4d_hyper,
.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
enum pt_level {
PT_PGD,
+ PT_P4D,
PT_PUD,
PT_PMD,
PT_PTE
if (ctxt == NULL)
return -ENOMEM;
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
bio_list_init(&punt);
bio_list_init(&nopunt);
- while ((bio = bio_list_pop(current->bio_list)))
+	while ((bio = bio_list_pop(&current->bio_list[0])))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_list[0] = nopunt;
- *current->bio_list = nopunt;
+ bio_list_init(&nopunt);
+	while ((bio = bio_list_pop(&current->bio_list[1])))
+ bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_list[1] = nopunt;
spin_lock(&bs->rescue_lock);
bio_list_merge(&bs->rescue_list, &punt);
* we retry with the original gfp_flags.
*/
- if (current->bio_list && !bio_list_empty(current->bio_list))
+ if (current->bio_list &&
+	    (!bio_list_empty(&current->bio_list[0]) ||
+	     !bio_list_empty(&current->bio_list[1])))
gfp_mask &= ~__GFP_DIRECT_RECLAIM;
p = mempool_alloc(bs->bio_pool, gfp_mask);
*/
blk_qc_t generic_make_request(struct bio *bio)
{
- struct bio_list bio_list_on_stack;
+ /*
+ * bio_list_on_stack[0] contains bios submitted by the current
+ * make_request_fn.
+ * bio_list_on_stack[1] contains bios that were submitted before
+ * the current make_request_fn, but that haven't been processed
+ * yet.
+ */
+ struct bio_list bio_list_on_stack[2];
blk_qc_t ret = BLK_QC_T_NONE;
if (!generic_make_request_checks(bio))
* should be added at the tail
*/
if (current->bio_list) {
- bio_list_add(current->bio_list, bio);
+		bio_list_add(&current->bio_list[0], bio);
goto out;
}
* bio_list, and call into ->make_request() again.
*/
BUG_ON(bio->bi_next);
- bio_list_init(&bio_list_on_stack);
- current->bio_list = &bio_list_on_stack;
+ bio_list_init(&bio_list_on_stack[0]);
+ current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (likely(blk_queue_enter(q, false) == 0)) {
- struct bio_list hold;
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
- hold = bio_list_on_stack;
- bio_list_init(&bio_list_on_stack);
+ bio_list_on_stack[1] = bio_list_on_stack[0];
+ bio_list_init(&bio_list_on_stack[0]);
ret = q->make_request_fn(q, bio);
blk_queue_exit(q);
*/
bio_list_init(&lower);
bio_list_init(&same);
- while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL)
+ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
if (q == bdev_get_queue(bio->bi_bdev))
bio_list_add(&same, bio);
else
bio_list_add(&lower, bio);
/* now assemble so we handle the lowest level first */
- bio_list_merge(&bio_list_on_stack, &lower);
- bio_list_merge(&bio_list_on_stack, &same);
- bio_list_merge(&bio_list_on_stack, &hold);
+ bio_list_merge(&bio_list_on_stack[0], &lower);
+ bio_list_merge(&bio_list_on_stack[0], &same);
+ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} else {
bio_io_error(bio);
}
- bio = bio_list_pop(current->bio_list);
+ bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
current->bio_list = NULL; /* deactivate */
for (i = 0; i < set->nr_hw_queues; i++) {
struct blk_mq_tags *tags = set->tags[i];
+ if (!tags)
+ continue;
+
for (j = 0; j < tags->nr_tags; j++) {
if (!tags->static_rqs[j])
continue;
return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
}
-static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
+static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
+ bool may_sleep)
{
struct request_queue *q = rq->q;
struct blk_mq_queue_data bd = {
}
insert:
- blk_mq_sched_insert_request(rq, false, true, true, false);
+ blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
}
/*
if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
rcu_read_lock();
- blk_mq_try_issue_directly(old_rq, &cookie);
+ blk_mq_try_issue_directly(old_rq, &cookie, false);
rcu_read_unlock();
} else {
srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
- blk_mq_try_issue_directly(old_rq, &cookie);
+ blk_mq_try_issue_directly(old_rq, &cookie, true);
srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
}
goto done;
return err;
}
-int af_alg_accept(struct sock *sk, struct socket *newsock)
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern)
{
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type;
if (!type)
goto unlock;
- sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0);
+ sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern);
err = -ENOMEM;
if (!sk2)
goto unlock;
}
EXPORT_SYMBOL_GPL(af_alg_accept);
-static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
+static int alg_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
- return af_alg_accept(sock->sk, newsock);
+ return af_alg_accept(sock->sk, newsock, kern);
}
static const struct proto_ops alg_proto_ops = {
return err ?: len;
}
-static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
+static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
if (err)
return err;
- err = af_alg_accept(ask->parent, newsock);
+ err = af_alg_accept(ask->parent, newsock, kern);
if (err)
return err;
}
static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
int err;
if (err)
return err;
- return hash_accept(sock, newsock, flags);
+ return hash_accept(sock, newsock, flags, kern);
}
static struct proto_ops algif_hash_ops_nokey = {
case AHCI_LS1043A:
if (!qpriv->ecc_addr)
return -EINVAL;
- writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+ writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+ qpriv->ecc_addr);
writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
if (qpriv->is_dmacoherent)
case AHCI_LS1046A:
if (!qpriv->ecc_addr)
return -EINVAL;
- writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr);
+ writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2,
+ qpriv->ecc_addr);
writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
if (qpriv->is_dmacoherent)
break;
default:
- WARN_ON_ONCE(1);
return AC_ERR_SYSTEM;
}
static void ata_tport_release(struct device *dev)
{
- put_device(dev->parent);
}
/**
device_initialize(dev);
dev->type = &ata_port_type;
- dev->parent = get_device(parent);
+ dev->parent = parent;
dev->release = ata_tport_release;
dev_set_name(dev, "ata%d", ap->print_id);
transport_setup_device(dev);
static void ata_tlink_release(struct device *dev)
{
- put_device(dev->parent);
}
/**
int error;
device_initialize(dev);
- dev->parent = get_device(&ap->tdev);
+ dev->parent = &ap->tdev;
dev->release = ata_tlink_release;
if (ata_is_host_link(link))
dev_set_name(dev, "link%d", ap->print_id);
static void ata_tdev_release(struct device *dev)
{
- put_device(dev->parent);
}
/**
int error;
device_initialize(dev);
- dev->parent = get_device(&link->tdev);
+ dev->parent = &link->tdev;
dev->release = ata_tdev_release;
if (ata_is_host_link(link))
dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno);
irq, err);
return err;
}
- omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
- priv->clk = of_clk_get(pdev->dev.of_node, 0);
+ priv->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
return -EPROBE_DEFER;
if (!IS_ERR(priv->clk)) {
dev_err(&pdev->dev, "unable to enable the clk, "
"err = %d\n", err);
}
+
+ /*
+ * On OMAP4, enabling the shutdown_oflo interrupt is
+ * done in the interrupt mask register. There is no
+ * such register on EIP76, and it's enabled by the
+ * same bit in the control register
+ */
+ if (priv->pdata->regs[RNG_INTMASK_REG])
+ omap_rng_write(priv, RNG_INTMASK_REG,
+ RNG_SHUTDOWN_OFLO_MASK);
+ else
+ omap_rng_write(priv, RNG_CONTROL_REG,
+ RNG_SHUTDOWN_OFLO_MASK);
}
return 0;
}
*/
static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS;
-/*
- * The minimum number of seconds between urandom pool reseeding. We
- * do this to limit the amount of entropy that can be drained from the
- * input pool even if there are heavy demands on /dev/urandom.
- */
-static int random_min_urandom_seed = 60;
-
/*
* Originally, we used a primitive polynomial of degree .poolwords
* over GF(2). The taps for various sizes are defined below. They
*/
static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
-static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait);
static struct fasync_struct *fasync;
static DEFINE_SPINLOCK(random_ready_list_lock);
int entropy_count;
int entropy_total;
unsigned int initialized:1;
- unsigned int limit:1;
unsigned int last_data_init:1;
__u8 last_data[EXTRACT_SIZE];
};
static struct entropy_store input_pool = {
.poolinfo = &poolinfo_table[0],
.name = "input",
- .limit = 1,
.lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
.pool = input_pool_data
};
static struct entropy_store blocking_pool = {
.poolinfo = &poolinfo_table[1],
.name = "blocking",
- .limit = 1,
.pull = &input_pool,
.lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
.pool = blocking_pool_data,
spin_unlock_irqrestore(&primary_crng.lock, flags);
}
-static inline void maybe_reseed_primary_crng(void)
-{
- if (crng_init > 2 &&
- time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
- crng_reseed(&primary_crng, &input_pool);
-}
-
static inline void crng_wait_ready(void)
{
wait_event_interruptible(crng_init_wait, crng_ready());
r->entropy_count > r->poolinfo->poolfracbits)
return;
- if (r->limit == 0 && random_min_urandom_seed) {
- unsigned long now = jiffies;
-
- if (time_before(now,
- r->last_pulled + random_min_urandom_seed * HZ))
- return;
- r->last_pulled = now;
- }
-
_xfer_secondary_pool(r, nbytes);
}
{
__u32 tmp[OUTPUT_POOL_WORDS];
- /* For /dev/random's pool, always leave two wakeups' worth */
- int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4;
int bytes = nbytes;
/* pull at least as much as a wakeup */
trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8,
ENTROPY_BITS(r), ENTROPY_BITS(r->pull));
bytes = extract_entropy(r->pull, tmp, bytes,
- random_read_wakeup_bits / 8, rsvd_bytes);
+ random_read_wakeup_bits / 8, 0);
mix_pool_bytes(r, tmp, bytes);
credit_entropy_bits(r, bytes*8);
}
static size_t account(struct entropy_store *r, size_t nbytes, int min,
int reserved)
{
- int entropy_count, orig;
+ int entropy_count, orig, have_bytes;
size_t ibytes, nfrac;
BUG_ON(r->entropy_count > r->poolinfo->poolfracbits);
retry:
entropy_count = orig = ACCESS_ONCE(r->entropy_count);
ibytes = nbytes;
- /* If limited, never pull more than available */
- if (r->limit) {
- int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
+ /* never pull more than available */
+ have_bytes = entropy_count >> (ENTROPY_SHIFT + 3);
- if ((have_bytes -= reserved) < 0)
- have_bytes = 0;
- ibytes = min_t(size_t, ibytes, have_bytes);
- }
+ if ((have_bytes -= reserved) < 0)
+ have_bytes = 0;
+ ibytes = min_t(size_t, ibytes, have_bytes);
if (ibytes < min)
ibytes = 0;
static int min_read_thresh = 8, min_write_thresh;
static int max_read_thresh = OUTPUT_POOL_WORDS * 32;
static int max_write_thresh = INPUT_POOL_WORDS * 32;
+static int random_min_urandom_seed = 60;
static char sysctl_bootid[16];
/*
};
#endif /* CONFIG_SYSCTL */
-static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
-
-int random_int_secret_init(void)
-{
- get_random_bytes(random_int_secret, sizeof(random_int_secret));
- return 0;
-}
-
-static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash)
- __aligned(sizeof(unsigned long));
+struct batched_entropy {
+ union {
+ u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)];
+ u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)];
+ };
+ unsigned int position;
+};
/*
- * Get a random word for internal kernel use only. Similar to urandom but
- * with the goal of minimal entropy pool depletion. As a result, the random
- * value is not cryptographically secure but for several uses the cost of
- * depleting entropy is too high
+ * Get a random word for internal kernel use only. The quality of the random
+ * number is either as good as RDRAND or as good as /dev/urandom, with the
+ * goal of being quite fast and not depleting entropy.
*/
-unsigned int get_random_int(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64);
+u64 get_random_u64(void)
{
- __u32 *hash;
- unsigned int ret;
+ u64 ret;
+ struct batched_entropy *batch;
- if (arch_get_random_int(&ret))
+#if BITS_PER_LONG == 64
+ if (arch_get_random_long((unsigned long *)&ret))
return ret;
+#else
+ if (arch_get_random_long((unsigned long *)&ret) &&
+ arch_get_random_long((unsigned long *)&ret + 1))
+ return ret;
+#endif
- hash = get_cpu_var(get_random_int_hash);
-
- hash[0] += current->pid + jiffies + random_get_entropy();
- md5_transform(hash, random_int_secret);
- ret = hash[0];
- put_cpu_var(get_random_int_hash);
-
+ batch = &get_cpu_var(batched_entropy_u64);
+ if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
+ extract_crng((u8 *)batch->entropy_u64);
+ batch->position = 0;
+ }
+ ret = batch->entropy_u64[batch->position++];
+ put_cpu_var(batched_entropy_u64);
return ret;
}
-EXPORT_SYMBOL(get_random_int);
+EXPORT_SYMBOL(get_random_u64);
-/*
- * Same as get_random_int(), but returns unsigned long.
- */
-unsigned long get_random_long(void)
+static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32);
+u32 get_random_u32(void)
{
- __u32 *hash;
- unsigned long ret;
+ u32 ret;
+ struct batched_entropy *batch;
- if (arch_get_random_long(&ret))
+ if (arch_get_random_int(&ret))
return ret;
- hash = get_cpu_var(get_random_int_hash);
-
- hash[0] += current->pid + jiffies + random_get_entropy();
- md5_transform(hash, random_int_secret);
- ret = *(unsigned long *)hash;
- put_cpu_var(get_random_int_hash);
-
+ batch = &get_cpu_var(batched_entropy_u32);
+ if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
+ extract_crng((u8 *)batch->entropy_u32);
+ batch->position = 0;
+ }
+ ret = batch->entropy_u32[batch->position++];
+ put_cpu_var(batched_entropy_u32);
return ret;
}
-EXPORT_SYMBOL(get_random_long);
+EXPORT_SYMBOL(get_random_u32);
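The hunks above replace the old MD5-based get_random_int()/get_random_long() with per-CPU ChaCha20-backed batches exported as get_random_u32() and get_random_u64(). A minimal, hypothetical caller-side sketch (only the two helper names come from the exports added above; the surrounding function names are invented for illustration):

#include <linux/random.h>
#include <linux/types.h>

/* Illustrative only: cheap, non-blocking randomness for internal kernel
 * use; not suitable for cryptographic secrets unless the CRNG is known
 * to be initialized.
 */
static u32 pick_start_index(u32 nr_entries)
{
	return nr_entries ? get_random_u32() % nr_entries : 0;
}

static u64 pick_cookie(void)
{
	return get_random_u64();
}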
/**
* randomize_page - Generate a random, page aligned address
scatterwalk_done(&walk, out, 0);
}
-static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+static void s5p_sg_done(struct s5p_aes_dev *dev)
{
if (dev->sg_dst_cpy) {
dev_dbg(dev->dev,
}
s5p_free_sg_cpy(dev, &dev->sg_src_cpy);
s5p_free_sg_cpy(dev, &dev->sg_dst_cpy);
+}
- /* holding a lock outside */
+/* Calls the completion. Cannot be called with dev->lock held. */
+static void s5p_aes_complete(struct s5p_aes_dev *dev, int err)
+{
dev->req->base.complete(&dev->req->base, err);
dev->busy = false;
}
}
/*
- * Returns true if new transmitting (output) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_outdata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ * - 0 if there is no more data,
+ * - 1 if new transmitting (output) data is ready and its address+length
+ * have to be written to device (by calling s5p_set_dma_outdata()).
*/
-static bool s5p_aes_tx(struct s5p_aes_dev *dev)
+static int s5p_aes_tx(struct s5p_aes_dev *dev)
{
- int err = 0;
- bool ret = false;
+ int ret = 0;
s5p_unset_outdata(dev);
if (!sg_is_last(dev->sg_dst)) {
- err = s5p_set_outdata(dev, sg_next(dev->sg_dst));
- if (err)
- s5p_aes_complete(dev, err);
- else
- ret = true;
- } else {
- s5p_aes_complete(dev, err);
-
- dev->busy = true;
- tasklet_schedule(&dev->tasklet);
+ ret = s5p_set_outdata(dev, sg_next(dev->sg_dst));
+ if (!ret)
+ ret = 1;
}
return ret;
}
/*
- * Returns true if new receiving (input) data is ready and its
- * address+length have to be written to device (by calling
- * s5p_set_dma_indata()). False otherwise.
+ * Returns -ERRNO on error (mapping of new data failed).
+ * On success returns:
+ * - 0 if there is no more data,
+ * - 1 if new receiving (input) data is ready and its address+length
+ * have to be written to device (by calling s5p_set_dma_indata()).
*/
-static bool s5p_aes_rx(struct s5p_aes_dev *dev)
+static int s5p_aes_rx(struct s5p_aes_dev *dev)
{
- int err;
- bool ret = false;
+ int ret = 0;
s5p_unset_indata(dev);
if (!sg_is_last(dev->sg_src)) {
- err = s5p_set_indata(dev, sg_next(dev->sg_src));
- if (err)
- s5p_aes_complete(dev, err);
- else
- ret = true;
+ ret = s5p_set_indata(dev, sg_next(dev->sg_src));
+ if (!ret)
+ ret = 1;
}
return ret;
{
struct platform_device *pdev = dev_id;
struct s5p_aes_dev *dev = platform_get_drvdata(pdev);
- bool set_dma_tx = false;
- bool set_dma_rx = false;
+ int err_dma_tx = 0;
+ int err_dma_rx = 0;
+ bool tx_end = false;
unsigned long flags;
uint32_t status;
+ int err;
spin_lock_irqsave(&dev->lock, flags);
+ /*
+ * Handle rx or tx interrupt. If there is still data (scatterlist did not
+ * reach end), then map next scatterlist entry.
+ * In case of such mapping error, s5p_aes_complete() should be called.
+ *
+ * If there is no more data in tx scatter list, call s5p_aes_complete()
+ * and schedule new tasklet.
+ */
status = SSS_READ(dev, FCINTSTAT);
if (status & SSS_FCINTSTAT_BRDMAINT)
- set_dma_rx = s5p_aes_rx(dev);
- if (status & SSS_FCINTSTAT_BTDMAINT)
- set_dma_tx = s5p_aes_tx(dev);
+ err_dma_rx = s5p_aes_rx(dev);
+
+ if (status & SSS_FCINTSTAT_BTDMAINT) {
+ if (sg_is_last(dev->sg_dst))
+ tx_end = true;
+ err_dma_tx = s5p_aes_tx(dev);
+ }
SSS_WRITE(dev, FCINTPEND, status);
- /*
- * Writing length of DMA block (either receiving or transmitting)
- * will start the operation immediately, so this should be done
- * at the end (even after clearing pending interrupts to not miss the
- * interrupt).
- */
- if (set_dma_tx)
- s5p_set_dma_outdata(dev, dev->sg_dst);
- if (set_dma_rx)
- s5p_set_dma_indata(dev, dev->sg_src);
+ if (err_dma_rx < 0) {
+ err = err_dma_rx;
+ goto error;
+ }
+ if (err_dma_tx < 0) {
+ err = err_dma_tx;
+ goto error;
+ }
+
+ if (tx_end) {
+ s5p_sg_done(dev);
+
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ s5p_aes_complete(dev, 0);
+ dev->busy = true;
+ tasklet_schedule(&dev->tasklet);
+ } else {
+ /*
+ * Writing length of DMA block (either receiving or
+ * transmitting) will start the operation immediately, so this
+ * should be done at the end (even after clearing pending
+ * interrupts to not miss the interrupt).
+ */
+ if (err_dma_tx == 1)
+ s5p_set_dma_outdata(dev, dev->sg_dst);
+ if (err_dma_rx == 1)
+ s5p_set_dma_indata(dev, dev->sg_src);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ }
+
+ return IRQ_HANDLED;
+
+error:
+ s5p_sg_done(dev);
spin_unlock_irqrestore(&dev->lock, flags);
+ s5p_aes_complete(dev, err);
return IRQ_HANDLED;
}
s5p_unset_indata(dev);
indata_error:
- s5p_aes_complete(dev, err);
+ s5p_sg_done(dev);
spin_unlock_irqrestore(&dev->lock, flags);
+ s5p_aes_complete(dev, err);
}
static void s5p_tasklet_cb(unsigned long data)
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
}
- err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt,
- IRQF_SHARED, pdev->name, pdev);
+ err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL,
+ s5p_aes_interrupt, IRQF_ONESHOT,
+ pdev->name, pdev);
if (err < 0) {
dev_warn(dev, "feed control interrupt is not available.\n");
goto err_irq;
static int __init crossbar_of_init(struct device_node *node)
{
+ u32 max = 0, entry, reg_size;
int i, size, reserved = 0;
- u32 max = 0, entry;
const __be32 *irqsr;
int ret = -ENOMEM;
if (!cb->register_offsets)
goto err_irq_map;
- of_property_read_u32(node, "ti,reg-size", &size);
+ of_property_read_u32(node, "ti,reg-size", &reg_size);
- switch (size) {
+ switch (reg_size) {
case 1:
cb->write = crossbar_writeb;
break;
continue;
cb->register_offsets[i] = reserved;
- reserved += size;
+ reserved += reg_size;
}
of_property_read_u32(node, "ti,irqs-safe-map", &cb->safe_map);
its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144;
}
+static void __maybe_unused its_enable_quirk_qdf2400_e0065(void *data)
+{
+ struct its_node *its = data;
+
+ /* On QDF2400, the size of the ITE is 16 bytes */
+ its->ite_size = 16;
+}
+
static const struct gic_quirk its_quirks[] = {
#ifdef CONFIG_CAVIUM_ERRATUM_22375
{
.mask = 0xffff0fff,
.init = its_enable_quirk_cavium_23144,
},
+#endif
+#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065
+ {
+ .desc = "ITS: QDF2400 erratum 0065",
+ .iidr = 0x00001070, /* QDF2400 ITS rev 1.x */
+ .mask = 0xffffffff,
+ .init = its_enable_quirk_qdf2400_e0065,
+ },
#endif
{
}
return -ENODEV;
}
+ if (hostif->desc.bNumEndpoints < 1)
+ return -ENODEV;
+
dev_info(&udev->dev,
"%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n",
__func__, le16_to_cpu(udev->descriptor.idVendor),
* byte, not the size, hence the "-1").
*/
state->host_gdt_desc.size = GDT_SIZE-1;
- state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
+ state->host_gdt_desc.address = (long)get_cpu_gdt_rw(i);
/*
* All CPUs on the Host use the same Interrupt Descriptor
* The Host needs to be able to use the LGUEST segments on this
* CPU, too, so put them in the Host GDT.
*/
- get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
- get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
+ get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
+ get_cpu_gdt_rw(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
}
/*
* To get all the fields, copy all archdata
*/
dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata;
+ dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops;
#endif /* CONFIG_PCI */
#ifdef DEBUG
struct dm_offload *o = container_of(cb, struct dm_offload, cb);
struct bio_list list;
struct bio *bio;
+ int i;
INIT_LIST_HEAD(&o->cb.list);
if (unlikely(!current->bio_list))
return;
- list = *current->bio_list;
- bio_list_init(current->bio_list);
-
- while ((bio = bio_list_pop(&list))) {
- struct bio_set *bs = bio->bi_pool;
- if (unlikely(!bs) || bs == fs_bio_set) {
- bio_list_add(current->bio_list, bio);
- continue;
+ for (i = 0; i < 2; i++) {
+ list = current->bio_list[i];
+ bio_list_init(&current->bio_list[i]);
+
+ while ((bio = bio_list_pop(&list))) {
+ struct bio_set *bs = bio->bi_pool;
+ if (unlikely(!bs) || bs == fs_bio_set) {
+ bio_list_add(&current->bio_list[i], bio);
+ continue;
+ }
+
+ spin_lock(&bs->rescue_lock);
+ bio_list_add(&bs->rescue_list, bio);
+ queue_work(bs->rescue_workqueue, &bs->rescue_work);
+ spin_unlock(&bs->rescue_lock);
}
-
- spin_lock(&bs->rescue_lock);
- bio_list_add(&bs->rescue_list, bio);
- queue_work(bs->rescue_workqueue, &bs->rescue_work);
- spin_unlock(&bs->rescue_lock);
}
}
bm_lockres->flags |= DLM_LKF_NOQUEUE;
ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
if (ret == -EAGAIN) {
- memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
s = read_resync_info(mddev, bm_lockres);
if (s) {
pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
lockres_free(cinfo->bitmap_lockres);
unlock_all_bitmaps(mddev);
dlm_release_lockspace(cinfo->lockspace, 2);
+ kfree(cinfo);
return 0;
}
}
EXPORT_SYMBOL(md_flush_request);
-void md_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
- struct mddev *mddev = cb->data;
- md_wakeup_thread(mddev->thread);
- kfree(cb);
-}
-EXPORT_SYMBOL(md_unplug);
-
static inline struct mddev *mddev_get(struct mddev *mddev)
{
atomic_inc(&mddev->active);
}
sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
- sb->super_offset = rdev->sb_start;
+ sb->super_offset = cpu_to_le64(rdev->sb_start);
sb->sb_csum = calc_sb_1_csum(sb);
do {
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
/* Check if any mddev parameters have changed */
if ((mddev->dev_sectors != le64_to_cpu(sb->size)) ||
(mddev->reshape_position != le64_to_cpu(sb->reshape_position)) ||
- (mddev->layout != le64_to_cpu(sb->layout)) ||
+ (mddev->layout != le32_to_cpu(sb->layout)) ||
(mddev->raid_disks != le32_to_cpu(sb->raid_disks)) ||
(mddev->chunk_sectors != le32_to_cpu(sb->chunksize)))
return true;
mddev->layout = info->layout;
mddev->chunk_sectors = info->chunk_size >> 9;
- mddev->max_disks = MD_SB_DISKS;
-
if (mddev->persistent) {
- mddev->flags = 0;
- mddev->sb_flags = 0;
+ mddev->max_disks = MD_SB_DISKS;
+ mddev->flags = 0;
+ mddev->sb_flags = 0;
}
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return -ENOSPC;
}
rv = mddev->pers->resize(mddev, num_sectors);
- if (!rv)
- revalidate_disk(mddev->gendisk);
+ if (!rv) {
+ if (mddev->queue) {
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
+ }
+ }
return rv;
}
extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
struct mddev *mddev);
-extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
-static inline int mddev_check_plugged(struct mddev *mddev)
-{
- return !!blk_check_plugged(md_unplug, mddev,
- sizeof(struct blk_plug_cb));
-}
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
static void freeze_array(struct r1conf *conf, int extra)
{
/* Stop sync I/O and normal I/O and wait for everything to
- * go quite.
+ * go quiet.
* This is called in two situations:
* 1) management command handlers (reshape, remove disk, quiesce).
* 2) one normal I/O request failed.
split = bio;
}
- if (bio_data_dir(split) == READ)
+ if (bio_data_dir(split) == READ) {
raid1_read_request(mddev, split);
- else
+
+ /*
+ * If a bio is split, the first part of the bio will
+ * pass barrier but the bio is queued in
+ * current->bio_list (see generic_make_request). If
+ * there is a raise_barrier() called here, the second
+ * part of bio can't pass barrier. But since the first
+ * part of the bio isn't dispatched to underlying disks yet,
+ * the barrier is never released, hence raise_barrier
+ * will always wait. We have a deadlock.
+ * Note, this only happens in read path. For write
+ * path, the first part of bio is dispatched in a
+ * schedule() call (because of blk plug) or offloaded
+ * to raid10d.
+ * Quitting from the function immediately can change
+ * the bio order queued in bio_list and avoid the deadlock.
+ */
+ if (split != bio) {
+ generic_make_request(bio);
+ break;
+ }
+ } else
raid1_write_request(mddev, split);
} while (split != bio);
}
return ret;
}
md_set_array_sectors(mddev, newsize);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
!conf->barrier ||
(atomic_read(&conf->nr_pending) &&
current->bio_list &&
- !bio_list_empty(current->bio_list)),
+ (!bio_list_empty(&current->bio_list[0]) ||
+ !bio_list_empty(&current->bio_list[1]))),
conf->resync_lock);
conf->nr_waiting--;
if (!conf->nr_waiting)
mbio->bi_bdev = (void*)rdev;
atomic_inc(&r10_bio->remaining);
+
+ cb = blk_check_plugged(raid10_unplug, mddev,
+ sizeof(*plug));
+ if (cb)
+ plug = container_of(cb, struct raid10_plug_cb,
+ cb);
+ else
+ plug = NULL;
spin_lock_irqsave(&conf->device_lock, flags);
- bio_list_add(&conf->pending_bio_list, mbio);
- conf->pending_count++;
+ if (plug) {
+ bio_list_add(&plug->pending, mbio);
+ plug->pending_cnt++;
+ } else {
+ bio_list_add(&conf->pending_bio_list, mbio);
+ conf->pending_count++;
+ }
spin_unlock_irqrestore(&conf->device_lock, flags);
- if (!mddev_check_plugged(mddev))
+ if (!plug)
md_wakeup_thread(mddev->thread);
}
}
split = bio;
}
+ /*
+ * If a bio is split, the first part of the bio will pass
+ * barrier but the bio is queued in current->bio_list (see
+ * generic_make_request). If there is a raise_barrier() called
+ * here, the second part of bio can't pass barrier. But since
+ * the first part of the bio isn't dispatched to underlying disks
+ * yet, the barrier is never released, hence raise_barrier will
+ * always wait. We have a deadlock.
+ * Note, this only happens in read path. For write path, the
+ * first part of bio is dispatched in a schedule() call
+ * (because of blk plug) or offloaded to raid10d.
+ * Quitting from the function immediately can change the bio
+ * order queued in bio_list and avoid the deadlock.
+ */
__make_request(mddev, split);
+ if (split != bio && bio_data_dir(bio) == READ) {
+ generic_make_request(bio);
+ break;
+ }
} while (split != bio);
/* In case raid10d snuck in to freeze_array */
return ret;
}
md_set_array_sectors(mddev, size);
- if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
- }
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > oldsize) {
mddev->recovery_cp = oldsize;
(test_bit(R5_Wantdrain, &dev->flags) ||
test_bit(R5_InJournal, &dev->flags))) ||
(srctype == SYNDROME_SRC_WRITTEN &&
- dev->written)) {
+ (dev->written ||
+ test_bit(R5_InJournal, &dev->flags)))) {
if (test_bit(R5_InJournal, &dev->flags))
srcs[slot] = sh->dev[i].orig_page;
else
return ret;
}
md_set_array_sectors(mddev, newsize);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp > mddev->dev_sectors) {
mddev->recovery_cp = mddev->dev_sectors;
processed = xgbe_rx_poll(channel, budget);
/* If we processed everything, we are done */
- if (processed < budget) {
- /* Turn off polling */
- napi_complete_done(napi, processed);
-
+ if ((processed < budget) && napi_complete_done(napi, processed)) {
/* Enable Tx and Rx interrupts */
if (pdata->channel_irq_mode)
xgbe_enable_rx_tx_int(pdata, channel);
} while ((processed < budget) && (processed != last_processed));
/* If we processed everything, we are done */
- if (processed < budget) {
- /* Turn off polling */
- napi_complete_done(napi, processed);
-
+ if ((processed < budget) && napi_complete_done(napi, processed)) {
/* Enable Tx and Rx interrupts */
xgbe_enable_rx_tx_ints(pdata);
}
if (!((1U << i) & self->msix_entry_mask))
continue;
- free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
if (pdev->msix_enabled)
irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL);
+ free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]);
self->msix_entry_mask &= ~(1U << i);
}
}
dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA;
- /* VF with OLD Hypervisor or old PF do not support filtering */
if (IS_PF(bp)) {
if (chip_is_e1x)
bp->accept_any_vlan = true;
else
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#ifdef CONFIG_BNX2X_SRIOV
- } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
-#endif
}
+ /* For VF we'll know whether to enable VLAN filtering after
+ * getting a response to CHANNEL_TLV_ACQUIRE from PF.
+ */
dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX;
dev->features |= NETIF_F_HIGHDMA;
if (!netif_running(bp->dev)) {
DP(BNX2X_MSG_PTP,
"PTP adjfreq called while the interface is down\n");
- return -EFAULT;
+ return -ENETDOWN;
}
if (ppb < 0) {
{
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP adjtime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta);
timecounter_adjtime(&bp->timecounter, delta);
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP gettime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timecounter_read(&bp->timecounter);
DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns);
struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info);
u64 ns;
+ if (!netif_running(bp->dev)) {
+ DP(BNX2X_MSG_PTP,
+ "PTP settime called while the interface is down\n");
+ return -ENETDOWN;
+ }
+
ns = timespec64_to_ns(ts);
DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns);
rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count);
if (rc)
goto init_one_freemem;
+
+#ifdef CONFIG_BNX2X_SRIOV
+ /* VF with OLD Hypervisor or old PF do not support filtering */
+ if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) {
+ dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ }
+#endif
}
/* Enable SRIOV if capability found in configuration space */
/* Add/Remove the filter */
rc = bnx2x_config_vlan_mac(bp, &ramrod);
- if (rc && rc != -EEXIST) {
+ if (rc == -EEXIST)
+ return 0;
+ if (rc) {
BNX2X_ERR("Failed to %s %s\n",
filter->add ? "add" : "delete",
(filter->type == BNX2X_VF_FILTER_VLAN_MAC) ?
return rc;
}
+ filter->applied = true;
+
return 0;
}
/* Rollback if needed */
if (i != filters->count) {
BNX2X_ERR("Managed only %d/%d filters - rolling back\n",
- i, filters->count + 1);
+ i, filters->count);
while (--i >= 0) {
+ if (!filters->filters[i].applied)
+ continue;
filters->filters[i].add = !filters->filters[i].add;
bnx2x_vf_mac_vlan_config(bp, vf, qid,
&filters->filters[i],
continue;
}
- DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid);
+ DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+ "add addresses for vf %d\n", vf->abs_vfid);
for_each_vfq(vf, j) {
struct bnx2x_vf_queue *rxq = vfq_get(vf, j);
cpu_to_le32(U64_HI(q_stats_addr));
cur_query_entry->address.lo =
cpu_to_le32(U64_LO(q_stats_addr));
- DP(BNX2X_MSG_IOV,
- "added address %x %x for vf %d queue %d client %d\n",
- cur_query_entry->address.hi,
- cur_query_entry->address.lo, cur_query_entry->funcID,
- j, cur_query_entry->index);
+ DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS),
+ "added address %x %x for vf %d queue %d client %d\n",
+ cur_query_entry->address.hi,
+ cur_query_entry->address.lo,
+ cur_query_entry->funcID,
+ j, cur_query_entry->index);
cur_query_entry++;
cur_data_offset += sizeof(struct per_queue_stats);
stats_count++;
(BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/
bool add;
+ bool applied;
u8 *mac;
u16 vid;
};
struct bnx2x *bp = netdev_priv(dev);
struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
- int rc, i = 0;
+ int rc = 0, i = 0;
struct netdev_hw_addr *ha;
if (bp->state != BNX2X_STATE_OPEN) {
/* Get Rx mode requested */
DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags);
+ /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */
+ if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) {
+ DP(NETIF_MSG_IFUP,
+ "VF supports not more than %d multicast MAC addresses\n",
+ PFVF_MAX_MULTICAST_PER_VF);
+ rc = -EINVAL;
+ goto out;
+ }
+
netdev_for_each_mc_addr(ha, dev) {
DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
bnx2x_mc_addr(ha));
i++;
}
- /* We support four PFVF_MAX_MULTICAST_PER_VF mcast
- * addresses tops
- */
- if (i >= PFVF_MAX_MULTICAST_PER_VF) {
- DP(NETIF_MSG_IFUP,
- "VF supports not more than %d multicast MAC addresses\n",
- PFVF_MAX_MULTICAST_PER_VF);
- return -EINVAL;
- }
-
req->n_multicast = i;
req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED;
req->vf_qid = 0;
out:
bnx2x_vfpf_finalize(bp, &req->first_tlv);
- return 0;
+ return rc;
}
/* request pf to add a vlan for the vf */
goto op_err;
}
+ /* build vlan list */
+ fl = NULL;
+
+ rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl,
+ VFPF_VLAN_FILTER);
+ if (rc)
+ goto op_err;
+
+ if (fl) {
+ /* set vlan list */
+ rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl,
+ msg->vf_qid,
+ false);
+ if (rc)
+ goto op_err;
+ }
+
}
if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) {
vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK;
}
#endif
+ if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) &
+ FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED))
+ bp->flags |= BNXT_FLAG_FW_LLDP_AGENT;
+
switch (resp->port_partition_type) {
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0:
case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5:
bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
}
- link_info->support_auto_speeds =
- le16_to_cpu(resp->supported_speeds_auto_mode);
+ if (resp->supported_speeds_auto_mode)
+ link_info->support_auto_speeds =
+ le16_to_cpu(resp->supported_speeds_auto_mode);
hwrm_phy_qcaps_exit:
mutex_unlock(&bp->hwrm_cmd_lock);
if (!silent)
bnxt_dbg_dump_states(bp);
if (netif_running(bp->dev)) {
+ int rc;
+
+ if (!silent)
+ bnxt_ulp_stop(bp);
bnxt_close_nic(bp, false, false);
- bnxt_open_nic(bp, false, false);
+ rc = bnxt_open_nic(bp, false, false);
+ if (!silent && !rc)
+ bnxt_ulp_start(bp);
}
}
if (rc)
goto init_err_pci_clean;
+ rc = bnxt_hwrm_func_reset(bp);
+ if (rc)
+ goto init_err_pci_clean;
+
bnxt_hwrm_fw_set_time(bp);
dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
if (rc)
goto init_err_pci_clean;
- rc = bnxt_hwrm_func_reset(bp);
- if (rc)
- goto init_err_pci_clean;
-
rc = bnxt_init_int_mode(bp);
if (rc)
goto init_err_pci_clean;
BNXT_FLAG_ROCEV2_CAP)
#define BNXT_FLAG_NO_AGG_RINGS 0x20000
#define BNXT_FLAG_RX_PAGE_MODE 0x40000
+ #define BNXT_FLAG_FW_LLDP_AGENT 0x80000
#define BNXT_FLAG_CHIP_NITRO_A0 0x1000000
#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
return;
bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE;
- if (BNXT_PF(bp))
+ if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT))
bp->dcbx_cap |= DCB_CAP_DCBX_HOST;
else
bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
/*
* Broadcom GENET (Gigabit Ethernet) controller driver
*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
genet_dma_ring_regs[r]);
}
+static int bcmgenet_begin(struct net_device *dev)
+{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
+ /* Turn on the clock */
+ return clk_prepare_enable(priv->clk);
+}
+
+static void bcmgenet_complete(struct net_device *dev)
+{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+
+ /* Turn off the clock */
+ clk_disable_unprepare(priv->clk);
+}
+
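For reference, and not part of the patch: when a driver provides .begin and .complete, the ethtool core calls .begin before each ethtool operation (aborting it if .begin fails) and .complete afterwards, which is why a clock enable/disable pair fits these hooks; they are wired into bcmgenet_ethtool_ops further down. A hypothetical sketch of that bracketing, written as an assumption about the core's behaviour rather than a quote of it:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

/* Illustrative only: how an ethtool operation is bracketed by the
 * driver's begin/complete hooks when they are present.
 */
static int run_ethtool_op(struct net_device *dev,
			  int (*op)(struct net_device *))
{
	int rc = 0;

	if (dev->ethtool_ops->begin) {
		rc = dev->ethtool_ops->begin(dev);
		if (rc < 0)
			return rc;
	}

	rc = op(dev);

	if (dev->ethtool_ops->complete)
		dev->ethtool_ops->complete(dev);

	return rc;
}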
static int bcmgenet_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd)
{
STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes),
/* Misc UniMAC counters */
STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt,
- UMAC_RBUF_OVFL_CNT),
- STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT),
+ UMAC_RBUF_OVFL_CNT_V1),
+ STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt,
+ UMAC_RBUF_ERR_CNT_V1),
STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT),
STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
}
}
+static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset)
+{
+ u16 new_offset;
+ u32 val;
+
+ switch (offset) {
+ case UMAC_RBUF_OVFL_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_OVFL_CNT_V2;
+ else
+ new_offset = RBUF_OVFL_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ case UMAC_RBUF_ERR_CNT_V1:
+ if (GENET_IS_V2(priv))
+ new_offset = RBUF_ERR_CNT_V2;
+ else
+ new_offset = RBUF_ERR_CNT_V3PLUS;
+
+ val = bcmgenet_rbuf_readl(priv, new_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_rbuf_writel(priv, 0, new_offset);
+ break;
+ default:
+ val = bcmgenet_umac_readl(priv, offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0, offset);
+ break;
+ }
+
+ return val;
+}
+
static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv)
{
int i, j = 0;
case BCMGENET_STAT_NETDEV:
case BCMGENET_STAT_SOFT:
continue;
- case BCMGENET_STAT_MIB_RX:
- case BCMGENET_STAT_MIB_TX:
case BCMGENET_STAT_RUNT:
- if (s->type != BCMGENET_STAT_MIB_RX)
- offset = BCMGENET_STAT_OFFSET;
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_TX:
+ offset += BCMGENET_STAT_OFFSET;
+ /* fall through */
+ case BCMGENET_STAT_MIB_RX:
val = bcmgenet_umac_readl(priv,
UMAC_MIB_START + j + offset);
+ offset = 0; /* Reset Offset */
break;
case BCMGENET_STAT_MISC:
- val = bcmgenet_umac_readl(priv, s->reg_offset);
- /* clear if overflowed */
- if (val == ~0)
- bcmgenet_umac_writel(priv, 0, s->reg_offset);
+ if (GENET_IS_V1(priv)) {
+ val = bcmgenet_umac_readl(priv, s->reg_offset);
+ /* clear if overflowed */
+ if (val == ~0)
+ bcmgenet_umac_writel(priv, 0,
+ s->reg_offset);
+ } else {
+ val = bcmgenet_update_stat_misc(priv,
+ s->reg_offset);
+ }
break;
}
/* standard ethtool support functions. */
static const struct ethtool_ops bcmgenet_ethtool_ops = {
+ .begin = bcmgenet_begin,
+ .complete = bcmgenet_complete,
.get_strings = bcmgenet_get_strings,
.get_sset_count = bcmgenet_get_sset_count,
.get_ethtool_stats = bcmgenet_get_ethtool_stats,
struct bcmgenet_priv *priv = netdev_priv(dev);
struct device *kdev = &priv->pdev->dev;
struct enet_cb *tx_cb_ptr;
- struct netdev_queue *txq;
unsigned int pkts_compl = 0;
unsigned int bytes_compl = 0;
unsigned int c_index;
dev->stats.tx_packets += pkts_compl;
dev->stats.tx_bytes += bytes_compl;
- txq = netdev_get_tx_queue(dev, ring->queue);
- netdev_tx_completed_queue(txq, pkts_compl, bytes_compl);
-
- if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
- if (netif_tx_queue_stopped(txq))
- netif_tx_wake_queue(txq);
- }
+ netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue),
+ pkts_compl, bytes_compl);
return pkts_compl;
}
struct bcmgenet_tx_ring *ring =
container_of(napi, struct bcmgenet_tx_ring, napi);
unsigned int work_done = 0;
+ struct netdev_queue *txq;
+ unsigned long flags;
- work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
+ spin_lock_irqsave(&ring->lock, flags);
+ work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring);
+ if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
+ txq = netdev_get_tx_queue(ring->priv->dev, ring->queue);
+ netif_tx_wake_queue(txq);
+ }
+ spin_unlock_irqrestore(&ring->lock, flags);
if (work_done == 0) {
napi_complete(napi);
/* Interrupt bottom half */
static void bcmgenet_irq_task(struct work_struct *work)
{
+ unsigned long flags;
+ unsigned int status;
struct bcmgenet_priv *priv = container_of(
work, struct bcmgenet_priv, bcmgenet_irq_work);
netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
- if (priv->irq0_stat & UMAC_IRQ_MPD_R) {
- priv->irq0_stat &= ~UMAC_IRQ_MPD_R;
+ spin_lock_irqsave(&priv->lock, flags);
+ status = priv->irq0_stat;
+ priv->irq0_stat = 0;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (status & UMAC_IRQ_MPD_R) {
netif_dbg(priv, wol, priv->dev,
"magic packet detected, waking up\n");
bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
}
/* Link UP/DOWN event */
- if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
+ if (status & UMAC_IRQ_LINK_EVENT)
phy_mac_interrupt(priv->phydev,
- !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
- priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
- }
+ !!(status & UMAC_IRQ_LINK_UP));
}
/* bcmgenet_isr1: handle Rx and Tx priority queues */
struct bcmgenet_priv *priv = dev_id;
struct bcmgenet_rx_ring *rx_ring;
struct bcmgenet_tx_ring *tx_ring;
- unsigned int index;
+ unsigned int index, status;
- /* Save irq status for bottom-half processing. */
- priv->irq1_stat =
- bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
+ /* Read irq status */
+ status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
/* clear interrupts */
- bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
+ bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
- "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
+ "%s: IRQ=0x%x\n", __func__, status);
/* Check Rx priority queue interrupts */
for (index = 0; index < priv->hw_params->rx_queues; index++) {
- if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
+ if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index)))
continue;
rx_ring = &priv->rx_rings[index];
/* Check Tx priority queue interrupts */
for (index = 0; index < priv->hw_params->tx_queues; index++) {
- if (!(priv->irq1_stat & BIT(index)))
+ if (!(status & BIT(index)))
continue;
tx_ring = &priv->tx_rings[index];
struct bcmgenet_priv *priv = dev_id;
struct bcmgenet_rx_ring *rx_ring;
struct bcmgenet_tx_ring *tx_ring;
+ unsigned int status;
+ unsigned long flags;
- /* Save irq status for bottom-half processing. */
- priv->irq0_stat =
- bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
+ /* Read irq status */
+ status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) &
~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS);
/* clear interrupts */
- bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR);
+ bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
- "IRQ=0x%x\n", priv->irq0_stat);
+ "IRQ=0x%x\n", status);
- if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) {
+ if (status & UMAC_IRQ_RXDMA_DONE) {
rx_ring = &priv->rx_rings[DESC_INDEX];
if (likely(napi_schedule_prep(&rx_ring->napi))) {
}
}
- if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) {
+ if (status & UMAC_IRQ_TXDMA_DONE) {
tx_ring = &priv->tx_rings[DESC_INDEX];
if (likely(napi_schedule_prep(&tx_ring->napi))) {
}
}
- if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
- UMAC_IRQ_PHY_DET_F |
- UMAC_IRQ_LINK_EVENT |
- UMAC_IRQ_HFB_SM |
- UMAC_IRQ_HFB_MM |
- UMAC_IRQ_MPD_R)) {
- /* all other interested interrupts handled in bottom half */
- schedule_work(&priv->bcmgenet_irq_work);
- }
-
if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) &&
- priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
- priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR);
+ status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) {
wake_up(&priv->wq);
}
+ /* all other interested interrupts handled in bottom half */
+ status &= (UMAC_IRQ_LINK_EVENT |
+ UMAC_IRQ_MPD_R);
+ if (status) {
+ /* Save irq status for bottom-half processing. */
+ spin_lock_irqsave(&priv->lock, flags);
+ priv->irq0_stat |= status;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ schedule_work(&priv->bcmgenet_irq_work);
+ }
+
return IRQ_HANDLED;
}
err_fini_dma:
bcmgenet_fini_dma(priv);
err_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
*/
gphy_rev = reg & 0xffff;
+ /* This is reserved so should require special treatment */
+ if (gphy_rev == 0 || gphy_rev == 0x01ff) {
+ pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
+ return;
+ }
+
/* This is the good old scheme, just GPHY major, no minor nor patch */
if ((gphy_rev & 0xf0) != 0)
priv->gphy_rev = gphy_rev << 8;
else if ((gphy_rev & 0xff00) != 0)
priv->gphy_rev = gphy_rev;
- /* This is reserved so should require special treatment */
- else if (gphy_rev == 0 || gphy_rev == 0x01ff) {
- pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev);
- return;
- }
-
#ifdef CONFIG_PHYS_ADDR_T_64BIT
if (!(params->flags & GENET_HAS_40BITS))
pr_warn("GENET does not support 40-bits PA\n");
const void *macaddr;
struct resource *r;
int err = -EIO;
+ const char *phy_mode_str;
/* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */
dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1,
goto err;
}
+ spin_lock_init(&priv->lock);
+
SET_NETDEV_DEV(dev, &pdev->dev);
dev_set_drvdata(&pdev->dev, dev);
ether_addr_copy(dev->dev_addr, macaddr);
priv->clk_eee = NULL;
}
+ /* If this is an internal GPHY, power it on now, before UniMAC is
+ * brought out of reset as absolutely no UniMAC activity is allowed
+ */
+ if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) &&
+ !strcasecmp(phy_mode_str, "internal"))
+ bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
+
err = reset_umac(priv);
if (err)
goto err_clk_disable;
return 0;
out_clk_disable:
+ if (priv->internal_phy)
+ bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
clk_disable_unprepare(priv->clk);
return ret;
}
/*
- * Copyright (c) 2014 Broadcom Corporation
+ * Copyright (c) 2014-2017 Broadcom
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
#define MDIO_REG_SHIFT 16
#define MDIO_REG_MASK 0x1F
-#define UMAC_RBUF_OVFL_CNT 0x61C
+#define UMAC_RBUF_OVFL_CNT_V1 0x61C
+#define RBUF_OVFL_CNT_V2 0x80
+#define RBUF_OVFL_CNT_V3PLUS 0x94
#define UMAC_MPD_CTRL 0x620
#define MPD_EN (1 << 0)
#define UMAC_MPD_PW_MS 0x624
#define UMAC_MPD_PW_LS 0x628
-#define UMAC_RBUF_ERR_CNT 0x634
+#define UMAC_RBUF_ERR_CNT_V1 0x634
+#define RBUF_ERR_CNT_V2 0x84
+#define RBUF_ERR_CNT_V3PLUS 0x98
#define UMAC_MDF_ERR_CNT 0x638
#define UMAC_MDF_CTRL 0x650
#define UMAC_MDF_ADDR 0x654
struct work_struct bcmgenet_irq_work;
int irq0;
int irq1;
- unsigned int irq0_stat;
- unsigned int irq1_stat;
int wol_irq;
bool wol_irq_disabled;
+ /* shared status */
+ spinlock_t lock;
+ unsigned int irq0_stat;
+
/* HW descriptors/checksum variables */
bool desc_64b_en;
bool desc_rxchk_en;
*/
struct octeon_sg_entry *sg;
- u64 sg_dma_ptr;
+ dma_addr_t sg_dma_ptr;
};
struct handshake {
struct octnic_gather *g;
int i;
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+
if (!lio->glist)
return;
do {
g = (struct octnic_gather *)
list_delete_head(&lio->glist[i]);
- if (g) {
- if (g->sg) {
- dma_unmap_single(&lio->oct_dev->
- pci_dev->dev,
- g->sg_dma_ptr,
- g->sg_size,
- DMA_TO_DEVICE);
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
- }
+ if (g)
kfree(g);
- }
} while (g);
+
+ if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+ lio_dma_free(lio->oct_dev,
+ lio->glist_entry_size * lio->tx_qsize,
+ lio->glists_virt_base[i],
+ lio->glists_dma_base[i]);
+ }
}
- kfree((void *)lio->glist);
- kfree((void *)lio->glist_lock);
+ kfree(lio->glists_virt_base);
+ lio->glists_virt_base = NULL;
+
+ kfree(lio->glists_dma_base);
+ lio->glists_dma_base = NULL;
+
+ kfree(lio->glist);
+ lio->glist = NULL;
}
/**
lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock),
GFP_KERNEL);
if (!lio->glist_lock)
- return 1;
+ return -ENOMEM;
lio->glist = kcalloc(num_iqs, sizeof(*lio->glist),
GFP_KERNEL);
if (!lio->glist) {
- kfree((void *)lio->glist_lock);
- return 1;
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+ return -ENOMEM;
+ }
+
+ lio->glist_entry_size =
+ ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+ /* allocate memory to store virtual and dma base address of
+ * per glist consistent memory
+ */
+ lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+ GFP_KERNEL);
+ lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+ GFP_KERNEL);
+
+ if (!lio->glists_virt_base || !lio->glists_dma_base) {
+ delete_glists(lio);
+ return -ENOMEM;
}
for (i = 0; i < num_iqs; i++) {
INIT_LIST_HEAD(&lio->glist[i]);
+ lio->glists_virt_base[i] =
+ lio_dma_alloc(oct,
+ lio->glist_entry_size * lio->tx_qsize,
+ &lio->glists_dma_base[i]);
+
+ if (!lio->glists_virt_base[i]) {
+ delete_glists(lio);
+ return -ENOMEM;
+ }
+
for (j = 0; j < lio->tx_qsize; j++) {
g = kzalloc_node(sizeof(*g), GFP_KERNEL,
numa_node);
if (!g)
break;
- g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
- OCT_SG_ENTRY_SIZE);
+ g->sg = lio->glists_virt_base[i] +
+ (j * lio->glist_entry_size);
- g->sg = kmalloc_node(g->sg_size + 8,
- GFP_KERNEL, numa_node);
- if (!g->sg)
- g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
- if (!g->sg) {
- kfree(g);
- break;
- }
-
- /* The gather component should be aligned on 64-bit
- * boundary
- */
- if (((unsigned long)g->sg) & 7) {
- g->adjust = 8 - (((unsigned long)g->sg) & 7);
- g->sg = (struct octeon_sg_entry *)
- ((unsigned long)g->sg + g->adjust);
- }
- g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev,
- g->sg, g->sg_size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev,
- g->sg_dma_ptr)) {
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
- kfree(g);
- break;
- }
+ g->sg_dma_ptr = lio->glists_dma_base[i] +
+ (j * lio->glist_entry_size);
list_add_tail(&g->list, &lio->glist[i]);
}
if (j != lio->tx_qsize) {
delete_glists(lio);
- return 1;
+ return -ENOMEM;
}
}
i++;
}
- dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
- g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
list_add_tail(&g->list, &lio->glist[iq]);
i++;
}
- dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev,
- g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
i++;
}
- dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr,
- g->sg_size, DMA_TO_DEVICE);
dptr = g->sg_dma_ptr;
if (OCTEON_CN23XX_PF(oct))
* received from the IP layer.
*/
struct octeon_sg_entry *sg;
+
+ dma_addr_t sg_dma_ptr;
};
struct octeon_device_priv {
struct octnic_gather *g;
int i;
+ kfree(lio->glist_lock);
+ lio->glist_lock = NULL;
+
if (!lio->glist)
return;
do {
g = (struct octnic_gather *)
list_delete_head(&lio->glist[i]);
- if (g) {
- if (g->sg)
- kfree((void *)((unsigned long)g->sg -
- g->adjust));
+ if (g)
kfree(g);
- }
} while (g);
+
+ if (lio->glists_virt_base && lio->glists_virt_base[i]) {
+ lio_dma_free(lio->oct_dev,
+ lio->glist_entry_size * lio->tx_qsize,
+ lio->glists_virt_base[i],
+ lio->glists_dma_base[i]);
+ }
}
+ kfree(lio->glists_virt_base);
+ lio->glists_virt_base = NULL;
+
+ kfree(lio->glists_dma_base);
+ lio->glists_dma_base = NULL;
+
kfree(lio->glist);
- kfree(lio->glist_lock);
+ lio->glist = NULL;
}
/**
lio->glist_lock =
kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
if (!lio->glist_lock)
- return 1;
+ return -ENOMEM;
lio->glist =
kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
if (!lio->glist) {
kfree(lio->glist_lock);
- return 1;
+ lio->glist_lock = NULL;
+ return -ENOMEM;
+ }
+
+ lio->glist_entry_size =
+ ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE);
+
+ /* allocate memory to store virtual and dma base address of
+ * per glist consistent memory
+ */
+ lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base),
+ GFP_KERNEL);
+ lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base),
+ GFP_KERNEL);
+
+ if (!lio->glists_virt_base || !lio->glists_dma_base) {
+ delete_glists(lio);
+ return -ENOMEM;
}
for (i = 0; i < num_iqs; i++) {
INIT_LIST_HEAD(&lio->glist[i]);
+ lio->glists_virt_base[i] =
+ lio_dma_alloc(lio->oct_dev,
+ lio->glist_entry_size * lio->tx_qsize,
+ &lio->glists_dma_base[i]);
+
+ if (!lio->glists_virt_base[i]) {
+ delete_glists(lio);
+ return -ENOMEM;
+ }
+
for (j = 0; j < lio->tx_qsize; j++) {
g = kzalloc(sizeof(*g), GFP_KERNEL);
if (!g)
break;
- g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
- OCT_SG_ENTRY_SIZE);
+ g->sg = lio->glists_virt_base[i] +
+ (j * lio->glist_entry_size);
- g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
- if (!g->sg) {
- kfree(g);
- break;
- }
+ g->sg_dma_ptr = lio->glists_dma_base[i] +
+ (j * lio->glist_entry_size);
- /* The gather component should be aligned on 64-bit
- * boundary
- */
- if (((unsigned long)g->sg) & 7) {
- g->adjust = 8 - (((unsigned long)g->sg) & 7);
- g->sg = (struct octeon_sg_entry *)
- ((unsigned long)g->sg + g->adjust);
- }
list_add_tail(&g->list, &lio->glist[i]);
}
if (j != lio->tx_qsize) {
delete_glists(lio);
- return 1;
+ return -ENOMEM;
}
}
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
i++;
}
- dma_unmap_single(&lio->oct_dev->pci_dev->dev,
- finfo->dptr, g->sg_size,
- DMA_TO_DEVICE);
-
iq = skb_iq(lio, skb);
spin_lock(&lio->glist_lock[iq]);
i++;
}
- dptr = dma_map_single(&oct->pci_dev->dev,
- g->sg, g->sg_size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
- dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
- __func__);
- dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
- skb->len - skb->data_len,
- DMA_TO_DEVICE);
- for (j = 1; j <= frags; j++) {
- frag = &skb_shinfo(skb)->frags[j - 1];
- dma_unmap_page(&oct->pci_dev->dev,
- g->sg[j >> 2].ptr[j & 3],
- frag->size, DMA_TO_DEVICE);
- }
- return NETDEV_TX_BUSY;
- }
+ dptr = g->sg_dma_ptr;
ndata.cmd.cmd3.dptr = dptr;
finfo->dptr = dptr;
#define CN23XX_MAX_RINGS_PER_VF 8
#define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
-#define CN23XX_MAX_IQ_DESCRIPTORS 2048
+#define CN23XX_MAX_IQ_DESCRIPTORS 512
#define CN23XX_DB_MIN 1
#define CN23XX_DB_MAX 8
#define CN23XX_DB_TIMEOUT 1
#define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF
-#define CN23XX_MAX_OQ_DESCRIPTORS 2048
+#define CN23XX_MAX_OQ_DESCRIPTORS 512
#define CN23XX_OQ_BUF_SIZE 1536
#define CN23XX_OQ_PKTSPER_INTR 128
/*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
-#define CN23XX_OQ_REFIL_THRESHOLD 128
+#define CN23XX_OQ_REFIL_THRESHOLD 16
#define CN23XX_OQ_INTR_PKT 64
#define CN23XX_OQ_INTR_TIME 100
recv_buffer_destroy(droq->recv_buf_list[i].buffer,
pg_info);
- if (droq->desc_ring && droq->desc_ring[i].info_ptr)
- lio_unmap_ring_info(oct->pci_dev,
- (u64)droq->
- desc_ring[i].info_ptr,
- OCT_DROQ_INFO_SIZE);
droq->recv_buf_list[i].buffer = NULL;
}
vfree(droq->recv_buf_list);
if (droq->info_base_addr)
- cnnic_free_aligned_dma(oct->pci_dev, droq->info_list,
- droq->info_alloc_size,
- droq->info_base_addr,
- droq->info_list_dma);
+ lio_free_info_buffer(oct, droq);
if (droq->desc_ring)
lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no,
droq->max_count);
- droq->info_list =
- cnnic_numa_alloc_aligned_dma((droq->max_count *
- OCT_DROQ_INFO_SIZE),
- &droq->info_alloc_size,
- &droq->info_base_addr,
- numa_node);
+ droq->info_list = lio_alloc_info_buffer(oct, droq);
if (!droq->info_list) {
dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n");
lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE),
size_t desc_ring_dma;
/** Info ptr list are allocated at this virtual address. */
- size_t info_base_addr;
+ void *info_base_addr;
/** DMA mapped address of the info list */
- size_t info_list_dma;
+ dma_addr_t info_list_dma;
/** Allocated size of info list. */
u32 info_alloc_size;
return 1;
}
-static inline void *
-cnnic_numa_alloc_aligned_dma(u32 size,
- u32 *alloc_size,
- size_t *orig_ptr,
- int numa_node)
-{
- int retries = 0;
- void *ptr = NULL;
-
-#define OCTEON_MAX_ALLOC_RETRIES 1
- do {
- struct page *page = NULL;
-
- page = alloc_pages_node(numa_node,
- GFP_KERNEL,
- get_order(size));
- if (!page)
- page = alloc_pages(GFP_KERNEL,
- get_order(size));
- ptr = (void *)page_address(page);
- if ((unsigned long)ptr & 0x07) {
- __free_pages(page, get_order(size));
- ptr = NULL;
- /* Increment the size required if the first
- * attempt failed.
- */
- if (!retries)
- size += 7;
- }
- retries++;
- } while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr);
-
- *alloc_size = size;
- *orig_ptr = (unsigned long)ptr;
- if ((unsigned long)ptr & 0x07)
- ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL));
- return ptr;
-}
-
-#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
- free_pages(orig_ptr, get_order(size))
-
static inline int
sleep_cond(wait_queue_head_t *wait_queue, int *condition)
{
/** Array of gather component linked lists */
struct list_head *glist;
+ void **glists_virt_base;
+ dma_addr_t *glists_dma_base;
+ u32 glist_entry_size;
/** Pointer to the NIC properties for the Octeon device this network
* interface is associated with.
#define lio_dma_free(oct, size, virt_addr, dma_addr) \
dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
+static inline void *
+lio_alloc_info_buffer(struct octeon_device *oct,
+ struct octeon_droq *droq)
+{
+ void *virt_ptr;
+
+ virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE),
+ &droq->info_list_dma);
+ if (virt_ptr) {
+ droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE;
+ droq->info_base_addr = virt_ptr;
+ }
+
+ return virt_ptr;
+}
+
+static inline void lio_free_info_buffer(struct octeon_device *oct,
+ struct octeon_droq *droq)
+{
+ lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr,
+ droq->info_list_dma);
+}
+
static inline
void *get_rbd(struct sk_buff *skb)
{
static inline u64
lio_map_ring_info(struct octeon_droq *droq, u32 i)
{
- dma_addr_t dma_addr;
- struct octeon_device *oct = droq->oct_dev;
-
- dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i],
- OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE);
-
- WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
-
- return (u64)dma_addr;
-}
-
-static inline void
-lio_unmap_ring_info(struct pci_dev *pci_dev,
- u64 info_ptr, u32 size)
-{
- dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE);
+ return droq->info_list_dma + (i * sizeof(struct octeon_droq_info));
}
static inline u64
#define MAX_QUEUES_PER_QSET 8
struct queue_set *qs;
struct nicvf_cq_poll *napi[8];
+ void *iommu_domain;
u8 vf_id;
u8 sqs_id;
bool sqs_mode;
#include <linux/log2.h>
#include <linux/prefetch.h>
#include <linux/irq.h>
+#include <linux/iommu.h>
#include "nic_reg.h"
#include "nic.h"
/* Get actual TSO descriptors and free them */
tso_sqe =
(struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+ nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+ tso_sqe->subdesc_cnt);
nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+ } else {
+ nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
+ hdr->subdesc_cnt);
}
nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
prefetch(skb);
{
struct sk_buff *skb;
struct nicvf *nic = netdev_priv(netdev);
+ struct nicvf *snic = nic;
int err = 0;
int rq_idx;
if (err && !cqe_rx->rb_cnt)
return;
- skb = nicvf_get_rcv_skb(nic, cqe_rx);
+ skb = nicvf_get_rcv_skb(snic, cqe_rx);
if (!skb) {
netdev_dbg(nic->netdev, "Packet not received\n");
return;
if (!pass1_silicon(nic->pdev))
nic->hw_tso = true;
+ /* Get iommu domain for iova to physical addr conversion */
+ nic->iommu_domain = iommu_get_domain_for_dev(dev);
+
pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
if (sdevid == 0xA134)
nic->t88 = true;
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/etherdevice.h>
+#include <linux/iommu.h>
#include <net/ip.h>
#include <net/tso.h>
#include "q_struct.h"
#include "nicvf_queues.h"
+#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0)
+
+static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr)
+{
+ /* Translation is installed only when IOMMU is present */
+ if (nic->iommu_domain)
+ return iommu_iova_to_phys(nic->iommu_domain, dma_addr);
+ return dma_addr;
+}
+
static void nicvf_get_page(struct nicvf *nic)
{
if (!nic->rb_pageref || !nic->rb_page)
static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
u32 buf_len, u64 **rbuf)
{
- int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0;
+ int order = NICVF_PAGE_ORDER;
/* Check if request can be accomodated in previous allocated page */
if (nic->rb_page &&
}
nicvf_get_page(nic);
- nic->rb_page = NULL;
/* Allocate a new page */
+ nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+ order);
if (!nic->rb_page) {
- nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
- order);
- if (!nic->rb_page) {
- this_cpu_inc(nic->pnicvf->drv_stats->
- rcv_buffer_alloc_failures);
- return -ENOMEM;
- }
- nic->rb_page_offset = 0;
+ this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
+ return -ENOMEM;
}
-
+ nic->rb_page_offset = 0;
ret:
- *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset);
+ /* HW will ensure data coherency, CPU sync not required */
+ *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
+ nic->rb_page_offset, buf_len,
+ DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC));
+ if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
+ if (!nic->rb_page_offset)
+ __free_pages(nic->rb_page, order);
+ nic->rb_page = NULL;
+ return -ENOMEM;
+ }
nic->rb_page_offset += buf_len;
return 0;
rbdr->dma_size = buf_size;
rbdr->enable = true;
rbdr->thresh = RBDR_THRESH;
+ rbdr->head = 0;
+ rbdr->tail = 0;
nic->rb_page = NULL;
for (idx = 0; idx < ring_len; idx++) {
err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN,
&rbuf);
- if (err)
+ if (err) {
+ /* To free already allocated and mapped ones */
+ rbdr->tail = idx - 1;
return err;
+ }
desc = GET_RBDR_DESC(rbdr, idx);
- desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+ desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
}
nicvf_get_page(nic);
static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
{
int head, tail;
- u64 buf_addr;
+ u64 buf_addr, phys_addr;
struct rbdr_entry_t *desc;
if (!rbdr)
head = rbdr->head;
tail = rbdr->tail;
- /* Free SKBs */
+ /* Release page references */
while (head != tail) {
desc = GET_RBDR_DESC(rbdr, head);
- buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
- put_page(virt_to_page(phys_to_virt(buf_addr)));
+ buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+ phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+ dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (phys_addr)
+ put_page(virt_to_page(phys_to_virt(phys_addr)));
head++;
head &= (rbdr->dmem.q_len - 1);
}
- /* Free SKB of tail desc */
+ /* Release buffer of tail desc */
desc = GET_RBDR_DESC(rbdr, tail);
- buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN;
- put_page(virt_to_page(phys_to_virt(buf_addr)));
+ buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN;
+ phys_addr = nicvf_iova_to_phys(nic, buf_addr);
+ dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (phys_addr)
+ put_page(virt_to_page(phys_to_virt(phys_addr)));
/* Free RBDR ring */
nicvf_free_q_desc_mem(nic, &rbdr->dmem);
break;
desc = GET_RBDR_DESC(rbdr, tail);
- desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN;
+ desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN;
refill_rb_cnt--;
new_rb++;
}
return 0;
}
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+ int hdr_sqe, u8 subdesc_cnt)
+{
+ u8 idx;
+ struct sq_gather_subdesc *gather;
+
+ /* Unmap DMA mapped skb data buffers */
+ for (idx = 0; idx < subdesc_cnt; idx++) {
+ hdr_sqe++;
+ hdr_sqe &= (sq->dmem.q_len - 1);
+ gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
+ /* HW will ensure data coherency, CPU sync not required */
+ dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
+ gather->size, DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ }
+}
+
static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
{
struct sk_buff *skb;
+ struct sq_hdr_subdesc *hdr;
+ struct sq_hdr_subdesc *tso_sqe;
if (!sq)
return;
smp_rmb();
while (sq->head != sq->tail) {
skb = (struct sk_buff *)sq->skbuff[sq->head];
- if (skb)
- dev_kfree_skb_any(skb);
+ if (!skb)
+ goto next;
+ hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
+ /* Check for dummy descriptor used for HW TSO offload on 88xx */
+ if (hdr->dont_send) {
+ /* Get actual TSO descriptors and unmap them */
+ tso_sqe =
+ (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+ nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
+ tso_sqe->subdesc_cnt);
+ } else {
+ nicvf_unmap_sndq_buffers(nic, sq, sq->head,
+ hdr->subdesc_cnt);
+ }
+ dev_kfree_skb_any(skb);
+next:
sq->head++;
sq->head &= (sq->dmem.q_len - 1);
}
nicvf_send_msg_to_pf(nic, &mbx);
if (!nic->sqs_mode && (qidx == 0)) {
- /* Enable checking L3/L4 length and TCP/UDP checksums */
+ /* Enable checking L3/L4 length and TCP/UDP checksums
+ * Also allow IPv6 pkts with zero UDP checksum.
+ */
nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
- (BIT(24) | BIT(23) | BIT(21)));
+ (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
nicvf_config_vlan_stripping(nic, nic->netdev->features);
}
return qentry;
}
+/* Rollback to previous tail pointer when descriptors not used */
+static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
+ int qentry, int desc_cnt)
+{
+ sq->tail = qentry;
+ atomic_add(desc_cnt, &sq->free_cnt);
+}
+
/* Free descriptor back to SQ for future use */
void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
{
struct sk_buff *skb, u8 sq_num)
{
int i, size;
- int subdesc_cnt, tso_sqe = 0;
+ int subdesc_cnt, hdr_sqe = 0;
int qentry;
+ u64 dma_addr;
subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
if (subdesc_cnt > atomic_read(&sq->free_cnt))
/* Add SQ header subdesc */
nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
skb, skb->len);
- tso_sqe = qentry;
+ hdr_sqe = qentry;
/* Add SQ gather subdescs */
qentry = nicvf_get_nxt_sqentry(sq, qentry);
size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
- nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data));
+ /* HW will ensure data coherency, CPU sync not required */
+ dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
+ offset_in_page(skb->data), size,
+ DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+ nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+ return 0;
+ }
+
+ nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
/* Check for scattered buffer */
if (!skb_is_nonlinear(skb))
qentry = nicvf_get_nxt_sqentry(sq, qentry);
size = skb_frag_size(frag);
- nicvf_sq_add_gather_subdesc(sq, qentry, size,
- virt_to_phys(
- skb_frag_address(frag)));
+ dma_addr = dma_map_page_attrs(&nic->pdev->dev,
+ skb_frag_page(frag),
+ frag->page_offset, size,
+ DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
+ /* Free entire chain of mapped buffers
+ * here 'i' = frags mapped + above mapped skb->data
+ */
+ nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
+ nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
+ return 0;
+ }
+ nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
}
doorbell:
if (nic->t88 && skb_shinfo(skb)->gso_size) {
qentry = nicvf_get_nxt_sqentry(sq, qentry);
- nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+ nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
}
nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
int offset;
u16 *rb_lens = NULL;
u64 *rb_ptrs = NULL;
+ u64 phys_addr;
rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
/* Except 88xx pass1 on all other chips CQE_RX2_S is added to
else
rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
- netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
- __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
-
for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
payload_len = rb_lens[frag_num(frag)];
+ phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
+ if (!phys_addr) {
+ if (skb)
+ dev_kfree_skb_any(skb);
+ return NULL;
+ }
+
if (!frag) {
/* First fragment */
+ dma_unmap_page_attrs(&nic->pdev->dev,
+ *rb_ptrs - cqe_rx->align_pad,
+ RCV_FRAG_LEN, DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
skb = nicvf_rb_ptr_to_skb(nic,
- *rb_ptrs - cqe_rx->align_pad,
+ phys_addr - cqe_rx->align_pad,
payload_len);
if (!skb)
return NULL;
skb_put(skb, payload_len);
} else {
/* Add fragments */
- page = virt_to_page(phys_to_virt(*rb_ptrs));
- offset = phys_to_virt(*rb_ptrs) - page_address(page);
+ dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs,
+ RCV_FRAG_LEN, DMA_FROM_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ page = virt_to_page(phys_to_virt(phys_addr));
+ offset = phys_to_virt(phys_addr) - page_address(page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
offset, payload_len, RCV_FRAG_LEN);
}
#define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13))
#define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13))
#define RBDR_THRESH (RCV_BUF_COUNT / 2)
-#define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */
+#define DMA_BUFFER_LEN 1536 /* In multiples of 128bytes */
#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT)
+void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
+ int hdr_sqe, u8 subdesc_cnt);
void nicvf_config_vlan_stripping(struct nicvf *nic,
netdev_features_t features);
int nicvf_set_qset_resources(struct nicvf *nic);
return 1;
}
+static int max_bgx_per_node;
+static void set_max_bgx_per_node(struct pci_dev *pdev)
+{
+ u16 sdevid;
+
+ if (max_bgx_per_node)
+ return;
+
+ pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+ switch (sdevid) {
+ case PCI_SUBSYS_DEVID_81XX_BGX:
+ max_bgx_per_node = MAX_BGX_PER_CN81XX;
+ break;
+ case PCI_SUBSYS_DEVID_83XX_BGX:
+ max_bgx_per_node = MAX_BGX_PER_CN83XX;
+ break;
+ case PCI_SUBSYS_DEVID_88XX_BGX:
+ default:
+ max_bgx_per_node = MAX_BGX_PER_CN88XX;
+ break;
+ }
+}
+
+static struct bgx *get_bgx(int node, int bgx_idx)
+{
+ int idx = (node * max_bgx_per_node) + bgx_idx;
+
+ return bgx_vnic[idx];
+}
+
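get_bgx() above replaces the compile-time MAX_BGX_PER_NODE stride with max_bgx_per_node, chosen at probe time from the PCI subsystem ID, so the lookup is simply a two-dimensional index flattened with a runtime stride. A small illustrative sketch of that indexing follows; the table size and stride values are made up, not the driver's.

    #include <stdio.h>

    #define MAX_NODES     2
    #define MAX_PER_NODE  4                /* worst case across supported parts */

    static void *table[MAX_NODES * MAX_PER_NODE];
    static int per_node_stride;            /* set once the hardware is identified */

    static void *lookup(int node, int idx)
    {
        return table[node * per_node_stride + idx];
    }

    int main(void)
    {
        int dummy = 42;

        per_node_stride = 3;               /* e.g. a part with 3 units per node */
        table[1 * per_node_stride + 2] = &dummy;
        printf("%p\n", lookup(1, 2));
        return 0;
    }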
/* Return number of BGX present in HW */
unsigned bgx_get_map(int node)
{
int i;
unsigned map = 0;
- for (i = 0; i < MAX_BGX_PER_NODE; i++) {
- if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
+ for (i = 0; i < max_bgx_per_node; i++) {
+ if (bgx_vnic[(node * max_bgx_per_node) + i])
map |= (1 << i);
}
{
struct bgx *bgx;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (bgx)
return bgx->lmac_count;
struct bgx *bgx;
struct lmac *lmac;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return;
const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
{
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
if (bgx)
return bgx->lmac[lmacid].mac;
void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
{
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
if (!bgx)
return;
void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
{
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
struct lmac *lmac;
u64 cfg;
void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
{
struct pfc *pfc = (struct pfc *)pause;
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
struct lmac *lmac;
u64 cfg;
void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
{
struct pfc *pfc = (struct pfc *)pause;
- struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+ struct bgx *bgx = get_bgx(node, bgx_idx);
struct lmac *lmac;
u64 cfg;
{
struct bgx *bgx;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return 0;
{
struct bgx *bgx;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return 0;
struct lmac *lmac;
u64 cfg;
- bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+ bgx = get_bgx(node, bgx_idx);
if (!bgx)
return;
dev_info(dev, "%s: 40G_KR4\n", (char *)str);
break;
case BGX_MODE_QSGMII:
- if ((lmacid == 0) &&
- (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
- return;
- if ((lmacid == 2) &&
- (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
- return;
dev_info(dev, "%s: QSGMII\n", (char *)str);
break;
case BGX_MODE_RGMII:
goto err_release_regions;
}
+ set_max_bgx_per_node(pdev);
+
pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
bgx->bgx_id = (pci_resource_start(pdev,
PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
- bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+ bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node;
bgx->max_lmac = MAX_LMAC_PER_BGX;
bgx_vnic[bgx->bgx_id] = bgx;
} else {
#define MAX_BGX_PER_CN88XX 2
#define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */
#define MAX_BGX_PER_CN83XX 4
-#define MAX_BGX_PER_NODE 4
#define MAX_LMAC_PER_BGX 4
#define MAX_BGX_CHANS_PER_LMAC 16
#define MAX_DMAC_PER_LMAC 8
static int emac_dt_phy_connect(struct emac_instance *dev,
struct device_node *phy_handle)
{
- int res;
-
dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def),
GFP_KERNEL);
if (!dev->phy.def)
{
struct device_node *np = dev->ofdev->dev.of_node;
struct device_node *phy_handle;
- int res = 0;
+ int res = 1;
phy_handle = of_parse_phandle(np, "phy-handle", 0);
if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) {
int res = emac_dt_phy_probe(dev);
- mutex_unlock(&emac_phy_map_lock);
- if (!res)
+ switch (res) {
+ case 1:
+ /* No phy-handle property configured.
+ * Continue with the existing phy probe
+ * and setup code.
+ */
+ break;
+
+ case 0:
+ mutex_unlock(&emac_phy_map_lock);
goto init_phy;
- dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
- res);
- return res;
+ default:
+ mutex_unlock(&emac_phy_map_lock);
+ dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n",
+ res);
+ return res;
+ }
}
if (dev->phy_address != 0xffffffff)
send_map_query(adapter);
for (i = 0; i < rxadd_subcrqs; i++) {
init_rx_pool(adapter, &adapter->rx_pool[i],
- IBMVNIC_BUFFS_PER_POOL, i,
+ adapter->req_rx_add_entries_per_subcrq, i,
be64_to_cpu(size_array[i]), 1);
if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) {
dev_err(dev, "Couldn't alloc rx pool\n");
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
tx_pool->tx_buff =
- kcalloc(adapter->max_tx_entries_per_subcrq,
+ kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(struct ibmvnic_tx_buff), GFP_KERNEL);
if (!tx_pool->tx_buff)
goto tx_pool_alloc_failed;
if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
- adapter->max_tx_entries_per_subcrq *
+ adapter->req_tx_entries_per_subcrq *
adapter->req_mtu))
goto tx_ltb_alloc_failed;
tx_pool->free_map =
- kcalloc(adapter->max_tx_entries_per_subcrq,
+ kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(int), GFP_KERNEL);
if (!tx_pool->free_map)
goto tx_fm_alloc_failed;
- for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++)
+ for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++)
tx_pool->free_map[j] = j;
tx_pool->consumer_index = 0;
u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
struct device *dev = &adapter->vdev->dev;
struct ibmvnic_tx_buff *tx_buff = NULL;
+ struct ibmvnic_sub_crq_queue *tx_scrq;
struct ibmvnic_tx_pool *tx_pool;
unsigned int tx_send_failed = 0;
unsigned int tx_map_failed = 0;
int ret = 0;
tx_pool = &adapter->tx_pool[queue_num];
+ tx_scrq = adapter->tx_scrq[queue_num];
txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
be32_to_cpu(adapter->login_rsp_buf->
tx_pool->consumer_index =
(tx_pool->consumer_index + 1) %
- adapter->max_tx_entries_per_subcrq;
+ adapter->req_tx_entries_per_subcrq;
tx_buff = &tx_pool->tx_buff[index];
tx_buff->skb = skb;
if (tx_pool->consumer_index == 0)
tx_pool->consumer_index =
- adapter->max_tx_entries_per_subcrq - 1;
+ adapter->req_tx_entries_per_subcrq - 1;
else
tx_pool->consumer_index--;
ret = NETDEV_TX_BUSY;
goto out;
}
+
+ atomic_inc(&tx_scrq->used);
+
+ if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) {
+ netdev_info(netdev, "Stopping queue %d\n", queue_num);
+ netif_stop_subqueue(netdev, queue_num);
+ }
+
tx_packets++;
tx_bytes += skb->len;
txq->trans_start = jiffies;
scrq->adapter = adapter;
scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
scrq->cur = 0;
+ atomic_set(&scrq->used, 0);
scrq->rx_skb_top = NULL;
spin_lock_init(&scrq->lock);
DMA_TO_DEVICE);
}
- if (txbuff->last_frag)
+ if (txbuff->last_frag) {
+ atomic_dec(&scrq->used);
+
+ if (atomic_read(&scrq->used) <=
+ (adapter->req_tx_entries_per_subcrq / 2) &&
+ netif_subqueue_stopped(adapter->netdev,
+ txbuff->skb)) {
+ netif_wake_subqueue(adapter->netdev,
+ scrq->pool_index);
+ netdev_dbg(adapter->netdev,
+ "Started queue %d\n",
+ scrq->pool_index);
+ }
+
dev_kfree_skb_any(txbuff->skb);
+ }
adapter->tx_pool[pool].free_map[adapter->tx_pool[pool].
producer_index] = index;
adapter->tx_pool[pool].producer_index =
(adapter->tx_pool[pool].producer_index + 1) %
- adapter->max_tx_entries_per_subcrq;
+ adapter->req_tx_entries_per_subcrq;
}
/* remove tx_comp scrq*/
next->tx_comp.first = 0;
spinlock_t lock;
struct sk_buff *rx_skb_top;
struct ibmvnic_adapter *adapter;
+ atomic_t used;
};
struct ibmvnic_long_term_buff {
config MLX5_CORE_EN
bool "Mellanox Technologies ConnectX-4 Ethernet support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+ depends on IPV6=y || IPV6=n || MLX5_CORE=m
imply PTP_1588_CLOCK
default n
---help---
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_dcbx *dcbx = &priv->dcbx;
+ if (mode & DCB_CAP_DCBX_LLD_MANAGED)
+ return 1;
+
if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
return 0;
return 1;
}
- if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+ if (!(mode & DCB_CAP_DCBX_HOST))
return 1;
- if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
- !(mode & DCB_CAP_DCBX_VER_CEE) ||
- !(mode & DCB_CAP_DCBX_VER_IEEE) ||
- !(mode & DCB_CAP_DCBX_HOST))
+ if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
return 1;
return 0;
struct iphdr *iph;
/* We are only going to peek, no need to clone the SKB */
- if (skb->protocol != htons(ETH_P_IP))
- goto out;
-
if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
goto out;
lbtp->loopback_ok = false;
init_completion(&lbtp->comp);
- lbtp->pt.type = htons(ETH_P_ALL);
+ lbtp->pt.type = htons(ETH_P_IP);
lbtp->pt.func = mlx5e_test_loopback_validate;
lbtp->pt.dev = priv->netdev;
lbtp->pt.af_packet_priv = lbtp;
#include "eswitch.h"
#include "vxlan.h"
+enum {
+ MLX5E_TC_FLOW_ESWITCH = BIT(0),
+};
+
struct mlx5e_tc_flow {
struct rhash_head node;
u64 cookie;
+ u8 flags;
struct mlx5_flow_handle *rule;
struct list_head encap; /* flows sharing the same encap */
struct mlx5_esw_flow_attr *attr;
mlx5_fc_destroy(priv->mdev, counter);
}
- if (esw && esw->mode == SRIOV_OFFLOADS) {
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
mlx5_eswitch_del_vlan_action(esw, flow->attr);
if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
mlx5e_detach_encap(priv, flow);
}
static int parse_cls_flower(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f)
{
err = __parse_cls_flower(priv, spec, f, &min_inline);
- if (!err && esw->mode == SRIOV_OFFLOADS &&
+ if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) &&
rep->vport != FDB_UPLINK_VPORT) {
if (min_inline > esw->offloads.inline_mode) {
netdev_warn(priv->netdev,
struct tc_cls_flower_offload *f)
{
struct mlx5e_tc_table *tc = &priv->fs.tc;
- int err = 0;
- bool fdb_flow = false;
+ int err, attr_size = 0;
u32 flow_tag, action;
struct mlx5e_tc_flow *flow;
struct mlx5_flow_spec *spec;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ u8 flow_flags = 0;
- if (esw && esw->mode == SRIOV_OFFLOADS)
- fdb_flow = true;
-
- if (fdb_flow)
- flow = kzalloc(sizeof(*flow) +
- sizeof(struct mlx5_esw_flow_attr),
- GFP_KERNEL);
- else
- flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+ if (esw && esw->mode == SRIOV_OFFLOADS) {
+ flow_flags = MLX5E_TC_FLOW_ESWITCH;
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
+ }
+ flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
spec = mlx5_vzalloc(sizeof(*spec));
if (!spec || !flow) {
err = -ENOMEM;
}
flow->cookie = f->cookie;
+ flow->flags = flow_flags;
- err = parse_cls_flower(priv, spec, f);
+ err = parse_cls_flower(priv, flow, spec, f);
if (err < 0)
goto err_free;
- if (fdb_flow) {
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1);
err = parse_tc_fdb_actions(priv, f->exts, flow);
if (err < 0)
u32 *match_criteria)
{
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct list_head *prev = ft->node.children.prev;
+ struct list_head *prev = &ft->node.children;
unsigned int candidate_index = 0;
struct mlx5_flow_group *fg;
void *match_criteria_addr;
if (err)
goto clean_load;
+ pci_save_state(pdev);
return 0;
clean_load:
mlx5_enter_error_state(dev);
mlx5_unload_one(dev, priv, false);
- /* In case of kernel call save the pci state and drain the health wq */
+ /* In case of kernel call drain the health wq */
if (state) {
- pci_save_state(pdev);
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
}
pci_set_master(pdev);
pci_restore_state(pdev);
+ pci_save_state(pdev);
if (wait_vital(pdev)) {
dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
#define MLXSW_REG_SPVM_ID 0x200F
#define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVM_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVM_REC_MAX_COUNT 255
#define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \
MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
#define MLXSW_REG_SPVMLR_ID 0x2020
#define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */
#define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */
-#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256
+#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255
#define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \
MLXSW_REG_SPVMLR_REC_LEN * \
MLXSW_REG_SPVMLR_REC_MAX_COUNT)
ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev,
ingress,
MLXSW_SP_ACL_PROFILE_FLOWER);
- if (WARN_ON(IS_ERR(ruleset)))
+ if (IS_ERR(ruleset))
return;
rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie);
- if (!WARN_ON(!rule)) {
+ if (rule) {
mlxsw_sp_acl_rule_del(mlxsw_sp, rule);
mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule);
}
u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val;
u32 cxt_size = CONN_CXT_SIZE(p_hwfn);
u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size;
+ u32 align = elems_per_page * DQ_RANGE_ALIGN;
- p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page);
+ p_conn->cid_count = roundup(p_conn->cid_count, align);
}
}
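The hunk above widens the roundup granularity from elems_per_page to elems_per_page * DQ_RANGE_ALIGN. The kernel's roundup() is plain integer arithmetic; here is a standalone sketch of the same calculation with made-up numbers, illustrative only.

    #include <stdio.h>

    /* Same shape as the kernel's roundup(): next multiple of 'align' >= x. */
    static unsigned int round_up_to(unsigned int x, unsigned int align)
    {
        return ((x + align - 1) / align) * align;
    }

    int main(void)
    {
        unsigned int elems_per_page = 128, dq_range_align = 4;
        unsigned int align = elems_per_page * dq_range_align;   /* 512 */

        printf("%u -> %u\n", 700u, round_up_to(700, align));    /* 1024 */
        printf("%u -> %u\n", 512u, round_up_to(512, align));    /* 512  */
        return 0;
    }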
* size/capacity fields are of a u32 type.
*/
if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 &&
- chain_size > 0x10000) ||
- (cnt_type == QED_CHAIN_CNT_TYPE_U32 &&
- chain_size > 0x100000000ULL)) {
+ chain_size > ((u32)U16_MAX + 1)) ||
+ (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) {
DP_NOTICE(cdev,
"The actual chain size (0x%llx) is larger than the maximal possible value\n",
chain_size);
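The comparison above caps the requested chain size at what the producer/consumer counter type can represent: (u32)U16_MAX + 1 elements for a 16-bit counter (indices are taken modulo the size, so exactly 2^16 elements still work) and U32_MAX for a 32-bit one. A tiny standalone sketch of that bound check, illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    /* Largest element count a 16-bit producer/consumer counter can address. */
    #define MAX_U16_ELEMS ((uint32_t)UINT16_MAX + 1)   /* 65536 */

    static int chain_size_ok(uint64_t size, int use_u16_counter)
    {
        if (use_u16_counter)
            return size <= MAX_U16_ELEMS;
        return size <= UINT32_MAX;
    }

    int main(void)
    {
        printf("%d\n", chain_size_ok(65536, 1));   /* 1: exactly fits a u16 counter */
        printf("%d\n", chain_size_ok(65537, 1));   /* 0: would overflow */
        return 0;
    }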
p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+ p_init->ooo_enable = p_params->ooo_enable;
+ p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] +
+ p_params->ll2_ooo_queue_id;
p_init->func_params.log_page_size = p_params->log_page_size;
val = p_params->num_tasks;
p_init->func_params.num_tasks = cpu_to_le16(val);
spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
}
+void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn,
+ struct qed_iscsi_conn *p_conn)
+{
+ qed_chain_free(p_hwfn->cdev, &p_conn->xhq);
+ qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+ qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct tcp_upload_params),
+ p_conn->tcp_upload_params_virt_addr,
+ p_conn->tcp_upload_params_phys_addr);
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct scsi_terminate_extra_params),
+ p_conn->queue_cnts_virt_addr,
+ p_conn->queue_cnts_phys_addr);
+ kfree(p_conn);
+}
+
struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
{
struct qed_iscsi_info *p_iscsi_info;
void qed_iscsi_free(struct qed_hwfn *p_hwfn,
struct qed_iscsi_info *p_iscsi_info)
{
+ struct qed_iscsi_conn *p_conn = NULL;
+
+ while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) {
+ p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+ struct qed_iscsi_conn, list_entry);
+ if (p_conn) {
+ list_del(&p_conn->list_entry);
+ qed_iscsi_free_connection(p_hwfn, p_conn);
+ }
+ }
+
kfree(p_iscsi_info);
}
/* If need to reuse or there's no replacement buffer, repost this */
if (rc)
goto out_post;
+ dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+ cdev->ll2->rx_size, DMA_FROM_DEVICE);
skb = build_skb(buffer->data, 0);
if (!skb) {
static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_conn,
union core_rx_cqe_union *p_cqe,
- unsigned long lock_flags,
+ unsigned long *p_lock_flags,
bool b_last_cqe)
{
struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
"Mismatch between active_descq and the LL2 Rx chain\n");
list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
- spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+ spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
- spin_lock_irqsave(&p_rx->lock, lock_flags);
+ spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
return 0;
}
break;
case CORE_RX_CQE_TYPE_REGULAR:
rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
- cqe, flags, b_last_cqe);
+ cqe, &flags,
+ b_last_cqe);
break;
default:
rc = -EIO;
{
struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
- struct qed_ll2_conn ll2_info;
+ struct qed_ll2_conn ll2_info = { 0 };
int rc;
ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO;
if (!p_ooo_info->ooo_history.p_cqes)
goto no_history_mem;
+ p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
return p_ooo_info;
no_history_mem:
* Ethtool support
*/
static int
-smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct smc_local *lp = netdev_priv(dev);
int ret;
- cmd->maxtxpkt = 1;
- cmd->maxrxpkt = 1;
-
if (lp->phy_type != 0) {
spin_lock_irq(&lp->lock);
- ret = mii_ethtool_gset(&lp->mii, cmd);
+ ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd);
spin_unlock_irq(&lp->lock);
} else {
- cmd->supported = SUPPORTED_10baseT_Half |
+ u32 supported = SUPPORTED_10baseT_Half |
SUPPORTED_10baseT_Full |
SUPPORTED_TP | SUPPORTED_AUI;
if (lp->ctl_rspeed == 10)
- ethtool_cmd_speed_set(cmd, SPEED_10);
+ cmd->base.speed = SPEED_10;
else if (lp->ctl_rspeed == 100)
- ethtool_cmd_speed_set(cmd, SPEED_100);
+ cmd->base.speed = SPEED_100;
+
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ cmd->base.port = 0;
+ cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ?
+ DUPLEX_FULL : DUPLEX_HALF;
- cmd->autoneg = AUTONEG_DISABLE;
- cmd->transceiver = XCVR_INTERNAL;
- cmd->port = 0;
- cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? DUPLEX_FULL : DUPLEX_HALF;
+ ethtool_convert_legacy_u32_to_link_mode(
+ cmd->link_modes.supported, supported);
ret = 0;
}
}
static int
-smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd)
+smc_ethtool_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct smc_local *lp = netdev_priv(dev);
int ret;
if (lp->phy_type != 0) {
spin_lock_irq(&lp->lock);
- ret = mii_ethtool_sset(&lp->mii, cmd);
+ ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd);
spin_unlock_irq(&lp->lock);
} else {
- if (cmd->autoneg != AUTONEG_DISABLE ||
- cmd->speed != SPEED_10 ||
- (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) ||
- (cmd->port != PORT_TP && cmd->port != PORT_AUI))
+ if (cmd->base.autoneg != AUTONEG_DISABLE ||
+ cmd->base.speed != SPEED_10 ||
+ (cmd->base.duplex != DUPLEX_HALF &&
+ cmd->base.duplex != DUPLEX_FULL) ||
+ (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI))
return -EINVAL;
-// lp->port = cmd->port;
- lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL;
+// lp->port = cmd->base.port;
+ lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL;
// if (netif_running(dev))
// smc_set_port(dev);
static const struct ethtool_ops smc_ethtool_ops = {
- .get_settings = smc_ethtool_getsettings,
- .set_settings = smc_ethtool_setsettings,
.get_drvinfo = smc_ethtool_getdrvinfo,
.get_msglevel = smc_ethtool_getmsglevel,
.get_eeprom_len = smc_ethtool_geteeprom_len,
.get_eeprom = smc_ethtool_geteeprom,
.set_eeprom = smc_ethtool_seteeprom,
+ .get_link_ksettings = smc_ethtool_get_link_ksettings,
+ .set_link_ksettings = smc_ethtool_set_link_ksettings,
};
static const struct net_device_ops smc_netdev_ops = {
u32 tx_checksum_mask;
+ u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+
/* Ethtool settings */
u8 duplex;
u32 speed;
struct nvsp_message revoke_packet;
- u32 send_table[VRSS_SEND_TAB_SIZE];
u32 max_chn;
u32 num_chn;
spinlock_t sc_lock; /* Protects num_sc_offered variable */
static void netvsc_send_table(struct hv_device *hdev,
struct nvsp_message *nvmsg)
{
- struct netvsc_device *nvscdev;
struct net_device *ndev = hv_get_drvdata(hdev);
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
int i;
u32 count, *tab;
- nvscdev = get_outbound_net_device(hdev);
- if (!nvscdev)
- return;
-
count = nvmsg->msg.v5_msg.send_table.count;
if (count != VRSS_SEND_TAB_SIZE) {
netdev_err(ndev, "Received wrong send-table size:%u\n", count);
nvmsg->msg.v5_msg.send_table.offset);
for (i = 0; i < count; i++)
- nvscdev->send_table[i] = tab[i];
+ net_device_ctx->tx_send_table[i] = tab[i];
}
static void netvsc_send_vf(struct net_device_context *net_device_ctx,
void *accel_priv, select_queue_fallback_t fallback)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
+ unsigned int num_tx_queues = ndev->real_num_tx_queues;
struct sock *sk = skb->sk;
int q_idx = sk_tx_queue_get(sk);
- if (q_idx < 0 || skb->ooo_okay ||
- q_idx >= ndev->real_num_tx_queues) {
+ if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
int new_idx;
- new_idx = nvsc_dev->send_table[hash]
- % nvsc_dev->num_chn;
+ new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;
if (q_idx != new_idx && sk &&
sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
q_idx = new_idx;
}
- if (unlikely(!nvsc_dev->chan_table[q_idx].channel))
- q_idx = 0;
-
return q_idx;
}
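The netvsc queue-selection hunks above hash the flow, look the hash up in the host-provided send table, and fold the result onto the real number of TX queues. A minimal standalone sketch of that two-step indirection-table selection follows; the table size and values are toy numbers, not the driver's VRSS configuration.

    #include <stdio.h>

    #define TAB_SIZE 16                        /* toy indirection-table size */

    static unsigned int send_table[TAB_SIZE];  /* filled in by the "host" */

    static unsigned int select_queue(unsigned int flow_hash,
                                     unsigned int num_tx_queues)
    {
        /* hash -> table slot -> queue, clamped to the queues we really have */
        return send_table[flow_hash % TAB_SIZE] % num_tx_queues;
    }

    int main(void)
    {
        for (unsigned int i = 0; i < TAB_SIZE; i++)
            send_table[i] = i;                 /* identity table for the demo */

        printf("%u\n", select_queue(0x1234, 4));  /* 0x1234 %% 16 = 4 -> 4 %% 4 = 0 */
        return 0;
    }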
return m88e1510_hwmon_probe(phydev);
}
-static void marvell_remove(struct phy_device *phydev)
-{
-#ifdef CONFIG_HWMON
-
- struct marvell_priv *priv = phydev->priv;
-
- if (priv && priv->hwmon_dev)
- hwmon_device_unregister(priv->hwmon_dev);
-#endif
-}
-
static struct phy_driver marvell_drivers[] = {
{
.phy_id = MARVELL_PHY_ID_88E1101,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = &m88e1121_probe,
- .remove = &marvell_remove,
.config_init = &m88e1121_config_init,
.config_aneg = &m88e1121_config_aneg,
.read_status = &marvell_read_status,
.features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE,
.flags = PHY_HAS_INTERRUPT,
.probe = &m88e1510_probe,
- .remove = &marvell_remove,
.config_init = &m88e1510_config_init,
.config_aneg = &m88e1510_config_aneg,
.read_status = &marvell_read_status,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.probe = m88e1510_probe,
- .remove = &marvell_remove,
.config_init = &marvell_config_init,
.config_aneg = &m88e1510_config_aneg,
.read_status = &marvell_read_status,
.phy_id_mask = MARVELL_PHY_ID_MASK,
.name = "Marvell 88E1545",
.probe = m88e1510_probe,
- .remove = &marvell_remove,
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_INTERRUPT,
.config_init = &marvell_config_init,
.phy_id = 0xffffffff,
.phy_id_mask = 0xffffffff,
.name = "Generic PHY",
- .soft_reset = genphy_soft_reset,
+ .soft_reset = genphy_no_soft_reset,
.config_init = genphy_config_init,
.features = PHY_GBIT_FEATURES | SUPPORTED_MII |
SUPPORTED_AUI | SUPPORTED_FIBRE |
if (err)
return err;
- ks->regs_attr.size = ks->chip->regs_size;
memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr));
+ ks->regs_attr.size = ks->chip->regs_size;
err = ks8995_reset(ks);
if (err)
return err;
+ sysfs_attr_init(&ks->regs_attr.attr);
err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
if (err) {
dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
static void team_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = ETH_MAX_MTU;
dev->netdev_ops = &team_netdev_ops;
dev->ethtool_ops = &team_ethtool_ops;
/* Net device open. */
static int tun_net_open(struct net_device *dev)
{
+ struct tun_struct *tun = netdev_priv(dev);
+ int i;
+
netif_tx_start_all_queues(dev);
+
+ for (i = 0; i < tun->numqueues; i++) {
+ struct tun_file *tfile;
+
+ tfile = rtnl_dereference(tun->tfiles[i]);
+ tfile->socket.sk->sk_write_space(tfile->socket.sk);
+ }
+
return 0;
}
if (!skb_array_empty(&tfile->tx_array))
mask |= POLLIN | POLLRDNORM;
- if (sock_writeable(sk) ||
- (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
- sock_writeable(sk)))
+ if (tun->dev->flags & IFF_UP &&
+ (sock_writeable(sk) ||
+ (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+ sock_writeable(sk))))
mask |= POLLOUT | POLLWRNORM;
if (tun->dev->reg_state != NETREG_REGISTERED)
int ret = 0;
pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
- pr_info("%s\n", DRV_COPYRIGHT);
ret = rtnl_link_register(&tun_link_ops);
if (ret) {
static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ int len = skb->len;
netdev_tx_t ret = is_ip_tx_frame(skb, dev);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
u64_stats_update_begin(&dstats->syncp);
dstats->tx_pkts++;
- dstats->tx_bytes += skb->len;
+ dstats->tx_bytes += len;
u64_stats_update_end(&dstats->syncp);
} else {
this_cpu_inc(dev->dstats->tx_drps);
return 0;
}
+static int __vxlan_dev_create(struct net *net, struct net_device *dev,
+ struct vxlan_config *conf)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ int err;
+
+ err = vxlan_dev_configure(net, dev, conf, false);
+ if (err)
+ return err;
+
+ dev->ethtool_ops = &vxlan_ethtool_ops;
+
+ /* create an fdb entry for a valid default destination */
+ if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+ err = vxlan_fdb_create(vxlan, all_zeros_mac,
+ &vxlan->default_dst.remote_ip,
+ NUD_REACHABLE | NUD_PERMANENT,
+ NLM_F_EXCL | NLM_F_CREATE,
+ vxlan->cfg.dst_port,
+ vxlan->default_dst.remote_vni,
+ vxlan->default_dst.remote_vni,
+ vxlan->default_dst.remote_ifindex,
+ NTF_SELF);
+ if (err)
+ return err;
+ }
+
+ err = register_netdevice(dev);
+ if (err) {
+ vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
+ return err;
+ }
+
+ list_add(&vxlan->next, &vn->vxlan_list);
+ return 0;
+}
+
static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
struct net_device *dev, struct vxlan_config *conf,
bool changelink)
static int vxlan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
- struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
- struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_config conf;
int err;
if (err)
return err;
- err = vxlan_dev_configure(src_net, dev, &conf, false);
- if (err)
- return err;
-
- dev->ethtool_ops = &vxlan_ethtool_ops;
-
- /* create an fdb entry for a valid default destination */
- if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
- err = vxlan_fdb_create(vxlan, all_zeros_mac,
- &vxlan->default_dst.remote_ip,
- NUD_REACHABLE | NUD_PERMANENT,
- NLM_F_EXCL | NLM_F_CREATE,
- vxlan->cfg.dst_port,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_ifindex,
- NTF_SELF);
- if (err)
- return err;
- }
-
- err = register_netdevice(dev);
- if (err) {
- vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
- return err;
- }
-
- list_add(&vxlan->next, &vn->vxlan_list);
-
- return 0;
+ return __vxlan_dev_create(src_net, dev, &conf);
}
static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(dev))
return dev;
- err = vxlan_dev_configure(net, dev, conf, false);
+ err = __vxlan_dev_create(net, dev, conf);
if (err < 0) {
free_netdev(dev);
return ERR_PTR(err);
/* set bd status and length */
bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S;
- iowrite16be(bd_status, &bd->status);
iowrite16be(skb->len, &bd->length);
+ iowrite16be(bd_status, &bd->status);
/* Move to next BD in the ring */
if (!(bd_status & T_W_S))
struct sk_buff *skb;
hdlc_device *hdlc = dev_to_hdlc(dev);
struct qe_bd *bd;
- u32 bd_status;
+ u16 bd_status;
u16 length, howmany = 0;
u8 *bdbuffer;
int i;
struct i2400mu *i2400mu;
struct usb_device *usb_dev = interface_to_usbdev(iface);
+ if (iface->cur_altsetting->desc.bNumEndpoints < 4)
+ return -ENODEV;
+
if (usb_dev->speed != USB_SPEED_HIGH)
dev_err(dev, "device not connected as high speed\n");
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
- unsigned int num_queues = vif->num_queues;
+ unsigned int num_queues;
u16 index;
struct xenvif_rx_cb *cb;
BUG_ON(skb->dev != dev);
- /* Drop the packet if queues are not set up */
+ /* Drop the packet if queues are not set up.
+ * This handler should be called inside an RCU read section
+ * so we don't need to enter it here explicitly.
+ */
+ num_queues = READ_ONCE(vif->num_queues);
if (num_queues < 1)
goto drop;
{
struct xenvif *vif = netdev_priv(dev);
struct xenvif_queue *queue = NULL;
+ unsigned int num_queues;
u64 rx_bytes = 0;
u64 rx_packets = 0;
u64 tx_bytes = 0;
u64 tx_packets = 0;
unsigned int index;
- spin_lock(&vif->lock);
- if (vif->queues == NULL)
- goto out;
+ rcu_read_lock();
+ num_queues = READ_ONCE(vif->num_queues);
/* Aggregate tx and rx stats from each queue */
- for (index = 0; index < vif->num_queues; ++index) {
+ for (index = 0; index < num_queues; ++index) {
queue = &vif->queues[index];
rx_bytes += queue->stats.rx_bytes;
rx_packets += queue->stats.rx_packets;
tx_packets += queue->stats.tx_packets;
}
-out:
- spin_unlock(&vif->lock);
+ rcu_read_unlock();
vif->dev->stats.rx_bytes = rx_bytes;
vif->dev->stats.rx_packets = rx_packets;
struct ethtool_stats *stats, u64 * data)
{
struct xenvif *vif = netdev_priv(dev);
- unsigned int num_queues = vif->num_queues;
+ unsigned int num_queues;
int i;
unsigned int queue_index;
+ rcu_read_lock();
+ num_queues = READ_ONCE(vif->num_queues);
+
for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
unsigned long accum = 0;
for (queue_index = 0; queue_index < num_queues; ++queue_index) {
}
data[i] = accum;
}
+
+ rcu_read_unlock();
}
static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data)
netdev_err(vif->dev, "fatal error; disabling device\n");
vif->disabled = true;
/* Disable the vif from queue 0's kthread */
- if (vif->queues)
+ if (vif->num_queues)
xenvif_kick_thread(&vif->queues[0]);
}
struct xenvif *vif = be->vif;
if (vif) {
+ unsigned int num_queues = vif->num_queues;
unsigned int queue_index;
- struct xenvif_queue *queues;
xen_unregister_watchers(vif);
#ifdef CONFIG_DEBUG_FS
xenvif_debugfs_delif(vif);
#endif /* CONFIG_DEBUG_FS */
xenvif_disconnect_data(vif);
- for (queue_index = 0;
- queue_index < vif->num_queues;
- ++queue_index)
- xenvif_deinit_queue(&vif->queues[queue_index]);
- spin_lock(&vif->lock);
- queues = vif->queues;
+ /* At this point some of the handlers may still be active
+ * so we need to have additional synchronization here.
+ */
vif->num_queues = 0;
- vif->queues = NULL;
- spin_unlock(&vif->lock);
+ synchronize_net();
- vfree(queues);
+ for (queue_index = 0; queue_index < num_queues; ++queue_index)
+ xenvif_deinit_queue(&vif->queues[queue_index]);
+
+ vfree(vif->queues);
+ vif->queues = NULL;
xenvif_disconnect_ctrl(vif);
}
.wapf = 2,
};
-static struct quirk_entry quirk_no_rfkill = {
- .no_rfkill = true,
-};
-
-static struct quirk_entry quirk_no_rfkill_wapf4 = {
- .wapf = 4,
- .no_rfkill = true,
-};
-
static struct quirk_entry quirk_asus_ux303ub = {
.wmi_backlight_native = true,
};
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"),
},
- .driver_data = &quirk_no_rfkill_wapf4,
+ .driver_data = &quirk_asus_wapf4,
},
{
.callback = dmi_matched,
DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"),
},
- .driver_data = &quirk_no_rfkill_wapf4,
+ .driver_data = &quirk_asus_wapf4,
},
{
.callback = dmi_matched,
},
.driver_data = &quirk_asus_x200ca,
},
- {
- .callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. X555UB",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "X555UB"),
- },
- .driver_data = &quirk_no_rfkill,
- },
- {
- .callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. N552VW",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "N552VW"),
- },
- .driver_data = &quirk_no_rfkill,
- },
- {
- .callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. U303LB",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "U303LB"),
- },
- .driver_data = &quirk_no_rfkill,
- },
- {
- .callback = dmi_matched,
- .ident = "ASUSTeK COMPUTER INC. Z550MA",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Z550MA"),
- },
- .driver_data = &quirk_no_rfkill,
- },
{
.callback = dmi_matched,
.ident = "ASUSTeK COMPUTER INC. UX303UB",
#define USB_INTEL_XUSB2PR 0xD0
#define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31
+static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
+
struct bios_args {
u32 arg0;
u32 arg1;
return 0;
}
+static bool ashs_present(void)
+{
+ int i = 0;
+ while (ashs_ids[i]) {
+ if (acpi_dev_found(ashs_ids[i++]))
+ return true;
+ }
+ return false;
+}
+
/*
* WMI Driver
*/
if (err)
goto fail_leds;
- if (!asus->driver->quirks->no_rfkill) {
+ asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
+ if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
+ asus->driver->wlan_ctrl_by_user = 1;
+
+ if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) {
err = asus_wmi_rfkill_init(asus);
if (err)
goto fail_rfkill;
if (err)
goto fail_debugfs;
- asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result);
- if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT))
- asus->driver->wlan_ctrl_by_user = 1;
-
return 0;
fail_debugfs:
struct asus_wmi;
struct quirk_entry {
- bool no_rfkill;
bool hotplug_wireless;
bool scalar_panel_brightness;
bool store_backlight_power;
#define FUJITSU_LCD_N_LEVELS 8
-#define ACPI_FUJITSU_CLASS "fujitsu"
-#define ACPI_FUJITSU_HID "FUJ02B1"
-#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver"
-#define ACPI_FUJITSU_DEVICE_NAME "Fujitsu FUJ02B1"
-#define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3"
-#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
-#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3"
+#define ACPI_FUJITSU_CLASS "fujitsu"
+#define ACPI_FUJITSU_BL_HID "FUJ02B1"
+#define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver"
+#define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1"
+#define ACPI_FUJITSU_LAPTOP_HID "FUJ02E3"
+#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
+#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME "Fujitsu FUJ02E3"
#define ACPI_FUJITSU_NOTIFY_CODE1 0x80
/* FUNC interface - command values */
-#define FUNC_RFKILL 0x1000
+#define FUNC_FLAGS 0x1000
#define FUNC_LEDS 0x1001
#define FUNC_BUTTONS 0x1002
#define FUNC_BACKLIGHT 0x1004
/* FUNC interface - responses */
#define UNSUPPORTED_CMD 0x80000000
+/* FUNC interface - status flags */
+#define FLAG_RFKILL 0x020
+#define FLAG_LID 0x100
+#define FLAG_DOCK 0x200
+
#if IS_ENABLED(CONFIG_LEDS_CLASS)
/* FUNC interface - LED control */
#define FUNC_LED_OFF 0x1
#endif
/* Device controlling the backlight and associated keys */
-struct fujitsu_t {
+struct fujitsu_bl {
acpi_handle acpi_handle;
struct acpi_device *dev;
struct input_dev *input;
unsigned int brightness_level;
};
-static struct fujitsu_t *fujitsu;
+static struct fujitsu_bl *fujitsu_bl;
static int use_alt_lcd_levels = -1;
static int disable_brightness_adjust = -1;
-/* Device used to access other hotkeys on the laptop */
-struct fujitsu_hotkey_t {
+/* Device used to access hotkeys and other features on the laptop */
+struct fujitsu_laptop {
acpi_handle acpi_handle;
struct acpi_device *dev;
struct input_dev *input;
struct platform_device *pf_device;
struct kfifo fifo;
spinlock_t fifo_lock;
- int rfkill_supported;
- int rfkill_state;
+ int flags_supported;
+ int flags_state;
int logolamp_registered;
int kblamps_registered;
int radio_led_registered;
int eco_led_registered;
};
-static struct fujitsu_hotkey_t *fujitsu_hotkey;
-
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event);
+static struct fujitsu_laptop *fujitsu_laptop;
#if IS_ENABLED(CONFIG_LEDS_CLASS)
static enum led_brightness logolamp_get(struct led_classdev *cdev);
static u32 dbg_level = 0x03;
#endif
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event);
-
/* Fujitsu ACPI interface function */
static int call_fext_func(int cmd, int arg0, int arg1, int arg2)
unsigned long long value;
acpi_handle handle = NULL;
- status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle);
+ status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle);
if (ACPI_FAILURE(status)) {
vdbg_printk(FUJLAPTOP_DBG_ERROR,
"FUNC interface is not present\n");
enum led_brightness brightness)
{
if (brightness >= LED_FULL)
- return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON);
+ return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON);
else
- return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0);
+ return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0);
}
static int eco_led_set(struct led_classdev *cdev,
{
enum led_brightness brightness = LED_OFF;
- if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON)
+ if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON)
brightness = LED_FULL;
return brightness;
vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n",
level);
- if (level < 0 || level >= fujitsu->max_brightness)
+ if (level < 0 || level >= fujitsu_bl->max_brightness)
return -EINVAL;
- status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
+ status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle);
if (ACPI_FAILURE(status)) {
vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n");
return -ENODEV;
vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n",
level);
- if (level < 0 || level >= fujitsu->max_brightness)
+ if (level < 0 || level >= fujitsu_bl->max_brightness)
return -EINVAL;
- status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle);
+ status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle);
if (ACPI_FAILURE(status)) {
vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n");
return -ENODEV;
vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n");
- status =
- acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
+ status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL,
+ &state);
if (ACPI_FAILURE(status))
return 0;
- fujitsu->brightness_level = state & 0x0fffffff;
+ fujitsu_bl->brightness_level = state & 0x0fffffff;
if (state & 0x80000000)
- fujitsu->brightness_changed = 1;
+ fujitsu_bl->brightness_changed = 1;
else
- fujitsu->brightness_changed = 0;
+ fujitsu_bl->brightness_changed = 0;
- return fujitsu->brightness_level;
+ return fujitsu_bl->brightness_level;
}
static int get_max_brightness(void)
vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n");
- status =
- acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state);
+ status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL,
+ &state);
if (ACPI_FAILURE(status))
return -1;
- fujitsu->max_brightness = state;
+ fujitsu_bl->max_brightness = state;
- return fujitsu->max_brightness;
+ return fujitsu_bl->max_brightness;
}
/* Backlight device stuff */
return ret;
}
-static const struct backlight_ops fujitsubl_ops = {
+static const struct backlight_ops fujitsu_bl_ops = {
.get_brightness = bl_get_brightness,
.update_status = bl_update_status,
};
int ret;
- ret = fujitsu->brightness_changed;
+ ret = fujitsu_bl->brightness_changed;
if (ret < 0)
return ret;
int level, ret;
if (sscanf(buf, "%i", &level) != 1
- || (level < 0 || level >= fujitsu->max_brightness))
+ || (level < 0 || level >= fujitsu_bl->max_brightness))
return -EINVAL;
if (use_alt_lcd_levels)
show_lid_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (!(fujitsu_hotkey->rfkill_supported & 0x100))
+ if (!(fujitsu_laptop->flags_supported & FLAG_LID))
return sprintf(buf, "unknown\n");
- if (fujitsu_hotkey->rfkill_state & 0x100)
+ if (fujitsu_laptop->flags_state & FLAG_LID)
return sprintf(buf, "open\n");
else
return sprintf(buf, "closed\n");
show_dock_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (!(fujitsu_hotkey->rfkill_supported & 0x200))
+ if (!(fujitsu_laptop->flags_supported & FLAG_DOCK))
return sprintf(buf, "unknown\n");
- if (fujitsu_hotkey->rfkill_state & 0x200)
+ if (fujitsu_laptop->flags_state & FLAG_DOCK)
return sprintf(buf, "docked\n");
else
return sprintf(buf, "undocked\n");
show_radios_state(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (!(fujitsu_hotkey->rfkill_supported & 0x20))
+ if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL))
return sprintf(buf, "unknown\n");
- if (fujitsu_hotkey->rfkill_state & 0x20)
+ if (fujitsu_laptop->flags_state & FLAG_RFKILL)
return sprintf(buf, "on\n");
else
return sprintf(buf, "killed\n");
static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store);
static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store);
-static struct attribute *fujitsupf_attributes[] = {
+static struct attribute *fujitsu_pf_attributes[] = {
&dev_attr_brightness_changed.attr,
&dev_attr_max_brightness.attr,
&dev_attr_lcd_level.attr,
NULL
};
-static struct attribute_group fujitsupf_attribute_group = {
- .attrs = fujitsupf_attributes
+static struct attribute_group fujitsu_pf_attribute_group = {
+ .attrs = fujitsu_pf_attributes
};
-static struct platform_driver fujitsupf_driver = {
+static struct platform_driver fujitsu_pf_driver = {
.driver = {
.name = "fujitsu-laptop",
}
static void __init dmi_check_cb_common(const struct dmi_system_id *id)
{
pr_info("Identified laptop model '%s'\n", id->ident);
- if (use_alt_lcd_levels == -1) {
- if (acpi_has_method(NULL,
- "\\_SB.PCI0.LPCB.FJEX.SBL2"))
- use_alt_lcd_levels = 1;
- else
- use_alt_lcd_levels = 0;
- vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as "
- "%i\n", use_alt_lcd_levels);
- }
}
static int __init dmi_check_cb_s6410(const struct dmi_system_id *id)
{
dmi_check_cb_common(id);
- fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */
- fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */
+ fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */
+ fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */
return 1;
}
static int __init dmi_check_cb_s6420(const struct dmi_system_id *id)
{
dmi_check_cb_common(id);
- fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */
- fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */
+ fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */
+ fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */
return 1;
}
static int __init dmi_check_cb_p8010(const struct dmi_system_id *id)
{
dmi_check_cb_common(id);
- fujitsu->keycode1 = KEY_HELP; /* "Support" */
- fujitsu->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */
- fujitsu->keycode4 = KEY_WWW; /* "Internet" */
+ fujitsu_bl->keycode1 = KEY_HELP; /* "Support" */
+ fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */
+ fujitsu_bl->keycode4 = KEY_WWW; /* "Internet" */
return 1;
}
/* ACPI device for LCD brightness control */
-static int acpi_fujitsu_add(struct acpi_device *device)
+static int acpi_fujitsu_bl_add(struct acpi_device *device)
{
int state = 0;
struct input_dev *input;
if (!device)
return -EINVAL;
- fujitsu->acpi_handle = device->handle;
- sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME);
+ fujitsu_bl->acpi_handle = device->handle;
+ sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME);
sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
- device->driver_data = fujitsu;
+ device->driver_data = fujitsu_bl;
- fujitsu->input = input = input_allocate_device();
+ fujitsu_bl->input = input = input_allocate_device();
if (!input) {
error = -ENOMEM;
goto err_stop;
}
- snprintf(fujitsu->phys, sizeof(fujitsu->phys),
+ snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys),
"%s/video/input0", acpi_device_hid(device));
input->name = acpi_device_name(device);
- input->phys = fujitsu->phys;
+ input->phys = fujitsu_bl->phys;
input->id.bustype = BUS_HOST;
input->id.product = 0x06;
input->dev.parent = &device->dev;
if (error)
goto err_free_input_dev;
- error = acpi_bus_update_power(fujitsu->acpi_handle, &state);
+ error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state);
if (error) {
pr_err("Error reading power state\n");
goto err_unregister_input_dev;
acpi_device_name(device), acpi_device_bid(device),
!device->power.state ? "on" : "off");
- fujitsu->dev = device;
+ fujitsu_bl->dev = device;
if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
pr_err("_INI Method failed\n");
}
+ if (use_alt_lcd_levels == -1) {
+ if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2"))
+ use_alt_lcd_levels = 1;
+ else
+ use_alt_lcd_levels = 0;
+ vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n",
+ use_alt_lcd_levels);
+ }
+
/* do config (detect defaults) */
use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0;
disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0;
use_alt_lcd_levels, disable_brightness_adjust);
if (get_max_brightness() <= 0)
- fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS;
+ fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS;
get_lcd_level();
return 0;
return error;
}
-static int acpi_fujitsu_remove(struct acpi_device *device)
+static int acpi_fujitsu_bl_remove(struct acpi_device *device)
{
- struct fujitsu_t *fujitsu = acpi_driver_data(device);
- struct input_dev *input = fujitsu->input;
+ struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device);
+ struct input_dev *input = fujitsu_bl->input;
input_unregister_device(input);
- fujitsu->acpi_handle = NULL;
+ fujitsu_bl->acpi_handle = NULL;
return 0;
}
/* Brightness notify */
-static void acpi_fujitsu_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
{
struct input_dev *input;
int keycode;
int oldb, newb;
- input = fujitsu->input;
+ input = fujitsu_bl->input;
switch (event) {
case ACPI_FUJITSU_NOTIFY_CODE1:
keycode = 0;
- oldb = fujitsu->brightness_level;
+ oldb = fujitsu_bl->brightness_level;
get_lcd_level();
- newb = fujitsu->brightness_level;
+ newb = fujitsu_bl->brightness_level;
vdbg_printk(FUJLAPTOP_DBG_TRACE,
"brightness button event [%i -> %i (%i)]\n",
- oldb, newb, fujitsu->brightness_changed);
+ oldb, newb, fujitsu_bl->brightness_changed);
if (oldb < newb) {
if (disable_brightness_adjust != 1) {
/* ACPI device for hotkey handling */
-static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
+static int acpi_fujitsu_laptop_add(struct acpi_device *device)
{
int result = 0;
int state = 0;
if (!device)
return -EINVAL;
- fujitsu_hotkey->acpi_handle = device->handle;
+ fujitsu_laptop->acpi_handle = device->handle;
sprintf(acpi_device_name(device), "%s",
- ACPI_FUJITSU_HOTKEY_DEVICE_NAME);
+ ACPI_FUJITSU_LAPTOP_DEVICE_NAME);
sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
- device->driver_data = fujitsu_hotkey;
+ device->driver_data = fujitsu_laptop;
/* kfifo */
- spin_lock_init(&fujitsu_hotkey->fifo_lock);
- error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
+ spin_lock_init(&fujitsu_laptop->fifo_lock);
+ error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int),
GFP_KERNEL);
if (error) {
pr_err("kfifo_alloc failed\n");
goto err_stop;
}
- fujitsu_hotkey->input = input = input_allocate_device();
+ fujitsu_laptop->input = input = input_allocate_device();
if (!input) {
error = -ENOMEM;
goto err_free_fifo;
}
- snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys),
+ snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys),
"%s/video/input0", acpi_device_hid(device));
input->name = acpi_device_name(device);
- input->phys = fujitsu_hotkey->phys;
+ input->phys = fujitsu_laptop->phys;
input->id.bustype = BUS_HOST;
input->id.product = 0x06;
input->dev.parent = &device->dev;
set_bit(EV_KEY, input->evbit);
- set_bit(fujitsu->keycode1, input->keybit);
- set_bit(fujitsu->keycode2, input->keybit);
- set_bit(fujitsu->keycode3, input->keybit);
- set_bit(fujitsu->keycode4, input->keybit);
- set_bit(fujitsu->keycode5, input->keybit);
+ set_bit(fujitsu_bl->keycode1, input->keybit);
+ set_bit(fujitsu_bl->keycode2, input->keybit);
+ set_bit(fujitsu_bl->keycode3, input->keybit);
+ set_bit(fujitsu_bl->keycode4, input->keybit);
+ set_bit(fujitsu_bl->keycode5, input->keybit);
set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit);
set_bit(KEY_UNKNOWN, input->keybit);
if (error)
goto err_free_input_dev;
- error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state);
+ error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state);
if (error) {
pr_err("Error reading power state\n");
goto err_unregister_input_dev;
acpi_device_name(device), acpi_device_bid(device),
!device->power.state ? "on" : "off");
- fujitsu_hotkey->dev = device;
+ fujitsu_laptop->dev = device;
if (acpi_has_method(device->handle, METHOD_NAME__INI)) {
vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n");
; /* No action, result is discarded */
vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i);
- fujitsu_hotkey->rfkill_supported =
- call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0);
+ fujitsu_laptop->flags_supported =
+ call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0);
/* Make sure our bitmask of supported functions is cleared if the
RFKILL function block is not implemented, like on the S7020. */
- if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD)
- fujitsu_hotkey->rfkill_supported = 0;
+ if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD)
+ fujitsu_laptop->flags_supported = 0;
- if (fujitsu_hotkey->rfkill_supported)
- fujitsu_hotkey->rfkill_state =
- call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+ if (fujitsu_laptop->flags_supported)
+ fujitsu_laptop->flags_state =
+ call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
/* Suspect this is a keymap of the application panel, print it */
pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0));
#if IS_ENABLED(CONFIG_LEDS_CLASS)
if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&logolamp_led);
if (result == 0) {
- fujitsu_hotkey->logolamp_registered = 1;
+ fujitsu_laptop->logolamp_registered = 1;
} else {
pr_err("Could not register LED handler for logo lamp, error %i\n",
result);
if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) &&
(call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&kblamps_led);
if (result == 0) {
- fujitsu_hotkey->kblamps_registered = 1;
+ fujitsu_laptop->kblamps_registered = 1;
} else {
pr_err("Could not register LED handler for keyboard lamps, error %i\n",
result);
* that an RF LED is present.
*/
if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&radio_led);
if (result == 0) {
- fujitsu_hotkey->radio_led_registered = 1;
+ fujitsu_laptop->radio_led_registered = 1;
} else {
pr_err("Could not register LED handler for radio LED, error %i\n",
result);
*/
if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) &&
(call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) {
- result = led_classdev_register(&fujitsu->pf_device->dev,
+ result = led_classdev_register(&fujitsu_bl->pf_device->dev,
&eco_led);
if (result == 0) {
- fujitsu_hotkey->eco_led_registered = 1;
+ fujitsu_laptop->eco_led_registered = 1;
} else {
pr_err("Could not register LED handler for eco LED, error %i\n",
result);
err_free_input_dev:
input_free_device(input);
err_free_fifo:
- kfifo_free(&fujitsu_hotkey->fifo);
+ kfifo_free(&fujitsu_laptop->fifo);
err_stop:
return error;
}
-static int acpi_fujitsu_hotkey_remove(struct acpi_device *device)
+static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
{
- struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device);
- struct input_dev *input = fujitsu_hotkey->input;
+ struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device);
+ struct input_dev *input = fujitsu_laptop->input;
#if IS_ENABLED(CONFIG_LEDS_CLASS)
- if (fujitsu_hotkey->logolamp_registered)
+ if (fujitsu_laptop->logolamp_registered)
led_classdev_unregister(&logolamp_led);
- if (fujitsu_hotkey->kblamps_registered)
+ if (fujitsu_laptop->kblamps_registered)
led_classdev_unregister(&kblamps_led);
- if (fujitsu_hotkey->radio_led_registered)
+ if (fujitsu_laptop->radio_led_registered)
led_classdev_unregister(&radio_led);
- if (fujitsu_hotkey->eco_led_registered)
+ if (fujitsu_laptop->eco_led_registered)
led_classdev_unregister(&eco_led);
#endif
input_unregister_device(input);
- kfifo_free(&fujitsu_hotkey->fifo);
+ kfifo_free(&fujitsu_laptop->fifo);
- fujitsu_hotkey->acpi_handle = NULL;
+ fujitsu_laptop->acpi_handle = NULL;
return 0;
}
-static void acpi_fujitsu_hotkey_press(int keycode)
+static void acpi_fujitsu_laptop_press(int keycode)
{
- struct input_dev *input = fujitsu_hotkey->input;
+ struct input_dev *input = fujitsu_laptop->input;
int status;
- status = kfifo_in_locked(&fujitsu_hotkey->fifo,
+ status = kfifo_in_locked(&fujitsu_laptop->fifo,
(unsigned char *)&keycode, sizeof(keycode),
- &fujitsu_hotkey->fifo_lock);
+ &fujitsu_laptop->fifo_lock);
if (status != sizeof(keycode)) {
vdbg_printk(FUJLAPTOP_DBG_WARN,
"Could not push keycode [0x%x]\n", keycode);
"Push keycode into ringbuffer [%d]\n", keycode);
}
-static void acpi_fujitsu_hotkey_release(void)
+static void acpi_fujitsu_laptop_release(void)
{
- struct input_dev *input = fujitsu_hotkey->input;
+ struct input_dev *input = fujitsu_laptop->input;
int keycode, status;
while (true) {
- status = kfifo_out_locked(&fujitsu_hotkey->fifo,
+ status = kfifo_out_locked(&fujitsu_laptop->fifo,
(unsigned char *)&keycode,
sizeof(keycode),
- &fujitsu_hotkey->fifo_lock);
+ &fujitsu_laptop->fifo_lock);
if (status != sizeof(keycode))
return;
input_report_key(input, keycode, 0);
}
}
-static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
+static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
{
struct input_dev *input;
int keycode;
unsigned int irb = 1;
int i;
- input = fujitsu_hotkey->input;
+ input = fujitsu_laptop->input;
if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
keycode = KEY_UNKNOWN;
return;
}
- if (fujitsu_hotkey->rfkill_supported)
- fujitsu_hotkey->rfkill_state =
- call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0);
+ if (fujitsu_laptop->flags_supported)
+ fujitsu_laptop->flags_state =
+ call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0);
i = 0;
while ((irb =
&& (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) {
switch (irb & 0x4ff) {
case KEY1_CODE:
- keycode = fujitsu->keycode1;
+ keycode = fujitsu_bl->keycode1;
break;
case KEY2_CODE:
- keycode = fujitsu->keycode2;
+ keycode = fujitsu_bl->keycode2;
break;
case KEY3_CODE:
- keycode = fujitsu->keycode3;
+ keycode = fujitsu_bl->keycode3;
break;
case KEY4_CODE:
- keycode = fujitsu->keycode4;
+ keycode = fujitsu_bl->keycode4;
break;
case KEY5_CODE:
- keycode = fujitsu->keycode5;
+ keycode = fujitsu_bl->keycode5;
break;
case 0:
keycode = 0;
}
if (keycode > 0)
- acpi_fujitsu_hotkey_press(keycode);
+ acpi_fujitsu_laptop_press(keycode);
else if (keycode == 0)
- acpi_fujitsu_hotkey_release();
+ acpi_fujitsu_laptop_release();
}
/* On some models (first seen on the Skylake-based Lifebook
* E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
- * handled in software; its state is queried using FUNC_RFKILL
+ * handled in software; its state is queried using FUNC_FLAGS
*/
- if ((fujitsu_hotkey->rfkill_supported & BIT(26)) &&
- (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) {
+ if ((fujitsu_laptop->flags_supported & BIT(26)) &&
+ (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) {
keycode = KEY_TOUCHPAD_TOGGLE;
input_report_key(input, keycode, 1);
input_sync(input);
/* Initialization */
-static const struct acpi_device_id fujitsu_device_ids[] = {
- {ACPI_FUJITSU_HID, 0},
+static const struct acpi_device_id fujitsu_bl_device_ids[] = {
+ {ACPI_FUJITSU_BL_HID, 0},
{"", 0},
};
-static struct acpi_driver acpi_fujitsu_driver = {
- .name = ACPI_FUJITSU_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_bl_driver = {
+ .name = ACPI_FUJITSU_BL_DRIVER_NAME,
.class = ACPI_FUJITSU_CLASS,
- .ids = fujitsu_device_ids,
+ .ids = fujitsu_bl_device_ids,
.ops = {
- .add = acpi_fujitsu_add,
- .remove = acpi_fujitsu_remove,
- .notify = acpi_fujitsu_notify,
+ .add = acpi_fujitsu_bl_add,
+ .remove = acpi_fujitsu_bl_remove,
+ .notify = acpi_fujitsu_bl_notify,
},
};
-static const struct acpi_device_id fujitsu_hotkey_device_ids[] = {
- {ACPI_FUJITSU_HOTKEY_HID, 0},
+static const struct acpi_device_id fujitsu_laptop_device_ids[] = {
+ {ACPI_FUJITSU_LAPTOP_HID, 0},
{"", 0},
};
-static struct acpi_driver acpi_fujitsu_hotkey_driver = {
- .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME,
+static struct acpi_driver acpi_fujitsu_laptop_driver = {
+ .name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME,
.class = ACPI_FUJITSU_CLASS,
- .ids = fujitsu_hotkey_device_ids,
+ .ids = fujitsu_laptop_device_ids,
.ops = {
- .add = acpi_fujitsu_hotkey_add,
- .remove = acpi_fujitsu_hotkey_remove,
- .notify = acpi_fujitsu_hotkey_notify,
+ .add = acpi_fujitsu_laptop_add,
+ .remove = acpi_fujitsu_laptop_remove,
+ .notify = acpi_fujitsu_laptop_notify,
},
};
static const struct acpi_device_id fujitsu_ids[] __used = {
- {ACPI_FUJITSU_HID, 0},
- {ACPI_FUJITSU_HOTKEY_HID, 0},
+ {ACPI_FUJITSU_BL_HID, 0},
+ {ACPI_FUJITSU_LAPTOP_HID, 0},
{"", 0}
};
MODULE_DEVICE_TABLE(acpi, fujitsu_ids);
static int __init fujitsu_init(void)
{
- int ret, result, max_brightness;
+ int ret, max_brightness;
if (acpi_disabled)
return -ENODEV;
- fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL);
- if (!fujitsu)
+ fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL);
+ if (!fujitsu_bl)
return -ENOMEM;
- fujitsu->keycode1 = KEY_PROG1;
- fujitsu->keycode2 = KEY_PROG2;
- fujitsu->keycode3 = KEY_PROG3;
- fujitsu->keycode4 = KEY_PROG4;
- fujitsu->keycode5 = KEY_RFKILL;
+ fujitsu_bl->keycode1 = KEY_PROG1;
+ fujitsu_bl->keycode2 = KEY_PROG2;
+ fujitsu_bl->keycode3 = KEY_PROG3;
+ fujitsu_bl->keycode4 = KEY_PROG4;
+ fujitsu_bl->keycode5 = KEY_RFKILL;
dmi_check_system(fujitsu_dmi_table);
- result = acpi_bus_register_driver(&acpi_fujitsu_driver);
- if (result < 0) {
- ret = -ENODEV;
+ ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver);
+ if (ret)
goto fail_acpi;
- }
/* Register platform stuff */
- fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
- if (!fujitsu->pf_device) {
+ fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1);
+ if (!fujitsu_bl->pf_device) {
ret = -ENOMEM;
goto fail_platform_driver;
}
- ret = platform_device_add(fujitsu->pf_device);
+ ret = platform_device_add(fujitsu_bl->pf_device);
if (ret)
goto fail_platform_device1;
ret =
- sysfs_create_group(&fujitsu->pf_device->dev.kobj,
- &fujitsupf_attribute_group);
+ sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj,
+ &fujitsu_pf_attribute_group);
if (ret)
goto fail_platform_device2;
struct backlight_properties props;
memset(&props, 0, sizeof(struct backlight_properties));
- max_brightness = fujitsu->max_brightness;
+ max_brightness = fujitsu_bl->max_brightness;
props.type = BACKLIGHT_PLATFORM;
props.max_brightness = max_brightness - 1;
- fujitsu->bl_device = backlight_device_register("fujitsu-laptop",
- NULL, NULL,
- &fujitsubl_ops,
- &props);
- if (IS_ERR(fujitsu->bl_device)) {
- ret = PTR_ERR(fujitsu->bl_device);
- fujitsu->bl_device = NULL;
+ fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop",
+ NULL, NULL,
+ &fujitsu_bl_ops,
+ &props);
+ if (IS_ERR(fujitsu_bl->bl_device)) {
+ ret = PTR_ERR(fujitsu_bl->bl_device);
+ fujitsu_bl->bl_device = NULL;
goto fail_sysfs_group;
}
- fujitsu->bl_device->props.brightness = fujitsu->brightness_level;
+ fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level;
}
- ret = platform_driver_register(&fujitsupf_driver);
+ ret = platform_driver_register(&fujitsu_pf_driver);
if (ret)
goto fail_backlight;
- /* Register hotkey driver */
+ /* Register laptop driver */
- fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL);
- if (!fujitsu_hotkey) {
+ fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL);
+ if (!fujitsu_laptop) {
ret = -ENOMEM;
- goto fail_hotkey;
+ goto fail_laptop;
}
- result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver);
- if (result < 0) {
- ret = -ENODEV;
- goto fail_hotkey1;
- }
+ ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver);
+ if (ret)
+ goto fail_laptop1;
/* Sync backlight power status (needs FUJ02E3 device, hence deferred) */
if (acpi_video_get_backlight_type() == acpi_backlight_vendor) {
if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
- fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN;
+ fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
else
- fujitsu->bl_device->props.power = FB_BLANK_UNBLANK;
+ fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
}
pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n");
return 0;
-fail_hotkey1:
- kfree(fujitsu_hotkey);
-fail_hotkey:
- platform_driver_unregister(&fujitsupf_driver);
+fail_laptop1:
+ kfree(fujitsu_laptop);
+fail_laptop:
+ platform_driver_unregister(&fujitsu_pf_driver);
fail_backlight:
- backlight_device_unregister(fujitsu->bl_device);
+ backlight_device_unregister(fujitsu_bl->bl_device);
fail_sysfs_group:
- sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
- &fujitsupf_attribute_group);
+ sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+ &fujitsu_pf_attribute_group);
fail_platform_device2:
- platform_device_del(fujitsu->pf_device);
+ platform_device_del(fujitsu_bl->pf_device);
fail_platform_device1:
- platform_device_put(fujitsu->pf_device);
+ platform_device_put(fujitsu_bl->pf_device);
fail_platform_driver:
- acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+ acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
fail_acpi:
- kfree(fujitsu);
+ kfree(fujitsu_bl);
return ret;
}
static void __exit fujitsu_cleanup(void)
{
- acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver);
+ acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver);
- kfree(fujitsu_hotkey);
+ kfree(fujitsu_laptop);
- platform_driver_unregister(&fujitsupf_driver);
+ platform_driver_unregister(&fujitsu_pf_driver);
- backlight_device_unregister(fujitsu->bl_device);
+ backlight_device_unregister(fujitsu_bl->bl_device);
- sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
- &fujitsupf_attribute_group);
+ sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj,
+ &fujitsu_pf_attribute_group);
- platform_device_unregister(fujitsu->pf_device);
+ platform_device_unregister(fujitsu_bl->pf_device);
- acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+ acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver);
- kfree(fujitsu);
+ kfree(fujitsu_bl);
pr_info("driver unloaded\n");
}
MODULE_DESCRIPTION("Fujitsu laptop extras support");
MODULE_VERSION(FUJITSU_DRIVER_VERSION);
MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*");
-MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*");
-MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*");
#define Q2_SET_SEL(cpu, selname, address, size) \
do { \
- struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+ struct desc_struct *gdt = get_cpu_gdt_rw((cpu)); \
set_desc_base(&gdt[(selname) >> 3], (u32)(address)); \
set_desc_limit(&gdt[(selname) >> 3], (size) - 1); \
} while(0)
return PNP_FUNCTION_NOT_SUPPORTED;
cpu = get_cpu();
- save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
- get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
+ save_desc_40 = get_cpu_gdt_rw(cpu)[0x40 / 8];
+ get_cpu_gdt_rw(cpu)[0x40 / 8] = bad_bios_desc;
/* On some boxes IRQ's during PnP BIOS calls are deadly. */
spin_lock_irqsave(&pnp_bios_lock, flags);
:"memory");
spin_unlock_irqrestore(&pnp_bios_lock, flags);
- get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
+ get_cpu_gdt_rw(cpu)[0x40 / 8] = save_desc_40;
put_cpu();
/* If we get here and this is set then the PnP BIOS faulted on us. */
pnp_bios_callpoint.segment = PNP_CS16;
for_each_possible_cpu(i) {
- struct desc_struct *gdt = get_cpu_gdt_table(i);
+ struct desc_struct *gdt = get_cpu_gdt_rw(i);
if (!gdt)
continue;
set_desc_base(&gdt[GDT_ENTRY_PNPBIOS_CS32],
tristate "Emulex LightPulse Fibre Channel Support"
depends on PCI && SCSI
depends on SCSI_FC_ATTRS
- depends on NVME_FC && NVME_TARGET_FC
select CRC_T10DIF
- help
+ ---help---
This lpfc driver supports the Emulex LightPulse
Family of Fibre Channel PCI host adapters.
config SCSI_LPFC_DEBUG_FS
bool "Emulex LightPulse Fibre Channel debugfs Support"
depends on SCSI_LPFC && DEBUG_FS
- help
+ ---help---
This makes debugging information from the lpfc driver
available via the debugfs filesystem.
+config LPFC_NVME_INITIATOR
+ bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
+ depends on SCSI_LPFC && NVME_FC
+ ---help---
+ This enables NVME Initiator support in the Emulex lpfc driver.
+
+config LPFC_NVME_TARGET
+ bool "Emulex LightPulse Fibre Channel NVME Initiator Support"
+ depends on SCSI_LPFC && NVME_TARGET_FC
+ ---help---
+ This enables NVME Target support in the Emulex lpfc driver.
+ Target enablement must still be enabled on a per adapter
+ basis by module parameters.
+
config SCSI_SIM710
tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
depends on (EISA || MCA) && SCSI
return -1;
err_blink:
- return (status > 16) & 0xFF;
+ return (status >> 16) & 0xFF;
}
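
A one-character fix: the original code compared status against 16 instead of shifting. A minimal stand-alone sketch (the status value is made up) showing how '>' collapses the result to 0/1 while '>>' actually extracts the byte:

#include <stdio.h>

int main(void)
{
	unsigned int status = 0x00AB0000;	/* example value only */

	/* relational compare: 0x00AB0000 > 16 is true, so this prints 0x01 */
	printf("(status >  16) & 0xFF = 0x%02x\n", (status > 16) & 0xFF);
	/* right shift: keeps the intended byte, prints 0xab */
	printf("(status >> 16) & 0xFF = 0x%02x\n", (status >> 16) & 0xFF);
	return 0;
}
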
static inline u32 aac_get_vector(struct aac_dev *dev)
WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
task->state = state;
- if (!list_empty(&task->running))
+ spin_lock_bh(&conn->taskqueuelock);
+ if (!list_empty(&task->running)) {
+ pr_debug_once("%s while task on list", __func__);
list_del_init(&task->running);
+ }
+ spin_unlock_bh(&conn->taskqueuelock);
if (conn->task == task)
conn->task = NULL;
if (session->tt->xmit_task(task))
goto free_task;
} else {
+ spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&task->running, &conn->mgmtqueue);
+ spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
* this may be on the requeue list already if the xmit_task callout
* is handling the r2ts while we are adding new ones
*/
+ spin_lock_bh(&conn->taskqueuelock);
if (list_empty(&task->running))
list_add_tail(&task->running, &conn->requeue);
+ spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
EXPORT_SYMBOL_GPL(iscsi_requeue_task);
* only have one nop-out as a ping from us and targets should not
* overflow us with nop-ins
*/
+ spin_lock_bh(&conn->taskqueuelock);
check_mgmt:
while (!list_empty(&conn->mgmtqueue)) {
conn->task = list_entry(conn->mgmtqueue.next,
struct iscsi_task, running);
list_del_init(&conn->task->running);
+ spin_unlock_bh(&conn->taskqueuelock);
if (iscsi_prep_mgmt_task(conn, conn->task)) {
/* regular RX path uses back_lock */
spin_lock_bh(&conn->session->back_lock);
__iscsi_put_task(conn->task);
spin_unlock_bh(&conn->session->back_lock);
conn->task = NULL;
+ spin_lock_bh(&conn->taskqueuelock);
continue;
}
rc = iscsi_xmit_task(conn);
if (rc)
goto done;
+ spin_lock_bh(&conn->taskqueuelock);
}
/* process pending command queue */
conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
running);
list_del_init(&conn->task->running);
+ spin_unlock_bh(&conn->taskqueuelock);
if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
fail_scsi_task(conn->task, DID_IMM_RETRY);
+ spin_lock_bh(&conn->taskqueuelock);
continue;
}
rc = iscsi_prep_scsi_cmd_pdu(conn->task);
if (rc) {
if (rc == -ENOMEM || rc == -EACCES) {
+ spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&conn->task->running,
&conn->cmdqueue);
conn->task = NULL;
+ spin_unlock_bh(&conn->taskqueuelock);
goto done;
} else
fail_scsi_task(conn->task, DID_ABORT);
+ spin_lock_bh(&conn->taskqueuelock);
continue;
}
rc = iscsi_xmit_task(conn);
* we need to check the mgmt queue for nops that need to
 * be sent to avoid starvation
*/
+ spin_lock_bh(&conn->taskqueuelock);
if (!list_empty(&conn->mgmtqueue))
goto check_mgmt;
}
conn->task = task;
list_del_init(&conn->task->running);
conn->task->state = ISCSI_TASK_RUNNING;
+ spin_unlock_bh(&conn->taskqueuelock);
rc = iscsi_xmit_task(conn);
if (rc)
goto done;
+ spin_lock_bh(&conn->taskqueuelock);
if (!list_empty(&conn->mgmtqueue))
goto check_mgmt;
}
+ spin_unlock_bh(&conn->taskqueuelock);
spin_unlock_bh(&conn->session->frwd_lock);
return -ENODATA;
goto prepd_reject;
}
} else {
+ spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&task->running, &conn->cmdqueue);
+ spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
INIT_LIST_HEAD(&conn->mgmtqueue);
INIT_LIST_HEAD(&conn->cmdqueue);
INIT_LIST_HEAD(&conn->requeue);
+ spin_lock_init(&conn->taskqueuelock);
INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
/* allocate login_task used for the login/text sequences */
#define FC_MAX_ADPTMSG 64
#define MAX_HBAEVT 32
+#define MAX_HBAS_NO_RESET 16
/* Number of MSI-X vectors the driver uses */
#define LPFC_MSIX_VECTORS 2
/* lpfc wait event data ready flag */
-#define LPFC_DATA_READY (1<<0)
+#define LPFC_DATA_READY 0 /* bit 0 */
/* queue dump line buffer size */
#define LPFC_LBUF_SZ 128
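
The change above turns LPFC_DATA_READY from a mask into a bit number, presumably because the flag is manipulated with set_bit()/test_bit()-style helpers elsewhere in the driver (those callers are not shown here). A minimal stand-alone sketch of why feeding the old mask value to a bit-number interface addresses the wrong bit:

#include <stdio.h>

int main(void)
{
	unsigned long word = 0;
	int bit_as_mask  = (1 << 0);	/* old definition: value 1   */
	int bit_as_index = 0;		/* new definition: bit 0     */

	word |= 1UL << bit_as_mask;	/* sets bit 1 - not intended */
	printf("mask used as index:  %#lx\n", word);

	word = 0;
	word |= 1UL << bit_as_index;	/* sets bit 0 - intended     */
	printf("real bit index:      %#lx\n", word);
	return 0;
}
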
* capability
*/
#define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */
+#define NVME_XRI_ABORT_EVENT 0x100000
uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
struct lpfc_dmabuf slim2p;
static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
lpfc_poll_show, lpfc_poll_store);
+int lpfc_no_hba_reset_cnt;
+unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444);
+MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset");
+
LPFC_ATTR(sli_mode, 0, 0, 3,
"SLI mode selector:"
" 0 - auto (SLI-3 if supported),"
return -EINVAL;
phba->cfg_fcp_imax = (uint32_t)val;
- for (i = 0; i < phba->io_channel_irqs; i++)
+
+ for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, i);
return strlen(buf);
extern struct device_attribute *lpfc_hba_attrs[];
extern struct device_attribute *lpfc_vport_attrs[];
extern struct scsi_host_template lpfc_template;
-extern struct scsi_host_template lpfc_template_s3;
+extern struct scsi_host_template lpfc_template_no_hr;
extern struct scsi_host_template lpfc_template_nvme;
extern struct scsi_host_template lpfc_vport_template;
extern struct fc_function_template lpfc_transport_functions;
struct lpfc_wcqe_complete *abts_cmpl);
extern int lpfc_enable_nvmet_cnt;
extern unsigned long long lpfc_enable_nvmet[];
+extern int lpfc_no_hba_reset_cnt;
+extern unsigned long lpfc_no_hba_reset[];
"FC4 x%08x, Data: x%08x x%08x\n",
ndlp, did, ndlp->nlp_fc4_type,
FC_TYPE_FCP, FC_TYPE_NVME);
+ ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
}
- ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
lpfc_issue_els_prli(vport, ndlp, 0);
} else
idiag.ptr_private = phba->sli4_hba.nvmels_cq;
goto pass_check;
}
- /* NVME LS complete queue */
- if (phba->sli4_hba.nvmels_cq &&
- phba->sli4_hba.nvmels_cq->queue_id == queid) {
- /* Sanity check */
- rc = lpfc_idiag_que_param_check(
- phba->sli4_hba.nvmels_cq, index, count);
- if (rc)
- goto error_out;
- idiag.ptr_private = phba->sli4_hba.nvmels_cq;
- goto pass_check;
- }
/* FCP complete queue */
if (phba->sli4_hba.fcp_cq) {
for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
idiag.ptr_private = phba->sli4_hba.nvmels_wq;
goto pass_check;
}
- /* NVME LS work queue */
- if (phba->sli4_hba.nvmels_wq &&
- phba->sli4_hba.nvmels_wq->queue_id == queid) {
- /* Sanity check */
- rc = lpfc_idiag_que_param_check(
- phba->sli4_hba.nvmels_wq, index, count);
- if (rc)
- goto error_out;
- idiag.ptr_private = phba->sli4_hba.nvmels_wq;
- goto pass_check;
- }
/* FCP work queue */
if (phba->sli4_hba.fcp_wq) {
for (qidx = 0; qidx < phba->cfg_fcp_io_channel;
static uint32_t
lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc,
- struct lpfc_hba *phba)
+ struct lpfc_vport *vport)
{
desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG);
- memcpy(desc->port_names.wwnn, phba->wwnn,
+ memcpy(desc->port_names.wwnn, &vport->fc_nodename,
sizeof(desc->port_names.wwnn));
- memcpy(desc->port_names.wwpn, phba->wwpn,
+ memcpy(desc->port_names.wwpn, &vport->fc_portname,
sizeof(desc->port_names.wwpn));
desc->length = cpu_to_be32(sizeof(desc->port_names));
len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *)
(len + pcmd), &rdp_context->link_stat);
len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *)
- (len + pcmd), phba);
+ (len + pcmd), vport);
len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *)
(len + pcmd), vport, ndlp);
len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd),
spin_lock_irq(shost->host_lock);
vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
spin_unlock_irq(shost->host_lock);
- if (vport->port_type == LPFC_PHYSICAL_PORT
- && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
- lpfc_issue_init_vfi(vport);
- else
+ if (mb->mbxStatus == MBX_NOT_FINISHED)
+ break;
+ if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
+ !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ lpfc_issue_init_vfi(vport);
+ else
+ lpfc_initial_flogi(vport);
+ } else {
lpfc_initial_fdisc(vport);
+ }
break;
}
} else {
ndlp->nlp_state, ndlp->nlp_rpi);
}
- if (!(vport->load_flag & FC_UNLOADING) &&
- !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+ if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
!(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
(ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
(ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) &&
lpfc_handle_rrq_active(phba);
if (phba->hba_flag & FCP_XRI_ABORT_EVENT)
lpfc_sli4_fcp_xri_abort_event_proc(phba);
+ if (phba->hba_flag & NVME_XRI_ABORT_EVENT)
+ lpfc_sli4_nvme_xri_abort_event_proc(phba);
if (phba->hba_flag & ELS_XRI_ABORT_EVENT)
lpfc_sli4_els_xri_abort_event_proc(phba);
if (phba->hba_flag & ASYNC_EVENT)
uint32_t boot_flag, addr_mode;
uint16_t fcf_index, next_fcf_index;
struct lpfc_fcf_rec *fcf_rec = NULL;
- uint16_t vlan_id;
+ uint16_t vlan_id = LPFC_FCOE_NULL_VID;
bool select_new_fcf;
int rc;
rdata = rport->dd_data;
/* break the link before dropping the ref */
ndlp->rport = NULL;
- if (rdata && rdata->pnode == ndlp)
- lpfc_nlp_put(ndlp);
- rdata->pnode = NULL;
+ if (rdata) {
+ if (rdata->pnode == ndlp)
+ lpfc_nlp_put(ndlp);
+ rdata->pnode = NULL;
+ }
 /* drop reference for earlier registration */
put_device(&rport->dev);
}
{
INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp);
- init_timer(&ndlp->nlp_delayfunc);
- ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
- ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
+ setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay,
+ (unsigned long)ndlp);
ndlp->nlp_DID = did;
ndlp->vport = vport;
ndlp->phba = vport->phba;
pring = qp->pring;
if (!pring)
continue;
- spin_lock_irq(&pring->ring_lock);
+ spin_lock(&pring->ring_lock);
__lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list);
- spin_unlock_irq(&pring->ring_lock);
+ spin_unlock(&pring->ring_lock);
}
spin_unlock_irq(&phba->hbalock);
}
uint32_t phase;
uint32_t delay_multi;
};
-#define LPFC_MAX_EQ_DELAY 8
+#define LPFC_MAX_EQ_DELAY_EQID_CNT 8
struct sgl_page_pairs {
uint32_t sgl_pg0_addr_lo;
union {
struct {
uint32_t num_eq;
- struct eq_delay_info eq[LPFC_MAX_EQ_DELAY];
+ struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT];
} request;
struct {
uint32_t word0;
return rc;
}
+static uint64_t
+lpfc_get_wwpn(struct lpfc_hba *phba)
+{
+ uint64_t wwn;
+ int rc;
+ LPFC_MBOXQ_t *mboxq;
+ MAILBOX_t *mb;
+
+
+ mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+ GFP_KERNEL);
+ if (!mboxq)
+ return (uint64_t)-1;
+
+ /* First get WWN of HBA instance */
+ lpfc_read_nv(phba, mboxq);
+ rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+ if (rc != MBX_SUCCESS) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "6019 Mailbox failed , mbxCmd x%x "
+ "READ_NV, mbxStatus x%x\n",
+ bf_get(lpfc_mqe_command, &mboxq->u.mqe),
+ bf_get(lpfc_mqe_status, &mboxq->u.mqe));
+ mempool_free(mboxq, phba->mbox_mem_pool);
+ return (uint64_t) -1;
+ }
+ mb = &mboxq->u.mb;
+ memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t));
+ /* wwn is WWPN of HBA instance */
+ mempool_free(mboxq, phba->mbox_mem_pool);
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ return be64_to_cpu(wwn);
+ else
+ return (((wwn & 0xffffffff00000000) >> 32) |
+ ((wwn & 0x00000000ffffffff) << 32));
+
+}
+
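
Worth noting: for SLI-3 the returned name is produced by exchanging the two 32-bit halves of the 64-bit value rather than by a byte-order conversion. A minimal stand-alone sketch of that half-swap (the WWPN value is an example only):

#include <stdint.h>
#include <stdio.h>

/* Exchange the upper and lower 32-bit halves, as in the SLI-3 branch above. */
static uint64_t swap_halves(uint64_t wwn)
{
	return ((wwn & 0xffffffff00000000ULL) >> 32) |
	       ((wwn & 0x00000000ffffffffULL) << 32);
}

int main(void)
{
	uint64_t wwn = 0x1000000012345678ULL;	/* example value only */

	printf("raw:     %016llx\n", (unsigned long long)wwn);
	printf("swapped: %016llx\n", (unsigned long long)swap_halves(wwn));
	return 0;
}
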
/**
* lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping
* @phba: pointer to lpfc hba data structure.
struct lpfc_vport *vport;
struct Scsi_Host *shost = NULL;
int error = 0;
+ int i;
+ uint64_t wwn;
+ bool use_no_reset_hba = false;
+
+ wwn = lpfc_get_wwpn(phba);
+
+ for (i = 0; i < lpfc_no_hba_reset_cnt; i++) {
+ if (wwn == lpfc_no_hba_reset[i]) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "6020 Setting use_no_reset port=%llx\n",
+ wwn);
+ use_no_reset_hba = true;
+ break;
+ }
+ }
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
if (dev != &phba->pcidev->dev) {
shost = scsi_host_alloc(&lpfc_vport_template,
sizeof(struct lpfc_vport));
} else {
- if (phba->sli_rev == LPFC_SLI_REV4)
+ if (!use_no_reset_hba)
shost = scsi_host_alloc(&lpfc_template,
sizeof(struct lpfc_vport));
else
- shost = scsi_host_alloc(&lpfc_template_s3,
+ shost = scsi_host_alloc(&lpfc_template_no_hr,
sizeof(struct lpfc_vport));
}
} else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
INIT_LIST_HEAD(&vport->rcv_buffer_list);
spin_lock_init(&vport->work_port_lock);
- init_timer(&vport->fc_disctmo);
- vport->fc_disctmo.function = lpfc_disc_timeout;
- vport->fc_disctmo.data = (unsigned long)vport;
+ setup_timer(&vport->fc_disctmo, lpfc_disc_timeout,
+ (unsigned long)vport);
- init_timer(&vport->els_tmofunc);
- vport->els_tmofunc.function = lpfc_els_timeout;
- vport->els_tmofunc.data = (unsigned long)vport;
+ setup_timer(&vport->els_tmofunc, lpfc_els_timeout,
+ (unsigned long)vport);
- init_timer(&vport->delayed_disc_tmo);
- vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo;
- vport->delayed_disc_tmo.data = (unsigned long)vport;
+ setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo,
+ (unsigned long)vport);
error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
if (error)
INIT_LIST_HEAD(&phba->luns);
/* MBOX heartbeat timer */
- init_timer(&psli->mbox_tmo);
- psli->mbox_tmo.function = lpfc_mbox_timeout;
- psli->mbox_tmo.data = (unsigned long) phba;
+ setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba);
/* Fabric block timer */
- init_timer(&phba->fabric_block_timer);
- phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
- phba->fabric_block_timer.data = (unsigned long) phba;
+ setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout,
+ (unsigned long)phba);
/* EA polling mode timer */
- init_timer(&phba->eratt_poll);
- phba->eratt_poll.function = lpfc_poll_eratt;
- phba->eratt_poll.data = (unsigned long) phba;
+ setup_timer(&phba->eratt_poll, lpfc_poll_eratt,
+ (unsigned long)phba);
/* Heartbeat timer */
- init_timer(&phba->hb_tmofunc);
- phba->hb_tmofunc.function = lpfc_hb_timeout;
- phba->hb_tmofunc.data = (unsigned long)phba;
+ setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba);
return 0;
}
*/
/* FCP polling mode timer */
- init_timer(&phba->fcp_poll_timer);
- phba->fcp_poll_timer.function = lpfc_poll_timeout;
- phba->fcp_poll_timer.data = (unsigned long) phba;
+ setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout,
+ (unsigned long)phba);
/* Host attention work mask setup */
phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT);
/* Initialize the host templates the configured values. */
lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
- lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
/* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
if (phba->cfg_enable_bg) {
* Initialize timers used by driver
*/
- init_timer(&phba->rrq_tmr);
- phba->rrq_tmr.function = lpfc_rrq_timeout;
- phba->rrq_tmr.data = (unsigned long)phba;
+ setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba);
/* FCF rediscover timer */
- init_timer(&phba->fcf.redisc_wait);
- phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo;
- phba->fcf.redisc_wait.data = (unsigned long)phba;
+ setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo,
+ (unsigned long)phba);
/*
* Control structure for handling external multi-buffer mailbox
/* Initialize the host templates with the updated values. */
lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+ lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt;
if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ)
phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
/* Initialize the Abort nvme buffer list used by driver */
spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock);
INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list);
+ /* Fast-path XRI aborted CQ Event work queue list */
+ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
}
/* This abort list used by worker thread */
}
}
- /*
- * Configure EQ delay multipier for interrupt coalescing using
- * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time.
- */
- for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY)
+ for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT)
lpfc_modify_hba_eq_delay(phba, qidx);
+
return 0;
out_destroy:
/* Pending ELS XRI abort events */
list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue,
&cqelist);
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ /* Pending NVME XRI abort events */
+ list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+ &cqelist);
+ }
 /* Pending async events */
list_splice_init(&phba->sli4_hba.sp_asynce_work_queue,
&cqelist);
fc_remove_host(shost);
scsi_remove_host(shost);
- /* Perform ndlp cleanup on the physical port. The nvme and nvmet
- * localports are destroyed after to cleanup all transport memory.
- */
lpfc_cleanup(vport);
- lpfc_nvmet_destroy_targetport(phba);
- lpfc_nvme_destroy_localport(vport);
/*
* Bring down the SLI Layer. This step disable all interrupts,
.id_table = lpfc_id_table,
.probe = lpfc_pci_probe_one,
.remove = lpfc_pci_remove_one,
+ .shutdown = lpfc_pci_remove_one,
.suspend = lpfc_pci_suspend_one,
.resume = lpfc_pci_resume_one,
.err_handler = &lpfc_err_handler,
}
dma_buf->iocbq = lpfc_sli_get_iocbq(phba);
- dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
if (!dma_buf->iocbq) {
kfree(dma_buf->context);
pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt,
"2621 Ran out of nvmet iocb/WQEs\n");
return NULL;
}
+ dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET;
nvmewqe = dma_buf->iocbq;
wqe = (union lpfc_wqe128 *)&nvmewqe->wqe;
/* Initialize WQE */
bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0);
bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1);
bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1);
- bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL);
+ bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ);
bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME);
/* Word 6 */
* Embed the payload in the last half of the WQE
* WQE words 16-30 get the NVME CMD IU payload
*
- * WQE Word 16 is already setup with flags
- * WQE words 17-19 get payload Words 2-4
+ * WQE words 16-19 get payload Words 1-4
* WQE words 20-21 get payload Words 6-7
* WQE words 22-29 get payload Words 16-23
*/
- wptr = &wqe->words[17]; /* WQE ptr */
+ wptr = &wqe->words[16]; /* WQE ptr */
dptr = (uint32_t *)nCmd->cmdaddr; /* payload ptr */
- dptr += 2; /* Skip Words 0-1 in payload */
+ dptr++; /* Skip Word 0 in payload */
+ *wptr++ = *dptr++; /* Word 1 */
*wptr++ = *dptr++; /* Word 2 */
*wptr++ = *dptr++; /* Word 3 */
*wptr++ = *dptr++; /* Word 4 */
bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
NVME_WRITE_CMD);
- /* Word 16 */
- wqe->words[16] = LPFC_NVME_EMBED_WRITE;
-
phba->fc4NvmeOutputRequests++;
} else {
/* Word 7 */
bf_set(wqe_cmd_type, &wqe->generic.wqe_com,
NVME_READ_CMD);
- /* Word 16 */
- wqe->words[16] = LPFC_NVME_EMBED_READ;
-
phba->fc4NvmeInputRequests++;
}
} else {
/* Word 11 */
bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD);
- /* Word 16 */
- wqe->words[16] = LPFC_NVME_EMBED_CMD;
-
phba->fc4NvmeControlRequests++;
}
/*
pnvme_fcreq->private = (void *)lpfc_ncmd;
lpfc_ncmd->nvmeCmd = pnvme_fcreq;
lpfc_ncmd->nrport = rport;
+ lpfc_ncmd->ndlp = ndlp;
lpfc_ncmd->start_time = jiffies;
lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp);
"sid: x%x did: x%x oxid: x%x\n",
ret, vport->fc_myDID, ndlp->nlp_DID,
lpfc_ncmd->cur_iocbq.sli4_xritag);
- ret = -EINVAL;
+ ret = -EBUSY;
goto out_free_nvme_buf;
}
pdma_phys_sgl1, cur_xritag);
if (status) {
/* failure, put on abort nvme list */
- lpfc_ncmd->exch_busy = 1;
+ lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
} else {
/* success, put on NVME buffer list */
- lpfc_ncmd->exch_busy = 0;
+ lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
lpfc_ncmd->status = IOSTAT_SUCCESS;
num_posted++;
}
struct lpfc_nvme_buf, list);
if (status) {
/* failure, put on abort nvme list */
- lpfc_ncmd->exch_busy = 1;
+ lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
} else {
/* success, put on NVME buffer list */
- lpfc_ncmd->exch_busy = 0;
+ lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
lpfc_ncmd->status = IOSTAT_SUCCESS;
num_posted++;
}
unsigned long iflag = 0;
lpfc_ncmd->nonsg_phys = 0;
- if (lpfc_ncmd->exch_busy) {
+ if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) {
spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock,
iflag);
lpfc_ncmd->nvmeCmd = NULL;
int
lpfc_nvme_create_localport(struct lpfc_vport *vport)
{
+ int ret = 0;
struct lpfc_hba *phba = vport->phba;
struct nvme_fc_port_info nfcp_info;
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
- int len, ret = 0;
+ int len;
/* Initialize this localport instance. The vport wwn usage ensures
* that NPIV is accounted for.
/* localport is allocated from the stack, but the registration
* call allocates heap memory as well as the private area.
*/
+#ifdef CONFIG_LPFC_NVME_INITIATOR
ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
&vport->phba->pcidev->dev, &localport);
+#else
+ ret = -ENOMEM;
+#endif
if (!ret) {
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC,
"6005 Successfully registered local "
lport->vport = vport;
INIT_LIST_HEAD(&lport->rport_list);
vport->nvmei_support = 1;
+ len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
+ vport->phba->total_nvme_bufs += len;
}
- len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max);
- vport->phba->total_nvme_bufs += len;
return ret;
}
void
lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
{
+#ifdef CONFIG_LPFC_NVME_INITIATOR
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
"6011 Destroying NVME localport %p\n",
localport);
-
list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) {
/* The last node ref has to get released now before the rport
* private memory area is released by the transport.
"6008 rport fail destroy %x\n", ret);
wait_for_completion_timeout(&rport->rport_unreg_done, 5);
}
+
/* lport's rport list is clear. Unregister
* lport and release resources.
*/
"Failed, status x%x\n",
ret);
}
+#endif
}
void
int
lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
{
+#ifdef CONFIG_LPFC_NVME_INITIATOR
int ret = 0;
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR;
rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn);
rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
-
ret = nvme_fc_register_remoteport(localport, &rpinfo,
&remote_port);
if (!ret) {
ndlp->nlp_type, ndlp->nlp_DID, ndlp);
}
return ret;
+#else
+ return 0;
+#endif
}
/* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport.
void
lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
{
+#ifdef CONFIG_LPFC_NVME_INITIATOR
int ret;
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
return;
input_err:
+#endif
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
"6168: State error: lport %p, rport%p FCID x%06x\n",
vport->localport, ndlp->rport, ndlp->nlp_DID);
}
+
+/**
+ * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvme xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVME aborted xri.
+ **/
+void
+lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri)
+{
+ uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
+ uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+ struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd;
+ struct lpfc_nodelist *ndlp;
+ unsigned long iflag = 0;
+ int rrq_empty = 0;
+
+ if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
+ return;
+ spin_lock_irqsave(&phba->hbalock, iflag);
+ spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+ list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd,
+ &phba->sli4_hba.lpfc_abts_nvme_buf_list,
+ list) {
+ if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) {
+ list_del(&lpfc_ncmd->list);
+ lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
+ lpfc_ncmd->status = IOSTAT_SUCCESS;
+ spin_unlock(
+ &phba->sli4_hba.abts_nvme_buf_list_lock);
+
+ rrq_empty = list_empty(&phba->active_rrq_list);
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ ndlp = lpfc_ncmd->ndlp;
+ if (ndlp) {
+ lpfc_set_rrq_active(
+ phba, ndlp,
+ lpfc_ncmd->cur_iocbq.sli4_lxritag,
+ rxid, 1);
+ lpfc_sli4_abts_err_handler(phba, ndlp, axri);
+ }
+ lpfc_release_nvme_buf(phba, lpfc_ncmd);
+ if (rrq_empty)
+ lpfc_worker_wake_up(phba);
+ return;
+ }
+ }
+ spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock);
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+}
struct list_head list;
struct nvmefc_fcp_req *nvmeCmd;
struct lpfc_nvme_rport *nrport;
+ struct lpfc_nodelist *ndlp;
uint32_t timeout;
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6102 Bad state IO x%x aborted\n",
ctxp->oxid);
+ rc = -ENXIO;
goto aerr;
}
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6152 FCP Drop IO x%x: Prep\n",
ctxp->oxid);
+ rc = -ENXIO;
goto aerr;
}
ctxp->wqeq->hba_wqidx = 0;
nvmewqeq->context2 = NULL;
nvmewqeq->context3 = NULL;
+ rc = -EBUSY;
aerr:
- return -ENXIO;
+ return rc;
}
static void
lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP |
NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED;
+#ifdef CONFIG_LPFC_NVME_TARGET
error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate,
&phba->pcidev->dev,
&phba->targetport);
+#else
+ error = -ENOMEM;
+#endif
if (error) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
"6025 Cannot register NVME targetport "
return 0;
}
+/**
+ * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort
+ * @phba: pointer to lpfc hba data structure.
+ * @axri: pointer to the nvmet xri abort wcqe structure.
+ *
+ * This routine is invoked by the worker thread to process a SLI4 fast-path
+ * NVMET aborted xri.
+ **/
+void
+lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri)
+{
+ /* TODO: work in progress */
+}
+
void
lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
{
+#ifdef CONFIG_LPFC_NVME_TARGET
struct lpfc_nvmet_tgtport *tgtp;
if (phba->nvmet_support == 0)
wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
}
phba->targetport = NULL;
+#endif
}
/**
lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
struct hbq_dmabuf *nvmebuf)
{
+#ifdef CONFIG_LPFC_NVME_TARGET
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct lpfc_nvmet_rcv_ctx *ctxp;
atomic_inc(&tgtp->xmt_ls_abort);
lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid);
+#endif
}
/**
struct rqb_dmabuf *nvmebuf,
uint64_t isr_timestamp)
{
+#ifdef CONFIG_LPFC_NVME_TARGET
struct lpfc_nvmet_rcv_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
atomic_inc(&tgtp->rcv_fcp_cmd_drop);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n",
+ "6159 FCP Drop IO x%x: err x%x\n",
ctxp->oxid, rc);
dropit:
lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n",
/* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf);
}
+#endif
}
/**
bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0);
bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1);
bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0);
- bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL);
+ bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP);
bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME);
/* Word 6 */
case NVMET_FCOP_RSP:
/* Words 0 - 2 */
- sgel = &rsp->sg[0];
physaddr = rsp->rspdma;
wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen;
struct lpfc_nodelist *ndlp;
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
- "6067 %s: Entrypoint: sid %x xri %x\n", __func__,
- sid, xri);
+ "6067 Abort: sid %x xri x%x/x%x\n",
+ sid, xri, ctxp->wqeq->sli4_xritag);
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
"6134 Drop ABTS - wrong NDLP state x%x.\n",
- ndlp->nlp_state);
+ (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
/* No failure to an ABTS request. */
return 0;
atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS,
"6160 Drop ABTS - wrong NDLP state x%x.\n",
- ndlp->nlp_state);
+ (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
/* No failure to an ABTS request. */
return 0;
.track_queue_depth = 0,
};
-struct scsi_host_template lpfc_template_s3 = {
+struct scsi_host_template lpfc_template_no_hr = {
.module = THIS_MODULE,
.name = LPFC_DRIVER_NAME,
.proc_name = LPFC_DRIVER_NAME,
.info = lpfc_info,
.queuecommand = lpfc_queuecommand,
+ .eh_timed_out = fc_eh_timed_out,
.eh_abort_handler = lpfc_abort_handler,
.eh_device_reset_handler = lpfc_device_reset_handler,
.eh_target_reset_handler = lpfc_target_reset_handler,
.eh_abort_handler = lpfc_abort_handler,
.eh_device_reset_handler = lpfc_device_reset_handler,
.eh_target_reset_handler = lpfc_target_reset_handler,
- .eh_bus_reset_handler = lpfc_bus_reset_handler,
.slave_alloc = lpfc_slave_alloc,
.slave_configure = lpfc_slave_configure,
.slave_destroy = lpfc_slave_destroy,
+
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
start_sglq = sglq;
while (!found) {
if (!sglq)
- return NULL;
+ break;
if (ndlp && ndlp->active_rrqs_xri_bitmap &&
test_bit(sglq->sli4_lxritag,
ndlp->active_rrqs_xri_bitmap)) {
}
}
+/**
+ * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine is invoked by the worker thread to process all the pending
+ * SLI4 NVME abort XRI events.
+ **/
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba)
+{
+ struct lpfc_cq_event *cq_event;
+
+ /* First, declare the nvme xri abort event has been handled */
+ spin_lock_irq(&phba->hbalock);
+ phba->hba_flag &= ~NVME_XRI_ABORT_EVENT;
+ spin_unlock_irq(&phba->hbalock);
+ /* Now, handle all the nvme xri abort events */
+ while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) {
+ /* Get the first event from the head of the event queue */
+ spin_lock_irq(&phba->hbalock);
+ list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue,
+ cq_event, struct lpfc_cq_event, list);
+ spin_unlock_irq(&phba->hbalock);
+ /* Notify aborted XRI for NVME work queue */
+ if (phba->nvmet_support) {
+ lpfc_sli4_nvmet_xri_aborted(phba,
+ &cq_event->cqe.wcqe_axri);
+ } else {
+ lpfc_sli4_nvme_xri_aborted(phba,
+ &cq_event->cqe.wcqe_axri);
+ }
+ /* Free the event processed back to the free pool */
+ lpfc_sli4_cq_event_release(phba, cq_event);
+ }
+}
+
/**
* lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
* @phba: pointer to lpfc hba data structure.
spin_unlock_irqrestore(&phba->hbalock, iflags);
workposted = true;
break;
+ case LPFC_NVME:
+ spin_lock_irqsave(&phba->hbalock, iflags);
+ list_add_tail(&cq_event->list,
+ &phba->sli4_hba.sp_nvme_xri_aborted_work_queue);
+ /* Set the nvme xri abort event flag */
+ phba->hba_flag |= NVME_XRI_ABORT_EVENT;
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ workposted = true;
+ break;
default:
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0603 Invalid work queue CQE subtype (x%x)\n",
- cq->subtype);
+ "0603 Invalid CQ subtype %d: "
+ "%08x %08x %08x %08x\n",
+ cq->subtype, wcqe->word0, wcqe->parameter,
+ wcqe->word2, wcqe->word3);
+ lpfc_sli4_cq_event_release(phba, cq_event);
workposted = false;
break;
}
* @startq: The starting FCP EQ to modify
*
* This function sends an MODIFY_EQ_DELAY mailbox command to the HBA.
+ * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be
+ * updated in one mailbox command.
*
* The @phba struct is used to send mailbox command to HBA. The @startq
* is used to get the starting FCP EQ to change.
eq_delay->u.request.eq[cnt].phase = 0;
eq_delay->u.request.eq[cnt].delay_multi = dmult;
cnt++;
- if (cnt >= LPFC_MAX_EQ_DELAY)
+ if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT)
break;
}
eq_delay->u.request.num_eq = cnt;
drq = drqp[idx];
cq = cqp[idx];
- if (hrq->entry_count != drq->entry_count) {
- status = -EINVAL;
- goto out;
- }
-
/* sanity check on queue memory */
if (!hrq || !drq || !cq) {
status = -ENODEV;
goto out;
}
+ if (hrq->entry_count != drq->entry_count) {
+ status = -EINVAL;
+ goto out;
+ }
+
if (idx == 0) {
bf_set(lpfc_mbx_rq_create_num_pages,
&rq_create->u.request,
struct list_head sp_asynce_work_queue;
struct list_head sp_fcp_xri_aborted_work_queue;
struct list_head sp_els_xri_aborted_work_queue;
+ struct list_head sp_nvme_xri_aborted_work_queue;
struct list_head sp_unsol_work_queue;
struct lpfc_sli4_link link_state;
struct lpfc_sli4_lnk_info lnk_info;
int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *);
+void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba);
void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *);
void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *,
struct sli4_wcqe_xri_aborted *);
+void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri);
+void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
+ struct sli4_wcqe_xri_aborted *axri);
void lpfc_sli4_els_xri_aborted(struct lpfc_hba *,
struct sli4_wcqe_xri_aborted *);
void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *);
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "11.2.0.7"
+#define LPFC_DRIVER_VERSION "11.2.0.10"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
u64 sas_address, u16 handle, u8 phy_number, u8 link_rate);
extern struct sas_function_template mpt3sas_transport_functions;
extern struct scsi_transport_template *mpt3sas_transport_template;
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
- enum scsi_device_state new_state);
/* trigger data externs */
void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc,
struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data);
sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 1;
- r = scsi_internal_device_block(sdev);
+ r = scsi_internal_device_block(sdev, false);
if (r == -EINVAL)
sdev_printk(KERN_WARNING, sdev,
"device_block failed with return(%d) for handle(0x%04x)\n",
"performing a block followed by an unblock\n",
r, sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 1;
- r = scsi_internal_device_block(sdev);
+ r = scsi_internal_device_block(sdev, false);
if (r)
sdev_printk(KERN_WARNING, sdev, "retried device_block "
"failed with return(%d) for handle(0x%04x)\n",
struct MPT3SAS_DEVICE *sas_device_priv_data;
u32 response_code = 0;
unsigned long flags;
- unsigned int sector_sz;
mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
}
xfer_cnt = le32_to_cpu(mpi_reply->TransferCount);
-
- /* In case of bogus fw or device, we could end up having
- * unaligned partial completion. We can force alignment here,
- * then scsi-ml does not need to handle this misbehavior.
- */
- sector_sz = scmd->device->sector_size;
- if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz &&
- xfer_cnt % sector_sz)) {
- sdev_printk(KERN_INFO, scmd->device,
- "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n",
- xfer_cnt, sector_sz);
- xfer_cnt = round_down(xfer_cnt, sector_sz);
- }
-
scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt);
if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
log_info = le32_to_cpu(mpi_reply->IOCLogInfo);
#define QEDF_INFO(pdev, level, fmt, ...) \
qedf_dbg_info(pdev, __func__, __LINE__, level, fmt, \
## __VA_ARGS__)
-
-extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
const char *fmt, ...);
-extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(4, 5)
+void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
const char *, ...);
-extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
+__printf(4, 5)
+void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func,
u32 line, const char *, ...);
-extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
+__printf(5, 6)
+void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line,
u32 info, const char *fmt, ...);
/* GRC Dump related defines */
case FIP_DT_MAC:
mp = (struct fip_mac_desc *)desc;
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2,
- "fd_mac=%pM.\n", __func__, mp->fd_mac);
+ "fd_mac=%pM\n", mp->fd_mac);
ether_addr_copy(cvl_mac, mp->fd_mac);
break;
case FIP_DT_NAME:
} else {
refcount = kref_read(&io_req->refcount);
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO,
- "%d:0:%d:%d xid=0x%0x op=0x%02x "
+ "%d:0:%d:%lld xid=0x%0x op=0x%02x "
"lba=%02x%02x%02x%02x cdb_status=%d "
"fcp_resid=0x%x refcount=%d.\n",
qedf->lport->host->host_no, sc_cmd->device->id,
sc_cmd->result = result << 16;
refcount = kref_read(&io_req->refcount);
- QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing "
+ QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing "
"sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, "
"allowed=%d retries=%d refcount=%d.\n",
qedf->lport->host->host_no, sc_cmd->device->id,
}
QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC,
- "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl,
- qedf->bdq_pbl_dma);
+ "BDQ PBL addr=0x%p dma=%pad\n",
+ qedf->bdq_pbl, &qedf->bdq_pbl_dma);
/*
* Populate BDQ PBL with physical and virtual address of individual
#include <linux/debugfs.h>
#include <linux/module.h>
-int do_not_recover;
+int qedi_do_not_recover;
static struct dentry *qedi_dbg_root;
void
static ssize_t
qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg)
{
- if (!do_not_recover)
- do_not_recover = 1;
+ if (!qedi_do_not_recover)
+ qedi_do_not_recover = 1;
QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
- do_not_recover);
+ qedi_do_not_recover);
return 0;
}
static ssize_t
qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg)
{
- if (do_not_recover)
- do_not_recover = 0;
+ if (qedi_do_not_recover)
+ qedi_do_not_recover = 0;
QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n",
- do_not_recover);
+ qedi_do_not_recover);
return 0;
}
if (*ppos)
return 0;
- cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover);
+ cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover);
cnt = min_t(int, count, cnt - *ppos);
*ppos += cnt;
return cnt;
get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id,
qedi_conn->iscsi_conn_id);
- if (do_not_recover) {
+ if (qedi_do_not_recover) {
QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n",
- do_not_recover);
+ qedi_do_not_recover);
goto abort_ret;
}
#include "qedi_iscsi.h"
+#ifdef CONFIG_DEBUG_FS
+extern int qedi_do_not_recover;
+#else
+#define qedi_do_not_recover (0)
+#endif
+
extern uint qedi_io_tracing;
-extern int do_not_recover;
+
extern struct scsi_host_template qedi_host_template;
extern struct iscsi_transport qedi_iscsi_transport;
extern const struct qed_iscsi_ops *qedi_ops;
return ERR_PTR(ret);
}
- if (do_not_recover) {
+ if (qedi_do_not_recover) {
ret = -ENOMEM;
return ERR_PTR(ret);
}
struct qedi_endpoint *qedi_ep;
int ret = 0;
- if (do_not_recover)
+ if (qedi_do_not_recover)
return 1;
qedi_ep = ep->dd_data;
}
if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
- if (do_not_recover) {
+ if (qedi_do_not_recover) {
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
"Do not recover cid=0x%x\n",
qedi_ep->iscsi_cid);
}
}
- if (do_not_recover)
+ if (qedi_do_not_recover)
goto ep_exit_recover;
switch (qedi_ep->state) {
*/
qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params);
- qedi_setup_int(qedi);
+ rc = qedi_setup_int(qedi);
if (rc)
goto stop_iscsi_func;
"%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size);
ql_dbg(level, vha, id,
"----- -----------------------------------------------\n");
- for (cnt = 0; cnt < size; cnt++, buf++) {
- if (cnt % 16 == 0)
- ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU);
- printk(" %02x", *buf);
- if (cnt % 16 == 15)
- printk("\n");
+ for (cnt = 0; cnt < size; cnt += 16) {
+ ql_dbg(level, vha, id, "%04x: ", cnt);
+ print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1,
+ buf + cnt, min(16U, size - cnt), false);
}
- if (cnt % 16 != 0)
- printk("\n");
}
/**
* scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state
* @sdev: device to block
+ * @wait: Whether or not to wait until ongoing .queuecommand() /
+ * .queue_rq() calls have finished.
*
* Block request made by scsi lld's to temporarily stop all
* scsi commands on the specified device. May sleep.
* remove the rport mutex lock and unlock calls from srp_queuecommand().
*/
int
-scsi_internal_device_block(struct scsi_device *sdev)
+scsi_internal_device_block(struct scsi_device *sdev, bool wait)
{
struct request_queue *q = sdev->request_queue;
unsigned long flags;
* request queue.
*/
if (q->mq_ops) {
- blk_mq_quiesce_queue(q);
+ if (wait)
+ blk_mq_quiesce_queue(q);
+ else
+ blk_mq_stop_hw_queues(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_stop_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
- scsi_wait_for_queuecommand(sdev);
+ if (wait)
+ scsi_wait_for_queuecommand(sdev);
}
return 0;
static void
device_block(struct scsi_device *sdev, void *data)
{
- scsi_internal_device_block(sdev);
+ scsi_internal_device_block(sdev, true);
}
static int
*/
#define SCSI_DEVICE_BLOCK_MAX_TIMEOUT 600 /* units in seconds */
-extern int scsi_internal_device_block(struct scsi_device *sdev);
-extern int scsi_internal_device_unblock(struct scsi_device *sdev,
- enum scsi_device_state new_state);
#endif /* _SCSI_PRIV_H */
{
int result = SCpnt->result;
unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
+ unsigned int sector_size = SCpnt->device->sector_size;
+ unsigned int resid;
struct scsi_sense_hdr sshdr;
struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
struct request *req = SCpnt->request;
scsi_set_resid(SCpnt, blk_rq_bytes(req));
}
break;
+ default:
+ /*
+ * In case of bogus fw or device, we could end up having
+ * an unaligned partial completion. Check this here and force
+ * alignment.
+ */
+ resid = scsi_get_resid(SCpnt);
+ if (resid & (sector_size - 1)) {
+ sd_printk(KERN_INFO, sdkp,
+ "Unaligned partial completion (resid=%u, sector_sz=%u)\n",
+ resid, sector_size);
+ resid = min(scsi_bufflen(SCpnt),
+ round_up(resid, sector_size));
+ scsi_set_resid(SCpnt, resid);
+ }
}
if (result) {
*/
static int storvsc_timeout = 180;
-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
-
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
static struct scsi_transport_template *fc_transport_template;
#endif
return ret;
}
+static int storvsc_device_alloc(struct scsi_device *sdevice)
+{
+ /*
+ * Set blist flag to permit the reading of the VPD pages even when
+ * the target may claim SPC-2 compliance. MSFT targets currently
+ * claim SPC-2 compliance while they implement post SPC-2 features.
+ * With this flag we can correctly handle WRITE_SAME_16 issues.
+ *
+ * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
+ * still supports REPORT LUN.
+ */
+ sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
+
+ return 0;
+}
+
static int storvsc_device_configure(struct scsi_device *sdevice)
{
sdevice->no_write_same = 1;
- /*
- * Add blist flags to permit the reading of the VPD pages even when
- * the target may claim SPC-2 compliance. MSFT targets currently
- * claim SPC-2 compliance while they implement post SPC-2 features.
- * With this patch we can correctly handle WRITE_SAME_16 issues.
- */
- sdevice->sdev_bflags |= msft_blist_flags;
-
/*
* If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
* if the device is a MSFT virtual device. If the host is
.eh_host_reset_handler = storvsc_host_reset_handler,
.proc_name = "storvsc_host",
.eh_timed_out = storvsc_eh_timed_out,
+ .slave_alloc = storvsc_device_alloc,
.slave_configure = storvsc_device_configure,
.cmd_per_lun = 255,
.this_id = -1,
/* Descriptor idn for Query requests */
enum desc_idn {
QUERY_DESC_IDN_DEVICE = 0x0,
- QUERY_DESC_IDN_CONFIGURAION = 0x1,
+ QUERY_DESC_IDN_CONFIGURATION = 0x1,
QUERY_DESC_IDN_UNIT = 0x2,
QUERY_DESC_IDN_RFU_0 = 0x3,
QUERY_DESC_IDN_INTERCONNECT = 0x4,
QUERY_DESC_DESC_TYPE_OFFSET = 0x01,
};
-enum ufs_desc_max_size {
- QUERY_DESC_DEVICE_MAX_SIZE = 0x40,
- QUERY_DESC_CONFIGURAION_MAX_SIZE = 0x90,
- QUERY_DESC_UNIT_MAX_SIZE = 0x23,
- QUERY_DESC_INTERCONNECT_MAX_SIZE = 0x06,
- /*
- * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes
- * of descriptor header.
- */
- QUERY_DESC_STRING_MAX_SIZE = 0xFE,
- QUERY_DESC_GEOMETRY_MAX_SIZE = 0x44,
- QUERY_DESC_POWER_MAX_SIZE = 0x62,
- QUERY_DESC_RFU_MAX_SIZE = 0x00,
+enum ufs_desc_def_size {
+ QUERY_DESC_DEVICE_DEF_SIZE = 0x40,
+ QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90,
+ QUERY_DESC_UNIT_DEF_SIZE = 0x23,
+ QUERY_DESC_INTERCONNECT_DEF_SIZE = 0x06,
+ QUERY_DESC_GEOMETRY_DEF_SIZE = 0x44,
+ QUERY_DESC_POWER_DEF_SIZE = 0x62,
};
/* Unit descriptor parameters offsets in bytes*/
#define ufshcd_hex_dump(prefix_str, buf, len) \
print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false)
-static u32 ufs_query_desc_max_size[] = {
- QUERY_DESC_DEVICE_MAX_SIZE,
- QUERY_DESC_CONFIGURAION_MAX_SIZE,
- QUERY_DESC_UNIT_MAX_SIZE,
- QUERY_DESC_RFU_MAX_SIZE,
- QUERY_DESC_INTERCONNECT_MAX_SIZE,
- QUERY_DESC_STRING_MAX_SIZE,
- QUERY_DESC_RFU_MAX_SIZE,
- QUERY_DESC_GEOMETRY_MAX_SIZE,
- QUERY_DESC_POWER_MAX_SIZE,
- QUERY_DESC_RFU_MAX_SIZE,
-};
-
enum {
UFSHCD_MAX_CHANNEL = 0,
UFSHCD_MAX_ID = 1,
goto out;
}
- if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
+ if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) {
dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n",
__func__, *buf_len);
err = -EINVAL;
return err;
}
+/**
+ * ufshcd_read_desc_length - read the specified descriptor length from header
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_index: descriptor index
+ * @desc_length: pointer to variable to read the length of descriptor
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+static int ufshcd_read_desc_length(struct ufs_hba *hba,
+ enum desc_idn desc_id,
+ int desc_index,
+ int *desc_length)
+{
+ int ret;
+ u8 header[QUERY_DESC_HDR_SIZE];
+ int header_len = QUERY_DESC_HDR_SIZE;
+
+ if (desc_id >= QUERY_DESC_IDN_MAX)
+ return -EINVAL;
+
+ ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+ desc_id, desc_index, 0, header,
+ &header_len);
+
+ if (ret) {
+ dev_err(hba->dev, "%s: Failed to get descriptor header id %d",
+ __func__, desc_id);
+ return ret;
+ } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) {
+ dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch",
+ __func__, header[QUERY_DESC_DESC_TYPE_OFFSET],
+ desc_id);
+ ret = -EINVAL;
+ }
+
+ *desc_length = header[QUERY_DESC_LENGTH_OFFSET];
+ return ret;
+
+}
+
+/**
+ * ufshcd_map_desc_id_to_length - map descriptor IDN to its length
+ * @hba: Pointer to adapter instance
+ * @desc_id: descriptor idn value
+ * @desc_len: mapped desc length (out)
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba,
+ enum desc_idn desc_id, int *desc_len)
+{
+ switch (desc_id) {
+ case QUERY_DESC_IDN_DEVICE:
+ *desc_len = hba->desc_size.dev_desc;
+ break;
+ case QUERY_DESC_IDN_POWER:
+ *desc_len = hba->desc_size.pwr_desc;
+ break;
+ case QUERY_DESC_IDN_GEOMETRY:
+ *desc_len = hba->desc_size.geom_desc;
+ break;
+ case QUERY_DESC_IDN_CONFIGURATION:
+ *desc_len = hba->desc_size.conf_desc;
+ break;
+ case QUERY_DESC_IDN_UNIT:
+ *desc_len = hba->desc_size.unit_desc;
+ break;
+ case QUERY_DESC_IDN_INTERCONNECT:
+ *desc_len = hba->desc_size.interc_desc;
+ break;
+ case QUERY_DESC_IDN_STRING:
+ *desc_len = QUERY_DESC_MAX_SIZE;
+ break;
+ case QUERY_DESC_IDN_RFU_0:
+ case QUERY_DESC_IDN_RFU_1:
+ *desc_len = 0;
+ break;
+ default:
+ *desc_len = 0;
+ return -EINVAL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ufshcd_map_desc_id_to_length);
+
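A minimal usage sketch of the new helper: a hypothetical caller inside ufshcd.c sizing its read from the probed length instead of a hard-coded spec maximum. example_read_unit_desc() and its error handling are illustrative only; ufshcd_read_desc_param() is the driver-internal helper documented just below.

static int example_read_unit_desc(struct ufs_hba *hba, int lun,
				  u8 *buf, int buf_size)
{
	int len;
	int ret;

	/* Length discovered at probe time (or the spec default on failure) */
	ret = ufshcd_map_desc_id_to_length(hba, QUERY_DESC_IDN_UNIT, &len);
	if (ret)
		return ret;

	if (len > buf_size)
		len = buf_size;

	/* Read the whole descriptor starting at offset 0 */
	return ufshcd_read_desc_param(hba, QUERY_DESC_IDN_UNIT, lun, 0,
				      buf, len);
}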
/**
* ufshcd_read_desc_param - read the specified descriptor parameter
* @hba: Pointer to adapter instance
static int ufshcd_read_desc_param(struct ufs_hba *hba,
enum desc_idn desc_id,
int desc_index,
- u32 param_offset,
+ u8 param_offset,
u8 *param_read_buf,
- u32 param_size)
+ u8 param_size)
{
int ret;
u8 *desc_buf;
- u32 buff_len;
+ int buff_len;
bool is_kmalloc = true;
- /* safety checks */
- if (desc_id >= QUERY_DESC_IDN_MAX)
+	/* Safety checks */
+	if (desc_id >= QUERY_DESC_IDN_MAX || !param_size)
return -EINVAL;
- buff_len = ufs_query_desc_max_size[desc_id];
- if ((param_offset + param_size) > buff_len)
- return -EINVAL;
+	/* Get the max length of the descriptor from the structure filled in
+	 * at probe time.
+	 */
+ ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len);
- if (!param_offset && (param_size == buff_len)) {
- /* memory space already available to hold full descriptor */
- desc_buf = param_read_buf;
- is_kmalloc = false;
- } else {
- /* allocate memory to hold full descriptor */
+ /* Sanity checks */
+ if (ret || !buff_len) {
+ dev_err(hba->dev, "%s: Failed to get full descriptor length",
+ __func__);
+ return ret;
+ }
+
+ /* Check whether we need temp memory */
+ if (param_offset != 0 || param_size < buff_len) {
desc_buf = kmalloc(buff_len, GFP_KERNEL);
if (!desc_buf)
return -ENOMEM;
+ } else {
+ desc_buf = param_read_buf;
+ is_kmalloc = false;
}
+ /* Request for full descriptor */
ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
- desc_id, desc_index, 0, desc_buf,
- &buff_len);
+ desc_id, desc_index, 0,
+ desc_buf, &buff_len);
if (ret) {
dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d",
__func__, desc_id, desc_index, param_offset, ret);
-
goto out;
}
goto out;
}
- /*
- * While reading variable size descriptors (like string descriptor),
- * some UFS devices may report the "LENGTH" (field in "Transaction
- * Specific fields" of Query Response UPIU) same as what was requested
- * in Query Request UPIU instead of reporting the actual size of the
- * variable size descriptor.
- * Although it's safe to ignore the "LENGTH" field for variable size
- * descriptors as we can always derive the length of the descriptor from
- * the descriptor header fields. Hence this change impose the length
- * match check only for fixed size descriptors (for which we always
- * request the correct size as part of Query Request UPIU).
- */
- if ((desc_id != QUERY_DESC_IDN_STRING) &&
- (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) {
- dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d",
- __func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]);
- ret = -EINVAL;
- goto out;
- }
+	/* Check that we will not copy more data than is available */
+ if (is_kmalloc && param_size > buff_len)
+ param_size = buff_len;
if (is_kmalloc)
memcpy(param_read_buf, &desc_buf[param_offset], param_size);
static void ufshcd_init_icc_levels(struct ufs_hba *hba)
{
int ret;
- int buff_len = QUERY_DESC_POWER_MAX_SIZE;
- u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE];
+ int buff_len = hba->desc_size.pwr_desc;
+ u8 desc_buf[hba->desc_size.pwr_desc];
ret = ufshcd_read_power_desc(hba, desc_buf, buff_len);
if (ret) {
{
int err;
u8 model_index;
- u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
- u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+ u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0};
+ u8 desc_buf[hba->desc_size.dev_desc];
- err = ufshcd_read_device_desc(hba, desc_buf,
- QUERY_DESC_DEVICE_MAX_SIZE);
+ err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc);
if (err) {
dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
__func__, err);
model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
- QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+ QUERY_DESC_MAX_SIZE, ASCII_STD);
if (err) {
dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
__func__, err);
goto out;
}
- str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+ str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0';
strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
MAX_MODEL_LEN));
hba->req_abort_count = 0;
}
+static void ufshcd_init_desc_sizes(struct ufs_hba *hba)
+{
+ int err;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0,
+ &hba->desc_size.dev_desc);
+ if (err)
+ hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0,
+ &hba->desc_size.pwr_desc);
+ if (err)
+ hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0,
+ &hba->desc_size.interc_desc);
+ if (err)
+ hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0,
+ &hba->desc_size.conf_desc);
+ if (err)
+ hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0,
+ &hba->desc_size.unit_desc);
+ if (err)
+ hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+
+ err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0,
+ &hba->desc_size.geom_desc);
+ if (err)
+ hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
+static void ufshcd_def_desc_sizes(struct ufs_hba *hba)
+{
+ hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE;
+ hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE;
+ hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE;
+ hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE;
+ hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE;
+ hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE;
+}
+
/**
* ufshcd_probe_hba - probe hba to detect device and initialize
* @hba: per-adapter instance
if (ret)
goto out;
+ /* Init check for device descriptor sizes */
+ ufshcd_init_desc_sizes(hba);
+
ret = ufs_get_device_desc(hba, &card);
if (ret) {
dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
/* set the state as operational after switching to desired gear */
hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
/*
* If we are in error handling context or in power management callbacks
* context, no need to scan the host
hba->mmio_base = mmio_base;
hba->irq = irq;
+ /* Set descriptor lengths to specification defaults */
+ ufshcd_def_desc_sizes(hba);
+
err = ufshcd_hba_init(hba);
if (err)
goto out_error;
struct ufs_query query;
};
+struct ufs_desc_size {
+ int dev_desc;
+ int pwr_desc;
+ int geom_desc;
+ int interc_desc;
+ int unit_desc;
+ int conf_desc;
+};
+
/**
* struct ufs_clk_info - UFS clock related info
* @list: list headed by hba->clk_list_head
* @clk_list_head: UFS host controller clocks list node head
* @pwr_info: holds current power mode
* @max_pwr_info: keeps the device max valid pwm
+ * @desc_size: descriptor sizes reported by device
* @urgent_bkops_lvl: keeps track of urgent bkops level for device
* @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
* device is known or not.
bool is_urgent_bkops_lvl_checked;
struct rw_semaphore clk_scaling_lock;
+ struct ufs_desc_size desc_size;
};
/* Returns true if clocks can be gated. Otherwise false */
enum flag_idn idn, bool *flag_res);
int ufshcd_hold(struct ufs_hba *hba, bool async);
void ufshcd_release(struct ufs_hba *hba);
+
+int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id,
+ int *desc_length);
+
u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
/* Wrapper functions for safely calling variant operations */
irq_flag &= ~PCI_IRQ_MSI;
error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
- if (error)
+ if (error < 0)
goto out_reset_adapter;
adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
newsock->ops = sock->ops;
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
if (rc == -EAGAIN) {
/* Nothing ready, so wait for activity */
init_waitqueue_entry(&wait, current);
set_current_state(TASK_INTERRUPTIBLE);
schedule();
remove_wait_queue(sk_sleep(sock->sk), &wait);
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false);
}
if (rc)
newsock->type = con->sock->type;
newsock->ops = con->sock->ops;
- result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
+ result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true);
if (result < 0)
goto accept_err;
else
set_dumpable(current->mm, suid_dumpable);
+ arch_setup_new_exec();
perf_event_exec();
__set_task_comm(current, kbasename(bprm->filename), true);
spin_unlock_bh(&wb->work_lock);
}
+static void finish_writeback_work(struct bdi_writeback *wb,
+ struct wb_writeback_work *work)
+{
+ struct wb_completion *done = work->done;
+
+ if (work->auto_free)
+ kfree(work);
+ if (done && atomic_dec_and_test(&done->cnt))
+ wake_up_all(&wb->bdi->wb_waitq);
+}
+
static void wb_queue_work(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
trace_writeback_queue(wb, work);
- spin_lock_bh(&wb->work_lock);
- if (!test_bit(WB_registered, &wb->state))
- goto out_unlock;
if (work->done)
atomic_inc(&work->done->cnt);
- list_add_tail(&work->list, &wb->work_list);
- mod_delayed_work(bdi_wq, &wb->dwork, 0);
-out_unlock:
+
+ spin_lock_bh(&wb->work_lock);
+
+ if (test_bit(WB_registered, &wb->state)) {
+ list_add_tail(&work->list, &wb->work_list);
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ } else
+ finish_writeback_work(wb, work);
+
spin_unlock_bh(&wb->work_lock);
}
set_bit(WB_writeback_running, &wb->state);
while ((work = get_next_work_item(wb)) != NULL) {
- struct wb_completion *done = work->done;
-
trace_writeback_exec(wb, work);
-
wrote += wb_writeback(wb, work);
-
- if (work->auto_free)
- kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(&wb->bdi->wb_waitq);
+ finish_writeback_work(wb, work);
}
/*
struct gfs2_sbd *ln_sbd;
u64 ln_number;
unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
#define lm_name_equal(name1, name2) \
(((name1)->ln_number == (name2)->ln_number) && \
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+ ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
if (ret < 0)
goto out;
extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
+extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
+ int size);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
}
#endif /* DEBUG */
+/* Verify the consistency of an inline directory. */
+int
+xfs_dir2_sf_verify(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *sfp,
+ int size)
+{
+ struct xfs_dir2_sf_entry *sfep;
+ struct xfs_dir2_sf_entry *next_sfep;
+ char *endp;
+ const struct xfs_dir_ops *dops;
+ xfs_ino_t ino;
+ int i;
+ int i8count;
+ int offset;
+ __uint8_t filetype;
+
+ dops = xfs_dir_get_ops(mp, NULL);
+
+ /*
+ * Give up if the directory is way too short.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp, size >
+ offsetof(struct xfs_dir2_sf_hdr, parent));
+ XFS_WANT_CORRUPTED_RETURN(mp, size >=
+ xfs_dir2_sf_hdr_size(sfp->i8count));
+
+ endp = (char *)sfp + size;
+
+ /* Check .. entry */
+ ino = dops->sf_get_parent_ino(sfp);
+ i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+ offset = dops->data_first_offset;
+
+ /* Check all reported entries */
+ sfep = xfs_dir2_sf_firstentry(sfp);
+ for (i = 0; i < sfp->count; i++) {
+ /*
+ * struct xfs_dir2_sf_entry has a variable length.
+ * Check the fixed-offset parts of the structure are
+ * within the data buffer.
+ */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ ((char *)sfep + sizeof(*sfep)) < endp);
+
+ /* Don't allow names with known bad length. */
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
+ XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+
+ /*
+ * Check that the variable-length part of the structure is
+ * within the data buffer. The next entry starts after the
+ * name component, so nextentry is an acceptable test.
+ */
+ next_sfep = dops->sf_nextentry(sfp, sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+
+ /* Check that the offsets always increase. */
+ XFS_WANT_CORRUPTED_RETURN(mp,
+ xfs_dir2_sf_get_offset(sfep) >= offset);
+
+ /* Check the inode number. */
+ ino = dops->sf_get_ino(sfp, sfep);
+ i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
+ XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+
+ /* Check the file type. */
+ filetype = dops->sf_get_ftype(sfep);
+ XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+
+ offset = xfs_dir2_sf_get_offset(sfep) +
+ dops->data_entsize(sfep->namelen);
+
+ sfep = next_sfep;
+ }
+ XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
+ XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+
+ /* Make sure this whole thing ought to be in local format. */
+ XFS_WANT_CORRUPTED_RETURN(mp, offset +
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+
+ return 0;
+}
+
/*
* Create a new (shortform) directory.
*/
#include "xfs_trace.h"
#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_ifork_zone;
int whichfork,
int size)
{
+ int error;
/*
* If the size is unreasonable, then something
return -EFSCORRUPTED;
}
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(ip->i_mount,
+ (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
+ size);
+ if (error)
+ return error;
+ }
+
xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
return 0;
}
* In these cases, the format always takes precedence, because the
* format indicates the current state of the fork.
*/
-void
+int
xfs_iflush_fork(
xfs_inode_t *ip,
xfs_dinode_t *dip,
char *cp;
xfs_ifork_t *ifp;
xfs_mount_t *mp;
+ int error;
static const short brootflag[2] =
{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
static const short dataflag[2] =
{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
if (!iip)
- return;
+ return 0;
ifp = XFS_IFORK_PTR(ip, whichfork);
/*
* This can happen if we gave up in iformat in an error path,
*/
if (!ifp) {
ASSERT(whichfork == XFS_ATTR_FORK);
- return;
+ return 0;
}
cp = XFS_DFORK_PTR(dip, whichfork);
mp = ip->i_mount;
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
case XFS_DINODE_FMT_LOCAL:
+ if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
+ error = xfs_dir2_sf_verify(mp,
+ (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
+ ifp->if_bytes);
+ if (error)
+ return error;
+ }
if ((iip->ili_fields & dataflag[whichfork]) &&
(ifp->if_bytes > 0)) {
ASSERT(ifp->if_u1.if_data != NULL);
ASSERT(0);
break;
}
+ return 0;
}
/*
struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
void xfs_idata_realloc(struct xfs_inode *, int, int);
struct xfs_da_geometry *geo = args->geo;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Give up if the directory is way too short.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
-
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count))
- return -EFSCORRUPTED;
-
/*
* If the block number in the offset is out of range, we're done.
*/
struct xfs_inode_log_item *iip = ip->i_itemp;
struct xfs_dinode *dip;
struct xfs_mount *mp = ip->i_mount;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(xfs_isiflocked(ip));
if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
ip->i_d.di_flushiter = 0;
- xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
- if (XFS_IFORK_Q(ip))
- xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ if (XFS_IFORK_Q(ip)) {
+ error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
+ if (error)
+ return error;
+ }
xfs_inobp_check(mp, bp);
/*
/* by default, allow everything */
return true;
}
-
-static inline bool arch_pte_access_permitted(pte_t pte, bool write)
-{
- /* by default, allow everything */
- return true;
-}
#endif /* _ASM_GENERIC_MM_HOOKS_H */
}
#endif
+#ifndef pte_access_permitted
+#define pte_access_permitted(pte, write) \
+ (pte_present(pte) && (!(write) || pte_write(pte)))
+#endif
+
+#ifndef pmd_access_permitted
+#define pmd_access_permitted(pmd, write) \
+ (pmd_present(pmd) && (!(write) || pmd_write(pmd)))
+#endif
+
+#ifndef pud_access_permitted
+#define pud_access_permitted(pud, write) \
+ (pud_present(pud) && (!(write) || pud_write(pud)))
+#endif
+
+#ifndef p4d_access_permitted
+#define p4d_access_permitted(p4d, write) \
+ (p4d_present(p4d) && (!(write) || p4d_write(p4d)))
+#endif
+
+#ifndef pgd_access_permitted
+#define pgd_access_permitted(pgd, write) \
+ (pgd_present(pgd) && (!(write) || pgd_write(pgd)))
+#endif
+
#ifndef __HAVE_ARCH_PMD_SAME
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
int af_alg_release(struct socket *sock);
void af_alg_release_parent(struct sock *sk);
-int af_alg_accept(struct sock *sk, struct socket *newsock);
+int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
void af_alg_free_sg(struct af_alg_sgl *sgl);
asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
int, const char __user *);
+asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2);
+
/*
* For most but not all architectures, "am I in a compat syscall?" and
* "am I a compat task?" are the same question. For architectures on which
__u64 dreq_isr;
__u64 dreq_gsr;
__be32 dreq_service;
+ spinlock_t dreq_lock;
struct list_head dreq_featneg;
__u32 dreq_timestamp_echo;
__u32 dreq_timestamp_time;
u16 pages; /* Number of allocated pages */
kmemcheck_bitfield_begin(meta);
u16 jited:1, /* Is our filter JIT'ed? */
+ locked:1, /* Program image locked? */
gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */
dst_needed:1, /* Do we need dst entry? */
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
{
- set_memory_ro((unsigned long)fp, fp->pages);
+ fp->locked = 1;
+ WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
}
static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
{
- set_memory_rw((unsigned long)fp, fp->pages);
+ if (fp->locked) {
+ WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
+ /* In case set_memory_rw() fails, we want to be the first
+ * to crash here instead of some random place later on.
+ */
+ fp->locked = 0;
+ }
}
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
{
- set_memory_ro((unsigned long)hdr, hdr->pages);
+ WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
}
static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
{
- set_memory_rw((unsigned long)hdr, hdr->pages);
+ WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
}
#else
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
#define ICC_IGRPEN1_EL1_SHIFT 0
#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB (1U << 2)
+#define ICC_SRE_EL1_DFB (1U << 1)
#define ICC_SRE_EL1_SRE (1U << 0)
/*
{
return NULL;
}
+static inline bool irq_domain_check_msi_remap(void)
+{
+ return false;
+}
#endif /* !CONFIG_IRQ_DOMAIN */
#endif /* _LINUX_IRQDOMAIN_H */
((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_nulls_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
+ })
/**
* ptr_is_a_nulls - Test if a ptr is a nulls
* @ptr: ptr to be tested
{
return 0;
}
+static inline int pgd_devmap(pgd_t pgd)
+{
+ return 0;
+}
#endif
/*
#endif
unsigned long mmap_base; /* base of mmap area */
unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+	/* Base addresses for compatible mmap() */
+ unsigned long mmap_compat_base;
+ unsigned long mmap_compat_legacy_base;
+#endif
unsigned long task_size; /* size of task vm space */
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
int (*socketpair)(struct socket *sock1,
struct socket *sock2);
int (*accept) (struct socket *sock,
- struct socket *newsock, int flags);
+ struct socket *newsock, int flags, bool kern);
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int *sockaddr_len, int peer);
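A minimal sketch of the shape a protocol's accept callback takes with the new flag; example_accept() is hypothetical, and the sk_alloc()/sock_graft() calls are only meant to show where 'kern' ends up, not to reproduce any particular protocol:

static int example_accept(struct socket *sock, struct socket *newsock,
			  int flags, bool kern)
{
	struct sock *newsk;

	/* 'kern' lets sk_alloc() pick the kernel-socket (k-) lockdep
	 * classes rather than the userspace ones. */
	newsk = sk_alloc(sock_net(sock->sk), sock->sk->sk_family,
			 GFP_KERNEL, sock->sk->sk_prot, kern);
	if (!newsk)
		return -ENOMEM;

	sock_graft(newsk, newsock);
	return 0;
}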
#ifdef CONFIG_TINY_RCU
# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic());
+ VM_BUG_ON(!in_atomic() && !irqs_disabled());
# endif
/*
* Preempt must be disabled here - we rely on rcu_read_lock doing
#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
# ifdef CONFIG_PREEMPT_COUNT
- VM_BUG_ON(!in_atomic());
+ VM_BUG_ON(!in_atomic() && !irqs_disabled());
# endif
VM_BUG_ON_PAGE(page_count(page) == 0, page);
page_ref_add(page, count);
int genphy_suspend(struct phy_device *phydev);
int genphy_resume(struct phy_device *phydev);
int genphy_soft_reset(struct phy_device *phydev);
+static inline int genphy_no_soft_reset(struct phy_device *phydev)
+{
+ return 0;
+}
void phy_driver_unregister(struct phy_driver *drv);
void phy_drivers_unregister(struct phy_driver *drv, int n);
int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
--- /dev/null
+#ifndef _LINUX_PURGATORY_H
+#define _LINUX_PURGATORY_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <uapi/linux/kexec.h>
+
+struct kexec_sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+/*
+ * These forward declarations serve two purposes:
+ *
+ * 1) Make sparse happy when checking arch/purgatory
+ * 2) Document that these are required to be global so the symbol
+ * lookup in kexec works
+ */
+extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX];
+extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE];
+
+#endif
extern int add_random_ready_callback(struct random_ready_callback *rdy);
extern void del_random_ready_callback(struct random_ready_callback *rdy);
extern void get_random_bytes_arch(void *buf, int nbytes);
-extern int random_int_secret_init(void);
#ifndef MODULE
extern const struct file_operations random_fops, urandom_fops;
#endif
-unsigned int get_random_int(void);
-unsigned long get_random_long(void);
+u32 get_random_u32(void);
+u64 get_random_u64(void);
+static inline unsigned int get_random_int(void)
+{
+ return get_random_u32();
+}
+static inline unsigned long get_random_long(void)
+{
+#if BITS_PER_LONG == 64
+ return get_random_u64();
+#else
+ return get_random_u32();
+#endif
+}
+
unsigned long randomize_page(unsigned long start, unsigned long range);
u32 prandom_u32(void);
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
+/**
+ * hlist_nulls_for_each_entry_safe -
+ * iterate over list of given type safe against removal of list entry
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_nulls_node within the struct.
+ */
+#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \
+ for (({barrier();}), \
+ pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
+ (!is_a_nulls(pos)) && \
+ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \
+ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
#endif
#endif
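A self-contained sketch of the iterator above, draining a nulls list while deleting entries; struct example_item is made up, and in real code the freeing would be deferred past an RCU grace period:

#include <linux/rculist_nulls.h>
#include <linux/slab.h>

struct example_item {
	struct hlist_nulls_node node;
	int key;
};

static void example_drain(struct hlist_nulls_head *head)
{
	struct hlist_nulls_node *n;
	struct example_item *item;

	/* Safe against removal: 'n' is advanced before 'item' is unlinked */
	hlist_nulls_for_each_entry_safe(item, n, head, node) {
		hlist_nulls_del_rcu(&item->node);
		kfree(item);	/* kfree_rcu()/grace period in real code */
	}
}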
{ }
#endif /* CONFIG_HARDENED_USERCOPY */
+#ifndef arch_setup_new_exec
+static inline void arch_setup_new_exec(void) { }
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_THREAD_INFO_H */
int addr_len, int flags, int is_sendmsg);
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags);
-int inet_accept(struct socket *sock, struct socket *newsock, int flags);
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern);
int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
return (unsigned long)min_t(u64, when, max_when);
}
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
int inet_csk_get_port(struct sock *sk, unsigned short snum);
int (*send_verify) (struct sctp_sock *, union sctp_addr *);
int (*supported_addrs)(const struct sctp_sock *, __be16 *);
struct sock *(*create_accept_sk) (struct sock *sk,
- struct sctp_association *asoc);
+ struct sctp_association *asoc,
+ bool kern);
int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
* @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
* @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
* @sk_lock: synchronizer
+ * @sk_kern_sock: True if sock is using kernel lock classes
* @sk_rcvbuf: size of receive buffer in bytes
* @sk_wq: sock wait queue and async head
* @sk_rx_dst: receive input route used by early demux
#endif
kmemcheck_bitfield_begin(flags);
- unsigned int sk_padding : 2,
+ unsigned int sk_padding : 1,
+ sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
sk_userlocks : 4,
int addr_len);
int (*disconnect)(struct sock *sk, int flags);
- struct sock * (*accept)(struct sock *sk, int flags, int *err);
+ struct sock * (*accept)(struct sock *sk, int flags, int *err,
+ bool kern);
int (*ioctl)(struct sock *sk, int cmd,
unsigned long arg);
int sock_no_bind(struct socket *, struct sockaddr *, int);
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
-int sock_no_accept(struct socket *, struct socket *, int);
+int sock_no_accept(struct socket *, struct socket *, int, bool);
int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
unsigned int sock_no_poll(struct file *, struct socket *,
struct poll_table_struct *);
struct iscsi_task *task; /* xmit task in progress */
/* xmit */
+ spinlock_t taskqueuelock; /* protects the next three lists */
struct list_head mgmtqueue; /* mgmt (control) xmit queue */
struct list_head cmdqueue; /* data-path cmd queue */
struct list_head requeue; /* tasks needing another run */
sdev->sdev_state == SDEV_CREATED_BLOCK;
}
+int scsi_internal_device_block(struct scsi_device *sdev, bool wait);
+int scsi_internal_device_unblock(struct scsi_device *sdev,
+ enum scsi_device_state new_state);
+
/* accessor functions for the SCSI parameters */
static inline int scsi_device_sync(struct scsi_device *sdev)
{
(int)sizeof(pudval_t) * 2, (unsigned long long)__entry->pudval)
);
-TRACE_EVENT(xen_mmu_set_pgd,
- TP_PROTO(pgd_t *pgdp, pgd_t *user_pgdp, pgd_t pgdval),
- TP_ARGS(pgdp, user_pgdp, pgdval),
+TRACE_EVENT(xen_mmu_set_p4d,
+ TP_PROTO(p4d_t *p4dp, p4d_t *user_p4dp, p4d_t p4dval),
+ TP_ARGS(p4dp, user_p4dp, p4dval),
TP_STRUCT__entry(
- __field(pgd_t *, pgdp)
- __field(pgd_t *, user_pgdp)
- __field(pgdval_t, pgdval)
- ),
- TP_fast_assign(__entry->pgdp = pgdp;
- __entry->user_pgdp = user_pgdp;
- __entry->pgdval = pgdval.pgd),
- TP_printk("pgdp %p user_pgdp %p pgdval %0*llx (raw %0*llx)",
- __entry->pgdp, __entry->user_pgdp,
- (int)sizeof(pgdval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->pgdval)),
- (int)sizeof(pgdval_t) * 2, (unsigned long long)__entry->pgdval)
+ __field(p4d_t *, p4dp)
+ __field(p4d_t *, user_p4dp)
+ __field(p4dval_t, p4dval)
+ ),
+ TP_fast_assign(__entry->p4dp = p4dp;
+ __entry->user_p4dp = user_p4dp;
+ __entry->p4dval = p4d_val(p4dval)),
+ TP_printk("p4dp %p user_p4dp %p p4dval %0*llx (raw %0*llx)",
+ __entry->p4dp, __entry->user_p4dp,
+ (int)sizeof(p4dval_t) * 2, (unsigned long long)pgd_val(native_make_pgd(__entry->p4dval)),
+ (int)sizeof(p4dval_t) * 2, (unsigned long long)__entry->p4dval)
);
TRACE_EVENT(xen_mmu_pud_clear,
__u32 pdmc_count;
__u16 pdmc_type;
__u16 pdmc_alen;
- __u8 pdmc_addr[MAX_ADDR_LEN];
+ __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */
};
struct packet_diag_ring {
do_ctors();
usermodehelper_enable();
do_initcalls();
- random_int_secret_init();
}
static void __init do_pre_smp_initcalls(void)
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
+#include <linux/rculist_nulls.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
struct bucket {
- struct hlist_head head;
+ struct hlist_nulls_head head;
raw_spinlock_t lock;
};
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
- struct hlist_node hash_node;
- struct bpf_htab *htab;
- struct pcpu_freelist_node fnode;
+ struct hlist_nulls_node hash_node;
+ struct {
+ void *padding;
+ union {
+ struct bpf_htab *htab;
+ struct pcpu_freelist_node fnode;
+ };
+ };
};
union {
struct rcu_head rcu;
offsetof(struct htab_elem, lru_node),
htab->elem_size, htab->map.max_entries);
else
- pcpu_freelist_populate(&htab->freelist, htab->elems,
+ pcpu_freelist_populate(&htab->freelist,
+ htab->elems + offsetof(struct htab_elem, fnode),
htab->elem_size, htab->map.max_entries);
return 0;
int err, i;
u64 cost;
+ BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+ BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+ offsetof(struct htab_elem, hash_node.pprev));
+
if (lru && !capable(CAP_SYS_ADMIN))
/* LRU implementation is much more complicated than other
* maps. Hence, limit to CAP_SYS_ADMIN for now.
goto free_htab;
for (i = 0; i < htab->n_buckets; i++) {
- INIT_HLIST_HEAD(&htab->buckets[i].head);
+ INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
raw_spin_lock_init(&htab->buckets[i].lock);
}
return &htab->buckets[hash & (htab->n_buckets - 1)];
}
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
return &__select_bucket(htab, hash)->head;
}
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+/* this lookup function can only be called with bucket lock taken */
+static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
void *key, u32 key_size)
{
+ struct hlist_nulls_node *n;
+ struct htab_elem *l;
+
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+ if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ return l;
+
+ return NULL;
+}
+
+/* Can be called without the bucket lock. It will repeat the loop in
+ * the unlikely event that elements moved from one bucket into another
+ * while the linked list is being walked.
+ */
+static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
+ u32 hash, void *key,
+ u32 key_size, u32 n_buckets)
+{
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_rcu(l, head, hash_node)
+again:
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l->hash == hash && !memcmp(&l->key, key, key_size))
return l;
+ if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
+ goto again;
+
return NULL;
}
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l;
u32 hash, key_size;
head = select_bucket(htab, hash);
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
return l;
}
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
struct bpf_htab *htab = (struct bpf_htab *)arg;
- struct htab_elem *l, *tgt_l;
- struct hlist_head *head;
+ struct htab_elem *l = NULL, *tgt_l;
+ struct hlist_nulls_head *head;
+ struct hlist_nulls_node *n;
unsigned long flags;
struct bucket *b;
raw_spin_lock_irqsave(&b->lock, flags);
- hlist_for_each_entry_rcu(l, head, hash_node)
+ hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
break;
}
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct htab_elem *l, *next_l;
u32 hash, key_size;
int i;
head = select_bucket(htab, hash);
/* lookup the key */
- l = lookup_elem_raw(head, hash, key, key_size);
+ l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
if (!l) {
i = 0;
}
/* key was found, get next key in the same bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
struct htab_elem, hash_node);
if (next_l) {
head = select_bucket(htab, i);
/* pick first element in the bucket */
- next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+ next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
struct htab_elem, hash_node);
if (next_l) {
/* if it's not empty, just return it */
int err = 0;
if (prealloc) {
- l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
- if (!l_new)
+ struct pcpu_freelist_node *l;
+
+ l = pcpu_freelist_pop(&htab->freelist);
+ if (!l)
err = -E2BIG;
+ else
+ l_new = container_of(l, struct htab_elem, fnode);
} else {
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
atomic_dec(&htab->count);
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
- hlist_del_rcu(&l_old->hash_node);
+ hlist_nulls_del_rcu(&l_old->hash_node);
free_htab_elem(htab, l_old);
}
ret = 0;
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new, *l_old = NULL;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
bpf_lru_node_set_ref(&l_new->lru_node);
- hlist_del_rcu(&l_old->hash_node);
+ hlist_nulls_del_rcu(&l_old->hash_node);
}
ret = 0;
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
ret = PTR_ERR(l_new);
goto err;
}
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
}
ret = 0;
err:
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
} else {
pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
value, onallcpus);
- hlist_add_head_rcu(&l_new->hash_node, head);
+ hlist_nulls_add_head_rcu(&l_new->hash_node, head);
l_new = NULL;
}
ret = 0;
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
free_htab_elem(htab, l);
ret = 0;
}
static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct hlist_head *head;
+ struct hlist_nulls_head *head;
struct bucket *b;
struct htab_elem *l;
unsigned long flags;
l = lookup_elem_raw(head, hash, key, key_size);
if (l) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_del_rcu(&l->hash_node);
ret = 0;
}
int i;
for (i = 0; i < htab->n_buckets; i++) {
- struct hlist_head *head = select_bucket(htab, i);
- struct hlist_node *n;
+ struct hlist_nulls_head *head = select_bucket(htab, i);
+ struct hlist_nulls_node *n;
struct htab_elem *l;
- hlist_for_each_entry_safe(l, n, head, hash_node) {
- hlist_del_rcu(&l->hash_node);
+ hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+ hlist_nulls_del_rcu(&l->hash_node);
if (l->state != HTAB_EXTRA_ELEM_USED)
htab_elem_free(htab, l);
}
raw_spin_unlock(&trie->lock);
}
+static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ return -ENOTSUPP;
+}
+
static const struct bpf_map_ops trie_ops = {
.map_alloc = trie_alloc,
.map_free = trie_free,
+ .map_get_next_key = trie_get_next_key,
.map_lookup_elem = trie_lookup_elem,
.map_update_elem = trie_update_elem,
.map_delete_elem = trie_delete_elem,
struct task_struct *task;
int count = 0;
- seq_printf(seq, "css_set %p\n", cset);
+ seq_printf(seq, "css_set %pK\n", cset);
list_for_each_entry(task, &cset->tasks, cg_list) {
if (count++ > MAX_TASKS_SHOWN_PER_CSS)
/* Only log the first time events_limit is incremented. */
if (atomic64_inc_return(&pids->events_limit) == 1) {
pr_info("cgroup: fork rejected by pids controller in ");
- pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id));
+ pr_cont_cgroup_path(css->cgroup);
pr_cont("\n");
}
cgroup_file_notify(&pids->events_file);
ret = crypto_shash_final(desc, digest);
if (ret)
goto out_free_digest;
- ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
- sha_regions, sha_region_sz, 0);
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions",
+ sha_regions, sha_region_sz, 0);
if (ret)
goto out_free_digest;
- ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
- digest, SHA256_DIGEST_SIZE, 0);
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest",
+ digest, SHA256_DIGEST_SIZE, 0);
if (ret)
goto out_free_digest;
}
extern struct mutex kexec_mutex;
#ifdef CONFIG_KEXEC_FILE
-struct kexec_sha_region {
- unsigned long start;
- unsigned long len;
-};
-
+#include <linux/purgatory.h>
void kimage_file_post_load_cleanup(struct kimage *image);
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;
+ WARN_ON_ONCE(!wq);
WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
timer->data != (unsigned long)dwork);
WARN_ON_ONCE(timer_pending(timer));
config HAVE_MEMBLOCK_PHYS_MAP
bool
-config HAVE_GENERIC_RCU_GUP
+config HAVE_GENERIC_GUP
bool
config ARCH_DISCARD_MEMBLOCK
#endif /* CONFIG_ELF_CORE */
/*
- * Generic RCU Fast GUP
+ * Generic Fast GUP
*
* get_user_pages_fast attempts to pin user pages by walking the page
* tables directly and avoids taking locks. Thus the walker needs to be
* Before activating this code, please be aware that the following assumptions
* are currently made:
*
- * *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
- * pages containing page tables.
+ * *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
+ *    free pages containing page tables, or TLB flushing requires IPI broadcast.
*
* *) ptes can be read atomically by the architecture.
*
*
* This code is based heavily on the PowerPC implementation by Nick Piggin.
*/
-#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+#ifdef CONFIG_HAVE_GENERIC_GUP
+
+#ifndef gup_get_pte
+/*
+ * We assume that the PTE can be read atomically. If this is not the case for
+ * your architecture, please provide the helper.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
+}
+#endif
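A hedged sketch of the kind of helper an architecture would provide when a PTE cannot be read in a single atomic load; the retry scheme mirrors what 32-bit PAE style layouts need, and the pte_low/pte_high field names assume such a split pte_t:

#define gup_get_pte gup_get_pte
static inline pte_t gup_get_pte(pte_t *ptep)
{
	pte_t pte;

	do {
		pte.pte_low = ptep->pte_low;
		smp_rmb();
		pte.pte_high = ptep->pte_high;
		smp_rmb();
		/* Retry if the low word changed while reading the high word */
	} while (unlikely(pte.pte_low != ptep->pte_low));

	return pte;
}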
+
+static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+{
+ while ((*nr) - nr_start) {
+ struct page *page = pages[--(*nr)];
+
+ ClearPageReferenced(page);
+ put_page(page);
+ }
+}
#ifdef __HAVE_ARCH_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
+ struct dev_pagemap *pgmap = NULL;
+ int nr_start = *nr, ret = 0;
pte_t *ptep, *ptem;
- int ret = 0;
ptem = ptep = pte_offset_map(&pmd, addr);
do {
- /*
- * In the line below we are assuming that the pte can be read
- * atomically. If this is not the case for your architecture,
- * please wrap this in a helper function!
- *
- * for an example see gup_get_pte in arch/x86/mm/gup.c
- */
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = gup_get_pte(ptep);
struct page *head, *page;
/*
* Similar to the PMD case below, NUMA hinting must take slow
* path using the pte_protnone check.
*/
- if (!pte_present(pte) || pte_special(pte) ||
- pte_protnone(pte) || (write && !pte_write(pte)))
+ if (pte_protnone(pte))
goto pte_unmap;
- if (!arch_pte_access_permitted(pte, write))
+ if (!pte_access_permitted(pte, write))
+ goto pte_unmap;
+
+ if (pte_devmap(pte)) {
+ pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
+ if (unlikely(!pgmap)) {
+ undo_dev_pagemap(nr, nr_start, pages);
+ goto pte_unmap;
+ }
+ } else if (pte_special(pte))
goto pte_unmap;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
}
VM_BUG_ON_PAGE(compound_head(page) != head, page);
+
+ put_dev_pagemap(pgmap);
+ SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */
+#ifdef __HAVE_ARCH_PTE_DEVMAP
+static int __gup_device_huge(unsigned long pfn, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ int nr_start = *nr;
+ struct dev_pagemap *pgmap = NULL;
+
+ do {
+ struct page *page = pfn_to_page(pfn);
+
+ pgmap = get_dev_pagemap(pfn, pgmap);
+ if (unlikely(!pgmap)) {
+ undo_dev_pagemap(nr, nr_start, pages);
+ return 0;
+ }
+ SetPageReferenced(page);
+ pages[*nr] = page;
+ get_page(page);
+ put_dev_pagemap(pgmap);
+ (*nr)++;
+ pfn++;
+ } while (addr += PAGE_SIZE, addr != end);
+ return 1;
+}
+
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ unsigned long fault_pfn;
+
+ fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ unsigned long fault_pfn;
+
+ fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+#else
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ BUILD_BUG();
+ return 0;
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ BUILD_BUG();
+ return 0;
+}
+#endif
+
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
struct page *head, *page;
int refs;
- if (write && !pmd_write(orig))
+ if (!pmd_access_permitted(orig, write))
return 0;
+ if (pmd_devmap(orig))
+ return __gup_device_huge_pmd(orig, addr, end, pages, nr);
+
refs = 0;
head = pmd_page(orig);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
return 0;
}
+ SetPageReferenced(head);
return 1;
}
struct page *head, *page;
int refs;
- if (write && !pud_write(orig))
+ if (!pud_access_permitted(orig, write))
return 0;
+ if (pud_devmap(orig))
+ return __gup_device_huge_pud(orig, addr, end, pages, nr);
+
refs = 0;
head = pud_page(orig);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
return 0;
}
+ SetPageReferenced(head);
return 1;
}
int refs;
struct page *head, *page;
- if (write && !pgd_write(orig))
+ if (!pgd_access_permitted(orig, write))
return 0;
+ BUILD_BUG_ON(pgd_devmap(orig));
refs = 0;
head = pgd_page(orig);
page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
return 0;
}
+ SetPageReferenced(head);
return 1;
}
if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
P4D_SHIFT, next, write, pages, nr))
return 0;
- } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr))
+ } else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
return 0;
} while (p4dp++, addr = next, addr != end);
return nr;
}
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range;
+ * if not, we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ return end >= start;
+}
+#endif
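A hedged sketch of what an architecture-specific override might look like; the extra TASK_SIZE comparison is only an illustration of the sort of constraint a port could add on top of the overflow check, not a description of any particular architecture:

#define gup_fast_permitted gup_fast_permitted
static inline bool gup_fast_permitted(unsigned long start, int nr_pages,
				      int write)
{
	unsigned long len = (unsigned long)nr_pages << PAGE_SHIFT;
	unsigned long end = start + len;

	/* Reject wrap-around and anything outside the user address space */
	return end >= start && end <= TASK_SIZE;
}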
+
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- int nr, ret;
+ int nr = 0, ret = 0;
start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- ret = nr;
+
+ if (gup_fast_permitted(start, nr_pages, write)) {
+ nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ ret = nr;
+ }
if (nr < nr_pages) {
/* Try to get the remaining pages with get_user_pages */
return ret;
}
-#endif /* CONFIG_HAVE_GENERIC_RCU_GUP */
+#endif /* CONFIG_HAVE_GENERIC_GUP */
/**
* pcpu_get_pages - get temp pages array
- * @chunk: chunk of interest
*
* Returns pointer to array of pointers to struct page which can be indexed
* with pcpu_page_idx(). Note that there is only one array and accesses
* RETURNS:
* Pointer to temp pages array on success.
*/
-static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc)
+static struct page **pcpu_get_pages(void)
{
static struct page **pages;
size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
{
struct page **pages;
- pages = pcpu_get_pages(chunk);
+ pages = pcpu_get_pages();
if (!pages)
return -ENOMEM;
* successful population attempt so the temp pages array must
* be available now.
*/
- pages = pcpu_get_pages(chunk);
+ pages = pcpu_get_pages();
BUG_ON(!pages);
/* unmap and free */
mutex_unlock(&pcpu_alloc_mutex);
}
- if (chunk != pcpu_reserved_chunk)
+ if (chunk != pcpu_reserved_chunk) {
+ spin_lock_irqsave(&pcpu_lock, flags);
pcpu_nr_empty_pop_pages -= occ_pages;
+ spin_unlock_irqrestore(&pcpu_lock, flags);
+ }
if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
pcpu_schedule_balance_work();
return error;
}
-static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
+static int svc_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
lock_sock(sk);
- error = svc_create(sock_net(sk), newsock, 0, 0);
+ error = svc_create(sock_net(sk), newsock, 0, kern);
if (error)
goto out;
return err;
}
-static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int ax25_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sk_buff *skb;
struct sock *newsk;
}
static int l2cap_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
return err;
}
-static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *nsk;
}
static int sco_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = sock->sk, *ch;
static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ br_drop_fake_rtable(skb);
return netif_receive_skb(skb);
}
}
-/* PF_BRIDGE/LOCAL_IN ************************************************/
-/* The packet is locally destined, which requires a real
- * dst_entry, so detach the fake one. On the way up, the
- * packet would pass through PRE_ROUTING again (which already
- * took place when the packet entered the bridge), but we
- * register an IPv4 PRE_ROUTING 'sabotage' hook that will
- * prevent this from happening. */
-static unsigned int br_nf_local_in(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- br_drop_fake_rtable(skb);
- return NF_ACCEPT;
-}
-
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
- {
- .hook = br_nf_local_in,
- .pf = NFPROTO_BRIDGE,
- .hooknum = NF_BR_LOCAL_IN,
- .priority = NF_BR_PRI_BRNF,
- },
{
.hook = br_nf_forward_ip,
.pf = NFPROTO_BRIDGE,
{
rtnl_lock();
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+ call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);
while (--i >= new_num) {
struct kobject *kobj = &dev->_rx[i].kobj;
- if (!list_empty(&dev_net(dev)->exit_list))
+ if (!atomic_read(&dev_net(dev)->count))
kobj->uevent_suppress = 1;
if (dev->sysfs_rx_queue_group)
sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
while (--i >= new_num) {
struct netdev_queue *queue = dev->_tx + i;
- if (!list_empty(&dev_net(dev)->exit_list))
+ if (!atomic_read(&dev_net(dev)->count))
queue->kobj.uevent_suppress = 1;
#ifdef CONFIG_BQL
sysfs_remove_group(&queue->kobj, &dql_group);
{
struct device *dev = &(ndev->dev);
- if (!list_empty(&dev_net(ndev)->exit_list))
+ if (!atomic_read(&dev_net(ndev)->count))
dev_set_uevent_suppress(dev, 1);
kobject_get(&dev->kobj);
if (!skb_may_tx_timestamp(sk, false))
return;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- *skb_hwtstamps(skb) = *hwtstamps;
- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-
- sock_put(sk);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ *skb_hwtstamps(skb) = *hwtstamps;
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+ sock_put(sk);
+ }
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
{
struct sock *sk = skb->sk;
struct sock_exterr_skb *serr;
- int err;
+ int err = 1;
skb->wifi_acked_valid = 1;
skb->wifi_acked = acked;
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- err = sock_queue_err_skb(sk, skb);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ err = sock_queue_err_skb(sk, skb);
+ sock_put(sk);
+ }
if (err)
kfree_skb(skb);
-
- sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
/*
* Each address family might have different locking rules, so we have
- * one slock key per address family:
+ * one slock key per address family and separate keys for internal and
+ * userspace sockets.
*/
static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_kern_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
+static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
/*
* Make lock validator output more readable. (we pre-construct these
* strings build-time, so that runtime initialization of socket
* locks is fast):
*/
+
+#define _sock_locks(x) \
+ x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
+ x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
+ x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
+ x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
+ x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
+ x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
+ x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
+ x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
+ x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
+ x "27" , x "28" , x "AF_CAN" , \
+ x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
+ x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
+ x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
+ x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
+ x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX"
+
static const char *const af_family_key_strings[AF_MAX+1] = {
- "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
- "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
- "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
- "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
- "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
- "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
- "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
- "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
- "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
- "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
- "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
- "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
- "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
- "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC" , "sk_lock-AF_MAX"
+ _sock_locks("sk_lock-")
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
- "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
- "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
- "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
- "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
- "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
- "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
- "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
- "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
- "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
- "slock-27" , "slock-28" , "slock-AF_CAN" ,
- "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
- "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
- "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
- "slock-AF_QIPCRTR", "slock-AF_SMC" , "slock-AF_MAX"
+ _sock_locks("slock-")
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
- "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
- "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
- "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
- "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
- "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
- "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
- "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
- "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
- "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
- "clock-27" , "clock-28" , "clock-AF_CAN" ,
- "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
- "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
- "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
- "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX"
+ _sock_locks("clock-")
+};
+
+static const char *const af_family_kern_key_strings[AF_MAX+1] = {
+ _sock_locks("k-sk_lock-")
+};
+static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
+ _sock_locks("k-slock-")
+};
+static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
+ _sock_locks("k-clock-")
};
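The _sock_locks() macro above relies on compile-time string-literal
concatenation: each `x "NAME"` pair glues the caller's prefix onto the family
name, so one table definition yields the sk_lock-, slock-, clock- and k-*
variants. A trimmed, standalone sketch of the expansion (only three families
kept for brevity):

    #include <stdio.h>

    /* Cut-down copy of the macro: the prefix is pasted in front of each
     * family name by adjacent string-literal concatenation.
     */
    #define _sock_locks(x) x "AF_UNSPEC", x "AF_UNIX", x "AF_INET"

    static const char *const demo[] = { _sock_locks("k-slock-") };

    int main(void)
    {
        for (unsigned int i = 0; i < sizeof(demo) / sizeof(demo[0]); i++)
            printf("%s\n", demo[i]);  /* k-slock-AF_UNSPEC, k-slock-AF_UNIX, ... */
        return 0;
    }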
/*
* so split the lock classes by using a per-AF key:
*/
static struct lock_class_key af_callback_keys[AF_MAX];
+static struct lock_class_key af_kern_callback_keys[AF_MAX];
/* Take into consideration the size of the struct sk_buff overhead in the
* determination of these values, since that is non-constant across
*/
static inline void sock_lock_init(struct sock *sk)
{
- sock_lock_init_class_and_name(sk,
+ if (sk->sk_kern_sock)
+ sock_lock_init_class_and_name(
+ sk,
+ af_family_kern_slock_key_strings[sk->sk_family],
+ af_family_kern_slock_keys + sk->sk_family,
+ af_family_kern_key_strings[sk->sk_family],
+ af_family_kern_keys + sk->sk_family);
+ else
+ sock_lock_init_class_and_name(
+ sk,
af_family_slock_key_strings[sk->sk_family],
af_family_slock_keys + sk->sk_family,
af_family_key_strings[sk->sk_family],
* why we need sk_prot_creator -acme
*/
sk->sk_prot = sk->sk_prot_creator = prot;
+ sk->sk_kern_sock = kern;
sock_lock_init(sk);
sk->sk_net_refcnt = kern ? 0 : 1;
if (likely(sk->sk_net_refcnt))
}
EXPORT_SYMBOL(sock_no_socketpair);
-int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
+int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
return -EOPNOTSUPP;
}
}
rwlock_init(&sk->sk_callback_lock);
- lockdep_set_class_and_name(&sk->sk_callback_lock,
+ if (sk->sk_kern_sock)
+ lockdep_set_class_and_name(
+ &sk->sk_callback_lock,
+ af_kern_callback_keys + sk->sk_family,
+ af_family_kern_clock_key_strings[sk->sk_family]);
+ else
+ lockdep_set_class_and_name(
+ &sk->sk_callback_lock,
af_callback_keys + sk->sk_family,
af_family_clock_key_strings[sk->sk_family]);
for (i = 0; i < hc->tx_seqbufc; i++)
kfree(hc->tx_seqbuf[i]);
hc->tx_seqbufc = 0;
+ dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
switch (type) {
case ICMP_REDIRECT:
- dccp_do_redirect(skb, sk);
+ if (!sock_owned_by_user(sk))
+ dccp_do_redirect(skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
np = inet6_sk(sk);
if (type == NDISC_REDIRECT) {
- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+ if (!sock_owned_by_user(sk)) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst)
- dst->ops->redirect(dst, sk, skb);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
goto out;
}
struct dccp_request_sock *dreq = dccp_rsk(req);
bool own_req;
+ /* TCP/DCCP listeners became lockless.
+ * DCCP stores complex state in its request_sock, so we need to
+ * protect it, since this code now runs without being protected
+ * by the parent (listener) lock.
+ */
+ spin_lock_bh(&dreq->dreq_lock);
+
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
inet_rtx_syn_ack(sk, req);
}
/* Network Duplicate, discard packet */
- return NULL;
+ goto out;
}
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
req, &own_req);
- if (!child)
- goto listen_overflow;
-
- return inet_csk_complete_hashdance(sk, child, req, own_req);
+ if (child) {
+ child = inet_csk_complete_hashdance(sk, child, req, own_req);
+ goto out;
+ }
-listen_overflow:
- dccp_pr_debug("listen_overflow!\n");
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req);
- return NULL;
+out:
+ spin_unlock_bh(&dreq->dreq_lock);
+ return child;
}
EXPORT_SYMBOL_GPL(dccp_check_req);
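With the listener lock no longer held, dccp_check_req() above serialises on
the new per-request dreq_lock and funnels every exit through the single unlock
at the out label. A small userspace sketch of that single-exit locking shape,
with illustrative names (not the actual DCCP code paths):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t dreq_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Every return path runs through the one unlock at "out". */
    static int check_req(int retransmit, int ok)
    {
        int child = 0;

        pthread_mutex_lock(&dreq_lock);

        if (retransmit)
            goto out;            /* duplicate request: drop it, keep waiting */
        if (ok) {
            child = 1;           /* hand the new connection back to the caller */
            goto out;
        }
        /* error path: send reset / drop the request here */
    out:
        pthread_mutex_unlock(&dreq_lock);
        return child;
    }

    int main(void)
    {
        printf("%d %d %d\n", check_req(1, 0), check_req(0, 1), check_req(0, 0));
        return 0;
    }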
{
struct dccp_request_sock *dreq = dccp_rsk(req);
+ spin_lock_init(&dreq->dreq_lock);
inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport);
inet_rsk(req)->acked = 0;
return skb == NULL ? ERR_PTR(err) : skb;
}
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
+static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk, *newsk;
struct sk_buff *skb = NULL;
cb = DN_SKB_CB(skb);
sk->sk_ack_backlog--;
- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
+ newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
if (newsk == NULL) {
release_sock(sk);
kfree_skb(skb);
* Accept a pending connection. The TCP layer now gives BSD semantics.
*/
-int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+int inet_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk1 = sock->sk;
int err = -EINVAL;
- struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);
+ struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern);
if (!sk2)
goto do_err;
int proto = iph->protocol;
int err = -ENOSYS;
- if (skb->encapsulation)
+ if (skb->encapsulation) {
+ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
skb_set_inner_network_header(skb, nhoff);
+ }
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
/*
* This will accept the next outstanding connection.
*/
-struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
+struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
cork->length += length;
if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+ (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
*/
void tcp_v4_mtu_reduced(struct sock *sk)
{
- struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
- u32 mtu = tcp_sk(sk)->mtu_info;
+ struct dst_entry *dst;
+ u32 mtu;
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+ mtu = tcp_sk(sk)->mtu_info;
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
switch (type) {
case ICMP_REDIRECT:
- do_redirect(icmp_skb, sk);
+ if (!sock_owned_by_user(sk))
+ do_redirect(icmp_skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
sk_mem_reclaim_partial(sk);
- if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
goto out;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
struct inet_connection_sock *icsk = inet_csk(sk);
int event;
- if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !icsk->icsk_pending)
goto out;
if (time_after(icsk->icsk_timeout, jiffies)) {
err = register_pernet_subsys(&inet6_net_ops);
if (err)
goto register_pernet_fail;
- err = icmpv6_init();
- if (err)
- goto icmp_fail;
err = ip6_mr_init();
if (err)
goto ipmr_fail;
+ err = icmpv6_init();
+ if (err)
+ goto icmp_fail;
err = ndisc_init();
if (err)
goto ndisc_fail;
ndisc_cleanup();
ndisc_fail:
ip6_mr_cleanup();
-ipmr_fail:
- icmpv6_cleanup();
icmp_fail:
unregister_pernet_subsys(&inet6_net_ops);
+ipmr_fail:
+ icmpv6_cleanup();
register_pernet_fail:
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
ins = &rt->dst.rt6_next;
iter = *ins;
while (iter) {
+ if (iter->rt6i_metric > rt->rt6i_metric)
+ break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
fib6_purge_rt(iter, fn, info->nl_net);
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
int err = -ENOSYS;
- if (skb->encapsulation)
+ if (skb->encapsulation) {
+ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
skb_set_inner_network_header(skb, nhoff);
+ }
iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
* Fragment the datagram.
*/
- *prevhdr = NEXTHDR_FRAGMENT;
troom = rt->dst.dev->needed_tailroom;
/*
* Keep copying data until we run out.
*/
while (left > 0) {
+ u8 *fragnexthdr_offset;
+
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
*/
skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
+ fragnexthdr_offset = skb_network_header(frag);
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
/*
* Build fragment header.
*/
if ((((length + fragheaderlen) > mtu) ||
(skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+ (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, exthdrlen,
if (!skb->ignore_df && skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
- if (skb->protocol == htons(ETH_P_IPV6))
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- else
+ } else {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ }
return -EMSGSIZE;
}
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */
- + nla_total_size(4) /* RTA_OIF */
+ lwtunnel_get_encap_size(rt->dst.lwtstate);
nexthop_len *= rt->rt6i_nsiblings;
}
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
- unsigned int *flags)
+ unsigned int *flags, bool skip_oif)
{
if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
*flags |= RTNH_F_LINKDOWN;
goto nla_put_failure;
}
- if (rt->dst.dev &&
+ /* not needed for multipath encoding b/c it has a rtnexthop struct */
+ if (!skip_oif && rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
goto nla_put_failure;
return -EMSGSIZE;
}
+/* add multipath next hop */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{
struct rtnexthop *rtnh;
rtnh->rtnh_hops = 0;
rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
- if (rt6_nexthop_info(skb, rt, &flags) < 0)
+ if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
goto nla_put_failure;
rtnh->rtnh_flags = flags;
nla_nest_end(skb, mp);
} else {
- if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
+ if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
goto nla_put_failure;
}
np = inet6_sk(sk);
if (type == NDISC_REDIRECT) {
- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+ if (!sock_owned_by_user(sk)) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst)
- dst->ops->redirect(dst, sk, skb);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
goto out;
}
* Wait for incoming connection
*
*/
-static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
+static int irda_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct irda_sock *new, *self = irda_sk(sk);
struct sk_buff *skb = NULL;
int err;
- err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
+ err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern);
if (err)
return err;
/* Accept a pending connection */
static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *nsk;
* @sock: Socket which connections arrive on.
* @newsock: Socket to move incoming connection to.
* @flags: User specified operational flags.
+ * @kern: If the socket is kernel internal
*
* Accept a new incoming connection.
* Returns 0 upon success, negative otherwise.
*/
-static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags)
+static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk, *newsk;
struct llc_sock *llc, *newllc;
/* fall through */
case NETDEV_CHANGE:
nh->nh_flags |= RTNH_F_LINKDOWN;
- ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+ if (event != NETDEV_UNREGISTER)
+ ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
break;
}
if (event == NETDEV_UNREGISTER)
for (index = 0; index < platform_labels; index++) {
struct mpls_route *rt = rtnl_dereference(platform_label[index]);
RCU_INIT_POINTER(platform_label[index], NULL);
+ mpls_notify_route(net, index, rt, NULL, NULL);
mpls_rt_free(rt);
}
rtnl_unlock();
return err;
}
-static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
+static int nr_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sk_buff *skb;
struct sock *newsk;
}
static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
DECLARE_WAITQUEUE(wait, current);
struct sock *sk = sock->sk, *new_sk;
sock_put(sk);
}
-static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
+static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
+ bool kern)
{
struct pep_sock *pn = pep_sk(sk), *newpn;
struct sock *newsk = NULL;
}
/* Create a new to-be-accepted sock */
- newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
+ newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot,
+ kern);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
}
static int pn_socket_accept(struct socket *sock, struct socket *newsock,
- int flags)
+ int flags, bool kern)
{
struct sock *sk = sock->sk;
struct sock *newsk;
if (unlikely(sk->sk_state != TCP_LISTEN))
return -EINVAL;
- newsk = sk->sk_prot->accept(sk, flags, &err);
+ newsk = sk->sk_prot->accept(sk, flags, &err, kern);
if (!newsk)
return err;
*/
rds_cong_remove_conn(conn);
+ put_net(conn->c_net);
kmem_cache_free(rds_conn_slab, conn);
spin_lock_irqsave(&rds_conn_lock, flags);
ic->i_send_cq = NULL;
ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
rdsdebug("ib_create_cq send failed: %d\n", ret);
- goto out;
+ goto rds_ibdev_out;
}
ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
ic->i_recv_cq = NULL;
ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
rdsdebug("ib_create_cq recv failed: %d\n", ret);
- goto out;
+ goto send_cq_out;
}
ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
if (ret) {
rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
if (ret) {
rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
/* XXX negotiate max send/recv with remote? */
ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
if (ret) {
rdsdebug("rdma_create_qp failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
if (!ic->i_send_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent send failed\n");
- goto out;
+ goto qp_out;
}
ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
if (!ic->i_recv_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent recv failed\n");
- goto out;
+ goto send_hdrs_dma_out;
}
ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
if (!ic->i_ack) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent ack failed\n");
- goto out;
+ goto recv_hdrs_dma_out;
}
ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
if (!ic->i_sends) {
ret = -ENOMEM;
rdsdebug("send allocation failed\n");
- goto out;
+ goto ack_dma_out;
}
ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
if (!ic->i_recvs) {
ret = -ENOMEM;
rdsdebug("recv allocation failed\n");
- goto out;
+ goto sends_out;
}
rds_ib_recv_init_ack(ic);
rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
ic->i_send_cq, ic->i_recv_cq);
-out:
+ return ret;
+
+sends_out:
+ vfree(ic->i_sends);
+ack_dma_out:
+ ib_dma_free_coherent(dev, sizeof(struct rds_header),
+ ic->i_ack, ic->i_ack_dma);
+recv_hdrs_dma_out:
+ ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
+ sizeof(struct rds_header),
+ ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+send_hdrs_dma_out:
+ ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
+ sizeof(struct rds_header),
+ ic->i_send_hdrs, ic->i_send_hdrs_dma);
+qp_out:
+ rdma_destroy_qp(ic->i_cm_id);
+recv_cq_out:
+ if (!ib_destroy_cq(ic->i_recv_cq))
+ ic->i_recv_cq = NULL;
+send_cq_out:
+ if (!ib_destroy_cq(ic->i_send_cq))
+ ic->i_send_cq = NULL;
+rds_ibdev_out:
+ rds_ib_remove_conn(rds_ibdev, conn);
rds_ib_dev_put(rds_ibdev);
+
return ret;
}
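The rewritten setup path above replaces the single catch-all out: with a
ladder of labels, so each failure releases exactly the resources acquired
before it, in reverse order. A minimal sketch of that unwind style, where
plain malloc/free stand in for the CQ/QP/DMA allocations:

    #include <stdio.h>
    #include <stdlib.h>

    static int setup(void)
    {
        void *a, *b, *c;

        a = malloc(16);
        if (!a)
            goto out;
        b = malloc(16);
        if (!b)
            goto free_a;
        c = malloc(16);
        if (!c)
            goto free_b;

        /* Success: use the resources, then release them normally. */
        free(c);
        free(b);
        free(a);
        return 0;

    free_b:                      /* unwind only what was already acquired */
        free(b);
    free_a:
        free(a);
    out:
        return -1;
    }

    int main(void)
    {
        return setup();
    }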
/* Protocol version */
unsigned int c_version;
- possible_net_t c_net;
+ struct net *c_net;
struct list_head c_map_item;
unsigned long c_map_queued;
static inline
struct net *rds_conn_net(struct rds_connection *conn)
{
- return read_pnet(&conn->c_net);
+ return conn->c_net;
}
static inline
void rds_conn_net_set(struct rds_connection *conn, struct net *net)
{
- write_pnet(&conn->c_net, net);
+ conn->c_net = get_net(net);
}
#define RDS_FLAG_CONG_BITMAP 0x01
* we do need to clean up the listen socket here.
*/
if (rtn->rds_tcp_listen_sock) {
- rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
+
rtn->rds_tcp_listen_sock = NULL;
- flush_work(&rtn->rds_tcp_accept_w);
+ rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
}
}
struct rds_tcp_connection *tc, *_tc;
LIST_HEAD(tmp_list);
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
- rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
rtn->rds_tcp_listen_sock = NULL;
- flush_work(&rtn->rds_tcp_accept_w);
+ rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+ struct net *c_net = tc->t_cpath->cp_conn->c_net;
if (net != c_net || !tc->t_sock)
continue;
void *rds_tcp_listen_sock_def_readable(struct net *net)
{
struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct socket *lsock = rtn->rds_tcp_listen_sock;
+
+ if (!lsock)
+ return NULL;
- return rtn->rds_tcp_listen_sock->sk->sk_user_data;
+ return lsock->sk->sk_user_data;
}
static int rds_tcp_dev_event(struct notifier_block *this,
spin_lock_irq(&rds_tcp_conn_lock);
list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
- struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
+ struct net *c_net = tc->t_cpath->cp_conn->c_net;
if (net != c_net || !tc->t_sock)
continue;
goto out;
}
- ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
- if (ret) {
- pr_warn("could not register rds_tcp_dev_notifier\n");
+ ret = rds_tcp_recv_init();
+ if (ret)
goto out_slab;
- }
ret = register_pernet_subsys(&rds_tcp_net_ops);
if (ret)
- goto out_notifier;
+ goto out_recv;
- ret = rds_tcp_recv_init();
- if (ret)
+ ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
+ if (ret) {
+ pr_warn("could not register rds_tcp_dev_notifier\n");
goto out_pernet;
+ }
rds_trans_register(&rds_tcp_transport);
out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
-out_notifier:
- if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
- pr_warn("could not unregister rds_tcp_dev_notifier\n");
+out_recv:
+ rds_tcp_recv_exit();
out_slab:
kmem_cache_destroy(rds_tcp_conn_slab);
out:
/* tcp_listen.c */
struct socket *rds_tcp_listen_init(struct net *);
-void rds_tcp_listen_stop(struct socket *);
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock);
int rds_tcp_keepalive(struct socket *sock);
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK);
+ ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true);
if (ret < 0)
goto out;
* before it has been accepted and the accepter has set up their
* data_ready.. we only want to queue listen work for our listening
* socket
+ *
+ * (*ready)() may be null if we are racing with netns delete, and
+ * the listen socket is being torn down.
*/
if (sk->sk_state == TCP_LISTEN)
rds_tcp_accept_work(sk);
out:
read_unlock_bh(&sk->sk_callback_lock);
- ready(sk);
+ if (ready)
+ ready(sk);
}
struct socket *rds_tcp_listen_init(struct net *net)
return NULL;
}
-void rds_tcp_listen_stop(struct socket *sock)
+void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor)
{
struct sock *sk;
/* wait for accepts to stop and close the socket */
flush_workqueue(rds_wq);
+ flush_work(acceptor);
sock_release(sock);
}
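rds_tcp_listen_stop() above now takes the acceptor work struct and flushes it
before sock_release(), so no accept work can still be running against a socket
that is being freed. The same ordering in a userspace analogue, where
pthread_join() plays the role of flush_work() (names are illustrative):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct listener { int fd; };

    static void *acceptor(void *arg)
    {
        struct listener *l = arg;

        printf("acceptor ran against fd %d\n", l->fd);
        return NULL;
    }

    int main(void)
    {
        struct listener *l = malloc(sizeof(*l));
        pthread_t worker;

        if (!l)
            return 1;
        l->fd = 42;
        pthread_create(&worker, NULL, acceptor, l);

        pthread_join(worker, NULL);   /* "flush" the acceptor first ...     */
        free(l);                      /* ... only then release its resource */
        return 0;
    }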
return err;
}
-static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
+static int rose_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sk_buff *skb;
struct sock *newsk;
u16 skew)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ enum rxrpc_call_state state;
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int ix;
rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
_proto("Rx DATA %%%u { #%u f=%02x }",
sp->hdr.serial, seq, sp->hdr.flags);
- if (call->state >= RXRPC_CALL_COMPLETE)
+ state = READ_ONCE(call->state);
+ if (state >= RXRPC_CALL_COMPLETE)
return;
/* Received data implicitly ACKs all of the request packets we sent
* when we're acting as a client.
*/
- if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
- call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+ state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
!rxrpc_receiving_reply(call))
return;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_peer *peer;
unsigned int mtu;
+ bool wake = false;
u32 rwind = ntohl(ackinfo->rwind);
_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
rwind, ntohl(ackinfo->jumbo_max));
- if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
- rwind = RXRPC_RXTX_BUFF_SIZE - 1;
- call->tx_winsize = rwind;
+ if (call->tx_winsize != rwind) {
+ if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+ rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+ if (rwind > call->tx_winsize)
+ wake = true;
+ call->tx_winsize = rwind;
+ }
+
if (call->cong_ssthresh > rwind)
call->cong_ssthresh = rwind;
spin_unlock_bh(&peer->lock);
_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
}
+
+ if (wake)
+ wake_up(&call->waitq);
}
/*
return rxrpc_proto_abort("AK0", call, 0);
/* Ignore ACKs unless we are or have just been transmitting. */
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
case RXRPC_CALL_SERVER_SEND_REPLY:
static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
struct rxrpc_call *call)
{
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
break;
msg->msg_namelen = len;
}
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_SERVER_ACCEPTING:
ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
break;
mutex_lock(&call->user_mutex);
- switch (call->state) {
+ switch (READ_ONCE(call->state)) {
case RXRPC_CALL_CLIENT_RECV_REPLY:
case RXRPC_CALL_SERVER_RECV_REQUEST:
case RXRPC_CALL_SERVER_ACK_REQUEST:
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
+ enum rxrpc_call_state state;
enum rxrpc_command cmd;
struct rxrpc_call *call;
unsigned long user_call_ID = 0;
return PTR_ERR(call);
/* ... and we have the call lock. */
} else {
- ret = -EBUSY;
- if (call->state == RXRPC_CALL_UNINITIALISED ||
- call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
- call->state == RXRPC_CALL_SERVER_PREALLOC ||
- call->state == RXRPC_CALL_SERVER_SECURING ||
- call->state == RXRPC_CALL_SERVER_ACCEPTING)
+ switch (READ_ONCE(call->state)) {
+ case RXRPC_CALL_UNINITIALISED:
+ case RXRPC_CALL_CLIENT_AWAIT_CONN:
+ case RXRPC_CALL_SERVER_PREALLOC:
+ case RXRPC_CALL_SERVER_SECURING:
+ case RXRPC_CALL_SERVER_ACCEPTING:
+ ret = -EBUSY;
goto error_release_sock;
+ default:
+ break;
+ }
ret = mutex_lock_interruptible(&call->user_mutex);
release_sock(&rx->sk);
}
}
+ state = READ_ONCE(call->state);
_debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
+ call->debug_id, call->user_call_ID, state, call->conn);
- if (call->state >= RXRPC_CALL_COMPLETE) {
+ if (state >= RXRPC_CALL_COMPLETE) {
/* it's too late for this call */
ret = -ESHUTDOWN;
} else if (cmd == RXRPC_CMD_SEND_ABORT) {
} else if (cmd != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else if (rxrpc_is_client_call(call) &&
- call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+ state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
/* request phase complete for this client call */
ret = -EPROTO;
} else if (rxrpc_is_service_call(call) &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY) {
/* Reply phase not begun or not complete for service call. */
ret = -EPROTO;
} else {
_debug("CALL %d USR %lx ST %d on CONN %p",
call->debug_id, call->user_call_ID, call->state, call->conn);
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = -ESHUTDOWN; /* it's too late for this call */
- } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
- ret = -EPROTO; /* request phase complete for this client call */
- } else {
+ switch (READ_ONCE(call->state)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+ break;
+ case RXRPC_CALL_COMPLETE:
+ read_lock_bh(&call->state_lock);
+ ret = -call->error;
+ read_unlock_bh(&call->state_lock);
+ break;
+ default:
+ /* Request phase complete for this client call */
+ ret = -EPROTO;
+ break;
}
mutex_unlock(&call->user_mutex);
if (ret < 0)
return ret;
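The rxrpc hunks above stop re-reading call->state in every comparison and
instead take one READ_ONCE() snapshot, so all branches test the same value
even if the call completes concurrently. A userspace sketch of that
snapshot-once idiom using C11 atomics (the enum values are placeholders, not
the real rxrpc call states):

    #include <stdatomic.h>
    #include <stdio.h>

    enum call_state { CALL_SEND_REQUEST, CALL_AWAIT_REPLY, CALL_COMPLETE };

    static _Atomic int shared_state = CALL_SEND_REQUEST;

    static const char *describe(void)
    {
        /* Read the shared state once; every test below sees the same value. */
        int state = atomic_load_explicit(&shared_state, memory_order_relaxed);

        if (state >= CALL_COMPLETE)
            return "too late for this call";
        if (state == CALL_SEND_REQUEST || state == CALL_AWAIT_REPLY)
            return "request phase still open";
        return "other state";
    }

    int main(void)
    {
        printf("%s\n", describe());
        return 0;
    }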
+ if (!tb[TCA_CONNMARK_PARMS])
+ return -EINVAL;
+
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(tn, parm->index, a, bind)) {
return skb->len;
nla_put_failure:
- rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
/* Create and initialize a new sk for the socket to be returned by accept(). */
static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
- struct sctp_association *asoc)
+ struct sctp_association *asoc,
+ bool kern)
{
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
struct ipv6_txoptions *opt;
- newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
+ newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
if (!newsk)
goto out;
/* Create and initialize a new sk for the socket returned by accept(). */
static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
- struct sctp_association *asoc)
+ struct sctp_association *asoc,
+ bool kern)
{
struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
- sk->sk_prot, 0);
+ sk->sk_prot, kern);
struct inet_sock *newinet;
if (!newsk)
* descriptor will be returned from accept() to represent the newly
* formed association.
*/
-static struct sock *sctp_accept(struct sock *sk, int flags, int *err)
+static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern)
{
struct sctp_sock *sp;
struct sctp_endpoint *ep;
*/
asoc = list_entry(ep->asocs.next, struct sctp_association, asocs);
- newsk = sp->pf->create_accept_sk(sk, asoc);
+ newsk = sp->pf->create_accept_sk(sk, asoc, kern);
if (!newsk) {
error = -ENOMEM;
goto out;
}
static int smc_accept(struct socket *sock, struct socket *new_sock,
- int flags)
+ int flags, bool kern)
{
struct sock *sk = sock->sk, *nsk;
DECLARE_WAITQUEUE(wait, current);
if (err)
goto out_fd;
- err = sock->ops->accept(sock, newsock, sock->file->f_flags);
+ err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
if (err < 0)
goto out_fd;
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
msg.msg_iocb = NULL;
+ msg.msg_flags = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, flags);
if (err < 0)
goto done;
- err = sock->ops->accept(sock, *newsock, flags);
+ err = sock->ops->accept(sock, *newsock, flags, true);
if (err < 0) {
sock_release(*newsock);
*newsock = NULL;
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+ bool kern);
static void tipc_sk_timeout(unsigned long data);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
*
* Returns 0 on success, errno otherwise
*/
-static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
+static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
+ bool kern)
{
struct sock *new_sk, *sk = sock->sk;
struct sk_buff *buf;
buf = skb_peek(&sk->sk_receive_queue);
- res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
+ res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
if (res)
goto exit;
security_sk_clone(sock->sk, new_sock->sk);
static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
-static int unix_accept(struct socket *, struct socket *, int);
+static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
set_bit(SOCK_PASSSEC, &new->flags);
}
-static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
+static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct sock *tsk;
return err;
}
-static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
+static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *listener;
int err;
return rc;
}
-static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
+static int x25_accept(struct socket *sock, struct socket *newsock, int flags,
+ bool kern)
{
struct sock *sk = sock->sk;
struct sock *newsk;
}
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
- const struct flowi *fl)
+ const struct flowi *fl, u16 family)
{
struct xfrm_policy *pol;
again:
pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) {
- bool match = xfrm_selector_match(&pol->selector, fl,
- sk->sk_family);
+ bool match = xfrm_selector_match(&pol->selector, fl, family);
int err = 0;
if (match) {
sk = sk_const_to_full_sk(sk);
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
num_pols = 1;
- pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+ pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
pol = NULL;
sk = sk_to_full_sk(sk);
if (sk && sk->sk_policy[dir]) {
- pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+ pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
return 0;
{
int rv;
+ /* Initialize the per-net locks here */
+ spin_lock_init(&net->xfrm.xfrm_state_lock);
+ spin_lock_init(&net->xfrm.xfrm_policy_lock);
+ mutex_init(&net->xfrm.xfrm_cfg_mutex);
+
rv = xfrm_statistics_init(net);
if (rv < 0)
goto out_statistics;
if (rv < 0)
goto out;
- /* Initialize the per-net locks here */
- spin_lock_init(&net->xfrm.xfrm_state_lock);
- spin_lock_init(&net->xfrm.xfrm_policy_lock);
- mutex_init(&net->xfrm.xfrm_cfg_mutex);
-
return 0;
out:
--- /dev/null
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+ struct pt_regs regs;
+ __u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
LIBDIR := ../../../lib
BPFOBJ := $(LIBDIR)/bpf/bpf.o
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
TEST_PROGS := test_kmod.sh
+all: $(TEST_GEN_PROGS)
+
.PHONY: all clean force
# force a rebuild of BPFOBJ when its dependencies are updated
* License as published by the Free Software Foundation.
*/
+#include <asm/types.h>
+#include <linux/types.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
cap_flag_value_t sysadmin = CAP_CLEAR;
const cap_value_t cap_val = CAP_SYS_ADMIN;
+#ifdef CAP_IS_SUPPORTED
if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
perror("cap_get_flag");
return false;
}
+#endif
caps = cap_get_proc();
if (!caps) {
perror("cap_get_proc");
*/
FUNC_START(load_vsx)
li r5,0
- lxvx vs20,r5,r3
+ lxvd2x vs20,r5,r3
addi r5,r5,16
- lxvx vs21,r5,r3
+ lxvd2x vs21,r5,r3
addi r5,r5,16
- lxvx vs22,r5,r3
+ lxvd2x vs22,r5,r3
addi r5,r5,16
- lxvx vs23,r5,r3
+ lxvd2x vs23,r5,r3
addi r5,r5,16
- lxvx vs24,r5,r3
+ lxvd2x vs24,r5,r3
addi r5,r5,16
- lxvx vs25,r5,r3
+ lxvd2x vs25,r5,r3
addi r5,r5,16
- lxvx vs26,r5,r3
+ lxvd2x vs26,r5,r3
addi r5,r5,16
- lxvx vs27,r5,r3
+ lxvd2x vs27,r5,r3
addi r5,r5,16
- lxvx vs28,r5,r3
+ lxvd2x vs28,r5,r3
addi r5,r5,16
- lxvx vs29,r5,r3
+ lxvd2x vs29,r5,r3
addi r5,r5,16
- lxvx vs30,r5,r3
+ lxvd2x vs30,r5,r3
addi r5,r5,16
- lxvx vs31,r5,r3
+ lxvd2x vs31,r5,r3
blr
FUNC_END(load_vsx)
FUNC_START(store_vsx)
li r5,0
- stxvx vs20,r5,r3
+ stxvd2x vs20,r5,r3
addi r5,r5,16
- stxvx vs21,r5,r3
+ stxvd2x vs21,r5,r3
addi r5,r5,16
- stxvx vs22,r5,r3
+ stxvd2x vs22,r5,r3
addi r5,r5,16
- stxvx vs23,r5,r3
+ stxvd2x vs23,r5,r3
addi r5,r5,16
- stxvx vs24,r5,r3
+ stxvd2x vs24,r5,r3
addi r5,r5,16
- stxvx vs25,r5,r3
+ stxvd2x vs25,r5,r3
addi r5,r5,16
- stxvx vs26,r5,r3
+ stxvd2x vs26,r5,r3
addi r5,r5,16
- stxvx vs27,r5,r3
+ stxvd2x vs27,r5,r3
addi r5,r5,16
- stxvx vs28,r5,r3
+ stxvd2x vs28,r5,r3
addi r5,r5,16
- stxvx vs29,r5,r3
+ stxvd2x vs29,r5,r3
addi r5,r5,16
- stxvx vs30,r5,r3
+ stxvd2x vs30,r5,r3
addi r5,r5,16
- stxvx vs31,r5,r3
+ stxvd2x vs31,r5,r3
blr
FUNC_END(store_vsx)
}
}
+#ifdef __i386__
+
+#ifndef SA_RESTORER
+#define SA_RESTORER 0x04000000
+#endif
+
+/*
+ * The UAPI header calls this 'struct sigaction', which conflicts with
+ * glibc. Sigh.
+ */
+struct fake_ksigaction {
+ void *handler; /* the real type is nasty */
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ unsigned char sigset[8];
+};
+
+static void fix_sa_restorer(int sig)
+{
+ struct fake_ksigaction ksa;
+
+ if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) {
+ /*
+ * glibc has a nasty bug: it sometimes writes garbage to
+ * sa_restorer. This interacts quite badly with anything
+ * that fiddles with SS because it can trigger legacy
+ * stack switching. Patch it up. See:
+ *
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=21269
+ */
+ if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) {
+ ksa.sa_restorer = NULL;
+ if (syscall(SYS_rt_sigaction, sig, &ksa, NULL,
+ sizeof(ksa.sigset)) != 0)
+ err(1, "rt_sigaction");
+ }
+ }
+}
+#else
+static void fix_sa_restorer(int sig)
+{
+ /* 64-bit glibc works fine. */
+}
+#endif
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
if (sigaction(sig, &sa, 0))
err(1, "sigaction");
+ fix_sa_restorer(sig);
}
static jmp_buf jmpbuf;
return ret;
}
-static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
- struct vgic_its *its,
- gpa_t addr, unsigned int len)
-{
- u32 reg = 0;
-
- mutex_lock(&its->cmd_lock);
- if (its->creadr == its->cwriter)
- reg |= GITS_CTLR_QUIESCENT;
- if (its->enabled)
- reg |= GITS_CTLR_ENABLE;
- mutex_unlock(&its->cmd_lock);
-
- return reg;
-}
-
-static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
-{
- its->enabled = !!(val & GITS_CTLR_ENABLE);
-}
-
static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
struct vgic_its *its,
gpa_t addr, unsigned int len)
#define ITS_CMD_SIZE 32
#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5))
-/*
- * By writing to CWRITER the guest announces new commands to be processed.
- * To avoid any races in the first place, we take the its_cmd lock, which
- * protects our ring buffer variables, so that there is only one user
- * per ITS handling commands at a given time.
- */
-static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
- gpa_t addr, unsigned int len,
- unsigned long val)
+/* Must be called with the cmd_lock held. */
+static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
{
gpa_t cbaser;
u64 cmd_buf[4];
- u32 reg;
- if (!its)
- return;
-
- mutex_lock(&its->cmd_lock);
-
- reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
- reg = ITS_CMD_OFFSET(reg);
- if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
- mutex_unlock(&its->cmd_lock);
+ /* Commands are only processed when the ITS is enabled. */
+ if (!its->enabled)
return;
- }
- its->cwriter = reg;
cbaser = CBASER_ADDRESS(its->cbaser);
while (its->cwriter != its->creadr) {
if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser))
its->creadr = 0;
}
+}
+
+/*
+ * By writing to CWRITER the guest announces new commands to be processed.
+ * To avoid any races in the first place, we take the its_cmd lock, which
+ * protects our ring buffer variables, so that there is only one user
+ * per ITS handling commands at a given time.
+ */
+static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u64 reg;
+
+ if (!its)
+ return;
+
+ mutex_lock(&its->cmd_lock);
+
+ reg = update_64bit_reg(its->cwriter, addr & 7, len, val);
+ reg = ITS_CMD_OFFSET(reg);
+ if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+ mutex_unlock(&its->cmd_lock);
+ return;
+ }
+ its->cwriter = reg;
+
+ vgic_its_process_commands(kvm, its);
mutex_unlock(&its->cmd_lock);
}
*regptr = reg;
}
+static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu,
+ struct vgic_its *its,
+ gpa_t addr, unsigned int len)
+{
+ u32 reg = 0;
+
+ mutex_lock(&its->cmd_lock);
+ if (its->creadr == its->cwriter)
+ reg |= GITS_CTLR_QUIESCENT;
+ if (its->enabled)
+ reg |= GITS_CTLR_ENABLE;
+ mutex_unlock(&its->cmd_lock);
+
+ return reg;
+}
+
+static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ mutex_lock(&its->cmd_lock);
+
+ its->enabled = !!(val & GITS_CTLR_ENABLE);
+
+ /*
+ * Try to process any pending commands. This function bails out early
+ * if the ITS is disabled or no commands have been queued.
+ */
+ vgic_its_process_commands(kvm, its);
+
+ mutex_unlock(&its->cmd_lock);
+}
+
#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \
{ \
.reg_offset = off, \
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
bool new_active_state)
{
+ struct kvm_vcpu *requester_vcpu;
spin_lock(&irq->irq_lock);
+
+ /*
+ * The vcpu parameter here can mean multiple things depending on how
+ * this function is called; when handling a trap from the kernel it
+ * depends on the GIC version, and these functions are also called as
+ * part of save/restore from userspace.
+ *
+ * Therefore, we have to figure out the requester in a reliable way.
+ *
+ * When accessing VGIC state from user space, the requester_vcpu is
+ * NULL, which is fine, because we guarantee that no VCPUs are running
+ * when accessing VGIC state from user space so irq->vcpu->cpu is
+ * always -1.
+ */
+ requester_vcpu = kvm_arm_get_running_vcpu();
+
/*
* If this virtual IRQ was written into a list register, we
* have to make sure the CPU that runs the VCPU thread has
- * synced back LR state to the struct vgic_irq. We can only
- * know this for sure, when either this irq is not assigned to
- * anyone's AP list anymore, or the VCPU thread is not
- * running on any CPUs.
+ * synced back the LR state to the struct vgic_irq.
*
- * In the opposite case, we know the VCPU thread may be on its
- * way back from the guest and still has to sync back this
- * IRQ, so we release and re-acquire the spin_lock to let the
- * other thread sync back the IRQ.
+ * As long as the conditions below are true, we know the VCPU thread
+ * may be on its way back from the guest (we kicked the VCPU thread in
+ * vgic_change_active_prepare) and still has to sync back this IRQ,
+ * so we release and re-acquire the spin_lock to let the other thread
+ * sync back the IRQ.
*/
while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+ irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
irq->vcpu->cpu != -1) /* VCPU thread is running */
cond_resched_lock(&irq->irq_lock);
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
* way, so we force SRE to 1 to demonstrate this to the guest.
+ * Also, we don't support any form of IRQ/FIQ bypass.
* This goes with the spec allowing the value to be RAO/WI.
*/
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+ vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
+ ICC_SRE_EL1_DFB |
+ ICC_SRE_EL1_SRE);
vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
} else {
vgic_v3->vgic_sre = 0;