- Notes on accessing payload contents
- Defining a key type
- Request-key callback service
- - Key access filesystem
+ - Garbage collection
============
(*) Dead. The key's type was unregistered, and so the key is now useless.
+Keys in the last three states are subject to garbage collection. See the
+section on "Garbage collection".
+
====================
KEY SERVICE OVERVIEW
successful.
+ (*) Install the calling process's session keyring on its parent.
+
+ long keyctl(KEYCTL_SESSION_TO_PARENT);
+
+ This functions attempts to install the calling process's session keyring
+ on to the calling process's parent, replacing the parent's current session
+ keyring.
+
+ The calling process must have the same ownership as its parent, the
+ keyring must have the same ownership as the calling process, the calling
+ process must have LINK permission on the keyring and the active LSM module
+ mustn't deny permission, otherwise error EPERM will be returned.
+
+ Error ENOMEM will be returned if there was insufficient memory to complete
+ the operation, otherwise 0 will be returned to indicate success.
+
+ The keyring will be replaced next time the parent process leaves the
+ kernel and resumes executing userspace.
+
+
===============
KERNEL SERVICES
===============
In this case, the program isn't required to actually attach the key to a ring;
the rings are provided for reference.
+
+
+==================
+GARBAGE COLLECTION
+==================
+
+Dead keys (for which the type has been removed) will be automatically unlinked
+from those keyrings that point to them and deleted as soon as possible by a
+background garbage collector.
+
+Similarly, revoked and expired keys will be garbage collected, but only after a
+certain amount of time has passed. This time is set as a number of seconds in:
+
+ /proc/sys/kernel/keys/gc_delay
# echo scan > /sys/kernel/debug/kmemleak
+To clear the list of all current possible memory leaks:
+
+ # echo clear > /sys/kernel/debug/kmemleak
+
+New leaks will then come up upon reading /sys/kernel/debug/kmemleak
+again.
+
Note that the orphan objects are listed in the order they were allocated
and one object at the beginning of the list may cause other subsequent
objects to be reported as orphan.
scan=<secs> - set the automatic memory scanning period in seconds
(default 600, 0 to stop the automatic scanning)
scan - trigger a memory scan
+ clear - clear list of current memory leak suspects, done by
+ marking all current reported unreferenced objects grey
+ dump=<addr> - dump information about the object found at <addr>
Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
the kernel command line.
address inside the block address range that need to be found so that the
block is not considered a leak. One example is __vmalloc().
+Testing specific sections with kmemleak
+---------------------------------------
+
+Upon initial bootup your /sys/kernel/debug/kmemleak output page may be
+quite extensive. This can also be the case if you have very buggy code
+when doing development. To work around these situations you can use the
+'clear' command to clear all reported unreferenced objects from the
+/sys/kernel/debug/kmemleak output. By issuing a 'scan' after a 'clear'
+you can find new unreferenced objects; this should help with testing
+specific sections of code.
+
+To test a critical section on demand with a clean kmemleak do:
+
+ # echo clear > /sys/kernel/debug/kmemleak
+ ... test your kernel or modules ...
+ # echo scan > /sys/kernel/debug/kmemleak
+
+Then as usual to get your report with:
+
+ # cat /sys/kernel/debug/kmemleak
+
Kmemleak API
------------
AMSO1100 RNIC DRIVER
M: Tom Tucker <tom@opengridcomputing.com>
M: Steve Wise <swise@opengridcomputing.com>
-L: general@lists.openfabrics.org
+L: linux-rdma@vger.kernel.org
S: Maintained
F: drivers/infiniband/hw/amso1100/
CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
M: Steve Wise <swise@chelsio.com>
-L: general@lists.openfabrics.org
+L: linux-rdma@vger.kernel.org
W: http://www.openfabrics.org
S: Supported
F: drivers/infiniband/hw/cxgb3/
EHCA (IBM GX bus InfiniBand adapter) DRIVER
M: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
M: Christoph Raisch <raisch@de.ibm.com>
-L: general@lists.openfabrics.org
+L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/ehca/
M: Roland Dreier <rolandd@cisco.com>
M: Sean Hefty <sean.hefty@intel.com>
M: Hal Rosenstock <hal.rosenstock@gmail.com>
-L: general@lists.openfabrics.org (moderated for non-subscribers)
+L: linux-rdma@vger.kernel.org
W: http://www.openib.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
S: Supported
IPATH DRIVER
M: Ralph Campbell <infinipath@qlogic.com>
-L: general@lists.openfabrics.org
+L: linux-rdma@vger.kernel.org
T: git git://git.qlogic.com/ipath-linux-2.6
S: Supported
F: drivers/infiniband/hw/ipath/
NETEFFECT IWARP RNIC DRIVER (IW_NES)
M: Faisal Latif <faisal.latif@intel.com>
M: Chien Tung <chien.tin.tung@intel.com>
-L: general@lists.openfabrics.org
+L: linux-rdma@vger.kernel.org
W: http://www.neteffect.com
S: Supported
F: drivers/infiniband/hw/nes/
#define TIF_UAC_SIGBUS 7
#define TIF_MEMDIE 8
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
+#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */
#define TIF_FREEZE 16 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_FREEZE (1<<TIF_FREEZE)
/* Work to do on interrupt/exception return. */
-#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
+#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+ _TIF_NOTIFY_RESUME)
/* Work to do on any return to userspace. */
#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \
#include <linux/binfmts.h>
#include <linux/bitops.h>
#include <linux/syscalls.h>
+#include <linux/tracehook.h>
#include <asm/uaccess.h>
#include <asm/sigcontext.h>
{
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
do_signal(regs, sw, r0, r19);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
}
* TIF_SYSCALL_TRACE - syscall trace active
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
+ * TIF_NOTIFY_RESUME - callback before returning to user
* TIF_USEDFPU - FPU was used by this task this quantum (SMP)
* TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED
*/
#define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1
+#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_SYSCALL_TRACE 8
#define TIF_POLLING_NRFLAG 16
#define TIF_USING_IWMMXT 17
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
work_pending:
tst r1, #_TIF_NEED_RESCHED
bne work_resched
- tst r1, #_TIF_SIGPENDING
+ tst r1, #_TIF_SIGPENDING|_TIF_NOTIFY_RESUME
beq no_work_pending
mov r0, sp @ 'regs'
mov r2, why @ 'syscall'
#include <linux/personality.h>
#include <linux/freezer.h>
#include <linux/uaccess.h>
+#include <linux/tracehook.h>
#include <asm/elf.h>
#include <asm/cacheflush.h>
{
if (thread_flags & _TIF_SIGPENDING)
do_signal(¤t->blocked, regs, syscall);
+
+ if (thread_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
}
#define TIF_MEMDIE 6
#define TIF_RESTORE_SIGMASK 7 /* restore signal mask in do_signal */
#define TIF_CPU_GOING_TO_SLEEP 8 /* CPU is entering sleep 0 mode */
+#define TIF_NOTIFY_RESUME 9 /* callback before returning to user */
#define TIF_FREEZE 29
#define TIF_DEBUG 30 /* debugging enabled */
#define TIF_USERSPACE 31 /* true if FS sets userspace */
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FREEZE (1 << TIF_FREEZE)
/* Note: The masks below must never span more than 16 bits! */
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
((1 << TIF_SIGPENDING) \
+ | _TIF_NOTIFY_RESUME \
| (1 << TIF_NEED_RESCHED) \
| (1 << TIF_POLLING_NRFLAG) \
| (1 << TIF_BREAKPOINT) \
| (1 << TIF_RESTORE_SIGMASK))
/* work to do on any return to userspace */
-#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE))
+#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \
+ _TIF_NOTIFY_RESUME)
/* work to do on return from debug mode */
#define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT))
ld.w r1, r0[TI_flags]
rjmp 1b
-2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NOTIFY_RESUME
tst r1, r2
breq 3f
unmask_interrupts
#include <linux/ptrace.h>
#include <linux/unistd.h>
#include <linux/freezer.h>
+#include <linux/tracehook.h>
#include <asm/uaccess.h>
#include <asm/ucontext.h>
if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
do_signal(regs, ¤t->blocked, syscall);
+
+ if (ti->flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
}
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/user.h>
+#include <linux/tracehook.h>
#include <asm/uaccess.h>
#include <asm/page.h>
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(canrestart,regs);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
}
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(__frame);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
} /* end do_notify_resume() */
TIF_NEED_RESCHED */
#define TIF_MEMDIE 4
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
+#define TIF_NOTIFY_RESUME 6 /* callback before returning to user */
#define TIF_FREEZE 16 /* is freezing for suspend */
/* as above, but as bit values */
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/freezer.h>
+#include <linux/tracehook.h>
#include <asm/setup.h>
#include <asm/uaccess.h>
{
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
do_signal(regs, NULL);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
}
if (test_thread_flag(TIF_NOTIFY_RESUME)) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(&scr->pt);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
/* copy user rbs to kernel rbs */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
#define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */
#define TIF_IRET 4 /* return with iret */
+#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_IRET (1<<TIF_IRET)
+#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/freezer.h>
+#include <linux/tracehook.h>
#include <asm/cacheflush.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs,oldset);
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
+
clear_thread_flag(TIF_IRET);
}
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
#define TIF_SECCOMP 4 /* secure computing */
+#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
+#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#include <linux/compiler.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <linux/tracehook.h>
#include <asm/abi.h>
#include <asm/asm.h>
/* deal with pending signal delivery */
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
do_signal(regs);
+
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
}
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(__frame);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
}
#define TIF_MEMDIE 5
#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */
#define TIF_FREEZE 7 /* is freezing for suspend */
+#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
#define _TIF_FREEZE (1 << TIF_FREEZE)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \
+#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
_TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
#endif /* __KERNEL__ */
/* As above */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19
- ldi (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK), %r20
+ ldi (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME), %r20
and,COND(<>) %r19, %r20, %r0
b,n intr_restore /* skip past if we've nothing to do */
#include <linux/stddef.h>
#include <linux/compat.h>
#include <linux/elf.h>
+#include <linux/tracehook.h>
#include <asm/ucontext.h>
#include <asm/rt_sigframe.h>
#include <asm/uaccess.h>
if (test_thread_flag(TIF_SIGPENDING) ||
test_thread_flag(TIF_RESTORE_SIGMASK))
do_signal(regs, in_syscall);
+
+ if (test_thread_flag(TIF_NOTIFY_RESUME)) {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
+ }
}
{
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
}
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
}
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
}
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
}
+
#include <linux/bitops.h>
#include <linux/ioport.h>
#include <linux/suspend.h>
+#include <linux/kmemleak.h>
#include <asm/e820.h>
#include <asm/io.h>
#include <asm/iommu.h>
* code for safe
*/
p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+ /*
+ * Kmemleak should not scan this block as it may not be mapped via the
+ * kernel direct mapping.
+ */
+ kmemleak_ignore(p);
if (!p || __pa(p)+aper_size > 0xffffffff) {
printk(KERN_ERR
"Cannot allocate aperture memory hole (%p,%uK)\n",
#include <linux/dmar.h>
#include <linux/bootmem.h>
#include <linux/pci.h>
+#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
size = roundup(dma32_bootmem_size, align);
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
512ULL<<20);
+ /*
+ * Kmemleak should not scan this block as it may not be mapped via the
+ * kernel direct mapping.
+ */
+ kmemleak_ignore(dma32_bootmem_ptr);
if (dma32_bootmem_ptr)
dma32_bootmem_size = size;
else
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+ if (current->replacement_session_keyring)
+ key_replace_session_keyring();
}
#ifdef CONFIG_X86_32
kmemcheck_shadow_set(shadow, size);
}
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+ enum kmemcheck_shadow status;
+ void *shadow;
+
+ shadow = kmemcheck_shadow_lookup(addr);
+ if (!shadow)
+ return true;
+
+ status = kmemcheck_shadow_test(shadow, size);
+
+ return status == KMEMCHECK_SHADOW_INITIALIZED;
+}
+
/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
unsigned long addr, unsigned int size)
goto out_err;
}
+ /* Default timeouts */
+ chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+ chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
+ chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+ chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+
if (request_locality(chip, 0) != 0) {
rc = -ENODEV;
goto out_err;
vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
- /* Default timeouts */
- chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
- chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
- chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
- chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-
dev_info(dev,
"1.2 TPM (device-id 0x%X, rev-id %d)\n",
vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
* In either case, must tell the provider to reject.
*/
cm_id_priv->state = IW_CM_STATE_DESTROYING;
+ cm_id->device->iwcm->reject(cm_id, NULL, 0);
break;
case IW_CM_STATE_CONN_SENT:
case IW_CM_STATE_DESTROYING:
* Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
MODULE_AUTHOR("Hal Rosenstock");
MODULE_AUTHOR("Sean Hefty");
+int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
+
+module_param_named(send_queue_size, mad_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
+module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+
static struct kmem_cache *ib_mad_cache;
static struct list_head ib_mad_port_list;
static u32 ib_mad_client_id = 0;
/* Port list lock */
-static spinlock_t ib_mad_port_list_lock;
-
+static DEFINE_SPINLOCK(ib_mad_port_list_lock);
/* Forward declarations */
static int method_in_use(struct ib_mad_mgmt_method_table **method,
unsigned long delay;
if (list_empty(&mad_agent_priv->wait_list)) {
- cancel_delayed_work(&mad_agent_priv->timed_work);
+ __cancel_delayed_work(&mad_agent_priv->timed_work);
} else {
mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
struct ib_mad_send_wr_private,
if (time_after(mad_agent_priv->timeout,
mad_send_wr->timeout)) {
mad_agent_priv->timeout = mad_send_wr->timeout;
- cancel_delayed_work(&mad_agent_priv->timed_work);
+ __cancel_delayed_work(&mad_agent_priv->timed_work);
delay = mad_send_wr->timeout - jiffies;
if ((long)delay <= 0)
delay = 1;
/* Reschedule a work item if we have a shorter timeout */
if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
- cancel_delayed_work(&mad_agent_priv->timed_work);
+ __cancel_delayed_work(&mad_agent_priv->timed_work);
queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
&mad_agent_priv->timed_work, delay);
}
qp_init_attr.send_cq = qp_info->port_priv->cq;
qp_init_attr.recv_cq = qp_info->port_priv->cq;
qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
- qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+ qp_init_attr.cap.max_send_wr = mad_sendq_size;
+ qp_init_attr.cap.max_recv_wr = mad_recvq_size;
qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
qp_init_attr.qp_type = qp_type;
goto error;
}
/* Use minimum queue sizes unless the CQ is resized */
- qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE;
- qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE;
+ qp_info->send_queue.max_active = mad_sendq_size;
+ qp_info->recv_queue.max_active = mad_recvq_size;
return 0;
error:
init_mad_qp(port_priv, &port_priv->qp_info[0]);
init_mad_qp(port_priv, &port_priv->qp_info[1]);
- cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
+ cq_size = (mad_sendq_size + mad_recvq_size) * 2;
port_priv->cq = ib_create_cq(port_priv->device,
ib_mad_thread_completion_handler,
NULL, port_priv, cq_size, 0);
{
int ret;
- spin_lock_init(&ib_mad_port_list_lock);
+ mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
+ mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
+
+ mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
+ mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
ib_mad_cache = kmem_cache_create("ib_mad",
sizeof(struct ib_mad_private),
module_init(ib_mad_init_module);
module_exit(ib_mad_cleanup_module);
-
* Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
/* QP and CQ parameters */
#define IB_MAD_QP_SEND_SIZE 128
#define IB_MAD_QP_RECV_SIZE 512
+#define IB_MAD_QP_MIN_SIZE 64
+#define IB_MAD_QP_MAX_SIZE 8192
#define IB_MAD_SEND_REQ_MAX_SG 2
#define IB_MAD_RECV_REQ_MAX_SG 1
struct ib_sa_query *query;
int query_id;
u16 pkey_index;
+ u8 leave_state;
+ int retries;
};
struct mcast_member {
rec = group->rec;
rec.join_state = leave_state;
+ group->leave_state = leave_state;
ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
port->port_num, IB_SA_METHOD_DELETE, &rec,
{
struct mcast_group *group = context;
- mcast_work_handler(&group->work);
+ if (status && group->retries > 0 &&
+ !send_leave(group, group->leave_state))
+ group->retries--;
+ else
+ mcast_work_handler(&group->work);
}
static struct mcast_group *acquire_group(struct mcast_port *port,
if (!group)
return NULL;
+ group->retries = 3;
group->port = port;
group->rec.mgid = *mgid;
group->pkey_index = MCAST_INVALID_PKEY_INDEX;
.remove = ib_sa_remove_one
};
-static spinlock_t idr_lock;
+static DEFINE_SPINLOCK(idr_lock);
static DEFINE_IDR(query_idr);
-static spinlock_t tid_lock;
+static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
{
int ret;
- spin_lock_init(&idr_lock);
- spin_lock_init(&tid_lock);
-
get_random_bytes(&tid, sizeof tid);
ret = ib_register_client(&sa_client);
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
+ /* C14-6 -- valid hop_cnt values are from 0 to 63 */
+ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+ return IB_SMI_DISCARD;
+
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 */
if (hop_cnt && hop_ptr == 0) {
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
+ /* C14-6 -- valid hop_cnt values are from 0 to 63 */
+ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+ return IB_SMI_DISCARD;
+
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 -- sender should have incremented hop_ptr */
if (hop_cnt && hop_ptr == 0)
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
-static spinlock_t map_lock;
+static DEFINE_SPINLOCK(map_lock);
static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
if (hdr.command < 0 ||
hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[hdr.command] ||
- !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+ !uverbs_cmd_table[hdr.command])
return -EINVAL;
if (!file->ucontext &&
hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
return -EINVAL;
+ if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+ return -ENOSYS;
+
return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
hdr.in_words * 4, hdr.out_words * 4);
}
{
int ret;
- spin_lock_init(&map_lock);
-
ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
"infiniband_verbs");
if (ret) {
static void c2_print_macaddr(struct net_device *netdev)
{
- pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
- "IRQ %u\n", netdev->name,
- netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
- netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
- netdev->irq);
+ pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq);
}
static void c2_set_rxbufsize(struct c2_port *c2_port)
/* Register pseudo network device */
dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
if (!dev->pseudo_netdev)
- goto out3;
+ goto out;
ret = register_netdev(dev->pseudo_netdev);
if (ret)
- goto out2;
+ goto out_free_netdev;
pr_debug("%s:%u\n", __func__, __LINE__);
strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
dev->ibdev.post_recv = c2_post_receive;
dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+ if (dev->ibdev.iwcm == NULL) {
+ ret = -ENOMEM;
+ goto out_unregister_netdev;
+ }
dev->ibdev.iwcm->add_ref = c2_add_ref;
dev->ibdev.iwcm->rem_ref = c2_rem_ref;
dev->ibdev.iwcm->get_qp = c2_get_qp;
ret = ib_register_device(&dev->ibdev);
if (ret)
- goto out1;
+ goto out_free_iwcm;
for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
ret = device_create_file(&dev->ibdev.dev,
c2_dev_attributes[i]);
if (ret)
- goto out0;
+ goto out_unregister_ibdev;
}
- goto out3;
+ goto out;
-out0:
+out_unregister_ibdev:
ib_unregister_device(&dev->ibdev);
-out1:
+out_free_iwcm:
+ kfree(dev->ibdev.iwcm);
+out_unregister_netdev:
unregister_netdev(dev->pseudo_netdev);
-out2:
+out_free_netdev:
free_netdev(dev->pseudo_netdev);
-out3:
+out:
pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
return ret;
}
wqe->qpcaps = attr->qpcaps;
wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
wqe->rqe_count = cpu_to_be16(attr->rqe_count);
- wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
+ wqe->flags_rtr_type = cpu_to_be16(attr->flags |
+ V_RTR_TYPE(attr->rtr_type) |
+ V_CHAN(attr->chan));
wqe->ord = cpu_to_be32(attr->ord);
wqe->ird = cpu_to_be32(attr->ird);
wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
err2:
cxio_hal_destroy_ctrl_qp(rdev_p);
err1:
+ rdev_p->t3cdev_p->ulp = NULL;
list_del(&rdev_p->entry);
return err;
}
#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+#define S_CHAN 4
+#define M_CHAN 0x3
+#define V_CHAN(x) ((x) << S_CHAN)
+#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
+
struct t3_rdma_init_attr {
u32 tid;
u32 qpid;
u16 flags;
u16 rqe_count;
u32 irs;
+ u32 chan;
};
struct t3_rdma_init_wr {
static void open_rnic_dev(struct t3cdev *);
static void close_rnic_dev(struct t3cdev *);
-static void iwch_err_handler(struct t3cdev *, u32, u32);
+static void iwch_event_handler(struct t3cdev *, u32, u32);
struct cxgb3_client t3c_client = {
.name = "iw_cxgb3",
.remove = close_rnic_dev,
.handlers = t3c_handlers,
.redirect = iwch_ep_redirect,
- .err_handler = iwch_err_handler
+ .event_handler = iwch_event_handler
};
static LIST_HEAD(dev_list);
static void open_rnic_dev(struct t3cdev *tdev)
{
struct iwch_dev *rnicp;
- static int vers_printed;
PDBG("%s t3cdev %p\n", __func__, tdev);
- if (!vers_printed++)
- printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
+ printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
DRV_VERSION);
rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
if (!rnicp) {
mutex_unlock(&dev_mutex);
}
-static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error)
+static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
{
struct cxio_rdev *rdev = tdev->ulp;
- struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev);
+ struct iwch_dev *rnicp;
struct ib_event event;
+ u32 portnum = port_id + 1;
- if (status == OFFLOAD_STATUS_DOWN) {
+ if (!rdev)
+ return;
+ rnicp = rdev_to_iwch_dev(rdev);
+ switch (evt) {
+ case OFFLOAD_STATUS_DOWN: {
rdev->flags = CXIO_ERROR_FATAL;
-
- event.device = &rnicp->ibdev;
event.event = IB_EVENT_DEVICE_FATAL;
- event.element.port_num = 0;
- ib_dispatch_event(&event);
+ break;
+ }
+ case OFFLOAD_PORT_DOWN: {
+ event.event = IB_EVENT_PORT_ERR;
+ break;
+ }
+ case OFFLOAD_PORT_UP: {
+ event.event = IB_EVENT_PORT_ACTIVE;
+ break;
+ }
}
+ event.device = &rnicp->ibdev;
+ event.element.port_num = portnum;
+ ib_dispatch_event(&event);
+
return;
}
ep = container_of(container_of(kref, struct iwch_ep_common, kref),
struct iwch_ep, com);
PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
- if (ep->com.flags & RELEASE_RESOURCES) {
+ if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
dst_release(ep->dst);
l2t_release(L2DATA(ep->com.tdev), ep->l2t);
static void release_ep_resources(struct iwch_ep *ep)
{
PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- ep->com.flags |= RELEASE_RESOURCES;
+ set_bit(RELEASE_RESOURCES, &ep->com.flags);
put_ep(&ep->com);
}
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
event.provider_data = ep;
- if (state_read(&ep->parent_ep->com) != DEAD)
+ if (state_read(&ep->parent_ep->com) != DEAD) {
+ get_ep(&ep->com);
ep->parent_ep->com.cm_id->event_handler(
ep->parent_ep->com.cm_id,
&event);
+ }
put_ep(&ep->parent_ep->com);
ep->parent_ep = NULL;
}
* We get 2 abort replies from the HW. The first one must
* be ignored except for scribbling that we need one more.
*/
- if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) {
- ep->com.flags |= ABORT_REQ_IN_PROGRESS;
+ if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
return CPL_RET_BUF_DONE;
}
/*
* We're gonna mark this puppy DEAD, but keep
* the reference on it until the ULP accepts or
- * rejects the CR.
+ * rejects the CR. Also wake up anyone waiting
+ * in rdma connection migration (see iwch_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
- get_ep(&ep->com);
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
* We get 2 peer aborts from the HW. The first one must
* be ignored except for scribbling that we need one more.
*/
- if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) {
- ep->com.flags |= PEER_ABORT_IN_PROGRESS;
+ if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
return CPL_RET_BUF_DONE;
}
/*
* We're gonna mark this puppy DEAD, but keep
* the reference on it until the ULP accepts or
- * rejects the CR.
+ * rejects the CR. Also wake up anyone waiting
+ * in rdma connection migration (see iwch_accept_cr()).
*/
- get_ep(&ep->com);
+ ep->com.rpl_done = 1;
+ ep->com.rpl_err = -ECONNRESET;
+ PDBG("waking up ep %p\n", ep);
+ wake_up(&ep->com.waitq);
break;
case MORIBUND:
case CLOSING:
err = send_mpa_reject(ep, pdata, pdata_len);
err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
}
+ put_ep(&ep->com);
return 0;
}
struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD)
- return -ECONNRESET;
+ if (state_read(&ep->com) == DEAD) {
+ err = -ECONNRESET;
+ goto err;
+ }
BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
BUG_ON(!qp);
if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
(conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
abort_connection(ep, NULL, GFP_KERNEL);
- return -EINVAL;
+ err = -EINVAL;
+ goto err;
}
cm_id->add_ref(cm_id);
ep->com.cm_id = cm_id;
ep->com.qp = qp;
- ep->com.rpl_done = 0;
- ep->com.rpl_err = 0;
ep->ird = conn_param->ird;
ep->ord = conn_param->ord;
PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
- get_ep(&ep->com);
-
/* bind QP to EP and move to RTS */
attrs.mpa_attr = ep->mpa_attr;
attrs.max_ird = ep->ird;
err = iwch_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1);
if (err)
- goto err;
+ goto err1;
/* if needed, wait for wr_ack */
if (iwch_rqes_posted(qp)) {
wait_event(ep->com.waitq, ep->com.rpl_done);
err = ep->com.rpl_err;
if (err)
- goto err;
+ goto err1;
}
err = send_mpa_reply(ep, conn_param->private_data,
conn_param->private_data_len);
if (err)
- goto err;
+ goto err1;
state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
put_ep(&ep->com);
return 0;
-err:
+err1:
ep->com.cm_id = NULL;
ep->com.qp = NULL;
cm_id->rem_ref(cm_id);
+err:
put_ep(&ep->com);
return err;
}
ep->com.state = CLOSING;
start_ep_timer(ep);
}
+ set_bit(CLOSE_SENT, &ep->com.flags);
break;
case CLOSING:
- close = 1;
- if (abrupt) {
- stop_ep_timer(ep);
- ep->com.state = ABORTING;
- } else
- ep->com.state = MORIBUND;
+ if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
+ close = 1;
+ if (abrupt) {
+ stop_ep_timer(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
+ }
break;
case MORIBUND:
case ABORTING:
};
enum iwch_ep_flags {
- PEER_ABORT_IN_PROGRESS = (1 << 0),
- ABORT_REQ_IN_PROGRESS = (1 << 1),
- RELEASE_RESOURCES = (1 << 2),
+ PEER_ABORT_IN_PROGRESS = 0,
+ ABORT_REQ_IN_PROGRESS = 1,
+ RELEASE_RESOURCES = 2,
+ CLOSE_SENT = 3,
};
struct iwch_ep_common {
wait_queue_head_t waitq;
int rpl_done;
int rpl_err;
- u32 flags;
+ unsigned long flags;
};
struct iwch_listen_ep {
#include "iwch.h"
#include "iwch_provider.h"
-static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
{
u32 mmid;
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+ return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
}
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift)
{
u32 stag;
+ int ret;
if (cxio_register_phys_mem(&rhp->rdev,
&stag, mhp->attr.pdid,
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- iwch_finish_mem_reg(mhp, stag);
-
- return 0;
+ ret = iwch_finish_mem_reg(mhp, stag);
+ if (ret)
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ return ret;
}
int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
int npages)
{
u32 stag;
+ int ret;
/* We could support this... */
if (npages > mhp->attr.pbl_size)
mhp->attr.pbl_size, mhp->attr.pbl_addr))
return -ENOMEM;
- iwch_finish_mem_reg(mhp, stag);
+ ret = iwch_finish_mem_reg(mhp, stag);
+ if (ret)
+ cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
- return 0;
+ return ret;
}
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
spin_lock_init(&chp->lock);
atomic_set(&chp->refcnt, 1);
init_waitqueue_head(&chp->wait);
- insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+ if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
+ cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+ kfree(chp);
+ return ERR_PTR(-ENOMEM);
+ }
if (ucontext) {
struct iwch_mm_entry *mm;
mhp->attr.stag = stag;
mmid = (stag) >> 8;
mhp->ibmw.rkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+ cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+ kfree(mhp);
+ return ERR_PTR(-ENOMEM);
+ }
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmw);
}
struct iwch_mr *mhp;
u32 mmid;
u32 stag = 0;
- int ret;
+ int ret = 0;
php = to_iwch_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
- return ERR_PTR(-ENOMEM);
+ goto err;
mhp->rhp = rhp;
ret = iwch_alloc_pbl(mhp, pbl_depth);
- if (ret) {
- kfree(mhp);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto err1;
mhp->attr.pbl_size = pbl_depth;
ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret) {
- iwch_free_pbl(mhp);
- kfree(mhp);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto err2;
mhp->attr.pdid = php->pdid;
mhp->attr.type = TPT_NON_SHARED_MR;
mhp->attr.stag = stag;
mhp->attr.state = 1;
mmid = (stag) >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
+ goto err3;
+
PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
return &(mhp->ibmr);
+err3:
+ cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+err2:
+ iwch_free_pbl(mhp);
+err1:
+ kfree(mhp);
+err:
+ return ERR_PTR(ret);
}
static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
spin_lock_init(&qhp->lock);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
- insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+ if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
+ cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ kfree(qhp);
+ return ERR_PTR(-ENOMEM);
+ }
if (udata) {
bail2:
ib_unregister_device(&dev->ibdev);
bail1:
+ kfree(dev->ibdev.iwcm);
return ret;
}
device_remove_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
ib_unregister_device(&dev->ibdev);
+ kfree(dev->ibdev.iwcm);
return;
}
init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
init_attr.rqe_count = iwch_rqes_posted(qhp);
init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
+ init_attr.chan = qhp->ep->l2t->smt_idx;
if (peer2peer) {
init_attr.rtr_type = RTR_READ;
if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
#include "ehca_tools.h"
#include "hcp_if.h"
-#define HCAD_VERSION "0028"
+#define HCAD_VERSION "0029"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
static int ehca_poll_all_eqs = 1;
int ehca_debug_level = 0;
-int ehca_nr_ports = 2;
+int ehca_nr_ports = -1;
int ehca_use_hp_mr = 0;
int ehca_port_act_time = 30;
int ehca_static_rate = -1;
"Hardware level (0: autosensing (default), "
"0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
MODULE_PARM_DESC(nr_ports,
- "number of connected ports (-1: autodetect, 1: port one only, "
- "2: two ports (default)");
+ "number of connected ports (-1: autodetect (default), "
+ "1: port one only, 2: two ports)");
MODULE_PARM_DESC(use_hp_mr,
"Use high performance MRs (default: no)");
MODULE_PARM_DESC(port_act_time,
wc->slid = cqe->rlid;
wc->dlid_path_bits = cqe->dlid;
wc->src_qp = cqe->remote_qp_number;
- wc->wc_flags = cqe->w_completion_flags;
+ /*
+ * HW has "Immed data present" and "GRH present" in bits 6 and 5.
+ * SW defines those in bits 1 and 0, so we can just shift and mask.
+ */
+ wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
wc->sl = cqe->service_level;
u8 data[192];
} __attribute__ ((packed));
+/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
+struct tcslfl {
+ u32 tc:8;
+ u32 sl:4;
+ u32 fl:20;
+} __attribute__ ((packed));
+
+/* IP Version/TC/FL packed into 32 bits, as in GRH */
+struct vertcfl {
+ u32 ver:4;
+ u32 tc:8;
+ u32 fl:20;
+} __attribute__ ((packed));
static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad)
{
struct ib_perf *in_perf = (struct ib_perf *)in_mad;
struct ib_perf *out_perf = (struct ib_perf *)out_mad;
struct ib_class_port_info *poi =
(struct ib_class_port_info *)out_perf->data;
+ struct tcslfl *tcslfl =
+ (struct tcslfl *)&poi->redirect_tcslfl;
struct ehca_shca *shca =
container_of(ibdev, struct ehca_shca, ib_device);
struct ehca_sport *sport = &shca->sport[port_num - 1];
poi->base_version = 1;
poi->class_version = 1;
poi->resp_time_value = 18;
- poi->redirect_lid = sport->saved_attr.lid;
- poi->redirect_qp = sport->pma_qp_nr;
+
+ /* copy local routing information from WC where applicable */
+ tcslfl->sl = in_wc->sl;
+ poi->redirect_lid =
+ sport->saved_attr.lid | in_wc->dlid_path_bits;
+ poi->redirect_qp = sport->pma_qp_nr;
poi->redirect_qkey = IB_QP1_QKEY;
- poi->redirect_pkey = IB_DEFAULT_PKEY_FULL;
+
+ ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
+ &poi->redirect_pkey);
+
+ /* if request was globally routed, copy route info */
+ if (in_grh) {
+ struct vertcfl *vertcfl =
+ (struct vertcfl *)&in_grh->version_tclass_flow;
+ memcpy(poi->redirect_gid, in_grh->dgid.raw,
+ sizeof(poi->redirect_gid));
+ tcslfl->tc = vertcfl->tc;
+ tcslfl->fl = vertcfl->fl;
+ } else
+ /* else only fill in default GID */
+ ehca_query_gid(ibdev, port_num, 0,
+ (union ib_gid *)&poi->redirect_gid);
ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
sport->saved_attr.lid, sport->pma_qp_nr);
int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
- struct ib_mad *in_mad,
- struct ib_mad *out_mad)
+ struct ib_mad *in_mad, struct ib_mad *out_mad)
{
int ret;
return IB_MAD_RESULT_SUCCESS;
ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
- ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad);
+ ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
+ in_mad, out_mad);
return ret;
}
pd->port_cnt = 1;
port_fp(fp) = pd;
pd->port_pid = get_pid(task_pid(current));
- strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
+ strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
ipath_stats.sps_ports++;
ret = 0;
} else
if (smp->attr_mod)
smp->status |= IB_SMP_INVALID_FIELD;
- strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));
+ memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
return reply(smp);
}
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
+ if (!dev->ib_active)
+ return ERR_PTR(-EAGAIN);
+
resp.qp_tab_size = dev->dev->caps.num_qps;
resp.bf_reg_size = dev->dev->caps.bf_reg_size;
resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
- static int mlx4_ib_version_printed;
struct mlx4_ib_dev *ibdev;
int num_ports = 0;
int i;
- if (!mlx4_ib_version_printed) {
- printk(KERN_INFO "%s", mlx4_ib_version);
- ++mlx4_ib_version_printed;
- }
+ printk_once(KERN_INFO "%s", mlx4_ib_version);
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
num_ports++;
goto err_reg;
}
+ ibdev->ib_active = true;
+
return ibdev;
err_reg:
break;
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+ ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
break;
spinlock_t sm_lock;
struct mutex cap_mask_mutex;
+ bool ib_active;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
}
static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
}
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
- else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+ } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
event.device = &dev->ib_dev;
event.event = IB_EVENT_DEVICE_FATAL;
event.element.port_num = 0;
+ dev->active = false;
ib_dispatch_event(&event);
#ifndef MTHCA_CONFIG_REG_H
#define MTHCA_CONFIG_REG_H
-#include <asm/page.h>
-
#define MTHCA_HCR_BASE 0x80680
#define MTHCA_HCR_SIZE 0x0001c
#define MTHCA_ECR_BASE 0x80700
struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
spinlock_t sm_lock;
u8 rate[MTHCA_MAX_PORTS];
+ bool active;
};
#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
static const char *eq_name[] = {
- [MTHCA_EQ_COMP] = DRV_NAME " (comp)",
- [MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
- [MTHCA_EQ_CMD] = DRV_NAME " (cmd)"
+ [MTHCA_EQ_COMP] = DRV_NAME "-comp",
+ [MTHCA_EQ_ASYNC] = DRV_NAME "-async",
+ [MTHCA_EQ_CMD] = DRV_NAME "-cmd"
};
for (i = 0; i < MTHCA_NUM_EQ; ++i) {
+ snprintf(dev->eq_table.eq[i].irq_name,
+ IB_DEVICE_NAME_MAX,
+ "%s@pci:%s", eq_name[i],
+ pci_name(dev->pdev));
err = request_irq(dev->eq_table.eq[i].msi_x_vector,
mthca_is_memfree(dev) ?
mthca_arbel_msi_x_interrupt :
mthca_tavor_msi_x_interrupt,
- 0, eq_name[i], dev->eq_table.eq + i);
+ 0, dev->eq_table.eq[i].irq_name,
+ dev->eq_table.eq + i);
if (err)
goto err_out_cmd;
dev->eq_table.eq[i].have_irq = 1;
}
} else {
+ snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
+ DRV_NAME "@pci:%s", pci_name(dev->pdev));
err = request_irq(dev->pdev->irq,
mthca_is_memfree(dev) ?
mthca_arbel_interrupt :
mthca_tavor_interrupt,
- IRQF_SHARED, DRV_NAME, dev);
+ IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
if (err)
goto err_out_cmd;
dev->eq_table.have_irq = 1;
pci_set_drvdata(pdev, mdev);
mdev->hca_type = hca_type;
+ mdev->active = true;
+
return 0;
err_unregister:
static int __devinit mthca_init_one(struct pci_dev *pdev,
const struct pci_device_id *id)
{
- static int mthca_version_printed = 0;
int ret;
mutex_lock(&mthca_device_mutex);
- if (!mthca_version_printed) {
- printk(KERN_INFO "%s", mthca_version);
- ++mthca_version_printed;
- }
+ printk_once(KERN_INFO "%s", mthca_version);
if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
struct mthca_ucontext *context;
int err;
+ if (!(to_mdev(ibdev)->active))
+ return ERR_PTR(-EAGAIN);
+
memset(&uresp, 0, sizeof uresp);
uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
int nent;
struct mthca_buf_list *page_list;
struct mthca_mr mr;
+ char irq_name[IB_DEVICE_NAME_MAX];
};
struct mthca_av;
}
static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
- else if (send_cq->cqn < recv_cq->cqn) {
+ __acquire(&recv_cq->lock);
+ } else if (send_cq->cqn < recv_cq->cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
}
static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+ __releases(&send_cq->lock) __releases(&recv_cq->lock)
{
- if (send_cq == recv_cq)
+ if (send_cq == recv_cq) {
+ __release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
- else if (send_cq->cqn < recv_cq->cqn) {
+ } else if (send_cq->cqn < recv_cq->cqn) {
spin_unlock(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else {
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
void nes_cm_disconn_worker(void *);
/* nes_verbs.c */
-int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32);
+int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32);
int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
struct nes_ib_device *nes_init_ofa_device(struct net_device *);
void nes_destroy_ofa_device(struct nes_ib_device *);
*/
int nes_cm_disconn(struct nes_qp *nesqp)
{
- unsigned long flags;
-
- spin_lock_irqsave(&nesqp->lock, flags);
- if (nesqp->disconn_pending == 0) {
- nesqp->disconn_pending++;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- /* init our disconnect work element, to */
- INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
+ struct disconn_work *work;
- queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work);
- } else
- spin_unlock_irqrestore(&nesqp->lock, flags);
+ work = kzalloc(sizeof *work, GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM; /* Timer will clean up */
+ nes_add_ref(&nesqp->ibqp);
+ work->nesqp = nesqp;
+ INIT_WORK(&work->work, nes_disconnect_worker);
+ queue_work(g_cm_core->disconn_wq, &work->work);
return 0;
}
*/
static void nes_disconnect_worker(struct work_struct *work)
{
- struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work);
+ struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+ struct nes_qp *nesqp = dwork->nesqp;
+ kfree(dwork);
nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
nesqp->last_aeq, nesqp->hwqp.qp_id);
nes_cm_disconn_true(nesqp);
+ nes_rem_ref(&nesqp->ibqp);
}
u16 last_ae;
u8 original_hw_tcp_state;
u8 original_ibqp_state;
- u8 issued_disconnect_reset = 0;
+ enum iw_cm_event_type disconn_status = IW_CM_EVENT_STATUS_OK;
+ int issue_disconn = 0;
+ int issue_close = 0;
+ int issue_flush = 0;
+ u32 flush_q = NES_CQP_FLUSH_RQ;
+ struct ib_event ibevent;
if (!nesqp) {
nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
original_ibqp_state = nesqp->ibqp_state;
last_ae = nesqp->last_aeq;
+ if (nesqp->term_flags) {
+ issue_disconn = 1;
+ issue_close = 1;
+ nesqp->cm_id = NULL;
+ if (nesqp->flush_issued == 0) {
+ nesqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+ ((original_ibqp_state == IB_QPS_RTS) &&
+ (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ issue_disconn = 1;
+ if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
+ disconn_status = IW_CM_EVENT_STATUS_RESET;
+ }
+
+ if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+ (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
+ (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
+ (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ issue_close = 1;
+ nesqp->cm_id = NULL;
+ if (nesqp->flush_issued == 0) {
+ nesqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state);
+ if ((issue_flush) && (nesqp->destroyed == 0)) {
+ /* Flush the queue(s) */
+ if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE)
+ flush_q |= NES_CQP_FLUSH_SQ;
+ flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1);
- if ((nesqp->cm_id) && (cm_id->event_handler)) {
- if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- ((original_ibqp_state == IB_QPS_RTS) &&
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ if (nesqp->term_flags) {
+ ibevent.device = nesqp->ibqp.device;
+ ibevent.event = nesqp->terminate_eventtype;
+ ibevent.element.qp = &nesqp->ibqp;
+ nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ }
+ }
+
+ if ((cm_id) && (cm_id->event_handler)) {
+ if (issue_disconn) {
atomic_inc(&cm_disconnects);
cm_event.event = IW_CM_EVENT_DISCONNECT;
- if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
- cm_event.status = IW_CM_EVENT_STATUS_RESET;
- nes_debug(NES_DBG_CM, "Generating a CM "
- "Disconnect Event (status reset) for "
- "QP%u, cm_id = %p. \n",
- nesqp->hwqp.qp_id, cm_id);
- } else
- cm_event.status = IW_CM_EVENT_STATUS_OK;
-
+ cm_event.status = disconn_status;
cm_event.local_addr = cm_id->local_addr;
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
nesqp->hwqp.sq_tail, cm_id,
atomic_read(&nesqp->refcount));
- spin_unlock_irqrestore(&nesqp->lock, flags);
ret = cm_id->event_handler(cm_id, &cm_event);
if (ret)
nes_debug(NES_DBG_CM, "OFA CM event_handler "
"returned, ret=%d\n", ret);
- spin_lock_irqsave(&nesqp->lock, flags);
}
- nesqp->disconn_pending = 0;
- /* There might have been another AE while the lock was released */
- original_hw_tcp_state = nesqp->hw_tcp_state;
- original_ibqp_state = nesqp->ibqp_state;
- last_ae = nesqp->last_aeq;
-
- if ((issued_disconnect_reset == 0) && (nesqp->cm_id) &&
- ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
- (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
- (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+ if (issue_close) {
atomic_inc(&cm_closes);
- nesqp->cm_id = NULL;
- nesqp->in_disconnect = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
nes_disconnect(nesqp, 1);
cm_id->provider_data = nesqp;
}
cm_id->rem_ref(cm_id);
-
- spin_lock_irqsave(&nesqp->lock, flags);
- if (nesqp->flush_issued == 0) {
- nesqp->flush_issued = 1;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- flush_wqes(nesvnic->nesdev, nesqp,
- NES_CQP_FLUSH_RQ, 1);
- } else
- spin_unlock_irqrestore(&nesqp->lock, flags);
- } else {
- cm_id = nesqp->cm_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- /* check to see if the inbound reset beat the outbound reset */
- if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
- nes_debug(NES_DBG_CM, "QP%u: Decing refcount "
- "due to inbound reset beating the "
- "outbound reset.\n", nesqp->hwqp.qp_id);
- }
}
- } else {
- nesqp->disconn_pending = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
}
return 0;
int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
enum nes_timer_type, int, int);
-int nes_cm_disconn(struct nes_qp *);
-
int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
int nes_reject(struct iw_cm_id *, const void *, u8);
int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
static void process_critical_error(struct nes_device *nesdev);
static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
+static void nes_terminate_timeout(unsigned long context);
+static void nes_terminate_start_timer(struct nes_qp *nesqp);
#ifdef CONFIG_INFINIBAND_NES_DEBUG
static unsigned char *nes_iwarp_state_str[] = {
}
+static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
+{
+ u16 pkt_len;
+
+ if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
+ /* skip over ethernet header */
+ pkt_len = be16_to_cpu(*(u16 *)(pkt + ETH_HLEN - 2));
+ pkt += ETH_HLEN;
+
+ /* Skip over IP and TCP headers */
+ pkt += 4 * (pkt[0] & 0x0f);
+ pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+ }
+ return pkt;
+}
+
+/* Determine if incoming error pkt is rdma layer */
+static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info)
+{
+ u8 *pkt;
+ u16 *mpa;
+ u32 opcode = 0xffffffff;
+
+ if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+ pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+ mpa = (u16 *)locate_mpa(pkt, aeq_info);
+ opcode = be16_to_cpu(mpa[1]) & 0xf;
+ }
+
+ return opcode;
+}
+
+/* Build iWARP terminate header */
+static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info)
+{
+ u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+ u16 ddp_seg_len;
+ int copy_len = 0;
+ u8 is_tagged = 0;
+ u8 flush_code = 0;
+ struct nes_terminate_hdr *termhdr;
+
+ termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase;
+ memset(termhdr, 0, 64);
+
+ if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+
+ /* Use data from offending packet to fill in ddp & rdma hdrs */
+ pkt = locate_mpa(pkt, aeq_info);
+ ddp_seg_len = be16_to_cpu(*(u16 *)pkt);
+ if (ddp_seg_len) {
+ copy_len = 2;
+ termhdr->hdrct = DDP_LEN_FLAG;
+ if (pkt[2] & 0x80) {
+ is_tagged = 1;
+ if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+ copy_len += TERM_DDP_LEN_TAGGED;
+ termhdr->hdrct |= DDP_HDR_FLAG;
+ }
+ } else {
+ if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+ copy_len += TERM_DDP_LEN_UNTAGGED;
+ termhdr->hdrct |= DDP_HDR_FLAG;
+ }
+
+ if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
+ if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
+ copy_len += TERM_RDMA_LEN;
+ termhdr->hdrct |= RDMA_HDR_FLAG;
+ }
+ }
+ }
+ }
+ }
+
+ switch (async_event_id) {
+ case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
+ switch (iwarp_opcode(nesqp, aeq_info)) {
+ case IWARP_OPCODE_WRITE:
+ flush_code = IB_WC_LOC_PROT_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_INV_STAG;
+ break;
+ default:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_STAG;
+ }
+ break;
+ case NES_AEQE_AEID_AMP_INVALID_STAG:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_STAG;
+ break;
+ case NES_AEQE_AEID_AMP_BAD_QP:
+ flush_code = IB_WC_LOC_QP_OP_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_QN;
+ break;
+ case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
+ case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
+ switch (iwarp_opcode(nesqp, aeq_info)) {
+ case IWARP_OPCODE_SEND_INV:
+ case IWARP_OPCODE_SEND_SE_INV:
+ flush_code = IB_WC_REM_OP_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_CANT_INV_STAG;
+ break;
+ default:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_STAG;
+ }
+ break;
+ case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+ if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) {
+ flush_code = IB_WC_LOC_PROT_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_BOUNDS;
+ } else {
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_INV_BOUNDS;
+ }
+ break;
+ case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+ case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_ACCESS;
+ break;
+ case NES_AEQE_AEID_AMP_TO_WRAP:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_TO_WRAP;
+ break;
+ case NES_AEQE_AEID_AMP_BAD_PD:
+ switch (iwarp_opcode(nesqp, aeq_info)) {
+ case IWARP_OPCODE_WRITE:
+ flush_code = IB_WC_LOC_PROT_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_UNASSOC_STAG;
+ break;
+ case IWARP_OPCODE_SEND_INV:
+ case IWARP_OPCODE_SEND_SE_INV:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_CANT_INV_STAG;
+ break;
+ default:
+ flush_code = IB_WC_REM_ACCESS_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+ termhdr->error_code = RDMAP_UNASSOC_STAG;
+ }
+ break;
+ case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+ flush_code = IB_WC_LOC_LEN_ERR;
+ termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+ termhdr->error_code = MPA_MARKER;
+ break;
+ case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+ termhdr->error_code = MPA_CRC;
+ break;
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+ flush_code = IB_WC_LOC_LEN_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+ termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+ break;
+ case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+ case NES_AEQE_AEID_DDP_NO_L_BIT:
+ flush_code = IB_WC_FATAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+ termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+ break;
+ case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+ case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ flush_code = IB_WC_LOC_LEN_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+ flush_code = IB_WC_GENERAL_ERR;
+ if (is_tagged) {
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+ termhdr->error_code = DDP_TAGGED_INV_DDP_VER;
+ } else {
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER;
+ }
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_MO;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ flush_code = IB_WC_REM_OP_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF;
+ break;
+ case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+ termhdr->error_code = DDP_UNTAGGED_INV_QN;
+ break;
+ case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ flush_code = IB_WC_GENERAL_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_INV_RDMAP_VER;
+ break;
+ case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+ flush_code = IB_WC_LOC_QP_OP_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_UNEXPECTED_OP;
+ break;
+ default:
+ flush_code = IB_WC_FATAL_ERR;
+ termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+ termhdr->error_code = RDMAP_UNSPECIFIED;
+ break;
+ }
+
+ if (copy_len)
+ memcpy(termhdr + 1, pkt, copy_len);
+
+ if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) {
+ if (aeq_info & NES_AEQE_SQ)
+ nesqp->term_sq_flush_code = flush_code;
+ else
+ nesqp->term_rq_flush_code = flush_code;
+ }
+
+ return sizeof(struct nes_terminate_hdr) + copy_len;
+}
+
+static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp,
+ struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype)
+{
+ u64 context;
+ unsigned long flags;
+ u32 aeq_info;
+ u16 async_event_id;
+ u8 tcp_state;
+ u8 iwarp_state;
+ u32 termlen = 0;
+ u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE |
+ NES_CQP_QP_TERM_DONT_SEND_FIN;
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+ if (nesqp->term_flags & NES_TERM_SENT)
+ return; /* Sanity check */
+
+ aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+ iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+ async_event_id = (u16)aeq_info;
+
+ context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
+ aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
+ if (!context) {
+ WARN_ON(!context);
+ return;
+ }
+
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ nesqp->terminate_eventtype = eventtype;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ if (nesadapter->send_term_ok)
+ termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info);
+ else
+ mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG;
+
+ nes_terminate_start_timer(nesqp);
+ nesqp->term_flags |= NES_TERM_SENT;
+ nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0);
+}
+
+static void nes_terminate_send_fin(struct nes_device *nesdev,
+ struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+ u32 aeq_info;
+ u16 async_event_id;
+ u8 tcp_state;
+ u8 iwarp_state;
+ unsigned long flags;
+
+ aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+ iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+ async_event_id = (u16)aeq_info;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ /* Send the fin only */
+ nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE |
+ NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0);
+}
+
+/* Cleanup after a terminate sent or received */
+static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred)
+{
+ u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+ unsigned long flags;
+ struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device);
+ struct nes_device *nesdev = nesvnic->nesdev;
+ u8 first_time = 0;
+
+ spin_lock_irqsave(&nesqp->lock, flags);
+ if (nesqp->hte_added) {
+ nesqp->hte_added = 0;
+ next_iwarp_state |= NES_CQP_QP_DEL_HTE;
+ }
+
+ first_time = (nesqp->term_flags & NES_TERM_DONE) == 0;
+ nesqp->term_flags |= NES_TERM_DONE;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+
+ /* Make sure we go through this only once */
+ if (first_time) {
+ if (timeout_occurred == 0)
+ del_timer(&nesqp->terminate_timer);
+ else
+ next_iwarp_state |= NES_CQP_QP_RESET;
+
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+ nes_cm_disconn(nesqp);
+ }
+}
+
+static void nes_terminate_received(struct nes_device *nesdev,
+ struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+ u32 aeq_info;
+ u8 *pkt;
+ u32 *mpa;
+ u8 ddp_ctl;
+ u8 rdma_ctl;
+ u16 aeq_id = 0;
+
+ aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+ if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+ /* Terminate is not a performance path so the silicon */
+ /* did not validate the frame - do it now */
+ pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+ mpa = (u32 *)locate_mpa(pkt, aeq_info);
+ ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff;
+ rdma_ctl = be32_to_cpu(mpa[0]) & 0xff;
+ if ((ddp_ctl & 0xc0) != 0x40)
+ aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC;
+ else if ((ddp_ctl & 0x03) != 1)
+ aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION;
+ else if (be32_to_cpu(mpa[2]) != 2)
+ aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN;
+ else if (be32_to_cpu(mpa[3]) != 1)
+ aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN;
+ else if (be32_to_cpu(mpa[4]) != 0)
+ aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO;
+ else if ((rdma_ctl & 0xc0) != 0x40)
+ aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+ if (aeq_id) {
+ /* Bad terminate recvd - send back a terminate */
+ aeq_info = (aeq_info & 0xffff0000) | aeq_id;
+ aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
+ nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+ return;
+ }
+ }
+
+ nesqp->term_flags |= NES_TERM_RCVD;
+ nesqp->terminate_eventtype = IB_EVENT_QP_FATAL;
+ nes_terminate_start_timer(nesqp);
+ nes_terminate_send_fin(nesdev, nesqp, aeqe);
+}
+
+/* Timeout routine in case terminate fails to complete */
+static void nes_terminate_timeout(unsigned long context)
+{
+ struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
+
+ nes_terminate_done(nesqp, 1);
+}
+
+/* Set a timer in case hw cannot complete the terminate sequence */
+static void nes_terminate_start_timer(struct nes_qp *nesqp)
+{
+ init_timer(&nesqp->terminate_timer);
+ nesqp->terminate_timer.function = nes_terminate_timeout;
+ nesqp->terminate_timer.expires = jiffies + HZ;
+ nesqp->terminate_timer.data = (unsigned long)nesqp;
+ add_timer(&nesqp->terminate_timer);
+}
+
/**
* nes_process_iwarp_aeqe
*/
struct nes_hw_aeqe *aeqe)
{
u64 context;
- u64 aeqe_context = 0;
unsigned long flags;
struct nes_qp *nesqp;
+ struct nes_hw_cq *hw_cq;
+ struct nes_cq *nescq;
int resource_allocated;
- /* struct iw_cm_id *cm_id; */
struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct ib_event ibevent;
- /* struct iw_cm_event cm_event; */
u32 aeq_info;
u32 next_iwarp_state = 0;
u16 async_event_id;
u8 tcp_state;
u8 iwarp_state;
+ int must_disconn = 1;
+ int must_terminate = 0;
+ struct ib_event ibevent;
nes_debug(NES_DBG_AEQ, "\n");
aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
+ if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) {
context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
} else {
- aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
- aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
BUG_ON(!context);
switch (async_event_id) {
case NES_AEQE_AEID_LLP_FIN_RECEIVED:
- nesqp = *((struct nes_qp **)&context);
+ nesqp = (struct nes_qp *)(unsigned long)context;
+
+ if (nesqp->term_flags)
+ return; /* Ignore it, wait for close complete */
+
if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
nesqp->cm_id->add_ref(nesqp->cm_id);
schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
async_event_id, nesqp->last_aeq, tcp_state);
}
+
if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
(nesqp->ibqp_state != IB_QPS_RTS)) {
/* FIN Received but tcp state or IB state moved on,
should expect a close complete */
return;
}
+
case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ if (nesqp->term_flags) {
+ nes_terminate_done(nesqp, 0);
+ return;
+ }
+
case NES_AEQE_AEID_LLP_CONNECTION_RESET:
- case NES_AEQE_AEID_TERMINATE_SENT:
- case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
case NES_AEQE_AEID_RESET_SENT:
- nesqp = *((struct nes_qp **)&context);
+ nesqp = (struct nes_qp *)(unsigned long)context;
if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
tcp_state = NES_AEQE_TCP_STATE_CLOSED;
}
if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
(tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
nesqp->hte_added = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n",
- nesqp->hwqp.qp_id);
- nes_hw_modify_qp(nesdev, nesqp,
- NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0);
- spin_lock_irqsave(&nesqp->lock, flags);
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
}
if ((nesqp->ibqp_state == IB_QPS_RTS) &&
nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
break;
case NES_AEQE_IWARP_STATE_TERMINATE:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
- if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
- next_iwarp_state |= 0x02000000;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- }
+ must_disconn = 0; /* terminate path takes care of disconn */
+ if (nesqp->term_flags == 0)
+ must_terminate = 1;
break;
- default:
- next_iwarp_state = 0;
- }
- spin_unlock_irqrestore(&nesqp->lock, flags);
- if (next_iwarp_state) {
- nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
- " also added another reference\n",
- nesqp->hwqp.qp_id, next_iwarp_state);
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
}
- nes_cm_disconn(nesqp);
} else {
if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) {
/* FIN Received but ib state not RTS,
close complete will be on its way */
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return;
- }
- spin_unlock_irqrestore(&nesqp->lock, flags);
- if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
- " also added another reference\n",
- nesqp->hwqp.qp_id, next_iwarp_state);
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+ must_disconn = 0;
}
- nes_cm_disconn(nesqp);
}
- break;
- case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED"
- " event on QP%u \n Q2 Data:\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- ((nesqp->ibqp_state == IB_QPS_RTS)&&
- (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+
+ if (must_terminate)
+ nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+ else if (must_disconn) {
+ if (next_iwarp_state) {
+ nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n",
+ nesqp->hwqp.qp_id, next_iwarp_state);
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+ }
nes_cm_disconn(nesqp);
- } else {
- nesqp->in_disconnect = 0;
- wake_up(&nesqp->kick_waitq);
}
break;
- case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = async_event_id;
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
- " event on QP%u, remote IP = 0x%08X \n",
- nesqp->hwqp.qp_id,
- ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr));
- } else {
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
- " event on QP%u \n",
- nesqp->hwqp.qp_id);
- }
- spin_unlock_irqrestore(&nesqp->lock, flags);
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET;
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
+
+ case NES_AEQE_AEID_TERMINATE_SENT:
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ nes_terminate_send_fin(nesdev, nesqp, aeqe);
break;
- case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
- if (NES_AEQE_INBOUND_RDMA&aeq_info) {
- nesqp = nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
- } else {
- /* TODO: get the actual WQE and mask off wqe index */
- context &= ~((u64)511);
- nesqp = *((struct nes_qp **)&context);
- }
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
+
+ case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ nes_terminate_received(nesdev, nesqp, aeqe);
break;
+
+ case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
+ case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- break;
+ case NES_AEQE_AEID_AMP_INVALID_STAG:
+ case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+ case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
- nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words
- [NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u,"
- " nesqp = %p, AE reported %p\n",
- nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context));
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+ case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+ case NES_AEQE_AEID_AMP_TO_WRAP:
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
+ break;
+
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+ case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
+ aeq_info &= 0xffff0000;
+ aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
+ aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
}
+
+ case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
+ case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+ case NES_AEQE_AEID_AMP_BAD_QP:
+ case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+ case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+ case NES_AEQE_AEID_DDP_NO_L_BIT:
+ case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+ case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+ case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+ case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+ case NES_AEQE_AEID_AMP_BAD_PD:
+ case NES_AEQE_AEID_AMP_FASTREG_SHARED:
+ case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG:
+ case NES_AEQE_AEID_AMP_FASTREG_MW_STAG:
+ case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS:
+ case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW:
+ case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH:
+ case NES_AEQE_AEID_AMP_INVALIDATE_SHARED:
+ case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS:
+ case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG:
+ case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS:
+ case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS:
+ case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT:
+ case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED:
+ case NES_AEQE_AEID_BAD_CLOSE:
+ case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO:
+ case NES_AEQE_AEID_STAG_ZERO_INVALID:
+ case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
+ case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+ nesqp = (struct nes_qp *)(unsigned long)context;
+ nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
break;
+
case NES_AEQE_AEID_CQ_OPERATION_ERROR:
context <<= 1;
nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
if (resource_allocated) {
printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
__func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+ hw_cq = (struct nes_hw_cq *)(unsigned long)context;
+ if (hw_cq) {
+ nescq = container_of(hw_cq, struct nes_cq, hw_cq);
+ if (nescq->ibcq.event_handler) {
+ ibevent.device = nescq->ibcq.device;
+ ibevent.event = IB_EVENT_CQ_ERR;
+ ibevent.element.cq = &nescq->ibcq;
+ nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context);
+ }
+ }
}
break;
- case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- nesqp = nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG"
- "_FOR_AVAILABLE_BUFFER event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- /* tell cm to disconnect, cm will queue work to thread */
- nes_cm_disconn(nesqp);
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN"
- "_NO_BUFFER_AVAILABLE event on QP%u\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- /* tell cm to disconnect, cm will queue work to thread */
- nes_cm_disconn(nesqp);
- break;
- case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR"
- " event on QP%u \n Q2 Data:\n",
- nesqp->hwqp.qp_id);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_FATAL;
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- /* tell cm to disconnect, cm will queue work to thread */
- nes_cm_disconn(nesqp);
- break;
- /* TODO: additional AEs need to be here */
- case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
- nesqp = *((struct nes_qp **)&context);
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- if (nesqp->ibqp.event_handler) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.element.qp = &nesqp->ibqp;
- ibevent.event = IB_EVENT_QP_ACCESS_ERR;
- nesqp->ibqp.event_handler(&ibevent,
- nesqp->ibqp.qp_context);
- }
- nes_cm_disconn(nesqp);
- break;
+
default:
nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
async_event_id);
}
-
/**
* nes_iwarp_ce_handler
*/
{
struct nes_cqp_request *cqp_request;
struct nes_hw_cqp_wqe *cqp_wqe;
+ u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
+ u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
int ret;
cqp_request = nes_get_cqp_request(nesdev);
cqp_wqe = &cqp_request->cqp_wqe;
nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+ /* If wqe in error was identified, set code to be put into cqe */
+ if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) {
+ which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+ sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code;
+ nesqp->term_sq_flush_code = 0;
+ }
+
+ if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) {
+ which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+ rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code;
+ nesqp->term_rq_flush_code = 0;
+ }
+
+ if (which_wq & NES_CQP_FLUSH_MAJ_MIN) {
+ cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code);
+ cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code);
+ }
+
cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
};
#define NES_CQP_OP_IWARP_STATE_SHIFT 28
+#define NES_CQP_OP_TERMLEN_SHIFT 28
enum nes_cqp_qp_bits {
NES_CQP_QP_ARP_VALID = (1<<8),
NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
+ NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24),
+ NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25),
NES_CQP_QP_RESET = (1<<31),
};
enum nes_cqp_qp_wqe_word_idx {
NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
+ NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8,
+ NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9,
NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
};
enum nes_cqp_flush_bits {
NES_CQP_FLUSH_SQ = (1<<30),
NES_CQP_FLUSH_RQ = (1<<31),
+ NES_CQP_FLUSH_MAJ_MIN = (1<<28),
};
enum nes_cqe_opcode_bits {
NES_AEQE_INBOUND_RDMA = (1<<19),
NES_AEQE_IWARP_STATE_MASK = (7<<20),
NES_AEQE_TCP_STATE_MASK = (0xf<<24),
+ NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28),
NES_AEQE_VALID = (1<<31),
};
#define NES_AEQE_IWARP_STATE_SHIFT 20
#define NES_AEQE_TCP_STATE_SHIFT 24
+#define NES_AEQE_Q2_DATA_ETHERNET (1<<28)
+#define NES_AEQE_Q2_DATA_MPA (1<<29)
enum nes_aeqe_iwarp_state {
NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
NES_IWARP_SQ_OP_NOP = 12,
};
+enum nes_iwarp_cqe_major_code {
+ NES_IWARP_CQE_MAJOR_FLUSH = 1,
+ NES_IWARP_CQE_MAJOR_DRV = 0x8000
+};
+
+enum nes_iwarp_cqe_minor_code {
+ NES_IWARP_CQE_MINOR_FLUSH = 1
+};
+
#define NES_EEPROM_READ_REQUEST (1<<16)
#define NES_MAC_ADDR_VALID (1<<20)
u8 netdev_max; /* from host nic address count in EEPROM */
u8 port_count;
u8 virtwq;
+ u8 send_term_ok;
u8 et_use_adaptive_rx_coalesce;
u8 adapter_fcn_count;
u8 pft_mcast_map[NES_PFT_SIZE];
u32 num_pd;
};
+enum nes_hdrct_flags {
+ DDP_LEN_FLAG = 0x80,
+ DDP_HDR_FLAG = 0x40,
+ RDMA_HDR_FLAG = 0x20
+};
+
+enum nes_term_layers {
+ LAYER_RDMA = 0,
+ LAYER_DDP = 1,
+ LAYER_MPA = 2
+};
+
+enum nes_term_error_types {
+ RDMAP_CATASTROPHIC = 0,
+ RDMAP_REMOTE_PROT = 1,
+ RDMAP_REMOTE_OP = 2,
+ DDP_CATASTROPHIC = 0,
+ DDP_TAGGED_BUFFER = 1,
+ DDP_UNTAGGED_BUFFER = 2,
+ DDP_LLP = 3
+};
+
+enum nes_term_rdma_errors {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_INV_BOUNDS = 0x01,
+ RDMAP_ACCESS = 0x02,
+ RDMAP_UNASSOC_STAG = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_RDMAP_VER = 0x05,
+ RDMAP_UNEXPECTED_OP = 0x06,
+ RDMAP_CATASTROPHIC_LOCAL = 0x07,
+ RDMAP_CATASTROPHIC_GLOBAL = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum nes_term_ddp_errors {
+ DDP_CATASTROPHIC_LOCAL = 0x00,
+ DDP_TAGGED_INV_STAG = 0x00,
+ DDP_TAGGED_BOUNDS = 0x01,
+ DDP_TAGGED_UNASSOC_STAG = 0x02,
+ DDP_TAGGED_TO_WRAP = 0x03,
+ DDP_TAGGED_INV_DDP_VER = 0x04,
+ DDP_UNTAGGED_INV_QN = 0x01,
+ DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
+ DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
+ DDP_UNTAGGED_INV_MO = 0x04,
+ DDP_UNTAGGED_INV_TOO_LONG = 0x05,
+ DDP_UNTAGGED_INV_DDP_VER = 0x06
+};
+
+enum nes_term_mpa_errors {
+ MPA_CLOSED = 0x01,
+ MPA_CRC = 0x02,
+ MPA_MARKER = 0x03,
+ MPA_REQ_RSP = 0x04,
+};
+
+struct nes_terminate_hdr {
+ u8 layer_etype;
+ u8 error_code;
+ u8 hdrct;
+ u8 rsvd;
+};
+
+/* Used to determine how to fill in terminate error codes */
+#define IWARP_OPCODE_WRITE 0
+#define IWARP_OPCODE_READREQ 1
+#define IWARP_OPCODE_READRSP 2
+#define IWARP_OPCODE_SEND 3
+#define IWARP_OPCODE_SEND_INV 4
+#define IWARP_OPCODE_SEND_SE 5
+#define IWARP_OPCODE_SEND_SE_INV 6
+#define IWARP_OPCODE_TERM 7
+
+/* These values are used only during terminate processing */
+#define TERM_DDP_LEN_TAGGED 14
+#define TERM_DDP_LEN_UNTAGGED 18
+#define TERM_RDMA_LEN 28
+#define RDMA_OPCODE_MASK 0x0f
+#define RDMA_READ_REQ_OPCODE 1
+#define BAD_FRAME_OFFSET 64
+#define CQE_MAJOR_DRV 0x8000
+
#define nes_vlan_rx vlan_hwaccel_receive_skb
#define nes_netif_rx netif_receive_skb
} else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
nesadapter->virtwq = 1;
}
+ if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
+ nesadapter->send_term_ok = 1;
+
nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
(u32)((u8)eeprom_data);
spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
}
if (cqp_request == NULL) {
- cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
+ cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC);
if (cqp_request) {
cqp_request->dynamic = 1;
INIT_LIST_HEAD(&cqp_request->list);
*/
static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
{
+ struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+ struct net_device *netdev = nesvnic->netdev;
+
memset(props, 0, sizeof(*props));
- props->max_mtu = IB_MTU_2048;
- props->active_mtu = IB_MTU_2048;
+ props->max_mtu = IB_MTU_4096;
+
+ if (netdev->mtu >= 4096)
+ props->active_mtu = IB_MTU_4096;
+ else if (netdev->mtu >= 2048)
+ props->active_mtu = IB_MTU_2048;
+ else if (netdev->mtu >= 1024)
+ props->active_mtu = IB_MTU_1024;
+ else if (netdev->mtu >= 512)
+ props->active_mtu = IB_MTU_512;
+ else
+ props->active_mtu = IB_MTU_256;
+
props->lid = 1;
props->lmc = 0;
props->sm_lid = 0;
props->sm_sl = 0;
- props->state = IB_PORT_ACTIVE;
+ if (nesvnic->linkup)
+ props->state = IB_PORT_ACTIVE;
+ else
+ props->state = IB_PORT_DOWN;
props->phys_state = 0;
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
}
+/**
+ * nes_clean_cq
+ */
+static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq)
+{
+ u32 cq_head;
+ u32 lo;
+ u32 hi;
+ u64 u64temp;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&nescq->lock, flags);
+
+ cq_head = nescq->hw_cq.cq_head;
+ while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
+ rmb();
+ lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+ hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]);
+ u64temp = (((u64)hi) << 32) | ((u64)lo);
+ u64temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+ if (u64temp == (u64)(unsigned long)nesqp) {
+ /* Zero the context value so cqe will be ignored */
+ nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0;
+ nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0;
+ }
+
+ if (++cq_head >= nescq->hw_cq.cq_size)
+ cq_head = 0;
+ }
+
+ spin_unlock_irqrestore(&nescq->lock, flags);
+}
+
+
/**
* nes_destroy_qp
*/
static int nes_destroy_qp(struct ib_qp *ibqp)
{
struct nes_qp *nesqp = to_nesqp(ibqp);
- /* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */
struct nes_ucontext *nes_ucontext;
struct ib_qp_attr attr;
struct iw_cm_id *cm_id;
nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
}
-
if (nesqp->user_mode) {
if ((ibqp->uobject)&&(ibqp->uobject->context)) {
nes_ucontext = to_nesucontext(ibqp->uobject->context);
}
if (nesqp->pbl_pbase)
kunmap(nesqp->page);
+ } else {
+ /* Clean any pending completions from the cq(s) */
+ if (nesqp->nesscq)
+ nes_clean_cq(nesqp, nesqp->nesscq);
+
+ if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
+ nes_clean_cq(nesqp, nesqp->nesrcq);
}
nes_rem_ref(&nesqp->ibqp);
* nes_hw_modify_qp
*/
int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
- u32 next_iwarp_state, u32 wait_completion)
+ u32 next_iwarp_state, u32 termlen, u32 wait_completion)
{
struct nes_hw_cqp_wqe *cqp_wqe;
/* struct iw_cm_id *cm_id = nesqp->cm_id; */
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
+ /* If sending a terminate message, fill in the length (in words) */
+ if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) &&
+ !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) {
+ termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT;
+ set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen);
+ }
+
atomic_set(&cqp_request->refcount, 2);
nes_post_cqp_request(nesdev, cqp_request);
}
nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
nesqp->hwqp.qp_id);
+ if (nesqp->term_flags)
+ del_timer(&nesqp->terminate_timer);
+
next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
/* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
if (nesqp->hte_added) {
if (issue_modify_qp) {
nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
- ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1);
+ ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1);
if (ret)
nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
" failed for QP%u.\n",
head = nesqp->hwqp.sq_head;
while (ib_wr) {
+ /* Check for QP error */
+ if (nesqp->term_flags) {
+ err = -EINVAL;
+ break;
+ }
+
/* Check for SQ overflow */
if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
err = -EINVAL;
head = nesqp->hwqp.rq_head;
while (ib_wr) {
+ /* Check for QP error */
+ if (nesqp->term_flags) {
+ err = -EINVAL;
+ break;
+ }
+
if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
err = -EINVAL;
break;
{
u64 u64temp;
u64 wrid;
- /* u64 u64temp; */
unsigned long flags = 0;
struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_qp *nesqp;
struct nes_hw_cqe cqe;
u32 head;
- u32 wq_tail;
+ u32 wq_tail = 0;
u32 cq_size;
u32 cqe_count = 0;
u32 wqe_index;
u32 u32temp;
- /* u32 counter; */
+ u32 move_cq_head = 1;
+ u32 err_code;
nes_debug(NES_DBG_CQ, "\n");
cq_size = nescq->hw_cq.cq_size;
while (cqe_count < num_entries) {
- if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
- NES_CQE_VALID) {
- /*
- * Make sure we read CQ entry contents *after*
- * we've checked the valid bit.
- */
- rmb();
-
- cqe = nescq->hw_cq.cq_vbase[head];
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- wqe_index = u32temp &
- (nesdev->nesadapter->max_qp_wr - 1);
- u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
- /* parse CQE, get completion context from WQE (either rq or sq */
- u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
- ((u64)u32temp);
- nesqp = *((struct nes_qp **)&u64temp);
+ if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
+ NES_CQE_VALID) == 0)
+ break;
+
+ /*
+ * Make sure we read CQ entry contents *after*
+ * we've checked the valid bit.
+ */
+ rmb();
+
+ cqe = nescq->hw_cq.cq_vbase[head];
+ u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+ wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1);
+ u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+ /* parse CQE, get completion context from WQE (either rq or sq) */
+ u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+ ((u64)u32temp);
+
+ if (u64temp) {
+ nesqp = (struct nes_qp *)(unsigned long)u64temp;
memset(entry, 0, sizeof *entry);
if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
entry->status = IB_WC_SUCCESS;
} else {
- entry->status = IB_WC_WR_FLUSH_ERR;
+ err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]);
+ if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) {
+ entry->status = err_code & 0x0000ffff;
+
+ /* The rest of the cqe's will be marked as flushed */
+ nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] =
+ cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) |
+ NES_IWARP_CQE_MINOR_FLUSH);
+ } else
+ entry->status = IB_WC_WR_FLUSH_ERR;
}
entry->qp = &nesqp->ibqp;
if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
if (nesqp->skip_lsmm) {
nesqp->skip_lsmm = 0;
- wq_tail = nesqp->hwqp.sq_tail++;
+ nesqp->hwqp.sq_tail++;
}
/* Working on a SQ Completion*/
- wq_tail = wqe_index;
- nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
- wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+ wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
- ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+ ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
- switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
case NES_IWARP_SQ_OP_RDMAW:
nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
case NES_IWARP_SQ_OP_RDMAR:
nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
entry->opcode = IB_WC_RDMA_READ;
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+ entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
break;
case NES_IWARP_SQ_OP_SENDINV:
entry->opcode = IB_WC_SEND;
break;
}
+
+ nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
+ if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) {
+ move_cq_head = 0;
+ wq_tail = nesqp->hwqp.sq_tail;
+ }
} else {
/* Working on a RQ Completion*/
- wq_tail = wqe_index;
- nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
- wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
- ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
+ wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
+ ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
entry->opcode = IB_WC_RECV;
+
+ nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
+ if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) {
+ move_cq_head = 0;
+ wq_tail = nesqp->hwqp.rq_tail;
+ }
}
+
entry->wr_id = wrid;
+ entry++;
+ cqe_count++;
+ }
+ if (move_cq_head) {
+ nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
if (++head >= cq_size)
head = 0;
- cqe_count++;
nescq->polled_completions++;
+
if ((nescq->polled_completions > (cq_size / 2)) ||
(nescq->polled_completions == 255)) {
nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
- " are pending %u of %u.\n",
- nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
+ " are pending %u of %u.\n",
+ nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+ nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
nescq->polled_completions = 0;
}
- entry++;
- } else
- break;
+ } else {
+ /* Update the wqe index and set status to flush */
+ wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+ wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail;
+ nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] =
+ cpu_to_le32(wqe_index);
+ move_cq_head = 1; /* ready for next pass */
+ }
}
if (nescq->polled_completions) {
#define NES_MAX_USER_DB_REGIONS 4096
#define NES_MAX_USER_WQ_REGIONS 4096
+#define NES_TERM_SENT 0x01
+#define NES_TERM_RCVD 0x02
+#define NES_TERM_DONE 0x04
+
struct nes_ucontext {
struct ib_ucontext ibucontext;
struct nes_device *nesdev;
spinlock_t lock;
};
+struct disconn_work {
+ struct work_struct work;
+ struct nes_qp *nesqp;
+};
+
struct iw_cm_id;
struct ietf_mpa_frame;
void *allocated_buffer;
struct iw_cm_id *cm_id;
struct workqueue_struct *wq;
- struct work_struct disconn_work;
struct nes_cq *nesscq;
struct nes_cq *nesrcq;
struct nes_pd *nespd;
void *pbl_vbase;
dma_addr_t pbl_pbase;
struct page *page;
+ struct timer_list terminate_timer;
+ enum ib_event_type terminate_eventtype;
wait_queue_head_t kick_waitq;
u16 in_disconnect;
u16 private_data_len;
+ u16 term_sq_flush_code;
+ u16 term_rq_flush_code;
u8 active_conn;
u8 skip_lsmm;
u8 user_mode;
u8 hw_iwarp_state;
u8 flush_issued;
u8 hw_tcp_state;
- u8 disconn_pending;
+ u8 term_flags;
u8 destroyed;
};
#endif /* NES_VERBS_H */
*/
#include <rdma/ib_cm.h>
-#include <rdma/ib_cache.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <linux/icmpv6.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
-#include <rdma/ib_cache.h>
#include <linux/ip.h>
#include <linux/tcp.h>
skb_queue_len(&neigh->queue));
goto err_drop;
}
- } else
+ } else {
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
+ return;
+ }
} else {
neigh->ah = NULL;
ipoib_dbg(priv, "Send unicast ARP to %04x\n",
be16_to_cpu(path->pathrec.dlid));
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
/* put pseudoheader back on for next time */
}
}
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ return;
}
unlock:
}
}
+static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
+ const u8 *broadcast)
+{
+ if (addrlen != INFINIBAND_ALEN)
+ return 0;
+ /* reserved QPN, prefix, scope */
+ if (memcmp(addr, broadcast, 6))
+ return 0;
+ /* signature lower, pkey */
+ if (memcmp(addr + 7, broadcast + 7, 3))
+ return 0;
+ return 1;
+}
+
void ipoib_mcast_restart_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
union ib_gid mgid;
+ if (!ipoib_mcast_addr_is_valid(mclist->dmi_addr,
+ mclist->dmi_addrlen,
+ dev->broadcast))
+ continue;
+
memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
mcast = __ipoib_mcast_find(dev, &mgid);
if (!other_ports)
schedule_chk_task(adapter);
+ cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_UP, pi->port_id);
return 0;
}
if (!adapter->open_device_map)
cxgb_down(adapter);
+ cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_DOWN, pi->port_id);
return 0;
}
if (is_offload(adapter) &&
test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
- cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
+ cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
offload_close(&adapter->tdev);
}
}
if (is_offload(adapter) && !ofld_disable)
- cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
+ cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
}
/*
mutex_unlock(&cxgb3_db_lock);
}
-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error)
+void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port)
{
struct cxgb3_client *client;
mutex_lock(&cxgb3_db_lock);
list_for_each_entry(client, &client_list, client_list) {
- if (client->err_handler)
- client->err_handler(tdev, status, error);
+ if (client->event_handler)
+ client->event_handler(tdev, event, port);
}
mutex_unlock(&cxgb3_db_lock);
}
void cxgb3_unregister_client(struct cxgb3_client *client);
void cxgb3_add_clients(struct t3cdev *tdev);
void cxgb3_remove_clients(struct t3cdev *tdev);
-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error);
+void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port);
typedef int (*cxgb3_cpl_handler_func)(struct t3cdev *dev,
struct sk_buff *skb, void *ctx);
enum {
OFFLOAD_STATUS_UP,
- OFFLOAD_STATUS_DOWN
+ OFFLOAD_STATUS_DOWN,
+ OFFLOAD_PORT_DOWN,
+ OFFLOAD_PORT_UP
};
struct cxgb3_client {
int (*redirect)(void *ctx, struct dst_entry *old,
struct dst_entry *new, struct l2t_entry *l2t);
struct list_head client_list;
- void (*err_handler)(struct t3cdev *tdev, u32 status, u32 error);
+ void (*event_handler)(struct t3cdev *tdev, u32 event, u32 port);
};
/*
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/mlx4/cmd.h>
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include "mlx4.h"
#include "fw.h"
+enum {
+ MLX4_IRQNAME_SIZE = 64
+};
+
enum {
MLX4_NUM_ASYNC_EQE = 0x100,
MLX4_NUM_SPARE_EQE = 0x80,
iounmap(priv->clr_base);
}
-int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
- int ret;
-
- /*
- * We assume that mapping one page is enough for the whole EQ
- * context table. This is fine with all current HCAs, because
- * we only use 32 EQs and each EQ uses 64 bytes of context
- * memory, or 1 KB total.
- */
- priv->eq_table.icm_virt = icm_virt;
- priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
- if (!priv->eq_table.icm_page)
- return -ENOMEM;
- priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) {
- __free_page(priv->eq_table.icm_page);
- return -ENOMEM;
- }
-
- ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
- if (ret) {
- pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
- __free_page(priv->eq_table.icm_page);
- }
-
- return ret;
-}
-
-void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
-{
- struct mlx4_priv *priv = mlx4_priv(dev);
-
- mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
- pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
- __free_page(priv->eq_table.icm_page);
-}
-
int mlx4_alloc_eq_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
priv->eq_table.clr_int = priv->clr_base +
(priv->eq_table.inta_pin < 32 ? 4 : 0);
- priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+ priv->eq_table.irq_names =
+ kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1),
+ GFP_KERNEL);
if (!priv->eq_table.irq_names) {
err = -ENOMEM;
goto err_out_bitmap;
goto err_out_comp;
if (dev->flags & MLX4_FLAG_MSI_X) {
- static const char async_eq_name[] = "mlx4-async";
const char *eq_name;
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
if (i < dev->caps.num_comp_vectors) {
- snprintf(priv->eq_table.irq_names + i * 16, 16,
- "mlx4-comp-%d", i);
- eq_name = priv->eq_table.irq_names + i * 16;
- } else
- eq_name = async_eq_name;
+ snprintf(priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE,
+ "mlx4-comp-%d@pci:%s", i,
+ pci_name(dev->pdev));
+ } else {
+ snprintf(priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE,
+ MLX4_IRQNAME_SIZE,
+ "mlx4-async@pci:%s",
+ pci_name(dev->pdev));
+ }
+ eq_name = priv->eq_table.irq_names +
+ i * MLX4_IRQNAME_SIZE;
err = request_irq(priv->eq_table.eq[i].irq,
mlx4_msi_x_interrupt, 0, eq_name,
priv->eq_table.eq + i);
priv->eq_table.eq[i].have_irq = 1;
}
} else {
+ snprintf(priv->eq_table.irq_names,
+ MLX4_IRQNAME_SIZE,
+ DRV_NAME "@pci:%s",
+ pci_name(dev->pdev));
err = request_irq(dev->pdev->irq, mlx4_interrupt,
- IRQF_SHARED, DRV_NAME, dev);
+ IRQF_SHARED, priv->eq_table.irq_names, dev);
if (err)
goto err_out_async;
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
goto err_unmap_aux;
}
- err = mlx4_map_eq_icm(dev, init_hca->eqc_base);
+ err = mlx4_init_icm_table(dev, &priv->eq_table.table,
+ init_hca->eqc_base, dev_cap->eqc_entry_sz,
+ dev->caps.num_eqs, dev->caps.num_eqs,
+ 0, 0);
if (err) {
mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
goto err_unmap_cmpt;
mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
err_unmap_eq:
- mlx4_unmap_eq_icm(dev);
+ mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
err_unmap_cmpt:
mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
+ mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
- mlx4_unmap_eq_icm(dev);
mlx4_UNMAP_ICM_AUX(dev);
mlx4_free_icm(dev, priv->fw.aux_icm, 0);
return 0;
err_close:
- mlx4_close_hca(dev);
+ mlx4_CLOSE_HCA(dev, 0);
err_free_icm:
mlx4_free_icms(dev);
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 0, DRV_NAME);
+ err = pci_request_regions(pdev, DRV_NAME);
if (err) {
- dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
+ dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
goto err_disable_pdev;
}
- err = pci_request_region(pdev, 2, DRV_NAME);
- if (err) {
- dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
- goto err_release_bar0;
- }
-
pci_set_master(pdev);
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (err) {
dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
"aborting.\n");
- goto err_release_bar2;
+ goto err_release_regions;
}
}
dev_err(&pdev->dev, "Device struct alloc failed, "
"aborting.\n");
err = -ENOMEM;
- goto err_release_bar2;
+ goto err_release_regions;
}
dev = &priv->dev;
err_free_dev:
kfree(priv);
-err_release_bar2:
- pci_release_region(pdev, 2);
-
-err_release_bar0:
- pci_release_region(pdev, 0);
+err_release_regions:
+ pci_release_regions(pdev);
err_disable_pdev:
pci_disable_device(pdev);
pci_disable_msix(pdev);
kfree(priv);
- pci_release_region(pdev, 2);
- pci_release_region(pdev, 0);
+ pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>
void __iomem **uar_map;
u32 clr_mask;
struct mlx4_eq *eq;
- u64 icm_virt;
- struct page *icm_page;
- dma_addr_t icm_dma;
+ struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
int have_irq;
u8 inta_pin;
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *init_hca);
-int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt);
-void mlx4_unmap_eq_icm(struct mlx4_dev *dev);
-
int mlx4_cmd_init(struct mlx4_dev *dev);
void mlx4_cmd_cleanup(struct mlx4_dev *dev);
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mlx4/cmd.h>
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <asm/page.h>
* SOFTWARE.
*/
-#include <linux/init.h>
-
#include "mlx4.h"
#include "fw.h"
* SOFTWARE.
*/
-#include <linux/init.h>
-
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/qp.h>
* SOFTWARE.
*/
-#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/delay.h>
* SOFTWARE.
*/
-#include <linux/init.h>
-
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
static int tun_attach(struct tun_struct *tun, struct file *file)
{
struct tun_file *tfile = file->private_data;
- const struct cred *cred = current_cred();
int err;
ASSERT_RTNL();
- /* Check permissions */
- if (((tun->owner != -1 && cred->euid != tun->owner) ||
- (tun->group != -1 && !in_egroup_p(tun->group))) &&
- !capable(CAP_NET_ADMIN))
- return -EPERM;
-
netif_tx_lock_bh(tun->dev);
err = -EINVAL;
dev = __dev_get_by_name(net, ifr->ifr_name);
if (dev) {
+ const struct cred *cred = current_cred();
+
if (ifr->ifr_flags & IFF_TUN_EXCL)
return -EBUSY;
if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
else
return -EINVAL;
+ if (((tun->owner != -1 && cred->euid != tun->owner) ||
+ (tun->group != -1 && !in_egroup_p(tun->group))) &&
+ !capable(CAP_NET_ADMIN))
+ return -EPERM;
+ err = security_tun_dev_attach(tun->sk);
+ if (err < 0)
+ return err;
+
err = tun_attach(tun, file);
if (err < 0)
return err;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
+ err = security_tun_dev_create();
+ if (err < 0)
+ return err;
/* Set dev type */
if (ifr->ifr_flags & IFF_TUN) {
tun->sk = sk;
container_of(sk, struct tun_sock, sk)->tun = tun;
+ security_tun_dev_post_create(sk);
+
tun_net_init(dev);
if (strchr(dev->name, '%')) {
static void open_s3_dev(struct t3cdev *);
static void close_s3_dev(struct t3cdev *);
-static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error);
+static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port);
static cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS];
static struct cxgb3_client t3c_client = {
.handlers = cxgb3i_cpl_handlers,
.add = open_s3_dev,
.remove = close_s3_dev,
- .err_handler = s3_err_handler,
+ .event_handler = s3_event_handler,
};
/**
cxgb3i_ddp_cleanup(t3dev);
}
-static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error)
+static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port)
{
struct cxgb3i_adapter *snic = cxgb3i_adapter_find_by_tdev(tdev);
- cxgb3i_log_info("snic 0x%p, tdev 0x%p, status 0x%x, err 0x%x.\n",
- snic, tdev, status, error);
+ cxgb3i_log_info("snic 0x%p, tdev 0x%p, event 0x%x, port 0x%x.\n",
+ snic, tdev, event, port);
if (!snic)
return;
- switch (status) {
+ switch (event) {
case OFFLOAD_STATUS_DOWN:
snic->flags |= CXGB3I_ADAPTER_FLAG_RESET;
break;
mutex_lock(&dev->mutex);
if (dev->attached)
goto ok;
- if (!capable(CAP_SYS_MODULE) && dev->in_request_module) {
+ if (!capable(CAP_NET_ADMIN) && dev->in_request_module) {
DPRINTK("in request module\n");
mutex_unlock(&dev->mutex);
return -ENODEV;
}
- if (capable(CAP_SYS_MODULE) && dev->in_request_module)
+ if (capable(CAP_NET_ADMIN) && dev->in_request_module)
goto ok;
dev->in_request_module = 1;
dev->in_request_module = 0;
- if (!dev->attached && !capable(CAP_SYS_MODULE)) {
- DPRINTK("not attached and not CAP_SYS_MODULE\n");
+ if (!dev->attached && !capable(CAP_NET_ADMIN)) {
+ DPRINTK("not attached and not CAP_NET_ADMIN\n");
mutex_unlock(&dev->mutex);
return -ENODEV;
}
if (can_sleep)
lock->fl_flags |= FL_SLEEP;
- error = security_file_lock(filp, cmd);
+ error = security_file_lock(filp, lock->fl_type);
if (error)
goto out_free;
if (error)
return error;
- error = ima_path_check(path,
- acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
+ error = ima_path_check(path, acc_mode ?
+ acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
+ ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
IMA_COUNT_UPDATE);
+
if (error)
return error;
/*
int flags = nfsexp_flags(rqstp, exp);
int ret;
+ validate_process_creds();
+
/* discard any old override before preparing the new set */
revert_creds(get_cred(current->real_cred));
new = prepare_creds();
else
new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
+ validate_process_creds();
put_cred(override_creds(new));
put_cred(new);
+ validate_process_creds();
return 0;
oom:
/* Lock the export hash tables for reading. */
exp_readlock();
+ validate_process_creds();
svc_process(rqstp);
+ validate_process_creds();
/* Unlock export hash tables */
exp_readunlock();
__be32 err;
int host_err;
+ validate_process_creds();
+
/*
* If we get here, then the client has already done an "open",
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
out_nfserr:
err = nfserrno(host_err);
out:
+ validate_process_creds();
return err;
}
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
struct file *filp)
{
- int err;
+ int ret;
struct iattr newattrs;
/* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
}
/* Remove suid/sgid on truncate too */
- newattrs.ia_valid |= should_remove_suid(dentry);
+ ret = should_remove_suid(dentry);
+ if (ret)
+ newattrs.ia_valid |= ret | ATTR_FORCE;
mutex_lock(&dentry->d_inode->i_mutex);
- err = notify_change(dentry, &newattrs);
+ ret = notify_change(dentry, &newattrs);
mutex_unlock(&dentry->d_inode->i_mutex);
- return err;
+ return ret;
}
static long do_sys_truncate(const char __user *pathname, loff_t length)
int error;
struct file *f;
+ validate_creds(cred);
+
/*
* We must always pass in a valid mount pointer. Historically
* callers got away with not passing it, but we must enforce this at
const struct inode_operations sysfs_dir_inode_operations = {
.lookup = sysfs_lookup,
.setattr = sysfs_setattr,
+ .setxattr = sysfs_setxattr,
};
static void remove_dir(struct sysfs_dirent *sd)
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
#include "sysfs.h"
extern struct super_block * sysfs_sb;
static const struct inode_operations sysfs_inode_operations ={
.setattr = sysfs_setattr,
+ .setxattr = sysfs_setxattr,
};
int __init sysfs_inode_init(void)
return bdi_init(&sysfs_backing_dev_info);
}
+struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
+{
+ struct sysfs_inode_attrs *attrs;
+ struct iattr *iattrs;
+
+ attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
+ if (!attrs)
+ return NULL;
+ iattrs = &attrs->ia_iattr;
+
+ /* assign default attributes */
+ iattrs->ia_mode = sd->s_mode;
+ iattrs->ia_uid = 0;
+ iattrs->ia_gid = 0;
+ iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
+
+ return attrs;
+}
int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
{
struct inode * inode = dentry->d_inode;
struct sysfs_dirent * sd = dentry->d_fsdata;
- struct iattr * sd_iattr;
+ struct sysfs_inode_attrs *sd_attrs;
+ struct iattr *iattrs;
unsigned int ia_valid = iattr->ia_valid;
int error;
if (!sd)
return -EINVAL;
- sd_iattr = sd->s_iattr;
+ sd_attrs = sd->s_iattr;
error = inode_change_ok(inode, iattr);
if (error)
if (error)
return error;
- if (!sd_iattr) {
+ if (!sd_attrs) {
/* setting attributes for the first time, allocate now */
- sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
- if (!sd_iattr)
+ sd_attrs = sysfs_init_inode_attrs(sd);
+ if (!sd_attrs)
return -ENOMEM;
- /* assign default attributes */
- sd_iattr->ia_mode = sd->s_mode;
- sd_iattr->ia_uid = 0;
- sd_iattr->ia_gid = 0;
- sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
- sd->s_iattr = sd_iattr;
+ sd->s_iattr = sd_attrs;
+ } else {
+ /* attributes were changed at least once in past */
+ iattrs = &sd_attrs->ia_iattr;
+
+ if (ia_valid & ATTR_UID)
+ iattrs->ia_uid = iattr->ia_uid;
+ if (ia_valid & ATTR_GID)
+ iattrs->ia_gid = iattr->ia_gid;
+ if (ia_valid & ATTR_ATIME)
+ iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_MTIME)
+ iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_CTIME)
+ iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
+ inode->i_sb->s_time_gran);
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = iattr->ia_mode;
+
+ if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ mode &= ~S_ISGID;
+ iattrs->ia_mode = sd->s_mode = mode;
+ }
}
+ return error;
+}
- /* attributes were changed atleast once in past */
-
- if (ia_valid & ATTR_UID)
- sd_iattr->ia_uid = iattr->ia_uid;
- if (ia_valid & ATTR_GID)
- sd_iattr->ia_gid = iattr->ia_gid;
- if (ia_valid & ATTR_ATIME)
- sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_MTIME)
- sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_CTIME)
- sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
- inode->i_sb->s_time_gran);
- if (ia_valid & ATTR_MODE) {
- umode_t mode = iattr->ia_mode;
-
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- mode &= ~S_ISGID;
- sd_iattr->ia_mode = sd->s_mode = mode;
- }
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct sysfs_dirent *sd = dentry->d_fsdata;
+ struct sysfs_inode_attrs *iattrs;
+ void *secdata;
+ int error;
+ u32 secdata_len = 0;
+
+ if (!sd)
+ return -EINVAL;
+ if (!sd->s_iattr)
+ sd->s_iattr = sysfs_init_inode_attrs(sd);
+ if (!sd->s_iattr)
+ return -ENOMEM;
+
+ iattrs = sd->s_iattr;
+
+ if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
+ const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+ error = security_inode_setsecurity(dentry->d_inode, suffix,
+ value, size, flags);
+ if (error)
+ goto out;
+ error = security_inode_getsecctx(dentry->d_inode,
+ &secdata, &secdata_len);
+ if (error)
+ goto out;
+ if (iattrs->ia_secdata)
+ security_release_secctx(iattrs->ia_secdata,
+ iattrs->ia_secdata_len);
+ iattrs->ia_secdata = secdata;
+ iattrs->ia_secdata_len = secdata_len;
+ } else
+ return -EINVAL;
+out:
return error;
}
static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
struct bin_attribute *bin_attr;
+ struct sysfs_inode_attrs *iattrs;
inode->i_private = sysfs_get(sd);
inode->i_mapping->a_ops = &sysfs_aops;
inode->i_ino = sd->s_ino;
lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
- if (sd->s_iattr) {
+ iattrs = sd->s_iattr;
+ if (iattrs) {
/* sysfs_dirent has non-default attributes
* get them for the new inode from persistent copy
* in sysfs_dirent
*/
- set_inode_attr(inode, sd->s_iattr);
+ set_inode_attr(inode, &iattrs->ia_iattr);
+ if (iattrs->ia_secdata)
+ security_inode_notifysecctx(inode,
+ iattrs->ia_secdata,
+ iattrs->ia_secdata_len);
} else
set_default_inode_attr(inode, sd->s_mode);
-
/* initialize inode according to type */
switch (sysfs_type(sd)) {
case SYSFS_DIR:
#include <linux/kobject.h>
#include <linux/namei.h>
#include <linux/mutex.h>
+#include <linux/security.h>
#include "sysfs.h"
}
const struct inode_operations sysfs_symlink_inode_operations = {
+ .setxattr = sysfs_setxattr,
.readlink = generic_readlink,
.follow_link = sysfs_follow_link,
.put_link = sysfs_put_link,
* This file is released under the GPLv2.
*/
+#include <linux/fs.h>
+
struct sysfs_open_dirent;
/* type-specific structures for sysfs_dirent->s_* union members */
struct hlist_head buffers;
};
+struct sysfs_inode_attrs {
+ struct iattr ia_iattr;
+ void *ia_secdata;
+ u32 ia_secdata_len;
+};
+
/*
* sysfs_dirent - the building block of sysfs hierarchy. Each and
* every sysfs node is represented by single sysfs_dirent.
unsigned int s_flags;
ino_t s_ino;
umode_t s_mode;
- struct iattr *s_iattr;
+ struct sysfs_inode_attrs *s_iattr;
};
#define SD_DEACTIVATED_BIAS INT_MIN
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
void sysfs_delete_inode(struct inode *inode);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags);
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
int sysfs_inode_init(void);
return inode_permission(inode, mask);
}
-int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
+/**
+ * __vfs_setxattr_noperm - perform setxattr operation without performing
+ * permission checks.
+ *
+ * @dentry - object to perform setxattr on
+ * @name - xattr name to set
+ * @value - value to set @name to
+ * @size - size of @value
+ * @flags - flags to pass into filesystem operations
+ *
+ * returns the result of the internal setxattr or setsecurity operations.
+ *
+ * This function requires the caller to lock the inode's i_mutex before it
+ * is executed. It also assumes that the caller will make the appropriate
+ * permission checks.
+ */
+int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
{
struct inode *inode = dentry->d_inode;
- int error;
-
- error = xattr_permission(inode, name, MAY_WRITE);
- if (error)
- return error;
+ int error = -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
- error = security_inode_setxattr(dentry, name, value, size, flags);
- if (error)
- goto out;
- error = -EOPNOTSUPP;
if (inode->i_op->setxattr) {
error = inode->i_op->setxattr(dentry, name, value, size, flags);
if (!error) {
if (!error)
fsnotify_xattr(dentry);
}
+
+ return error;
+}
+
+
+int
+vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct inode *inode = dentry->d_inode;
+ int error;
+
+ error = xattr_permission(inode, name, MAY_WRITE);
+ if (error)
+ return error;
+
+ mutex_lock(&inode->i_mutex);
+ error = security_inode_setxattr(dentry, name, value, size, flags);
+ if (error)
+ goto out;
+
+ error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+
out:
mutex_unlock(&inode->i_mutex);
return error;
*/
struct cred {
atomic_t usage;
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ atomic_t subscribers; /* number of processes subscribed */
+ void *put_addr;
+ unsigned magic;
+#define CRED_MAGIC 0x43736564
+#define CRED_MAGIC_DEAD 0x44656144
+#endif
uid_t uid; /* real UID of the task */
gid_t gid; /* real GID of the task */
uid_t suid; /* saved UID of the task */
};
extern void __put_cred(struct cred *);
+extern void exit_creds(struct task_struct *);
extern int copy_creds(struct task_struct *, unsigned long);
+extern struct cred *cred_alloc_blank(void);
extern struct cred *prepare_creds(void);
extern struct cred *prepare_exec_creds(void);
extern struct cred *prepare_usermodehelper_creds(void);
extern int set_create_files_as(struct cred *, struct inode *);
extern void __init cred_init(void);
+/*
+ * check for validity of credentials
+ */
+#ifdef CONFIG_DEBUG_CREDENTIALS
+extern void __invalid_creds(const struct cred *, const char *, unsigned);
+extern void __validate_process_creds(struct task_struct *,
+ const char *, unsigned);
+
+static inline bool creds_are_invalid(const struct cred *cred)
+{
+ if (cred->magic != CRED_MAGIC)
+ return true;
+ if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
+ return true;
+#ifdef CONFIG_SECURITY_SELINUX
+ if ((unsigned long) cred->security < PAGE_SIZE)
+ return true;
+ if ((*(u32*)cred->security & 0xffffff00) ==
+ (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
+ return true;
+#endif
+ return false;
+}
+
+static inline void __validate_creds(const struct cred *cred,
+ const char *file, unsigned line)
+{
+ if (unlikely(creds_are_invalid(cred)))
+ __invalid_creds(cred, file, line);
+}
+
+#define validate_creds(cred) \
+do { \
+ __validate_creds((cred), __FILE__, __LINE__); \
+} while(0)
+
+#define validate_process_creds() \
+do { \
+ __validate_process_creds(current, __FILE__, __LINE__); \
+} while(0)
+
+extern void validate_creds_for_do_exit(struct task_struct *);
+#else
+static inline void validate_creds(const struct cred *cred)
+{
+}
+static inline void validate_creds_for_do_exit(struct task_struct *tsk)
+{
+}
+static inline void validate_process_creds(void)
+{
+}
+#endif
+
/**
* get_new_cred - Get a reference on a new set of credentials
* @cred: The new credentials to reference
*/
static inline const struct cred *get_cred(const struct cred *cred)
{
- return get_new_cred((struct cred *) cred);
+ struct cred *nonconst_cred = (struct cred *) cred;
+ validate_creds(cred);
+ return get_new_cred(nonconst_cred);
}
/**
{
struct cred *cred = (struct cred *) _cred;
- BUG_ON(atomic_read(&(cred)->usage) <= 0);
+ validate_creds(cred);
if (atomic_dec_and_test(&(cred)->usage))
__put_cred(cred);
}
struct rw_semaphore sem; /* change vs change sem */
struct key_user *user; /* owner of this key */
void *security; /* security data for this key */
- time_t expiry; /* time at which key expires (or 0) */
+ union {
+ time_t expiry; /* time at which key expires (or 0) */
+ time_t revoked_at; /* time at which key was revoked */
+ };
uid_t uid;
gid_t gid;
key_perm_t perm; /* access permissions */
extern ctl_table key_sysctls[];
#endif
+extern void key_replace_session_keyring(void);
+
/*
* the userspace interface
*/
#define key_fsuid_changed(t) do { } while(0)
#define key_fsgid_changed(t) do { } while(0)
#define key_init() do { } while(0)
+#define key_replace_session_keyring() do { } while(0)
#endif /* CONFIG_KEYS */
#endif /* __KERNEL__ */
#define KEYCTL_SET_TIMEOUT 15 /* set key timeout */
#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */
#define KEYCTL_GET_SECURITY 17 /* get key security label */
+#define KEYCTL_SESSION_TO_PARENT 18 /* apply session keyring to parent process */
#endif /* _LINUX_KEYCTL_H */
int kmemcheck_show_addr(unsigned long address);
int kmemcheck_hide_addr(unsigned long address);
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
+
#else
#define kmemcheck_enabled 0
{
}
+static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+ return true;
+}
+
#endif /* CONFIG_KMEMCHECK */
/*
#ifdef CONFIG_DEBUG_KMEMLEAK
-extern void kmemleak_init(void);
+extern void kmemleak_init(void) __ref;
extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
- gfp_t gfp);
-extern void kmemleak_free(const void *ptr);
-extern void kmemleak_free_part(const void *ptr, size_t size);
+ gfp_t gfp) __ref;
+extern void kmemleak_free(const void *ptr) __ref;
+extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
extern void kmemleak_padding(const void *ptr, unsigned long offset,
- size_t size);
-extern void kmemleak_not_leak(const void *ptr);
-extern void kmemleak_ignore(const void *ptr);
+ size_t size) __ref;
+extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_ignore(const void *ptr) __ref;
extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
- size_t length, gfp_t gfp);
-extern void kmemleak_no_scan(const void *ptr);
+ size_t length, gfp_t gfp) __ref;
+extern void kmemleak_no_scan(const void *ptr) __ref;
static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
int min_count, unsigned long flags,
#define LSM_AUDIT_DATA_IPC 4
#define LSM_AUDIT_DATA_TASK 5
#define LSM_AUDIT_DATA_KEY 6
+#define LSM_AUDIT_NO_AUDIT 7
struct task_struct *tsk;
union {
struct {
} key_struct;
#endif
} u;
- const char *function;
/* this union contains LSM specific data */
union {
+#ifdef CONFIG_SECURITY_SMACK
/* SMACK data */
struct smack_audit_data {
+ const char *function;
char *subject;
char *object;
char *request;
int result;
} smack_audit_data;
+#endif
+#ifdef CONFIG_SECURITY_SELINUX
/* SELinux data */
struct {
u32 ssid;
u16 tclass;
u32 requested;
u32 audited;
+ u32 denied;
struct av_decision *avd;
int result;
} selinux_audit_data;
- } lsm_priv;
+#endif
+ };
/* these callback will be implemented by a specific LSM */
void (*lsm_pre_audit)(struct audit_buffer *, void *);
void (*lsm_post_audit)(struct audit_buffer *, void *);
/* Initialize an LSM audit data structure. */
#define COMMON_AUDIT_DATA_INIT(_d, _t) \
{ memset((_d), 0, sizeof(struct common_audit_data)); \
- (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; }
+ (_d)->type = LSM_AUDIT_DATA_##_t; }
void common_lsm_audit(struct common_audit_data *a);
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
* (notably. ptrace) */
+ struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
char comm[TASK_COMM_LEN]; /* executable name excluding path
- access with [gs]et_task_comm (which lock
#define for_each_process(p) \
for (p = &init_task ; (p = next_task(p)) != &init_task ; )
-extern bool is_single_threaded(struct task_struct *);
+extern bool current_is_single_threaded(void);
/*
* Careful: do_each_thread/while_each_thread is a double loop so
extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
int cap, int audit);
extern int cap_settime(struct timespec *ts, struct timezone *tz);
-extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
+extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
extern int cap_ptrace_traceme(struct task_struct *parent);
extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
extern int cap_capset(struct cred *new, const struct cred *old,
* manual page for definitions of the @clone_flags.
* @clone_flags contains the flags indicating what should be shared.
* Return 0 if permission is granted.
+ * @cred_alloc_blank:
+ * @cred points to the credentials.
+ * @gfp indicates the atomicity of any memory allocations.
+ * Only allocate sufficient memory and attach to @cred such that
+ * cred_transfer() will not get ENOMEM.
* @cred_free:
* @cred points to the credentials.
* Deallocate and clear the cred->security field in a set of credentials.
* @new points to the new credentials.
* @old points to the original credentials.
* Install a new set of credentials.
+ * @cred_transfer:
+ * @new points to the new credentials.
+ * @old points to the original credentials.
+ * Transfer data from original creds to new creds
* @kernel_act_as:
* Set the credentials for a kernel service to act as (subjective context).
* @new points to the credentials to be modified.
* @inode points to the inode to use as a reference.
* The current task must be the one that nominated @inode.
* Return 0 if successful.
+ * @kernel_module_request:
+ * Ability to trigger the kernel to automatically upcall to userspace for
+ * userspace to load a kernel module with the given name.
+ * Return 0 if successful.
* @task_setuid:
* Check permission before setting one or more of the user identity
* attributes of the current process. The @flags parameter indicates
* Sets the connection's peersid to the secmark on skb.
* @req_classify_flow:
* Sets the flow's sid to the openreq sid.
+ * @tun_dev_create:
+ * Check permissions prior to creating a new TUN device.
+ * @tun_dev_post_create:
+ * This hook allows a module to update or allocate a per-socket security
+ * structure.
+ * @sk contains the newly created sock structure.
+ * @tun_dev_attach:
+ * Check permissions prior to attaching to a persistent TUN device. This
+ * hook can also be used by the module to update any security state
+ * associated with the TUN device's sock structure.
+ * @sk contains the existing sock structure.
*
* Security hooks for XFRM operations.
*
* Return the length of the string (including terminating NUL) or -ve if
* an error.
* May also return 0 (and a NULL buffer pointer) if there is no label.
+ * @key_session_to_parent:
+ * Forcibly assign the session keyring from a process to its parent
+ * process.
+ * @cred: Pointer to process's credentials
+ * @parent_cred: Pointer to parent process's credentials
+ * @keyring: Proposed new session keyring
+ * Return 0 if permission is granted, -ve error otherwise.
*
* Security hooks affecting all System V IPC operations.
*
* @alter contains the flag indicating whether changes are to be made.
* Return 0 if permission is granted.
*
- * @ptrace_may_access:
+ * @ptrace_access_check:
* Check permission before allowing the current process to trace the
* @child process.
* Security modules may also want to perform a process tracing check
* Check that the @parent process has sufficient permission to trace the
* current process before allowing the current process to present itself
* to the @parent process for tracing.
- * The parent process will still have to undergo the ptrace_may_access
+ * The parent process will still have to undergo the ptrace_access_check
* checks before it is allowed to trace this one.
* @parent contains the task_struct structure for debugger process.
* Return 0 if permission is granted.
* audit_rule_init.
* @rule contains the allocated rule
*
+ * @inode_notifysecctx:
+ * Notify the security module of what the security context of an inode
+ * should be. Initializes the incore security context managed by the
+ * security module for this inode. Example usage: NFS client invokes
+ * this hook to initialize the security context in its incore inode to the
+ * value provided by the server for the file when the server returned the
+ * file's attributes to the client.
+ *
+ * Must be called with inode->i_mutex locked.
+ *
+ * @inode we wish to set the security context of.
+ * @ctx contains the string which we wish to set in the inode.
+ * @ctxlen contains the length of @ctx.
+ *
+ * @inode_setsecctx:
+ * Change the security context of an inode. Updates the
+ * incore security context managed by the security module and invokes the
+ * fs code as needed (via __vfs_setxattr_noperm) to update any backing
+ * xattrs that represent the context. Example usage: NFS server invokes
+ * this hook to change the security context in its incore inode and on the
+ * backing filesystem to a value provided by the client on a SETATTR
+ * operation.
+ *
+ * Must be called with inode->i_mutex locked.
+ *
+ * @dentry contains the inode we wish to set the security context of.
+ * @ctx contains the string which we wish to set in the inode.
+ * @ctxlen contains the length of @ctx.
+ *
+ * @inode_getsecctx:
+ * Returns a string containing all relavent security context information
+ *
+ * @inode we wish to set the security context of.
+ * @ctx is a pointer in which to place the allocated security context.
+ * @ctxlen points to the place to put the length of @ctx.
* This is the main security structure.
*/
struct security_operations {
char name[SECURITY_NAME_MAX + 1];
- int (*ptrace_may_access) (struct task_struct *child, unsigned int mode);
+ int (*ptrace_access_check) (struct task_struct *child, unsigned int mode);
int (*ptrace_traceme) (struct task_struct *parent);
int (*capget) (struct task_struct *target,
kernel_cap_t *effective,
int (*dentry_open) (struct file *file, const struct cred *cred);
int (*task_create) (unsigned long clone_flags);
+ int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp);
void (*cred_free) (struct cred *cred);
int (*cred_prepare)(struct cred *new, const struct cred *old,
gfp_t gfp);
void (*cred_commit)(struct cred *new, const struct cred *old);
+ void (*cred_transfer)(struct cred *new, const struct cred *old);
int (*kernel_act_as)(struct cred *new, u32 secid);
int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
+ int (*kernel_module_request)(void);
int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
int (*task_fix_setuid) (struct cred *new, const struct cred *old,
int flags);
int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid);
void (*release_secctx) (char *secdata, u32 seclen);
+ int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen);
+ int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen);
+ int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
+
#ifdef CONFIG_SECURITY_NETWORK
int (*unix_stream_connect) (struct socket *sock,
struct socket *other, struct sock *newsk);
void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
+ int (*tun_dev_create)(void);
+ void (*tun_dev_post_create)(struct sock *sk);
+ int (*tun_dev_attach)(struct sock *sk);
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
const struct cred *cred,
key_perm_t perm);
int (*key_getsecurity)(struct key *key, char **_buffer);
+ int (*key_session_to_parent)(const struct cred *cred,
+ const struct cred *parent_cred,
+ struct key *key);
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
extern int register_security(struct security_operations *ops);
/* Security operations */
-int security_ptrace_may_access(struct task_struct *child, unsigned int mode);
+int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
int security_ptrace_traceme(struct task_struct *parent);
int security_capget(struct task_struct *target,
kernel_cap_t *effective,
int security_file_receive(struct file *file);
int security_dentry_open(struct file *file, const struct cred *cred);
int security_task_create(unsigned long clone_flags);
+int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
void security_cred_free(struct cred *cred);
int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
void security_commit_creds(struct cred *new, const struct cred *old);
+void security_transfer_creds(struct cred *new, const struct cred *old);
int security_kernel_act_as(struct cred *new, u32 secid);
int security_kernel_create_files_as(struct cred *new, struct inode *inode);
+int security_kernel_module_request(void);
int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
int security_task_fix_setuid(struct cred *new, const struct cred *old,
int flags);
int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
void security_release_secctx(char *secdata, u32 seclen);
+int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
+int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
+int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
#else /* CONFIG_SECURITY */
struct security_mnt_opts {
};
return 0;
}
-static inline int security_ptrace_may_access(struct task_struct *child,
+static inline int security_ptrace_access_check(struct task_struct *child,
unsigned int mode)
{
- return cap_ptrace_may_access(child, mode);
+ return cap_ptrace_access_check(child, mode);
}
static inline int security_ptrace_traceme(struct task_struct *parent)
return 0;
}
+static inline int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+ return 0;
+}
+
static inline void security_cred_free(struct cred *cred)
{ }
{
}
+static inline void security_transfer_creds(struct cred *new,
+ const struct cred *old)
+{
+}
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
return 0;
}
+static inline int security_kernel_module_request(void)
+{
+ return 0;
+}
+
static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
int flags)
{
static inline void security_release_secctx(char *secdata, u32 seclen)
{
}
+
+static inline int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+ return -EOPNOTSUPP;
+}
+static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+ return -EOPNOTSUPP;
+}
+static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_SECURITY */
#ifdef CONFIG_SECURITY_NETWORK
const struct request_sock *req);
void security_inet_conn_established(struct sock *sk,
struct sk_buff *skb);
+int security_tun_dev_create(void);
+void security_tun_dev_post_create(struct sock *sk);
+int security_tun_dev_attach(struct sock *sk);
#else /* CONFIG_SECURITY_NETWORK */
static inline int security_unix_stream_connect(struct socket *sock,
struct sk_buff *skb)
{
}
+
+static inline int security_tun_dev_create(void)
+{
+ return 0;
+}
+
+static inline void security_tun_dev_post_create(struct sock *sk)
+{
+}
+
+static inline int security_tun_dev_attach(struct sock *sk)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
int security_key_permission(key_ref_t key_ref,
const struct cred *cred, key_perm_t perm);
int security_key_getsecurity(struct key *key, char **_buffer);
+int security_key_session_to_parent(const struct cred *cred,
+ const struct cred *parent_cred,
+ struct key *key);
#else
return 0;
}
+static inline int security_key_session_to_parent(const struct cred *cred,
+ const struct cred *parent_cred,
+ struct key *key)
+{
+ return 0;
+}
+
#endif
#endif /* CONFIG_KEYS */
ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
+int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
int vfs_removexattr(struct dentry *, const char *);
u64 run_time;
struct timespec uptime;
struct tty_struct *tty;
+ const struct cred *orig_cred;
+
+ /* Perform file operations on behalf of whoever enabled accounting */
+ orig_cred = override_creds(file->f_cred);
/*
* First check to see if there is enough free_space to continue
* the process accounting system.
*/
if (!check_free_space(acct, file))
- return;
+ goto out;
/*
* Fill the accounting struct with the needed info as recorded
sizeof(acct_t), &file->f_pos);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
+out:
+ revert_creds(orig_cred);
}
/**
#include <linux/cn_proc.h>
#include "cred-internals.h"
+#if 0
+#define kdebug(FMT, ...) \
+ printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#else
+static inline __attribute__((format(printf, 1, 2)))
+void no_printk(const char *fmt, ...)
+{
+}
+#define kdebug(FMT, ...) \
+ no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#endif
+
static struct kmem_cache *cred_jar;
/*
*/
struct cred init_cred = {
.usage = ATOMIC_INIT(4),
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ .subscribers = ATOMIC_INIT(2),
+ .magic = CRED_MAGIC,
+#endif
.securebits = SECUREBITS_DEFAULT,
.cap_inheritable = CAP_INIT_INH_SET,
.cap_permitted = CAP_FULL_SET,
#endif
};
+static inline void set_cred_subscribers(struct cred *cred, int n)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ atomic_set(&cred->subscribers, n);
+#endif
+}
+
+static inline int read_cred_subscribers(const struct cred *cred)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ return atomic_read(&cred->subscribers);
+#else
+ return 0;
+#endif
+}
+
+static inline void alter_cred_subscribers(const struct cred *_cred, int n)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ struct cred *cred = (struct cred *) _cred;
+
+ atomic_add(n, &cred->subscribers);
+#endif
+}
+
/*
* Dispose of the shared task group credentials
*/
{
struct cred *cred = container_of(rcu, struct cred, rcu);
+ kdebug("put_cred_rcu(%p)", cred);
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ if (cred->magic != CRED_MAGIC_DEAD ||
+ atomic_read(&cred->usage) != 0 ||
+ read_cred_subscribers(cred) != 0)
+ panic("CRED: put_cred_rcu() sees %p with"
+ " mag %x, put %p, usage %d, subscr %d\n",
+ cred, cred->magic, cred->put_addr,
+ atomic_read(&cred->usage),
+ read_cred_subscribers(cred));
+#else
if (atomic_read(&cred->usage) != 0)
panic("CRED: put_cred_rcu() sees %p with usage %d\n",
cred, atomic_read(&cred->usage));
+#endif
security_cred_free(cred);
key_put(cred->thread_keyring);
*/
void __put_cred(struct cred *cred)
{
+ kdebug("__put_cred(%p{%d,%d})", cred,
+ atomic_read(&cred->usage),
+ read_cred_subscribers(cred));
+
BUG_ON(atomic_read(&cred->usage) != 0);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ BUG_ON(read_cred_subscribers(cred) != 0);
+ cred->magic = CRED_MAGIC_DEAD;
+ cred->put_addr = __builtin_return_address(0);
+#endif
+ BUG_ON(cred == current->cred);
+ BUG_ON(cred == current->real_cred);
call_rcu(&cred->rcu, put_cred_rcu);
}
EXPORT_SYMBOL(__put_cred);
+/*
+ * Clean up a task's credentials when it exits
+ */
+void exit_creds(struct task_struct *tsk)
+{
+ struct cred *cred;
+
+ kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
+ atomic_read(&tsk->cred->usage),
+ read_cred_subscribers(tsk->cred));
+
+ cred = (struct cred *) tsk->real_cred;
+ tsk->real_cred = NULL;
+ validate_creds(cred);
+ alter_cred_subscribers(cred, -1);
+ put_cred(cred);
+
+ cred = (struct cred *) tsk->cred;
+ tsk->cred = NULL;
+ validate_creds(cred);
+ alter_cred_subscribers(cred, -1);
+ put_cred(cred);
+
+ cred = (struct cred *) tsk->replacement_session_keyring;
+ if (cred) {
+ tsk->replacement_session_keyring = NULL;
+ validate_creds(cred);
+ put_cred(cred);
+ }
+}
+
+/*
+ * Allocate blank credentials, such that the credentials can be filled in at a
+ * later date without risk of ENOMEM.
+ */
+struct cred *cred_alloc_blank(void)
+{
+ struct cred *new;
+
+ new = kmem_cache_zalloc(cred_jar, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+#ifdef CONFIG_KEYS
+ new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
+ if (!new->tgcred) {
+ kfree(new);
+ return NULL;
+ }
+ atomic_set(&new->tgcred->usage, 1);
+#endif
+
+ atomic_set(&new->usage, 1);
+
+ if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
+ goto error;
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ new->magic = CRED_MAGIC;
+#endif
+ return new;
+
+error:
+ abort_creds(new);
+ return NULL;
+}
+
/**
* prepare_creds - Prepare a new set of credentials for modification
*
const struct cred *old;
struct cred *new;
- BUG_ON(atomic_read(&task->real_cred->usage) < 1);
+ validate_process_creds();
new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;
+ kdebug("prepare_creds() alloc %p", new);
+
old = task->cred;
memcpy(new, old, sizeof(struct cred));
atomic_set(&new->usage, 1);
+ set_cred_subscribers(new, 0);
get_group_info(new->group_info);
get_uid(new->user);
if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
goto error;
+ validate_creds(new);
return new;
error:
if (!new)
return NULL;
+ kdebug("prepare_usermodehelper_creds() alloc %p", new);
+
memcpy(new, &init_cred, sizeof(struct cred));
atomic_set(&new->usage, 1);
+ set_cred_subscribers(new, 0);
get_group_info(new->group_info);
get_uid(new->user);
#endif
if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
goto error;
+ validate_creds(new);
BUG_ON(atomic_read(&new->usage) != 1);
return new;
) {
p->real_cred = get_cred(p->cred);
get_cred(p->cred);
+ alter_cred_subscribers(p->cred, 2);
+ kdebug("share_creds(%p{%d,%d})",
+ p->cred, atomic_read(&p->cred->usage),
+ read_cred_subscribers(p->cred));
atomic_inc(&p->cred->user->processes);
return 0;
}
atomic_inc(&new->user->processes);
p->cred = p->real_cred = get_cred(new);
+ alter_cred_subscribers(new, 2);
+ validate_creds(new);
return 0;
error_put:
int commit_creds(struct cred *new)
{
struct task_struct *task = current;
- const struct cred *old;
+ const struct cred *old = task->real_cred;
- BUG_ON(task->cred != task->real_cred);
- BUG_ON(atomic_read(&task->real_cred->usage) < 2);
+ kdebug("commit_creds(%p{%d,%d})", new,
+ atomic_read(&new->usage),
+ read_cred_subscribers(new));
+
+ BUG_ON(task->cred != old);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ BUG_ON(read_cred_subscribers(old) < 2);
+ validate_creds(old);
+ validate_creds(new);
+#endif
BUG_ON(atomic_read(&new->usage) < 1);
- old = task->real_cred;
security_commit_creds(new, old);
get_cred(new); /* we will require a ref for the subj creds too */
* cheaply with the new uid cache, so if it matters
* we should be checking for it. -DaveM
*/
+ alter_cred_subscribers(new, 2);
if (new->user != old->user)
atomic_inc(&new->user->processes);
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
if (new->user != old->user)
atomic_dec(&old->user->processes);
+ alter_cred_subscribers(old, -2);
sched_switch_user(task);
*/
void abort_creds(struct cred *new)
{
+ kdebug("abort_creds(%p{%d,%d})", new,
+ atomic_read(&new->usage),
+ read_cred_subscribers(new));
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+ BUG_ON(read_cred_subscribers(new) != 0);
+#endif
BUG_ON(atomic_read(&new->usage) < 1);
put_cred(new);
}
{
const struct cred *old = current->cred;
- rcu_assign_pointer(current->cred, get_cred(new));
+ kdebug("override_creds(%p{%d,%d})", new,
+ atomic_read(&new->usage),
+ read_cred_subscribers(new));
+
+ validate_creds(old);
+ validate_creds(new);
+ get_cred(new);
+ alter_cred_subscribers(new, 1);
+ rcu_assign_pointer(current->cred, new);
+ alter_cred_subscribers(old, -1);
+
+ kdebug("override_creds() = %p{%d,%d}", old,
+ atomic_read(&old->usage),
+ read_cred_subscribers(old));
return old;
}
EXPORT_SYMBOL(override_creds);
{
const struct cred *override = current->cred;
+ kdebug("revert_creds(%p{%d,%d})", old,
+ atomic_read(&old->usage),
+ read_cred_subscribers(old));
+
+ validate_creds(old);
+ validate_creds(override);
+ alter_cred_subscribers(old, 1);
rcu_assign_pointer(current->cred, old);
+ alter_cred_subscribers(override, -1);
put_cred(override);
}
EXPORT_SYMBOL(revert_creds);
if (!new)
return NULL;
+ kdebug("prepare_kernel_cred() alloc %p", new);
+
if (daemon)
old = get_task_cred(daemon);
else
old = get_cred(&init_cred);
+ validate_creds(old);
+
*new = *old;
get_uid(new->user);
get_group_info(new->group_info);
goto error;
atomic_set(&new->usage, 1);
+ set_cred_subscribers(new, 0);
put_cred(old);
+ validate_creds(new);
return new;
error:
return security_kernel_create_files_as(new, inode);
}
EXPORT_SYMBOL(set_create_files_as);
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+
+/*
+ * dump invalid credentials
+ */
+static void dump_invalid_creds(const struct cred *cred, const char *label,
+ const struct task_struct *tsk)
+{
+ printk(KERN_ERR "CRED: %s credentials: %p %s%s%s\n",
+ label, cred,
+ cred == &init_cred ? "[init]" : "",
+ cred == tsk->real_cred ? "[real]" : "",
+ cred == tsk->cred ? "[eff]" : "");
+ printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n",
+ cred->magic, cred->put_addr);
+ printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n",
+ atomic_read(&cred->usage),
+ read_cred_subscribers(cred));
+ printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n",
+ cred->uid, cred->euid, cred->suid, cred->fsuid);
+ printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n",
+ cred->gid, cred->egid, cred->sgid, cred->fsgid);
+#ifdef CONFIG_SECURITY
+ printk(KERN_ERR "CRED: ->security is %p\n", cred->security);
+ if ((unsigned long) cred->security >= PAGE_SIZE &&
+ (((unsigned long) cred->security & 0xffffff00) !=
+ (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)))
+ printk(KERN_ERR "CRED: ->security {%x, %x}\n",
+ ((u32*)cred->security)[0],
+ ((u32*)cred->security)[1]);
+#endif
+}
+
+/*
+ * report use of invalid credentials
+ */
+void __invalid_creds(const struct cred *cred, const char *file, unsigned line)
+{
+ printk(KERN_ERR "CRED: Invalid credentials\n");
+ printk(KERN_ERR "CRED: At %s:%u\n", file, line);
+ dump_invalid_creds(cred, "Specified", current);
+ BUG();
+}
+EXPORT_SYMBOL(__invalid_creds);
+
+/*
+ * check the credentials on a process
+ */
+void __validate_process_creds(struct task_struct *tsk,
+ const char *file, unsigned line)
+{
+ if (tsk->cred == tsk->real_cred) {
+ if (unlikely(read_cred_subscribers(tsk->cred) < 2 ||
+ creds_are_invalid(tsk->cred)))
+ goto invalid_creds;
+ } else {
+ if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 ||
+ read_cred_subscribers(tsk->cred) < 1 ||
+ creds_are_invalid(tsk->real_cred) ||
+ creds_are_invalid(tsk->cred)))
+ goto invalid_creds;
+ }
+ return;
+
+invalid_creds:
+ printk(KERN_ERR "CRED: Invalid process credentials\n");
+ printk(KERN_ERR "CRED: At %s:%u\n", file, line);
+
+ dump_invalid_creds(tsk->real_cred, "Real", tsk);
+ if (tsk->cred != tsk->real_cred)
+ dump_invalid_creds(tsk->cred, "Effective", tsk);
+ else
+ printk(KERN_ERR "CRED: Effective creds == Real creds\n");
+ BUG();
+}
+EXPORT_SYMBOL(__validate_process_creds);
+
+/*
+ * check creds for do_exit()
+ */
+void validate_creds_for_do_exit(struct task_struct *tsk)
+{
+ kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})",
+ tsk->real_cred, tsk->cred,
+ atomic_read(&tsk->cred->usage),
+ read_cred_subscribers(tsk->cred));
+
+ __validate_process_creds(tsk, __FILE__, __LINE__);
+}
+
+#endif /* CONFIG_DEBUG_CREDENTIALS */
tracehook_report_exit(&code);
+ validate_creds_for_do_exit(tsk);
+
/*
* We're taking recursive faults here in do_exit. Safest is to just
* leave this task alone and wait for reboot.
if (tsk->splice_pipe)
__free_pipe_info(tsk->splice_pipe);
+ validate_creds_for_do_exit(tsk);
+
preempt_disable();
/* causes final put_task_struct in finish_task_switch(). */
tsk->state = TASK_DEAD;
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
- put_cred(tsk->real_cred);
- put_cred(tsk->cred);
+ exit_creds(tsk);
delayacct_tsk_free(tsk);
if (!profile_handoff_task(tsk))
module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
- put_cred(p->real_cred);
- put_cred(p->cred);
+ exit_creds(p);
bad_fork_free:
free_task(p);
fork_out:
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
static int kmod_loop_msg;
+ ret = security_kernel_module_request();
+ if (ret)
+ return ret;
+
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
int retval = 0;
BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+ validate_creds(sub_info->cred);
helper_lock();
if (sub_info->path[0] == '\0')
if (!dumpable && !capable(CAP_SYS_PTRACE))
return -EPERM;
- return security_ptrace_may_access(task, mode);
+ return security_ptrace_access_check(task, mode);
}
bool ptrace_may_access(struct task_struct *task, unsigned int mode)
#include <linux/acpi.h>
#include <linux/reboot.h>
#include <linux/ftrace.h>
-#include <linux/security.h>
#include <linux/slow-work.h>
#include <linux/perf_counter.h>
This is a relatively cheap check but if you care about maximum
performance, say N.
+config DEBUG_CREDENTIALS
+ bool "Debug credential management"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on some debug checking for credential
+ management. The additional code keeps track of the number of
+ pointers from task_structs to any given cred struct, and checks to
+ see that this number never exceeds the usage count of the cred
+ struct.
+
+ Furthermore, if SELinux is enabled, this also checks that the
+ security pointer in the cred struct is never seen to be invalid.
+
+ If unsure, say N.
+
#
# Select this config option from the architecture Kconfig, if it
# it is preferred to always offer frame pointers as a config
#include <linux/sched.h>
-/**
- * is_single_threaded - Determine if a thread group is single-threaded or not
- * @p: A task in the thread group in question
- *
- * This returns true if the thread group to which a task belongs is single
- * threaded, false if it is not.
+/*
+ * Returns true if the task does not share ->mm with another thread/process.
*/
-bool is_single_threaded(struct task_struct *p)
+bool current_is_single_threaded(void)
{
- struct task_struct *g, *t;
- struct mm_struct *mm = p->mm;
+ struct task_struct *task = current;
+ struct mm_struct *mm = task->mm;
+ struct task_struct *p, *t;
+ bool ret;
- if (atomic_read(&p->signal->count) != 1)
- goto no;
+ if (atomic_read(&task->signal->live) != 1)
+ return false;
- if (atomic_read(&p->mm->mm_users) != 1) {
- read_lock(&tasklist_lock);
- do_each_thread(g, t) {
- if (t->mm == mm && t != p)
- goto no_unlock;
- } while_each_thread(g, t);
- read_unlock(&tasklist_lock);
- }
+ if (atomic_read(&mm->mm_users) == 1)
+ return true;
- return true;
+ ret = false;
+ rcu_read_lock();
+ for_each_process(p) {
+ if (unlikely(p->flags & PF_KTHREAD))
+ continue;
+ if (unlikely(p == task->group_leader))
+ continue;
+
+ t = p;
+ do {
+ if (unlikely(t->mm == mm))
+ goto found;
+ if (likely(t->mm))
+ break;
+ /*
+ * t->mm == NULL. Make sure next_thread/next_task
+ * will see other CLONE_VM tasks which might be
+ * forked before exiting.
+ */
+ smp_rmb();
+ } while_each_thread(p, t);
+ }
+ ret = true;
+found:
+ rcu_read_unlock();
-no_unlock:
- read_unlock(&tasklist_lock);
-no:
- return false;
+ return ret;
}
region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
start_off);
memset(region, 0, size);
- kmemleak_alloc(region, size, 1, 0);
+ /*
+ * The min_count is set to 0 so that bootmem allocated blocks
+ * are never reported as leaks.
+ */
+ kmemleak_alloc(region, size, 0, 0);
return region;
}
#include <linux/string.h>
#include <linux/nodemask.h>
#include <linux/mm.h>
+#include <linux/workqueue.h>
#include <asm/sections.h>
#include <asm/processor.h>
#include <asm/atomic.h>
+#include <linux/kmemcheck.h>
#include <linux/kmemleak.h>
/*
#define SECS_FIRST_SCAN 60 /* delay before the first scan */
#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */
#define GRAY_LIST_PASSES 25 /* maximum number of gray list scans */
+#define MAX_SCAN_SIZE 4096 /* maximum size of a scanned block */
#define BYTES_PER_POINTER sizeof(void *)
size_t length;
};
+#define KMEMLEAK_GREY 0
+#define KMEMLEAK_BLACK -1
+
/*
* Structure holding the metadata for each allocated memory block.
* Modifications to such objects should be made while holding the
/* flag set on newly allocated objects */
#define OBJECT_NEW (1 << 3)
+/* number of bytes to print per line; must be 16 or 32 */
+#define HEX_ROW_SIZE 16
+/* number of bytes to print at a time (1, 2, 4, 8) */
+#define HEX_GROUP_SIZE 1
+/* include ASCII after the hex output */
+#define HEX_ASCII 1
+/* max number of lines to be printed */
+#define HEX_MAX_LINES 2
+
/* the list of all allocated objects */
static LIST_HEAD(object_list);
/* the list of gray-colored objects (see color_gray comment below) */
int min_count; /* minimum reference count */
unsigned long offset; /* scan area offset */
size_t length; /* scan area length */
+ unsigned long trace[MAX_TRACE]; /* stack trace */
+ unsigned int trace_len; /* stack trace length */
};
/* early logging buffer and current position */
-static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE];
-static int crt_early_log;
+static struct early_log
+ early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
+static int crt_early_log __initdata;
static void kmemleak_disable(void);
kmemleak_disable(); \
} while (0)
+/*
+ * Printing of the objects hex dump to the seq file. The number of lines to be
+ * printed is limited to HEX_MAX_LINES to prevent seq file spamming. The
+ * actual number of printed bytes depends on HEX_ROW_SIZE. It must be called
+ * with the object->lock held.
+ */
+static void hex_dump_object(struct seq_file *seq,
+ struct kmemleak_object *object)
+{
+ const u8 *ptr = (const u8 *)object->pointer;
+ int i, len, remaining;
+ unsigned char linebuf[HEX_ROW_SIZE * 5];
+
+ /* limit the number of lines to HEX_MAX_LINES */
+ remaining = len =
+ min(object->size, (size_t)(HEX_MAX_LINES * HEX_ROW_SIZE));
+
+ seq_printf(seq, " hex dump (first %d bytes):\n", len);
+ for (i = 0; i < len; i += HEX_ROW_SIZE) {
+ int linelen = min(remaining, HEX_ROW_SIZE);
+
+ remaining -= HEX_ROW_SIZE;
+ hex_dump_to_buffer(ptr + i, linelen, HEX_ROW_SIZE,
+ HEX_GROUP_SIZE, linebuf, sizeof(linebuf),
+ HEX_ASCII);
+ seq_printf(seq, " %s\n", linebuf);
+ }
+}
+
/*
* Object colors, encoded with count and min_count:
* - white - orphan object, not enough references to it (count < min_count)
* Newly created objects don't have any color assigned (object->count == -1)
* before the next memory scan when they become white.
*/
-static int color_white(const struct kmemleak_object *object)
+static bool color_white(const struct kmemleak_object *object)
{
- return object->count != -1 && object->count < object->min_count;
+ return object->count != KMEMLEAK_BLACK &&
+ object->count < object->min_count;
}
-static int color_gray(const struct kmemleak_object *object)
+static bool color_gray(const struct kmemleak_object *object)
{
- return object->min_count != -1 && object->count >= object->min_count;
+ return object->min_count != KMEMLEAK_BLACK &&
+ object->count >= object->min_count;
}
-static int color_black(const struct kmemleak_object *object)
+static bool color_black(const struct kmemleak_object *object)
{
- return object->min_count == -1;
+ return object->min_count == KMEMLEAK_BLACK;
}
/*
* not be deleted and have a minimum age to avoid false positives caused by
* pointers temporarily stored in CPU registers.
*/
-static int unreferenced_object(struct kmemleak_object *object)
+static bool unreferenced_object(struct kmemleak_object *object)
{
return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
time_before_eq(object->jiffies + jiffies_min_age,
object->pointer, object->size);
seq_printf(seq, " comm \"%s\", pid %d, jiffies %lu\n",
object->comm, object->pid, object->jiffies);
+ hex_dump_object(seq, object);
seq_printf(seq, " backtrace:\n");
for (i = 0; i < object->trace_len; i++) {
object->comm, object->pid, object->jiffies);
pr_notice(" min_count = %d\n", object->min_count);
pr_notice(" count = %d\n", object->count);
+ pr_notice(" flags = 0x%lx\n", object->flags);
pr_notice(" backtrace:\n");
print_stack_trace(&trace, 4);
}
return object;
}
+/*
+ * Save stack trace to the given array of MAX_TRACE size.
+ */
+static int __save_stack_trace(unsigned long *trace)
+{
+ struct stack_trace stack_trace;
+
+ stack_trace.max_entries = MAX_TRACE;
+ stack_trace.nr_entries = 0;
+ stack_trace.entries = trace;
+ stack_trace.skip = 2;
+ save_stack_trace(&stack_trace);
+
+ return stack_trace.nr_entries;
+}
+
/*
* Create the metadata (struct kmemleak_object) corresponding to an allocated
* memory block and add it to the object_list and object_tree_root.
*/
-static void create_object(unsigned long ptr, size_t size, int min_count,
- gfp_t gfp)
+static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
+ int min_count, gfp_t gfp)
{
unsigned long flags;
struct kmemleak_object *object;
struct prio_tree_node *node;
- struct stack_trace trace;
object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
if (!object) {
kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
- return;
+ return NULL;
}
INIT_LIST_HEAD(&object->object_list);
}
/* kernel backtrace */
- trace.max_entries = MAX_TRACE;
- trace.nr_entries = 0;
- trace.entries = object->trace;
- trace.skip = 1;
- save_stack_trace(&trace);
- object->trace_len = trace.nr_entries;
+ object->trace_len = __save_stack_trace(object->trace);
INIT_PRIO_TREE_NODE(&object->tree_node);
object->tree_node.start = ptr;
object->tree_node.last = ptr + size - 1;
write_lock_irqsave(&kmemleak_lock, flags);
+
min_addr = min(min_addr, ptr);
max_addr = max(max_addr, ptr + size);
node = prio_tree_insert(&object_tree_root, &object->tree_node);
* random memory blocks.
*/
if (node != &object->tree_node) {
- unsigned long flags;
-
kmemleak_stop("Cannot insert 0x%lx into the object search tree "
"(already existing)\n", ptr);
object = lookup_object(ptr, 1);
- spin_lock_irqsave(&object->lock, flags);
+ spin_lock(&object->lock);
dump_object_info(object);
- spin_unlock_irqrestore(&object->lock, flags);
+ spin_unlock(&object->lock);
goto out;
}
list_add_tail_rcu(&object->object_list, &object_list);
out:
write_unlock_irqrestore(&kmemleak_lock, flags);
+ return object;
}
/*
put_object(object);
}
-/*
- * Make a object permanently as gray-colored so that it can no longer be
- * reported as a leak. This is used in general to mark a false positive.
- */
-static void make_gray_object(unsigned long ptr)
+
+static void __paint_it(struct kmemleak_object *object, int color)
+{
+ object->min_count = color;
+ if (color == KMEMLEAK_BLACK)
+ object->flags |= OBJECT_NO_SCAN;
+}
+
+static void paint_it(struct kmemleak_object *object, int color)
{
unsigned long flags;
+
+ spin_lock_irqsave(&object->lock, flags);
+ __paint_it(object, color);
+ spin_unlock_irqrestore(&object->lock, flags);
+}
+
+static void paint_ptr(unsigned long ptr, int color)
+{
struct kmemleak_object *object;
object = find_and_get_object(ptr, 0);
if (!object) {
- kmemleak_warn("Graying unknown object at 0x%08lx\n", ptr);
+ kmemleak_warn("Trying to color unknown object "
+ "at 0x%08lx as %s\n", ptr,
+ (color == KMEMLEAK_GREY) ? "Grey" :
+ (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
return;
}
-
- spin_lock_irqsave(&object->lock, flags);
- object->min_count = 0;
- spin_unlock_irqrestore(&object->lock, flags);
+ paint_it(object, color);
put_object(object);
}
+/*
+ * Make a object permanently as gray-colored so that it can no longer be
+ * reported as a leak. This is used in general to mark a false positive.
+ */
+static void make_gray_object(unsigned long ptr)
+{
+ paint_ptr(ptr, KMEMLEAK_GREY);
+}
+
/*
* Mark the object as black-colored so that it is ignored from scans and
* reporting.
*/
static void make_black_object(unsigned long ptr)
{
- unsigned long flags;
- struct kmemleak_object *object;
-
- object = find_and_get_object(ptr, 0);
- if (!object) {
- kmemleak_warn("Blacking unknown object at 0x%08lx\n", ptr);
- return;
- }
-
- spin_lock_irqsave(&object->lock, flags);
- object->min_count = -1;
- spin_unlock_irqrestore(&object->lock, flags);
- put_object(object);
+ paint_ptr(ptr, KMEMLEAK_BLACK);
}
/*
* Log an early kmemleak_* call to the early_log buffer. These calls will be
* processed later once kmemleak is fully initialized.
*/
-static void log_early(int op_type, const void *ptr, size_t size,
- int min_count, unsigned long offset, size_t length)
+static void __init log_early(int op_type, const void *ptr, size_t size,
+ int min_count, unsigned long offset, size_t length)
{
unsigned long flags;
struct early_log *log;
if (crt_early_log >= ARRAY_SIZE(early_log)) {
- pr_warning("Early log buffer exceeded\n");
+ pr_warning("Early log buffer exceeded, "
+ "please increase DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n");
kmemleak_disable();
return;
}
log->min_count = min_count;
log->offset = offset;
log->length = length;
+ if (op_type == KMEMLEAK_ALLOC)
+ log->trace_len = __save_stack_trace(log->trace);
crt_early_log++;
local_irq_restore(flags);
}
+/*
+ * Log an early allocated block and populate the stack trace.
+ */
+static void early_alloc(struct early_log *log)
+{
+ struct kmemleak_object *object;
+ unsigned long flags;
+ int i;
+
+ if (!atomic_read(&kmemleak_enabled) || !log->ptr || IS_ERR(log->ptr))
+ return;
+
+ /*
+ * RCU locking needed to ensure object is not freed via put_object().
+ */
+ rcu_read_lock();
+ object = create_object((unsigned long)log->ptr, log->size,
+ log->min_count, GFP_KERNEL);
+ spin_lock_irqsave(&object->lock, flags);
+ for (i = 0; i < log->trace_len; i++)
+ object->trace[i] = log->trace[i];
+ object->trace_len = log->trace_len;
+ spin_unlock_irqrestore(&object->lock, flags);
+ rcu_read_unlock();
+}
+
/*
* Memory allocation function callback. This function is called from the
* kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
* vmalloc etc.).
*/
-void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp)
+void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
+ gfp_t gfp)
{
pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);
* Memory freeing function callback. This function is called from the kernel
* allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
*/
-void kmemleak_free(const void *ptr)
+void __ref kmemleak_free(const void *ptr)
{
pr_debug("%s(0x%p)\n", __func__, ptr);
* Partial memory freeing function callback. This function is usually called
* from bootmem allocator when (part of) a memory block is freed.
*/
-void kmemleak_free_part(const void *ptr, size_t size)
+void __ref kmemleak_free_part(const void *ptr, size_t size)
{
pr_debug("%s(0x%p)\n", __func__, ptr);
* Mark an already allocated memory block as a false positive. This will cause
* the block to no longer be reported as leak and always be scanned.
*/
-void kmemleak_not_leak(const void *ptr)
+void __ref kmemleak_not_leak(const void *ptr)
{
pr_debug("%s(0x%p)\n", __func__, ptr);
* corresponding block is not a leak and does not contain any references to
* other allocated memory blocks.
*/
-void kmemleak_ignore(const void *ptr)
+void __ref kmemleak_ignore(const void *ptr)
{
pr_debug("%s(0x%p)\n", __func__, ptr);
/*
* Limit the range to be scanned in an allocated memory block.
*/
-void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length,
- gfp_t gfp)
+void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
+ size_t length, gfp_t gfp)
{
pr_debug("%s(0x%p)\n", __func__, ptr);
/*
* Inform kmemleak not to scan the given memory block.
*/
-void kmemleak_no_scan(const void *ptr)
+void __ref kmemleak_no_scan(const void *ptr)
{
pr_debug("%s(0x%p)\n", __func__, ptr);
unsigned long *end = _end - (BYTES_PER_POINTER - 1);
for (ptr = start; ptr < end; ptr++) {
- unsigned long flags;
- unsigned long pointer = *ptr;
struct kmemleak_object *object;
+ unsigned long flags;
+ unsigned long pointer;
if (allow_resched)
cond_resched();
if (scan_should_stop())
break;
+ /* don't scan uninitialized memory */
+ if (!kmemcheck_is_obj_initialized((unsigned long)ptr,
+ BYTES_PER_POINTER))
+ continue;
+
+ pointer = *ptr;
+
object = find_and_get_object(pointer, 1);
if (!object)
continue;
if (!(object->flags & OBJECT_ALLOCATED))
/* already freed object */
goto out;
- if (hlist_empty(&object->area_list))
- scan_block((void *)object->pointer,
- (void *)(object->pointer + object->size), object, 0);
- else
+ if (hlist_empty(&object->area_list)) {
+ void *start = (void *)object->pointer;
+ void *end = (void *)(object->pointer + object->size);
+
+ while (start < end && (object->flags & OBJECT_ALLOCATED) &&
+ !(object->flags & OBJECT_NO_SCAN)) {
+ scan_block(start, min(start + MAX_SCAN_SIZE, end),
+ object, 0);
+ start += MAX_SCAN_SIZE;
+
+ spin_unlock_irqrestore(&object->lock, flags);
+ cond_resched();
+ spin_lock_irqsave(&object->lock, flags);
+ }
+ } else
hlist_for_each_entry(area, elem, &object->area_list, node)
scan_block((void *)(object->pointer + area->offset),
(void *)(object->pointer + area->offset
{
unsigned long flags;
struct kmemleak_object *object, *tmp;
- struct task_struct *task;
int i;
int new_leaks = 0;
int gray_list_pass = 0;
}
/*
- * Scanning the task stacks may introduce false negatives and it is
- * not enabled by default.
+ * Scanning the task stacks (may introduce false negatives).
*/
if (kmemleak_stack_scan) {
+ struct task_struct *p, *g;
+
read_lock(&tasklist_lock);
- for_each_process(task)
- scan_block(task_stack_page(task),
- task_stack_page(task) + THREAD_SIZE,
- NULL, 0);
+ do_each_thread(g, p) {
+ scan_block(task_stack_page(p), task_stack_page(p) +
+ THREAD_SIZE, NULL, 0);
+ } while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
* Start the automatic memory scanning thread. This function must be called
* with the scan_mutex held.
*/
-void start_scan_thread(void)
+static void start_scan_thread(void)
{
if (scan_thread)
return;
* Stop the automatic memory scanning thread. This function must be called
* with the scan_mutex held.
*/
-void stop_scan_thread(void)
+static void stop_scan_thread(void)
{
if (scan_thread) {
kthread_stop(scan_thread);
return seq_release(inode, file);
}
+static int dump_str_object_info(const char *str)
+{
+ unsigned long flags;
+ struct kmemleak_object *object;
+ unsigned long addr;
+
+ addr= simple_strtoul(str, NULL, 0);
+ object = find_and_get_object(addr, 0);
+ if (!object) {
+ pr_info("Unknown object at 0x%08lx\n", addr);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&object->lock, flags);
+ dump_object_info(object);
+ spin_unlock_irqrestore(&object->lock, flags);
+
+ put_object(object);
+ return 0;
+}
+
+/*
+ * We use grey instead of black to ensure we can do future scans on the same
+ * objects. If we did not do future scans these black objects could
+ * potentially contain references to newly allocated objects in the future and
+ * we'd end up with false positives.
+ */
+static void kmemleak_clear(void)
+{
+ struct kmemleak_object *object;
+ unsigned long flags;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(object, &object_list, object_list) {
+ spin_lock_irqsave(&object->lock, flags);
+ if ((object->flags & OBJECT_REPORTED) &&
+ unreferenced_object(object))
+ __paint_it(object, KMEMLEAK_GREY);
+ spin_unlock_irqrestore(&object->lock, flags);
+ }
+ rcu_read_unlock();
+}
+
/*
* File write operation to configure kmemleak at run-time. The following
* commands can be written to the /sys/kernel/debug/kmemleak file:
* scan=... - set the automatic memory scanning period in seconds (0 to
* disable it)
* scan - trigger a memory scan
+ * clear - mark all current reported unreferenced kmemleak objects as
+ * grey to ignore printing them
+ * dump=... - dump information about the object found at the given address
*/
static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
size_t size, loff_t *ppos)
}
} else if (strncmp(buf, "scan", 4) == 0)
kmemleak_scan();
+ else if (strncmp(buf, "clear", 5) == 0)
+ kmemleak_clear();
+ else if (strncmp(buf, "dump=", 5) == 0)
+ ret = dump_str_object_info(buf + 5);
else
ret = -EINVAL;
* Perform the freeing of the kmemleak internal objects after waiting for any
* current memory scan to complete.
*/
-static int kmemleak_cleanup_thread(void *arg)
+static void kmemleak_do_cleanup(struct work_struct *work)
{
struct kmemleak_object *object;
delete_object_full(object->pointer);
rcu_read_unlock();
mutex_unlock(&scan_mutex);
-
- return 0;
}
-/*
- * Start the clean-up thread.
- */
-static void kmemleak_cleanup(void)
-{
- struct task_struct *cleanup_thread;
-
- cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
- "kmemleak-clean");
- if (IS_ERR(cleanup_thread))
- pr_warning("Failed to create the clean-up thread\n");
-}
+static DECLARE_WORK(cleanup_work, kmemleak_do_cleanup);
/*
* Disable kmemleak. No memory allocation/freeing will be traced once this
/* check whether it is too early for a kernel thread */
if (atomic_read(&kmemleak_initialized))
- kmemleak_cleanup();
+ schedule_work(&cleanup_work);
pr_info("Kernel memory leak detector disabled\n");
}
switch (log->op_type) {
case KMEMLEAK_ALLOC:
- kmemleak_alloc(log->ptr, log->size, log->min_count,
- GFP_KERNEL);
+ early_alloc(log);
break;
case KMEMLEAK_FREE:
kmemleak_free(log->ptr);
* after setting kmemleak_initialized and we may end up with
* two clean-up threads but serialized by scan_mutex.
*/
- kmemleak_cleanup();
+ schedule_work(&cleanup_work);
return -ENOMEM;
}
dev = __dev_get_by_name(net, name);
read_unlock(&dev_base_lock);
- if (!dev && capable(CAP_SYS_MODULE))
+ if (!dev && capable(CAP_NET_ADMIN))
request_module("%s", name);
}
spin_lock(&tcp_cong_list_lock);
ca = tcp_ca_find(name);
#ifdef CONFIG_MODULES
- if (!ca && capable(CAP_SYS_MODULE)) {
+ if (!ca && capable(CAP_NET_ADMIN)) {
spin_unlock(&tcp_cong_list_lock);
request_module("tcp_%s", name);
#ifdef CONFIG_MODULES
/* not found attempt to autoload module */
- if (!ca && capable(CAP_SYS_MODULE)) {
+ if (!ca && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
request_module("tcp_%s", name);
rcu_read_lock();
# Must precede capability.o in order to stack properly.
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o
-ifeq ($(CONFIG_AUDIT),y)
-obj-$(CONFIG_SECURITY_SMACK) += lsm_audit.o
-endif
+obj-$(CONFIG_AUDIT) += lsm_audit.o
obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o
obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o
obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o
return 0;
}
+static int cap_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+ return 0;
+}
+
static void cap_cred_free(struct cred *cred)
{
}
{
}
+static void cap_cred_transfer(struct cred *new, const struct cred *old)
+{
+}
+
static int cap_kernel_act_as(struct cred *new, u32 secid)
{
return 0;
return 0;
}
+static int cap_kernel_module_request(void)
+{
+ return 0;
+}
+
static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
{
return 0;
{
}
+
+
static void cap_req_classify_flow(const struct request_sock *req,
struct flowi *fl)
{
}
+
+static int cap_tun_dev_create(void)
+{
+ return 0;
+}
+
+static void cap_tun_dev_post_create(struct sock *sk)
+{
+}
+
+static int cap_tun_dev_attach(struct sock *sk)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
{
}
+static int cap_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+ return 0;
+}
+
+static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+ return 0;
+}
+
+static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+ return 0;
+}
#ifdef CONFIG_KEYS
static int cap_key_alloc(struct key *key, const struct cred *cred,
unsigned long flags)
return 0;
}
+static int cap_key_session_to_parent(const struct cred *cred,
+ const struct cred *parent_cred,
+ struct key *key)
+{
+ return 0;
+}
+
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
void security_fixup_ops(struct security_operations *ops)
{
- set_to_cap_if_null(ops, ptrace_may_access);
+ set_to_cap_if_null(ops, ptrace_access_check);
set_to_cap_if_null(ops, ptrace_traceme);
set_to_cap_if_null(ops, capget);
set_to_cap_if_null(ops, capset);
set_to_cap_if_null(ops, file_receive);
set_to_cap_if_null(ops, dentry_open);
set_to_cap_if_null(ops, task_create);
+ set_to_cap_if_null(ops, cred_alloc_blank);
set_to_cap_if_null(ops, cred_free);
set_to_cap_if_null(ops, cred_prepare);
set_to_cap_if_null(ops, cred_commit);
+ set_to_cap_if_null(ops, cred_transfer);
set_to_cap_if_null(ops, kernel_act_as);
set_to_cap_if_null(ops, kernel_create_files_as);
+ set_to_cap_if_null(ops, kernel_module_request);
set_to_cap_if_null(ops, task_setuid);
set_to_cap_if_null(ops, task_fix_setuid);
set_to_cap_if_null(ops, task_setgid);
set_to_cap_if_null(ops, secid_to_secctx);
set_to_cap_if_null(ops, secctx_to_secid);
set_to_cap_if_null(ops, release_secctx);
+ set_to_cap_if_null(ops, inode_notifysecctx);
+ set_to_cap_if_null(ops, inode_setsecctx);
+ set_to_cap_if_null(ops, inode_getsecctx);
#ifdef CONFIG_SECURITY_NETWORK
set_to_cap_if_null(ops, unix_stream_connect);
set_to_cap_if_null(ops, unix_may_send);
set_to_cap_if_null(ops, inet_csk_clone);
set_to_cap_if_null(ops, inet_conn_established);
set_to_cap_if_null(ops, req_classify_flow);
+ set_to_cap_if_null(ops, tun_dev_create);
+ set_to_cap_if_null(ops, tun_dev_post_create);
+ set_to_cap_if_null(ops, tun_dev_attach);
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
set_to_cap_if_null(ops, xfrm_policy_alloc_security);
set_to_cap_if_null(ops, key_free);
set_to_cap_if_null(ops, key_permission);
set_to_cap_if_null(ops, key_getsecurity);
+ set_to_cap_if_null(ops, key_session_to_parent);
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
set_to_cap_if_null(ops, audit_rule_init);
}
/**
- * cap_ptrace_may_access - Determine whether the current process may access
+ * cap_ptrace_access_check - Determine whether the current process may access
* another
* @child: The process to be accessed
* @mode: The mode of attachment.
* Determine whether a process may access another, returning 0 if permission
* granted, -ve if denied.
*/
-int cap_ptrace_may_access(struct task_struct *child, unsigned int mode)
+int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
{
int ret = 0;
#
obj-y := \
+ gc.o \
key.o \
keyring.o \
keyctl.o \
case KEYCTL_GET_SECURITY:
return keyctl_get_security(arg2, compat_ptr(arg3), arg4);
+ case KEYCTL_SESSION_TO_PARENT:
+ return keyctl_session_to_parent();
+
default:
return -EOPNOTSUPP;
}
--- /dev/null
+/* Key garbage collector
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <keys/keyring-type.h>
+#include "internal.h"
+
+/*
+ * Delay between key revocation/expiry in seconds
+ */
+unsigned key_gc_delay = 5 * 60;
+
+/*
+ * Reaper
+ */
+static void key_gc_timer_func(unsigned long);
+static void key_garbage_collector(struct work_struct *);
+static DEFINE_TIMER(key_gc_timer, key_gc_timer_func, 0, 0);
+static DECLARE_WORK(key_gc_work, key_garbage_collector);
+static key_serial_t key_gc_cursor; /* the last key the gc considered */
+static unsigned long key_gc_executing;
+static time_t key_gc_next_run = LONG_MAX;
+
+/*
+ * Schedule a garbage collection run
+ * - precision isn't particularly important
+ */
+void key_schedule_gc(time_t gc_at)
+{
+ unsigned long expires;
+ time_t now = current_kernel_time().tv_sec;
+
+ kenter("%ld", gc_at - now);
+
+ gc_at += key_gc_delay;
+
+ if (now >= gc_at) {
+ schedule_work(&key_gc_work);
+ } else if (gc_at < key_gc_next_run) {
+ expires = jiffies + (gc_at - now) * HZ;
+ mod_timer(&key_gc_timer, expires);
+ }
+}
+
+/*
+ * The garbage collector timer kicked off
+ */
+static void key_gc_timer_func(unsigned long data)
+{
+ kenter("");
+ key_gc_next_run = LONG_MAX;
+ schedule_work(&key_gc_work);
+}
+
+/*
+ * Garbage collect pointers from a keyring
+ * - return true if we altered the keyring
+ */
+static bool key_gc_keyring(struct key *keyring, time_t limit)
+ __releases(key_serial_lock)
+{
+ struct keyring_list *klist;
+ struct key *key;
+ int loop;
+
+ kenter("%x", key_serial(keyring));
+
+ if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
+ goto dont_gc;
+
+ /* scan the keyring looking for dead keys */
+ klist = rcu_dereference(keyring->payload.subscriptions);
+ if (!klist)
+ goto dont_gc;
+
+ for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+ key = klist->keys[loop];
+ if (test_bit(KEY_FLAG_DEAD, &key->flags) ||
+ (key->expiry > 0 && key->expiry <= limit))
+ goto do_gc;
+ }
+
+dont_gc:
+ kleave(" = false");
+ return false;
+
+do_gc:
+ key_gc_cursor = keyring->serial;
+ key_get(keyring);
+ spin_unlock(&key_serial_lock);
+ keyring_gc(keyring, limit);
+ key_put(keyring);
+ kleave(" = true");
+ return true;
+}
+
+/*
+ * Garbage collector for keys
+ * - this involves scanning the keyrings for dead, expired and revoked keys
+ * that have overstayed their welcome
+ */
+static void key_garbage_collector(struct work_struct *work)
+{
+ struct rb_node *rb;
+ key_serial_t cursor;
+ struct key *key, *xkey;
+ time_t new_timer = LONG_MAX, limit;
+
+ kenter("");
+
+ if (test_and_set_bit(0, &key_gc_executing)) {
+ key_schedule_gc(current_kernel_time().tv_sec);
+ return;
+ }
+
+ limit = current_kernel_time().tv_sec;
+ if (limit > key_gc_delay)
+ limit -= key_gc_delay;
+ else
+ limit = key_gc_delay;
+
+ spin_lock(&key_serial_lock);
+
+ if (RB_EMPTY_ROOT(&key_serial_tree))
+ goto reached_the_end;
+
+ cursor = key_gc_cursor;
+ if (cursor < 0)
+ cursor = 0;
+
+ /* find the first key above the cursor */
+ key = NULL;
+ rb = key_serial_tree.rb_node;
+ while (rb) {
+ xkey = rb_entry(rb, struct key, serial_node);
+ if (cursor < xkey->serial) {
+ key = xkey;
+ rb = rb->rb_left;
+ } else if (cursor > xkey->serial) {
+ rb = rb->rb_right;
+ } else {
+ rb = rb_next(rb);
+ if (!rb)
+ goto reached_the_end;
+ key = rb_entry(rb, struct key, serial_node);
+ break;
+ }
+ }
+
+ if (!key)
+ goto reached_the_end;
+
+ /* trawl through the keys looking for keyrings */
+ for (;;) {
+ if (key->expiry > 0 && key->expiry < new_timer)
+ new_timer = key->expiry;
+
+ if (key->type == &key_type_keyring &&
+ key_gc_keyring(key, limit)) {
+ /* the gc ate our lock */
+ schedule_work(&key_gc_work);
+ goto no_unlock;
+ }
+
+ rb = rb_next(&key->serial_node);
+ if (!rb) {
+ key_gc_cursor = 0;
+ break;
+ }
+ key = rb_entry(rb, struct key, serial_node);
+ }
+
+out:
+ spin_unlock(&key_serial_lock);
+no_unlock:
+ clear_bit(0, &key_gc_executing);
+ if (new_timer < LONG_MAX)
+ key_schedule_gc(new_timer);
+
+ kleave("");
+ return;
+
+reached_the_end:
+ key_gc_cursor = 0;
+ goto out;
+}
struct key *dest_keyring,
unsigned long flags);
-extern key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
+extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
key_perm_t perm);
+#define KEY_LOOKUP_CREATE 0x01
+#define KEY_LOOKUP_PARTIAL 0x02
+#define KEY_LOOKUP_FOR_UNLINK 0x04
extern long join_session_keyring(const char *name);
+extern unsigned key_gc_delay;
+extern void keyring_gc(struct key *keyring, time_t limit);
+extern void key_schedule_gc(time_t expiry_at);
+
/*
* check to see whether permission is granted to use a key in the desired way
*/
extern long keyctl_assume_authority(key_serial_t);
extern long keyctl_get_security(key_serial_t keyid, char __user *buffer,
size_t buflen);
+extern long keyctl_session_to_parent(void);
/*
* debugging key validation
set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
now = current_kernel_time();
key->expiry = now.tv_sec + timeout;
+ key_schedule_gc(key->expiry);
if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags))
awaken = 1;
goto error;
found:
- /* pretend it doesn't exist if it's dead */
- if (atomic_read(&key->usage) == 0 ||
- test_bit(KEY_FLAG_DEAD, &key->flags) ||
- key->type == &key_type_dead)
+ /* pretend it doesn't exist if it is awaiting deletion */
+ if (atomic_read(&key->usage) == 0)
goto not_found;
/* this races with key_put(), but that doesn't matter since key_put()
*/
void key_revoke(struct key *key)
{
+ struct timespec now;
+ time_t time;
+
key_check(key);
/* make sure no one's trying to change or use the key when we mark it
key->type->revoke)
key->type->revoke(key);
+ /* set the death time to no more than the expiry time */
+ now = current_kernel_time();
+ time = now.tv_sec;
+ if (key->revoked_at == 0 || key->revoked_at > time) {
+ key->revoked_at = time;
+ key_schedule_gc(key->revoked_at);
+ }
+
up_write(&key->sem);
} /* end key_revoke() */
for (_n = rb_first(&key_serial_tree); _n; _n = rb_next(_n)) {
key = rb_entry(_n, struct key, serial_node);
- if (key->type == ktype)
+ if (key->type == ktype) {
key->type = &key_type_dead;
+ set_bit(KEY_FLAG_DEAD, &key->flags);
+ }
}
spin_unlock(&key_serial_lock);
spin_unlock(&key_serial_lock);
up_write(&key_types_sem);
+ key_schedule_gc(0);
+
} /* end unregister_key_type() */
EXPORT_SYMBOL(unregister_key_type);
}
/* find the target keyring (which must be writable) */
- keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
if (IS_ERR(keyring_ref)) {
ret = PTR_ERR(keyring_ref);
goto error3;
/* get the destination keyring if specified */
dest_ref = NULL;
if (destringid) {
- dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
+ dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
+ KEY_WRITE);
if (IS_ERR(dest_ref)) {
ret = PTR_ERR(dest_ref);
goto error3;
long keyctl_get_keyring_ID(key_serial_t id, int create)
{
key_ref_t key_ref;
+ unsigned long lflags;
long ret;
- key_ref = lookup_user_key(id, create, 0, KEY_SEARCH);
+ lflags = create ? KEY_LOOKUP_CREATE : 0;
+ key_ref = lookup_user_key(id, lflags, KEY_SEARCH);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
}
/* find the target key (which must be writable) */
- key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
+ key_ref = lookup_user_key(id, 0, KEY_WRITE);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error2;
key_ref_t key_ref;
long ret;
- key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
+ key_ref = lookup_user_key(id, 0, KEY_WRITE);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
- goto error;
+ if (ret != -EACCES)
+ goto error;
+ key_ref = lookup_user_key(id, 0, KEY_SETATTR);
+ if (IS_ERR(key_ref)) {
+ ret = PTR_ERR(key_ref);
+ goto error;
+ }
}
key_revoke(key_ref_to_ptr(key_ref));
key_ref_t keyring_ref;
long ret;
- keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
if (IS_ERR(keyring_ref)) {
ret = PTR_ERR(keyring_ref);
goto error;
key_ref_t keyring_ref, key_ref;
long ret;
- keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
if (IS_ERR(keyring_ref)) {
ret = PTR_ERR(keyring_ref);
goto error;
}
- key_ref = lookup_user_key(id, 1, 0, KEY_LINK);
+ key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_LINK);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error2;
key_ref_t keyring_ref, key_ref;
long ret;
- keyring_ref = lookup_user_key(ringid, 0, 0, KEY_WRITE);
+ keyring_ref = lookup_user_key(ringid, 0, KEY_WRITE);
if (IS_ERR(keyring_ref)) {
ret = PTR_ERR(keyring_ref);
goto error;
}
- key_ref = lookup_user_key(id, 0, 0, 0);
+ key_ref = lookup_user_key(id, KEY_LOOKUP_FOR_UNLINK, 0);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error2;
char *tmpbuf;
long ret;
- key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
+ key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
if (IS_ERR(key_ref)) {
/* viewing a key under construction is permitted if we have the
* authorisation token handy */
if (!IS_ERR(instkey)) {
key_put(instkey);
key_ref = lookup_user_key(keyid,
- 0, 1, 0);
+ KEY_LOOKUP_PARTIAL,
+ 0);
if (!IS_ERR(key_ref))
goto okay;
}
}
/* get the keyring at which to begin the search */
- keyring_ref = lookup_user_key(ringid, 0, 0, KEY_SEARCH);
+ keyring_ref = lookup_user_key(ringid, 0, KEY_SEARCH);
if (IS_ERR(keyring_ref)) {
ret = PTR_ERR(keyring_ref);
goto error2;
/* get the destination keyring if specified */
dest_ref = NULL;
if (destringid) {
- dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
+ dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
+ KEY_WRITE);
if (IS_ERR(dest_ref)) {
ret = PTR_ERR(dest_ref);
goto error3;
long ret;
/* find the key first */
- key_ref = lookup_user_key(keyid, 0, 0, 0);
+ key_ref = lookup_user_key(keyid, 0, 0);
if (IS_ERR(key_ref)) {
ret = -ENOKEY;
goto error;
if (uid == (uid_t) -1 && gid == (gid_t) -1)
goto error;
- key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+ key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+ KEY_SETATTR);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
goto error;
- key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+ key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+ KEY_SETATTR);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
/* if a specific keyring is nominated by ID, then use that */
if (ringid > 0) {
- dkref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+ dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
if (IS_ERR(dkref))
return PTR_ERR(dkref);
*_dest_keyring = key_ref_to_ptr(dkref);
time_t expiry;
long ret;
- key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+ key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+ KEY_SETATTR);
if (IS_ERR(key_ref)) {
ret = PTR_ERR(key_ref);
goto error;
}
key->expiry = expiry;
+ key_schedule_gc(key->expiry);
up_write(&key->sem);
key_put(key);
char *context;
long ret;
- key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
+ key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
if (IS_ERR(key_ref)) {
if (PTR_ERR(key_ref) != -EACCES)
return PTR_ERR(key_ref);
return PTR_ERR(key_ref);
key_put(instkey);
- key_ref = lookup_user_key(keyid, 0, 1, 0);
+ key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, 0);
if (IS_ERR(key_ref))
return PTR_ERR(key_ref);
}
return ret;
}
+/*
+ * attempt to install the calling process's session keyring on the process's
+ * parent process
+ * - the keyring must exist and must grant us LINK permission
+ * - implements keyctl(KEYCTL_SESSION_TO_PARENT)
+ */
+long keyctl_session_to_parent(void)
+{
+ struct task_struct *me, *parent;
+ const struct cred *mycred, *pcred;
+ struct cred *cred, *oldcred;
+ key_ref_t keyring_r;
+ int ret;
+
+ keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_LINK);
+ if (IS_ERR(keyring_r))
+ return PTR_ERR(keyring_r);
+
+ /* our parent is going to need a new cred struct, a new tgcred struct
+ * and new security data, so we allocate them here to prevent ENOMEM in
+ * our parent */
+ ret = -ENOMEM;
+ cred = cred_alloc_blank();
+ if (!cred)
+ goto error_keyring;
+
+ cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
+ keyring_r = NULL;
+
+ me = current;
+ write_lock_irq(&tasklist_lock);
+
+ parent = me->real_parent;
+ ret = -EPERM;
+
+ /* the parent mustn't be init and mustn't be a kernel thread */
+ if (parent->pid <= 1 || !parent->mm)
+ goto not_permitted;
+
+ /* the parent must be single threaded */
+ if (atomic_read(&parent->signal->count) != 1)
+ goto not_permitted;
+
+ /* the parent and the child must have different session keyrings or
+ * there's no point */
+ mycred = current_cred();
+ pcred = __task_cred(parent);
+ if (mycred == pcred ||
+ mycred->tgcred->session_keyring == pcred->tgcred->session_keyring)
+ goto already_same;
+
+ /* the parent must have the same effective ownership and mustn't be
+ * SUID/SGID */
+ if (pcred-> uid != mycred->euid ||
+ pcred->euid != mycred->euid ||
+ pcred->suid != mycred->euid ||
+ pcred-> gid != mycred->egid ||
+ pcred->egid != mycred->egid ||
+ pcred->sgid != mycred->egid)
+ goto not_permitted;
+
+ /* the keyrings must have the same UID */
+ if (pcred ->tgcred->session_keyring->uid != mycred->euid ||
+ mycred->tgcred->session_keyring->uid != mycred->euid)
+ goto not_permitted;
+
+ /* the LSM must permit the replacement of the parent's keyring with the
+ * keyring from this process */
+ ret = security_key_session_to_parent(mycred, pcred,
+ key_ref_to_ptr(keyring_r));
+ if (ret < 0)
+ goto not_permitted;
+
+ /* if there's an already pending keyring replacement, then we replace
+ * that */
+ oldcred = parent->replacement_session_keyring;
+
+ /* the replacement session keyring is applied just prior to userspace
+ * restarting */
+ parent->replacement_session_keyring = cred;
+ cred = NULL;
+ set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME);
+
+ write_unlock_irq(&tasklist_lock);
+ if (oldcred)
+ put_cred(oldcred);
+ return 0;
+
+already_same:
+ ret = 0;
+not_permitted:
+ put_cred(cred);
+ return ret;
+
+error_keyring:
+ key_ref_put(keyring_r);
+ return ret;
+}
+
/*****************************************************************************/
/*
* the key control system call
(char __user *) arg3,
(size_t) arg4);
+ case KEYCTL_SESSION_TO_PARENT:
+ return keyctl_session_to_parent();
+
default:
return -EOPNOTSUPP;
}
}
} /* end keyring_revoke() */
+
+/*
+ * Determine whether a key is dead
+ */
+static bool key_is_dead(struct key *key, time_t limit)
+{
+ return test_bit(KEY_FLAG_DEAD, &key->flags) ||
+ (key->expiry > 0 && key->expiry <= limit);
+}
+
+/*
+ * Collect garbage from the contents of a keyring
+ */
+void keyring_gc(struct key *keyring, time_t limit)
+{
+ struct keyring_list *klist, *new;
+ struct key *key;
+ int loop, keep, max;
+
+ kenter("%x", key_serial(keyring));
+
+ down_write(&keyring->sem);
+
+ klist = keyring->payload.subscriptions;
+ if (!klist)
+ goto just_return;
+
+ /* work out how many subscriptions we're keeping */
+ keep = 0;
+ for (loop = klist->nkeys - 1; loop >= 0; loop--)
+ if (!key_is_dead(klist->keys[loop], limit));
+ keep++;
+
+ if (keep == klist->nkeys)
+ goto just_return;
+
+ /* allocate a new keyring payload */
+ max = roundup(keep, 4);
+ new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
+ GFP_KERNEL);
+ if (!new)
+ goto just_return;
+ new->maxkeys = max;
+ new->nkeys = 0;
+ new->delkey = 0;
+
+ /* install the live keys
+ * - must take care as expired keys may be updated back to life
+ */
+ keep = 0;
+ for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+ key = klist->keys[loop];
+ if (!key_is_dead(key, limit)) {
+ if (keep >= max)
+ goto discard_new;
+ new->keys[keep++] = key_get(key);
+ }
+ }
+ new->nkeys = keep;
+
+ /* adjust the quota */
+ key_payload_reserve(keyring,
+ sizeof(struct keyring_list) +
+ KEYQUOTA_LINK_BYTES * keep);
+
+ if (keep == 0) {
+ rcu_assign_pointer(keyring->payload.subscriptions, NULL);
+ kfree(new);
+ } else {
+ rcu_assign_pointer(keyring->payload.subscriptions, new);
+ }
+
+ up_write(&keyring->sem);
+
+ call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+ kleave(" [yes]");
+ return;
+
+discard_new:
+ new->nkeys = keep;
+ keyring_clear_rcu_disposal(&new->rcu);
+just_return:
+ up_write(&keyring->sem);
+ kleave(" [no]");
+}
*/
#ifdef CONFIG_KEYS_DEBUG_PROC_KEYS
-static struct rb_node *__key_serial_next(struct rb_node *n)
+static struct rb_node *key_serial_next(struct rb_node *n)
{
+ struct user_namespace *user_ns = current_user_ns();
+
+ n = rb_next(n);
while (n) {
struct key *key = rb_entry(n, struct key, serial_node);
- if (key->user->user_ns == current_user_ns())
+ if (key->user->user_ns == user_ns)
break;
n = rb_next(n);
}
return n;
}
-static struct rb_node *key_serial_next(struct rb_node *n)
+static int proc_keys_open(struct inode *inode, struct file *file)
{
- return __key_serial_next(rb_next(n));
+ return seq_open(file, &proc_keys_ops);
}
-static struct rb_node *key_serial_first(struct rb_root *r)
+static struct key *find_ge_key(key_serial_t id)
{
- struct rb_node *n = rb_first(r);
- return __key_serial_next(n);
-}
+ struct user_namespace *user_ns = current_user_ns();
+ struct rb_node *n = key_serial_tree.rb_node;
+ struct key *minkey = NULL;
-static int proc_keys_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &proc_keys_ops);
+ while (n) {
+ struct key *key = rb_entry(n, struct key, serial_node);
+ if (id < key->serial) {
+ if (!minkey || minkey->serial > key->serial)
+ minkey = key;
+ n = n->rb_left;
+ } else if (id > key->serial) {
+ n = n->rb_right;
+ } else {
+ minkey = key;
+ break;
+ }
+ key = NULL;
+ }
+ if (!minkey)
+ return NULL;
+
+ for (;;) {
+ if (minkey->user->user_ns == user_ns)
+ return minkey;
+ n = rb_next(&minkey->serial_node);
+ if (!n)
+ return NULL;
+ minkey = rb_entry(n, struct key, serial_node);
+ }
}
static void *proc_keys_start(struct seq_file *p, loff_t *_pos)
+ __acquires(key_serial_lock)
{
- struct rb_node *_p;
- loff_t pos = *_pos;
+ key_serial_t pos = *_pos;
+ struct key *key;
spin_lock(&key_serial_lock);
- _p = key_serial_first(&key_serial_tree);
- while (pos > 0 && _p) {
- pos--;
- _p = key_serial_next(_p);
- }
-
- return _p;
+ if (*_pos > INT_MAX)
+ return NULL;
+ key = find_ge_key(pos);
+ if (!key)
+ return NULL;
+ *_pos = key->serial;
+ return &key->serial_node;
+}
+static inline key_serial_t key_node_serial(struct rb_node *n)
+{
+ struct key *key = rb_entry(n, struct key, serial_node);
+ return key->serial;
}
static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos)
{
- (*_pos)++;
- return key_serial_next((struct rb_node *) v);
+ struct rb_node *n;
+ n = key_serial_next(v);
+ if (n)
+ *_pos = key_node_serial(n);
+ return n;
}
static void proc_keys_stop(struct seq_file *p, void *v)
+ __releases(key_serial_lock)
{
spin_unlock(&key_serial_lock);
}
/* come up with a suitable timeout value */
if (key->expiry == 0) {
memcpy(xbuf, "perm", 5);
- }
- else if (now.tv_sec >= key->expiry) {
+ } else if (now.tv_sec >= key->expiry) {
memcpy(xbuf, "expd", 5);
- }
- else {
+ } else {
timo = key->expiry - now.tv_sec;
if (timo < 60)
seq_putc(m, '\n');
rcu_read_unlock();
-
return 0;
-
}
#endif /* CONFIG_KEYS_DEBUG_PROC_KEYS */
struct rb_node *n = rb_first(r);
return __key_user_next(n);
}
+
/*****************************************************************************/
/*
* implement "/proc/key-users" to provides a list of the key users
static int proc_key_users_open(struct inode *inode, struct file *file)
{
return seq_open(file, &proc_key_users_ops);
-
}
static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
+ __acquires(key_user_lock)
{
struct rb_node *_p;
loff_t pos = *_pos;
}
return _p;
-
}
static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos)
{
(*_pos)++;
return key_user_next((struct rb_node *) v);
-
}
static void proc_key_users_stop(struct seq_file *p, void *v)
+ __releases(key_user_lock)
{
spin_unlock(&key_user_lock);
}
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/mutex.h>
+#include <linux/security.h>
#include <linux/user_namespace.h>
#include <asm/uaccess.h>
#include "internal.h"
* - don't create special keyrings unless so requested
* - partially constructed keys aren't found unless requested
*/
-key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
+key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
key_perm_t perm)
{
struct request_key_auth *rka;
switch (id) {
case KEY_SPEC_THREAD_KEYRING:
if (!cred->thread_keyring) {
- if (!create)
+ if (!(lflags & KEY_LOOKUP_CREATE))
goto error;
ret = install_thread_keyring();
case KEY_SPEC_PROCESS_KEYRING:
if (!cred->tgcred->process_keyring) {
- if (!create)
+ if (!(lflags & KEY_LOOKUP_CREATE))
goto error;
ret = install_process_keyring();
break;
}
- if (!partial) {
+ /* unlink does not use the nominated key in any way, so can skip all
+ * the permission checks as it is only concerned with the keyring */
+ if (lflags & KEY_LOOKUP_FOR_UNLINK) {
+ ret = 0;
+ goto error;
+ }
+
+ if (!(lflags & KEY_LOOKUP_PARTIAL)) {
ret = wait_for_key_construction(key, true);
switch (ret) {
case -ERESTARTSYS:
}
ret = -EIO;
- if (!partial && !test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
+ if (!(lflags & KEY_LOOKUP_PARTIAL) &&
+ !test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
goto invalid_key;
/* check the permissions */
/* only permit this if there's a single thread in the thread group -
* this avoids us having to adjust the creds on all threads and risking
* ENOMEM */
- if (!is_single_threaded(current))
+ if (!current_is_single_threaded())
return -EMLINK;
new = prepare_creds();
abort_creds(new);
return ret;
}
+
+/*
+ * Replace a process's session keyring when that process resumes userspace on
+ * behalf of one of its children
+ */
+void key_replace_session_keyring(void)
+{
+ const struct cred *old;
+ struct cred *new;
+
+ if (!current->replacement_session_keyring)
+ return;
+
+ write_lock_irq(&tasklist_lock);
+ new = current->replacement_session_keyring;
+ current->replacement_session_keyring = NULL;
+ write_unlock_irq(&tasklist_lock);
+
+ if (!new)
+ return;
+
+ old = current_cred();
+ new-> uid = old-> uid;
+ new-> euid = old-> euid;
+ new-> suid = old-> suid;
+ new->fsuid = old->fsuid;
+ new-> gid = old-> gid;
+ new-> egid = old-> egid;
+ new-> sgid = old-> sgid;
+ new->fsgid = old->fsgid;
+ new->user = get_uid(old->user);
+ new->group_info = get_group_info(old->group_info);
+
+ new->securebits = old->securebits;
+ new->cap_inheritable = old->cap_inheritable;
+ new->cap_permitted = old->cap_permitted;
+ new->cap_effective = old->cap_effective;
+ new->cap_bset = old->cap_bset;
+
+ new->jit_keyring = old->jit_keyring;
+ new->thread_keyring = key_get(old->thread_keyring);
+ new->tgcred->tgid = old->tgcred->tgid;
+ new->tgcred->process_keyring = key_get(old->tgcred->process_keyring);
+
+ security_transfer_creds(new, old);
+
+ commit_creds(new);
+}
#include <linux/sysctl.h>
#include "internal.h"
+static const int zero, one = 1, max = INT_MAX;
+
ctl_table key_sysctls[] = {
{
.ctl_name = CTL_UNNUMBERED,
.data = &key_quota_maxkeys,
.maxlen = sizeof(unsigned),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = (void *) &one,
+ .extra2 = (void *) &max,
},
{
.ctl_name = CTL_UNNUMBERED,
.data = &key_quota_maxbytes,
.maxlen = sizeof(unsigned),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = (void *) &one,
+ .extra2 = (void *) &max,
},
{
.ctl_name = CTL_UNNUMBERED,
.data = &key_quota_root_maxkeys,
.maxlen = sizeof(unsigned),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = (void *) &one,
+ .extra2 = (void *) &max,
},
{
.ctl_name = CTL_UNNUMBERED,
.data = &key_quota_root_maxbytes,
.maxlen = sizeof(unsigned),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = (void *) &one,
+ .extra2 = (void *) &max,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "gc_delay",
+ .data = &key_gc_delay,
+ .maxlen = sizeof(unsigned),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = (void *) &zero,
+ .extra2 = (void *) &max,
},
{ .ctl_name = 0 }
};
}
switch (a->type) {
+ case LSM_AUDIT_NO_AUDIT:
+ return;
case LSM_AUDIT_DATA_IPC:
audit_log_format(ab, " key=%d ", a->u.ipc_id);
break;
/* Security operations */
-int security_ptrace_may_access(struct task_struct *child, unsigned int mode)
+int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
{
- return security_ops->ptrace_may_access(child, mode);
+ return security_ops->ptrace_access_check(child, mode);
}
int security_ptrace_traceme(struct task_struct *parent)
return security_ops->task_create(clone_flags);
}
+int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+ return security_ops->cred_alloc_blank(cred, gfp);
+}
+
void security_cred_free(struct cred *cred)
{
security_ops->cred_free(cred);
security_ops->cred_commit(new, old);
}
+void security_transfer_creds(struct cred *new, const struct cred *old)
+{
+ security_ops->cred_transfer(new, old);
+}
+
int security_kernel_act_as(struct cred *new, u32 secid)
{
return security_ops->kernel_act_as(new, secid);
return security_ops->kernel_create_files_as(new, inode);
}
+int security_kernel_module_request(void)
+{
+ return security_ops->kernel_module_request();
+}
+
int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
{
return security_ops->task_setuid(id0, id1, id2, flags);
}
EXPORT_SYMBOL(security_release_secctx);
+int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+ return security_ops->inode_notifysecctx(inode, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_notifysecctx);
+
+int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+ return security_ops->inode_setsecctx(dentry, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_setsecctx);
+
+int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+ return security_ops->inode_getsecctx(inode, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_getsecctx);
+
#ifdef CONFIG_SECURITY_NETWORK
int security_unix_stream_connect(struct socket *sock, struct socket *other,
security_ops->inet_conn_established(sk, skb);
}
+int security_tun_dev_create(void)
+{
+ return security_ops->tun_dev_create();
+}
+EXPORT_SYMBOL(security_tun_dev_create);
+
+void security_tun_dev_post_create(struct sock *sk)
+{
+ return security_ops->tun_dev_post_create(sk);
+}
+EXPORT_SYMBOL(security_tun_dev_post_create);
+
+int security_tun_dev_attach(struct sock *sk)
+{
+ return security_ops->tun_dev_attach(sk);
+}
+EXPORT_SYMBOL(security_tun_dev_attach);
+
#endif /* CONFIG_SECURITY_NETWORK */
#ifdef CONFIG_SECURITY_NETWORK_XFRM
return security_ops->key_getsecurity(key, _buffer);
}
+int security_key_session_to_parent(const struct cred *cred,
+ const struct cred *parent_cred,
+ struct key *key)
+{
+ return security_ops->key_session_to_parent(cred, parent_cred, key);
+}
+
#endif /* CONFIG_KEYS */
#ifdef CONFIG_AUDIT
* @tclass: target security class
* @av: access vector
*/
-void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
+static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
{
const char **common_pts = NULL;
u32 common_base = 0;
return node;
}
-static inline void avc_print_ipv6_addr(struct audit_buffer *ab,
- struct in6_addr *addr, __be16 port,
- char *name1, char *name2)
+/**
+ * avc_audit_pre_callback - SELinux specific information
+ * will be called by generic audit code
+ * @ab: the audit buffer
+ * @a: audit_data
+ */
+static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
{
- if (!ipv6_addr_any(addr))
- audit_log_format(ab, " %s=%pI6", name1, addr);
- if (port)
- audit_log_format(ab, " %s=%d", name2, ntohs(port));
+ struct common_audit_data *ad = a;
+ audit_log_format(ab, "avc: %s ",
+ ad->selinux_audit_data.denied ? "denied" : "granted");
+ avc_dump_av(ab, ad->selinux_audit_data.tclass,
+ ad->selinux_audit_data.audited);
+ audit_log_format(ab, " for ");
}
-static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
- __be16 port, char *name1, char *name2)
+/**
+ * avc_audit_post_callback - SELinux specific information
+ * will be called by generic audit code
+ * @ab: the audit buffer
+ * @a: audit_data
+ */
+static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
{
- if (addr)
- audit_log_format(ab, " %s=%pI4", name1, &addr);
- if (port)
- audit_log_format(ab, " %s=%d", name2, ntohs(port));
+ struct common_audit_data *ad = a;
+ audit_log_format(ab, " ");
+ avc_dump_query(ab, ad->selinux_audit_data.ssid,
+ ad->selinux_audit_data.tsid,
+ ad->selinux_audit_data.tclass);
}
/**
*/
void avc_audit(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
- struct av_decision *avd, int result, struct avc_audit_data *a)
+ struct av_decision *avd, int result, struct common_audit_data *a)
{
- struct task_struct *tsk = current;
- struct inode *inode = NULL;
+ struct common_audit_data stack_data;
u32 denied, audited;
- struct audit_buffer *ab;
-
denied = requested & ~avd->allowed;
if (denied) {
audited = denied;
if (!(audited & avd->auditallow))
return;
}
-
- ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
- if (!ab)
- return; /* audit_panic has been called */
- audit_log_format(ab, "avc: %s ", denied ? "denied" : "granted");
- avc_dump_av(ab, tclass, audited);
- audit_log_format(ab, " for ");
- if (a && a->tsk)
- tsk = a->tsk;
- if (tsk && tsk->pid) {
- audit_log_format(ab, " pid=%d comm=", tsk->pid);
- audit_log_untrustedstring(ab, tsk->comm);
+ if (!a) {
+ a = &stack_data;
+ memset(a, 0, sizeof(*a));
+ a->type = LSM_AUDIT_NO_AUDIT;
}
- if (a) {
- switch (a->type) {
- case AVC_AUDIT_DATA_IPC:
- audit_log_format(ab, " key=%d", a->u.ipc_id);
- break;
- case AVC_AUDIT_DATA_CAP:
- audit_log_format(ab, " capability=%d", a->u.cap);
- break;
- case AVC_AUDIT_DATA_FS:
- if (a->u.fs.path.dentry) {
- struct dentry *dentry = a->u.fs.path.dentry;
- if (a->u.fs.path.mnt) {
- audit_log_d_path(ab, "path=",
- &a->u.fs.path);
- } else {
- audit_log_format(ab, " name=");
- audit_log_untrustedstring(ab, dentry->d_name.name);
- }
- inode = dentry->d_inode;
- } else if (a->u.fs.inode) {
- struct dentry *dentry;
- inode = a->u.fs.inode;
- dentry = d_find_alias(inode);
- if (dentry) {
- audit_log_format(ab, " name=");
- audit_log_untrustedstring(ab, dentry->d_name.name);
- dput(dentry);
- }
- }
- if (inode)
- audit_log_format(ab, " dev=%s ino=%lu",
- inode->i_sb->s_id,
- inode->i_ino);
- break;
- case AVC_AUDIT_DATA_NET:
- if (a->u.net.sk) {
- struct sock *sk = a->u.net.sk;
- struct unix_sock *u;
- int len = 0;
- char *p = NULL;
-
- switch (sk->sk_family) {
- case AF_INET: {
- struct inet_sock *inet = inet_sk(sk);
-
- avc_print_ipv4_addr(ab, inet->rcv_saddr,
- inet->sport,
- "laddr", "lport");
- avc_print_ipv4_addr(ab, inet->daddr,
- inet->dport,
- "faddr", "fport");
- break;
- }
- case AF_INET6: {
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *inet6 = inet6_sk(sk);
-
- avc_print_ipv6_addr(ab, &inet6->rcv_saddr,
- inet->sport,
- "laddr", "lport");
- avc_print_ipv6_addr(ab, &inet6->daddr,
- inet->dport,
- "faddr", "fport");
- break;
- }
- case AF_UNIX:
- u = unix_sk(sk);
- if (u->dentry) {
- struct path path = {
- .dentry = u->dentry,
- .mnt = u->mnt
- };
- audit_log_d_path(ab, "path=",
- &path);
- break;
- }
- if (!u->addr)
- break;
- len = u->addr->len-sizeof(short);
- p = &u->addr->name->sun_path[0];
- audit_log_format(ab, " path=");
- if (*p)
- audit_log_untrustedstring(ab, p);
- else
- audit_log_n_hex(ab, p, len);
- break;
- }
- }
-
- switch (a->u.net.family) {
- case AF_INET:
- avc_print_ipv4_addr(ab, a->u.net.v4info.saddr,
- a->u.net.sport,
- "saddr", "src");
- avc_print_ipv4_addr(ab, a->u.net.v4info.daddr,
- a->u.net.dport,
- "daddr", "dest");
- break;
- case AF_INET6:
- avc_print_ipv6_addr(ab, &a->u.net.v6info.saddr,
- a->u.net.sport,
- "saddr", "src");
- avc_print_ipv6_addr(ab, &a->u.net.v6info.daddr,
- a->u.net.dport,
- "daddr", "dest");
- break;
- }
- if (a->u.net.netif > 0) {
- struct net_device *dev;
-
- /* NOTE: we always use init's namespace */
- dev = dev_get_by_index(&init_net,
- a->u.net.netif);
- if (dev) {
- audit_log_format(ab, " netif=%s",
- dev->name);
- dev_put(dev);
- }
- }
- break;
- }
- }
- audit_log_format(ab, " ");
- avc_dump_query(ab, ssid, tsid, tclass);
- audit_log_end(ab);
+ a->selinux_audit_data.tclass = tclass;
+ a->selinux_audit_data.requested = requested;
+ a->selinux_audit_data.ssid = ssid;
+ a->selinux_audit_data.tsid = tsid;
+ a->selinux_audit_data.audited = audited;
+ a->selinux_audit_data.denied = denied;
+ a->lsm_pre_audit = avc_audit_pre_callback;
+ a->lsm_post_audit = avc_audit_post_callback;
+ common_lsm_audit(a);
}
/**
* another -errno upon other errors.
*/
int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
- u32 requested, struct avc_audit_data *auditdata)
+ u32 requested, struct common_audit_data *auditdata)
{
struct av_decision avd;
int rc;
{
return avc_cache.latest_notif;
}
+
+void avc_disable(void)
+{
+ if (avc_node_cachep)
+ kmem_cache_destroy(avc_node_cachep);
+}
* Eric Paris <eparis@redhat.com>
* Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
* <dgoeddel@trustedcs.com>
- * Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P.
- * Paul Moore <paul.moore@hp.com>
+ * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P.
+ * Paul Moore <paul.moore@hp.com>
* Copyright (C) 2007 Hitachi Software Engineering Co., Ltd.
* Yuichi Nakamura <ynakam@hitachisoft.jp>
*
sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
sbsec->flags &= ~SE_SBLABELSUPP;
+ /* Special handling for sysfs. Is genfs but also has setxattr handler*/
+ if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+ sbsec->flags |= SE_SBLABELSUPP;
+
/* Initialize the root inode. */
rc = inode_doinit_with_dentry(root_inode, root);
const struct cred *cred,
int cap, int audit)
{
- struct avc_audit_data ad;
+ struct common_audit_data ad;
struct av_decision avd;
u16 sclass;
u32 sid = cred_sid(cred);
u32 av = CAP_TO_MASK(cap);
int rc;
- AVC_AUDIT_DATA_INIT(&ad, CAP);
+ COMMON_AUDIT_DATA_INIT(&ad, CAP);
ad.tsk = tsk;
ad.u.cap = cap;
static int inode_has_perm(const struct cred *cred,
struct inode *inode,
u32 perms,
- struct avc_audit_data *adp)
+ struct common_audit_data *adp)
{
struct inode_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid;
+ validate_creds(cred);
+
if (unlikely(IS_PRIVATE(inode)))
return 0;
if (!adp) {
adp = &ad;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.inode = inode;
}
u32 av)
{
struct inode *inode = dentry->d_inode;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path.mnt = mnt;
ad.u.fs.path.dentry = dentry;
return inode_has_perm(cred, inode, av, &ad);
{
struct file_security_struct *fsec = file->f_security;
struct inode *inode = file->f_path.dentry->d_inode;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = cred_sid(cred);
int rc;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path = file->f_path;
if (sid != fsec->sid) {
struct inode_security_struct *dsec;
struct superblock_security_struct *sbsec;
u32 sid, newsid;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
int rc;
dsec = dir->i_security;
sid = tsec->sid;
newsid = tsec->create_sid;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path.dentry = dentry;
rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR,
{
struct inode_security_struct *dsec, *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
u32 av;
int rc;
dsec = dir->i_security;
isec = dentry->d_inode->i_security;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path.dentry = dentry;
av = DIR__SEARCH;
struct dentry *new_dentry)
{
struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
u32 av;
int old_is_dir, new_is_dir;
old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
new_dsec = new_dir->i_security;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path.dentry = old_dentry;
rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR,
static int superblock_has_perm(const struct cred *cred,
struct super_block *sb,
u32 perms,
- struct avc_audit_data *ad)
+ struct common_audit_data *ad)
{
struct superblock_security_struct *sbsec;
u32 sid = cred_sid(cred);
/* Hook functions begin here. */
-static int selinux_ptrace_may_access(struct task_struct *child,
+static int selinux_ptrace_access_check(struct task_struct *child,
unsigned int mode)
{
int rc;
- rc = cap_ptrace_may_access(child, mode);
+ rc = cap_ptrace_access_check(child, mode);
if (rc)
return rc;
const struct task_security_struct *old_tsec;
struct task_security_struct *new_tsec;
struct inode_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
struct inode *inode = bprm->file->f_path.dentry->d_inode;
int rc;
return rc;
}
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path = bprm->file->f_path;
if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
static inline void flush_unauthorized_files(const struct cred *cred,
struct files_struct *files)
{
- struct avc_audit_data ad;
+ struct common_audit_data ad;
struct file *file, *devnull = NULL;
struct tty_struct *tty;
struct fdtable *fdt;
/* Revalidate access to inherited open files. */
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
spin_lock(&files->file_lock);
for (;;) {
static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
{
const struct cred *cred = current_cred();
- struct avc_audit_data ad;
+ struct common_audit_data ad;
int rc;
rc = superblock_doinit(sb, data);
if (flags & MS_KERNMOUNT)
return 0;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path.dentry = sb->s_root;
return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad);
}
static int selinux_sb_statfs(struct dentry *dentry)
{
const struct cred *cred = current_cred();
- struct avc_audit_data ad;
+ struct common_audit_data ad;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path.dentry = dentry->d_sb->s_root;
return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad);
}
static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
{
const struct cred *cred = current_cred();
+ unsigned int ia_valid = iattr->ia_valid;
+
+ /* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
+ if (ia_valid & ATTR_FORCE) {
+ ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE |
+ ATTR_FORCE);
+ if (!ia_valid)
+ return 0;
+ }
- if (iattr->ia_valid & ATTR_FORCE)
- return 0;
-
- if (iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
- ATTR_ATIME_SET | ATTR_MTIME_SET))
+ if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
+ ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
return dentry_has_perm(cred, NULL, dentry, FILE__SETATTR);
return dentry_has_perm(cred, NULL, dentry, FILE__WRITE);
struct inode *inode = dentry->d_inode;
struct inode_security_struct *isec = inode->i_security;
struct superblock_security_struct *sbsec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 newsid, sid = current_sid();
int rc = 0;
if (!is_owner_or_cap(inode))
return -EPERM;
- AVC_AUDIT_DATA_INIT(&ad, FS);
+ COMMON_AUDIT_DATA_INIT(&ad, FS);
ad.u.fs.path.dentry = dentry;
rc = avc_has_perm(sid, isec->sid, isec->sclass,
return rc;
isec->sid = newsid;
+ isec->initialized = 1;
return 0;
}
const struct cred *cred = current_cred();
struct inode *inode = file->f_path.dentry->d_inode;
- if (!mask) {
- /* No permission to check. Existence test. */
- return 0;
- }
-
/* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */
if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
mask |= MAY_APPEND;
static int selinux_file_permission(struct file *file, int mask)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct file_security_struct *fsec = file->f_security;
+ struct inode_security_struct *isec = inode->i_security;
+ u32 sid = current_sid();
+
if (!mask)
/* No permission to check. Existence test. */
return 0;
+ if (sid == fsec->sid && fsec->isid == isec->sid &&
+ fsec->pseqno == avc_policy_seqno())
+ /* No change since dentry_open check. */
+ return 0;
+
return selinux_revalidate_file_permission(file, mask);
}
return current_has_perm(current, PROCESS__FORK);
}
+/*
+ * allocate the SELinux part of blank credentials
+ */
+static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+ struct task_security_struct *tsec;
+
+ tsec = kzalloc(sizeof(struct task_security_struct), gfp);
+ if (!tsec)
+ return -ENOMEM;
+
+ cred->security = tsec;
+ return 0;
+}
+
/*
* detach and free the LSM part of a set of credentials
*/
static void selinux_cred_free(struct cred *cred)
{
struct task_security_struct *tsec = cred->security;
- cred->security = NULL;
+
+ BUG_ON((unsigned long) cred->security < PAGE_SIZE);
+ cred->security = (void *) 0x7UL;
kfree(tsec);
}
return 0;
}
+/*
+ * transfer the SELinux data to a blank set of creds
+ */
+static void selinux_cred_transfer(struct cred *new, const struct cred *old)
+{
+ const struct task_security_struct *old_tsec = old->security;
+ struct task_security_struct *tsec = new->security;
+
+ *tsec = *old_tsec;
+}
+
/*
* set the security data for a kernel service
* - all the creation contexts are set to unlabelled
return 0;
}
+static int selinux_kernel_module_request(void)
+{
+ return task_has_system(current, SYSTEM__MODULE_REQUEST);
+}
+
static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
{
return current_has_perm(p, PROCESS__SETPGID);
/* Returns error only if unable to parse addresses */
static int selinux_parse_skb_ipv4(struct sk_buff *skb,
- struct avc_audit_data *ad, u8 *proto)
+ struct common_audit_data *ad, u8 *proto)
{
int offset, ihlen, ret = -EINVAL;
struct iphdr _iph, *ih;
/* Returns error only if unable to parse addresses */
static int selinux_parse_skb_ipv6(struct sk_buff *skb,
- struct avc_audit_data *ad, u8 *proto)
+ struct common_audit_data *ad, u8 *proto)
{
u8 nexthdr;
int ret = -EINVAL, offset;
#endif /* IPV6 */
-static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad,
+static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad,
char **_addrp, int src, u8 *proto)
{
char *addrp;
u32 perms)
{
struct inode_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid;
int err = 0;
goto out;
sid = task_sid(task);
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.sk = sock->sk;
err = avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
if (family == PF_INET || family == PF_INET6) {
char *addrp;
struct inode_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
struct sockaddr_in *addr4 = NULL;
struct sockaddr_in6 *addr6 = NULL;
unsigned short snum;
snum, &sid);
if (err)
goto out;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.sport = htons(snum);
ad.u.net.family = family;
err = avc_has_perm(isec->sid, sid,
if (err)
goto out;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.sport = htons(snum);
ad.u.net.family = family;
isec = SOCK_INODE(sock)->i_security;
if (isec->sclass == SECCLASS_TCP_SOCKET ||
isec->sclass == SECCLASS_DCCP_SOCKET) {
- struct avc_audit_data ad;
+ struct common_audit_data ad;
struct sockaddr_in *addr4 = NULL;
struct sockaddr_in6 *addr6 = NULL;
unsigned short snum;
perm = (isec->sclass == SECCLASS_TCP_SOCKET) ?
TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.dport = htons(snum);
ad.u.net.family = sk->sk_family;
err = avc_has_perm(isec->sid, sid, isec->sclass, perm, &ad);
struct sk_security_struct *ssec;
struct inode_security_struct *isec;
struct inode_security_struct *other_isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
int err;
isec = SOCK_INODE(sock)->i_security;
other_isec = SOCK_INODE(other)->i_security;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.sk = other->sk;
err = avc_has_perm(isec->sid, other_isec->sid,
{
struct inode_security_struct *isec;
struct inode_security_struct *other_isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
int err;
isec = SOCK_INODE(sock)->i_security;
other_isec = SOCK_INODE(other)->i_security;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.sk = other->sk;
err = avc_has_perm(isec->sid, other_isec->sid,
static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family,
u32 peer_sid,
- struct avc_audit_data *ad)
+ struct common_audit_data *ad)
{
int err;
u32 if_sid;
struct sk_security_struct *sksec = sk->sk_security;
u32 peer_sid;
u32 sk_sid = sksec->sid;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
char *addrp;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.netif = skb->iif;
ad.u.net.family = family;
err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
struct sk_security_struct *sksec = sk->sk_security;
u16 family = sk->sk_family;
u32 sk_sid = sksec->sid;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
char *addrp;
u8 secmark_active;
u8 peerlbl_active;
if (!secmark_active && !peerlbl_active)
return 0;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.netif = skb->iif;
ad.u.net.family = family;
err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
fl->secid = req->secid;
}
+static int selinux_tun_dev_create(void)
+{
+ u32 sid = current_sid();
+
+ /* we aren't taking into account the "sockcreate" SID since the socket
+ * that is being created here is not a socket in the traditional sense,
+ * instead it is a private sock, accessible only to the kernel, and
+ * representing a wide range of network traffic spanning multiple
+ * connections unlike traditional sockets - check the TUN driver to
+ * get a better understanding of why this socket is special */
+
+ return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE,
+ NULL);
+}
+
+static void selinux_tun_dev_post_create(struct sock *sk)
+{
+ struct sk_security_struct *sksec = sk->sk_security;
+
+ /* we don't currently perform any NetLabel based labeling here and it
+ * isn't clear that we would want to do so anyway; while we could apply
+ * labeling without the support of the TUN user the resulting labeled
+ * traffic from the other end of the connection would almost certainly
+ * cause confusion to the TUN user that had no idea network labeling
+ * protocols were being used */
+
+ /* see the comments in selinux_tun_dev_create() about why we don't use
+ * the sockcreate SID here */
+
+ sksec->sid = current_sid();
+ sksec->sclass = SECCLASS_TUN_SOCKET;
+}
+
+static int selinux_tun_dev_attach(struct sock *sk)
+{
+ struct sk_security_struct *sksec = sk->sk_security;
+ u32 sid = current_sid();
+ int err;
+
+ err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
+ TUN_SOCKET__RELABELFROM, NULL);
+ if (err)
+ return err;
+ err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET,
+ TUN_SOCKET__RELABELTO, NULL);
+ if (err)
+ return err;
+
+ sksec->sid = sid;
+
+ return 0;
+}
+
static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
{
int err = 0;
int err;
char *addrp;
u32 peer_sid;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u8 secmark_active;
u8 netlbl_active;
u8 peerlbl_active;
if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0)
return NF_DROP;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.netif = ifindex;
ad.u.net.family = family;
if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
{
struct sock *sk = skb->sk;
struct sk_security_struct *sksec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
char *addrp;
u8 proto;
return NF_ACCEPT;
sksec = sk->sk_security;
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.netif = ifindex;
ad.u.net.family = family;
if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto))
u32 secmark_perm;
u32 peer_sid;
struct sock *sk;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
char *addrp;
u8 secmark_active;
u8 peerlbl_active;
secmark_perm = PACKET__SEND;
}
- AVC_AUDIT_DATA_INIT(&ad, NET);
+ COMMON_AUDIT_DATA_INIT(&ad, NET);
ad.u.net.netif = ifindex;
ad.u.net.family = family;
if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL))
static int selinux_netlink_recv(struct sk_buff *skb, int capability)
{
int err;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
err = cap_netlink_recv(skb, capability);
if (err)
return err;
- AVC_AUDIT_DATA_INIT(&ad, CAP);
+ COMMON_AUDIT_DATA_INIT(&ad, CAP);
ad.u.cap = capability;
return avc_has_perm(NETLINK_CB(skb).sid, NETLINK_CB(skb).sid,
u32 perms)
{
struct ipc_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
isec = ipc_perms->security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = ipc_perms->key;
return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
{
struct ipc_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
int rc;
isec = msq->q_perm.security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = msq->q_perm.key;
rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg)
{
struct ipc_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
isec = msq->q_perm.security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = msq->q_perm.key;
return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
{
struct ipc_security_struct *isec;
struct msg_security_struct *msec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
int rc;
return rc;
}
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = msq->q_perm.key;
/* Can this process write to the queue? */
{
struct ipc_security_struct *isec;
struct msg_security_struct *msec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = task_sid(target);
int rc;
isec = msq->q_perm.security;
msec = msg->security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = msq->q_perm.key;
rc = avc_has_perm(sid, isec->sid,
static int selinux_shm_alloc_security(struct shmid_kernel *shp)
{
struct ipc_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
int rc;
isec = shp->shm_perm.security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = shp->shm_perm.key;
rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM,
static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg)
{
struct ipc_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
isec = shp->shm_perm.security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = shp->shm_perm.key;
return avc_has_perm(sid, isec->sid, SECCLASS_SHM,
static int selinux_sem_alloc_security(struct sem_array *sma)
{
struct ipc_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
int rc;
isec = sma->sem_perm.security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = sma->sem_perm.key;
rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM,
static int selinux_sem_associate(struct sem_array *sma, int semflg)
{
struct ipc_security_struct *isec;
- struct avc_audit_data ad;
+ struct common_audit_data ad;
u32 sid = current_sid();
isec = sma->sem_perm.security;
- AVC_AUDIT_DATA_INIT(&ad, IPC);
+ COMMON_AUDIT_DATA_INIT(&ad, IPC);
ad.u.ipc_id = sma->sem_perm.key;
return avc_has_perm(sid, isec->sid, SECCLASS_SEM,
/* Only allow single threaded processes to change context */
error = -EPERM;
- if (!is_single_threaded(p)) {
+ if (!current_is_single_threaded()) {
error = security_bounded_transition(tsec->sid, sid);
if (error)
goto abort_change;
kfree(secdata);
}
+/*
+ * called with inode->i_mutex locked
+ */
+static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+ return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0);
+}
+
+/*
+ * called with inode->i_mutex locked
+ */
+static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+ return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0);
+}
+
+static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+ int len = 0;
+ len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX,
+ ctx, true);
+ if (len < 0)
+ return len;
+ *ctxlen = len;
+ return 0;
+}
#ifdef CONFIG_KEYS
static int selinux_key_alloc(struct key *k, const struct cred *cred,
static struct security_operations selinux_ops = {
.name = "selinux",
- .ptrace_may_access = selinux_ptrace_may_access,
+ .ptrace_access_check = selinux_ptrace_access_check,
.ptrace_traceme = selinux_ptrace_traceme,
.capget = selinux_capget,
.capset = selinux_capset,
.dentry_open = selinux_dentry_open,
.task_create = selinux_task_create,
+ .cred_alloc_blank = selinux_cred_alloc_blank,
.cred_free = selinux_cred_free,
.cred_prepare = selinux_cred_prepare,
+ .cred_transfer = selinux_cred_transfer,
.kernel_act_as = selinux_kernel_act_as,
.kernel_create_files_as = selinux_kernel_create_files_as,
+ .kernel_module_request = selinux_kernel_module_request,
.task_setpgid = selinux_task_setpgid,
.task_getpgid = selinux_task_getpgid,
.task_getsid = selinux_task_getsid,
.secid_to_secctx = selinux_secid_to_secctx,
.secctx_to_secid = selinux_secctx_to_secid,
.release_secctx = selinux_release_secctx,
+ .inode_notifysecctx = selinux_inode_notifysecctx,
+ .inode_setsecctx = selinux_inode_setsecctx,
+ .inode_getsecctx = selinux_inode_getsecctx,
.unix_stream_connect = selinux_socket_unix_stream_connect,
.unix_may_send = selinux_socket_unix_may_send,
.inet_csk_clone = selinux_inet_csk_clone,
.inet_conn_established = selinux_inet_conn_established,
.req_classify_flow = selinux_req_classify_flow,
+ .tun_dev_create = selinux_tun_dev_create,
+ .tun_dev_post_create = selinux_tun_dev_post_create,
+ .tun_dev_attach = selinux_tun_dev_attach,
#ifdef CONFIG_SECURITY_NETWORK_XFRM
.xfrm_policy_alloc_security = selinux_xfrm_policy_alloc,
selinux_disabled = 1;
selinux_enabled = 0;
+ /* Try to destroy the avc node cache */
+ avc_disable();
+
/* Reset security_ops to the secondary module, dummy or capability. */
security_ops = secondary_ops;
S_(SECCLASS_KEY_SOCKET, socket, 0x00400000UL)
S_(SECCLASS_UNIX_STREAM_SOCKET, socket, 0x00400000UL)
S_(SECCLASS_UNIX_DGRAM_SOCKET, socket, 0x00400000UL)
+ S_(SECCLASS_TUN_SOCKET, socket, 0x00400000UL)
S_(SECCLASS_IPC, ipc, 0x00000200UL)
S_(SECCLASS_SEM, ipc, 0x00000200UL)
S_(SECCLASS_MSGQ, ipc, 0x00000200UL)
S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, "syslog_read")
S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, "syslog_mod")
S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, "syslog_console")
+ S_(SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, "module_request")
S_(SECCLASS_CAPABILITY, CAPABILITY__CHOWN, "chown")
S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_OVERRIDE, "dac_override")
S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_READ_SEARCH, "dac_read_search")
#define UNIX_DGRAM_SOCKET__RECV_MSG 0x00080000UL
#define UNIX_DGRAM_SOCKET__SEND_MSG 0x00100000UL
#define UNIX_DGRAM_SOCKET__NAME_BIND 0x00200000UL
+#define TUN_SOCKET__IOCTL 0x00000001UL
+#define TUN_SOCKET__READ 0x00000002UL
+#define TUN_SOCKET__WRITE 0x00000004UL
+#define TUN_SOCKET__CREATE 0x00000008UL
+#define TUN_SOCKET__GETATTR 0x00000010UL
+#define TUN_SOCKET__SETATTR 0x00000020UL
+#define TUN_SOCKET__LOCK 0x00000040UL
+#define TUN_SOCKET__RELABELFROM 0x00000080UL
+#define TUN_SOCKET__RELABELTO 0x00000100UL
+#define TUN_SOCKET__APPEND 0x00000200UL
+#define TUN_SOCKET__BIND 0x00000400UL
+#define TUN_SOCKET__CONNECT 0x00000800UL
+#define TUN_SOCKET__LISTEN 0x00001000UL
+#define TUN_SOCKET__ACCEPT 0x00002000UL
+#define TUN_SOCKET__GETOPT 0x00004000UL
+#define TUN_SOCKET__SETOPT 0x00008000UL
+#define TUN_SOCKET__SHUTDOWN 0x00010000UL
+#define TUN_SOCKET__RECVFROM 0x00020000UL
+#define TUN_SOCKET__SENDTO 0x00040000UL
+#define TUN_SOCKET__RECV_MSG 0x00080000UL
+#define TUN_SOCKET__SEND_MSG 0x00100000UL
+#define TUN_SOCKET__NAME_BIND 0x00200000UL
#define PROCESS__FORK 0x00000001UL
#define PROCESS__TRANSITION 0x00000002UL
#define PROCESS__SIGCHLD 0x00000004UL
#define SYSTEM__SYSLOG_READ 0x00000002UL
#define SYSTEM__SYSLOG_MOD 0x00000004UL
#define SYSTEM__SYSLOG_CONSOLE 0x00000008UL
+#define SYSTEM__MODULE_REQUEST 0x00000010UL
#define CAPABILITY__CHOWN 0x00000001UL
#define CAPABILITY__DAC_OVERRIDE 0x00000002UL
#define CAPABILITY__DAC_READ_SEARCH 0x00000004UL
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/audit.h>
+#include <linux/lsm_audit.h>
#include <linux/in6.h>
#include <linux/path.h>
#include <asm/system.h>
struct sock;
struct sk_buff;
-/* Auxiliary data to use in generating the audit record. */
-struct avc_audit_data {
- char type;
-#define AVC_AUDIT_DATA_FS 1
-#define AVC_AUDIT_DATA_NET 2
-#define AVC_AUDIT_DATA_CAP 3
-#define AVC_AUDIT_DATA_IPC 4
- struct task_struct *tsk;
- union {
- struct {
- struct path path;
- struct inode *inode;
- } fs;
- struct {
- int netif;
- struct sock *sk;
- u16 family;
- __be16 dport;
- __be16 sport;
- union {
- struct {
- __be32 daddr;
- __be32 saddr;
- } v4;
- struct {
- struct in6_addr daddr;
- struct in6_addr saddr;
- } v6;
- } fam;
- } net;
- int cap;
- int ipc_id;
- } u;
-};
-
-#define v4info fam.v4
-#define v6info fam.v6
-
-/* Initialize an AVC audit data structure. */
-#define AVC_AUDIT_DATA_INIT(_d,_t) \
- { memset((_d), 0, sizeof(struct avc_audit_data)); (_d)->type = AVC_AUDIT_DATA_##_t; }
-
/*
* AVC statistics
*/
void avc_audit(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
- struct av_decision *avd, int result, struct avc_audit_data *auditdata);
+ struct av_decision *avd,
+ int result,
+ struct common_audit_data *a);
#define AVC_STRICT 1 /* Ignore permissive mode. */
int avc_has_perm_noaudit(u32 ssid, u32 tsid,
int avc_has_perm(u32 ssid, u32 tsid,
u16 tclass, u32 requested,
- struct avc_audit_data *auditdata);
+ struct common_audit_data *auditdata);
u32 avc_policy_seqno(void);
u32 events, u32 ssid, u32 tsid,
u16 tclass, u32 perms);
-/* Shows permission in human readable form */
-void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av);
-
/* Exported to selinuxfs */
int avc_get_hash_stats(char *page);
extern unsigned int avc_cache_threshold;
+/* Attempt to free avc node cache */
+void avc_disable(void);
+
#ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
DECLARE_PER_CPU(struct avc_cache_stats, avc_cache_stats);
#endif
S_(NULL)
S_(NULL)
S_("kernel_service")
+ S_("tun_socket")
#define SECCLASS_PEER 68
#define SECCLASS_CAPABILITY2 69
#define SECCLASS_KERNEL_SERVICE 74
+#define SECCLASS_TUN_SOCKET 75
/*
* Security identifier indices for initial entities
int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
struct sk_buff *skb,
u16 family,
- struct avc_audit_data *ad);
+ struct common_audit_data *ad);
int selinux_netlbl_socket_setsockopt(struct socket *sock,
int level,
int optname);
static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
struct sk_buff *skb,
u16 family,
- struct avc_audit_data *ad)
+ struct common_audit_data *ad)
{
return 0;
}
}
int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
- struct avc_audit_data *ad);
+ struct common_audit_data *ad);
int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
- struct avc_audit_data *ad, u8 proto);
+ struct common_audit_data *ad, u8 proto);
int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
static inline void selinux_xfrm_notify_policyload(void)
}
static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
- struct avc_audit_data *ad)
+ struct common_audit_data *ad)
{
return 0;
}
static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
- struct avc_audit_data *ad, u8 proto)
+ struct common_audit_data *ad, u8 proto)
{
return 0;
}
int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
struct sk_buff *skb,
u16 family,
- struct avc_audit_data *ad)
+ struct common_audit_data *ad)
{
int rc;
u32 nlbl_sid;
*
* Added validation of kernel classes and permissions
*
+ * Updated: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ * Added support for bounds domain and audit messaged on masked permissions
+ *
+ * Copyright (C) 2008, 2009 NEC Corporation
* Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
* Copyright (C) 2003 - 2004, 2006 Tresys Technology, LLC
return s[0];
}
+/*
+ * security_dump_masked_av - dumps masked permissions during
+ * security_compute_av due to RBAC, MLS/Constraint and Type bounds.
+ */
+static int dump_masked_av_helper(void *k, void *d, void *args)
+{
+ struct perm_datum *pdatum = d;
+ char **permission_names = args;
+
+ BUG_ON(pdatum->value < 1 || pdatum->value > 32);
+
+ permission_names[pdatum->value - 1] = (char *)k;
+
+ return 0;
+}
+
+static void security_dump_masked_av(struct context *scontext,
+ struct context *tcontext,
+ u16 tclass,
+ u32 permissions,
+ const char *reason)
+{
+ struct common_datum *common_dat;
+ struct class_datum *tclass_dat;
+ struct audit_buffer *ab;
+ char *tclass_name;
+ char *scontext_name = NULL;
+ char *tcontext_name = NULL;
+ char *permission_names[32];
+ int index, length;
+ bool need_comma = false;
+
+ if (!permissions)
+ return;
+
+ tclass_name = policydb.p_class_val_to_name[tclass - 1];
+ tclass_dat = policydb.class_val_to_struct[tclass - 1];
+ common_dat = tclass_dat->comdatum;
+
+ /* init permission_names */
+ if (common_dat &&
+ hashtab_map(common_dat->permissions.table,
+ dump_masked_av_helper, permission_names) < 0)
+ goto out;
+
+ if (hashtab_map(tclass_dat->permissions.table,
+ dump_masked_av_helper, permission_names) < 0)
+ goto out;
+
+ /* get scontext/tcontext in text form */
+ if (context_struct_to_string(scontext,
+ &scontext_name, &length) < 0)
+ goto out;
+
+ if (context_struct_to_string(tcontext,
+ &tcontext_name, &length) < 0)
+ goto out;
+
+ /* audit a message */
+ ab = audit_log_start(current->audit_context,
+ GFP_ATOMIC, AUDIT_SELINUX_ERR);
+ if (!ab)
+ goto out;
+
+ audit_log_format(ab, "op=security_compute_av reason=%s "
+ "scontext=%s tcontext=%s tclass=%s perms=",
+ reason, scontext_name, tcontext_name, tclass_name);
+
+ for (index = 0; index < 32; index++) {
+ u32 mask = (1 << index);
+
+ if ((mask & permissions) == 0)
+ continue;
+
+ audit_log_format(ab, "%s%s",
+ need_comma ? "," : "",
+ permission_names[index]
+ ? permission_names[index] : "????");
+ need_comma = true;
+ }
+ audit_log_end(ab);
+out:
+ /* release scontext/tcontext */
+ kfree(tcontext_name);
+ kfree(scontext_name);
+
+ return;
+}
+
/*
* security_boundary_permission - drops violated permissions
* on boundary constraint.
}
if (masked) {
- struct audit_buffer *ab;
- char *stype_name
- = policydb.p_type_val_to_name[source->value - 1];
- char *ttype_name
- = policydb.p_type_val_to_name[target->value - 1];
- char *tclass_name
- = policydb.p_class_val_to_name[tclass - 1];
-
/* mask violated permissions */
avd->allowed &= ~masked;
- /* notice to userspace via audit message */
- ab = audit_log_start(current->audit_context,
- GFP_ATOMIC, AUDIT_SELINUX_ERR);
- if (!ab)
- return;
-
- audit_log_format(ab, "av boundary violation: "
- "source=%s target=%s tclass=%s",
- stype_name, ttype_name, tclass_name);
- avc_dump_av(ab, tclass, masked);
- audit_log_end(ab);
+ /* audit masked permissions */
+ security_dump_masked_av(scontext, tcontext,
+ tclass, masked, "bounds");
}
}
if ((constraint->permissions & (avd->allowed)) &&
!constraint_expr_eval(scontext, tcontext, NULL,
constraint->expr)) {
- avd->allowed = (avd->allowed) & ~(constraint->permissions);
+ avd->allowed &= ~(constraint->permissions);
}
constraint = constraint->next;
}
break;
}
if (!ra)
- avd->allowed = (avd->allowed) & ~(PROCESS__TRANSITION |
- PROCESS__DYNTRANSITION);
+ avd->allowed &= ~(PROCESS__TRANSITION |
+ PROCESS__DYNTRANSITION);
}
/*
}
index = type->bounds;
}
+
+ if (rc) {
+ char *old_name = NULL;
+ char *new_name = NULL;
+ int length;
+
+ if (!context_struct_to_string(old_context,
+ &old_name, &length) &&
+ !context_struct_to_string(new_context,
+ &new_name, &length)) {
+ audit_log(current->audit_context,
+ GFP_ATOMIC, AUDIT_SELINUX_ERR,
+ "op=security_bounded_transition "
+ "result=denied "
+ "oldcontext=%s newcontext=%s",
+ old_name, new_name);
+ }
+ kfree(new_name);
+ kfree(old_name);
+ }
out:
read_unlock(&policy_rwlock);
* gone thru the IPSec process.
*/
int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
- struct avc_audit_data *ad)
+ struct common_audit_data *ad)
{
int i, rc = 0;
struct sec_path *sp;
* checked in the selinux_xfrm_state_pol_flow_match hook above.
*/
int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
- struct avc_audit_data *ad, u8 proto)
+ struct common_audit_data *ad, u8 proto)
{
struct dst_entry *dst;
int rc = 0;
{
memset(a, 0, sizeof(*a));
a->a.type = type;
- a->a.function = func;
+ a->a.smack_audit_data.function = func;
}
static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
static void smack_log_callback(struct audit_buffer *ab, void *a)
{
struct common_audit_data *ad = a;
- struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data;
- audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function,
+ struct smack_audit_data *sad = &ad->smack_audit_data;
+ audit_log_format(ab, "lsm=SMACK fn=%s action=%s",
+ ad->smack_audit_data.function,
sad->result ? "denied" : "granted");
audit_log_format(ab, " subject=");
audit_log_untrustedstring(ab, sad->subject);
if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0)
return;
- if (a->function == NULL)
- a->function = "unknown";
+ if (a->smack_audit_data.function == NULL)
+ a->smack_audit_data.function = "unknown";
/* end preparing the audit data */
- sad = &a->lsm_priv.smack_audit_data;
+ sad = &a->smack_audit_data;
smack_str_from_perm(request_buffer, request);
sad->subject = subject_label;
sad->object = object_label;
*/
/**
- * smack_ptrace_may_access - Smack approval on PTRACE_ATTACH
+ * smack_ptrace_access_check - Smack approval on PTRACE_ATTACH
* @ctp: child task pointer
* @mode: ptrace attachment mode
*
*
* Do the capability checks, and require read and write.
*/
-static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
+static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
{
int rc;
struct smk_audit_info ad;
char *sp, *tsp;
- rc = cap_ptrace_may_access(ctp, mode);
+ rc = cap_ptrace_access_check(ctp, mode);
if (rc != 0)
return rc;
* Task hooks
*/
+/**
+ * smack_cred_alloc_blank - "allocate" blank task-level security credentials
+ * @new: the new credentials
+ * @gfp: the atomicity of any memory allocations
+ *
+ * Prepare a blank set of credentials for modification. This must allocate all
+ * the memory the LSM module might require such that cred_transfer() can
+ * complete without error.
+ */
+static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+ cred->security = NULL;
+ return 0;
+}
+
+
/**
* smack_cred_free - "free" task-level security credentials
* @cred: the credentials in question
{
}
+/**
+ * smack_cred_transfer - Transfer the old credentials to the new credentials
+ * @new: the new credentials
+ * @old: the original credentials
+ *
+ * Fill in a set of blank credentials from another set of credentials.
+ */
+static void smack_cred_transfer(struct cred *new, const struct cred *old)
+{
+ new->security = old->security;
+}
+
/**
* smack_kernel_act_as - Set the subjective context in a set of credentials
* @new: points to the set of credentials to be modified.
if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) {
nsp->smk_inode = sp;
+ nsp->smk_flags |= SMK_INODE_INSTANT;
return 0;
}
/*
/*
* Perfectly reasonable for this to be NULL
*/
- if (sip == NULL || sip->sin_family != PF_INET)
+ if (sip == NULL || sip->sin_family != AF_INET)
return 0;
return smack_netlabel_send(sock->sk, sip);
{
}
+static int smack_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+ return smack_inode_setsecurity(inode, XATTR_SMACK_SUFFIX, ctx, ctxlen, 0);
+}
+
+static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+ return __vfs_setxattr_noperm(dentry, XATTR_NAME_SMACK, ctx, ctxlen, 0);
+}
+
+static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+ int len = 0;
+ len = smack_inode_getsecurity(inode, XATTR_SMACK_SUFFIX, ctx, true);
+
+ if (len < 0)
+ return len;
+ *ctxlen = len;
+ return 0;
+}
+
struct security_operations smack_ops = {
.name = "smack",
- .ptrace_may_access = smack_ptrace_may_access,
+ .ptrace_access_check = smack_ptrace_access_check,
.ptrace_traceme = smack_ptrace_traceme,
.syslog = smack_syslog,
.file_send_sigiotask = smack_file_send_sigiotask,
.file_receive = smack_file_receive,
+ .cred_alloc_blank = smack_cred_alloc_blank,
.cred_free = smack_cred_free,
.cred_prepare = smack_cred_prepare,
.cred_commit = smack_cred_commit,
+ .cred_transfer = smack_cred_transfer,
.kernel_act_as = smack_kernel_act_as,
.kernel_create_files_as = smack_kernel_create_files_as,
.task_setpgid = smack_task_setpgid,
.secid_to_secctx = smack_secid_to_secctx,
.secctx_to_secid = smack_secctx_to_secid,
.release_secctx = smack_release_secctx,
+ .inode_notifysecctx = smack_inode_notifysecctx,
+ .inode_setsecctx = smack_inode_setsecctx,
+ .inode_getsecctx = smack_inode_getsecctx,
};
return true;
}
+/**
+ * tomoyo_delete_domain - Delete a domain.
+ *
+ * @domainname: The name of domain.
+ *
+ * Returns 0.
+ */
+static int tomoyo_delete_domain(char *domainname)
+{
+ struct tomoyo_domain_info *domain;
+ struct tomoyo_path_info name;
+
+ name.name = domainname;
+ tomoyo_fill_path_info(&name);
+ down_write(&tomoyo_domain_list_lock);
+ /* Is there an active domain? */
+ list_for_each_entry(domain, &tomoyo_domain_list, list) {
+ /* Never delete tomoyo_kernel_domain */
+ if (domain == &tomoyo_kernel_domain)
+ continue;
+ if (domain->is_deleted ||
+ tomoyo_pathcmp(domain->domainname, &name))
+ continue;
+ domain->is_deleted = true;
+ break;
+ }
+ up_write(&tomoyo_domain_list_lock);
+ return 0;
+}
+
/**
* tomoyo_write_domain_policy - Write domain policy.
*
const char *tomoyo_get_msg(const bool is_enforce);
/* Convert single path operation to operation name. */
const char *tomoyo_sp2keyword(const u8 operation);
-/* Delete a domain. */
-int tomoyo_delete_domain(char *data);
/* Create "alias" entry in exception policy. */
int tomoyo_write_alias_policy(char *data, const bool is_delete);
/*
return tomoyo_update_alias_entry(data, cp, is_delete);
}
-/* Domain create/delete handler. */
-
-/**
- * tomoyo_delete_domain - Delete a domain.
- *
- * @domainname: The name of domain.
- *
- * Returns 0.
- */
-int tomoyo_delete_domain(char *domainname)
-{
- struct tomoyo_domain_info *domain;
- struct tomoyo_path_info name;
-
- name.name = domainname;
- tomoyo_fill_path_info(&name);
- down_write(&tomoyo_domain_list_lock);
- /* Is there an active domain? */
- list_for_each_entry(domain, &tomoyo_domain_list, list) {
- /* Never delete tomoyo_kernel_domain */
- if (domain == &tomoyo_kernel_domain)
- continue;
- if (domain->is_deleted ||
- tomoyo_pathcmp(domain->domainname, &name))
- continue;
- domain->is_deleted = true;
- break;
- }
- up_write(&tomoyo_domain_list_lock);
- return 0;
-}
-
/**
* tomoyo_find_or_assign_new_domain - Create a domain.
*
/**
* tomoyo_find_next_domain - Find a domain.
*
- * @bprm: Pointer to "struct linux_binprm".
- * @next_domain: Pointer to pointer to "struct tomoyo_domain_info".
+ * @bprm: Pointer to "struct linux_binprm".
*
* Returns 0 on success, negative value otherwise.
*/
-int tomoyo_find_next_domain(struct linux_binprm *bprm,
- struct tomoyo_domain_info **next_domain)
+int tomoyo_find_next_domain(struct linux_binprm *bprm)
{
/*
* This function assumes that the size of buffer returned by
tomoyo_set_domain_flag(old_domain, false,
TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED);
out:
+ if (!domain)
+ domain = old_domain;
+ bprm->cred->security = domain;
tomoyo_free(real_program_name);
tomoyo_free(symlink_program_name);
- *next_domain = domain ? domain : old_domain;
tomoyo_free(tmp);
return retval;
}
#include "tomoyo.h"
#include "realpath.h"
+static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp)
+{
+ new->security = NULL;
+ return 0;
+}
+
static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
gfp_t gfp)
{
return 0;
}
+static void tomoyo_cred_transfer(struct cred *new, const struct cred *old)
+{
+ /*
+ * Since "struct tomoyo_domain_info *" is a sharable pointer,
+ * we don't need to duplicate.
+ */
+ new->security = old->security;
+}
+
static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
{
int rc;
* Execute permission is checked against pathname passed to do_execve()
* using current domain.
*/
- if (!domain) {
- struct tomoyo_domain_info *next_domain = NULL;
- int retval = tomoyo_find_next_domain(bprm, &next_domain);
-
- if (!retval)
- bprm->cred->security = next_domain;
- return retval;
- }
+ if (!domain)
+ return tomoyo_find_next_domain(bprm);
/*
* Read permission is checked against interpreters using next domain.
* '1' is the result of open_to_namei_flags(O_RDONLY).
*/
static struct security_operations tomoyo_security_ops = {
.name = "tomoyo",
+ .cred_alloc_blank = tomoyo_cred_alloc_blank,
.cred_prepare = tomoyo_cred_prepare,
+ .cred_transfer = tomoyo_cred_transfer,
.bprm_set_creds = tomoyo_bprm_set_creds,
.bprm_check_security = tomoyo_bprm_check_security,
#ifdef CONFIG_SYSCTL
struct path *path2);
int tomoyo_check_rewrite_permission(struct tomoyo_domain_info *domain,
struct file *filp);
-int tomoyo_find_next_domain(struct linux_binprm *bprm,
- struct tomoyo_domain_info **next_domain);
+int tomoyo_find_next_domain(struct linux_binprm *bprm);
/* Index numbers for Access Controls. */