Merge branch 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 11 Sep 2009 16:16:39 +0000 (09:16 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 11 Sep 2009 16:16:39 +0000 (09:16 -0700)
* 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6: (54 commits)
  [S390] tape: Use pr_xxx instead of dev_xxx in shared driver code
  [S390] Wire up page fault events for software perf counters.
  [S390] Remove smp_cpu_not_running.
  [S390] Get rid of cpuid.h header file.
  [S390] Limit cpu detection to 256 physical cpus.
  [S390] tape: Fix device online messages
  [S390] Enable guest page hinting by default.
  [S390] use generic scatterlist.h
  [S390] s390dbf: Add description for usage of "%s" in sprintf events
  [S390] Initialize __LC_THREAD_INFO early.
  [S390] fix recursive locking on page_table_lock
  [S390] kvm: use console_initcall() to initialize s390 virtio console
  [S390] tape: reversed order of labels
  [S390] hypfs: Use "%u" instead of "%d" for unsigned ints in snprintf
  [S390] kernel: Print an error message if kernel NSS cannot be defined
  [S390] zcrypt: Free ap_device if dev_set_name fails.
  [S390] zcrypt: Use spin_lock_bh in suspend callback
  [S390] xpram: Remove checksum validation for suspend/resume
  [S390] vmur: Invalid allocation sequence for vmur class
  [S390] hypfs: remove useless variable qname
  ...

167 files changed:
Documentation/keys.txt
Documentation/kmemleak.txt
MAINTAINERS
arch/alpha/include/asm/thread_info.h
arch/alpha/kernel/signal.c
arch/arm/include/asm/thread_info.h
arch/arm/kernel/entry-common.S
arch/arm/kernel/signal.c
arch/avr32/include/asm/thread_info.h
arch/avr32/kernel/entry-avr32b.S
arch/avr32/kernel/signal.c
arch/cris/kernel/ptrace.c
arch/frv/kernel/signal.c
arch/h8300/include/asm/thread_info.h
arch/h8300/kernel/signal.c
arch/ia64/kernel/process.c
arch/m32r/include/asm/thread_info.h
arch/m32r/kernel/signal.c
arch/mips/include/asm/thread_info.h
arch/mips/kernel/signal.c
arch/mn10300/kernel/signal.c
arch/parisc/include/asm/thread_info.h
arch/parisc/kernel/entry.S
arch/parisc/kernel/signal.c
arch/s390/kernel/signal.c
arch/sh/kernel/signal_32.c
arch/sh/kernel/signal_64.c
arch/sparc/kernel/signal_32.c
arch/sparc/kernel/signal_64.c
arch/x86/kernel/aperture_64.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/signal.c
arch/x86/mm/kmemcheck/kmemcheck.c
drivers/char/tpm/tpm_tis.c
drivers/infiniband/core/iwcm.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/multicast.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/smi.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/amso1100/c2.c
drivers/infiniband/hw/amso1100/c2_provider.c
drivers/infiniband/hw/cxgb3/cxio_hal.c
drivers/infiniband/hw/cxgb3/cxio_wr.h
drivers/infiniband/hw/cxgb3/iwch.c
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/cxgb3/iwch_cm.h
drivers/infiniband/hw/cxgb3/iwch_mem.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ehca/ehca_sqp.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_catas.c
drivers/infiniband/hw/mthca/mthca_config_reg.h
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_eq.c
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_provider.h
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/mthca/mthca_reset.c
drivers/infiniband/hw/nes/nes.h
drivers/infiniband/hw/nes/nes_cm.c
drivers/infiniband/hw/nes/nes_cm.h
drivers/infiniband/hw/nes/nes_hw.c
drivers/infiniband/hw/nes/nes_hw.h
drivers/infiniband/hw/nes/nes_utils.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/net/cxgb3/cxgb3_main.c
drivers/net/cxgb3/cxgb3_offload.c
drivers/net/cxgb3/cxgb3_offload.h
drivers/net/mlx4/cq.c
drivers/net/mlx4/eq.c
drivers/net/mlx4/icm.c
drivers/net/mlx4/main.c
drivers/net/mlx4/mcg.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/mr.c
drivers/net/mlx4/pd.c
drivers/net/mlx4/profile.c
drivers/net/mlx4/qp.c
drivers/net/mlx4/reset.c
drivers/net/mlx4/srq.c
drivers/net/tun.c
drivers/scsi/cxgb3i/cxgb3i_init.c
drivers/staging/comedi/comedi_fops.c
fs/locks.c
fs/namei.c
fs/nfsd/auth.c
fs/nfsd/nfssvc.c
fs/nfsd/vfs.c
fs/open.c
fs/sysfs/dir.c
fs/sysfs/inode.c
fs/sysfs/symlink.c
fs/sysfs/sysfs.h
fs/xattr.c
include/linux/cred.h
include/linux/key.h
include/linux/keyctl.h
include/linux/kmemcheck.h
include/linux/kmemleak.h
include/linux/lsm_audit.h
include/linux/sched.h
include/linux/security.h
include/linux/xattr.h
kernel/acct.c
kernel/cred.c
kernel/exit.c
kernel/fork.c
kernel/kmod.c
kernel/ptrace.c
kernel/sysctl.c
lib/Kconfig.debug
lib/is_single_threaded.c
mm/bootmem.c
mm/kmemleak.c
net/core/dev.c
net/ipv4/tcp_cong.c
security/Makefile
security/capability.c
security/commoncap.c
security/keys/Makefile
security/keys/compat.c
security/keys/gc.c [new file with mode: 0644]
security/keys/internal.h
security/keys/key.c
security/keys/keyctl.c
security/keys/keyring.c
security/keys/proc.c
security/keys/process_keys.c
security/keys/sysctl.c
security/lsm_audit.c
security/security.c
security/selinux/avc.c
security/selinux/hooks.c
security/selinux/include/av_inherit.h
security/selinux/include/av_perm_to_string.h
security/selinux/include/av_permissions.h
security/selinux/include/avc.h
security/selinux/include/class_to_string.h
security/selinux/include/flask.h
security/selinux/include/netlabel.h
security/selinux/include/xfrm.h
security/selinux/netlabel.c
security/selinux/ss/services.c
security/selinux/xfrm.c
security/smack/smack.h
security/smack/smack_access.c
security/smack/smack_lsm.c
security/tomoyo/common.c
security/tomoyo/common.h
security/tomoyo/domain.c
security/tomoyo/tomoyo.c
security/tomoyo/tomoyo.h

index b56aacc1fff864022dbdf67aac997d54f0d14f32..e4dbbdb1bd961e7a7a582c3dbb3ada1008218218 100644 (file)
@@ -26,7 +26,7 @@ This document has the following sections:
        - Notes on accessing payload contents
        - Defining a key type
        - Request-key callback service
-       - Key access filesystem
+       - Garbage collection
 
 
 ============
@@ -113,6 +113,9 @@ Each key has a number of attributes:
 
      (*) Dead. The key's type was unregistered, and so the key is now useless.
 
+Keys in the last three states are subject to garbage collection.  See the
+section on "Garbage collection".
+
 
 ====================
 KEY SERVICE OVERVIEW
@@ -754,6 +757,26 @@ The keyctl syscall functions are:
      successful.
 
 
+ (*) Install the calling process's session keyring on its parent.
+
+       long keyctl(KEYCTL_SESSION_TO_PARENT);
+
+     This function attempts to install the calling process's session keyring
+     on to the calling process's parent, replacing the parent's current session
+     keyring.
+
+     The calling process must have the same ownership as its parent, the
+     keyring must have the same ownership as the calling process, the calling
+     process must have LINK permission on the keyring and the active LSM module
+     mustn't deny permission, otherwise error EPERM will be returned.
+
+     Error ENOMEM will be returned if there was insufficient memory to complete
+     the operation, otherwise 0 will be returned to indicate success.
+
+     The keyring will be replaced next time the parent process leaves the
+     kernel and resumes executing userspace.
+
+
 ===============
 KERNEL SERVICES
 ===============
@@ -1231,3 +1254,17 @@ by executing:
 
 In this case, the program isn't required to actually attach the key to a ring;
 the rings are provided for reference.
+
+
+==================
+GARBAGE COLLECTION
+==================
+
+Dead keys (for which the type has been removed) will be automatically unlinked
+from those keyrings that point to them and deleted as soon as possible by a
+background garbage collector.
+
+Similarly, revoked and expired keys will be garbage collected, but only after a
+certain amount of time has passed.  This time is set as a number of seconds in:
+
+       /proc/sys/kernel/keys/gc_delay
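
As a quick illustration of the KEYCTL_SESSION_TO_PARENT operation documented above, here is a minimal userspace sketch. It is not part of this merge; it assumes only the updated linux/keyctl.h from this series and uses the raw syscall so no keyutils library is required.

/*
 * Hedged example: hand a freshly created session keyring to the parent
 * process.  Error handling is intentionally minimal.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/keyctl.h>

int main(void)
{
	/* Join a new anonymous session keyring owned by this process. */
	if (syscall(SYS_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL) < 0) {
		perror("KEYCTL_JOIN_SESSION_KEYRING");
		return 1;
	}

	/*
	 * Ask the kernel to install this session keyring on the parent.
	 * As noted above, the swap only takes effect the next time the
	 * parent returns to userspace.
	 */
	if (syscall(SYS_keyctl, KEYCTL_SESSION_TO_PARENT) < 0) {
		perror("KEYCTL_SESSION_TO_PARENT");
		return 1;
	}
	return 0;
}
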
index 89068030b01bbdb34392df6bf4fd8782d928ac57..34f6638aa5aceec30d290812fdc7fcebf3b86621 100644 (file)
@@ -27,6 +27,13 @@ To trigger an intermediate memory scan:
 
   # echo scan > /sys/kernel/debug/kmemleak
 
+To clear the list of all current possible memory leaks:
+
+  # echo clear > /sys/kernel/debug/kmemleak
+
+New leaks will then come up upon reading /sys/kernel/debug/kmemleak
+again.
+
 Note that the orphan objects are listed in the order they were allocated
 and one object at the beginning of the list may cause other subsequent
 objects to be reported as orphan.
@@ -42,6 +49,9 @@ Memory scanning parameters can be modified at run-time by writing to the
   scan=<secs>  - set the automatic memory scanning period in seconds
                  (default 600, 0 to stop the automatic scanning)
   scan         - trigger a memory scan
+  clear                - clear list of current memory leak suspects, done by
+                 marking all current reported unreferenced objects grey
+  dump=<addr>  - dump information about the object found at <addr>
 
 Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
 the kernel command line.
@@ -86,6 +96,27 @@ avoid this, kmemleak can also store the number of values pointing to an
 address inside the block address range that need to be found so that the
 block is not considered a leak. One example is __vmalloc().
 
+Testing specific sections with kmemleak
+---------------------------------------
+
+Upon initial bootup your /sys/kernel/debug/kmemleak output page may be
+quite extensive. This can also be the case if you have very buggy code
+when doing development. To work around these situations you can use the
+'clear' command to clear all reported unreferenced objects from the
+/sys/kernel/debug/kmemleak output. By issuing a 'scan' after a 'clear'
+you can find new unreferenced objects; this should help with testing
+specific sections of code.
+
+To test a critical section on demand with a clean kmemleak do:
+
+  # echo clear > /sys/kernel/debug/kmemleak
+  ... test your kernel or modules ...
+  # echo scan > /sys/kernel/debug/kmemleak
+
+Then, as usual, get your report with:
+
+  # cat /sys/kernel/debug/kmemleak
+
 Kmemleak API
 ------------
 
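
For reference, the kmemleak annotations mentioned above (and used by the x86 hunks later in this merge) can be exercised with a small kernel-side sketch like the one below. It is not part of this merge; the function and variable names are invented for illustration, and it relies only on the existing kmemleak_not_leak() annotation.

#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/kmemleak.h>

static void *example_buf;

/* Hypothetical init path: the buffer's only reference ends up in a
 * device register, so kmemleak would otherwise report it as a leak. */
static int example_init(void)
{
	example_buf = kmalloc(4096, GFP_KERNEL);
	if (!example_buf)
		return -ENOMEM;

	/* Tell kmemleak this block is intentionally unreferenced;
	 * kmemleak_ignore() would additionally stop it being scanned. */
	kmemleak_not_leak(example_buf);
	return 0;
}
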
index 8dca9d89c6c1d1d04e65e58e881e902b0e450d09..989ff1149390c783823cb5e671e5962574ffad9e 100644 (file)
@@ -439,7 +439,7 @@ F:  drivers/hwmon/ams/
 AMSO1100 RNIC DRIVER
 M:     Tom Tucker <tom@opengridcomputing.com>
 M:     Steve Wise <swise@opengridcomputing.com>
-L:     general@lists.openfabrics.org
+L:     linux-rdma@vger.kernel.org
 S:     Maintained
 F:     drivers/infiniband/hw/amso1100/
 
@@ -1494,7 +1494,7 @@ F:        drivers/net/cxgb3/
 
 CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
 M:     Steve Wise <swise@chelsio.com>
-L:     general@lists.openfabrics.org
+L:     linux-rdma@vger.kernel.org
 W:     http://www.openfabrics.org
 S:     Supported
 F:     drivers/infiniband/hw/cxgb3/
@@ -1868,7 +1868,7 @@ F:        fs/efs/
 EHCA (IBM GX bus InfiniBand adapter) DRIVER
 M:     Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 M:     Christoph Raisch <raisch@de.ibm.com>
-L:     general@lists.openfabrics.org
+L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/hw/ehca/
 
@@ -2552,7 +2552,7 @@ INFINIBAND SUBSYSTEM
 M:     Roland Dreier <rolandd@cisco.com>
 M:     Sean Hefty <sean.hefty@intel.com>
 M:     Hal Rosenstock <hal.rosenstock@gmail.com>
-L:     general@lists.openfabrics.org (moderated for non-subscribers)
+L:     linux-rdma@vger.kernel.org
 W:     http://www.openib.org/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
 S:     Supported
@@ -2729,7 +2729,7 @@ F:        drivers/net/ipg.c
 
 IPATH DRIVER
 M:     Ralph Campbell <infinipath@qlogic.com>
-L:     general@lists.openfabrics.org
+L:     linux-rdma@vger.kernel.org
 T:     git git://git.qlogic.com/ipath-linux-2.6
 S:     Supported
 F:     drivers/infiniband/hw/ipath/
@@ -3485,7 +3485,7 @@ F:        drivers/scsi/NCR_D700.*
 NETEFFECT IWARP RNIC DRIVER (IW_NES)
 M:     Faisal Latif <faisal.latif@intel.com>
 M:     Chien Tung <chien.tin.tung@intel.com>
-L:     general@lists.openfabrics.org
+L:     linux-rdma@vger.kernel.org
 W:     http://www.neteffect.com
 S:     Supported
 F:     drivers/infiniband/hw/nes/
index 60c83abfde7027832e0e7609ac7e032accb66386..5076a8860b18ff4440ee9d9187e57d696406321d 100644 (file)
@@ -75,6 +75,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_UAC_SIGBUS         7
 #define TIF_MEMDIE             8
 #define TIF_RESTORE_SIGMASK    9       /* restore signal mask in do_signal */
+#define TIF_NOTIFY_RESUME      10      /* callback before returning to user */
 #define TIF_FREEZE             16      /* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
@@ -82,10 +83,12 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
 #define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 /* Work to do on interrupt/exception return.  */
-#define _TIF_WORK_MASK         (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
+#define _TIF_WORK_MASK         (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+                                _TIF_NOTIFY_RESUME)
 
 /* Work to do on any return to userspace.  */
 #define _TIF_ALLWORK_MASK      (_TIF_WORK_MASK         \
index df65eaa84c4c41218a68121c3adce386437f6642..0932dbb1ef8eff444943645e15b0802b1d4cf5d5 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/binfmts.h>
 #include <linux/bitops.h>
 #include <linux/syscalls.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/sigcontext.h>
@@ -683,4 +684,11 @@ do_notify_resume(struct pt_regs *regs, struct switch_stack *sw,
 {
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal(regs, sw, r0, r19);
+
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
 }
index 73394e50cbca26e4198349de503fd18554f331d6..d3a39b1e6c0fc12cde0ccf4376a5d250b8232840 100644 (file)
@@ -130,11 +130,13 @@ extern void vfp_sync_state(struct thread_info *thread);
  *  TIF_SYSCALL_TRACE  - syscall trace active
  *  TIF_SIGPENDING     - signal pending
  *  TIF_NEED_RESCHED   - rescheduling necessary
+ *  TIF_NOTIFY_RESUME  - callback before returning to user
  *  TIF_USEDFPU                - FPU was used by this task this quantum (SMP)
  *  TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED
  */
 #define TIF_SIGPENDING         0
 #define TIF_NEED_RESCHED       1
+#define TIF_NOTIFY_RESUME      2       /* callback before returning to user */
 #define TIF_SYSCALL_TRACE      8
 #define TIF_POLLING_NRFLAG     16
 #define TIF_USING_IWMMXT       17
@@ -143,6 +145,7 @@ extern void vfp_sync_state(struct thread_info *thread);
 
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
 #define _TIF_USING_IWMMXT      (1 << TIF_USING_IWMMXT)
index 8c3de1a350b5dd85bdf06b79f1dab163cd305743..7813ab782fda6796bf455a39dcc723a6236707e5 100644 (file)
@@ -51,7 +51,7 @@ fast_work_pending:
 work_pending:
        tst     r1, #_TIF_NEED_RESCHED
        bne     work_resched
-       tst     r1, #_TIF_SIGPENDING
+       tst     r1, #_TIF_SIGPENDING|_TIF_NOTIFY_RESUME
        beq     no_work_pending
        mov     r0, sp                          @ 'regs'
        mov     r2, why                         @ 'syscall'
index f6bc5d442782374bb99618702722dde5d37e1dc2..b76fe06d92e746b68855d42112071f69fff71c5a 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/personality.h>
 #include <linux/freezer.h>
 #include <linux/uaccess.h>
+#include <linux/tracehook.h>
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
@@ -707,4 +708,11 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 {
        if (thread_flags & _TIF_SIGPENDING)
                do_signal(&current->blocked, regs, syscall);
+
+       if (thread_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
 }
index fc42de5ca209ac5e9de0cfca9c1f35561f740be2..fd0c5d7e933701c31166deb5f4f7269c2adb8206 100644 (file)
@@ -84,6 +84,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE             6
 #define TIF_RESTORE_SIGMASK    7       /* restore signal mask in do_signal */
 #define TIF_CPU_GOING_TO_SLEEP 8       /* CPU is entering sleep 0 mode */
+#define TIF_NOTIFY_RESUME      9       /* callback before returning to user */
 #define TIF_FREEZE             29
 #define TIF_DEBUG              30      /* debugging enabled */
 #define TIF_USERSPACE          31      /* true if FS sets userspace */
@@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE            (1 << TIF_MEMDIE)
 #define _TIF_RESTORE_SIGMASK   (1 << TIF_RESTORE_SIGMASK)
 #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 #define _TIF_FREEZE            (1 << TIF_FREEZE)
 
 /* Note: The masks below must never span more than 16 bits! */
@@ -103,13 +105,15 @@ static inline struct thread_info *current_thread_info(void)
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK                         \
        ((1 << TIF_SIGPENDING)                  \
+        | _TIF_NOTIFY_RESUME                   \
         | (1 << TIF_NEED_RESCHED)              \
         | (1 << TIF_POLLING_NRFLAG)            \
         | (1 << TIF_BREAKPOINT)                \
         | (1 << TIF_RESTORE_SIGMASK))
 
 /* work to do on any return to userspace */
-#define _TIF_ALLWORK_MASK      (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE))
+#define _TIF_ALLWORK_MASK      (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \
+                                _TIF_NOTIFY_RESUME)
 /* work to do on return from debug mode */
 #define _TIF_DBGWORK_MASK      (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT))
 
index 009a80155d67d96b7da1ee34e60ec726abcfdb4b..169268c40ae2552df9430128e4ae6e6964249989 100644 (file)
@@ -281,7 +281,7 @@ syscall_exit_work:
        ld.w    r1, r0[TI_flags]
        rjmp    1b
 
-2:     mov     r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
+2:     mov     r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NOTIFY_RESUME
        tst     r1, r2
        breq    3f
        unmask_interrupts
index 27227561bad67a7ebea577acffa3796f5998b8ea..64f886fac2efef6ea05bd1c0703cfdf4ccb8d7d4 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ptrace.h>
 #include <linux/unistd.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/ucontext.h>
@@ -322,4 +323,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
 
        if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal(regs, &current->blocked, syscall);
+
+       if (ti->flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
 }
index b326023baab28d3d8be5eee2acc472aa323256e6..48b0f3912632f98d48ae22e40945d057aa393269 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -36,4 +37,11 @@ void do_notify_resume(int canrestart, struct pt_regs *regs,
        /* deal with pending signal delivery */
        if (thread_info_flags & _TIF_SIGPENDING)
                do_signal(canrestart,regs);
+
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
 }
index 4a7a62c6e7833ed91a9087b3762cb3d0ba24df5f..6b0a2b6fed6a9326ab8f4d28955cda0bada96b6a 100644 (file)
@@ -572,6 +572,8 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(__frame);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 
 } /* end do_notify_resume() */
index 8bbc8b0ee45db3ad2ddb97f6c6c9be4b5550315f..70e67e47d0205eef549c4529c3ac627ac9cd2e42 100644 (file)
@@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void)
                                           TIF_NEED_RESCHED */
 #define TIF_MEMDIE             4
 #define TIF_RESTORE_SIGMASK    5       /* restore signal mask in do_signal() */
+#define TIF_NOTIFY_RESUME      6       /* callback before returning to user */
 #define TIF_FREEZE             16      /* is freezing for suspend */
 
 /* as above, but as bit values */
@@ -97,6 +98,7 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 #define _TIF_FREEZE            (1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK         0x0000FFFE      /* work to do on interrupt/exception return */
index cf3472f7389b9467bdb483ac883e81d4414a0532..af842c369d24301e4b38861d29ffae2df0f863e7 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 
 #include <asm/setup.h>
 #include <asm/uaccess.h>
@@ -552,4 +553,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
 {
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal(regs, NULL);
+
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
 }
index 5d7c0e5b9e76f150b20269cb9ba9e2497062209a..89969e9500456e3a17212b6f20ea47d6abbc6fb3 100644 (file)
@@ -192,6 +192,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
        if (test_thread_flag(TIF_NOTIFY_RESUME)) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(&scr->pt);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 
        /* copy user rbs to kernel rbs */
index 07bb5bd00e2a0660dc3be6c0c0c4514da8ade81b..71578151a403e9353c007b849fd5e711336fc3fc 100644 (file)
@@ -149,6 +149,7 @@ static inline unsigned int get_thread_fault_code(void)
 #define TIF_NEED_RESCHED       2       /* rescheduling necessary */
 #define TIF_SINGLESTEP         3       /* restore singlestep on return to user mode */
 #define TIF_IRET               4       /* return with iret */
+#define TIF_NOTIFY_RESUME      5       /* callback before returning to user */
 #define TIF_RESTORE_SIGMASK    8       /* restore signal mask in do_signal() */
 #define TIF_USEDFPU            16      /* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG     17      /* true if poll_idle() is polling TIF_NEED_RESCHED */
@@ -160,6 +161,7 @@ static inline unsigned int get_thread_fault_code(void)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP                (1<<TIF_SINGLESTEP)
 #define _TIF_IRET              (1<<TIF_IRET)
+#define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
 #define _TIF_USEDFPU           (1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
index 18124542a6ebb695444c9681996147b69ac5d6de..144b0f124fc72f08b20f93336f96da81327fe61c 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
@@ -408,5 +409,12 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset,
        if (thread_info_flags & _TIF_SIGPENDING)
                do_signal(regs,oldset);
 
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
+
        clear_thread_flag(TIF_IRET);
 }
index f9df720d2e40e215aa0cb21561f467d6d95a2b6b..01cc1630b66cc4cf7f8f7a0159245fd72e5bf893 100644 (file)
@@ -115,6 +115,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define TIF_NEED_RESCHED       2       /* rescheduling necessary */
 #define TIF_SYSCALL_AUDIT      3       /* syscall auditing active */
 #define TIF_SECCOMP            4       /* secure computing */
+#define TIF_NOTIFY_RESUME      5       /* callback before returning to user */
 #define TIF_RESTORE_SIGMASK    9       /* restore signal mask in do_signal() */
 #define TIF_USEDFPU            16      /* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG     17      /* true if poll_idle() is polling TIF_NEED_RESCHED */
@@ -139,6 +140,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
 #define _TIF_SYSCALL_AUDIT     (1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP           (1<<TIF_SECCOMP)
+#define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
 #define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
 #define _TIF_USEDFPU           (1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
index 830c5ef9932b389cbc4ad7f1410b750721a6b070..6254041b942f9a9b56295024a0fd1320fb2da6ae 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/compiler.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <linux/tracehook.h>
 
 #include <asm/abi.h>
 #include <asm/asm.h>
@@ -700,4 +701,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
        /* deal with pending signal delivery */
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal(regs);
+
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
 }
index feb2f2e810db7c785ea5a7562ac77dfb79666a79..a21f43bc68e269cf412e6ae3b0c0293c0dcd96c0 100644 (file)
@@ -568,5 +568,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(__frame);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 }
index 4ce0edfbe9694dc4daaed2253a625e9d46a25c72..ac775a76bff71508a939a08fc983d9d4b8e272f5 100644 (file)
@@ -59,6 +59,7 @@ struct thread_info {
 #define TIF_MEMDIE             5
 #define TIF_RESTORE_SIGMASK    6       /* restore saved signal mask */
 #define TIF_FREEZE             7       /* is freezing for suspend */
+#define TIF_NOTIFY_RESUME      8       /* callback before returning to user */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
@@ -67,8 +68,9 @@ struct thread_info {
 #define _TIF_32BIT             (1 << TIF_32BIT)
 #define _TIF_RESTORE_SIGMASK   (1 << TIF_RESTORE_SIGMASK)
 #define _TIF_FREEZE            (1 << TIF_FREEZE)
+#define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 
-#define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | \
+#define _TIF_USER_WORK_MASK     (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
                                  _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
 
 #endif /* __KERNEL__ */
index e552e547cb93fd88050ee800d62f2f2667b3ba5d..8c4712b74dc13b626f449e699ee31aca15701315 100644 (file)
@@ -948,7 +948,7 @@ intr_check_sig:
        /* As above */
        mfctl   %cr30,%r1
        LDREG   TI_FLAGS(%r1),%r19
-       ldi     (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK), %r20
+       ldi     (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME), %r20
        and,COND(<>)    %r19, %r20, %r0
        b,n     intr_restore    /* skip past if we've nothing to do */
 
index f82544225e8e8b43b8ca76664101a849d1e8a7f7..8eb3c63c407a43e5aaac3e96654ce391cd8764a5 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/stddef.h>
 #include <linux/compat.h>
 #include <linux/elf.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/rt_sigframe.h>
 #include <asm/uaccess.h>
@@ -645,4 +646,11 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall)
        if (test_thread_flag(TIF_SIGPENDING) ||
            test_thread_flag(TIF_RESTORE_SIGMASK))
                do_signal(regs, in_syscall);
+
+       if (test_thread_flag(TIF_NOTIFY_RESUME)) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
+       }
 }
index 062bd64e65fabe1f09641a4c3e0290ae4ff013ee..6b4fef877f9d0ccf0fd28ea654b9553ab3b70df9 100644 (file)
@@ -536,4 +536,6 @@ void do_notify_resume(struct pt_regs *regs)
 {
        clear_thread_flag(TIF_NOTIFY_RESUME);
        tracehook_notify_resume(regs);
+       if (current->replacement_session_keyring)
+               key_replace_session_keyring();
 }
index b5afbec1db59ce6fb2d74cd0cd9e0fa51b879d0f..04a21883f32730bf18013031fe342e27c50b6b99 100644 (file)
@@ -640,5 +640,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0,
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 }
index 0663a0ee6021f4ab31c2bb32fd7a24fa82cd9a6a..9e5c9b1d7e9872fe3591e8520e8b7383e96e7962 100644 (file)
@@ -772,5 +772,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_info
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 }
index 181d069a2d44bdeb160125053150b894e550598b..7ce1a1005b1da4c3b13f8b87927d060376279f2a 100644 (file)
@@ -590,6 +590,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0,
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 }
 
index ec82d76dc6f2cc245ca54203dd38c35fee192ed2..647afbda7ae1f170896675dcc4603dfe5c58ec0b 100644 (file)
@@ -613,5 +613,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 }
+
index 676debfc1702346939a5f17423e6a706f7e69886..128111d8ffe0de7ffcdaf0891221ff37d0ae61f8 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/bitops.h>
 #include <linux/ioport.h>
 #include <linux/suspend.h>
+#include <linux/kmemleak.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/iommu.h>
@@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void)
         * code for safe
         */
        p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+       /*
+        * Kmemleak should not scan this block as it may not be mapped via the
+        * kernel direct mapping.
+        */
+       kmemleak_ignore(p);
        if (!p || __pa(p)+aper_size > 0xffffffff) {
                printk(KERN_ERR
                        "Cannot allocate aperture memory hole (%p,%uK)\n",
index 1a041bcf506bd8efd0d27a4e99e2003e07ff6445..fa80f60e9607db49e4ed1b3266869d961828afe9 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/dmar.h>
 #include <linux/bootmem.h>
 #include <linux/pci.h>
+#include <linux/kmemleak.h>
 
 #include <asm/proto.h>
 #include <asm/dma.h>
@@ -88,6 +89,11 @@ void __init dma32_reserve_bootmem(void)
        size = roundup(dma32_bootmem_size, align);
        dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
                                 512ULL<<20);
+       /*
+        * Kmemleak should not scan this block as it may not be mapped via the
+        * kernel direct mapping.
+        */
+       kmemleak_ignore(dma32_bootmem_ptr);
        if (dma32_bootmem_ptr)
                dma32_bootmem_size = size;
        else
index 4c578751e94ec08dc4e6ed69338947105e444654..81e58238c4ce642ed54ea3ce63c265427e1d6984 100644 (file)
@@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(regs);
+               if (current->replacement_session_keyring)
+                       key_replace_session_keyring();
        }
 
 #ifdef CONFIG_X86_32
index 2c55ed098654f3815586973a52446b83c3db5228..528bf954eb7403c82d0816957b02a653ff9bcf18 100644 (file)
@@ -331,6 +331,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs,
        kmemcheck_shadow_set(shadow, size);
 }
 
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+       enum kmemcheck_shadow status;
+       void *shadow;
+
+       shadow = kmemcheck_shadow_lookup(addr);
+       if (!shadow)
+               return true;
+
+       status = kmemcheck_shadow_test(shadow, size);
+
+       return status == KMEMCHECK_SHADOW_INITIALIZED;
+}
+
 /* Access may cross page boundary */
 static void kmemcheck_read(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
index aec1931608aa28e3645463735c299805a50ab2b8..0b73e4ec1addafff42a79752a666fecd4b25d665 100644 (file)
@@ -450,6 +450,12 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
                goto out_err;
        }
 
+       /* Default timeouts */
+       chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+       chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
+       chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+       chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
+
        if (request_locality(chip, 0) != 0) {
                rc = -ENODEV;
                goto out_err;
@@ -457,12 +463,6 @@ static int tpm_tis_init(struct device *dev, resource_size_t start,
 
        vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
 
-       /* Default timeouts */
-       chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-       chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
-       chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-       chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
-
        dev_info(dev,
                 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
                 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
index 8f9509e1ebf76494217b7d48f6b7bdef307ac3b4..55d093a36ae48a29a09877176b1448e279555d00 100644 (file)
@@ -362,6 +362,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
                 * In either case, must tell the provider to reject.
                 */
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               cm_id->device->iwcm->reject(cm_id, NULL, 0);
                break;
        case IW_CM_STATE_CONN_SENT:
        case IW_CM_STATE_DESTROYING:
index de922a04ca2dbd5ca4f8696410750735bdaee681..7522008fda86880f759128e694d9df0cda68c764 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -45,14 +46,21 @@ MODULE_DESCRIPTION("kernel IB MAD API");
 MODULE_AUTHOR("Hal Rosenstock");
 MODULE_AUTHOR("Sean Hefty");
 
+int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
+int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
+
+module_param_named(send_queue_size, mad_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
+module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+
 static struct kmem_cache *ib_mad_cache;
 
 static struct list_head ib_mad_port_list;
 static u32 ib_mad_client_id = 0;
 
 /* Port list lock */
-static spinlock_t ib_mad_port_list_lock;
-
+static DEFINE_SPINLOCK(ib_mad_port_list_lock);
 
 /* Forward declarations */
 static int method_in_use(struct ib_mad_mgmt_method_table **method,
@@ -1974,7 +1982,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
        unsigned long delay;
 
        if (list_empty(&mad_agent_priv->wait_list)) {
-               cancel_delayed_work(&mad_agent_priv->timed_work);
+               __cancel_delayed_work(&mad_agent_priv->timed_work);
        } else {
                mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
                                         struct ib_mad_send_wr_private,
@@ -1983,7 +1991,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
                if (time_after(mad_agent_priv->timeout,
                               mad_send_wr->timeout)) {
                        mad_agent_priv->timeout = mad_send_wr->timeout;
-                       cancel_delayed_work(&mad_agent_priv->timed_work);
+                       __cancel_delayed_work(&mad_agent_priv->timed_work);
                        delay = mad_send_wr->timeout - jiffies;
                        if ((long)delay <= 0)
                                delay = 1;
@@ -2023,7 +2031,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
 
        /* Reschedule a work item if we have a shorter timeout */
        if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
-               cancel_delayed_work(&mad_agent_priv->timed_work);
+               __cancel_delayed_work(&mad_agent_priv->timed_work);
                queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
                                   &mad_agent_priv->timed_work, delay);
        }
@@ -2736,8 +2744,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
        qp_init_attr.send_cq = qp_info->port_priv->cq;
        qp_init_attr.recv_cq = qp_info->port_priv->cq;
        qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
-       qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+       qp_init_attr.cap.max_send_wr = mad_sendq_size;
+       qp_init_attr.cap.max_recv_wr = mad_recvq_size;
        qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
        qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
        qp_init_attr.qp_type = qp_type;
@@ -2752,8 +2760,8 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
                goto error;
        }
        /* Use minimum queue sizes unless the CQ is resized */
-       qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE;
-       qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE;
+       qp_info->send_queue.max_active = mad_sendq_size;
+       qp_info->recv_queue.max_active = mad_recvq_size;
        return 0;
 
 error:
@@ -2792,7 +2800,7 @@ static int ib_mad_port_open(struct ib_device *device,
        init_mad_qp(port_priv, &port_priv->qp_info[0]);
        init_mad_qp(port_priv, &port_priv->qp_info[1]);
 
-       cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
+       cq_size = (mad_sendq_size + mad_recvq_size) * 2;
        port_priv->cq = ib_create_cq(port_priv->device,
                                     ib_mad_thread_completion_handler,
                                     NULL, port_priv, cq_size, 0);
@@ -2984,7 +2992,11 @@ static int __init ib_mad_init_module(void)
 {
        int ret;
 
-       spin_lock_init(&ib_mad_port_list_lock);
+       mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
+       mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
+
+       mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
+       mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
 
        ib_mad_cache = kmem_cache_create("ib_mad",
                                         sizeof(struct ib_mad_private),
@@ -3021,4 +3033,3 @@ static void __exit ib_mad_cleanup_module(void)
 
 module_init(ib_mad_init_module);
 module_exit(ib_mad_cleanup_module);
-
index 05ce331733b069413c69d1ff7d9a2c00f4231cd2..9430ab4969c55505d0cbf59ddafeda741a8892a2 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2009 HNR Consulting. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -49,6 +50,8 @@
 /* QP and CQ parameters */
 #define IB_MAD_QP_SEND_SIZE    128
 #define IB_MAD_QP_RECV_SIZE    512
+#define IB_MAD_QP_MIN_SIZE     64
+#define IB_MAD_QP_MAX_SIZE     8192
 #define IB_MAD_SEND_REQ_MAX_SG 2
 #define IB_MAD_RECV_REQ_MAX_SG 1
 
index 107f170c57cdb12cc05786551b21d29b01ecc6de..8d82ba17135366317ba2d3e856c78c4cfb97829a 100644 (file)
@@ -106,6 +106,8 @@ struct mcast_group {
        struct ib_sa_query      *query;
        int                     query_id;
        u16                     pkey_index;
+       u8                      leave_state;
+       int                     retries;
 };
 
 struct mcast_member {
@@ -350,6 +352,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state)
 
        rec = group->rec;
        rec.join_state = leave_state;
+       group->leave_state = leave_state;
 
        ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
                                       port->port_num, IB_SA_METHOD_DELETE, &rec,
@@ -542,7 +545,11 @@ static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
 {
        struct mcast_group *group = context;
 
-       mcast_work_handler(&group->work);
+       if (status && group->retries > 0 &&
+           !send_leave(group, group->leave_state))
+               group->retries--;
+       else
+               mcast_work_handler(&group->work);
 }
 
 static struct mcast_group *acquire_group(struct mcast_port *port,
@@ -565,6 +572,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
        if (!group)
                return NULL;
 
+       group->retries = 3;
        group->port = port;
        group->rec.mgid = *mgid;
        group->pkey_index = MCAST_INVALID_PKEY_INDEX;
index 1865049e80f7548be1c814e6a9bfc73229f5195a..82543716d59ef74ce57029e03fd1e09aef6032a3 100644 (file)
@@ -109,10 +109,10 @@ static struct ib_client sa_client = {
        .remove = ib_sa_remove_one
 };
 
-static spinlock_t idr_lock;
+static DEFINE_SPINLOCK(idr_lock);
 static DEFINE_IDR(query_idr);
 
-static spinlock_t tid_lock;
+static DEFINE_SPINLOCK(tid_lock);
 static u32 tid;
 
 #define PATH_REC_FIELD(field) \
@@ -1077,9 +1077,6 @@ static int __init ib_sa_init(void)
 {
        int ret;
 
-       spin_lock_init(&idr_lock);
-       spin_lock_init(&tid_lock);
-
        get_random_bytes(&tid, sizeof tid);
 
        ret = ib_register_client(&sa_client);
index 87236753bce9b7a93f9b41b30297e7d4c5a4d432..5855e4405d9bf2ca2b04a0fac5aebabed46c538b 100644 (file)
@@ -52,6 +52,10 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
        hop_cnt = smp->hop_cnt;
 
        /* See section 14.2.2.2, Vol 1 IB spec */
+       /* C14-6 -- valid hop_cnt values are from 0 to 63 */
+       if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+               return IB_SMI_DISCARD;
+
        if (!ib_get_smp_direction(smp)) {
                /* C14-9:1 */
                if (hop_cnt && hop_ptr == 0) {
@@ -133,6 +137,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
        hop_cnt = smp->hop_cnt;
 
        /* See section 14.2.2.2, Vol 1 IB spec */
+       /* C14-6 -- valid hop_cnt values are from 0 to 63 */
+       if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
+               return IB_SMI_DISCARD;
+
        if (!ib_get_smp_direction(smp)) {
                /* C14-9:1 -- sender should have incremented hop_ptr */
                if (hop_cnt && hop_ptr == 0)
index eb36a81dd09bff2d544675de4c37c447614d3262..d3fff9e008a3e01f1f81795101738ae236b02dcb 100644 (file)
@@ -73,7 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr);
 DEFINE_IDR(ib_uverbs_qp_idr);
 DEFINE_IDR(ib_uverbs_srq_idr);
 
-static spinlock_t map_lock;
+static DEFINE_SPINLOCK(map_lock);
 static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
 
@@ -584,14 +584,16 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 
        if (hdr.command < 0                             ||
            hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-           !uverbs_cmd_table[hdr.command]              ||
-           !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+           !uverbs_cmd_table[hdr.command])
                return -EINVAL;
 
        if (!file->ucontext &&
            hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
                return -EINVAL;
 
+       if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
+               return -ENOSYS;
+
        return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr,
                                             hdr.in_words * 4, hdr.out_words * 4);
 }
@@ -836,8 +838,6 @@ static int __init ib_uverbs_init(void)
 {
        int ret;
 
-       spin_lock_init(&map_lock);
-
        ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
                                     "infiniband_verbs");
        if (ret) {
index 0cfbb6d2f762b5699c4edef953ab5757196dae5c..8250740c94b09bce8f5c73c6928d895cd24a28c3 100644 (file)
@@ -86,11 +86,7 @@ MODULE_DEVICE_TABLE(pci, c2_pci_table);
 
 static void c2_print_macaddr(struct net_device *netdev)
 {
-       pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
-               "IRQ %u\n", netdev->name,
-               netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
-               netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
-               netdev->irq);
+       pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq);
 }
 
 static void c2_set_rxbufsize(struct c2_port *c2_port)
index f1948fad85d7cd1586c009bd7e89d53f2c30375b..ad723bd8bf498090560c72bc78a1a06dc72e9730 100644 (file)
@@ -780,11 +780,11 @@ int c2_register_device(struct c2_dev *dev)
        /* Register pseudo network device */
        dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
        if (!dev->pseudo_netdev)
-               goto out3;
+               goto out;
 
        ret = register_netdev(dev->pseudo_netdev);
        if (ret)
-               goto out2;
+               goto out_free_netdev;
 
        pr_debug("%s:%u\n", __func__, __LINE__);
        strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
@@ -851,6 +851,10 @@ int c2_register_device(struct c2_dev *dev)
        dev->ibdev.post_recv = c2_post_receive;
 
        dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+       if (dev->ibdev.iwcm == NULL) {
+               ret = -ENOMEM;
+               goto out_unregister_netdev;
+       }
        dev->ibdev.iwcm->add_ref = c2_add_ref;
        dev->ibdev.iwcm->rem_ref = c2_rem_ref;
        dev->ibdev.iwcm->get_qp = c2_get_qp;
@@ -862,23 +866,25 @@ int c2_register_device(struct c2_dev *dev)
 
        ret = ib_register_device(&dev->ibdev);
        if (ret)
-               goto out1;
+               goto out_free_iwcm;
 
        for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
                ret = device_create_file(&dev->ibdev.dev,
                                               c2_dev_attributes[i]);
                if (ret)
-                       goto out0;
+                       goto out_unregister_ibdev;
        }
-       goto out3;
+       goto out;
 
-out0:
+out_unregister_ibdev:
        ib_unregister_device(&dev->ibdev);
-out1:
+out_free_iwcm:
+       kfree(dev->ibdev.iwcm);
+out_unregister_netdev:
        unregister_netdev(dev->pseudo_netdev);
-out2:
+out_free_netdev:
        free_netdev(dev->pseudo_netdev);
-out3:
+out:
        pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
        return ret;
 }
index 62f9cf2f94ec647756dcd5f2ae06c9f01a9eafac..72ed3396b721e36d528f3d6191034493447c33ae 100644 (file)
@@ -852,7 +852,9 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
        wqe->qpcaps = attr->qpcaps;
        wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
        wqe->rqe_count = cpu_to_be16(attr->rqe_count);
-       wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
+       wqe->flags_rtr_type = cpu_to_be16(attr->flags |
+                                         V_RTR_TYPE(attr->rtr_type) |
+                                         V_CHAN(attr->chan));
        wqe->ord = cpu_to_be32(attr->ord);
        wqe->ird = cpu_to_be32(attr->ird);
        wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
@@ -1032,6 +1034,7 @@ err3:
 err2:
        cxio_hal_destroy_ctrl_qp(rdev_p);
 err1:
+       rdev_p->t3cdev_p->ulp = NULL;
        list_del(&rdev_p->entry);
        return err;
 }
index 32e3b1461d81d551f3b1aa2a602c9449d2597c8c..a197a5b7ac7fc74836aaf2ece28ff93c4271428d 100644 (file)
@@ -327,6 +327,11 @@ enum rdma_init_rtr_types {
 #define V_RTR_TYPE(x)  ((x) << S_RTR_TYPE)
 #define G_RTR_TYPE(x)  ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
 
+#define S_CHAN         4
+#define M_CHAN         0x3
+#define V_CHAN(x)      ((x) << S_CHAN)
+#define G_CHAN(x)      ((((x) >> S_CHAN)) & M_CHAN)
+
 struct t3_rdma_init_attr {
        u32 tid;
        u32 qpid;
@@ -346,6 +351,7 @@ struct t3_rdma_init_attr {
        u16 flags;
        u16 rqe_count;
        u32 irs;
+       u32 chan;
 };
 
 struct t3_rdma_init_wr {
index 26fc0a4eaa749f91477a711be17710e98dc36ff1..b0ea0105ddf6c2caa01dda9716d7f8f38e21642b 100644 (file)
@@ -51,7 +51,7 @@ cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
 
 static void open_rnic_dev(struct t3cdev *);
 static void close_rnic_dev(struct t3cdev *);
-static void iwch_err_handler(struct t3cdev *, u32, u32);
+static void iwch_event_handler(struct t3cdev *, u32, u32);
 
 struct cxgb3_client t3c_client = {
        .name = "iw_cxgb3",
@@ -59,7 +59,7 @@ struct cxgb3_client t3c_client = {
        .remove = close_rnic_dev,
        .handlers = t3c_handlers,
        .redirect = iwch_ep_redirect,
-       .err_handler = iwch_err_handler
+       .event_handler = iwch_event_handler
 };
 
 static LIST_HEAD(dev_list);
@@ -105,11 +105,9 @@ static void rnic_init(struct iwch_dev *rnicp)
 static void open_rnic_dev(struct t3cdev *tdev)
 {
        struct iwch_dev *rnicp;
-       static int vers_printed;
 
        PDBG("%s t3cdev %p\n", __func__,  tdev);
-       if (!vers_printed++)
-               printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
+       printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
                       DRV_VERSION);
        rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
        if (!rnicp) {
@@ -162,21 +160,36 @@ static void close_rnic_dev(struct t3cdev *tdev)
        mutex_unlock(&dev_mutex);
 }
 
-static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error)
+static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
 {
        struct cxio_rdev *rdev = tdev->ulp;
-       struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev);
+       struct iwch_dev *rnicp;
        struct ib_event event;
+       u32    portnum = port_id + 1;
 
-       if (status == OFFLOAD_STATUS_DOWN) {
+       if (!rdev)
+               return;
+       rnicp = rdev_to_iwch_dev(rdev);
+       switch (evt) {
+       case OFFLOAD_STATUS_DOWN: {
                rdev->flags = CXIO_ERROR_FATAL;
-
-               event.device = &rnicp->ibdev;
                event.event  = IB_EVENT_DEVICE_FATAL;
-               event.element.port_num = 0;
-               ib_dispatch_event(&event);
+               break;
+               }
+       case OFFLOAD_PORT_DOWN: {
+               event.event  = IB_EVENT_PORT_ERR;
+               break;
+               }
+       case OFFLOAD_PORT_UP: {
+               event.event  = IB_EVENT_PORT_ACTIVE;
+               break;
+               }
        }
 
+       event.device = &rnicp->ibdev;
+       event.element.port_num = portnum;
+       ib_dispatch_event(&event);
+
        return;
 }
 
index 52d7bb0c2a126cbf8a867cb522e9062c39015a5b..66b41351910ad390d5ec70dbb18c5ab99a196f45 100644 (file)
@@ -286,7 +286,7 @@ void __free_ep(struct kref *kref)
        ep = container_of(container_of(kref, struct iwch_ep_common, kref),
                          struct iwch_ep, com);
        PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
-       if (ep->com.flags & RELEASE_RESOURCES) {
+       if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
                cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
                dst_release(ep->dst);
                l2t_release(L2DATA(ep->com.tdev), ep->l2t);
@@ -297,7 +297,7 @@ void __free_ep(struct kref *kref)
 static void release_ep_resources(struct iwch_ep *ep)
 {
        PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
-       ep->com.flags |= RELEASE_RESOURCES;
+       set_bit(RELEASE_RESOURCES, &ep->com.flags);
        put_ep(&ep->com);
 }
 
@@ -786,10 +786,12 @@ static void connect_request_upcall(struct iwch_ep *ep)
        event.private_data_len = ep->plen;
        event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
        event.provider_data = ep;
-       if (state_read(&ep->parent_ep->com) != DEAD)
+       if (state_read(&ep->parent_ep->com) != DEAD) {
+               get_ep(&ep->com);
                ep->parent_ep->com.cm_id->event_handler(
                                                ep->parent_ep->com.cm_id,
                                                &event);
+       }
        put_ep(&ep->parent_ep->com);
        ep->parent_ep = NULL;
 }
@@ -1156,8 +1158,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
         * We get 2 abort replies from the HW.  The first one must
         * be ignored except for scribbling that we need one more.
         */
-       if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) {
-               ep->com.flags |= ABORT_REQ_IN_PROGRESS;
+       if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
                return CPL_RET_BUF_DONE;
        }
 
@@ -1477,10 +1478,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
                /*
                 * We're gonna mark this puppy DEAD, but keep
                 * the reference on it until the ULP accepts or
-                * rejects the CR.
+                * rejects the CR. Also wake up anyone waiting
+                * in rdma connection migration (see iwch_accept_cr()).
                 */
                __state_set(&ep->com, CLOSING);
-               get_ep(&ep->com);
+               ep->com.rpl_done = 1;
+               ep->com.rpl_err = -ECONNRESET;
+               PDBG("waking up ep %p\n", ep);
+               wake_up(&ep->com.waitq);
                break;
        case MPA_REP_SENT:
                __state_set(&ep->com, CLOSING);
@@ -1561,8 +1566,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
         * We get 2 peer aborts from the HW.  The first one must
         * be ignored except for scribbling that we need one more.
         */
-       if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) {
-               ep->com.flags |= PEER_ABORT_IN_PROGRESS;
+       if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
                return CPL_RET_BUF_DONE;
        }
 
@@ -1589,9 +1593,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
                /*
                 * We're gonna mark this puppy DEAD, but keep
                 * the reference on it until the ULP accepts or
-                * rejects the CR.
+                * rejects the CR. Also wake up anyone waiting
+                * in rdma connection migration (see iwch_accept_cr()).
                 */
-               get_ep(&ep->com);
+               ep->com.rpl_done = 1;
+               ep->com.rpl_err = -ECONNRESET;
+               PDBG("waking up ep %p\n", ep);
+               wake_up(&ep->com.waitq);
                break;
        case MORIBUND:
        case CLOSING:
@@ -1797,6 +1805,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
                err = send_mpa_reject(ep, pdata, pdata_len);
                err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
        }
+       put_ep(&ep->com);
        return 0;
 }
 
@@ -1810,8 +1819,10 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-       if (state_read(&ep->com) == DEAD)
-               return -ECONNRESET;
+       if (state_read(&ep->com) == DEAD) {
+               err = -ECONNRESET;
+               goto err;
+       }
 
        BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
        BUG_ON(!qp);
@@ -1819,15 +1830,14 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
            (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
                abort_connection(ep, NULL, GFP_KERNEL);
-               return -EINVAL;
+               err = -EINVAL;
+               goto err;
        }
 
        cm_id->add_ref(cm_id);
        ep->com.cm_id = cm_id;
        ep->com.qp = qp;
 
-       ep->com.rpl_done = 0;
-       ep->com.rpl_err = 0;
        ep->ird = conn_param->ird;
        ep->ord = conn_param->ord;
 
@@ -1836,8 +1846,6 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
        PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
 
-       get_ep(&ep->com);
-
        /* bind QP to EP and move to RTS */
        attrs.mpa_attr = ep->mpa_attr;
        attrs.max_ird = ep->ird;
@@ -1855,30 +1863,31 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        err = iwch_modify_qp(ep->com.qp->rhp,
                             ep->com.qp, mask, &attrs, 1);
        if (err)
-               goto err;
+               goto err1;
 
        /* if needed, wait for wr_ack */
        if (iwch_rqes_posted(qp)) {
                wait_event(ep->com.waitq, ep->com.rpl_done);
                err = ep->com.rpl_err;
                if (err)
-                       goto err;
+                       goto err1;
        }
 
        err = send_mpa_reply(ep, conn_param->private_data,
                             conn_param->private_data_len);
        if (err)
-               goto err;
+               goto err1;
 
 
        state_set(&ep->com, FPDU_MODE);
        established_upcall(ep);
        put_ep(&ep->com);
        return 0;
-err:
+err1:
        ep->com.cm_id = NULL;
        ep->com.qp = NULL;
        cm_id->rem_ref(cm_id);
+err:
        put_ep(&ep->com);
        return err;
 }
@@ -2097,14 +2106,17 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
                        ep->com.state = CLOSING;
                        start_ep_timer(ep);
                }
+               set_bit(CLOSE_SENT, &ep->com.flags);
                break;
        case CLOSING:
-               close = 1;
-               if (abrupt) {
-                       stop_ep_timer(ep);
-                       ep->com.state = ABORTING;
-               } else
-                       ep->com.state = MORIBUND;
+               if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
+                       close = 1;
+                       if (abrupt) {
+                               stop_ep_timer(ep);
+                               ep->com.state = ABORTING;
+                       } else
+                               ep->com.state = MORIBUND;
+               }
                break;
        case MORIBUND:
        case ABORTING:
index 43c0aea7eadc8b0b52ac1b8b3df8965c5a20a059..b9efadfffb4f64b39e991230b25e676c9a23c777 100644 (file)
@@ -145,9 +145,10 @@ enum iwch_ep_state {
 };
 
 enum iwch_ep_flags {
-       PEER_ABORT_IN_PROGRESS  = (1 << 0),
-       ABORT_REQ_IN_PROGRESS   = (1 << 1),
-       RELEASE_RESOURCES       = (1 << 2),
+       PEER_ABORT_IN_PROGRESS  = 0,
+       ABORT_REQ_IN_PROGRESS   = 1,
+       RELEASE_RESOURCES       = 2,
+       CLOSE_SENT              = 3,
 };
 
 struct iwch_ep_common {
@@ -162,7 +163,7 @@ struct iwch_ep_common {
        wait_queue_head_t waitq;
        int rpl_done;
        int rpl_err;
-       u32 flags;
+       unsigned long flags;
 };
 
 struct iwch_listen_ep {
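
The iwch_cm.c and iwch_cm.h hunks above move the endpoint flags from OR-ed bitmask values to bit numbers manipulated with set_bit()/test_bit()/test_and_set_bit(), so checks like the "two abort replies" case become a single atomic operation instead of a racy read-modify-write. A small userspace sketch of that idiom, with illustrative names and a GCC-builtin stand-in for test_and_set_bit():

#include <stdio.h>

enum ep_flags {			/* bit numbers, as in the new enum iwch_ep_flags */
	PEER_ABORT_IN_PROGRESS	= 0,
	ABORT_REQ_IN_PROGRESS	= 1,
	RELEASE_RESOURCES	= 2,
	CLOSE_SENT		= 3,
};

/* returns the previous bit value, like the kernel's test_and_set_bit() */
static int my_test_and_set_bit(int nr, unsigned long *addr)
{
	unsigned long mask = 1UL << nr;

	return (__atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

int main(void)
{
	unsigned long flags = 0;

	/* first abort reply: bit was clear, just note it and bail out */
	if (!my_test_and_set_bit(ABORT_REQ_IN_PROGRESS, &flags))
		printf("first reply ignored, waiting for the second one\n");

	/* second abort reply: bit already set, do the real handling */
	if (my_test_and_set_bit(ABORT_REQ_IN_PROGRESS, &flags))
		printf("second reply handled\n");
	return 0;
}
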
index ec49a5cbdebbc19705968069b58d2ab9f98d7d8e..e1ec65ebb016e4c7cfbfe81ca1e2095ba614ce08 100644 (file)
@@ -39,7 +39,7 @@
 #include "iwch.h"
 #include "iwch_provider.h"
 
-static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
 {
        u32 mmid;
 
@@ -47,14 +47,15 @@ static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
        mhp->attr.stag = stag;
        mmid = stag >> 8;
        mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-       insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
        PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
+       return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
 }
 
 int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
                      struct iwch_mr *mhp, int shift)
 {
        u32 stag;
+       int ret;
 
        if (cxio_register_phys_mem(&rhp->rdev,
                                   &stag, mhp->attr.pdid,
@@ -66,9 +67,11 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
                                   mhp->attr.pbl_size, mhp->attr.pbl_addr))
                return -ENOMEM;
 
-       iwch_finish_mem_reg(mhp, stag);
-
-       return 0;
+       ret = iwch_finish_mem_reg(mhp, stag);
+       if (ret)
+               cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+                      mhp->attr.pbl_addr);
+       return ret;
 }
 
 int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
@@ -77,6 +80,7 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
                                        int npages)
 {
        u32 stag;
+       int ret;
 
        /* We could support this... */
        if (npages > mhp->attr.pbl_size)
@@ -93,9 +97,12 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
                                   mhp->attr.pbl_size, mhp->attr.pbl_addr))
                return -ENOMEM;
 
-       iwch_finish_mem_reg(mhp, stag);
+       ret = iwch_finish_mem_reg(mhp, stag);
+       if (ret)
+               cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+                      mhp->attr.pbl_addr);
 
-       return 0;
+       return ret;
 }
 
 int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
index e2a63214008a90b1ddf137341bbbbc4eac2d16e4..6895523779d0d5ebb9d5eeb4f58e9d5ca42a1c0c 100644 (file)
@@ -195,7 +195,11 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
        spin_lock_init(&chp->lock);
        atomic_set(&chp->refcnt, 1);
        init_waitqueue_head(&chp->wait);
-       insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+       if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
+               cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
+               kfree(chp);
+               return ERR_PTR(-ENOMEM);
+       }
 
        if (ucontext) {
                struct iwch_mm_entry *mm;
@@ -750,7 +754,11 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
        mhp->attr.stag = stag;
        mmid = (stag) >> 8;
        mhp->ibmw.rkey = stag;
-       insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+       if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+               cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
+               kfree(mhp);
+               return ERR_PTR(-ENOMEM);
+       }
        PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
        return &(mhp->ibmw);
 }
@@ -778,37 +786,43 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
        struct iwch_mr *mhp;
        u32 mmid;
        u32 stag = 0;
-       int ret;
+       int ret = 0;
 
        php = to_iwch_pd(pd);
        rhp = php->rhp;
        mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
        if (!mhp)
-               return ERR_PTR(-ENOMEM);
+               goto err;
 
        mhp->rhp = rhp;
        ret = iwch_alloc_pbl(mhp, pbl_depth);
-       if (ret) {
-               kfree(mhp);
-               return ERR_PTR(ret);
-       }
+       if (ret)
+               goto err1;
        mhp->attr.pbl_size = pbl_depth;
        ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
                                 mhp->attr.pbl_size, mhp->attr.pbl_addr);
-       if (ret) {
-               iwch_free_pbl(mhp);
-               kfree(mhp);
-               return ERR_PTR(ret);
-       }
+       if (ret)
+               goto err2;
        mhp->attr.pdid = php->pdid;
        mhp->attr.type = TPT_NON_SHARED_MR;
        mhp->attr.stag = stag;
        mhp->attr.state = 1;
        mmid = (stag) >> 8;
        mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
-       insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+       if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
+               goto err3;
+
        PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
        return &(mhp->ibmr);
+err3:
+       cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
+                      mhp->attr.pbl_addr);
+err2:
+       iwch_free_pbl(mhp);
+err1:
+       kfree(mhp);
+err:
+       return ERR_PTR(ret);
 }
 
 static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
@@ -961,7 +975,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
        spin_lock_init(&qhp->lock);
        init_waitqueue_head(&qhp->wait);
        atomic_set(&qhp->refcnt, 1);
-       insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
+
+       if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
+               cxio_destroy_qp(&rhp->rdev, &qhp->wq,
+                       ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+               kfree(qhp);
+               return ERR_PTR(-ENOMEM);
+       }
 
        if (udata) {
 
@@ -1418,6 +1438,7 @@ int iwch_register_device(struct iwch_dev *dev)
 bail2:
        ib_unregister_device(&dev->ibdev);
 bail1:
+       kfree(dev->ibdev.iwcm);
        return ret;
 }
 
@@ -1430,5 +1451,6 @@ void iwch_unregister_device(struct iwch_dev *dev)
                device_remove_file(&dev->ibdev.dev,
                                   iwch_class_attributes[i]);
        ib_unregister_device(&dev->ibdev);
+       kfree(dev->ibdev.iwcm);
        return;
 }
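
The iwch_provider.c hunks above (the err3/err2/err1 labels in iwch_alloc_fast_reg_mr and the new insert_handle() failure paths) follow the usual kernel unwind style: each failure jumps to a label that undoes only the steps already completed, in reverse order. A self-contained sketch of that pattern with made-up resource names:

#include <stdio.h>
#include <stdlib.h>

struct thing {
	int *a;
	int *b;
};

static struct thing *thing_create(void)
{
	struct thing *t = calloc(1, sizeof(*t));

	if (!t)
		goto err;
	t->a = malloc(16);
	if (!t->a)
		goto err_free_t;
	t->b = malloc(16);
	if (!t->b)
		goto err_free_a;
	return t;			/* fully constructed */

err_free_a:
	free(t->a);
err_free_t:
	free(t);
err:
	return NULL;
}

int main(void)
{
	struct thing *t = thing_create();

	printf("create %s\n", t ? "succeeded" : "failed");
	if (t) {
		free(t->b);
		free(t->a);
		free(t);
	}
	return 0;
}
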
index 27bbdc8e773ae934e03b8d421a08102c7b945d63..6e86534719414ff5e5e6c859dd4fd3cbf995cfa5 100644 (file)
@@ -889,6 +889,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
        init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
        init_attr.rqe_count = iwch_rqes_posted(qhp);
        init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
+       init_attr.chan = qhp->ep->l2t->smt_idx;
        if (peer2peer) {
                init_attr.rtr_type = RTR_READ;
                if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
index fab18a2c74a8d55f00e5d2772ef1b448e01d6e59..5b635aa5947e27822a5386cdc447f8b4a2693db3 100644 (file)
@@ -52,7 +52,7 @@
 #include "ehca_tools.h"
 #include "hcp_if.h"
 
-#define HCAD_VERSION "0028"
+#define HCAD_VERSION "0029"
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
@@ -64,7 +64,7 @@ static int ehca_hw_level      = 0;
 static int ehca_poll_all_eqs  = 1;
 
 int ehca_debug_level   = 0;
-int ehca_nr_ports      = 2;
+int ehca_nr_ports      = -1;
 int ehca_use_hp_mr     = 0;
 int ehca_port_act_time = 30;
 int ehca_static_rate   = -1;
@@ -95,8 +95,8 @@ MODULE_PARM_DESC(hw_level,
                 "Hardware level (0: autosensing (default), "
                 "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
 MODULE_PARM_DESC(nr_ports,
-                "number of connected ports (-1: autodetect, 1: port one only, "
-                "2: two ports (default)");
+                "number of connected ports (-1: autodetect (default), "
+                "1: port one only, 2: two ports)");
 MODULE_PARM_DESC(use_hp_mr,
                 "Use high performance MRs (default: no)");
 MODULE_PARM_DESC(port_act_time,
index 5a3d96f84c79c780b078b03b363bb847ef41006e..8fd88cd828fd657ab11ef33e551e0f1060669f80 100644 (file)
@@ -786,7 +786,11 @@ repoll:
        wc->slid = cqe->rlid;
        wc->dlid_path_bits = cqe->dlid;
        wc->src_qp = cqe->remote_qp_number;
-       wc->wc_flags = cqe->w_completion_flags;
+       /*
+        * HW has "Immed data present" and "GRH present" in bits 6 and 5.
+        * SW defines those in bits 1 and 0, so we can just shift and mask.
+        */
+       wc->wc_flags = (cqe->w_completion_flags >> 5) & 3;
        wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
        wc->sl = cqe->service_level;
 
index c568b28f4e207416762c18c69d86f27818e9d530..8c1213f8916a19bfdccab6f3dcaf08f14065c509 100644 (file)
@@ -125,14 +125,30 @@ struct ib_perf {
        u8 data[192];
 } __attribute__ ((packed));
 
+/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */
+struct tcslfl {
+       u32 tc:8;
+       u32 sl:4;
+       u32 fl:20;
+} __attribute__ ((packed));
+
+/* IP Version/TC/FL packed into 32 bits, as in GRH */
+struct vertcfl {
+       u32 ver:4;
+       u32 tc:8;
+       u32 fl:20;
+} __attribute__ ((packed));
 
 static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
+                            struct ib_wc *in_wc, struct ib_grh *in_grh,
                             struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
        struct ib_perf *in_perf = (struct ib_perf *)in_mad;
        struct ib_perf *out_perf = (struct ib_perf *)out_mad;
        struct ib_class_port_info *poi =
                (struct ib_class_port_info *)out_perf->data;
+       struct tcslfl *tcslfl =
+               (struct tcslfl *)&poi->redirect_tcslfl;
        struct ehca_shca *shca =
                container_of(ibdev, struct ehca_shca, ib_device);
        struct ehca_sport *sport = &shca->sport[port_num - 1];
@@ -158,10 +174,29 @@ static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
                poi->base_version = 1;
                poi->class_version = 1;
                poi->resp_time_value = 18;
-               poi->redirect_lid = sport->saved_attr.lid;
-               poi->redirect_qp = sport->pma_qp_nr;
+
+               /* copy local routing information from WC where applicable */
+               tcslfl->sl         = in_wc->sl;
+               poi->redirect_lid  =
+                       sport->saved_attr.lid | in_wc->dlid_path_bits;
+               poi->redirect_qp   = sport->pma_qp_nr;
                poi->redirect_qkey = IB_QP1_QKEY;
-               poi->redirect_pkey = IB_DEFAULT_PKEY_FULL;
+
+               ehca_query_pkey(ibdev, port_num, in_wc->pkey_index,
+                               &poi->redirect_pkey);
+
+               /* if request was globally routed, copy route info */
+               if (in_grh) {
+                       struct vertcfl *vertcfl =
+                               (struct vertcfl *)&in_grh->version_tclass_flow;
+                       memcpy(poi->redirect_gid, in_grh->dgid.raw,
+                              sizeof(poi->redirect_gid));
+                       tcslfl->tc        = vertcfl->tc;
+                       tcslfl->fl        = vertcfl->fl;
+               } else
+                       /* else only fill in default GID */
+                       ehca_query_gid(ibdev, port_num, 0,
+                                      (union ib_gid *)&poi->redirect_gid);
 
                ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
                         sport->saved_attr.lid, sport->pma_qp_nr);
@@ -183,8 +218,7 @@ perf_reply:
 
 int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                     struct ib_wc *in_wc, struct ib_grh *in_grh,
-                    struct ib_mad *in_mad,
-                    struct ib_mad *out_mad)
+                    struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
        int ret;
 
@@ -196,7 +230,8 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                return IB_MAD_RESULT_SUCCESS;
 
        ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
-       ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad);
+       ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh,
+                               in_mad, out_mad);
 
        return ret;
 }
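
The tcslfl and vertcfl structs above pack TC/SL/FL and IPVer/TC/FL the way ClassPortInfo and the GRH lay them out, so the PMA redirect can copy routing data straight out of an incoming GRH. An equivalent decode with explicit shifts (widths 4/8/20 bits as in the structs; assumes the word is already in host byte order, unlike the big-endian bitfields the driver relies on):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* IPVer=6, TClass=0x22, FlowLabel=0x12345 packed as 4/8/20 bits */
	uint32_t vertcfl = (6u << 28) | (0x22u << 20) | 0x12345u;

	printf("ver=%u tc=0x%02x fl=0x%05x\n",
	       (vertcfl >> 28) & 0xf,		/* IP version    */
	       (vertcfl >> 20) & 0xff,		/* traffic class */
	       vertcfl & 0xfffff);		/* flow label    */
	return 0;
}
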
index 23173982b32c1c636fdc2bbc398ad1168e3ff10d..38a287006612c055b3a1faa32800099041257c35 100644 (file)
@@ -1616,7 +1616,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
                pd->port_cnt = 1;
                port_fp(fp) = pd;
                pd->port_pid = get_pid(task_pid(current));
-               strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
+               strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
                ipath_stats.sps_ports++;
                ret = 0;
        } else
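
The strncpy() to strlcpy() change above matters because strncpy() leaves the destination unterminated when the source fills the buffer, while strlcpy() always terminates. A userspace demonstration, with a local strlcpy() since glibc does not provide one:

#include <stdio.h>
#include <string.h>

static size_t my_strlcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t copy = len >= size ? size - 1 : len;

		memcpy(dst, src, copy);
		dst[copy] = '\0';	/* always terminated */
	}
	return len;
}

int main(void)
{
	char a[8], b[8];

	memset(a, 'X', sizeof(a));
	strncpy(a, "a_very_long_task_name", sizeof(a));	/* a is NOT terminated */
	my_strlcpy(b, "a_very_long_task_name", sizeof(b));	/* b always is */

	printf("strlcpy gave \"%s\"\n", b);
	printf("strncpy terminated the buffer? %s\n",
	       memchr(a, '\0', sizeof(a)) ? "yes" : "no");
	return 0;
}
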
index 16a702d460184f8f66e2434b720aa9842a145701..ceb98ee7866646d87ecdc6b8351622a11441facd 100644 (file)
@@ -60,7 +60,7 @@ static int recv_subn_get_nodedescription(struct ib_smp *smp,
        if (smp->attr_mod)
                smp->status |= IB_SMP_INVALID_FIELD;
 
-       strncpy(smp->data, ibdev->node_desc, sizeof(smp->data));
+       memcpy(smp->data, ibdev->node_desc, sizeof(smp->data));
 
        return reply(smp);
 }
index ae3d7590346e850c0f5579bea15390c91e8b4764..3cb3f47a10b85753cded5f80f019517bf81e3635 100644 (file)
@@ -342,6 +342,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
        struct mlx4_ib_alloc_ucontext_resp resp;
        int err;
 
+       if (!dev->ib_active)
+               return ERR_PTR(-EAGAIN);
+
        resp.qp_tab_size      = dev->dev->caps.num_qps;
        resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
        resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
@@ -540,15 +543,11 @@ static struct device_attribute *mlx4_class_attributes[] = {
 
 static void *mlx4_ib_add(struct mlx4_dev *dev)
 {
-       static int mlx4_ib_version_printed;
        struct mlx4_ib_dev *ibdev;
        int num_ports = 0;
        int i;
 
-       if (!mlx4_ib_version_printed) {
-               printk(KERN_INFO "%s", mlx4_ib_version);
-               ++mlx4_ib_version_printed;
-       }
+       printk_once(KERN_INFO "%s", mlx4_ib_version);
 
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
                num_ports++;
@@ -673,6 +672,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                        goto err_reg;
        }
 
+       ibdev->ib_active = true;
+
        return ibdev;
 
 err_reg:
@@ -729,6 +730,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
                break;
 
        case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+               ibdev->ib_active = false;
                ibev.event = IB_EVENT_DEVICE_FATAL;
                break;
 
index 8a7dd6795fa0a2117f3f21311c9a1a4eccdf4c9c..3486d7675e56dfece310b090abf0eec93d140ec6 100644 (file)
@@ -175,6 +175,7 @@ struct mlx4_ib_dev {
        spinlock_t              sm_lock;
 
        struct mutex            cap_mask_mutex;
+       bool                    ib_active;
 };
 
 static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
index c4a02648c8afe78ce6553e95fcc0742ed499274b..219b10397b4d0fd64f19767adbdb3e24352f1476 100644 (file)
@@ -615,10 +615,12 @@ static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
 }
 
 static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+       __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
 {
-       if (send_cq == recv_cq)
+       if (send_cq == recv_cq) {
                spin_lock_irq(&send_cq->lock);
-       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+               __acquire(&recv_cq->lock);
+       } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                spin_lock_irq(&send_cq->lock);
                spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
        } else {
@@ -628,10 +630,12 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
 }
 
 static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
+       __releases(&send_cq->lock) __releases(&recv_cq->lock)
 {
-       if (send_cq == recv_cq)
+       if (send_cq == recv_cq) {
+               __release(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
-       else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
+       } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
                spin_unlock(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
        } else {
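
The mlx4 hunks above (and the matching mthca ones below) add sparse __acquires/__releases/__acquire/__release annotations so static analysis sees balanced locking; the underlying pattern is to lock the CQ with the smaller CQN first so concurrent callers agree on the order, and to take the lock only once when both CQs are the same object. A pthread-based sketch of that ordering rule (illustrative types, not the driver code):

#include <pthread.h>
#include <stdio.h>

struct cq {
	int cqn;
	pthread_mutex_t lock;
};

static void lock_cqs(struct cq *send_cq, struct cq *recv_cq)
{
	if (send_cq == recv_cq) {
		pthread_mutex_lock(&send_cq->lock);	/* one object, one lock */
	} else if (send_cq->cqn < recv_cq->cqn) {
		pthread_mutex_lock(&send_cq->lock);
		pthread_mutex_lock(&recv_cq->lock);
	} else {
		pthread_mutex_lock(&recv_cq->lock);
		pthread_mutex_lock(&send_cq->lock);
	}
}

static void unlock_cqs(struct cq *send_cq, struct cq *recv_cq)
{
	if (send_cq == recv_cq) {
		pthread_mutex_unlock(&send_cq->lock);
	} else {
		pthread_mutex_unlock(&recv_cq->lock);
		pthread_mutex_unlock(&send_cq->lock);
	}
}

int main(void)
{
	struct cq a = { 1, PTHREAD_MUTEX_INITIALIZER };
	struct cq b = { 2, PTHREAD_MUTEX_INITIALIZER };

	lock_cqs(&a, &b);	/* always locks cqn 1 before cqn 2 */
	puts("both CQ locks held");
	unlock_cqs(&a, &b);
	return 0;
}
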
index 65ad359fdf164e506ec9a5726ba12c5a5ee882f2..056b2a4c69700f9fdb7cb6640213804e5f850aec 100644 (file)
@@ -88,6 +88,7 @@ static void handle_catas(struct mthca_dev *dev)
        event.device = &dev->ib_dev;
        event.event  = IB_EVENT_DEVICE_FATAL;
        event.element.port_num = 0;
+       dev->active = false;
 
        ib_dispatch_event(&event);
 
index 75671f75cac482bf67ce0a6c3692634cce7f43b5..155bc66395beaeaf5aa1476b884cb12c527715a5 100644 (file)
@@ -34,8 +34,6 @@
 #ifndef MTHCA_CONFIG_REG_H
 #define MTHCA_CONFIG_REG_H
 
-#include <asm/page.h>
-
 #define MTHCA_HCR_BASE         0x80680
 #define MTHCA_HCR_SIZE         0x0001c
 #define MTHCA_ECR_BASE         0x80700
index 9ef611f6dd36d52f531198d0a61d7ec11dbe78fd..7e6a6d64ad4eb1bee96b0d2d244daf20898ab3b6 100644 (file)
@@ -357,6 +357,7 @@ struct mthca_dev {
        struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
        spinlock_t            sm_lock;
        u8                    rate[MTHCA_MAX_PORTS];
+       bool                  active;
 };
 
 #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
index 90e4e450a12022d4186ca19f1c94c9e897e5c785..8c31fa36e95e7102a96ecf9ca6b31988b0f99640 100644 (file)
@@ -829,27 +829,34 @@ int mthca_init_eq_table(struct mthca_dev *dev)
 
        if (dev->mthca_flags & MTHCA_FLAG_MSI_X) {
                static const char *eq_name[] = {
-                       [MTHCA_EQ_COMP]  = DRV_NAME " (comp)",
-                       [MTHCA_EQ_ASYNC] = DRV_NAME " (async)",
-                       [MTHCA_EQ_CMD]   = DRV_NAME " (cmd)"
+                       [MTHCA_EQ_COMP]  = DRV_NAME "-comp",
+                       [MTHCA_EQ_ASYNC] = DRV_NAME "-async",
+                       [MTHCA_EQ_CMD]   = DRV_NAME "-cmd"
                };
 
                for (i = 0; i < MTHCA_NUM_EQ; ++i) {
+                       snprintf(dev->eq_table.eq[i].irq_name,
+                                IB_DEVICE_NAME_MAX,
+                                "%s@pci:%s", eq_name[i],
+                                pci_name(dev->pdev));
                        err = request_irq(dev->eq_table.eq[i].msi_x_vector,
                                          mthca_is_memfree(dev) ?
                                          mthca_arbel_msi_x_interrupt :
                                          mthca_tavor_msi_x_interrupt,
-                                         0, eq_name[i], dev->eq_table.eq + i);
+                                         0, dev->eq_table.eq[i].irq_name,
+                                         dev->eq_table.eq + i);
                        if (err)
                                goto err_out_cmd;
                        dev->eq_table.eq[i].have_irq = 1;
                }
        } else {
+               snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX,
+                        DRV_NAME "@pci:%s", pci_name(dev->pdev));
                err = request_irq(dev->pdev->irq,
                                  mthca_is_memfree(dev) ?
                                  mthca_arbel_interrupt :
                                  mthca_tavor_interrupt,
-                                 IRQF_SHARED, DRV_NAME, dev);
+                                 IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev);
                if (err)
                        goto err_out_cmd;
                dev->eq_table.have_irq = 1;
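
The mthca_eq.c hunk above gives each EQ interrupt a per-device name built with snprintf() ("<name>@pci:<bus id>") so /proc/interrupts can distinguish multiple HCAs. A toy illustration of the naming scheme with a made-up buffer size and PCI address:

#include <stdio.h>

#define DRV_NAME	"ib_mthca"
#define NAME_LEN	64		/* assumed size for the example */

int main(void)
{
	static const char *eq_name[] = {
		DRV_NAME "-comp", DRV_NAME "-async", DRV_NAME "-cmd"
	};
	char irq_name[NAME_LEN];
	int i;

	for (i = 0; i < 3; i++) {
		snprintf(irq_name, sizeof(irq_name), "%s@pci:%s",
			 eq_name[i], "0000:04:00.0");	/* made-up bus id */
		printf("%s\n", irq_name);
	}
	return 0;
}
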
index 13da9f1d24c0a2bf9d715f32701d0cb02406cba2..b01b28987874e9145d241310347e4691695c2716 100644 (file)
@@ -1116,6 +1116,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
        pci_set_drvdata(pdev, mdev);
        mdev->hca_type = hca_type;
 
+       mdev->active = true;
+
        return 0;
 
 err_unregister:
@@ -1215,15 +1217,11 @@ int __mthca_restart_one(struct pci_dev *pdev)
 static int __devinit mthca_init_one(struct pci_dev *pdev,
                                    const struct pci_device_id *id)
 {
-       static int mthca_version_printed = 0;
        int ret;
 
        mutex_lock(&mthca_device_mutex);
 
-       if (!mthca_version_printed) {
-               printk(KERN_INFO "%s", mthca_version);
-               ++mthca_version_printed;
-       }
+       printk_once(KERN_INFO "%s", mthca_version);
 
        if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
                printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
index 87ad889e367b2b6b39cfe010ec1ec28f1dd488ac..bcf7a401482015f3b5c09afdc4177bab3d10577f 100644 (file)
@@ -334,6 +334,9 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
        struct mthca_ucontext           *context;
        int                              err;
 
+       if (!(to_mdev(ibdev)->active))
+               return ERR_PTR(-EAGAIN);
+
        memset(&uresp, 0, sizeof uresp);
 
        uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
index c621f8794b8820d11e6293d62c2ce35d11e8e7ee..90f4c4d2e98359f5b808643b32e7bb6e8a959201 100644 (file)
@@ -113,6 +113,7 @@ struct mthca_eq {
        int                    nent;
        struct mthca_buf_list *page_list;
        struct mthca_mr        mr;
+       char                   irq_name[IB_DEVICE_NAME_MAX];
 };
 
 struct mthca_av;
index f5081bfde6db19641ce62cb3638139caebb6f880..c10576fa60c112931650ab8c322b32dab5f478f9 100644 (file)
@@ -1319,10 +1319,12 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 }
 
 static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+       __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
 {
-       if (send_cq == recv_cq)
+       if (send_cq == recv_cq) {
                spin_lock_irq(&send_cq->lock);
-       else if (send_cq->cqn < recv_cq->cqn) {
+               __acquire(&recv_cq->lock);
+       } else if (send_cq->cqn < recv_cq->cqn) {
                spin_lock_irq(&send_cq->lock);
                spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
        } else {
@@ -1332,10 +1334,12 @@ static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
 }
 
 static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
+       __releases(&send_cq->lock) __releases(&recv_cq->lock)
 {
-       if (send_cq == recv_cq)
+       if (send_cq == recv_cq) {
+               __release(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
-       else if (send_cq->cqn < recv_cq->cqn) {
+       } else if (send_cq->cqn < recv_cq->cqn) {
                spin_unlock(&recv_cq->lock);
                spin_unlock_irq(&send_cq->lock);
        } else {
index acb6817f6060615a4db753e3b4dde6c7f3dd31bb..2a13a163d33780ce9d646aba280a0ceeb76ff9ca 100644 (file)
@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
index bf1720f7f35fe6e07d80321bff8e1de42a834256..bcc6abc4faffafb9fba5b3076644b568434ecc98 100644 (file)
@@ -523,7 +523,7 @@ int nes_cm_disconn(struct nes_qp *);
 void nes_cm_disconn_worker(void *);
 
 /* nes_verbs.c */
-int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32);
+int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32);
 int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
 struct nes_ib_device *nes_init_ofa_device(struct net_device *);
 void nes_destroy_ofa_device(struct nes_ib_device *);
index 114b802771ada144c5a4df3fc25407e2acb34156..73473db1986361f2a390a30b09c942f74e62ca96 100644 (file)
@@ -2450,19 +2450,16 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
  */
 int nes_cm_disconn(struct nes_qp *nesqp)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&nesqp->lock, flags);
-       if (nesqp->disconn_pending == 0) {
-               nesqp->disconn_pending++;
-               spin_unlock_irqrestore(&nesqp->lock, flags);
-               /* init our disconnect work element, to */
-               INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker);
+       struct disconn_work *work;
 
-               queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work);
-       } else
-               spin_unlock_irqrestore(&nesqp->lock, flags);
+       work = kzalloc(sizeof *work, GFP_ATOMIC);
+       if (!work)
+               return -ENOMEM; /* Timer will clean up */
 
+       nes_add_ref(&nesqp->ibqp);
+       work->nesqp = nesqp;
+       INIT_WORK(&work->work, nes_disconnect_worker);
+       queue_work(g_cm_core->disconn_wq, &work->work);
        return 0;
 }
 
@@ -2472,11 +2469,14 @@ int nes_cm_disconn(struct nes_qp *nesqp)
  */
 static void nes_disconnect_worker(struct work_struct *work)
 {
-       struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work);
+       struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+       struct nes_qp *nesqp = dwork->nesqp;
 
+       kfree(dwork);
        nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
                        nesqp->last_aeq, nesqp->hwqp.qp_id);
        nes_cm_disconn_true(nesqp);
+       nes_rem_ref(&nesqp->ibqp);
 }
 
 
@@ -2493,7 +2493,12 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
        u16 last_ae;
        u8 original_hw_tcp_state;
        u8 original_ibqp_state;
-       u8 issued_disconnect_reset = 0;
+       enum iw_cm_event_type disconn_status = IW_CM_EVENT_STATUS_OK;
+       int issue_disconn = 0;
+       int issue_close = 0;
+       int issue_flush = 0;
+       u32 flush_q = NES_CQP_FLUSH_RQ;
+       struct ib_event ibevent;
 
        if (!nesqp) {
                nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
@@ -2517,24 +2522,55 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
        original_ibqp_state   = nesqp->ibqp_state;
        last_ae = nesqp->last_aeq;
 
+       if (nesqp->term_flags) {
+               issue_disconn = 1;
+               issue_close = 1;
+               nesqp->cm_id = NULL;
+               if (nesqp->flush_issued == 0) {
+                       nesqp->flush_issued = 1;
+                       issue_flush = 1;
+               }
+       } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
+                       ((original_ibqp_state == IB_QPS_RTS) &&
+                       (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+               issue_disconn = 1;
+               if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
+                       disconn_status = IW_CM_EVENT_STATUS_RESET;
+       }
+
+       if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
+                (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
+                (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
+                (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+               issue_close = 1;
+               nesqp->cm_id = NULL;
+               if (nesqp->flush_issued == 0) {
+                       nesqp->flush_issued = 1;
+                       issue_flush = 1;
+               }
+       }
+
+       spin_unlock_irqrestore(&nesqp->lock, flags);
 
-       nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state);
+       if ((issue_flush) && (nesqp->destroyed == 0)) {
+               /* Flush the queue(s) */
+               if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE)
+                       flush_q |= NES_CQP_FLUSH_SQ;
+               flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1);
 
-       if ((nesqp->cm_id) && (cm_id->event_handler)) {
-               if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
-                               ((original_ibqp_state == IB_QPS_RTS) &&
-                               (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+               if (nesqp->term_flags) {
+                       ibevent.device = nesqp->ibqp.device;
+                       ibevent.event = nesqp->terminate_eventtype;
+                       ibevent.element.qp = &nesqp->ibqp;
+                       nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+               }
+       }
+
+       if ((cm_id) && (cm_id->event_handler)) {
+               if (issue_disconn) {
                        atomic_inc(&cm_disconnects);
                        cm_event.event = IW_CM_EVENT_DISCONNECT;
-                       if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
-                               cm_event.status = IW_CM_EVENT_STATUS_RESET;
-                               nes_debug(NES_DBG_CM, "Generating a CM "
-                                       "Disconnect Event (status reset) for "
-                                       "QP%u, cm_id = %p. \n",
-                                       nesqp->hwqp.qp_id, cm_id);
-                       } else
-                               cm_event.status = IW_CM_EVENT_STATUS_OK;
-
+                       cm_event.status = disconn_status;
                        cm_event.local_addr = cm_id->local_addr;
                        cm_event.remote_addr = cm_id->remote_addr;
                        cm_event.private_data = NULL;
@@ -2547,29 +2583,14 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
                                nesqp->hwqp.sq_tail, cm_id,
                                atomic_read(&nesqp->refcount));
 
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
                        ret = cm_id->event_handler(cm_id, &cm_event);
                        if (ret)
                                nes_debug(NES_DBG_CM, "OFA CM event_handler "
                                        "returned, ret=%d\n", ret);
-                       spin_lock_irqsave(&nesqp->lock, flags);
                }
 
-               nesqp->disconn_pending = 0;
-               /* There might have been another AE while the lock was released */
-               original_hw_tcp_state = nesqp->hw_tcp_state;
-               original_ibqp_state   = nesqp->ibqp_state;
-               last_ae = nesqp->last_aeq;
-
-               if ((issued_disconnect_reset == 0) && (nesqp->cm_id) &&
-                               ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
-                                (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
-                                (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
-                                (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+               if (issue_close) {
                        atomic_inc(&cm_closes);
-                       nesqp->cm_id = NULL;
-                       nesqp->in_disconnect = 0;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
                        nes_disconnect(nesqp, 1);
 
                        cm_id->provider_data = nesqp;
@@ -2588,28 +2609,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
                        }
 
                        cm_id->rem_ref(cm_id);
-
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       if (nesqp->flush_issued == 0) {
-                               nesqp->flush_issued = 1;
-                               spin_unlock_irqrestore(&nesqp->lock, flags);
-                               flush_wqes(nesvnic->nesdev, nesqp,
-                                       NES_CQP_FLUSH_RQ, 1);
-                       } else
-                               spin_unlock_irqrestore(&nesqp->lock, flags);
-               } else {
-                       cm_id = nesqp->cm_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       /* check to see if the inbound reset beat the outbound reset */
-                       if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) {
-                               nes_debug(NES_DBG_CM, "QP%u: Decing refcount "
-                                       "due to inbound reset beating the "
-                                       "outbound reset.\n", nesqp->hwqp.qp_id);
-                       }
                }
-       } else {
-               nesqp->disconn_pending = 0;
-               spin_unlock_irqrestore(&nesqp->lock, flags);
        }
 
        return 0;
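
The nes_cm.c hunks above drop the disconn_pending counter and instead allocate a small work item per disconnect, take a reference on the QP, queue the item, and let the worker free the item and drop the reference when it finishes. A userspace sketch of that ownership pattern; struct fake_qp and the direct call standing in for queue_work() are illustrative only:

#include <stdio.h>
#include <stdlib.h>

struct fake_qp {
	int id;
	int refcount;
};

struct disconn_work {
	struct fake_qp *qp;
};

static void disconnect_worker(struct disconn_work *work)
{
	struct fake_qp *qp = work->qp;

	free(work);			/* the worker owns and frees the work item */
	printf("disconnecting QP%d\n", qp->id);
	qp->refcount--;			/* drop the reference taken when queuing */
}

static int queue_disconnect(struct fake_qp *qp)
{
	struct disconn_work *work = calloc(1, sizeof(*work));

	if (!work)
		return -1;		/* caller recovers some other way */
	qp->refcount++;			/* keep the QP alive until the worker runs */
	work->qp = qp;
	disconnect_worker(work);	/* stand-in for queue_work() */
	return 0;
}

int main(void)
{
	struct fake_qp qp = { .id = 7, .refcount = 1 };

	queue_disconnect(&qp);
	printf("refcount back to %d\n", qp.refcount);
	return 0;
}
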
index 8b7e7c0e496ecc7c2055c9845db3a0e746e8e3b9..90e8e4d8a5cef8522039c252f300530da6a15957 100644 (file)
@@ -410,8 +410,6 @@ struct nes_cm_ops {
 int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
                enum nes_timer_type, int, int);
 
-int nes_cm_disconn(struct nes_qp *);
-
 int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
 int nes_reject(struct iw_cm_id *, const void *, u8);
 int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
index 4a84d02ece0637fcbd3da54aed4131e6fbb45c4b..63a1a8e1e8a3d2f28631d7674188657deb2bf1f4 100644 (file)
@@ -74,6 +74,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 static void process_critical_error(struct nes_device *nesdev);
 static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
 static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
+static void nes_terminate_timeout(unsigned long context);
+static void nes_terminate_start_timer(struct nes_qp *nesqp);
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
 static unsigned char *nes_iwarp_state_str[] = {
@@ -2903,6 +2905,417 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
 }
 
 
+static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
+{
+       u16 pkt_len;
+
+       if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
+               /* skip over ethernet header */
+               pkt_len = be16_to_cpu(*(u16 *)(pkt + ETH_HLEN - 2));
+               pkt += ETH_HLEN;
+
+               /* Skip over IP and TCP headers */
+               pkt += 4 * (pkt[0] & 0x0f);
+               pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+       }
+       return pkt;
+}
+
+/* Determine if incoming error pkt is rdma layer */
+static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info)
+{
+       u8 *pkt;
+       u16 *mpa;
+       u32 opcode = 0xffffffff;
+
+       if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+               pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+               mpa = (u16 *)locate_mpa(pkt, aeq_info);
+               opcode = be16_to_cpu(mpa[1]) & 0xf;
+       }
+
+       return opcode;
+}
+
+/* Build iWARP terminate header */
+static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info)
+{
+       u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+       u16 ddp_seg_len;
+       int copy_len = 0;
+       u8 is_tagged = 0;
+       u8 flush_code = 0;
+       struct nes_terminate_hdr *termhdr;
+
+       termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase;
+       memset(termhdr, 0, 64);
+
+       if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+
+               /* Use data from offending packet to fill in ddp & rdma hdrs */
+               pkt = locate_mpa(pkt, aeq_info);
+               ddp_seg_len = be16_to_cpu(*(u16 *)pkt);
+               if (ddp_seg_len) {
+                       copy_len = 2;
+                       termhdr->hdrct = DDP_LEN_FLAG;
+                       if (pkt[2] & 0x80) {
+                               is_tagged = 1;
+                               if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+                                       copy_len += TERM_DDP_LEN_TAGGED;
+                                       termhdr->hdrct |= DDP_HDR_FLAG;
+                               }
+                       } else {
+                               if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+                                       copy_len += TERM_DDP_LEN_UNTAGGED;
+                                       termhdr->hdrct |= DDP_HDR_FLAG;
+                               }
+
+                               if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
+                                       if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
+                                               copy_len += TERM_RDMA_LEN;
+                                               termhdr->hdrct |= RDMA_HDR_FLAG;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       switch (async_event_id) {
+       case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
+               switch (iwarp_opcode(nesqp, aeq_info)) {
+               case IWARP_OPCODE_WRITE:
+                       flush_code = IB_WC_LOC_PROT_ERR;
+                       termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+                       termhdr->error_code = DDP_TAGGED_INV_STAG;
+                       break;
+               default:
+                       flush_code = IB_WC_REM_ACCESS_ERR;
+                       termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+                       termhdr->error_code = RDMAP_INV_STAG;
+               }
+               break;
+       case NES_AEQE_AEID_AMP_INVALID_STAG:
+               flush_code = IB_WC_REM_ACCESS_ERR;
+               termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+               termhdr->error_code = RDMAP_INV_STAG;
+               break;
+       case NES_AEQE_AEID_AMP_BAD_QP:
+               flush_code = IB_WC_LOC_QP_OP_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+               termhdr->error_code = DDP_UNTAGGED_INV_QN;
+               break;
+       case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
+       case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
+               switch (iwarp_opcode(nesqp, aeq_info)) {
+               case IWARP_OPCODE_SEND_INV:
+               case IWARP_OPCODE_SEND_SE_INV:
+                       flush_code = IB_WC_REM_OP_ERR;
+                       termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+                       termhdr->error_code = RDMAP_CANT_INV_STAG;
+                       break;
+               default:
+                       flush_code = IB_WC_REM_ACCESS_ERR;
+                       termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+                       termhdr->error_code = RDMAP_INV_STAG;
+               }
+               break;
+       case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+               if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) {
+                       flush_code = IB_WC_LOC_PROT_ERR;
+                       termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+                       termhdr->error_code = DDP_TAGGED_BOUNDS;
+               } else {
+                       flush_code = IB_WC_REM_ACCESS_ERR;
+                       termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+                       termhdr->error_code = RDMAP_INV_BOUNDS;
+               }
+               break;
+       case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+       case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+       case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
+               flush_code = IB_WC_REM_ACCESS_ERR;
+               termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+               termhdr->error_code = RDMAP_ACCESS;
+               break;
+       case NES_AEQE_AEID_AMP_TO_WRAP:
+               flush_code = IB_WC_REM_ACCESS_ERR;
+               termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+               termhdr->error_code = RDMAP_TO_WRAP;
+               break;
+       case NES_AEQE_AEID_AMP_BAD_PD:
+               switch (iwarp_opcode(nesqp, aeq_info)) {
+               case IWARP_OPCODE_WRITE:
+                       flush_code = IB_WC_LOC_PROT_ERR;
+                       termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+                       termhdr->error_code = DDP_TAGGED_UNASSOC_STAG;
+                       break;
+               case IWARP_OPCODE_SEND_INV:
+               case IWARP_OPCODE_SEND_SE_INV:
+                       flush_code = IB_WC_REM_ACCESS_ERR;
+                       termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+                       termhdr->error_code = RDMAP_CANT_INV_STAG;
+                       break;
+               default:
+                       flush_code = IB_WC_REM_ACCESS_ERR;
+                       termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
+                       termhdr->error_code = RDMAP_UNASSOC_STAG;
+               }
+               break;
+       case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+               flush_code = IB_WC_LOC_LEN_ERR;
+               termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+               termhdr->error_code = MPA_MARKER;
+               break;
+       case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+               flush_code = IB_WC_GENERAL_ERR;
+               termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
+               termhdr->error_code = MPA_CRC;
+               break;
+       case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+       case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+               flush_code = IB_WC_LOC_LEN_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+               termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+               break;
+       case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+       case NES_AEQE_AEID_DDP_NO_L_BIT:
+               flush_code = IB_WC_FATAL_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
+               termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
+               break;
+       case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+       case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+               flush_code = IB_WC_GENERAL_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+               termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE;
+               break;
+       case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+               flush_code = IB_WC_LOC_LEN_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+               termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG;
+               break;
+       case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+               flush_code = IB_WC_GENERAL_ERR;
+               if (is_tagged) {
+                       termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
+                       termhdr->error_code = DDP_TAGGED_INV_DDP_VER;
+               } else {
+                       termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+                       termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER;
+               }
+               break;
+       case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+               flush_code = IB_WC_GENERAL_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+               termhdr->error_code = DDP_UNTAGGED_INV_MO;
+               break;
+       case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+               flush_code = IB_WC_REM_OP_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+               termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF;
+               break;
+       case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+               flush_code = IB_WC_GENERAL_ERR;
+               termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
+               termhdr->error_code = DDP_UNTAGGED_INV_QN;
+               break;
+       case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+               flush_code = IB_WC_GENERAL_ERR;
+               termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+               termhdr->error_code = RDMAP_INV_RDMAP_VER;
+               break;
+       case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+               flush_code = IB_WC_LOC_QP_OP_ERR;
+               termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+               termhdr->error_code = RDMAP_UNEXPECTED_OP;
+               break;
+       default:
+               flush_code = IB_WC_FATAL_ERR;
+               termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
+               termhdr->error_code = RDMAP_UNSPECIFIED;
+               break;
+       }
+
+       if (copy_len)
+               memcpy(termhdr + 1, pkt, copy_len);
+
+       if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) {
+               if (aeq_info & NES_AEQE_SQ)
+                       nesqp->term_sq_flush_code = flush_code;
+               else
+                       nesqp->term_rq_flush_code = flush_code;
+       }
+
+       return sizeof(struct nes_terminate_hdr) + copy_len;
+}
+
+static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp,
+                struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype)
+{
+       u64 context;
+       unsigned long flags;
+       u32 aeq_info;
+       u16 async_event_id;
+       u8 tcp_state;
+       u8 iwarp_state;
+       u32 termlen = 0;
+       u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE |
+                          NES_CQP_QP_TERM_DONT_SEND_FIN;
+       struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+       if (nesqp->term_flags & NES_TERM_SENT)
+               return; /* Sanity check */
+
+       aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+       tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+       iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+       async_event_id = (u16)aeq_info;
+
+       context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
+               aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
+       if (!context) {
+               WARN_ON(!context);
+               return;
+       }
+
+       nesqp = (struct nes_qp *)(unsigned long)context;
+       spin_lock_irqsave(&nesqp->lock, flags);
+       nesqp->hw_iwarp_state = iwarp_state;
+       nesqp->hw_tcp_state = tcp_state;
+       nesqp->last_aeq = async_event_id;
+       nesqp->terminate_eventtype = eventtype;
+       spin_unlock_irqrestore(&nesqp->lock, flags);
+
+       if (nesadapter->send_term_ok)
+               termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info);
+       else
+               mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG;
+
+       nes_terminate_start_timer(nesqp);
+       nesqp->term_flags |= NES_TERM_SENT;
+       nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0);
+}
+
+static void nes_terminate_send_fin(struct nes_device *nesdev,
+                         struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+       u32 aeq_info;
+       u16 async_event_id;
+       u8 tcp_state;
+       u8 iwarp_state;
+       unsigned long flags;
+
+       aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+       tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
+       iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
+       async_event_id = (u16)aeq_info;
+
+       spin_lock_irqsave(&nesqp->lock, flags);
+       nesqp->hw_iwarp_state = iwarp_state;
+       nesqp->hw_tcp_state = tcp_state;
+       nesqp->last_aeq = async_event_id;
+       spin_unlock_irqrestore(&nesqp->lock, flags);
+
+       /* Send the fin only */
+       nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE |
+               NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0);
+}
+
+/* Cleanup after a terminate sent or received */
+static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred)
+{
+       u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
+       unsigned long flags;
+       struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device);
+       struct nes_device *nesdev = nesvnic->nesdev;
+       u8 first_time = 0;
+
+       spin_lock_irqsave(&nesqp->lock, flags);
+       if (nesqp->hte_added) {
+               nesqp->hte_added = 0;
+               next_iwarp_state |= NES_CQP_QP_DEL_HTE;
+       }
+
+       first_time = (nesqp->term_flags & NES_TERM_DONE) == 0;
+       nesqp->term_flags |= NES_TERM_DONE;
+       spin_unlock_irqrestore(&nesqp->lock, flags);
+
+       /* Make sure we go through this only once */
+       if (first_time) {
+               if (timeout_occurred == 0)
+                       del_timer(&nesqp->terminate_timer);
+               else
+                       next_iwarp_state |= NES_CQP_QP_RESET;
+
+               nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+               nes_cm_disconn(nesqp);
+       }
+}
+
+static void nes_terminate_received(struct nes_device *nesdev,
+                               struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
+{
+       u32 aeq_info;
+       u8 *pkt;
+       u32 *mpa;
+       u8 ddp_ctl;
+       u8 rdma_ctl;
+       u16 aeq_id = 0;
+
+       aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
+       if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
+               /* Terminate is not a performance path so the silicon */
+               /* did not validate the frame - do it now */
+               pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
+               mpa = (u32 *)locate_mpa(pkt, aeq_info);
+               ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff;
+               rdma_ctl = be32_to_cpu(mpa[0]) & 0xff;
+               if ((ddp_ctl & 0xc0) != 0x40)
+                       aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC;
+               else if ((ddp_ctl & 0x03) != 1)
+                       aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION;
+               else if (be32_to_cpu(mpa[2]) != 2)
+                       aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN;
+               else if (be32_to_cpu(mpa[3]) != 1)
+                       aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN;
+               else if (be32_to_cpu(mpa[4]) != 0)
+                       aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO;
+               else if ((rdma_ctl & 0xc0) != 0x40)
+                       aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+               if (aeq_id) {
+                       /* Bad terminate recvd - send back a terminate */
+                       aeq_info = (aeq_info & 0xffff0000) | aeq_id;
+                       aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
+                       nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+                       return;
+               }
+       }
+
+       nesqp->term_flags |= NES_TERM_RCVD;
+       nesqp->terminate_eventtype = IB_EVENT_QP_FATAL;
+       nes_terminate_start_timer(nesqp);
+       nes_terminate_send_fin(nesdev, nesqp, aeqe);
+}
+
+/* Timeout routine in case terminate fails to complete */
+static void nes_terminate_timeout(unsigned long context)
+{
+       struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
+
+       nes_terminate_done(nesqp, 1);
+}
+
+/* Set a timer in case hw cannot complete the terminate sequence */
+static void nes_terminate_start_timer(struct nes_qp *nesqp)
+{
+       init_timer(&nesqp->terminate_timer);
+       nesqp->terminate_timer.function = nes_terminate_timeout;
+       nesqp->terminate_timer.expires = jiffies + HZ;
+       nesqp->terminate_timer.data = (unsigned long)nesqp;
+       add_timer(&nesqp->terminate_timer);
+}
+
 /**
  * nes_process_iwarp_aeqe
  */
@@ -2910,28 +3323,27 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
                                   struct nes_hw_aeqe *aeqe)
 {
        u64 context;
-       u64 aeqe_context = 0;
        unsigned long flags;
        struct nes_qp *nesqp;
+       struct nes_hw_cq *hw_cq;
+       struct nes_cq *nescq;
        int resource_allocated;
-       /* struct iw_cm_id *cm_id; */
        struct nes_adapter *nesadapter = nesdev->nesadapter;
-       struct ib_event ibevent;
-       /* struct iw_cm_event cm_event; */
        u32 aeq_info;
        u32 next_iwarp_state = 0;
        u16 async_event_id;
        u8 tcp_state;
        u8 iwarp_state;
+       int must_disconn = 1;
+       int must_terminate = 0;
+       struct ib_event ibevent;
 
        nes_debug(NES_DBG_AEQ, "\n");
        aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
-       if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
+       if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) {
                context  = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
                context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
        } else {
-               aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
-               aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
                context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
                                                aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
                BUG_ON(!context);
@@ -2948,7 +3360,11 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 
        switch (async_event_id) {
                case NES_AEQE_AEID_LLP_FIN_RECEIVED:
-                       nesqp = *((struct nes_qp **)&context);
+                       nesqp = (struct nes_qp *)(unsigned long)context;
+
+                       if (nesqp->term_flags)
+                               return; /* Ignore it, wait for close complete */
+
                        if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
                                nesqp->cm_id->add_ref(nesqp->cm_id);
                                schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
@@ -2959,18 +3375,24 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
                                                nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
                                                async_event_id, nesqp->last_aeq, tcp_state);
                        }
+
                        if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
                                        (nesqp->ibqp_state != IB_QPS_RTS)) {
                                /* FIN Received but tcp state or IB state moved on,
                                                should expect a close complete */
                                return;
                        }
+
                case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
+                       nesqp = (struct nes_qp *)(unsigned long)context;
+                       if (nesqp->term_flags) {
+                               nes_terminate_done(nesqp, 0);
+                               return;
+                       }
+
                case NES_AEQE_AEID_LLP_CONNECTION_RESET:
-               case NES_AEQE_AEID_TERMINATE_SENT:
-               case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
                case NES_AEQE_AEID_RESET_SENT:
-                       nesqp = *((struct nes_qp **)&context);
+                       nesqp = (struct nes_qp *)(unsigned long)context;
                        if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
                                tcp_state = NES_AEQE_TCP_STATE_CLOSED;
                        }
@@ -2982,12 +3404,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
                        if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
                                        (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
                                nesqp->hte_added = 0;
-                               spin_unlock_irqrestore(&nesqp->lock, flags);
-                               nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n",
-                                               nesqp->hwqp.qp_id);
-                               nes_hw_modify_qp(nesdev, nesqp,
-                                               NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0);
-                               spin_lock_irqsave(&nesqp->lock, flags);
+                               next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
                        }
 
                        if ((nesqp->ibqp_state == IB_QPS_RTS) &&
@@ -2999,151 +3416,106 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
                                                nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
                                                break;
                                        case NES_AEQE_IWARP_STATE_TERMINATE:
-                                               next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
-                                               nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
-                                               if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
-                                                       next_iwarp_state |= 0x02000000;
-                                                       nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
-                                               }
+                                               must_disconn = 0; /* terminate path takes care of disconn */
+                                               if (nesqp->term_flags == 0)
+                                                       must_terminate = 1;
                                                break;
-                                       default:
-                                               next_iwarp_state = 0;
-                               }
-                               spin_unlock_irqrestore(&nesqp->lock, flags);
-                               if (next_iwarp_state) {
-                                       nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
-                                                       " also added another reference\n",
-                                                       nesqp->hwqp.qp_id, next_iwarp_state);
-                                       nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
                                }
-                               nes_cm_disconn(nesqp);
                        } else {
                                if (async_event_id ==  NES_AEQE_AEID_LLP_FIN_RECEIVED) {
                                        /* FIN Received but ib state not RTS,
                                                        close complete will be on its way */
-                                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                                       return;
-                               }
-                               spin_unlock_irqrestore(&nesqp->lock, flags);
-                               if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
-                                       next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000;
-                                       nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
-                                       nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X,"
-                                                       " also added another reference\n",
-                                                       nesqp->hwqp.qp_id, next_iwarp_state);
-                                       nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
+                                       must_disconn = 0;
                                }
-                               nes_cm_disconn(nesqp);
                        }
-                       break;
-               case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
-                       nesqp = *((struct nes_qp **)&context);
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
                        spin_unlock_irqrestore(&nesqp->lock, flags);
-                       nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED"
-                                       " event on QP%u \n  Q2 Data:\n",
-                                       nesqp->hwqp.qp_id);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_FATAL;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-                       }
-                       if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
-                                       ((nesqp->ibqp_state == IB_QPS_RTS)&&
-                                       (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
+
+                       if (must_terminate)
+                               nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+                       else if (must_disconn) {
+                               if (next_iwarp_state) {
+                                       nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n",
+                                                 nesqp->hwqp.qp_id, next_iwarp_state);
+                                       nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+                               }
                                nes_cm_disconn(nesqp);
-                       } else {
-                               nesqp->in_disconnect = 0;
-                               wake_up(&nesqp->kick_waitq);
                        }
                        break;
-               case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
-                       nesqp = *((struct nes_qp **)&context);
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
-                       nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
-                       nesqp->last_aeq = async_event_id;
-                       if (nesqp->cm_id) {
-                               nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
-                                               " event on QP%u, remote IP = 0x%08X \n",
-                                               nesqp->hwqp.qp_id,
-                                               ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr));
-                       } else {
-                               nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES"
-                                               " event on QP%u \n",
-                                               nesqp->hwqp.qp_id);
-                       }
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET;
-                       nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_FATAL;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-                       }
+
+               case NES_AEQE_AEID_TERMINATE_SENT:
+                       nesqp = (struct nes_qp *)(unsigned long)context;
+                       nes_terminate_send_fin(nesdev, nesqp, aeqe);
                        break;
-               case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
-                       if (NES_AEQE_INBOUND_RDMA&aeq_info) {
-                               nesqp = nesadapter->qp_table[le32_to_cpu(
-                                               aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
-                       } else {
-                               /* TODO: get the actual WQE and mask off wqe index */
-                               context &= ~((u64)511);
-                               nesqp = *((struct nes_qp **)&context);
-                       }
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n",
-                                       nesqp->hwqp.qp_id);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-                       }
+
+               case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
+                       nesqp = (struct nes_qp *)(unsigned long)context;
+                       nes_terminate_received(nesdev, nesqp, aeqe);
                        break;
+
+               case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
+               case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
                case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
-                       nesqp = *((struct nes_qp **)&context);
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n",
-                                       nesqp->hwqp.qp_id);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-                       }
-                       break;
+               case NES_AEQE_AEID_AMP_INVALID_STAG:
+               case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
+               case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
                case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
-                       nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words
-                                       [NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u,"
-                                       " nesqp = %p, AE reported %p\n",
-                                       nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context));
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
+               case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+               case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
+               case NES_AEQE_AEID_AMP_TO_WRAP:
+                       nesqp = (struct nes_qp *)(unsigned long)context;
+                       nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
+                       break;
+
+               case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
+               case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
+               case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
+               case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
+                       nesqp = (struct nes_qp *)(unsigned long)context;
+                       if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
+                               aeq_info &= 0xffff0000;
+                               aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
+                               aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
                        }
+
+               case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
+               case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
+               case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+               case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
+               case NES_AEQE_AEID_AMP_BAD_QP:
+               case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+               case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
+               case NES_AEQE_AEID_DDP_NO_L_BIT:
+               case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
+               case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+               case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
+               case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
+               case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
+               case NES_AEQE_AEID_AMP_BAD_PD:
+               case NES_AEQE_AEID_AMP_FASTREG_SHARED:
+               case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG:
+               case NES_AEQE_AEID_AMP_FASTREG_MW_STAG:
+               case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS:
+               case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW:
+               case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH:
+               case NES_AEQE_AEID_AMP_INVALIDATE_SHARED:
+               case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS:
+               case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG:
+               case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG:
+               case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG:
+               case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG:
+               case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS:
+               case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS:
+               case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT:
+               case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED:
+               case NES_AEQE_AEID_BAD_CLOSE:
+               case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO:
+               case NES_AEQE_AEID_STAG_ZERO_INVALID:
+               case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
+               case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+                       nesqp = (struct nes_qp *)(unsigned long)context;
+                       nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
                        break;
+
                case NES_AEQE_AEID_CQ_OPERATION_ERROR:
                        context <<= 1;
                        nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
@@ -3153,83 +3525,19 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
                        if (resource_allocated) {
                                printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
                                                __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
+                               hw_cq = (struct nes_hw_cq *)(unsigned long)context;
+                               if (hw_cq) {
+                                       nescq = container_of(hw_cq, struct nes_cq, hw_cq);
+                                       if (nescq->ibcq.event_handler) {
+                                               ibevent.device = nescq->ibcq.device;
+                                               ibevent.event = IB_EVENT_CQ_ERR;
+                                               ibevent.element.cq = &nescq->ibcq;
+                                               nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context);
+                                       }
+                               }
                        }
                        break;
-               case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
-                       nesqp = nesadapter->qp_table[le32_to_cpu(
-                                       aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG"
-                                       "_FOR_AVAILABLE_BUFFER event on QP%u\n",
-                                       nesqp->hwqp.qp_id);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-                       }
-                       /* tell cm to disconnect, cm will queue work to thread */
-                       nes_cm_disconn(nesqp);
-                       break;
-               case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
-                       nesqp = *((struct nes_qp **)&context);
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN"
-                                       "_NO_BUFFER_AVAILABLE event on QP%u\n",
-                                       nesqp->hwqp.qp_id);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_FATAL;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-                       }
-                       /* tell cm to disconnect, cm will queue work to thread */
-                       nes_cm_disconn(nesqp);
-                       break;
-               case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
-                       nesqp = *((struct nes_qp **)&context);
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR"
-                                       " event on QP%u \n  Q2 Data:\n",
-                                       nesqp->hwqp.qp_id);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_FATAL;
-                               nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
-                       }
-                       /* tell cm to disconnect, cm will queue work to thread */
-                       nes_cm_disconn(nesqp);
-                       break;
-                       /* TODO: additional AEs need to be here */
-               case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
-                       nesqp = *((struct nes_qp **)&context);
-                       spin_lock_irqsave(&nesqp->lock, flags);
-                       nesqp->hw_iwarp_state = iwarp_state;
-                       nesqp->hw_tcp_state = tcp_state;
-                       nesqp->last_aeq = async_event_id;
-                       spin_unlock_irqrestore(&nesqp->lock, flags);
-                       if (nesqp->ibqp.event_handler) {
-                               ibevent.device = nesqp->ibqp.device;
-                               ibevent.element.qp = &nesqp->ibqp;
-                               ibevent.event = IB_EVENT_QP_ACCESS_ERR;
-                               nesqp->ibqp.event_handler(&ibevent,
-                                               nesqp->ibqp.qp_context);
-                       }
-                       nes_cm_disconn(nesqp);
-                       break;
+
                default:
                        nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
                                        async_event_id);
@@ -3238,7 +3546,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 
 }
 
-
 /**
  * nes_iwarp_ce_handler
  */
@@ -3373,6 +3680,8 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
 {
        struct nes_cqp_request *cqp_request;
        struct nes_hw_cqp_wqe *cqp_wqe;
+       u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
+       u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
        int ret;
 
        cqp_request = nes_get_cqp_request(nesdev);
@@ -3389,6 +3698,24 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
        cqp_wqe = &cqp_request->cqp_wqe;
        nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
 
+       /* If wqe in error was identified, set code to be put into cqe */
+       if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) {
+               which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+               sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code;
+               nesqp->term_sq_flush_code = 0;
+       }
+
+       if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) {
+               which_wq |= NES_CQP_FLUSH_MAJ_MIN;
+               rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code;
+               nesqp->term_rq_flush_code = 0;
+       }
+
+       if (which_wq & NES_CQP_FLUSH_MAJ_MIN) {
+               cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code);
+               cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code);
+       }
+
        cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
                        cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
        cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
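
The flush path above packs a 16-bit major code and a 16-bit minor code into one 32-bit CQE word; when the major half is the driver-defined value (0x8000), nes_poll_cq() later reports the minor half directly as the ib_wc status instead of the generic flush error. A minimal stand-alone sketch of that packing, using the constants introduced by this patch (the helper names are illustrative only; the ib_wc status values in the comments are the ones from ib_verbs.h):

        #include <stdint.h>
        #include <stdio.h>

        #define CQE_MAJOR_DRV              0x8000  /* driver-generated completion code */
        #define NES_IWARP_CQE_MAJOR_FLUSH  1
        #define NES_IWARP_CQE_MINOR_FLUSH  1

        /* Pack a driver-chosen status into the CQE error-code word. */
        static uint32_t pack_flush_code(uint16_t status)
        {
                return ((uint32_t)CQE_MAJOR_DRV << 16) | status;
        }

        /* Poll-side unpacking, mirroring the nes_poll_cq() check. */
        static uint16_t unpack_flush_status(uint32_t err_code)
        {
                if ((err_code >> 16) == CQE_MAJOR_DRV)
                        return err_code & 0x0000ffff;
                return 5;       /* otherwise reported as IB_WC_WR_FLUSH_ERR (5) */
        }

        int main(void)
        {
                uint32_t code = pack_flush_code(10);    /* 10 == IB_WC_REM_ACCESS_ERR */
                printf("code=0x%08x status=%u\n", code, unpack_flush_status(code));
                return 0;
        }
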
index c3654c6383fec427e501bcc3e1b04b574c700d43..f28a41ba9fa14378bf9f68c147e74976e0c1c3f4 100644 (file)
@@ -241,6 +241,7 @@ enum nes_cqp_stag_wqeword_idx {
 };
 
 #define NES_CQP_OP_IWARP_STATE_SHIFT 28
+#define NES_CQP_OP_TERMLEN_SHIFT     28
 
 enum nes_cqp_qp_bits {
        NES_CQP_QP_ARP_VALID = (1<<8),
@@ -265,12 +266,16 @@ enum nes_cqp_qp_bits {
        NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
        NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
        NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
+       NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24),
+       NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25),
        NES_CQP_QP_RESET = (1<<31),
 };
 
 enum nes_cqp_qp_wqe_word_idx {
        NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
        NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
+       NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8,
+       NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9,
        NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
 };
 
@@ -361,6 +366,7 @@ enum nes_cqp_arp_bits {
 enum nes_cqp_flush_bits {
        NES_CQP_FLUSH_SQ = (1<<30),
        NES_CQP_FLUSH_RQ = (1<<31),
+       NES_CQP_FLUSH_MAJ_MIN = (1<<28),
 };
 
 enum nes_cqe_opcode_bits {
@@ -633,11 +639,14 @@ enum nes_aeqe_bits {
        NES_AEQE_INBOUND_RDMA = (1<<19),
        NES_AEQE_IWARP_STATE_MASK = (7<<20),
        NES_AEQE_TCP_STATE_MASK = (0xf<<24),
+       NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28),
        NES_AEQE_VALID = (1<<31),
 };
 
 #define NES_AEQE_IWARP_STATE_SHIFT     20
 #define NES_AEQE_TCP_STATE_SHIFT       24
+#define NES_AEQE_Q2_DATA_ETHERNET       (1<<28)
+#define NES_AEQE_Q2_DATA_MPA            (1<<29)
 
 enum nes_aeqe_iwarp_state {
        NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
@@ -751,6 +760,15 @@ enum nes_iwarp_sq_wqe_bits {
        NES_IWARP_SQ_OP_NOP = 12,
 };
 
+enum nes_iwarp_cqe_major_code {
+       NES_IWARP_CQE_MAJOR_FLUSH = 1,
+       NES_IWARP_CQE_MAJOR_DRV = 0x8000
+};
+
+enum nes_iwarp_cqe_minor_code {
+       NES_IWARP_CQE_MINOR_FLUSH = 1
+};
+
 #define NES_EEPROM_READ_REQUEST (1<<16)
 #define NES_MAC_ADDR_VALID      (1<<20)
 
@@ -1119,6 +1137,7 @@ struct nes_adapter {
        u8            netdev_max;       /* from host nic address count in EEPROM */
        u8            port_count;
        u8            virtwq;
+       u8            send_term_ok;
        u8            et_use_adaptive_rx_coalesce;
        u8            adapter_fcn_count;
        u8 pft_mcast_map[NES_PFT_SIZE];
@@ -1217,6 +1236,90 @@ struct nes_ib_device {
        u32 num_pd;
 };
 
+enum nes_hdrct_flags {
+       DDP_LEN_FLAG                    = 0x80,
+       DDP_HDR_FLAG                    = 0x40,
+       RDMA_HDR_FLAG                   = 0x20
+};
+
+enum nes_term_layers {
+       LAYER_RDMA                      = 0,
+       LAYER_DDP                       = 1,
+       LAYER_MPA                       = 2
+};
+
+enum nes_term_error_types {
+       RDMAP_CATASTROPHIC              = 0,
+       RDMAP_REMOTE_PROT               = 1,
+       RDMAP_REMOTE_OP                 = 2,
+       DDP_CATASTROPHIC                = 0,
+       DDP_TAGGED_BUFFER               = 1,
+       DDP_UNTAGGED_BUFFER             = 2,
+       DDP_LLP                         = 3
+};
+
+enum nes_term_rdma_errors {
+       RDMAP_INV_STAG                  = 0x00,
+       RDMAP_INV_BOUNDS                = 0x01,
+       RDMAP_ACCESS                    = 0x02,
+       RDMAP_UNASSOC_STAG              = 0x03,
+       RDMAP_TO_WRAP                   = 0x04,
+       RDMAP_INV_RDMAP_VER             = 0x05,
+       RDMAP_UNEXPECTED_OP             = 0x06,
+       RDMAP_CATASTROPHIC_LOCAL        = 0x07,
+       RDMAP_CATASTROPHIC_GLOBAL       = 0x08,
+       RDMAP_CANT_INV_STAG             = 0x09,
+       RDMAP_UNSPECIFIED               = 0xff
+};
+
+enum nes_term_ddp_errors {
+       DDP_CATASTROPHIC_LOCAL          = 0x00,
+       DDP_TAGGED_INV_STAG             = 0x00,
+       DDP_TAGGED_BOUNDS               = 0x01,
+       DDP_TAGGED_UNASSOC_STAG         = 0x02,
+       DDP_TAGGED_TO_WRAP              = 0x03,
+       DDP_TAGGED_INV_DDP_VER          = 0x04,
+       DDP_UNTAGGED_INV_QN             = 0x01,
+       DDP_UNTAGGED_INV_MSN_NO_BUF     = 0x02,
+       DDP_UNTAGGED_INV_MSN_RANGE      = 0x03,
+       DDP_UNTAGGED_INV_MO             = 0x04,
+       DDP_UNTAGGED_INV_TOO_LONG       = 0x05,
+       DDP_UNTAGGED_INV_DDP_VER        = 0x06
+};
+
+enum nes_term_mpa_errors {
+       MPA_CLOSED                      = 0x01,
+       MPA_CRC                         = 0x02,
+       MPA_MARKER                      = 0x03,
+       MPA_REQ_RSP                     = 0x04,
+};
+
+struct nes_terminate_hdr {
+       u8 layer_etype;
+       u8 error_code;
+       u8 hdrct;
+       u8 rsvd;
+};
+
+/* Used to determine how to fill in terminate error codes */
+#define IWARP_OPCODE_WRITE             0
+#define IWARP_OPCODE_READREQ           1
+#define IWARP_OPCODE_READRSP           2
+#define IWARP_OPCODE_SEND              3
+#define IWARP_OPCODE_SEND_INV          4
+#define IWARP_OPCODE_SEND_SE           5
+#define IWARP_OPCODE_SEND_SE_INV       6
+#define IWARP_OPCODE_TERM              7
+
+/* These values are used only during terminate processing */
+#define TERM_DDP_LEN_TAGGED    14
+#define TERM_DDP_LEN_UNTAGGED  18
+#define TERM_RDMA_LEN          28
+#define RDMA_OPCODE_MASK       0x0f
+#define RDMA_READ_REQ_OPCODE   1
+#define BAD_FRAME_OFFSET       64
+#define CQE_MAJOR_DRV          0x8000
+
 #define nes_vlan_rx vlan_hwaccel_receive_skb
 #define nes_netif_rx netif_receive_skb
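
The struct nes_terminate_hdr added above describes the start of an iWARP terminate message: the layer and error type share one byte, the specific error code follows, and hdrct records which protocol headers are appended (DDP_LEN_FLAG/DDP_HDR_FLAG/RDMA_HDR_FLAG). A stand-alone sketch of filling one such header, assuming the (layer << 4) | error_type packing used by the terminate code in this patch (the helper and the chosen example are hypothetical):

        #include <stdint.h>
        #include <stdio.h>

        /* Mirrors struct nes_terminate_hdr and the enums above. */
        struct nes_terminate_hdr {
                uint8_t layer_etype;    /* layer in bits 7:4, error type in bits 3:0 */
                uint8_t error_code;
                uint8_t hdrct;          /* which headers follow the terminate header */
                uint8_t rsvd;
        };

        enum { LAYER_RDMA = 0, LAYER_DDP = 1, LAYER_MPA = 2 };
        enum { DDP_UNTAGGED_BUFFER = 2 };
        enum { DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02 };
        enum { DDP_LEN_FLAG = 0x80, DDP_HDR_FLAG = 0x40, RDMA_HDR_FLAG = 0x20 };

        /* Hypothetical example: report "untagged buffer, invalid MSN / no buffer". */
        static void build_no_buffer_term_hdr(struct nes_terminate_hdr *hdr)
        {
                hdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
                hdr->error_code  = DDP_UNTAGGED_INV_MSN_NO_BUF;
                hdr->hdrct       = DDP_HDR_FLAG;  /* a DDP header is copied after this struct */
                hdr->rsvd        = 0;
        }

        int main(void)
        {
                struct nes_terminate_hdr hdr;
                build_no_buffer_term_hdr(&hdr);
                printf("layer_etype=0x%02x error_code=0x%02x\n", hdr.layer_etype, hdr.error_code);
                return 0;
        }
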
 
index a282031d15c7088726c2d1834de8f5fd43775973..9687c397ce1ac7480e36692a022d358258ae3c51 100644 (file)
@@ -183,6 +183,9 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada
                } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
                        nesadapter->virtwq = 1;
                }
+               if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
+                       nesadapter->send_term_ok = 1;
+
                nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8))  <<  16) +
                                (u32)((u8)eeprom_data);
 
@@ -548,7 +551,7 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
                spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
        }
        if (cqp_request == NULL) {
-               cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
+               cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC);
                if (cqp_request) {
                        cqp_request->dynamic = 1;
                        INIT_LIST_HEAD(&cqp_request->list);
index 21e0fd336cf710196dcbf2fe5bb3daecac7a21f4..a680c42d6e8cb3b72a33c5a6c707d15cc294b202 100644 (file)
@@ -667,15 +667,32 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
  */
 static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
 {
+       struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+       struct net_device *netdev = nesvnic->netdev;
+
        memset(props, 0, sizeof(*props));
 
-       props->max_mtu = IB_MTU_2048;
-       props->active_mtu = IB_MTU_2048;
+       props->max_mtu = IB_MTU_4096;
+
+       if (netdev->mtu  >= 4096)
+               props->active_mtu = IB_MTU_4096;
+       else if (netdev->mtu  >= 2048)
+               props->active_mtu = IB_MTU_2048;
+       else if (netdev->mtu  >= 1024)
+               props->active_mtu = IB_MTU_1024;
+       else if (netdev->mtu  >= 512)
+               props->active_mtu = IB_MTU_512;
+       else
+               props->active_mtu = IB_MTU_256;
+
        props->lid = 1;
        props->lmc = 0;
        props->sm_lid = 0;
        props->sm_sl = 0;
-       props->state = IB_PORT_ACTIVE;
+       if (nesvnic->linkup)
+               props->state = IB_PORT_ACTIVE;
+       else
+               props->state = IB_PORT_DOWN;
        props->phys_state = 0;
        props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
                        IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
@@ -1505,13 +1522,46 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 }
 
 
+/**
+ * nes_clean_cq
+ */
+static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq)
+{
+       u32 cq_head;
+       u32 lo;
+       u32 hi;
+       u64 u64temp;
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&nescq->lock, flags);
+
+       cq_head = nescq->hw_cq.cq_head;
+       while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
+               rmb();
+               lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+               hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]);
+               u64temp = (((u64)hi) << 32) | ((u64)lo);
+               u64temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+               if (u64temp == (u64)(unsigned long)nesqp) {
+                       /* Zero the context value so cqe will be ignored */
+                       nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0;
+                       nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0;
+               }
+
+               if (++cq_head >= nescq->hw_cq.cq_size)
+                       cq_head = 0;
+       }
+
+       spin_unlock_irqrestore(&nescq->lock, flags);
+}
+
+
 /**
  * nes_destroy_qp
  */
 static int nes_destroy_qp(struct ib_qp *ibqp)
 {
        struct nes_qp *nesqp = to_nesqp(ibqp);
-       /* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */
        struct nes_ucontext *nes_ucontext;
        struct ib_qp_attr attr;
        struct iw_cm_id *cm_id;
@@ -1548,7 +1598,6 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
                        nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
        }
 
-
        if (nesqp->user_mode) {
                if ((ibqp->uobject)&&(ibqp->uobject->context)) {
                        nes_ucontext = to_nesucontext(ibqp->uobject->context);
@@ -1560,6 +1609,13 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
                }
                if (nesqp->pbl_pbase)
                        kunmap(nesqp->page);
+       } else {
+               /* Clean any pending completions from the cq(s) */
+               if (nesqp->nesscq)
+                       nes_clean_cq(nesqp, nesqp->nesscq);
+
+               if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
+                       nes_clean_cq(nesqp, nesqp->nesrcq);
        }
 
        nes_rem_ref(&nesqp->ibqp);
@@ -2884,7 +2940,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
  * nes_hw_modify_qp
  */
 int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
-               u32 next_iwarp_state, u32 wait_completion)
+               u32 next_iwarp_state, u32 termlen, u32 wait_completion)
 {
        struct nes_hw_cqp_wqe *cqp_wqe;
        /* struct iw_cm_id *cm_id = nesqp->cm_id; */
@@ -2916,6 +2972,13 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
        set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
        set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
 
+       /* If sending a terminate message, fill in the length (in words) */
+       if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) &&
+           !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) {
+               termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT;
+               set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen);
+       }
+
        atomic_set(&cqp_request->refcount, 2);
        nes_post_cqp_request(nesdev, cqp_request);
 
@@ -3086,6 +3149,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                                }
                                nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
                                                nesqp->hwqp.qp_id);
+                               if (nesqp->term_flags)
+                                       del_timer(&nesqp->terminate_timer);
+
                                next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
                                /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
                                        if (nesqp->hte_added) {
@@ -3163,7 +3229,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
        if (issue_modify_qp) {
                nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
-               ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1);
+               ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1);
                if (ret)
                        nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
                                        " failed for QP%u.\n",
@@ -3328,6 +3394,12 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
        head = nesqp->hwqp.sq_head;
 
        while (ib_wr) {
+               /* Check for QP error */
+               if (nesqp->term_flags) {
+                       err = -EINVAL;
+                       break;
+               }
+
                /* Check for SQ overflow */
                if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
                        err = -EINVAL;
@@ -3484,6 +3556,12 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
        head = nesqp->hwqp.rq_head;
 
        while (ib_wr) {
+               /* Check for QP error */
+               if (nesqp->term_flags) {
+                       err = -EINVAL;
+                       break;
+               }
+
                if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
                        err = -EINVAL;
                        break;
@@ -3547,7 +3625,6 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
        u64 u64temp;
        u64 wrid;
-       /* u64 u64temp; */
        unsigned long flags = 0;
        struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
        struct nes_device *nesdev = nesvnic->nesdev;
@@ -3555,12 +3632,13 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
        struct nes_qp *nesqp;
        struct nes_hw_cqe cqe;
        u32 head;
-       u32 wq_tail;
+       u32 wq_tail = 0;
        u32 cq_size;
        u32 cqe_count = 0;
        u32 wqe_index;
        u32 u32temp;
-       /* u32 counter; */
+       u32 move_cq_head = 1;
+       u32 err_code;
 
        nes_debug(NES_DBG_CQ, "\n");
 
@@ -3570,29 +3648,40 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
        cq_size = nescq->hw_cq.cq_size;
 
        while (cqe_count < num_entries) {
-               if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
-                               NES_CQE_VALID) {
-                       /*
-                        * Make sure we read CQ entry contents *after*
-                        * we've checked the valid bit.
-                        */
-                       rmb();
-
-                       cqe = nescq->hw_cq.cq_vbase[head];
-                       nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
-                       u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
-                       wqe_index = u32temp &
-                                       (nesdev->nesadapter->max_qp_wr - 1);
-                       u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
-                       /* parse CQE, get completion context from WQE (either rq or sq */
-                       u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
-                                       ((u64)u32temp);
-                       nesqp = *((struct nes_qp **)&u64temp);
+               if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
+                               NES_CQE_VALID) == 0)
+                       break;
+
+               /*
+                * Make sure we read CQ entry contents *after*
+                * we've checked the valid bit.
+                */
+               rmb();
+
+               cqe = nescq->hw_cq.cq_vbase[head];
+               u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+               wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1);
+               u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
+               /* parse CQE, get completion context from WQE (either rq or sq) */
+               u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+                               ((u64)u32temp);
+
+               if (u64temp) {
+                       nesqp = (struct nes_qp *)(unsigned long)u64temp;
                        memset(entry, 0, sizeof *entry);
                        if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
                                entry->status = IB_WC_SUCCESS;
                        } else {
-                               entry->status = IB_WC_WR_FLUSH_ERR;
+                               err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]);
+                               if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) {
+                                       entry->status = err_code & 0x0000ffff;
+
+                                       /* The rest of the cqe's will be marked as flushed */
+                                       nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] =
+                                               cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) |
+                                                           NES_IWARP_CQE_MINOR_FLUSH);
+                               } else
+                                       entry->status = IB_WC_WR_FLUSH_ERR;
                        }
 
                        entry->qp = &nesqp->ibqp;
@@ -3601,20 +3690,18 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
                        if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
                                if (nesqp->skip_lsmm) {
                                        nesqp->skip_lsmm = 0;
-                                       wq_tail = nesqp->hwqp.sq_tail++;
+                                       nesqp->hwqp.sq_tail++;
                                }
 
                                /* Working on a SQ Completion*/
-                               wq_tail = wqe_index;
-                               nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
-                               wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+                               wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
                                                wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
-                                               ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail].
+                                               ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
                                                wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
-                               entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+                               entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
                                                wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
 
-                               switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+                               switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
                                                wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
                                        case NES_IWARP_SQ_OP_RDMAW:
                                                nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
@@ -3623,7 +3710,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
                                        case NES_IWARP_SQ_OP_RDMAR:
                                                nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
                                                entry->opcode = IB_WC_RDMA_READ;
-                                               entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail].
+                                               entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
                                                                wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
                                                break;
                                        case NES_IWARP_SQ_OP_SENDINV:
@@ -3634,33 +3721,54 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
                                                entry->opcode = IB_WC_SEND;
                                                break;
                                }
+
+                               nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
+                               if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) {
+                                       move_cq_head = 0;
+                                       wq_tail = nesqp->hwqp.sq_tail;
+                               }
                        } else {
                                /* Working on a RQ Completion*/
-                               wq_tail = wqe_index;
-                                       nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
                                entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
-                               wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
-                                       ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
+                               wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
+                                       ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
                                        entry->opcode = IB_WC_RECV;
+
+                               nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
+                               if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) {
+                                       move_cq_head = 0;
+                                       wq_tail = nesqp->hwqp.rq_tail;
+                               }
                        }
+
                        entry->wr_id = wrid;
+                       entry++;
+                       cqe_count++;
+               }
 
+               if (move_cq_head) {
+                       nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
                        if (++head >= cq_size)
                                head = 0;
-                       cqe_count++;
                        nescq->polled_completions++;
+
                        if ((nescq->polled_completions > (cq_size / 2)) ||
                                        (nescq->polled_completions == 255)) {
                                nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
-                                               " are pending %u of %u.\n",
-                                               nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
+                                       " are pending %u of %u.\n",
+                                       nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
                                nes_write32(nesdev->regs+NES_CQE_ALLOC,
-                                               nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
+                                       nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
                                nescq->polled_completions = 0;
                        }
-                       entry++;
-               } else
-                       break;
+               } else {
+                       /* Update the wqe index and set status to flush */
+                       wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
+                       wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail;
+                       nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] =
+                               cpu_to_le32(wqe_index);
+                       move_cq_head = 1; /* ready for next pass */
+               }
        }
 
        if (nescq->polled_completions) {
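
When a completion comes back in error and the work queue still holds posted WQEs, the reworked poll loop above deliberately leaves the CQ head in place: it stamps the CQE's error word with the flush major/minor code and patches only the low WQE-index bits of the completion context, so the next nes_poll_cq() call reports the following WQE as flushed from the same CQE. A stand-alone sketch of that index surgery (the max_qp_wr value is illustrative; it must be a power of two for the masking to work):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint32_t max_qp_wr = 512;               /* illustrative ring size */
                uint32_t ctx_low   = 0xabcd0007;        /* upper context bits + WQE index 7 */
                uint32_t wq_tail   = 8;                 /* next WQE to report as flushed */

                uint32_t wqe_index = ctx_low & (max_qp_wr - 1);               /* -> 7 */
                uint32_t patched   = (ctx_low & ~(max_qp_wr - 1)) | wq_tail;  /* -> 0xabcd0008 */

                printf("polled index %u, patched context word 0x%08x\n", wqe_index, patched);
                return 0;
        }
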
index 41c07f29f7c9ce3f7105308801617739375c90c4..89822d75f82ec581375bd87c9336f600eed6c7d1 100644 (file)
@@ -40,6 +40,10 @@ struct nes_device;
 #define NES_MAX_USER_DB_REGIONS  4096
 #define NES_MAX_USER_WQ_REGIONS  4096
 
+#define NES_TERM_SENT            0x01
+#define NES_TERM_RCVD            0x02
+#define NES_TERM_DONE            0x04
+
 struct nes_ucontext {
        struct ib_ucontext ibucontext;
        struct nes_device  *nesdev;
@@ -119,6 +123,11 @@ struct nes_wq {
        spinlock_t lock;
 };
 
+struct disconn_work {
+       struct work_struct    work;
+       struct nes_qp         *nesqp;
+};
+
 struct iw_cm_id;
 struct ietf_mpa_frame;
 
@@ -127,7 +136,6 @@ struct nes_qp {
        void                  *allocated_buffer;
        struct iw_cm_id       *cm_id;
        struct workqueue_struct *wq;
-       struct work_struct    disconn_work;
        struct nes_cq         *nesscq;
        struct nes_cq         *nesrcq;
        struct nes_pd         *nespd;
@@ -155,9 +163,13 @@ struct nes_qp {
        void                  *pbl_vbase;
        dma_addr_t            pbl_pbase;
        struct page           *page;
+       struct timer_list     terminate_timer;
+       enum ib_event_type    terminate_eventtype;
        wait_queue_head_t     kick_waitq;
        u16                   in_disconnect;
        u16                   private_data_len;
+       u16                   term_sq_flush_code;
+       u16                   term_rq_flush_code;
        u8                    active_conn;
        u8                    skip_lsmm;
        u8                    user_mode;
@@ -165,7 +177,7 @@ struct nes_qp {
        u8                    hw_iwarp_state;
        u8                    flush_issued;
        u8                    hw_tcp_state;
-       u8                    disconn_pending;
+       u8                    term_flags;
        u8                    destroyed;
 };
 #endif                 /* NES_VERBS_H */
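
term_flags replaces the old disconn_pending byte with a small bitmask: NES_TERM_SENT, NES_TERM_RCVD and NES_TERM_DONE (defined near the top of this header) record how far the terminate exchange has progressed, and nes_post_send()/nes_post_recv() reject new work requests as soon as any bit is set. A rough stand-alone sketch of that pattern (names other than the NES_TERM_* constants are illustrative):

        #include <stdio.h>

        #define NES_TERM_SENT  0x01
        #define NES_TERM_RCVD  0x02
        #define NES_TERM_DONE  0x04

        struct fake_qp { unsigned char term_flags; };

        /* Post-path check, mirroring the nes_post_send()/nes_post_recv() test. */
        static int post_work(struct fake_qp *qp)
        {
                if (qp->term_flags)
                        return -1;      /* QP is terminating; refuse new work (-EINVAL in the driver) */
                return 0;
        }

        int main(void)
        {
                struct fake_qp qp = { 0 };
                printf("before terminate: %d\n", post_work(&qp));      /* 0 */
                qp.term_flags |= NES_TERM_RCVD;                        /* terminate received */
                printf("after terminate:  %d\n", post_work(&qp));      /* -1 */
                return 0;
        }
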
index 181b1f32325f3a009ce13af55c7aafa66dd67177..8f4b4fca2a1d9dbd26964771f6baa99614884455 100644 (file)
@@ -31,7 +31,6 @@
  */
 
 #include <rdma/ib_cm.h>
-#include <rdma/ib_cache.h>
 #include <net/dst.h>
 #include <net/icmp.h>
 #include <linux/icmpv6.h>
index e7e5adf84e840e3f13aa54dc1c1036f6d4a81846..e35f4a0ea9d5ffd457fef7becca2ed36685fb73b 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 
-#include <rdma/ib_cache.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
 
index e319d91f60a609be13d3e18c94df9477207178c6..2bf5116deec41d7109b1b6b3b6ccc141ae3ac052 100644 (file)
@@ -604,8 +604,11 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
                                           skb_queue_len(&neigh->queue));
                                goto err_drop;
                        }
-               } else
+               } else {
+                       spin_unlock_irqrestore(&priv->lock, flags);
                        ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
+                       return;
+               }
        } else {
                neigh->ah  = NULL;
 
@@ -688,7 +691,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                ipoib_dbg(priv, "Send unicast ARP to %04x\n",
                          be16_to_cpu(path->pathrec.dlid));
 
+               spin_unlock_irqrestore(&priv->lock, flags);
                ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+               return;
        } else if ((path->query || !path_rec_start(dev, path)) &&
                   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
                /* put pseudoheader back on for next time */
index a0e97532e7140f422fbd1e40b1a57626f8e2396b..25874fc680c99d74a0402b29000d6569e0a7a9b7 100644 (file)
@@ -720,7 +720,9 @@ out:
                        }
                }
 
+               spin_unlock_irqrestore(&priv->lock, flags);
                ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+               return;
        }
 
 unlock:
@@ -758,6 +760,20 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
        }
 }
 
+static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
+                                    const u8 *broadcast)
+{
+       if (addrlen != INFINIBAND_ALEN)
+               return 0;
+       /* reserved QPN, prefix, scope */
+       if (memcmp(addr, broadcast, 6))
+               return 0;
+       /* signature lower, pkey */
+       if (memcmp(addr + 7, broadcast + 7, 3))
+               return 0;
+       return 1;
+}
+
 void ipoib_mcast_restart_task(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv =
@@ -791,6 +807,11 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
                union ib_gid mgid;
 
+               if (!ipoib_mcast_addr_is_valid(mclist->dmi_addr,
+                                              mclist->dmi_addrlen,
+                                              dev->broadcast))
+                       continue;
+
                memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
 
                mcast = __ipoib_mcast_find(dev, &mgid);
index fb5df5c6203e9541849fce56a52abfaedfbe7391..c97ab82ec743b5edd1371dcdf3d3e64afc198dfd 100644 (file)
@@ -1286,6 +1286,7 @@ static int cxgb_open(struct net_device *dev)
        if (!other_ports)
                schedule_chk_task(adapter);
 
+       cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_UP, pi->port_id);
        return 0;
 }
 
@@ -1318,6 +1319,7 @@ static int cxgb_close(struct net_device *dev)
        if (!adapter->open_device_map)
                cxgb_down(adapter);
 
+       cxgb3_event_notify(&adapter->tdev, OFFLOAD_PORT_DOWN, pi->port_id);
        return 0;
 }
 
@@ -2717,7 +2719,7 @@ static int t3_adapter_error(struct adapter *adapter, int reset)
 
        if (is_offload(adapter) &&
            test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) {
-               cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
+               cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_DOWN, 0);
                offload_close(&adapter->tdev);
        }
 
@@ -2782,7 +2784,7 @@ static void t3_resume_ports(struct adapter *adapter)
        }
 
        if (is_offload(adapter) && !ofld_disable)
-               cxgb3_err_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
+               cxgb3_event_notify(&adapter->tdev, OFFLOAD_STATUS_UP, 0);
 }
 
 /*
index f9f54b57b28ca9af177f00b52923f607f2979f64..75064eea1d87eea36acb36b5351f7f5a16855adb 100644 (file)
@@ -153,14 +153,14 @@ void cxgb3_remove_clients(struct t3cdev *tdev)
        mutex_unlock(&cxgb3_db_lock);
 }
 
-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error)
+void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port)
 {
        struct cxgb3_client *client;
 
        mutex_lock(&cxgb3_db_lock);
        list_for_each_entry(client, &client_list, client_list) {
-               if (client->err_handler)
-                       client->err_handler(tdev, status, error);
+               if (client->event_handler)
+                       client->event_handler(tdev, event, port);
        }
        mutex_unlock(&cxgb3_db_lock);
 }
index 55945f422aec0f70e2f608d1dd55cc23375003ae..670aa62042daa8699b5aa0f4e2dfb6f3822003f1 100644 (file)
@@ -64,14 +64,16 @@ void cxgb3_register_client(struct cxgb3_client *client);
 void cxgb3_unregister_client(struct cxgb3_client *client);
 void cxgb3_add_clients(struct t3cdev *tdev);
 void cxgb3_remove_clients(struct t3cdev *tdev);
-void cxgb3_err_notify(struct t3cdev *tdev, u32 status, u32 error);
+void cxgb3_event_notify(struct t3cdev *tdev, u32 event, u32 port);
 
 typedef int (*cxgb3_cpl_handler_func)(struct t3cdev *dev,
                                      struct sk_buff *skb, void *ctx);
 
 enum {
        OFFLOAD_STATUS_UP,
-       OFFLOAD_STATUS_DOWN
+       OFFLOAD_STATUS_DOWN,
+       OFFLOAD_PORT_DOWN,
+       OFFLOAD_PORT_UP
 };
 
 struct cxgb3_client {
@@ -82,7 +84,7 @@ struct cxgb3_client {
        int (*redirect)(void *ctx, struct dst_entry *old,
                        struct dst_entry *new, struct l2t_entry *l2t);
        struct list_head client_list;
-       void (*err_handler)(struct t3cdev *tdev, u32 status, u32 error);
+       void (*event_handler)(struct t3cdev *tdev, u32 event, u32 port);
 };
 
 /*
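
For context, a minimal sketch (not part of this series) of how an upper-layer driver could use the renamed callback and the new per-port events; the "example_*" names are invented, and the real conversion of the cxgb3i client appears further down in this diff:

	static void example_event_handler(struct t3cdev *tdev, u32 event, u32 port)
	{
		switch (event) {
		case OFFLOAD_STATUS_DOWN:	/* adapter-wide error/reset */
			/* quiesce all offloaded state on tdev */
			break;
		case OFFLOAD_STATUS_UP:		/* adapter recovered */
			/* re-establish offloaded state on tdev */
			break;
		case OFFLOAD_PORT_DOWN:		/* one port went down */
		case OFFLOAD_PORT_UP:		/* one port came back up */
			/* per-port handling keyed on 'port' */
			break;
		}
	}

	static struct cxgb3_client example_client = {
		.name		= "example",
		.event_handler	= example_event_handler,
	};
	/* registered from module init via cxgb3_register_client(&example_client) */
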
index ac57b6a42c6ee8791fcca7f385a2c703b4e96799..ccfe276943f09f7e036899d90a09ea6220ab77cf 100644 (file)
@@ -34,7 +34,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/hardirq.h>
 
 #include <linux/mlx4/cmd.h>
index b9ceddde46c0adf5f7e9602235bef02cd534babf..bffb7995cb70a4f6a9b09bdcc137cd317e3a641e 100644 (file)
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include "mlx4.h"
 #include "fw.h"
 
+enum {
+       MLX4_IRQNAME_SIZE       = 64
+};
+
 enum {
        MLX4_NUM_ASYNC_EQE      = 0x100,
        MLX4_NUM_SPARE_EQE      = 0x80,
@@ -526,48 +529,6 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
        iounmap(priv->clr_base);
 }
 
-int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
-{
-       struct mlx4_priv *priv = mlx4_priv(dev);
-       int ret;
-
-       /*
-        * We assume that mapping one page is enough for the whole EQ
-        * context table.  This is fine with all current HCAs, because
-        * we only use 32 EQs and each EQ uses 64 bytes of context
-        * memory, or 1 KB total.
-        */
-       priv->eq_table.icm_virt = icm_virt;
-       priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
-       if (!priv->eq_table.icm_page)
-               return -ENOMEM;
-       priv->eq_table.icm_dma  = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0,
-                                              PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-       if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) {
-               __free_page(priv->eq_table.icm_page);
-               return -ENOMEM;
-       }
-
-       ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt);
-       if (ret) {
-               pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-                              PCI_DMA_BIDIRECTIONAL);
-               __free_page(priv->eq_table.icm_page);
-       }
-
-       return ret;
-}
-
-void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
-{
-       struct mlx4_priv *priv = mlx4_priv(dev);
-
-       mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1);
-       pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE,
-                      PCI_DMA_BIDIRECTIONAL);
-       __free_page(priv->eq_table.icm_page);
-}
-
 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
@@ -615,7 +576,9 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
        priv->eq_table.clr_int  = priv->clr_base +
                (priv->eq_table.inta_pin < 32 ? 4 : 0);
 
-       priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+       priv->eq_table.irq_names =
+               kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1),
+                       GFP_KERNEL);
        if (!priv->eq_table.irq_names) {
                err = -ENOMEM;
                goto err_out_bitmap;
@@ -638,17 +601,25 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
                goto err_out_comp;
 
        if (dev->flags & MLX4_FLAG_MSI_X) {
-               static const char async_eq_name[] = "mlx4-async";
                const char *eq_name;
 
                for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
                        if (i < dev->caps.num_comp_vectors) {
-                               snprintf(priv->eq_table.irq_names + i * 16, 16,
-                                        "mlx4-comp-%d", i);
-                               eq_name = priv->eq_table.irq_names + i * 16;
-                       } else
-                               eq_name = async_eq_name;
+                               snprintf(priv->eq_table.irq_names +
+                                        i * MLX4_IRQNAME_SIZE,
+                                        MLX4_IRQNAME_SIZE,
+                                        "mlx4-comp-%d@pci:%s", i,
+                                        pci_name(dev->pdev));
+                       } else {
+                               snprintf(priv->eq_table.irq_names +
+                                        i * MLX4_IRQNAME_SIZE,
+                                        MLX4_IRQNAME_SIZE,
+                                        "mlx4-async@pci:%s",
+                                        pci_name(dev->pdev));
+                       }
 
+                       eq_name = priv->eq_table.irq_names +
+                                 i * MLX4_IRQNAME_SIZE;
                        err = request_irq(priv->eq_table.eq[i].irq,
                                          mlx4_msi_x_interrupt, 0, eq_name,
                                          priv->eq_table.eq + i);
@@ -658,8 +629,12 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
                        priv->eq_table.eq[i].have_irq = 1;
                }
        } else {
+               snprintf(priv->eq_table.irq_names,
+                        MLX4_IRQNAME_SIZE,
+                        DRV_NAME "@pci:%s",
+                        pci_name(dev->pdev));
                err = request_irq(dev->pdev->irq, mlx4_interrupt,
-                                 IRQF_SHARED, DRV_NAME, dev);
+                                 IRQF_SHARED, priv->eq_table.irq_names, dev);
                if (err)
                        goto err_out_async;
 
index baf4bf66062ca6a91652eb6bd5e5224b3ffb9440..04b382fcb8c881c2ab76fea05636deb63c64effd 100644 (file)
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/scatterlist.h>
index dac621b1e9fc9a287fb8b4f7fdfa4f538ecc2a77..3dd481e77f92f1f5cff4af9cfd9d9975439520af 100644 (file)
@@ -525,7 +525,10 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
                goto err_unmap_aux;
        }
 
-       err = mlx4_map_eq_icm(dev, init_hca->eqc_base);
+       err = mlx4_init_icm_table(dev, &priv->eq_table.table,
+                                 init_hca->eqc_base, dev_cap->eqc_entry_sz,
+                                 dev->caps.num_eqs, dev->caps.num_eqs,
+                                 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
                goto err_unmap_cmpt;
@@ -668,7 +671,7 @@ err_unmap_mtt:
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
 
 err_unmap_eq:
-       mlx4_unmap_eq_icm(dev);
+       mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
 
 err_unmap_cmpt:
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
@@ -698,11 +701,11 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
+       mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
-       mlx4_unmap_eq_icm(dev);
 
        mlx4_UNMAP_ICM_AUX(dev);
        mlx4_free_icm(dev, priv->fw.aux_icm, 0);
@@ -786,7 +789,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
        return 0;
 
 err_close:
-       mlx4_close_hca(dev);
+       mlx4_CLOSE_HCA(dev, 0);
 
 err_free_icm:
        mlx4_free_icms(dev);
@@ -1070,18 +1073,12 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_disable_pdev;
        }
 
-       err = pci_request_region(pdev, 0, DRV_NAME);
+       err = pci_request_regions(pdev, DRV_NAME);
        if (err) {
-               dev_err(&pdev->dev, "Cannot request control region, aborting.\n");
+               dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
                goto err_disable_pdev;
        }
 
-       err = pci_request_region(pdev, 2, DRV_NAME);
-       if (err) {
-               dev_err(&pdev->dev, "Cannot request UAR region, aborting.\n");
-               goto err_release_bar0;
-       }
-
        pci_set_master(pdev);
 
        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1090,7 +1087,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
-                       goto err_release_bar2;
+                       goto err_release_regions;
                }
        }
        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
@@ -1101,7 +1098,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
                if (err) {
                        dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
                                "aborting.\n");
-                       goto err_release_bar2;
+                       goto err_release_regions;
                }
        }
 
@@ -1110,7 +1107,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
                dev_err(&pdev->dev, "Device struct alloc failed, "
                        "aborting.\n");
                err = -ENOMEM;
-               goto err_release_bar2;
+               goto err_release_regions;
        }
 
        dev       = &priv->dev;
@@ -1205,11 +1202,8 @@ err_cmd:
 err_free_dev:
        kfree(priv);
 
-err_release_bar2:
-       pci_release_region(pdev, 2);
-
-err_release_bar0:
-       pci_release_region(pdev, 0);
+err_release_regions:
+       pci_release_regions(pdev);
 
 err_disable_pdev:
        pci_disable_device(pdev);
@@ -1265,8 +1259,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
                        pci_disable_msix(pdev);
 
                kfree(priv);
-               pci_release_region(pdev, 2);
-               pci_release_region(pdev, 0);
+               pci_release_regions(pdev);
                pci_disable_device(pdev);
                pci_set_drvdata(pdev, NULL);
        }
index 6053c357a470c27bdc1cf827d24388269f413774..5ccbce9866fe0b70918f362fba4b9a6e4bef8da3 100644 (file)
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
index 5bd79c2b184fc0f5875d0a74f1956db721344e51..bc72d6e4919b8542b0721b797edd67560e31fb86 100644 (file)
@@ -205,9 +205,7 @@ struct mlx4_eq_table {
        void __iomem          **uar_map;
        u32                     clr_mask;
        struct mlx4_eq         *eq;
-       u64                     icm_virt;
-       struct page            *icm_page;
-       dma_addr_t              icm_dma;
+       struct mlx4_icm_table   table;
        struct mlx4_icm_table   cmpt_table;
        int                     have_irq;
        u8                      inta_pin;
@@ -373,9 +371,6 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
                      struct mlx4_dev_cap *dev_cap,
                      struct mlx4_init_hca_param *init_hca);
 
-int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt);
-void mlx4_unmap_eq_icm(struct mlx4_dev *dev);
-
 int mlx4_cmd_init(struct mlx4_dev *dev);
 void mlx4_cmd_cleanup(struct mlx4_dev *dev);
 void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
index f96948be0a449248243e6f423708dc4b9e1c2b53..ca7ab8e7b4cc3917cc94599315f6d16f8e930150 100644 (file)
@@ -32,7 +32,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 
 #include <linux/mlx4/cmd.h>
index 26d1a7a9e375e55db351c29919b3187215e6c784..c4988d6bd5b2f38bc92aaa6284bfe255ae3ff559 100644 (file)
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 
 #include <asm/page.h>
index bd22df95adf9a51fa76632404bbf9488df833b5b..ca25b9dc837853e979650a3c5fc8b01f6a71e961 100644 (file)
@@ -32,8 +32,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
-
 #include "mlx4.h"
 #include "fw.h"
 
index 1c565ef8d179148ead7408457778dff904f9d439..42ab9fc01d3e1f04e40623f194903fbecc11568e 100644 (file)
@@ -33,8 +33,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
-
 #include <linux/mlx4/cmd.h>
 #include <linux/mlx4/qp.h>
 
index 3951b884c0fba94abebab3e254de14cdf99c63d2..e5741dab3825fb6a3f17ac6b87ead4203ec8064b 100644 (file)
@@ -31,7 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
index fe9f218691f5f0d48306b3586efe87a4b47a9007..1377d0dc8f1f41d7de9f35d9a97bf49c0e0c433b 100644 (file)
@@ -31,8 +31,6 @@
  * SOFTWARE.
  */
 
-#include <linux/init.h>
-
 #include <linux/mlx4/cmd.h>
 
 #include "mlx4.h"
index 42b6c6319bc29824fe4e2061ee882759cfeb05c1..87214a257d2ae3431777e94e375e65bb33878828 100644 (file)
@@ -130,17 +130,10 @@ static inline struct tun_sock *tun_sk(struct sock *sk)
 static int tun_attach(struct tun_struct *tun, struct file *file)
 {
        struct tun_file *tfile = file->private_data;
-       const struct cred *cred = current_cred();
        int err;
 
        ASSERT_RTNL();
 
-       /* Check permissions */
-       if (((tun->owner != -1 && cred->euid != tun->owner) ||
-            (tun->group != -1 && !in_egroup_p(tun->group))) &&
-               !capable(CAP_NET_ADMIN))
-               return -EPERM;
-
        netif_tx_lock_bh(tun->dev);
 
        err = -EINVAL;
@@ -926,6 +919,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (dev) {
+               const struct cred *cred = current_cred();
+
                if (ifr->ifr_flags & IFF_TUN_EXCL)
                        return -EBUSY;
                if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
@@ -935,6 +930,14 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                else
                        return -EINVAL;
 
+               if (((tun->owner != -1 && cred->euid != tun->owner) ||
+                    (tun->group != -1 && !in_egroup_p(tun->group))) &&
+                   !capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               err = security_tun_dev_attach(tun->sk);
+               if (err < 0)
+                       return err;
+
                err = tun_attach(tun, file);
                if (err < 0)
                        return err;
@@ -947,6 +950,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
+               err = security_tun_dev_create();
+               if (err < 0)
+                       return err;
 
                /* Set dev type */
                if (ifr->ifr_flags & IFF_TUN) {
@@ -989,6 +995,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                tun->sk = sk;
                container_of(sk, struct tun_sock, sk)->tun = tun;
 
+               security_tun_dev_post_create(sk);
+
                tun_net_init(dev);
 
                if (strchr(dev->name, '%')) {
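
To illustrate the new ordering of checks on TUNSETIFF (owner/group test plus security_tun_dev_attach() for an existing persistent device, security_tun_dev_create() for a new one), a small user-space sketch; the device name is arbitrary and the caller needs CAP_NET_ADMIN unless it owns the device:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>
	#include <linux/if_tun.h>

	int main(void)
	{
		struct ifreq ifr;
		int fd = open("/dev/net/tun", O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		memset(&ifr, 0, sizeof(ifr));
		ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
		strncpy(ifr.ifr_name, "tun-example", IFNAMSIZ - 1);

		/* With this patch the LSM hooks above gate this ioctl in
		 * addition to the owner/group/CAP_NET_ADMIN checks. */
		if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
			perror("TUNSETIFF");
			return 1;
		}
		printf("attached to %s\n", ifr.ifr_name);
		return 0;
	}
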
index 042d9bce9914550ad37ba68937195b6fdf4e1dd9..d0ab23a583558a5b66cb856e760015140a7e410f 100644 (file)
@@ -26,7 +26,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
 
 static void open_s3_dev(struct t3cdev *);
 static void close_s3_dev(struct t3cdev *);
-static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error);
+static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port);
 
 static cxgb3_cpl_handler_func cxgb3i_cpl_handlers[NUM_CPL_CMDS];
 static struct cxgb3_client t3c_client = {
@@ -34,7 +34,7 @@ static struct cxgb3_client t3c_client = {
        .handlers = cxgb3i_cpl_handlers,
        .add = open_s3_dev,
        .remove = close_s3_dev,
-       .err_handler = s3_err_handler,
+       .event_handler = s3_event_handler,
 };
 
 /**
@@ -66,16 +66,16 @@ static void close_s3_dev(struct t3cdev *t3dev)
        cxgb3i_ddp_cleanup(t3dev);
 }
 
-static void s3_err_handler(struct t3cdev *tdev, u32 status, u32 error)
+static void s3_event_handler(struct t3cdev *tdev, u32 event, u32 port)
 {
        struct cxgb3i_adapter *snic = cxgb3i_adapter_find_by_tdev(tdev);
 
-       cxgb3i_log_info("snic 0x%p, tdev 0x%p, status 0x%x, err 0x%x.\n",
-                       snic, tdev, status, error);
+       cxgb3i_log_info("snic 0x%p, tdev 0x%p, event 0x%x, port 0x%x.\n",
+                       snic, tdev, event, port);
        if (!snic)
                return;
 
-       switch (status) {
+       switch (event) {
        case OFFLOAD_STATUS_DOWN:
                snic->flags |= CXGB3I_ADAPTER_FLAG_RESET;
                break;
index 9d7c99394ec6767a2cef94408a7913a63d6a5ec0..640f65c6ef849b6dc5182151e156513f0b1d3f51 100644 (file)
@@ -1752,12 +1752,12 @@ static int comedi_open(struct inode *inode, struct file *file)
        mutex_lock(&dev->mutex);
        if (dev->attached)
                goto ok;
-       if (!capable(CAP_SYS_MODULE) && dev->in_request_module) {
+       if (!capable(CAP_NET_ADMIN) && dev->in_request_module) {
                DPRINTK("in request module\n");
                mutex_unlock(&dev->mutex);
                return -ENODEV;
        }
-       if (capable(CAP_SYS_MODULE) && dev->in_request_module)
+       if (capable(CAP_NET_ADMIN) && dev->in_request_module)
                goto ok;
 
        dev->in_request_module = 1;
@@ -1770,8 +1770,8 @@ static int comedi_open(struct inode *inode, struct file *file)
 
        dev->in_request_module = 0;
 
-       if (!dev->attached && !capable(CAP_SYS_MODULE)) {
-               DPRINTK("not attached and not CAP_SYS_MODULE\n");
+       if (!dev->attached && !capable(CAP_NET_ADMIN)) {
+               DPRINTK("not attached and not CAP_NET_ADMIN\n");
                mutex_unlock(&dev->mutex);
                return -ENODEV;
        }
index b6440f52178fad125f3d0101d7a97f82ad2cf3ee..52366e877d7636a04078bdd14133f30be6459f5d 100644 (file)
@@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
        if (can_sleep)
                lock->fl_flags |= FL_SLEEP;
 
-       error = security_file_lock(filp, cmd);
+       error = security_file_lock(filp, lock->fl_type);
        if (error)
                goto out_free;
 
index ed27bb205b7e9d13a80d240ed98385e8dcfdc384..d11f404667e962ababa87f3678a52e78c50794dd 100644 (file)
@@ -1533,9 +1533,11 @@ int may_open(struct path *path, int acc_mode, int flag)
        if (error)
                return error;
 
-       error = ima_path_check(path,
-                              acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC),
+       error = ima_path_check(path, acc_mode ?
+                              acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
+                              ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
                               IMA_COUNT_UPDATE);
+
        if (error)
                return error;
        /*
index 5573508f707fa5cd652da4503049fa6e89c0f9f8..36fcabbf5186452023707c360fe16e3f168200a2 100644 (file)
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
        int flags = nfsexp_flags(rqstp, exp);
        int ret;
 
+       validate_process_creds();
+
        /* discard any old override before preparing the new set */
        revert_creds(get_cred(current->real_cred));
        new = prepare_creds();
@@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
        else
                new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
                                                        new->cap_permitted);
+       validate_process_creds();
        put_cred(override_creds(new));
        put_cred(new);
+       validate_process_creds();
        return 0;
 
 oom:
index 492c79b7800b5b7fcb4f8a8813b2d369a5039d7a..24d58adfe5fdc1a014caf1d92249146821fd5ca3 100644 (file)
@@ -496,7 +496,9 @@ nfsd(void *vrqstp)
                /* Lock the export hash tables for reading. */
                exp_readlock();
 
+               validate_process_creds();
                svc_process(rqstp);
+               validate_process_creds();
 
                /* Unlock export hash tables */
                exp_readunlock();
index 23341c1063bcd05fb2841dc6b49d0a91eef36293..8fa09bfbcba7f51664f3cf67e68695952f9e348b 100644 (file)
@@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        __be32          err;
        int             host_err;
 
+       validate_process_creds();
+
        /*
         * If we get here, then the client has already done an "open",
         * and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 out_nfserr:
        err = nfserrno(host_err);
 out:
+       validate_process_creds();
        return err;
 }
 
index dd98e8076024d638cb0987b57b24269d20588587..31191bf513e40ebcbd05668cc0db2bc46c1c1311 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -199,7 +199,7 @@ out:
 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
        struct file *filp)
 {
-       int err;
+       int ret;
        struct iattr newattrs;
 
        /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
@@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
        }
 
        /* Remove suid/sgid on truncate too */
-       newattrs.ia_valid |= should_remove_suid(dentry);
+       ret = should_remove_suid(dentry);
+       if (ret)
+               newattrs.ia_valid |= ret | ATTR_FORCE;
 
        mutex_lock(&dentry->d_inode->i_mutex);
-       err = notify_change(dentry, &newattrs);
+       ret = notify_change(dentry, &newattrs);
        mutex_unlock(&dentry->d_inode->i_mutex);
-       return err;
+       return ret;
 }
 
 static long do_sys_truncate(const char __user *pathname, loff_t length)
@@ -957,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
        int error;
        struct file *f;
 
+       validate_creds(cred);
+
        /*
         * We must always pass in a valid mount pointer.   Historically
         * callers got away with not passing it, but we must enforce this at
index 14f2d71ea3ce8b7077b0da8774bc554b912cd0fb..0050fc40e8c9f373a3f7c61855870ad7d4a1952c 100644 (file)
@@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 const struct inode_operations sysfs_dir_inode_operations = {
        .lookup         = sysfs_lookup,
        .setattr        = sysfs_setattr,
+       .setxattr       = sysfs_setxattr,
 };
 
 static void remove_dir(struct sysfs_dirent *sd)
index 555f0ff988df1379638d0372ebced2349db25848..2b6a8d9de73dfd0ddba64ba0113a23f6f01fc929 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
 #include "sysfs.h"
 
 extern struct super_block * sysfs_sb;
@@ -35,6 +37,7 @@ static struct backing_dev_info sysfs_backing_dev_info = {
 
 static const struct inode_operations sysfs_inode_operations ={
        .setattr        = sysfs_setattr,
+       .setxattr       = sysfs_setxattr,
 };
 
 int __init sysfs_inode_init(void)
@@ -42,18 +45,37 @@ int __init sysfs_inode_init(void)
        return bdi_init(&sysfs_backing_dev_info);
 }
 
+struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
+{
+       struct sysfs_inode_attrs *attrs;
+       struct iattr *iattrs;
+
+       attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
+       if (!attrs)
+               return NULL;
+       iattrs = &attrs->ia_iattr;
+
+       /* assign default attributes */
+       iattrs->ia_mode = sd->s_mode;
+       iattrs->ia_uid = 0;
+       iattrs->ia_gid = 0;
+       iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
+
+       return attrs;
+}
 int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
 {
        struct inode * inode = dentry->d_inode;
        struct sysfs_dirent * sd = dentry->d_fsdata;
-       struct iattr * sd_iattr;
+       struct sysfs_inode_attrs *sd_attrs;
+       struct iattr *iattrs;
        unsigned int ia_valid = iattr->ia_valid;
        int error;
 
        if (!sd)
                return -EINVAL;
 
-       sd_iattr = sd->s_iattr;
+       sd_attrs = sd->s_iattr;
 
        error = inode_change_ok(inode, iattr);
        if (error)
@@ -65,42 +87,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
        if (error)
                return error;
 
-       if (!sd_iattr) {
+       if (!sd_attrs) {
                /* setting attributes for the first time, allocate now */
-               sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
-               if (!sd_iattr)
+               sd_attrs = sysfs_init_inode_attrs(sd);
+               if (!sd_attrs)
                        return -ENOMEM;
-               /* assign default attributes */
-               sd_iattr->ia_mode = sd->s_mode;
-               sd_iattr->ia_uid = 0;
-               sd_iattr->ia_gid = 0;
-               sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
-               sd->s_iattr = sd_iattr;
+               sd->s_iattr = sd_attrs;
+       } else {
+               /* attributes were changed at least once in past */
+               iattrs = &sd_attrs->ia_iattr;
+
+               if (ia_valid & ATTR_UID)
+                       iattrs->ia_uid = iattr->ia_uid;
+               if (ia_valid & ATTR_GID)
+                       iattrs->ia_gid = iattr->ia_gid;
+               if (ia_valid & ATTR_ATIME)
+                       iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
+                                       inode->i_sb->s_time_gran);
+               if (ia_valid & ATTR_MTIME)
+                       iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
+                                       inode->i_sb->s_time_gran);
+               if (ia_valid & ATTR_CTIME)
+                       iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
+                                       inode->i_sb->s_time_gran);
+               if (ia_valid & ATTR_MODE) {
+                       umode_t mode = iattr->ia_mode;
+
+                       if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                               mode &= ~S_ISGID;
+                       iattrs->ia_mode = sd->s_mode = mode;
+               }
        }
+       return error;
+}
 
-       /* attributes were changed atleast once in past */
-
-       if (ia_valid & ATTR_UID)
-               sd_iattr->ia_uid = iattr->ia_uid;
-       if (ia_valid & ATTR_GID)
-               sd_iattr->ia_gid = iattr->ia_gid;
-       if (ia_valid & ATTR_ATIME)
-               sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
-                                               inode->i_sb->s_time_gran);
-       if (ia_valid & ATTR_MTIME)
-               sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
-                                               inode->i_sb->s_time_gran);
-       if (ia_valid & ATTR_CTIME)
-               sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
-                                               inode->i_sb->s_time_gran);
-       if (ia_valid & ATTR_MODE) {
-               umode_t mode = iattr->ia_mode;
-
-               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                       mode &= ~S_ISGID;
-               sd_iattr->ia_mode = sd->s_mode = mode;
-       }
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+               size_t size, int flags)
+{
+       struct sysfs_dirent *sd = dentry->d_fsdata;
+       struct sysfs_inode_attrs *iattrs;
+       void *secdata;
+       int error;
+       u32 secdata_len = 0;
+
+       if (!sd)
+               return -EINVAL;
+       if (!sd->s_iattr)
+               sd->s_iattr = sysfs_init_inode_attrs(sd);
+       if (!sd->s_iattr)
+               return -ENOMEM;
+
+       iattrs = sd->s_iattr;
+
+       if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
+               const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+               error = security_inode_setsecurity(dentry->d_inode, suffix,
+                                               value, size, flags);
+               if (error)
+                       goto out;
+               error = security_inode_getsecctx(dentry->d_inode,
+                                               &secdata, &secdata_len);
+               if (error)
+                       goto out;
+               if (iattrs->ia_secdata)
+                       security_release_secctx(iattrs->ia_secdata,
+                                               iattrs->ia_secdata_len);
+               iattrs->ia_secdata = secdata;
+               iattrs->ia_secdata_len = secdata_len;
 
+       } else
+               return -EINVAL;
+out:
        return error;
 }
 
@@ -146,6 +203,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
 static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
        struct bin_attribute *bin_attr;
+       struct sysfs_inode_attrs *iattrs;
 
        inode->i_private = sysfs_get(sd);
        inode->i_mapping->a_ops = &sysfs_aops;
@@ -154,16 +212,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
        inode->i_ino = sd->s_ino;
        lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
 
-       if (sd->s_iattr) {
+       iattrs = sd->s_iattr;
+       if (iattrs) {
                /* sysfs_dirent has non-default attributes
                 * get them for the new inode from persistent copy
                 * in sysfs_dirent
                 */
-               set_inode_attr(inode, sd->s_iattr);
+               set_inode_attr(inode, &iattrs->ia_iattr);
+               if (iattrs->ia_secdata)
+                       security_inode_notifysecctx(inode,
+                                               iattrs->ia_secdata,
+                                               iattrs->ia_secdata_len);
        } else
                set_default_inode_attr(inode, sd->s_mode);
 
-
        /* initialize inode according to type */
        switch (sysfs_type(sd)) {
        case SYSFS_DIR:
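
The net effect is that "security.*" attributes can now be set on sysfs nodes from user space (anything else still returns -EINVAL). A minimal user-space sketch; the path and label value are illustrative and require an LSM that implements inode_setsecurity:

	#include <stdio.h>
	#include <string.h>
	#include <sys/xattr.h>

	int main(void)
	{
		const char *path = "/sys/class/net/eth0";		/* example sysfs node */
		const char *ctx  = "system_u:object_r:sysfs_t:s0";	/* example label */

		if (setxattr(path, "security.selinux", ctx, strlen(ctx), 0) < 0) {
			perror("setxattr");
			return 1;
		}
		return 0;
	}
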
index 1d897ad808e0b1d234c302bc745a3c858848ce6b..c5081ad77026e95689e58fda616dc0784b958044 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/namei.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 
 #include "sysfs.h"
 
@@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
 }
 
 const struct inode_operations sysfs_symlink_inode_operations = {
+       .setxattr = sysfs_setxattr,
        .readlink = generic_readlink,
        .follow_link = sysfs_follow_link,
        .put_link = sysfs_put_link,
index 3fa0d98481e2253bd444c1656a50f96d35a150a7..af4c4e7482ac80cd41d9072fcf1a9150a626694c 100644 (file)
@@ -8,6 +8,8 @@
  * This file is released under the GPLv2.
  */
 
+#include <linux/fs.h>
+
 struct sysfs_open_dirent;
 
 /* type-specific structures for sysfs_dirent->s_* union members */
@@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr {
        struct hlist_head       buffers;
 };
 
+struct sysfs_inode_attrs {
+       struct iattr    ia_iattr;
+       void            *ia_secdata;
+       u32             ia_secdata_len;
+};
+
 /*
  * sysfs_dirent - the building block of sysfs hierarchy.  Each and
  * every sysfs node is represented by single sysfs_dirent.
@@ -56,7 +64,7 @@ struct sysfs_dirent {
        unsigned int            s_flags;
        ino_t                   s_ino;
        umode_t                 s_mode;
-       struct iattr            *s_iattr;
+       struct sysfs_inode_attrs *s_iattr;
 };
 
 #define SD_DEACTIVATED_BIAS            INT_MIN
@@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
 struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
 void sysfs_delete_inode(struct inode *inode);
 int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+               size_t size, int flags);
 int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
 int sysfs_inode_init(void);
 
index 1c3d0af59ddf84c7fb51c3f5da52803d2d095e9a..6d4f6d3449fbb8f3749e86148c3ee0d3c28c26eb 100644 (file)
@@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask)
        return inode_permission(inode, mask);
 }
 
-int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
-               size_t size, int flags)
+/**
+ *  __vfs_setxattr_noperm - perform setxattr operation without performing
+ *  permission checks.
+ *
+ *  @dentry - object to perform setxattr on
+ *  @name - xattr name to set
+ *  @value - value to set @name to
+ *  @size - size of @value
+ *  @flags - flags to pass into filesystem operations
+ *
+ *  returns the result of the internal setxattr or setsecurity operations.
+ *
+ *  This function requires the caller to lock the inode's i_mutex before it
+ *  is executed. It also assumes that the caller will make the appropriate
+ *  permission checks.
+ */
+int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
+               const void *value, size_t size, int flags)
 {
        struct inode *inode = dentry->d_inode;
-       int error;
-
-       error = xattr_permission(inode, name, MAY_WRITE);
-       if (error)
-               return error;
+       int error = -EOPNOTSUPP;
 
-       mutex_lock(&inode->i_mutex);
-       error = security_inode_setxattr(dentry, name, value, size, flags);
-       if (error)
-               goto out;
-       error = -EOPNOTSUPP;
        if (inode->i_op->setxattr) {
                error = inode->i_op->setxattr(dentry, name, value, size, flags);
                if (!error) {
@@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                if (!error)
                        fsnotify_xattr(dentry);
        }
+
+       return error;
+}
+
+
+int
+vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+               size_t size, int flags)
+{
+       struct inode *inode = dentry->d_inode;
+       int error;
+
+       error = xattr_permission(inode, name, MAY_WRITE);
+       if (error)
+               return error;
+
+       mutex_lock(&inode->i_mutex);
+       error = security_inode_setxattr(dentry, name, value, size, flags);
+       if (error)
+               goto out;
+
+       error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+
 out:
        mutex_unlock(&inode->i_mutex);
        return error;
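
For reference, a sketch of the intended caller pattern for the new helper (permission checks already done by the caller, i_mutex held across the call); the function name is invented, but this mirrors how an LSM's inode_setsecctx implementation would write a label back to the backing xattr:

	/* needs <linux/xattr.h> for XATTR_NAME_SELINUX */
	static int example_write_back_label(struct dentry *dentry,
					    void *ctx, u32 ctxlen)
	{
		struct inode *inode = dentry->d_inode;
		int error;

		mutex_lock(&inode->i_mutex);		/* required by the helper */
		error = __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX,
					      ctx, ctxlen, 0);
		mutex_unlock(&inode->i_mutex);
		return error;
	}
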
index 4fa9996963109dcc44edcfda3cce8100dd48f29e..24520a539c6ff18b9067b24d56dc22075c7096e2 100644 (file)
@@ -114,6 +114,13 @@ struct thread_group_cred {
  */
 struct cred {
        atomic_t        usage;
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       atomic_t        subscribers;    /* number of processes subscribed */
+       void            *put_addr;
+       unsigned        magic;
+#define CRED_MAGIC     0x43736564
+#define CRED_MAGIC_DEAD        0x44656144
+#endif
        uid_t           uid;            /* real UID of the task */
        gid_t           gid;            /* real GID of the task */
        uid_t           suid;           /* saved UID of the task */
@@ -143,7 +150,9 @@ struct cred {
 };
 
 extern void __put_cred(struct cred *);
+extern void exit_creds(struct task_struct *);
 extern int copy_creds(struct task_struct *, unsigned long);
+extern struct cred *cred_alloc_blank(void);
 extern struct cred *prepare_creds(void);
 extern struct cred *prepare_exec_creds(void);
 extern struct cred *prepare_usermodehelper_creds(void);
@@ -158,6 +167,60 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
 extern int set_create_files_as(struct cred *, struct inode *);
 extern void __init cred_init(void);
 
+/*
+ * check for validity of credentials
+ */
+#ifdef CONFIG_DEBUG_CREDENTIALS
+extern void __invalid_creds(const struct cred *, const char *, unsigned);
+extern void __validate_process_creds(struct task_struct *,
+                                    const char *, unsigned);
+
+static inline bool creds_are_invalid(const struct cred *cred)
+{
+       if (cred->magic != CRED_MAGIC)
+               return true;
+       if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
+               return true;
+#ifdef CONFIG_SECURITY_SELINUX
+       if ((unsigned long) cred->security < PAGE_SIZE)
+               return true;
+       if ((*(u32*)cred->security & 0xffffff00) ==
+           (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))
+               return true;
+#endif
+       return false;
+}
+
+static inline void __validate_creds(const struct cred *cred,
+                                   const char *file, unsigned line)
+{
+       if (unlikely(creds_are_invalid(cred)))
+               __invalid_creds(cred, file, line);
+}
+
+#define validate_creds(cred)                           \
+do {                                                   \
+       __validate_creds((cred), __FILE__, __LINE__);   \
+} while(0)
+
+#define validate_process_creds()                               \
+do {                                                           \
+       __validate_process_creds(current, __FILE__, __LINE__);  \
+} while(0)
+
+extern void validate_creds_for_do_exit(struct task_struct *);
+#else
+static inline void validate_creds(const struct cred *cred)
+{
+}
+static inline void validate_creds_for_do_exit(struct task_struct *tsk)
+{
+}
+static inline void validate_process_creds(void)
+{
+}
+#endif
+
 /**
  * get_new_cred - Get a reference on a new set of credentials
  * @cred: The new credentials to reference
@@ -186,7 +249,9 @@ static inline struct cred *get_new_cred(struct cred *cred)
  */
 static inline const struct cred *get_cred(const struct cred *cred)
 {
-       return get_new_cred((struct cred *) cred);
+       struct cred *nonconst_cred = (struct cred *) cred;
+       validate_creds(cred);
+       return get_new_cred(nonconst_cred);
 }
 
 /**
@@ -204,7 +269,7 @@ static inline void put_cred(const struct cred *_cred)
 {
        struct cred *cred = (struct cred *) _cred;
 
-       BUG_ON(atomic_read(&(cred)->usage) <= 0);
+       validate_creds(cred);
        if (atomic_dec_and_test(&(cred)->usage))
                __put_cred(cred);
 }
index e544f466d69a421cf48f2b626b0bf90d1ec22033..cd50dfa1d4c224de2a26e2d8ef3926d7fc74435e 100644 (file)
@@ -129,7 +129,10 @@ struct key {
        struct rw_semaphore     sem;            /* change vs change sem */
        struct key_user         *user;          /* owner of this key */
        void                    *security;      /* security data for this key */
-       time_t                  expiry;         /* time at which key expires (or 0) */
+       union {
+               time_t          expiry;         /* time at which key expires (or 0) */
+               time_t          revoked_at;     /* time at which key was revoked */
+       };
        uid_t                   uid;
        gid_t                   gid;
        key_perm_t              perm;           /* access permissions */
@@ -275,6 +278,8 @@ static inline key_serial_t key_serial(struct key *key)
 extern ctl_table key_sysctls[];
 #endif
 
+extern void key_replace_session_keyring(void);
+
 /*
  * the userspace interface
  */
@@ -297,6 +302,7 @@ extern void key_init(void);
 #define key_fsuid_changed(t)           do { } while(0)
 #define key_fsgid_changed(t)           do { } while(0)
 #define key_init()                     do { } while(0)
+#define key_replace_session_keyring()  do { } while(0)
 
 #endif /* CONFIG_KEYS */
 #endif /* __KERNEL__ */
index c0688eb7209396bf8ca3d5ba3202f6b8033ef981..bd383f1944fb2fde14bcd4ae5dbd6a92714168fb 100644 (file)
@@ -52,5 +52,6 @@
 #define KEYCTL_SET_TIMEOUT             15      /* set key timeout */
 #define KEYCTL_ASSUME_AUTHORITY                16      /* assume request_key() authorisation */
 #define KEYCTL_GET_SECURITY            17      /* get key security label */
+#define KEYCTL_SESSION_TO_PARENT       18      /* apply session keyring to parent process */
 
 #endif /*  _LINUX_KEYCTL_H */
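
A user-space sketch of the new operation; the value 18 comes from the define added above, and the keyutils keyctl() wrapper is assumed (newer keyutils also exposes this as keyctl_session_to_parent()):

	#include <stdio.h>
	#include <keyutils.h>

	#ifndef KEYCTL_SESSION_TO_PARENT
	#define KEYCTL_SESSION_TO_PARENT 18	/* matches the define above */
	#endif

	int main(void)
	{
		/* Join a fresh anonymous session keyring... */
		if (keyctl_join_session_keyring(NULL) < 0) {
			perror("keyctl_join_session_keyring");
			return 1;
		}
		/* ...then ask the kernel to install it as the parent's
		 * session keyring. */
		if (keyctl(KEYCTL_SESSION_TO_PARENT) < 0) {
			perror("KEYCTL_SESSION_TO_PARENT");
			return 1;
		}
		return 0;
	}
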
index 47b39b7c7e849189a9115c7669181c8f4f4714bd..dc2fd545db002459a0add2169ed861bec7ecf425 100644 (file)
@@ -34,6 +34,8 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n);
 int kmemcheck_show_addr(unsigned long address);
 int kmemcheck_hide_addr(unsigned long address);
 
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size);
+
 #else
 #define kmemcheck_enabled 0
 
@@ -99,6 +101,11 @@ static inline void kmemcheck_mark_initialized_pages(struct page *p,
 {
 }
 
+static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+       return true;
+}
+
 #endif /* CONFIG_KMEMCHECK */
 
 /*
index 6a63807f714e197b59ad639149803af270e20e93..3c7497d46ee9655fa86f7bb42f1637a023d6fd25 100644 (file)
 
 #ifdef CONFIG_DEBUG_KMEMLEAK
 
-extern void kmemleak_init(void);
+extern void kmemleak_init(void) __ref;
 extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
-                          gfp_t gfp);
-extern void kmemleak_free(const void *ptr);
-extern void kmemleak_free_part(const void *ptr, size_t size);
+                          gfp_t gfp) __ref;
+extern void kmemleak_free(const void *ptr) __ref;
+extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_padding(const void *ptr, unsigned long offset,
-                            size_t size);
-extern void kmemleak_not_leak(const void *ptr);
-extern void kmemleak_ignore(const void *ptr);
+                            size_t size) __ref;
+extern void kmemleak_not_leak(const void *ptr) __ref;
+extern void kmemleak_ignore(const void *ptr) __ref;
 extern void kmemleak_scan_area(const void *ptr, unsigned long offset,
-                              size_t length, gfp_t gfp);
-extern void kmemleak_no_scan(const void *ptr);
+                              size_t length, gfp_t gfp) __ref;
+extern void kmemleak_no_scan(const void *ptr) __ref;
 
 static inline void kmemleak_alloc_recursive(const void *ptr, size_t size,
                                            int min_count, unsigned long flags,
index e461b2c3d711a078bddba5086ee9df5143734ef7..190c37854870f4cf201dbe8c7970eab8b2788cac 100644 (file)
@@ -33,6 +33,7 @@ struct common_audit_data {
 #define LSM_AUDIT_DATA_IPC     4
 #define LSM_AUDIT_DATA_TASK    5
 #define LSM_AUDIT_DATA_KEY     6
+#define LSM_AUDIT_NO_AUDIT     7
        struct task_struct *tsk;
        union   {
                struct {
@@ -66,16 +67,19 @@ struct common_audit_data {
                } key_struct;
 #endif
        } u;
-       const char *function;
        /* this union contains LSM specific data */
        union {
+#ifdef CONFIG_SECURITY_SMACK
                /* SMACK data */
                struct smack_audit_data {
+                       const char *function;
                        char *subject;
                        char *object;
                        char *request;
                        int result;
                } smack_audit_data;
+#endif
+#ifdef CONFIG_SECURITY_SELINUX
                /* SELinux data */
                struct {
                        u32 ssid;
@@ -83,10 +87,12 @@ struct common_audit_data {
                        u16 tclass;
                        u32 requested;
                        u32 audited;
+                       u32 denied;
                        struct av_decision *avd;
                        int result;
                } selinux_audit_data;
-       } lsm_priv;
+#endif
+       };
        /* these callback will be implemented by a specific LSM */
        void (*lsm_pre_audit)(struct audit_buffer *, void *);
        void (*lsm_post_audit)(struct audit_buffer *, void *);
@@ -104,7 +110,7 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
 /* Initialize an LSM audit data structure. */
 #define COMMON_AUDIT_DATA_INIT(_d, _t) \
        { memset((_d), 0, sizeof(struct common_audit_data)); \
-        (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; }
+        (_d)->type = LSM_AUDIT_DATA_##_t; }
 
 void common_lsm_audit(struct common_audit_data *a);
 
index 0f1ea4a6695763debe7a6e87bf586c36efcc0c76..9304027673b0ac6abb0daf0bc8128dee3db0fcda 100644 (file)
@@ -1292,6 +1292,7 @@ struct task_struct {
        struct mutex cred_guard_mutex;  /* guard against foreign influences on
                                         * credential calculations
                                         * (notably. ptrace) */
+       struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
 
        char comm[TASK_COMM_LEN]; /* executable name excluding path
                                     - access with [gs]et_task_comm (which lock
@@ -2077,7 +2078,7 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 #define for_each_process(p) \
        for (p = &init_task ; (p = next_task(p)) != &init_task ; )
 
-extern bool is_single_threaded(struct task_struct *);
+extern bool current_is_single_threaded(void);
 
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
index 1f16eea2017b1062caf0be4540c4e0cb19a2bb27..d050b66ab9ef0415b169122590f7335714ad9b2e 100644 (file)
@@ -53,7 +53,7 @@ struct audit_krule;
 extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
                       int cap, int audit);
 extern int cap_settime(struct timespec *ts, struct timezone *tz);
-extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode);
+extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
 extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
 extern int cap_capset(struct cred *new, const struct cred *old,
@@ -653,6 +653,11 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     manual page for definitions of the @clone_flags.
  *     @clone_flags contains the flags indicating what should be shared.
  *     Return 0 if permission is granted.
+ * @cred_alloc_blank:
+ *     @cred points to the credentials.
+ *     @gfp indicates the atomicity of any memory allocations.
+ *     Only allocate sufficient memory and attach to @cred such that
+ *     cred_transfer() will not get ENOMEM.
  * @cred_free:
  *     @cred points to the credentials.
  *     Deallocate and clear the cred->security field in a set of credentials.
@@ -665,6 +670,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     @new points to the new credentials.
  *     @old points to the original credentials.
  *     Install a new set of credentials.
+ * @cred_transfer:
+ *     @new points to the new credentials.
+ *     @old points to the original credentials.
+ *     Transfer data from original creds to new creds
  * @kernel_act_as:
  *     Set the credentials for a kernel service to act as (subjective context).
  *     @new points to the credentials to be modified.
@@ -678,6 +687,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     @inode points to the inode to use as a reference.
  *     The current task must be the one that nominated @inode.
  *     Return 0 if successful.
+ * @kernel_module_request:
+ *     Ability to trigger the kernel to automatically upcall to userspace for
+ *     userspace to load a kernel module with the given name.
+ *     Return 0 if successful.
  * @task_setuid:
  *     Check permission before setting one or more of the user identity
  *     attributes of the current process.  The @flags parameter indicates
@@ -994,6 +1007,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     Sets the connection's peersid to the secmark on skb.
  * @req_classify_flow:
  *     Sets the flow's sid to the openreq sid.
+ * @tun_dev_create:
+ *     Check permissions prior to creating a new TUN device.
+ * @tun_dev_post_create:
+ *     This hook allows a module to update or allocate a per-socket security
+ *     structure.
+ *     @sk contains the newly created sock structure.
+ * @tun_dev_attach:
+ *     Check permissions prior to attaching to a persistent TUN device.  This
+ *     hook can also be used by the module to update any security state
+ *     associated with the TUN device's sock structure.
+ *     @sk contains the existing sock structure.
  *
  * Security hooks for XFRM operations.
  *
@@ -1088,6 +1112,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     Return the length of the string (including terminating NUL) or -ve if
  *      an error.
  *     May also return 0 (and a NULL buffer pointer) if there is no label.
+ * @key_session_to_parent:
+ *     Forcibly assign the session keyring from a process to its parent
+ *     process.
+ *     @cred: Pointer to process's credentials
+ *     @parent_cred: Pointer to parent process's credentials
+ *     @keyring: Proposed new session keyring
+ *     Return 0 if permission is granted, -ve error otherwise.
  *
  * Security hooks affecting all System V IPC operations.
  *
@@ -1229,7 +1260,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     @alter contains the flag indicating whether changes are to be made.
  *     Return 0 if permission is granted.
  *
- * @ptrace_may_access:
+ * @ptrace_access_check:
  *     Check permission before allowing the current process to trace the
  *     @child process.
  *     Security modules may also want to perform a process tracing check
@@ -1244,7 +1275,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     Check that the @parent process has sufficient permission to trace the
  *     current process before allowing the current process to present itself
  *     to the @parent process for tracing.
- *     The parent process will still have to undergo the ptrace_may_access
+ *     The parent process will still have to undergo the ptrace_access_check
  *     checks before it is allowed to trace this one.
  *     @parent contains the task_struct structure for debugger process.
  *     Return 0 if permission is granted.
@@ -1351,12 +1382,47 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *     audit_rule_init.
  *     @rule contains the allocated rule
  *
+ * @inode_notifysecctx:
+ *     Notify the security module of what the security context of an inode
+ *     should be.  Initializes the incore security context managed by the
+ *     security module for this inode.  Example usage:  NFS client invokes
+ *     this hook to initialize the security context in its incore inode to the
+ *     value provided by the server for the file when the server returned the
+ *     file's attributes to the client.
+ *
+ *     Must be called with inode->i_mutex locked.
+ *
+ *     @inode we wish to set the security context of.
+ *     @ctx contains the string which we wish to set in the inode.
+ *     @ctxlen contains the length of @ctx.
+ *
+ * @inode_setsecctx:
+ *     Change the security context of an inode.  Updates the
+ *     incore security context managed by the security module and invokes the
+ *     fs code as needed (via __vfs_setxattr_noperm) to update any backing
+ *     xattrs that represent the context.  Example usage:  NFS server invokes
+ *     this hook to change the security context in its incore inode and on the
+ *     backing filesystem to a value provided by the client on a SETATTR
+ *     operation.
+ *
+ *     Must be called with inode->i_mutex locked.
+ *
+ *     @dentry contains the inode we wish to set the security context of.
+ *     @ctx contains the string which we wish to set in the inode.
+ *     @ctxlen contains the length of @ctx.
+ *
+ * @inode_getsecctx:
+ *     Returns a string containing all relevant security context information
+ *
+ *     @inode we wish to get the security context of.
+ *     @ctx is a pointer in which to place the allocated security context.
+ *     @ctxlen points to the place to put the length of @ctx.
  * This is the main security structure.
  */
 struct security_operations {
        char name[SECURITY_NAME_MAX + 1];
 
-       int (*ptrace_may_access) (struct task_struct *child, unsigned int mode);
+       int (*ptrace_access_check) (struct task_struct *child, unsigned int mode);
        int (*ptrace_traceme) (struct task_struct *parent);
        int (*capget) (struct task_struct *target,
                       kernel_cap_t *effective,
@@ -1483,12 +1549,15 @@ struct security_operations {
        int (*dentry_open) (struct file *file, const struct cred *cred);
 
        int (*task_create) (unsigned long clone_flags);
+       int (*cred_alloc_blank) (struct cred *cred, gfp_t gfp);
        void (*cred_free) (struct cred *cred);
        int (*cred_prepare)(struct cred *new, const struct cred *old,
                            gfp_t gfp);
        void (*cred_commit)(struct cred *new, const struct cred *old);
+       void (*cred_transfer)(struct cred *new, const struct cred *old);
        int (*kernel_act_as)(struct cred *new, u32 secid);
        int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
+       int (*kernel_module_request)(void);
        int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
        int (*task_fix_setuid) (struct cred *new, const struct cred *old,
                                int flags);
@@ -1556,6 +1625,10 @@ struct security_operations {
        int (*secctx_to_secid) (const char *secdata, u32 seclen, u32 *secid);
        void (*release_secctx) (char *secdata, u32 seclen);
 
+       int (*inode_notifysecctx)(struct inode *inode, void *ctx, u32 ctxlen);
+       int (*inode_setsecctx)(struct dentry *dentry, void *ctx, u32 ctxlen);
+       int (*inode_getsecctx)(struct inode *inode, void **ctx, u32 *ctxlen);
+
 #ifdef CONFIG_SECURITY_NETWORK
        int (*unix_stream_connect) (struct socket *sock,
                                    struct socket *other, struct sock *newsk);
@@ -1592,6 +1665,9 @@ struct security_operations {
        void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
        void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
        void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
+       int (*tun_dev_create)(void);
+       void (*tun_dev_post_create)(struct sock *sk);
+       int (*tun_dev_attach)(struct sock *sk);
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1620,6 +1696,9 @@ struct security_operations {
                               const struct cred *cred,
                               key_perm_t perm);
        int (*key_getsecurity)(struct key *key, char **_buffer);
+       int (*key_session_to_parent)(const struct cred *cred,
+                                    const struct cred *parent_cred,
+                                    struct key *key);
 #endif /* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
@@ -1637,7 +1716,7 @@ extern int security_module_enable(struct security_operations *ops);
 extern int register_security(struct security_operations *ops);
 
 /* Security operations */
-int security_ptrace_may_access(struct task_struct *child, unsigned int mode);
+int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
 int security_ptrace_traceme(struct task_struct *parent);
 int security_capget(struct task_struct *target,
                    kernel_cap_t *effective,
@@ -1736,11 +1815,14 @@ int security_file_send_sigiotask(struct task_struct *tsk,
 int security_file_receive(struct file *file);
 int security_dentry_open(struct file *file, const struct cred *cred);
 int security_task_create(unsigned long clone_flags);
+int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
 void security_cred_free(struct cred *cred);
 int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
 void security_commit_creds(struct cred *new, const struct cred *old);
+void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
+int security_kernel_module_request(void);
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
                             int flags);
@@ -1796,6 +1878,9 @@ int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
 
+int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
+int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
+int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
 #else /* CONFIG_SECURITY */
 struct security_mnt_opts {
 };
@@ -1818,10 +1903,10 @@ static inline int security_init(void)
        return 0;
 }
 
-static inline int security_ptrace_may_access(struct task_struct *child,
+static inline int security_ptrace_access_check(struct task_struct *child,
                                             unsigned int mode)
 {
-       return cap_ptrace_may_access(child, mode);
+       return cap_ptrace_access_check(child, mode);
 }
 
 static inline int security_ptrace_traceme(struct task_struct *parent)
@@ -2266,6 +2351,11 @@ static inline int security_task_create(unsigned long clone_flags)
        return 0;
 }
 
+static inline int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+       return 0;
+}
+
 static inline void security_cred_free(struct cred *cred)
 { }
 
@@ -2281,6 +2371,11 @@ static inline void security_commit_creds(struct cred *new,
 {
 }
 
+static inline void security_transfer_creds(struct cred *new,
+                                          const struct cred *old)
+{
+}
+
 static inline int security_kernel_act_as(struct cred *cred, u32 secid)
 {
        return 0;
@@ -2292,6 +2387,11 @@ static inline int security_kernel_create_files_as(struct cred *cred,
        return 0;
 }
 
+static inline int security_kernel_module_request(void)
+{
+       return 0;
+}
+
 static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
                                       int flags)
 {
@@ -2537,6 +2637,19 @@ static inline int security_secctx_to_secid(const char *secdata,
 static inline void security_release_secctx(char *secdata, u32 seclen)
 {
 }
+
+static inline int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+       return -EOPNOTSUPP;
+}
+static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+       return -EOPNOTSUPP;
+}
+static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+       return -EOPNOTSUPP;
+}
 #endif /* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
@@ -2575,6 +2688,9 @@ void security_inet_csk_clone(struct sock *newsk,
                        const struct request_sock *req);
 void security_inet_conn_established(struct sock *sk,
                        struct sk_buff *skb);
+int security_tun_dev_create(void);
+void security_tun_dev_post_create(struct sock *sk);
+int security_tun_dev_attach(struct sock *sk);
 
 #else  /* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct socket *sock,
@@ -2725,6 +2841,20 @@ static inline void security_inet_conn_established(struct sock *sk,
                        struct sk_buff *skb)
 {
 }
+
+static inline int security_tun_dev_create(void)
+{
+       return 0;
+}
+
+static inline void security_tun_dev_post_create(struct sock *sk)
+{
+}
+
+static inline int security_tun_dev_attach(struct sock *sk)
+{
+       return 0;
+}
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -2881,6 +3011,9 @@ void security_key_free(struct key *key);
 int security_key_permission(key_ref_t key_ref,
                            const struct cred *cred, key_perm_t perm);
 int security_key_getsecurity(struct key *key, char **_buffer);
+int security_key_session_to_parent(const struct cred *cred,
+                                  const struct cred *parent_cred,
+                                  struct key *key);
 
 #else
 
@@ -2908,6 +3041,13 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer)
        return 0;
 }
 
+static inline int security_key_session_to_parent(const struct cred *cred,
+                                                const struct cred *parent_cred,
+                                                struct key *key)
+{
+       return 0;
+}
+
 #endif
 #endif /* CONFIG_KEYS */
 
index d131e352cfe1f1ec9b0157625cc2db98d1b8098a..5c84af8c5f6fd7014439570a8d958f9b83eadecb 100644 (file)
@@ -49,6 +49,7 @@ struct xattr_handler {
 ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
 ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
 ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
+int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
 int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
 int vfs_removexattr(struct dentry *, const char *);
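__vfs_setxattr_noperm() is exported here so that an LSM's inode_setsecctx hook (documented above) can push a new label into the backing xattr without re-triggering the permission checks done by vfs_setxattr(). A kernel-side sketch for a hypothetical module; the example_ name and the "security.example" attribute are illustrative only:

    #include <linux/fs.h>
    #include <linux/xattr.h>

    static int example_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
    {
            /* Caller holds inode->i_mutex, per the hook documentation. */
            return __vfs_setxattr_noperm(dentry, "security.example", ctx, ctxlen, 0);
    }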
 
index 9f3391090b3e826d1d7cb935e58f7d01aa054b37..9a4715a2f6bf67ecfc35310bdf7e42ead2ee11c8 100644 (file)
@@ -491,13 +491,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
        u64 run_time;
        struct timespec uptime;
        struct tty_struct *tty;
+       const struct cred *orig_cred;
+
+       /* Perform file operations on behalf of whoever enabled accounting */
+       orig_cred = override_creds(file->f_cred);
 
        /*
         * First check to see if there is enough free_space to continue
         * the process accounting system.
         */
        if (!check_free_space(acct, file))
-               return;
+               goto out;
 
        /*
         * Fill the accounting struct with the needed info as recorded
@@ -578,6 +582,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
                               sizeof(acct_t), &file->f_pos);
        current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
        set_fs(fs);
+out:
+       revert_creds(orig_cred);
 }
 
 /**
index 1bb4d7e5d61694a6b8c01ba47f8432dcd37c828e..006fcab009d5053d628ef44075612f2a966f22be 100644 (file)
 #include <linux/cn_proc.h>
 #include "cred-internals.h"
 
+#if 0
+#define kdebug(FMT, ...) \
+       printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#else
+static inline __attribute__((format(printf, 1, 2)))
+void no_printk(const char *fmt, ...)
+{
+}
+#define kdebug(FMT, ...) \
+       no_printk("[%-5.5s%5u] "FMT"\n", current->comm, current->pid ,##__VA_ARGS__)
+#endif
+
 static struct kmem_cache *cred_jar;
 
 /*
@@ -36,6 +48,10 @@ static struct thread_group_cred init_tgcred = {
  */
 struct cred init_cred = {
        .usage                  = ATOMIC_INIT(4),
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       .subscribers            = ATOMIC_INIT(2),
+       .magic                  = CRED_MAGIC,
+#endif
        .securebits             = SECUREBITS_DEFAULT,
        .cap_inheritable        = CAP_INIT_INH_SET,
        .cap_permitted          = CAP_FULL_SET,
@@ -48,6 +64,31 @@ struct cred init_cred = {
 #endif
 };
 
+static inline void set_cred_subscribers(struct cred *cred, int n)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       atomic_set(&cred->subscribers, n);
+#endif
+}
+
+static inline int read_cred_subscribers(const struct cred *cred)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       return atomic_read(&cred->subscribers);
+#else
+       return 0;
+#endif
+}
+
+static inline void alter_cred_subscribers(const struct cred *_cred, int n)
+{
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       struct cred *cred = (struct cred *) _cred;
+
+       atomic_add(n, &cred->subscribers);
+#endif
+}
+
 /*
  * Dispose of the shared task group credentials
  */
@@ -85,9 +126,22 @@ static void put_cred_rcu(struct rcu_head *rcu)
 {
        struct cred *cred = container_of(rcu, struct cred, rcu);
 
+       kdebug("put_cred_rcu(%p)", cred);
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       if (cred->magic != CRED_MAGIC_DEAD ||
+           atomic_read(&cred->usage) != 0 ||
+           read_cred_subscribers(cred) != 0)
+               panic("CRED: put_cred_rcu() sees %p with"
+                     " mag %x, put %p, usage %d, subscr %d\n",
+                     cred, cred->magic, cred->put_addr,
+                     atomic_read(&cred->usage),
+                     read_cred_subscribers(cred));
+#else
        if (atomic_read(&cred->usage) != 0)
                panic("CRED: put_cred_rcu() sees %p with usage %d\n",
                      cred, atomic_read(&cred->usage));
+#endif
 
        security_cred_free(cred);
        key_put(cred->thread_keyring);
@@ -106,12 +160,90 @@ static void put_cred_rcu(struct rcu_head *rcu)
  */
 void __put_cred(struct cred *cred)
 {
+       kdebug("__put_cred(%p{%d,%d})", cred,
+              atomic_read(&cred->usage),
+              read_cred_subscribers(cred));
+
        BUG_ON(atomic_read(&cred->usage) != 0);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       BUG_ON(read_cred_subscribers(cred) != 0);
+       cred->magic = CRED_MAGIC_DEAD;
+       cred->put_addr = __builtin_return_address(0);
+#endif
+       BUG_ON(cred == current->cred);
+       BUG_ON(cred == current->real_cred);
 
        call_rcu(&cred->rcu, put_cred_rcu);
 }
 EXPORT_SYMBOL(__put_cred);
 
+/*
+ * Clean up a task's credentials when it exits
+ */
+void exit_creds(struct task_struct *tsk)
+{
+       struct cred *cred;
+
+       kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
+              atomic_read(&tsk->cred->usage),
+              read_cred_subscribers(tsk->cred));
+
+       cred = (struct cred *) tsk->real_cred;
+       tsk->real_cred = NULL;
+       validate_creds(cred);
+       alter_cred_subscribers(cred, -1);
+       put_cred(cred);
+
+       cred = (struct cred *) tsk->cred;
+       tsk->cred = NULL;
+       validate_creds(cred);
+       alter_cred_subscribers(cred, -1);
+       put_cred(cred);
+
+       cred = (struct cred *) tsk->replacement_session_keyring;
+       if (cred) {
+               tsk->replacement_session_keyring = NULL;
+               validate_creds(cred);
+               put_cred(cred);
+       }
+}
+
+/*
+ * Allocate blank credentials, such that the credentials can be filled in at a
+ * later date without risk of ENOMEM.
+ */
+struct cred *cred_alloc_blank(void)
+{
+       struct cred *new;
+
+       new = kmem_cache_zalloc(cred_jar, GFP_KERNEL);
+       if (!new)
+               return NULL;
+
+#ifdef CONFIG_KEYS
+       new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL);
+       if (!new->tgcred) {
+               kfree(new);
+               return NULL;
+       }
+       atomic_set(&new->tgcred->usage, 1);
+#endif
+
+       atomic_set(&new->usage, 1);
+
+       if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
+               goto error;
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       new->magic = CRED_MAGIC;
+#endif
+       return new;
+
+error:
+       abort_creds(new);
+       return NULL;
+}
+
 /**
  * prepare_creds - Prepare a new set of credentials for modification
  *
@@ -132,16 +264,19 @@ struct cred *prepare_creds(void)
        const struct cred *old;
        struct cred *new;
 
-       BUG_ON(atomic_read(&task->real_cred->usage) < 1);
+       validate_process_creds();
 
        new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
        if (!new)
                return NULL;
 
+       kdebug("prepare_creds() alloc %p", new);
+
        old = task->cred;
        memcpy(new, old, sizeof(struct cred));
 
        atomic_set(&new->usage, 1);
+       set_cred_subscribers(new, 0);
        get_group_info(new->group_info);
        get_uid(new->user);
 
@@ -157,6 +292,7 @@ struct cred *prepare_creds(void)
 
        if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
                goto error;
+       validate_creds(new);
        return new;
 
 error:
@@ -229,9 +365,12 @@ struct cred *prepare_usermodehelper_creds(void)
        if (!new)
                return NULL;
 
+       kdebug("prepare_usermodehelper_creds() alloc %p", new);
+
        memcpy(new, &init_cred, sizeof(struct cred));
 
        atomic_set(&new->usage, 1);
+       set_cred_subscribers(new, 0);
        get_group_info(new->group_info);
        get_uid(new->user);
 
@@ -250,6 +389,7 @@ struct cred *prepare_usermodehelper_creds(void)
 #endif
        if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0)
                goto error;
+       validate_creds(new);
 
        BUG_ON(atomic_read(&new->usage) != 1);
        return new;
@@ -286,6 +426,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
            ) {
                p->real_cred = get_cred(p->cred);
                get_cred(p->cred);
+               alter_cred_subscribers(p->cred, 2);
+               kdebug("share_creds(%p{%d,%d})",
+                      p->cred, atomic_read(&p->cred->usage),
+                      read_cred_subscribers(p->cred));
                atomic_inc(&p->cred->user->processes);
                return 0;
        }
@@ -331,6 +475,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 
        atomic_inc(&new->user->processes);
        p->cred = p->real_cred = get_cred(new);
+       alter_cred_subscribers(new, 2);
+       validate_creds(new);
        return 0;
 
 error_put:
@@ -355,13 +501,20 @@ error_put:
 int commit_creds(struct cred *new)
 {
        struct task_struct *task = current;
-       const struct cred *old;
+       const struct cred *old = task->real_cred;
 
-       BUG_ON(task->cred != task->real_cred);
-       BUG_ON(atomic_read(&task->real_cred->usage) < 2);
+       kdebug("commit_creds(%p{%d,%d})", new,
+              atomic_read(&new->usage),
+              read_cred_subscribers(new));
+
+       BUG_ON(task->cred != old);
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       BUG_ON(read_cred_subscribers(old) < 2);
+       validate_creds(old);
+       validate_creds(new);
+#endif
        BUG_ON(atomic_read(&new->usage) < 1);
 
-       old = task->real_cred;
        security_commit_creds(new, old);
 
        get_cred(new); /* we will require a ref for the subj creds too */
@@ -390,12 +543,14 @@ int commit_creds(struct cred *new)
         *   cheaply with the new uid cache, so if it matters
         *   we should be checking for it.  -DaveM
         */
+       alter_cred_subscribers(new, 2);
        if (new->user != old->user)
                atomic_inc(&new->user->processes);
        rcu_assign_pointer(task->real_cred, new);
        rcu_assign_pointer(task->cred, new);
        if (new->user != old->user)
                atomic_dec(&old->user->processes);
+       alter_cred_subscribers(old, -2);
 
        sched_switch_user(task);
 
@@ -428,6 +583,13 @@ EXPORT_SYMBOL(commit_creds);
  */
 void abort_creds(struct cred *new)
 {
+       kdebug("abort_creds(%p{%d,%d})", new,
+              atomic_read(&new->usage),
+              read_cred_subscribers(new));
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+       BUG_ON(read_cred_subscribers(new) != 0);
+#endif
        BUG_ON(atomic_read(&new->usage) < 1);
        put_cred(new);
 }
@@ -444,7 +606,20 @@ const struct cred *override_creds(const struct cred *new)
 {
        const struct cred *old = current->cred;
 
-       rcu_assign_pointer(current->cred, get_cred(new));
+       kdebug("override_creds(%p{%d,%d})", new,
+              atomic_read(&new->usage),
+              read_cred_subscribers(new));
+
+       validate_creds(old);
+       validate_creds(new);
+       get_cred(new);
+       alter_cred_subscribers(new, 1);
+       rcu_assign_pointer(current->cred, new);
+       alter_cred_subscribers(old, -1);
+
+       kdebug("override_creds() = %p{%d,%d}", old,
+              atomic_read(&old->usage),
+              read_cred_subscribers(old));
        return old;
 }
 EXPORT_SYMBOL(override_creds);
@@ -460,7 +635,15 @@ void revert_creds(const struct cred *old)
 {
        const struct cred *override = current->cred;
 
+       kdebug("revert_creds(%p{%d,%d})", old,
+              atomic_read(&old->usage),
+              read_cred_subscribers(old));
+
+       validate_creds(old);
+       validate_creds(override);
+       alter_cred_subscribers(old, 1);
        rcu_assign_pointer(current->cred, old);
+       alter_cred_subscribers(override, -1);
        put_cred(override);
 }
 EXPORT_SYMBOL(revert_creds);
@@ -502,11 +685,15 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
        if (!new)
                return NULL;
 
+       kdebug("prepare_kernel_cred() alloc %p", new);
+
        if (daemon)
                old = get_task_cred(daemon);
        else
                old = get_cred(&init_cred);
 
+       validate_creds(old);
+
        *new = *old;
        get_uid(new->user);
        get_group_info(new->group_info);
@@ -526,7 +713,9 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
                goto error;
 
        atomic_set(&new->usage, 1);
+       set_cred_subscribers(new, 0);
        put_cred(old);
+       validate_creds(new);
        return new;
 
 error:
@@ -589,3 +778,95 @@ int set_create_files_as(struct cred *new, struct inode *inode)
        return security_kernel_create_files_as(new, inode);
 }
 EXPORT_SYMBOL(set_create_files_as);
+
+#ifdef CONFIG_DEBUG_CREDENTIALS
+
+/*
+ * dump invalid credentials
+ */
+static void dump_invalid_creds(const struct cred *cred, const char *label,
+                              const struct task_struct *tsk)
+{
+       printk(KERN_ERR "CRED: %s credentials: %p %s%s%s\n",
+              label, cred,
+              cred == &init_cred ? "[init]" : "",
+              cred == tsk->real_cred ? "[real]" : "",
+              cred == tsk->cred ? "[eff]" : "");
+       printk(KERN_ERR "CRED: ->magic=%x, put_addr=%p\n",
+              cred->magic, cred->put_addr);
+       printk(KERN_ERR "CRED: ->usage=%d, subscr=%d\n",
+              atomic_read(&cred->usage),
+              read_cred_subscribers(cred));
+       printk(KERN_ERR "CRED: ->*uid = { %d,%d,%d,%d }\n",
+              cred->uid, cred->euid, cred->suid, cred->fsuid);
+       printk(KERN_ERR "CRED: ->*gid = { %d,%d,%d,%d }\n",
+              cred->gid, cred->egid, cred->sgid, cred->fsgid);
+#ifdef CONFIG_SECURITY
+       printk(KERN_ERR "CRED: ->security is %p\n", cred->security);
+       if ((unsigned long) cred->security >= PAGE_SIZE &&
+           (((unsigned long) cred->security & 0xffffff00) !=
+            (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)))
+               printk(KERN_ERR "CRED: ->security {%x, %x}\n",
+                      ((u32*)cred->security)[0],
+                      ((u32*)cred->security)[1]);
+#endif
+}
+
+/*
+ * report use of invalid credentials
+ */
+void __invalid_creds(const struct cred *cred, const char *file, unsigned line)
+{
+       printk(KERN_ERR "CRED: Invalid credentials\n");
+       printk(KERN_ERR "CRED: At %s:%u\n", file, line);
+       dump_invalid_creds(cred, "Specified", current);
+       BUG();
+}
+EXPORT_SYMBOL(__invalid_creds);
+
+/*
+ * check the credentials on a process
+ */
+void __validate_process_creds(struct task_struct *tsk,
+                             const char *file, unsigned line)
+{
+       if (tsk->cred == tsk->real_cred) {
+               if (unlikely(read_cred_subscribers(tsk->cred) < 2 ||
+                            creds_are_invalid(tsk->cred)))
+                       goto invalid_creds;
+       } else {
+               if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 ||
+                            read_cred_subscribers(tsk->cred) < 1 ||
+                            creds_are_invalid(tsk->real_cred) ||
+                            creds_are_invalid(tsk->cred)))
+                       goto invalid_creds;
+       }
+       return;
+
+invalid_creds:
+       printk(KERN_ERR "CRED: Invalid process credentials\n");
+       printk(KERN_ERR "CRED: At %s:%u\n", file, line);
+
+       dump_invalid_creds(tsk->real_cred, "Real", tsk);
+       if (tsk->cred != tsk->real_cred)
+               dump_invalid_creds(tsk->cred, "Effective", tsk);
+       else
+               printk(KERN_ERR "CRED: Effective creds == Real creds\n");
+       BUG();
+}
+EXPORT_SYMBOL(__validate_process_creds);
+
+/*
+ * check creds for do_exit()
+ */
+void validate_creds_for_do_exit(struct task_struct *tsk)
+{
+       kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})",
+              tsk->real_cred, tsk->cred,
+              atomic_read(&tsk->cred->usage),
+              read_cred_subscribers(tsk->cred));
+
+       __validate_process_creds(tsk, __FILE__, __LINE__);
+}
+
+#endif /* CONFIG_DEBUG_CREDENTIALS */
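The cred_alloc_blank() and security_transfer_creds() additions in this file exist so that a caller can reserve a credentials record while allocation failure is still easy to handle, then populate and commit it later from a context where ENOMEM would be awkward; the replacement_session_keyring handling in exit_creds() above is part of the same machinery. A heavily trimmed sketch of the general pattern (the example_ function is hypothetical, not a verbatim call site, and the field copying is elided):

    #include <linux/cred.h>
    #include <linux/errno.h>
    #include <linux/security.h>

    static int example_adopt_blank_creds(void)
    {
            struct cred *new = cred_alloc_blank();  /* may sleep, may fail here */

            if (!new)
                    return -ENOMEM;

            /* ... later, where ENOMEM must no longer surface ... */
            security_transfer_creds(new, current_cred());   /* carry LSM state across */
            /* copy uids, gids, keyrings, etc. from the old creds as needed */
            return commit_creds(new);
    }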
index 869dc221733e27701c275a0bdf283404507108f6..c98ff7a8025f70fad1ce747daaeb20bca4c553bb 100644 (file)
@@ -901,6 +901,8 @@ NORET_TYPE void do_exit(long code)
 
        tracehook_report_exit(&code);
 
+       validate_creds_for_do_exit(tsk);
+
        /*
         * We're taking recursive faults here in do_exit. Safest is to just
         * leave this task alone and wait for reboot.
@@ -1009,6 +1011,8 @@ NORET_TYPE void do_exit(long code)
        if (tsk->splice_pipe)
                __free_pipe_info(tsk->splice_pipe);
 
+       validate_creds_for_do_exit(tsk);
+
        preempt_disable();
        /* causes final put_task_struct in finish_task_switch(). */
        tsk->state = TASK_DEAD;
index e6c04d462ab250b806cd2e7d76d84fd1729d121e..aab8579c6093bb5f1c63a26876d45546e25c21df 100644 (file)
@@ -152,8 +152,7 @@ void __put_task_struct(struct task_struct *tsk)
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);
 
-       put_cred(tsk->real_cred);
-       put_cred(tsk->cred);
+       exit_creds(tsk);
        delayacct_tsk_free(tsk);
 
        if (!profile_handoff_task(tsk))
@@ -1297,8 +1296,7 @@ bad_fork_cleanup_put_domain:
        module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
-       put_cred(p->real_cred);
-       put_cred(p->cred);
+       exit_creds(p);
 bad_fork_free:
        free_task(p);
 fork_out:
index 385c31a1bdbf3118b3d82d44b6720af727738bcb..4e8cae2e9148153c5055a91d42948e38a685c22f 100644 (file)
@@ -78,6 +78,10 @@ int __request_module(bool wait, const char *fmt, ...)
 #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
        static int kmod_loop_msg;
 
+       ret = security_kernel_module_request();
+       if (ret)
+               return ret;
+
        va_start(args, fmt);
        ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
        va_end(args);
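The security_kernel_module_request() gate added above complements the CAP_SYS_MODULE to CAP_NET_ADMIN changes later in this series: call sites such as dev_load() and the TCP congestion-control setters no longer need the module-loading capability themselves, and the LSM gains a single choke point for vetting implicit module auto-loads. A hypothetical module could wire the hook up roughly as below (the example_ name and the capability-based policy are illustrative only; explicit insmod/modprobe is not affected by this hook):

    #include <linux/capability.h>
    #include <linux/errno.h>

    static int example_kernel_module_request(void)
    {
            /* Permit kernel-initiated module auto-loading only when the
             * triggering task holds CAP_SYS_MODULE. */
            return capable(CAP_SYS_MODULE) ? 0 : -EPERM;
    }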
@@ -462,6 +466,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info,
        int retval = 0;
 
        BUG_ON(atomic_read(&sub_info->cred->usage) != 1);
+       validate_creds(sub_info->cred);
 
        helper_lock();
        if (sub_info->path[0] == '\0')
index 082c320e4dbf0a4538b064f55814c72246852877..307c285af59e89141412181d547f0fca7d4c7073 100644 (file)
@@ -152,7 +152,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
        if (!dumpable && !capable(CAP_SYS_PTRACE))
                return -EPERM;
 
-       return security_ptrace_may_access(task, mode);
+       return security_ptrace_access_check(task, mode);
 }
 
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
index 58be76017fd000f3b019c0acab6d3cdeb672afda..71d8dc7f99208629b63eb73800a69ea85e119c43 100644 (file)
@@ -49,7 +49,6 @@
 #include <linux/acpi.h>
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
-#include <linux/security.h>
 #include <linux/slow-work.h>
 #include <linux/perf_counter.h>
 
index 12327b2bb785c256a59cb864a2c2b8976c33e5aa..fbb87cf138c516db92f9282cf0ee01c1aaa443c3 100644 (file)
@@ -653,6 +653,21 @@ config DEBUG_NOTIFIERS
          This is a relatively cheap check but if you care about maximum
          performance, say N.
 
+config DEBUG_CREDENTIALS
+       bool "Debug credential management"
+       depends on DEBUG_KERNEL
+       help
+         Enable this to turn on some debug checking for credential
+         management.  The additional code keeps track of the number of
+         pointers from task_structs to any given cred struct, and checks to
+         see that this number never exceeds the usage count of the cred
+         struct.
+
+         Furthermore, if SELinux is enabled, this also checks that the
+         security pointer in the cred struct is never seen to be invalid.
+
+         If unsure, say N.
+
 #
 # Select this config option from the architecture Kconfig, if it
 # it is preferred to always offer frame pointers as a config
index f1ed2fe76c6575b9fc7ad28fdc215f87e6a73b3e..bd2bea963364c757e90db1e47ce557a94d053c0b 100644 (file)
 
 #include <linux/sched.h>
 
-/**
- * is_single_threaded - Determine if a thread group is single-threaded or not
- * @p: A task in the thread group in question
- *
- * This returns true if the thread group to which a task belongs is single
- * threaded, false if it is not.
+/*
+ * Returns true if the task does not share ->mm with another thread/process.
  */
-bool is_single_threaded(struct task_struct *p)
+bool current_is_single_threaded(void)
 {
-       struct task_struct *g, *t;
-       struct mm_struct *mm = p->mm;
+       struct task_struct *task = current;
+       struct mm_struct *mm = task->mm;
+       struct task_struct *p, *t;
+       bool ret;
 
-       if (atomic_read(&p->signal->count) != 1)
-               goto no;
+       if (atomic_read(&task->signal->live) != 1)
+               return false;
 
-       if (atomic_read(&p->mm->mm_users) != 1) {
-               read_lock(&tasklist_lock);
-               do_each_thread(g, t) {
-                       if (t->mm == mm && t != p)
-                               goto no_unlock;
-               } while_each_thread(g, t);
-               read_unlock(&tasklist_lock);
-       }
+       if (atomic_read(&mm->mm_users) == 1)
+               return true;
 
-       return true;
+       ret = false;
+       rcu_read_lock();
+       for_each_process(p) {
+               if (unlikely(p->flags & PF_KTHREAD))
+                       continue;
+               if (unlikely(p == task->group_leader))
+                       continue;
+
+               t = p;
+               do {
+                       if (unlikely(t->mm == mm))
+                               goto found;
+                       if (likely(t->mm))
+                               break;
+                       /*
+                        * t->mm == NULL. Make sure next_thread/next_task
+                        * will see other CLONE_VM tasks which might be
+                        * forked before exiting.
+                        */
+                       smp_rmb();
+               } while_each_thread(p, t);
+       }
+       ret = true;
+found:
+       rcu_read_unlock();
 
-no_unlock:
-       read_unlock(&tasklist_lock);
-no:
-       return false;
+       return ret;
 }
index 701740c9e81bdf967062b340e53cff8b7913427a..555d5d2731c6d963a66db15d9eb66aa03e9d6c8d 100644 (file)
@@ -521,7 +521,11 @@ find_block:
                region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
                                start_off);
                memset(region, 0, size);
-               kmemleak_alloc(region, size, 1, 0);
+               /*
+                * The min_count is set to 0 so that bootmem allocated blocks
+                * are never reported as leaks.
+                */
+               kmemleak_alloc(region, size, 0, 0);
                return region;
        }
 
index 487267310a8447a175ede900f69c35ca55d63dba..4ea4510e2996cb5dab18852cb1f5e285f7fbd81e 100644 (file)
 #include <linux/string.h>
 #include <linux/nodemask.h>
 #include <linux/mm.h>
+#include <linux/workqueue.h>
 
 #include <asm/sections.h>
 #include <asm/processor.h>
 #include <asm/atomic.h>
 
+#include <linux/kmemcheck.h>
 #include <linux/kmemleak.h>
 
 /*
 #define SECS_FIRST_SCAN                60      /* delay before the first scan */
 #define SECS_SCAN_WAIT         600     /* subsequent auto scanning delay */
 #define GRAY_LIST_PASSES       25      /* maximum number of gray list scans */
+#define MAX_SCAN_SIZE          4096    /* maximum size of a scanned block */
 
 #define BYTES_PER_POINTER      sizeof(void *)
 
@@ -120,6 +123,9 @@ struct kmemleak_scan_area {
        size_t length;
 };
 
+#define KMEMLEAK_GREY  0
+#define KMEMLEAK_BLACK -1
+
 /*
  * Structure holding the metadata for each allocated memory block.
  * Modifications to such objects should be made while holding the
@@ -161,6 +167,15 @@ struct kmemleak_object {
 /* flag set on newly allocated objects */
 #define OBJECT_NEW             (1 << 3)
 
+/* number of bytes to print per line; must be 16 or 32 */
+#define HEX_ROW_SIZE           16
+/* number of bytes to print at a time (1, 2, 4, 8) */
+#define HEX_GROUP_SIZE         1
+/* include ASCII after the hex output */
+#define HEX_ASCII              1
+/* max number of lines to be printed */
+#define HEX_MAX_LINES          2
+
 /* the list of all allocated objects */
 static LIST_HEAD(object_list);
 /* the list of gray-colored objects (see color_gray comment below) */
@@ -228,11 +243,14 @@ struct early_log {
        int min_count;                  /* minimum reference count */
        unsigned long offset;           /* scan area offset */
        size_t length;                  /* scan area length */
+       unsigned long trace[MAX_TRACE]; /* stack trace */
+       unsigned int trace_len;         /* stack trace length */
 };
 
 /* early logging buffer and current position */
-static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE];
-static int crt_early_log;
+static struct early_log
+       early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
+static int crt_early_log __initdata;
 
 static void kmemleak_disable(void);
 
@@ -254,6 +272,35 @@ static void kmemleak_disable(void);
        kmemleak_disable();             \
 } while (0)
 
+/*
+ * Printing of the objects hex dump to the seq file. The number of lines to be
+ * printed is limited to HEX_MAX_LINES to prevent seq file spamming. The
+ * actual number of printed bytes depends on HEX_ROW_SIZE. It must be called
+ * with the object->lock held.
+ */
+static void hex_dump_object(struct seq_file *seq,
+                           struct kmemleak_object *object)
+{
+       const u8 *ptr = (const u8 *)object->pointer;
+       int i, len, remaining;
+       unsigned char linebuf[HEX_ROW_SIZE * 5];
+
+       /* limit the number of lines to HEX_MAX_LINES */
+       remaining = len =
+               min(object->size, (size_t)(HEX_MAX_LINES * HEX_ROW_SIZE));
+
+       seq_printf(seq, "  hex dump (first %d bytes):\n", len);
+       for (i = 0; i < len; i += HEX_ROW_SIZE) {
+               int linelen = min(remaining, HEX_ROW_SIZE);
+
+               remaining -= HEX_ROW_SIZE;
+               hex_dump_to_buffer(ptr + i, linelen, HEX_ROW_SIZE,
+                                  HEX_GROUP_SIZE, linebuf, sizeof(linebuf),
+                                  HEX_ASCII);
+               seq_printf(seq, "    %s\n", linebuf);
+       }
+}
+
 /*
  * Object colors, encoded with count and min_count:
  * - white - orphan object, not enough references to it (count < min_count)
@@ -264,19 +311,21 @@ static void kmemleak_disable(void);
  * Newly created objects don't have any color assigned (object->count == -1)
  * before the next memory scan when they become white.
  */
-static int color_white(const struct kmemleak_object *object)
+static bool color_white(const struct kmemleak_object *object)
 {
-       return object->count != -1 && object->count < object->min_count;
+       return object->count != KMEMLEAK_BLACK &&
+               object->count < object->min_count;
 }
 
-static int color_gray(const struct kmemleak_object *object)
+static bool color_gray(const struct kmemleak_object *object)
 {
-       return object->min_count != -1 && object->count >= object->min_count;
+       return object->min_count != KMEMLEAK_BLACK &&
+               object->count >= object->min_count;
 }
 
-static int color_black(const struct kmemleak_object *object)
+static bool color_black(const struct kmemleak_object *object)
 {
-       return object->min_count == -1;
+       return object->min_count == KMEMLEAK_BLACK;
 }
 
 /*
@@ -284,7 +333,7 @@ static int color_black(const struct kmemleak_object *object)
  * not be deleted and have a minimum age to avoid false positives caused by
  * pointers temporarily stored in CPU registers.
  */
-static int unreferenced_object(struct kmemleak_object *object)
+static bool unreferenced_object(struct kmemleak_object *object)
 {
        return (object->flags & OBJECT_ALLOCATED) && color_white(object) &&
                time_before_eq(object->jiffies + jiffies_min_age,
@@ -304,6 +353,7 @@ static void print_unreferenced(struct seq_file *seq,
                   object->pointer, object->size);
        seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu\n",
                   object->comm, object->pid, object->jiffies);
+       hex_dump_object(seq, object);
        seq_printf(seq, "  backtrace:\n");
 
        for (i = 0; i < object->trace_len; i++) {
@@ -330,6 +380,7 @@ static void dump_object_info(struct kmemleak_object *object)
                  object->comm, object->pid, object->jiffies);
        pr_notice("  min_count = %d\n", object->min_count);
        pr_notice("  count = %d\n", object->count);
+       pr_notice("  flags = 0x%lx\n", object->flags);
        pr_notice("  backtrace:\n");
        print_stack_trace(&trace, 4);
 }
@@ -433,22 +484,37 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
        return object;
 }
 
+/*
+ * Save stack trace to the given array of MAX_TRACE size.
+ */
+static int __save_stack_trace(unsigned long *trace)
+{
+       struct stack_trace stack_trace;
+
+       stack_trace.max_entries = MAX_TRACE;
+       stack_trace.nr_entries = 0;
+       stack_trace.entries = trace;
+       stack_trace.skip = 2;
+       save_stack_trace(&stack_trace);
+
+       return stack_trace.nr_entries;
+}
+
 /*
  * Create the metadata (struct kmemleak_object) corresponding to an allocated
  * memory block and add it to the object_list and object_tree_root.
  */
-static void create_object(unsigned long ptr, size_t size, int min_count,
-                         gfp_t gfp)
+static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
+                                            int min_count, gfp_t gfp)
 {
        unsigned long flags;
        struct kmemleak_object *object;
        struct prio_tree_node *node;
-       struct stack_trace trace;
 
        object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK);
        if (!object) {
                kmemleak_stop("Cannot allocate a kmemleak_object structure\n");
-               return;
+               return NULL;
        }
 
        INIT_LIST_HEAD(&object->object_list);
@@ -482,18 +548,14 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
        }
 
        /* kernel backtrace */
-       trace.max_entries = MAX_TRACE;
-       trace.nr_entries = 0;
-       trace.entries = object->trace;
-       trace.skip = 1;
-       save_stack_trace(&trace);
-       object->trace_len = trace.nr_entries;
+       object->trace_len = __save_stack_trace(object->trace);
 
        INIT_PRIO_TREE_NODE(&object->tree_node);
        object->tree_node.start = ptr;
        object->tree_node.last = ptr + size - 1;
 
        write_lock_irqsave(&kmemleak_lock, flags);
+
        min_addr = min(min_addr, ptr);
        max_addr = max(max_addr, ptr + size);
        node = prio_tree_insert(&object_tree_root, &object->tree_node);
@@ -504,20 +566,19 @@ static void create_object(unsigned long ptr, size_t size, int min_count,
         * random memory blocks.
         */
        if (node != &object->tree_node) {
-               unsigned long flags;
-
                kmemleak_stop("Cannot insert 0x%lx into the object search tree "
                              "(already existing)\n", ptr);
                object = lookup_object(ptr, 1);
-               spin_lock_irqsave(&object->lock, flags);
+               spin_lock(&object->lock);
                dump_object_info(object);
-               spin_unlock_irqrestore(&object->lock, flags);
+               spin_unlock(&object->lock);
 
                goto out;
        }
        list_add_tail_rcu(&object->object_list, &object_list);
 out:
        write_unlock_irqrestore(&kmemleak_lock, flags);
+       return object;
 }
 
 /*
@@ -604,46 +665,55 @@ static void delete_object_part(unsigned long ptr, size_t size)
 
        put_object(object);
 }
-/*
- * Make a object permanently as gray-colored so that it can no longer be
- * reported as a leak. This is used in general to mark a false positive.
- */
-static void make_gray_object(unsigned long ptr)
+
+static void __paint_it(struct kmemleak_object *object, int color)
+{
+       object->min_count = color;
+       if (color == KMEMLEAK_BLACK)
+               object->flags |= OBJECT_NO_SCAN;
+}
+
+static void paint_it(struct kmemleak_object *object, int color)
 {
        unsigned long flags;
+
+       spin_lock_irqsave(&object->lock, flags);
+       __paint_it(object, color);
+       spin_unlock_irqrestore(&object->lock, flags);
+}
+
+static void paint_ptr(unsigned long ptr, int color)
+{
        struct kmemleak_object *object;
 
        object = find_and_get_object(ptr, 0);
        if (!object) {
-               kmemleak_warn("Graying unknown object at 0x%08lx\n", ptr);
+               kmemleak_warn("Trying to color unknown object "
+                             "at 0x%08lx as %s\n", ptr,
+                             (color == KMEMLEAK_GREY) ? "Grey" :
+                             (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
                return;
        }
-
-       spin_lock_irqsave(&object->lock, flags);
-       object->min_count = 0;
-       spin_unlock_irqrestore(&object->lock, flags);
+       paint_it(object, color);
        put_object(object);
 }
 
+/*
+ * Mark an object permanently gray-colored so that it can no longer be
+ * reported as a leak. This is used in general to mark a false positive.
+ */
+static void make_gray_object(unsigned long ptr)
+{
+       paint_ptr(ptr, KMEMLEAK_GREY);
+}
+
 /*
  * Mark the object as black-colored so that it is ignored from scans and
  * reporting.
  */
 static void make_black_object(unsigned long ptr)
 {
-       unsigned long flags;
-       struct kmemleak_object *object;
-
-       object = find_and_get_object(ptr, 0);
-       if (!object) {
-               kmemleak_warn("Blacking unknown object at 0x%08lx\n", ptr);
-               return;
-       }
-
-       spin_lock_irqsave(&object->lock, flags);
-       object->min_count = -1;
-       spin_unlock_irqrestore(&object->lock, flags);
-       put_object(object);
+       paint_ptr(ptr, KMEMLEAK_BLACK);
 }
 
 /*
@@ -715,14 +785,15 @@ static void object_no_scan(unsigned long ptr)
  * Log an early kmemleak_* call to the early_log buffer. These calls will be
  * processed later once kmemleak is fully initialized.
  */
-static void log_early(int op_type, const void *ptr, size_t size,
-                     int min_count, unsigned long offset, size_t length)
+static void __init log_early(int op_type, const void *ptr, size_t size,
+                            int min_count, unsigned long offset, size_t length)
 {
        unsigned long flags;
        struct early_log *log;
 
        if (crt_early_log >= ARRAY_SIZE(early_log)) {
-               pr_warning("Early log buffer exceeded\n");
+               pr_warning("Early log buffer exceeded, "
+                          "please increase DEBUG_KMEMLEAK_EARLY_LOG_SIZE\n");
                kmemleak_disable();
                return;
        }
@@ -739,16 +810,45 @@ static void log_early(int op_type, const void *ptr, size_t size,
        log->min_count = min_count;
        log->offset = offset;
        log->length = length;
+       if (op_type == KMEMLEAK_ALLOC)
+               log->trace_len = __save_stack_trace(log->trace);
        crt_early_log++;
        local_irq_restore(flags);
 }
 
+/*
+ * Log an early allocated block and populate the stack trace.
+ */
+static void early_alloc(struct early_log *log)
+{
+       struct kmemleak_object *object;
+       unsigned long flags;
+       int i;
+
+       if (!atomic_read(&kmemleak_enabled) || !log->ptr || IS_ERR(log->ptr))
+               return;
+
+       /*
+        * RCU locking needed to ensure object is not freed via put_object().
+        */
+       rcu_read_lock();
+       object = create_object((unsigned long)log->ptr, log->size,
+                              log->min_count, GFP_KERNEL);
+       spin_lock_irqsave(&object->lock, flags);
+       for (i = 0; i < log->trace_len; i++)
+               object->trace[i] = log->trace[i];
+       object->trace_len = log->trace_len;
+       spin_unlock_irqrestore(&object->lock, flags);
+       rcu_read_unlock();
+}
+
 /*
  * Memory allocation function callback. This function is called from the
  * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc,
  * vmalloc etc.).
  */
-void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp)
+void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
+                         gfp_t gfp)
 {
        pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);
 
@@ -763,7 +863,7 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc);
  * Memory freeing function callback. This function is called from the kernel
  * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.).
  */
-void kmemleak_free(const void *ptr)
+void __ref kmemleak_free(const void *ptr)
 {
        pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -778,7 +878,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free);
  * Partial memory freeing function callback. This function is usually called
  * from bootmem allocator when (part of) a memory block is freed.
  */
-void kmemleak_free_part(const void *ptr, size_t size)
+void __ref kmemleak_free_part(const void *ptr, size_t size)
 {
        pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -793,7 +893,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_part);
  * Mark an already allocated memory block as a false positive. This will cause
  * the block to no longer be reported as leak and always be scanned.
  */
-void kmemleak_not_leak(const void *ptr)
+void __ref kmemleak_not_leak(const void *ptr)
 {
        pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -809,7 +909,7 @@ EXPORT_SYMBOL(kmemleak_not_leak);
  * corresponding block is not a leak and does not contain any references to
  * other allocated memory blocks.
  */
-void kmemleak_ignore(const void *ptr)
+void __ref kmemleak_ignore(const void *ptr)
 {
        pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -823,8 +923,8 @@ EXPORT_SYMBOL(kmemleak_ignore);
 /*
  * Limit the range to be scanned in an allocated memory block.
  */
-void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length,
-                       gfp_t gfp)
+void __ref kmemleak_scan_area(const void *ptr, unsigned long offset,
+                             size_t length, gfp_t gfp)
 {
        pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -838,7 +938,7 @@ EXPORT_SYMBOL(kmemleak_scan_area);
 /*
  * Inform kmemleak not to scan the given memory block.
  */
-void kmemleak_no_scan(const void *ptr)
+void __ref kmemleak_no_scan(const void *ptr)
 {
        pr_debug("%s(0x%p)\n", __func__, ptr);
 
@@ -882,15 +982,22 @@ static void scan_block(void *_start, void *_end,
        unsigned long *end = _end - (BYTES_PER_POINTER - 1);
 
        for (ptr = start; ptr < end; ptr++) {
-               unsigned long flags;
-               unsigned long pointer = *ptr;
                struct kmemleak_object *object;
+               unsigned long flags;
+               unsigned long pointer;
 
                if (allow_resched)
                        cond_resched();
                if (scan_should_stop())
                        break;
 
+               /* don't scan uninitialized memory */
+               if (!kmemcheck_is_obj_initialized((unsigned long)ptr,
+                                                 BYTES_PER_POINTER))
+                       continue;
+
+               pointer = *ptr;
+
                object = find_and_get_object(pointer, 1);
                if (!object)
                        continue;
@@ -949,10 +1056,21 @@ static void scan_object(struct kmemleak_object *object)
        if (!(object->flags & OBJECT_ALLOCATED))
                /* already freed object */
                goto out;
-       if (hlist_empty(&object->area_list))
-               scan_block((void *)object->pointer,
-                          (void *)(object->pointer + object->size), object, 0);
-       else
+       if (hlist_empty(&object->area_list)) {
+               void *start = (void *)object->pointer;
+               void *end = (void *)(object->pointer + object->size);
+
+               while (start < end && (object->flags & OBJECT_ALLOCATED) &&
+                      !(object->flags & OBJECT_NO_SCAN)) {
+                       scan_block(start, min(start + MAX_SCAN_SIZE, end),
+                                  object, 0);
+                       start += MAX_SCAN_SIZE;
+
+                       spin_unlock_irqrestore(&object->lock, flags);
+                       cond_resched();
+                       spin_lock_irqsave(&object->lock, flags);
+               }
+       } else
                hlist_for_each_entry(area, elem, &object->area_list, node)
                        scan_block((void *)(object->pointer + area->offset),
                                   (void *)(object->pointer + area->offset
@@ -970,7 +1088,6 @@ static void kmemleak_scan(void)
 {
        unsigned long flags;
        struct kmemleak_object *object, *tmp;
-       struct task_struct *task;
        int i;
        int new_leaks = 0;
        int gray_list_pass = 0;
@@ -1037,15 +1154,16 @@ static void kmemleak_scan(void)
        }
 
        /*
-        * Scanning the task stacks may introduce false negatives and it is
-        * not enabled by default.
+        * Scanning the task stacks (may introduce false negatives).
         */
        if (kmemleak_stack_scan) {
+               struct task_struct *p, *g;
+
                read_lock(&tasklist_lock);
-               for_each_process(task)
-                       scan_block(task_stack_page(task),
-                                  task_stack_page(task) + THREAD_SIZE,
-                                  NULL, 0);
+               do_each_thread(g, p) {
+                       scan_block(task_stack_page(p), task_stack_page(p) +
+                                  THREAD_SIZE, NULL, 0);
+               } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
        }
 
@@ -1170,7 +1288,7 @@ static int kmemleak_scan_thread(void *arg)
  * Start the automatic memory scanning thread. This function must be called
  * with the scan_mutex held.
  */
-void start_scan_thread(void)
+static void start_scan_thread(void)
 {
        if (scan_thread)
                return;
@@ -1185,7 +1303,7 @@ void start_scan_thread(void)
  * Stop the automatic memory scanning thread. This function must be called
  * with the scan_mutex held.
  */
-void stop_scan_thread(void)
+static void stop_scan_thread(void)
 {
        if (scan_thread) {
                kthread_stop(scan_thread);
@@ -1294,6 +1412,49 @@ static int kmemleak_release(struct inode *inode, struct file *file)
        return seq_release(inode, file);
 }
 
+static int dump_str_object_info(const char *str)
+{
+       unsigned long flags;
+       struct kmemleak_object *object;
+       unsigned long addr;
+
+       addr = simple_strtoul(str, NULL, 0);
+       object = find_and_get_object(addr, 0);
+       if (!object) {
+               pr_info("Unknown object at 0x%08lx\n", addr);
+               return -EINVAL;
+       }
+
+       spin_lock_irqsave(&object->lock, flags);
+       dump_object_info(object);
+       spin_unlock_irqrestore(&object->lock, flags);
+
+       put_object(object);
+       return 0;
+}
+
+/*
+ * We use grey instead of black to ensure we can do future scans on the same
+ * objects. If we did not do future scans these black objects could
+ * potentially contain references to newly allocated objects in the future and
+ * we'd end up with false positives.
+ */
+static void kmemleak_clear(void)
+{
+       struct kmemleak_object *object;
+       unsigned long flags;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(object, &object_list, object_list) {
+               spin_lock_irqsave(&object->lock, flags);
+               if ((object->flags & OBJECT_REPORTED) &&
+                   unreferenced_object(object))
+                       __paint_it(object, KMEMLEAK_GREY);
+               spin_unlock_irqrestore(&object->lock, flags);
+       }
+       rcu_read_unlock();
+}
+
 /*
  * File write operation to configure kmemleak at run-time. The following
  * commands can be written to the /sys/kernel/debug/kmemleak file:
@@ -1305,6 +1466,9 @@ static int kmemleak_release(struct inode *inode, struct file *file)
  *   scan=...  - set the automatic memory scanning period in seconds (0 to
  *               disable it)
  *   scan      - trigger a memory scan
+ *   clear     - mark all current reported unreferenced kmemleak objects as
+ *               grey to ignore printing them
+ *   dump=...  - dump information about the object found at the given address
  */
 static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
                              size_t size, loff_t *ppos)
@@ -1345,6 +1509,10 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
                }
        } else if (strncmp(buf, "scan", 4) == 0)
                kmemleak_scan();
+       else if (strncmp(buf, "clear", 5) == 0)
+               kmemleak_clear();
+       else if (strncmp(buf, "dump=", 5) == 0)
+               ret = dump_str_object_info(buf + 5);
        else
                ret = -EINVAL;
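The new "clear" and "dump=" commands are plain strings written to the debugfs file, so they can be driven from scripts or a small helper. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug and using a made-up object address for dump=:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    static int kmemleak_cmd(const char *cmd)
    {
            int fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
            ssize_t n;

            if (fd < 0)
                    return -1;
            n = write(fd, cmd, strlen(cmd));
            close(fd);
            return n < 0 ? -1 : 0;
    }

    int main(void)
    {
            kmemleak_cmd("clear");                          /* grey out everything reported so far */
            kmemleak_cmd("dump=0xffff880012345678");        /* address is illustrative only */
            return 0;
    }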
 
@@ -1371,7 +1539,7 @@ static const struct file_operations kmemleak_fops = {
  * Perform the freeing of the kmemleak internal objects after waiting for any
  * current memory scan to complete.
  */
-static int kmemleak_cleanup_thread(void *arg)
+static void kmemleak_do_cleanup(struct work_struct *work)
 {
        struct kmemleak_object *object;
 
@@ -1383,22 +1551,9 @@ static int kmemleak_cleanup_thread(void *arg)
                delete_object_full(object->pointer);
        rcu_read_unlock();
        mutex_unlock(&scan_mutex);
-
-       return 0;
 }
 
-/*
- * Start the clean-up thread.
- */
-static void kmemleak_cleanup(void)
-{
-       struct task_struct *cleanup_thread;
-
-       cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL,
-                                    "kmemleak-clean");
-       if (IS_ERR(cleanup_thread))
-               pr_warning("Failed to create the clean-up thread\n");
-}
+static DECLARE_WORK(cleanup_work, kmemleak_do_cleanup);
 
 /*
  * Disable kmemleak. No memory allocation/freeing will be traced once this
@@ -1416,7 +1571,7 @@ static void kmemleak_disable(void)
 
        /* check whether it is too early for a kernel thread */
        if (atomic_read(&kmemleak_initialized))
-               kmemleak_cleanup();
+               schedule_work(&cleanup_work);
 
        pr_info("Kernel memory leak detector disabled\n");
 }
@@ -1469,8 +1624,7 @@ void __init kmemleak_init(void)
 
                switch (log->op_type) {
                case KMEMLEAK_ALLOC:
-                       kmemleak_alloc(log->ptr, log->size, log->min_count,
-                                      GFP_KERNEL);
+                       early_alloc(log);
                        break;
                case KMEMLEAK_FREE:
                        kmemleak_free(log->ptr);
@@ -1513,7 +1667,7 @@ static int __init kmemleak_late_init(void)
                 * after setting kmemleak_initialized and we may end up with
                 * two clean-up threads but serialized by scan_mutex.
                 */
-               kmemleak_cleanup();
+               schedule_work(&cleanup_work);
                return -ENOMEM;
        }
 
index 6a94475aee8594279eb90d94cc164c5169e533c5..278d489aad3bcecadcbbc38413ba4143c507159b 100644 (file)
@@ -1031,7 +1031,7 @@ void dev_load(struct net *net, const char *name)
        dev = __dev_get_by_name(net, name);
        read_unlock(&dev_base_lock);
 
-       if (!dev && capable(CAP_SYS_MODULE))
+       if (!dev && capable(CAP_NET_ADMIN))
                request_module("%s", name);
 }
 
index e92beb9e55e0d834c3184e4b2a5c6e4faa5ea057..6428b342b16442d48864c53ee7d4f8b25fc6a42c 100644 (file)
@@ -116,7 +116,7 @@ int tcp_set_default_congestion_control(const char *name)
        spin_lock(&tcp_cong_list_lock);
        ca = tcp_ca_find(name);
 #ifdef CONFIG_MODULES
-       if (!ca && capable(CAP_SYS_MODULE)) {
+       if (!ca && capable(CAP_NET_ADMIN)) {
                spin_unlock(&tcp_cong_list_lock);
 
                request_module("tcp_%s", name);
@@ -246,7 +246,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 
 #ifdef CONFIG_MODULES
        /* not found attempt to autoload module */
-       if (!ca && capable(CAP_SYS_MODULE)) {
+       if (!ca && capable(CAP_NET_ADMIN)) {
                rcu_read_unlock();
                request_module("tcp_%s", name);
                rcu_read_lock();
index b56e7f9ecbc2adf22706a849d704d8761dfa91dd..95ecc06392d73bed265e63d5a1b0f8772721f051 100644 (file)
@@ -16,9 +16,7 @@ obj-$(CONFIG_SECURITYFS)              += inode.o
 # Must precede capability.o in order to stack properly.
 obj-$(CONFIG_SECURITY_SELINUX)         += selinux/built-in.o
 obj-$(CONFIG_SECURITY_SMACK)           += smack/built-in.o
-ifeq ($(CONFIG_AUDIT),y)
-obj-$(CONFIG_SECURITY_SMACK)           += lsm_audit.o
-endif
+obj-$(CONFIG_AUDIT)                    += lsm_audit.o
 obj-$(CONFIG_SECURITY_TOMOYO)          += tomoyo/built-in.o
 obj-$(CONFIG_SECURITY_ROOTPLUG)                += root_plug.o
 obj-$(CONFIG_CGROUP_DEVICE)            += device_cgroup.o
index 88f752e8152cbe1ea888747121181035fcb8fbbb..fce07a7bc8257e9e57afa2681d96d71d22cbb785 100644 (file)
@@ -373,6 +373,11 @@ static int cap_task_create(unsigned long clone_flags)
        return 0;
 }
 
+static int cap_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+       return 0;
+}
+
 static void cap_cred_free(struct cred *cred)
 {
 }
@@ -386,6 +391,10 @@ static void cap_cred_commit(struct cred *new, const struct cred *old)
 {
 }
 
+static void cap_cred_transfer(struct cred *new, const struct cred *old)
+{
+}
+
 static int cap_kernel_act_as(struct cred *new, u32 secid)
 {
        return 0;
@@ -396,6 +405,11 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode)
        return 0;
 }
 
+static int cap_kernel_module_request(void)
+{
+       return 0;
+}
+
 static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 {
        return 0;
@@ -701,10 +715,26 @@ static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb)
 {
 }
 
+
+
 static void cap_req_classify_flow(const struct request_sock *req,
                                  struct flowi *fl)
 {
 }
+
+static int cap_tun_dev_create(void)
+{
+       return 0;
+}
+
+static void cap_tun_dev_post_create(struct sock *sk)
+{
+}
+
+static int cap_tun_dev_attach(struct sock *sk)
+{
+       return 0;
+}
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -792,6 +822,20 @@ static void cap_release_secctx(char *secdata, u32 seclen)
 {
 }
 
+static int cap_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+       return 0;
+}
+
+static int cap_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+       return 0;
+}
+
+static int cap_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+       return 0;
+}
 #ifdef CONFIG_KEYS
 static int cap_key_alloc(struct key *key, const struct cred *cred,
                         unsigned long flags)
@@ -815,6 +859,13 @@ static int cap_key_getsecurity(struct key *key, char **_buffer)
        return 0;
 }
 
+static int cap_key_session_to_parent(const struct cred *cred,
+                                    const struct cred *parent_cred,
+                                    struct key *key)
+{
+       return 0;
+}
+
 #endif /* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
@@ -854,7 +905,7 @@ struct security_operations default_security_ops = {
 
 void security_fixup_ops(struct security_operations *ops)
 {
-       set_to_cap_if_null(ops, ptrace_may_access);
+       set_to_cap_if_null(ops, ptrace_access_check);
        set_to_cap_if_null(ops, ptrace_traceme);
        set_to_cap_if_null(ops, capget);
        set_to_cap_if_null(ops, capset);
@@ -940,11 +991,14 @@ void security_fixup_ops(struct security_operations *ops)
        set_to_cap_if_null(ops, file_receive);
        set_to_cap_if_null(ops, dentry_open);
        set_to_cap_if_null(ops, task_create);
+       set_to_cap_if_null(ops, cred_alloc_blank);
        set_to_cap_if_null(ops, cred_free);
        set_to_cap_if_null(ops, cred_prepare);
        set_to_cap_if_null(ops, cred_commit);
+       set_to_cap_if_null(ops, cred_transfer);
        set_to_cap_if_null(ops, kernel_act_as);
        set_to_cap_if_null(ops, kernel_create_files_as);
+       set_to_cap_if_null(ops, kernel_module_request);
        set_to_cap_if_null(ops, task_setuid);
        set_to_cap_if_null(ops, task_fix_setuid);
        set_to_cap_if_null(ops, task_setgid);
@@ -992,6 +1046,9 @@ void security_fixup_ops(struct security_operations *ops)
        set_to_cap_if_null(ops, secid_to_secctx);
        set_to_cap_if_null(ops, secctx_to_secid);
        set_to_cap_if_null(ops, release_secctx);
+       set_to_cap_if_null(ops, inode_notifysecctx);
+       set_to_cap_if_null(ops, inode_setsecctx);
+       set_to_cap_if_null(ops, inode_getsecctx);
 #ifdef CONFIG_SECURITY_NETWORK
        set_to_cap_if_null(ops, unix_stream_connect);
        set_to_cap_if_null(ops, unix_may_send);
@@ -1020,6 +1077,9 @@ void security_fixup_ops(struct security_operations *ops)
        set_to_cap_if_null(ops, inet_csk_clone);
        set_to_cap_if_null(ops, inet_conn_established);
        set_to_cap_if_null(ops, req_classify_flow);
+       set_to_cap_if_null(ops, tun_dev_create);
+       set_to_cap_if_null(ops, tun_dev_post_create);
+       set_to_cap_if_null(ops, tun_dev_attach);
 #endif /* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
        set_to_cap_if_null(ops, xfrm_policy_alloc_security);
@@ -1038,6 +1098,7 @@ void security_fixup_ops(struct security_operations *ops)
        set_to_cap_if_null(ops, key_free);
        set_to_cap_if_null(ops, key_permission);
        set_to_cap_if_null(ops, key_getsecurity);
+       set_to_cap_if_null(ops, key_session_to_parent);
 #endif /* CONFIG_KEYS */
 #ifdef CONFIG_AUDIT
        set_to_cap_if_null(ops, audit_rule_init);
index e3097c0a1311205cc231cee85cb012b271db1ed8..fe30751a6cd9cb8862fa7fcbd19a3b818264c934 100644 (file)
@@ -101,7 +101,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz)
 }
 
 /**
- * cap_ptrace_may_access - Determine whether the current process may access
+ * cap_ptrace_access_check - Determine whether the current process may access
  *                        another
  * @child: The process to be accessed
  * @mode: The mode of attachment.
@@ -109,7 +109,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz)
  * Determine whether a process may access another, returning 0 if permission
  * granted, -ve if denied.
  */
-int cap_ptrace_may_access(struct task_struct *child, unsigned int mode)
+int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
 {
        int ret = 0;
 
index 747a464943aff0ada556e903fa6750fc7b670b60..74d5447d7df7259d83f54f5af5ff176fe4b5b37f 100644 (file)
@@ -3,6 +3,7 @@
 #
 
 obj-y := \
+       gc.o \
        key.o \
        keyring.o \
        keyctl.o \
index c766c68a63bc2078c62bf256730e05e14b36268b..792c0a611a6d115a5a272ca748af6c1804c762ef 100644 (file)
@@ -82,6 +82,9 @@ asmlinkage long compat_sys_keyctl(u32 option,
        case KEYCTL_GET_SECURITY:
                return keyctl_get_security(arg2, compat_ptr(arg3), arg4);
 
+       case KEYCTL_SESSION_TO_PARENT:
+               return keyctl_session_to_parent();
+
        default:
                return -EOPNOTSUPP;
        }
diff --git a/security/keys/gc.c b/security/keys/gc.c
new file mode 100644 (file)
index 0000000..1e616ae
--- /dev/null
@@ -0,0 +1,194 @@
+/* Key garbage collector
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <keys/keyring-type.h>
+#include "internal.h"
+
+/*
+ * Delay between key revocation/expiry in seconds
+ */
+unsigned key_gc_delay = 5 * 60;
+
+/*
+ * Reaper
+ */
+static void key_gc_timer_func(unsigned long);
+static void key_garbage_collector(struct work_struct *);
+static DEFINE_TIMER(key_gc_timer, key_gc_timer_func, 0, 0);
+static DECLARE_WORK(key_gc_work, key_garbage_collector);
+static key_serial_t key_gc_cursor; /* the last key the gc considered */
+static unsigned long key_gc_executing;
+static time_t key_gc_next_run = LONG_MAX;
+
+/*
+ * Schedule a garbage collection run
+ * - precision isn't particularly important
+ */
+void key_schedule_gc(time_t gc_at)
+{
+       unsigned long expires;
+       time_t now = current_kernel_time().tv_sec;
+
+       kenter("%ld", gc_at - now);
+
+       gc_at += key_gc_delay;
+
+       if (now >= gc_at) {
+               schedule_work(&key_gc_work);
+       } else if (gc_at < key_gc_next_run) {
+               expires = jiffies + (gc_at - now) * HZ;
+               mod_timer(&key_gc_timer, expires);
+       }
+}
+
+/*
+ * The garbage collector timer kicked off
+ */
+static void key_gc_timer_func(unsigned long data)
+{
+       kenter("");
+       key_gc_next_run = LONG_MAX;
+       schedule_work(&key_gc_work);
+}
+
+/*
+ * Garbage collect pointers from a keyring
+ * - return true if we altered the keyring
+ */
+static bool key_gc_keyring(struct key *keyring, time_t limit)
+       __releases(key_serial_lock)
+{
+       struct keyring_list *klist;
+       struct key *key;
+       int loop;
+
+       kenter("%x", key_serial(keyring));
+
+       if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
+               goto dont_gc;
+
+       /* scan the keyring looking for dead keys */
+       klist = rcu_dereference(keyring->payload.subscriptions);
+       if (!klist)
+               goto dont_gc;
+
+       for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+               key = klist->keys[loop];
+               if (test_bit(KEY_FLAG_DEAD, &key->flags) ||
+                   (key->expiry > 0 && key->expiry <= limit))
+                       goto do_gc;
+       }
+
+dont_gc:
+       kleave(" = false");
+       return false;
+
+do_gc:
+       key_gc_cursor = keyring->serial;
+       key_get(keyring);
+       spin_unlock(&key_serial_lock);
+       keyring_gc(keyring, limit);
+       key_put(keyring);
+       kleave(" = true");
+       return true;
+}
+
+/*
+ * Garbage collector for keys
+ * - this involves scanning the keyrings for dead, expired and revoked keys
+ *   that have overstayed their welcome
+ */
+static void key_garbage_collector(struct work_struct *work)
+{
+       struct rb_node *rb;
+       key_serial_t cursor;
+       struct key *key, *xkey;
+       time_t new_timer = LONG_MAX, limit;
+
+       kenter("");
+
+       if (test_and_set_bit(0, &key_gc_executing)) {
+               key_schedule_gc(current_kernel_time().tv_sec);
+               return;
+       }
+
+       limit = current_kernel_time().tv_sec;
+       if (limit > key_gc_delay)
+               limit -= key_gc_delay;
+       else
+               limit = key_gc_delay;
+
+       spin_lock(&key_serial_lock);
+
+       if (RB_EMPTY_ROOT(&key_serial_tree))
+               goto reached_the_end;
+
+       cursor = key_gc_cursor;
+       if (cursor < 0)
+               cursor = 0;
+
+       /* find the first key above the cursor */
+       key = NULL;
+       rb = key_serial_tree.rb_node;
+       while (rb) {
+               xkey = rb_entry(rb, struct key, serial_node);
+               if (cursor < xkey->serial) {
+                       key = xkey;
+                       rb = rb->rb_left;
+               } else if (cursor > xkey->serial) {
+                       rb = rb->rb_right;
+               } else {
+                       rb = rb_next(rb);
+                       if (!rb)
+                               goto reached_the_end;
+                       key = rb_entry(rb, struct key, serial_node);
+                       break;
+               }
+       }
+
+       if (!key)
+               goto reached_the_end;
+
+       /* trawl through the keys looking for keyrings */
+       for (;;) {
+               if (key->expiry > 0 && key->expiry < new_timer)
+                       new_timer = key->expiry;
+
+               if (key->type == &key_type_keyring &&
+                   key_gc_keyring(key, limit)) {
+                       /* the gc ate our lock */
+                       schedule_work(&key_gc_work);
+                       goto no_unlock;
+               }
+
+               rb = rb_next(&key->serial_node);
+               if (!rb) {
+                       key_gc_cursor = 0;
+                       break;
+               }
+               key = rb_entry(rb, struct key, serial_node);
+       }
+
+out:
+       spin_unlock(&key_serial_lock);
+no_unlock:
+       clear_bit(0, &key_gc_executing);
+       if (new_timer < LONG_MAX)
+               key_schedule_gc(new_timer);
+
+       kleave("");
+       return;
+
+reached_the_end:
+       key_gc_cursor = 0;
+       goto out;
+}
index 9fb679c66b8af3c90f85b7c016ff5657268685d2..24ba0307b7ad4a98382fd348a813d9722f85b444 100644 (file)
@@ -124,11 +124,18 @@ extern struct key *request_key_and_link(struct key_type *type,
                                        struct key *dest_keyring,
                                        unsigned long flags);
 
-extern key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
+extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
                                 key_perm_t perm);
+#define KEY_LOOKUP_CREATE      0x01
+#define KEY_LOOKUP_PARTIAL     0x02
+#define KEY_LOOKUP_FOR_UNLINK  0x04
 
 extern long join_session_keyring(const char *name);
 
+extern unsigned key_gc_delay;
+extern void keyring_gc(struct key *keyring, time_t limit);
+extern void key_schedule_gc(time_t expiry_at);
+
 /*
  * check to see whether permission is granted to use a key in the desired way
  */
@@ -194,6 +201,7 @@ extern long keyctl_set_timeout(key_serial_t, unsigned);
 extern long keyctl_assume_authority(key_serial_t);
 extern long keyctl_get_security(key_serial_t keyid, char __user *buffer,
                                size_t buflen);
+extern long keyctl_session_to_parent(void);
 
 /*
  * debugging key validation
index 4a1297d1ada4f41d97fd314a8848d7e132112da4..08531ad0f252129beffbf93c451d218cbce29bfb 100644 (file)
@@ -500,6 +500,7 @@ int key_negate_and_link(struct key *key,
                set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
                now = current_kernel_time();
                key->expiry = now.tv_sec + timeout;
+               key_schedule_gc(key->expiry);
 
                if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags))
                        awaken = 1;
@@ -642,10 +643,8 @@ struct key *key_lookup(key_serial_t id)
        goto error;
 
  found:
-       /* pretend it doesn't exist if it's dead */
-       if (atomic_read(&key->usage) == 0 ||
-           test_bit(KEY_FLAG_DEAD, &key->flags) ||
-           key->type == &key_type_dead)
+       /* pretend it doesn't exist if it is awaiting deletion */
+       if (atomic_read(&key->usage) == 0)
                goto not_found;
 
        /* this races with key_put(), but that doesn't matter since key_put()
@@ -890,6 +889,9 @@ EXPORT_SYMBOL(key_update);
  */
 void key_revoke(struct key *key)
 {
+       struct timespec now;
+       time_t time;
+
        key_check(key);
 
        /* make sure no one's trying to change or use the key when we mark it
@@ -902,6 +904,14 @@ void key_revoke(struct key *key)
            key->type->revoke)
                key->type->revoke(key);
 
+       /* set the death time to no more than the expiry time */
+       now = current_kernel_time();
+       time = now.tv_sec;
+       if (key->revoked_at == 0 || key->revoked_at > time) {
+               key->revoked_at = time;
+               key_schedule_gc(key->revoked_at);
+       }
+
        up_write(&key->sem);
 
 } /* end key_revoke() */
@@ -958,8 +968,10 @@ void unregister_key_type(struct key_type *ktype)
        for (_n = rb_first(&key_serial_tree); _n; _n = rb_next(_n)) {
                key = rb_entry(_n, struct key, serial_node);
 
-               if (key->type == ktype)
+               if (key->type == ktype) {
                        key->type = &key_type_dead;
+                       set_bit(KEY_FLAG_DEAD, &key->flags);
+               }
        }
 
        spin_unlock(&key_serial_lock);
@@ -984,6 +996,8 @@ void unregister_key_type(struct key_type *ktype)
        spin_unlock(&key_serial_lock);
        up_write(&key_types_sem);
 
+       key_schedule_gc(0);
+
 } /* end unregister_key_type() */
 
 EXPORT_SYMBOL(unregister_key_type);
index 7f09fb897d2b49e1b4a1e2f96e942094b9239474..74c96852459281cab2b959a0b2db55666c7c9a7b 100644 (file)
@@ -103,7 +103,7 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
        }
 
        /* find the target keyring (which must be writable) */
-       keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+       keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
        if (IS_ERR(keyring_ref)) {
                ret = PTR_ERR(keyring_ref);
                goto error3;
@@ -185,7 +185,8 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
        /* get the destination keyring if specified */
        dest_ref = NULL;
        if (destringid) {
-               dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
+               dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
+                                          KEY_WRITE);
                if (IS_ERR(dest_ref)) {
                        ret = PTR_ERR(dest_ref);
                        goto error3;
@@ -233,9 +234,11 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type,
 long keyctl_get_keyring_ID(key_serial_t id, int create)
 {
        key_ref_t key_ref;
+       unsigned long lflags;
        long ret;
 
-       key_ref = lookup_user_key(id, create, 0, KEY_SEARCH);
+       lflags = create ? KEY_LOOKUP_CREATE : 0;
+       key_ref = lookup_user_key(id, lflags, KEY_SEARCH);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
                goto error;
@@ -309,7 +312,7 @@ long keyctl_update_key(key_serial_t id,
        }
 
        /* find the target key (which must be writable) */
-       key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
+       key_ref = lookup_user_key(id, 0, KEY_WRITE);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
                goto error2;
@@ -337,10 +340,16 @@ long keyctl_revoke_key(key_serial_t id)
        key_ref_t key_ref;
        long ret;
 
-       key_ref = lookup_user_key(id, 0, 0, KEY_WRITE);
+       key_ref = lookup_user_key(id, 0, KEY_WRITE);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
-               goto error;
+               if (ret != -EACCES)
+                       goto error;
+               key_ref = lookup_user_key(id, 0, KEY_SETATTR);
+               if (IS_ERR(key_ref)) {
+                       ret = PTR_ERR(key_ref);
+                       goto error;
+               }
        }
 
        key_revoke(key_ref_to_ptr(key_ref));
@@ -363,7 +372,7 @@ long keyctl_keyring_clear(key_serial_t ringid)
        key_ref_t keyring_ref;
        long ret;
 
-       keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+       keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
        if (IS_ERR(keyring_ref)) {
                ret = PTR_ERR(keyring_ref);
                goto error;
@@ -389,13 +398,13 @@ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid)
        key_ref_t keyring_ref, key_ref;
        long ret;
 
-       keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+       keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
        if (IS_ERR(keyring_ref)) {
                ret = PTR_ERR(keyring_ref);
                goto error;
        }
 
-       key_ref = lookup_user_key(id, 1, 0, KEY_LINK);
+       key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_LINK);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
                goto error2;
@@ -423,13 +432,13 @@ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid)
        key_ref_t keyring_ref, key_ref;
        long ret;
 
-       keyring_ref = lookup_user_key(ringid, 0, 0, KEY_WRITE);
+       keyring_ref = lookup_user_key(ringid, 0, KEY_WRITE);
        if (IS_ERR(keyring_ref)) {
                ret = PTR_ERR(keyring_ref);
                goto error;
        }
 
-       key_ref = lookup_user_key(id, 0, 0, 0);
+       key_ref = lookup_user_key(id, KEY_LOOKUP_FOR_UNLINK, 0);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
                goto error2;
@@ -465,7 +474,7 @@ long keyctl_describe_key(key_serial_t keyid,
        char *tmpbuf;
        long ret;
 
-       key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
+       key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
        if (IS_ERR(key_ref)) {
                /* viewing a key under construction is permitted if we have the
                 * authorisation token handy */
@@ -474,7 +483,8 @@ long keyctl_describe_key(key_serial_t keyid,
                        if (!IS_ERR(instkey)) {
                                key_put(instkey);
                                key_ref = lookup_user_key(keyid,
-                                                         0, 1, 0);
+                                                         KEY_LOOKUP_PARTIAL,
+                                                         0);
                                if (!IS_ERR(key_ref))
                                        goto okay;
                        }
@@ -558,7 +568,7 @@ long keyctl_keyring_search(key_serial_t ringid,
        }
 
        /* get the keyring at which to begin the search */
-       keyring_ref = lookup_user_key(ringid, 0, 0, KEY_SEARCH);
+       keyring_ref = lookup_user_key(ringid, 0, KEY_SEARCH);
        if (IS_ERR(keyring_ref)) {
                ret = PTR_ERR(keyring_ref);
                goto error2;
@@ -567,7 +577,8 @@ long keyctl_keyring_search(key_serial_t ringid,
        /* get the destination keyring if specified */
        dest_ref = NULL;
        if (destringid) {
-               dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE);
+               dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE,
+                                          KEY_WRITE);
                if (IS_ERR(dest_ref)) {
                        ret = PTR_ERR(dest_ref);
                        goto error3;
@@ -637,7 +648,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
        long ret;
 
        /* find the key first */
-       key_ref = lookup_user_key(keyid, 0, 0, 0);
+       key_ref = lookup_user_key(keyid, 0, 0);
        if (IS_ERR(key_ref)) {
                ret = -ENOKEY;
                goto error;
@@ -700,7 +711,8 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid)
        if (uid == (uid_t) -1 && gid == (gid_t) -1)
                goto error;
 
-       key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+       key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+                                 KEY_SETATTR);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
                goto error;
@@ -805,7 +817,8 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
        if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL))
                goto error;
 
-       key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+       key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+                                 KEY_SETATTR);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
                goto error;
@@ -847,7 +860,7 @@ static long get_instantiation_keyring(key_serial_t ringid,
 
        /* if a specific keyring is nominated by ID, then use that */
        if (ringid > 0) {
-               dkref = lookup_user_key(ringid, 1, 0, KEY_WRITE);
+               dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_WRITE);
                if (IS_ERR(dkref))
                        return PTR_ERR(dkref);
                *_dest_keyring = key_ref_to_ptr(dkref);
@@ -1083,7 +1096,8 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
        time_t expiry;
        long ret;
 
-       key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR);
+       key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL,
+                                 KEY_SETATTR);
        if (IS_ERR(key_ref)) {
                ret = PTR_ERR(key_ref);
                goto error;
@@ -1101,6 +1115,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout)
        }
 
        key->expiry = expiry;
+       key_schedule_gc(key->expiry);
 
        up_write(&key->sem);
        key_put(key);
@@ -1170,7 +1185,7 @@ long keyctl_get_security(key_serial_t keyid,
        char *context;
        long ret;
 
-       key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW);
+       key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_VIEW);
        if (IS_ERR(key_ref)) {
                if (PTR_ERR(key_ref) != -EACCES)
                        return PTR_ERR(key_ref);
@@ -1182,7 +1197,7 @@ long keyctl_get_security(key_serial_t keyid,
                        return PTR_ERR(key_ref);
                key_put(instkey);
 
-               key_ref = lookup_user_key(keyid, 0, 1, 0);
+               key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, 0);
                if (IS_ERR(key_ref))
                        return PTR_ERR(key_ref);
        }
@@ -1213,6 +1228,105 @@ long keyctl_get_security(key_serial_t keyid,
        return ret;
 }
 
+/*
+ * attempt to install the calling process's session keyring on the process's
+ * parent process
+ * - the keyring must exist and must grant us LINK permission
+ * - implements keyctl(KEYCTL_SESSION_TO_PARENT)
+ */
+long keyctl_session_to_parent(void)
+{
+       struct task_struct *me, *parent;
+       const struct cred *mycred, *pcred;
+       struct cred *cred, *oldcred;
+       key_ref_t keyring_r;
+       int ret;
+
+       keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_LINK);
+       if (IS_ERR(keyring_r))
+               return PTR_ERR(keyring_r);
+
+       /* our parent is going to need a new cred struct, a new tgcred struct
+        * and new security data, so we allocate them here to prevent ENOMEM in
+        * our parent */
+       ret = -ENOMEM;
+       cred = cred_alloc_blank();
+       if (!cred)
+               goto error_keyring;
+
+       cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
+       keyring_r = NULL;
+
+       me = current;
+       write_lock_irq(&tasklist_lock);
+
+       parent = me->real_parent;
+       ret = -EPERM;
+
+       /* the parent mustn't be init and mustn't be a kernel thread */
+       if (parent->pid <= 1 || !parent->mm)
+               goto not_permitted;
+
+       /* the parent must be single threaded */
+       if (atomic_read(&parent->signal->count) != 1)
+               goto not_permitted;
+
+       /* the parent and the child must have different session keyrings or
+        * there's no point */
+       mycred = current_cred();
+       pcred = __task_cred(parent);
+       if (mycred == pcred ||
+           mycred->tgcred->session_keyring == pcred->tgcred->session_keyring)
+               goto already_same;
+
+       /* the parent must have the same effective ownership and mustn't be
+        * SUID/SGID */
+       if (pcred-> uid != mycred->euid ||
+           pcred->euid != mycred->euid ||
+           pcred->suid != mycred->euid ||
+           pcred-> gid != mycred->egid ||
+           pcred->egid != mycred->egid ||
+           pcred->sgid != mycred->egid)
+               goto not_permitted;
+
+       /* the keyrings must have the same UID */
+       if (pcred ->tgcred->session_keyring->uid != mycred->euid ||
+           mycred->tgcred->session_keyring->uid != mycred->euid)
+               goto not_permitted;
+
+       /* the LSM must permit the replacement of the parent's keyring with the
+        * keyring from this process */
+       ret = security_key_session_to_parent(mycred, pcred,
+                                            key_ref_to_ptr(keyring_r));
+       if (ret < 0)
+               goto not_permitted;
+
+       /* if there's an already pending keyring replacement, then we replace
+        * that */
+       oldcred = parent->replacement_session_keyring;
+
+       /* the replacement session keyring is applied just prior to userspace
+        * restarting */
+       parent->replacement_session_keyring = cred;
+       cred = NULL;
+       set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME);
+
+       write_unlock_irq(&tasklist_lock);
+       if (oldcred)
+               put_cred(oldcred);
+       return 0;
+
+already_same:
+       ret = 0;
+not_permitted:
+       put_cred(cred);
+       return ret;
+
+error_keyring:
+       key_ref_put(keyring_r);
+       return ret;
+}
+
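
A hedged userspace sketch of exercising the new operation; it assumes <linux/keyctl.h> exports KEYCTL_SESSION_TO_PARENT and uses the raw syscall rather than libkeyutils:

/* Illustrative only: ask the kernel to hand our session keyring to the parent. */
#include <linux/keyctl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        /* applied when the parent next returns to userspace (TIF_NOTIFY_RESUME) */
        if (syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT) < 0) {
                perror("KEYCTL_SESSION_TO_PARENT");
                return 1;
        }
        return 0;
}
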
 /*****************************************************************************/
 /*
  * the key control system call
@@ -1298,6 +1412,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
                                           (char __user *) arg3,
                                           (size_t) arg4);
 
+       case KEYCTL_SESSION_TO_PARENT:
+               return keyctl_session_to_parent();
+
        default:
                return -EOPNOTSUPP;
        }
index 3dba81c2eba360a615d9116e124cb36d86e89427..ac977f661a792780728e28d0f6faa374bccb5297 100644 (file)
@@ -1000,3 +1000,88 @@ static void keyring_revoke(struct key *keyring)
        }
 
 } /* end keyring_revoke() */
+
+/*
+ * Determine whether a key is dead
+ */
+static bool key_is_dead(struct key *key, time_t limit)
+{
+       return test_bit(KEY_FLAG_DEAD, &key->flags) ||
+               (key->expiry > 0 && key->expiry <= limit);
+}
+
+/*
+ * Collect garbage from the contents of a keyring
+ */
+void keyring_gc(struct key *keyring, time_t limit)
+{
+       struct keyring_list *klist, *new;
+       struct key *key;
+       int loop, keep, max;
+
+       kenter("%x", key_serial(keyring));
+
+       down_write(&keyring->sem);
+
+       klist = keyring->payload.subscriptions;
+       if (!klist)
+               goto just_return;
+
+       /* work out how many subscriptions we're keeping */
+       keep = 0;
+       for (loop = klist->nkeys - 1; loop >= 0; loop--)
+               if (!key_is_dead(klist->keys[loop], limit))
+                       keep++;
+
+       if (keep == klist->nkeys)
+               goto just_return;
+
+       /* allocate a new keyring payload */
+       max = roundup(keep, 4);
+       new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *),
+                     GFP_KERNEL);
+       if (!new)
+               goto just_return;
+       new->maxkeys = max;
+       new->nkeys = 0;
+       new->delkey = 0;
+
+       /* install the live keys
+        * - must take care as expired keys may be updated back to life
+        */
+       keep = 0;
+       for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+               key = klist->keys[loop];
+               if (!key_is_dead(key, limit)) {
+                       if (keep >= max)
+                               goto discard_new;
+                       new->keys[keep++] = key_get(key);
+               }
+       }
+       new->nkeys = keep;
+
+       /* adjust the quota */
+       key_payload_reserve(keyring,
+                           sizeof(struct keyring_list) +
+                           KEYQUOTA_LINK_BYTES * keep);
+
+       if (keep == 0) {
+               rcu_assign_pointer(keyring->payload.subscriptions, NULL);
+               kfree(new);
+       } else {
+               rcu_assign_pointer(keyring->payload.subscriptions, new);
+       }
+
+       up_write(&keyring->sem);
+
+       call_rcu(&klist->rcu, keyring_clear_rcu_disposal);
+       kleave(" [yes]");
+       return;
+
+discard_new:
+       new->nkeys = keep;
+       keyring_clear_rcu_disposal(&new->rcu);
+just_return:
+       up_write(&keyring->sem);
+       kleave(" [no]");
+}
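
keyring_gc() above publishes the pruned list with rcu_assign_pointer() and frees the old one via call_rcu(); for contrast, a generic sketch (not taken from this patch) of the matching read side, which dereferences the payload under rcu_read_lock():

/* Generic sketch of the RCU read side that pairs with the update above. */
static int example_count_subscriptions(struct key *keyring)
{
        struct keyring_list *klist;
        int nkeys = 0;

        rcu_read_lock();
        klist = rcu_dereference(keyring->payload.subscriptions);
        if (klist)
                nkeys = klist->nkeys;
        rcu_read_unlock();

        return nkeys;
}
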
index 769f9bdfd2b33aaeadbd7eb3efb895a364d0536a..9d01021ca0c8cdd9c25c8292daff990204ecf466 100644 (file)
@@ -91,59 +91,94 @@ __initcall(key_proc_init);
  */
 #ifdef CONFIG_KEYS_DEBUG_PROC_KEYS
 
-static struct rb_node *__key_serial_next(struct rb_node *n)
+static struct rb_node *key_serial_next(struct rb_node *n)
 {
+       struct user_namespace *user_ns = current_user_ns();
+
+       n = rb_next(n);
        while (n) {
                struct key *key = rb_entry(n, struct key, serial_node);
-               if (key->user->user_ns == current_user_ns())
+               if (key->user->user_ns == user_ns)
                        break;
                n = rb_next(n);
        }
        return n;
 }
 
-static struct rb_node *key_serial_next(struct rb_node *n)
+static int proc_keys_open(struct inode *inode, struct file *file)
 {
-       return __key_serial_next(rb_next(n));
+       return seq_open(file, &proc_keys_ops);
 }
 
-static struct rb_node *key_serial_first(struct rb_root *r)
+static struct key *find_ge_key(key_serial_t id)
 {
-       struct rb_node *n = rb_first(r);
-       return __key_serial_next(n);
-}
+       struct user_namespace *user_ns = current_user_ns();
+       struct rb_node *n = key_serial_tree.rb_node;
+       struct key *minkey = NULL;
 
-static int proc_keys_open(struct inode *inode, struct file *file)
-{
-       return seq_open(file, &proc_keys_ops);
+       while (n) {
+               struct key *key = rb_entry(n, struct key, serial_node);
+               if (id < key->serial) {
+                       if (!minkey || minkey->serial > key->serial)
+                               minkey = key;
+                       n = n->rb_left;
+               } else if (id > key->serial) {
+                       n = n->rb_right;
+               } else {
+                       minkey = key;
+                       break;
+               }
+               key = NULL;
+       }
 
+       if (!minkey)
+               return NULL;
+
+       for (;;) {
+               if (minkey->user->user_ns == user_ns)
+                       return minkey;
+               n = rb_next(&minkey->serial_node);
+               if (!n)
+                       return NULL;
+               minkey = rb_entry(n, struct key, serial_node);
+       }
 }
 
 static void *proc_keys_start(struct seq_file *p, loff_t *_pos)
+       __acquires(key_serial_lock)
 {
-       struct rb_node *_p;
-       loff_t pos = *_pos;
+       key_serial_t pos = *_pos;
+       struct key *key;
 
        spin_lock(&key_serial_lock);
 
-       _p = key_serial_first(&key_serial_tree);
-       while (pos > 0 && _p) {
-               pos--;
-               _p = key_serial_next(_p);
-       }
-
-       return _p;
+       if (*_pos > INT_MAX)
+               return NULL;
+       key = find_ge_key(pos);
+       if (!key)
+               return NULL;
+       *_pos = key->serial;
+       return &key->serial_node;
+}
 
+static inline key_serial_t key_node_serial(struct rb_node *n)
+{
+       struct key *key = rb_entry(n, struct key, serial_node);
+       return key->serial;
 }
 
 static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos)
 {
-       (*_pos)++;
-       return key_serial_next((struct rb_node *) v);
+       struct rb_node *n;
 
+       n = key_serial_next(v);
+       if (n)
+               *_pos = key_node_serial(n);
+       return n;
 }
 
 static void proc_keys_stop(struct seq_file *p, void *v)
+       __releases(key_serial_lock)
 {
        spin_unlock(&key_serial_lock);
 }
@@ -174,11 +209,9 @@ static int proc_keys_show(struct seq_file *m, void *v)
        /* come up with a suitable timeout value */
        if (key->expiry == 0) {
                memcpy(xbuf, "perm", 5);
-       }
-       else if (now.tv_sec >= key->expiry) {
+       } else if (now.tv_sec >= key->expiry) {
                memcpy(xbuf, "expd", 5);
-       }
-       else {
+       } else {
                timo = key->expiry - now.tv_sec;
 
                if (timo < 60)
@@ -218,9 +251,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
        seq_putc(m, '\n');
 
        rcu_read_unlock();
-
        return 0;
-
 }
 
 #endif /* CONFIG_KEYS_DEBUG_PROC_KEYS */
@@ -246,6 +277,7 @@ static struct rb_node *key_user_first(struct rb_root *r)
        struct rb_node *n = rb_first(r);
        return __key_user_next(n);
 }
+
 /*****************************************************************************/
 /*
  * implement "/proc/key-users" to provides a list of the key users
@@ -253,10 +285,10 @@ static struct rb_node *key_user_first(struct rb_root *r)
 static int proc_key_users_open(struct inode *inode, struct file *file)
 {
        return seq_open(file, &proc_key_users_ops);
-
 }
 
 static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
+       __acquires(key_user_lock)
 {
        struct rb_node *_p;
        loff_t pos = *_pos;
@@ -270,17 +302,16 @@ static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
        }
 
        return _p;
-
 }
 
 static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos)
 {
        (*_pos)++;
        return key_user_next((struct rb_node *) v);
-
 }
 
 static void proc_key_users_stop(struct seq_file *p, void *v)
+       __releases(key_user_lock)
 {
        spin_unlock(&key_user_lock);
 }
index 276d27882ce84394d7d6ce5b0c12d82e05f89987..5c23afb31ece464ad0165e1a3fb052a8969244c5 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/fs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 #include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 #include "internal.h"
@@ -487,7 +488,7 @@ static int lookup_user_key_possessed(const struct key *key, const void *target)
  * - don't create special keyrings unless so requested
  * - partially constructed keys aren't found unless requested
  */
-key_ref_t lookup_user_key(key_serial_t id, int create, int partial,
+key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags,
                          key_perm_t perm)
 {
        struct request_key_auth *rka;
@@ -503,7 +504,7 @@ try_again:
        switch (id) {
        case KEY_SPEC_THREAD_KEYRING:
                if (!cred->thread_keyring) {
-                       if (!create)
+                       if (!(lflags & KEY_LOOKUP_CREATE))
                                goto error;
 
                        ret = install_thread_keyring();
@@ -521,7 +522,7 @@ try_again:
 
        case KEY_SPEC_PROCESS_KEYRING:
                if (!cred->tgcred->process_keyring) {
-                       if (!create)
+                       if (!(lflags & KEY_LOOKUP_CREATE))
                                goto error;
 
                        ret = install_process_keyring();
@@ -642,7 +643,14 @@ try_again:
                break;
        }
 
-       if (!partial) {
+       /* unlink does not use the nominated key in any way, so can skip all
+        * the permission checks as it is only concerned with the keyring */
+       if (lflags & KEY_LOOKUP_FOR_UNLINK) {
+               ret = 0;
+               goto error;
+       }
+
+       if (!(lflags & KEY_LOOKUP_PARTIAL)) {
                ret = wait_for_key_construction(key, true);
                switch (ret) {
                case -ERESTARTSYS:
@@ -660,7 +668,8 @@ try_again:
        }
 
        ret = -EIO;
-       if (!partial && !test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
+       if (!(lflags & KEY_LOOKUP_PARTIAL) &&
+           !test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
                goto invalid_key;
 
        /* check the permissions */
@@ -702,7 +711,7 @@ long join_session_keyring(const char *name)
        /* only permit this if there's a single thread in the thread group -
         * this avoids us having to adjust the creds on all threads and risking
         * ENOMEM */
-       if (!is_single_threaded(current))
+       if (!current_is_single_threaded())
                return -EMLINK;
 
        new = prepare_creds();
@@ -760,3 +769,51 @@ error:
        abort_creds(new);
        return ret;
 }
+
+/*
+ * Replace a process's session keyring when that process resumes userspace on
+ * behalf of one of its children
+ */
+void key_replace_session_keyring(void)
+{
+       const struct cred *old;
+       struct cred *new;
+
+       if (!current->replacement_session_keyring)
+               return;
+
+       write_lock_irq(&tasklist_lock);
+       new = current->replacement_session_keyring;
+       current->replacement_session_keyring = NULL;
+       write_unlock_irq(&tasklist_lock);
+
+       if (!new)
+               return;
+
+       old = current_cred();
+       new->  uid      = old->  uid;
+       new-> euid      = old-> euid;
+       new-> suid      = old-> suid;
+       new->fsuid      = old->fsuid;
+       new->  gid      = old->  gid;
+       new-> egid      = old-> egid;
+       new-> sgid      = old-> sgid;
+       new->fsgid      = old->fsgid;
+       new->user       = get_uid(old->user);
+       new->group_info = get_group_info(old->group_info);
+
+       new->securebits = old->securebits;
+       new->cap_inheritable    = old->cap_inheritable;
+       new->cap_permitted      = old->cap_permitted;
+       new->cap_effective      = old->cap_effective;
+       new->cap_bset           = old->cap_bset;
+
+       new->jit_keyring        = old->jit_keyring;
+       new->thread_keyring     = key_get(old->thread_keyring);
+       new->tgcred->tgid       = old->tgcred->tgid;
+       new->tgcred->process_keyring = key_get(old->tgcred->process_keyring);
+
+       security_transfer_creds(new, old);
+
+       commit_creds(new);
+}
index b611d493c2d8be453551349f21c866bbfbb0ceac..5e05dc09e2dba0109ab931a57fecb97a4c017be9 100644 (file)
@@ -13,6 +13,8 @@
 #include <linux/sysctl.h>
 #include "internal.h"
 
+static const int zero, one = 1, max = INT_MAX;
+
 ctl_table key_sysctls[] = {
        {
                .ctl_name = CTL_UNNUMBERED,
@@ -20,7 +22,9 @@ ctl_table key_sysctls[] = {
                .data = &key_quota_maxkeys,
                .maxlen = sizeof(unsigned),
                .mode = 0644,
-               .proc_handler = &proc_dointvec,
+               .proc_handler = &proc_dointvec_minmax,
+               .extra1 = (void *) &one,
+               .extra2 = (void *) &max,
        },
        {
                .ctl_name = CTL_UNNUMBERED,
@@ -28,7 +32,9 @@ ctl_table key_sysctls[] = {
                .data = &key_quota_maxbytes,
                .maxlen = sizeof(unsigned),
                .mode = 0644,
-               .proc_handler = &proc_dointvec,
+               .proc_handler = &proc_dointvec_minmax,
+               .extra1 = (void *) &one,
+               .extra2 = (void *) &max,
        },
        {
                .ctl_name = CTL_UNNUMBERED,
@@ -36,7 +42,9 @@ ctl_table key_sysctls[] = {
                .data = &key_quota_root_maxkeys,
                .maxlen = sizeof(unsigned),
                .mode = 0644,
-               .proc_handler = &proc_dointvec,
+               .proc_handler = &proc_dointvec_minmax,
+               .extra1 = (void *) &one,
+               .extra2 = (void *) &max,
        },
        {
                .ctl_name = CTL_UNNUMBERED,
@@ -44,7 +52,19 @@ ctl_table key_sysctls[] = {
                .data = &key_quota_root_maxbytes,
                .maxlen = sizeof(unsigned),
                .mode = 0644,
-               .proc_handler = &proc_dointvec,
+               .proc_handler = &proc_dointvec_minmax,
+               .extra1 = (void *) &one,
+               .extra2 = (void *) &max,
+       },
+       {
+               .ctl_name = CTL_UNNUMBERED,
+               .procname = "gc_delay",
+               .data = &key_gc_delay,
+               .maxlen = sizeof(unsigned),
+               .mode = 0644,
+               .proc_handler = &proc_dointvec_minmax,
+               .extra1 = (void *) &zero,
+               .extra2 = (void *) &max,
        },
        { .ctl_name = 0 }
 };
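
With proc_dointvec_minmax the quota knobs are clamped to [1, INT_MAX] and the new gc_delay to [0, INT_MAX]; assuming the usual /proc/sys/kernel/keys/ location for these sysctls, a small illustrative snippet for tuning the collection delay:

/* Illustrative only: shorten the key GC delay to 60 seconds. */
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/keys/gc_delay", "w");

        if (!f) {
                perror("gc_delay");
                return 1;
        }
        fprintf(f, "60\n");     /* values outside 0..INT_MAX are rejected */
        return fclose(f);
}
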
index 94b868494b316dd15631d89ea49d5ced60ad99e5..500aad0ebd6acd8af2e63a7cbc8ae0965bbc5cdc 100644 (file)
@@ -220,6 +220,8 @@ static void dump_common_audit_data(struct audit_buffer *ab,
        }
 
        switch (a->type) {
+       case LSM_AUDIT_NO_AUDIT:
+               return;
        case LSM_AUDIT_DATA_IPC:
                audit_log_format(ab, " key=%d ", a->u.ipc_id);
                break;
index dc7674fbfc7a4a6fcfb132bc974f2c9480b90922..c4c673240c1c6761beadc94f3746e99d09c6a703 100644 (file)
@@ -124,9 +124,9 @@ int register_security(struct security_operations *ops)
 
 /* Security operations */
 
-int security_ptrace_may_access(struct task_struct *child, unsigned int mode)
+int security_ptrace_access_check(struct task_struct *child, unsigned int mode)
 {
-       return security_ops->ptrace_may_access(child, mode);
+       return security_ops->ptrace_access_check(child, mode);
 }
 
 int security_ptrace_traceme(struct task_struct *parent)
@@ -684,6 +684,11 @@ int security_task_create(unsigned long clone_flags)
        return security_ops->task_create(clone_flags);
 }
 
+int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+       return security_ops->cred_alloc_blank(cred, gfp);
+}
+
 void security_cred_free(struct cred *cred)
 {
        security_ops->cred_free(cred);
@@ -699,6 +704,11 @@ void security_commit_creds(struct cred *new, const struct cred *old)
        security_ops->cred_commit(new, old);
 }
 
+void security_transfer_creds(struct cred *new, const struct cred *old)
+{
+       security_ops->cred_transfer(new, old);
+}
+
 int security_kernel_act_as(struct cred *new, u32 secid)
 {
        return security_ops->kernel_act_as(new, secid);
@@ -709,6 +719,11 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
        return security_ops->kernel_create_files_as(new, inode);
 }
 
+int security_kernel_module_request(void)
+{
+       return security_ops->kernel_module_request();
+}
+
 int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
 {
        return security_ops->task_setuid(id0, id1, id2, flags);
@@ -959,6 +974,24 @@ void security_release_secctx(char *secdata, u32 seclen)
 }
 EXPORT_SYMBOL(security_release_secctx);
 
+int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+       return security_ops->inode_notifysecctx(inode, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_notifysecctx);
+
+int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+       return security_ops->inode_setsecctx(dentry, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_setsecctx);
+
+int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+       return security_ops->inode_getsecctx(inode, ctx, ctxlen);
+}
+EXPORT_SYMBOL(security_inode_getsecctx);
+
 #ifdef CONFIG_SECURITY_NETWORK
 
 int security_unix_stream_connect(struct socket *sock, struct socket *other,
@@ -1112,6 +1145,24 @@ void security_inet_conn_established(struct sock *sk,
        security_ops->inet_conn_established(sk, skb);
 }
 
+int security_tun_dev_create(void)
+{
+       return security_ops->tun_dev_create();
+}
+EXPORT_SYMBOL(security_tun_dev_create);
+
+void security_tun_dev_post_create(struct sock *sk)
+{
+       return security_ops->tun_dev_post_create(sk);
+}
+EXPORT_SYMBOL(security_tun_dev_post_create);
+
+int security_tun_dev_attach(struct sock *sk)
+{
+       return security_ops->tun_dev_attach(sk);
+}
+EXPORT_SYMBOL(security_tun_dev_attach);
+
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
@@ -1218,6 +1269,13 @@ int security_key_getsecurity(struct key *key, char **_buffer)
        return security_ops->key_getsecurity(key, _buffer);
 }
 
+int security_key_session_to_parent(const struct cred *cred,
+                                  const struct cred *parent_cred,
+                                  struct key *key)
+{
+       return security_ops->key_session_to_parent(cred, parent_cred, key);
+}
+
 #endif /* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
index b2ab608598325bcea26430f6ebd1eac30a67ad61..e3d19014259b700e8e7fb55fae73d0f72d41330c 100644 (file)
@@ -137,7 +137,7 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
  * @tclass: target security class
  * @av: access vector
  */
-void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
+static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av)
 {
        const char **common_pts = NULL;
        u32 common_base = 0;
@@ -492,23 +492,35 @@ out:
        return node;
 }
 
-static inline void avc_print_ipv6_addr(struct audit_buffer *ab,
-                                      struct in6_addr *addr, __be16 port,
-                                      char *name1, char *name2)
+/**
+ * avc_audit_pre_callback - SELinux specific information
+ * will be called by generic audit code
+ * @ab: the audit buffer
+ * @a: audit_data
+ */
+static void avc_audit_pre_callback(struct audit_buffer *ab, void *a)
 {
-       if (!ipv6_addr_any(addr))
-               audit_log_format(ab, " %s=%pI6", name1, addr);
-       if (port)
-               audit_log_format(ab, " %s=%d", name2, ntohs(port));
+       struct common_audit_data *ad = a;
+       audit_log_format(ab, "avc:  %s ",
+                        ad->selinux_audit_data.denied ? "denied" : "granted");
+       avc_dump_av(ab, ad->selinux_audit_data.tclass,
+                       ad->selinux_audit_data.audited);
+       audit_log_format(ab, " for ");
 }
 
-static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
-                                      __be16 port, char *name1, char *name2)
+/**
+ * avc_audit_post_callback - SELinux specific information
+ * will be called by generic audit code
+ * @ab: the audit buffer
+ * @a: audit_data
+ */
+static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
 {
-       if (addr)
-               audit_log_format(ab, " %s=%pI4", name1, &addr);
-       if (port)
-               audit_log_format(ab, " %s=%d", name2, ntohs(port));
+       struct common_audit_data *ad = a;
+       audit_log_format(ab, " ");
+       avc_dump_query(ab, ad->selinux_audit_data.ssid,
+                          ad->selinux_audit_data.tsid,
+                          ad->selinux_audit_data.tclass);
 }
 
 /**
@@ -532,13 +544,10 @@ static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr,
  */
 void avc_audit(u32 ssid, u32 tsid,
               u16 tclass, u32 requested,
-              struct av_decision *avd, int result, struct avc_audit_data *a)
+              struct av_decision *avd, int result, struct common_audit_data *a)
 {
-       struct task_struct *tsk = current;
-       struct inode *inode = NULL;
+       struct common_audit_data stack_data;
        u32 denied, audited;
-       struct audit_buffer *ab;
-
        denied = requested & ~avd->allowed;
        if (denied) {
                audited = denied;
@@ -551,144 +560,20 @@ void avc_audit(u32 ssid, u32 tsid,
                if (!(audited & avd->auditallow))
                        return;
        }
-
-       ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC);
-       if (!ab)
-               return;         /* audit_panic has been called */
-       audit_log_format(ab, "avc:  %s ", denied ? "denied" : "granted");
-       avc_dump_av(ab, tclass, audited);
-       audit_log_format(ab, " for ");
-       if (a && a->tsk)
-               tsk = a->tsk;
-       if (tsk && tsk->pid) {
-               audit_log_format(ab, " pid=%d comm=", tsk->pid);
-               audit_log_untrustedstring(ab, tsk->comm);
+       if (!a) {
+               a = &stack_data;
+               memset(a, 0, sizeof(*a));
+               a->type = LSM_AUDIT_NO_AUDIT;
        }
-       if (a) {
-               switch (a->type) {
-               case AVC_AUDIT_DATA_IPC:
-                       audit_log_format(ab, " key=%d", a->u.ipc_id);
-                       break;
-               case AVC_AUDIT_DATA_CAP:
-                       audit_log_format(ab, " capability=%d", a->u.cap);
-                       break;
-               case AVC_AUDIT_DATA_FS:
-                       if (a->u.fs.path.dentry) {
-                               struct dentry *dentry = a->u.fs.path.dentry;
-                               if (a->u.fs.path.mnt) {
-                                       audit_log_d_path(ab, "path=",
-                                                        &a->u.fs.path);
-                               } else {
-                                       audit_log_format(ab, " name=");
-                                       audit_log_untrustedstring(ab, dentry->d_name.name);
-                               }
-                               inode = dentry->d_inode;
-                       } else if (a->u.fs.inode) {
-                               struct dentry *dentry;
-                               inode = a->u.fs.inode;
-                               dentry = d_find_alias(inode);
-                               if (dentry) {
-                                       audit_log_format(ab, " name=");
-                                       audit_log_untrustedstring(ab, dentry->d_name.name);
-                                       dput(dentry);
-                               }
-                       }
-                       if (inode)
-                               audit_log_format(ab, " dev=%s ino=%lu",
-                                                inode->i_sb->s_id,
-                                                inode->i_ino);
-                       break;
-               case AVC_AUDIT_DATA_NET:
-                       if (a->u.net.sk) {
-                               struct sock *sk = a->u.net.sk;
-                               struct unix_sock *u;
-                               int len = 0;
-                               char *p = NULL;
-
-                               switch (sk->sk_family) {
-                               case AF_INET: {
-                                       struct inet_sock *inet = inet_sk(sk);
-
-                                       avc_print_ipv4_addr(ab, inet->rcv_saddr,
-                                                           inet->sport,
-                                                           "laddr", "lport");
-                                       avc_print_ipv4_addr(ab, inet->daddr,
-                                                           inet->dport,
-                                                           "faddr", "fport");
-                                       break;
-                               }
-                               case AF_INET6: {
-                                       struct inet_sock *inet = inet_sk(sk);
-                                       struct ipv6_pinfo *inet6 = inet6_sk(sk);
-
-                                       avc_print_ipv6_addr(ab, &inet6->rcv_saddr,
-                                                           inet->sport,
-                                                           "laddr", "lport");
-                                       avc_print_ipv6_addr(ab, &inet6->daddr,
-                                                           inet->dport,
-                                                           "faddr", "fport");
-                                       break;
-                               }
-                               case AF_UNIX:
-                                       u = unix_sk(sk);
-                                       if (u->dentry) {
-                                               struct path path = {
-                                                       .dentry = u->dentry,
-                                                       .mnt = u->mnt
-                                               };
-                                               audit_log_d_path(ab, "path=",
-                                                                &path);
-                                               break;
-                                       }
-                                       if (!u->addr)
-                                               break;
-                                       len = u->addr->len-sizeof(short);
-                                       p = &u->addr->name->sun_path[0];
-                                       audit_log_format(ab, " path=");
-                                       if (*p)
-                                               audit_log_untrustedstring(ab, p);
-                                       else
-                                               audit_log_n_hex(ab, p, len);
-                                       break;
-                               }
-                       }
-
-                       switch (a->u.net.family) {
-                       case AF_INET:
-                               avc_print_ipv4_addr(ab, a->u.net.v4info.saddr,
-                                                   a->u.net.sport,
-                                                   "saddr", "src");
-                               avc_print_ipv4_addr(ab, a->u.net.v4info.daddr,
-                                                   a->u.net.dport,
-                                                   "daddr", "dest");
-                               break;
-                       case AF_INET6:
-                               avc_print_ipv6_addr(ab, &a->u.net.v6info.saddr,
-                                                   a->u.net.sport,
-                                                   "saddr", "src");
-                               avc_print_ipv6_addr(ab, &a->u.net.v6info.daddr,
-                                                   a->u.net.dport,
-                                                   "daddr", "dest");
-                               break;
-                       }
-                       if (a->u.net.netif > 0) {
-                               struct net_device *dev;
-
-                               /* NOTE: we always use init's namespace */
-                               dev = dev_get_by_index(&init_net,
-                                                      a->u.net.netif);
-                               if (dev) {
-                                       audit_log_format(ab, " netif=%s",
-                                                        dev->name);
-                                       dev_put(dev);
-                               }
-                       }
-                       break;
-               }
-       }
-       audit_log_format(ab, " ");
-       avc_dump_query(ab, ssid, tsid, tclass);
-       audit_log_end(ab);
+       a->selinux_audit_data.tclass = tclass;
+       a->selinux_audit_data.requested = requested;
+       a->selinux_audit_data.ssid = ssid;
+       a->selinux_audit_data.tsid = tsid;
+       a->selinux_audit_data.audited = audited;
+       a->selinux_audit_data.denied = denied;
+       a->lsm_pre_audit = avc_audit_pre_callback;
+       a->lsm_post_audit = avc_audit_post_callback;
+       common_lsm_audit(a);
 }
 
 /**
@@ -956,7 +841,7 @@ out:
  * another -errno upon other errors.
  */
 int avc_has_perm(u32 ssid, u32 tsid, u16 tclass,
-                u32 requested, struct avc_audit_data *auditdata)
+                u32 requested, struct common_audit_data *auditdata)
 {
        struct av_decision avd;
        int rc;
@@ -970,3 +855,9 @@ u32 avc_policy_seqno(void)
 {
        return avc_cache.latest_notif;
 }
+
+void avc_disable(void)
+{
+       if (avc_node_cachep)
+               kmem_cache_destroy(avc_node_cachep);
+}
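
An illustrative aside: the rewritten avc_audit() above no longer formats the record itself. The SELinux-specific pieces (the large FS/NET/CAP/IPC switch that was deleted) move behind two callbacks handed to the shared common_lsm_audit() helper, which owns the audit buffer. A minimal userspace sketch of that pre/post callback shape, with every name invented for the example:

    #include <stdio.h>

    struct fake_audit_data {
            const char *op;
            int denied;
            void (*lsm_pre_audit)(struct fake_audit_data *d);
            void (*lsm_post_audit)(struct fake_audit_data *d);
    };

    /* LSM-specific prefix (verdict and operation), cf. avc_audit_pre_callback */
    static void pre_cb(struct fake_audit_data *d)
    {
            printf("avc: %s { %s }", d->denied ? "denied" : "granted", d->op);
    }

    /* LSM-specific suffix (labels, class, ...), cf. avc_audit_post_callback */
    static void post_cb(struct fake_audit_data *d)
    {
            (void)d;
            printf(" scontext=... tcontext=... tclass=...\n");
    }

    /* stands in for common_lsm_audit(): the shared code emits the record and
     * only calls back into the LSM for the parts it cannot know about */
    static void fake_common_lsm_audit(struct fake_audit_data *d)
    {
            d->lsm_pre_audit(d);
            d->lsm_post_audit(d);
    }

    int main(void)
    {
            struct fake_audit_data a = { "read", 1, pre_cb, post_cb };

            fake_common_lsm_audit(&a);
            return 0;
    }
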
index 8d8b69c5664ef7f1eaa89c8a19139152ca159477..417f7c9945229175f79842c5a9637af6fd48fc1d 100644 (file)
@@ -13,8 +13,8 @@
  *                                        Eric Paris <eparis@redhat.com>
  *  Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
  *                         <dgoeddel@trustedcs.com>
- *  Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P.
- *             Paul Moore <paul.moore@hp.com>
+ *  Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P.
+ *     Paul Moore <paul.moore@hp.com>
  *  Copyright (C) 2007 Hitachi Software Engineering Co., Ltd.
  *                    Yuichi Nakamura <ynakam@hitachisoft.jp>
  *
@@ -448,6 +448,10 @@ static int sb_finish_set_opts(struct super_block *sb)
            sbsec->behavior > ARRAY_SIZE(labeling_behaviors))
                sbsec->flags &= ~SE_SBLABELSUPP;
 
+       /* Special handling for sysfs. It is genfs but also has a setxattr handler */
+       if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0)
+               sbsec->flags |= SE_SBLABELSUPP;
+
        /* Initialize the root inode. */
        rc = inode_doinit_with_dentry(root_inode, root);
 
@@ -1479,14 +1483,14 @@ static int task_has_capability(struct task_struct *tsk,
                               const struct cred *cred,
                               int cap, int audit)
 {
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        struct av_decision avd;
        u16 sclass;
        u32 sid = cred_sid(cred);
        u32 av = CAP_TO_MASK(cap);
        int rc;
 
-       AVC_AUDIT_DATA_INIT(&ad, CAP);
+       COMMON_AUDIT_DATA_INIT(&ad, CAP);
        ad.tsk = tsk;
        ad.u.cap = cap;
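
A note on the pattern repeated throughout this file: every AVC_AUDIT_DATA_INIT() becomes COMMON_AUDIT_DATA_INIT(). The old macro (removed from avc.h later in this diff) only zeroed the structure and stamped a type tag; the replacement presumably does the same for struct common_audit_data, roughly along these lines (the real definition lives in include/linux/lsm_audit.h and is not part of this patch):

    /* hedged approximation, not the kernel's actual text */
    #define COMMON_AUDIT_DATA_INIT(_d, _t) do {                         \
            memset((_d), 0, sizeof(struct common_audit_data));          \
            (_d)->type = LSM_AUDIT_DATA_##_t;                           \
    } while (0)

The call sites therefore keep the same two-step shape: initialise the record with its type, then fill the union member (ad.u.cap here, ad.u.fs or ad.u.net elsewhere) that the audit callbacks will read later.
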
 
@@ -1525,12 +1529,14 @@ static int task_has_system(struct task_struct *tsk,
 static int inode_has_perm(const struct cred *cred,
                          struct inode *inode,
                          u32 perms,
-                         struct avc_audit_data *adp)
+                         struct common_audit_data *adp)
 {
        struct inode_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid;
 
+       validate_creds(cred);
+
        if (unlikely(IS_PRIVATE(inode)))
                return 0;
 
@@ -1539,7 +1545,7 @@ static int inode_has_perm(const struct cred *cred,
 
        if (!adp) {
                adp = &ad;
-               AVC_AUDIT_DATA_INIT(&ad, FS);
+               COMMON_AUDIT_DATA_INIT(&ad, FS);
                ad.u.fs.inode = inode;
        }
 
@@ -1555,9 +1561,9 @@ static inline int dentry_has_perm(const struct cred *cred,
                                  u32 av)
 {
        struct inode *inode = dentry->d_inode;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path.mnt = mnt;
        ad.u.fs.path.dentry = dentry;
        return inode_has_perm(cred, inode, av, &ad);
@@ -1577,11 +1583,11 @@ static int file_has_perm(const struct cred *cred,
 {
        struct file_security_struct *fsec = file->f_security;
        struct inode *inode = file->f_path.dentry->d_inode;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = cred_sid(cred);
        int rc;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path = file->f_path;
 
        if (sid != fsec->sid) {
@@ -1612,7 +1618,7 @@ static int may_create(struct inode *dir,
        struct inode_security_struct *dsec;
        struct superblock_security_struct *sbsec;
        u32 sid, newsid;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        int rc;
 
        dsec = dir->i_security;
@@ -1621,7 +1627,7 @@ static int may_create(struct inode *dir,
        sid = tsec->sid;
        newsid = tsec->create_sid;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path.dentry = dentry;
 
        rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR,
@@ -1665,7 +1671,7 @@ static int may_link(struct inode *dir,
 
 {
        struct inode_security_struct *dsec, *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
        u32 av;
        int rc;
@@ -1673,7 +1679,7 @@ static int may_link(struct inode *dir,
        dsec = dir->i_security;
        isec = dentry->d_inode->i_security;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path.dentry = dentry;
 
        av = DIR__SEARCH;
@@ -1708,7 +1714,7 @@ static inline int may_rename(struct inode *old_dir,
                             struct dentry *new_dentry)
 {
        struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
        u32 av;
        int old_is_dir, new_is_dir;
@@ -1719,7 +1725,7 @@ static inline int may_rename(struct inode *old_dir,
        old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
        new_dsec = new_dir->i_security;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
 
        ad.u.fs.path.dentry = old_dentry;
        rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR,
@@ -1761,7 +1767,7 @@ static inline int may_rename(struct inode *old_dir,
 static int superblock_has_perm(const struct cred *cred,
                               struct super_block *sb,
                               u32 perms,
-                              struct avc_audit_data *ad)
+                              struct common_audit_data *ad)
 {
        struct superblock_security_struct *sbsec;
        u32 sid = cred_sid(cred);
@@ -1855,12 +1861,12 @@ static inline u32 open_file_to_av(struct file *file)
 
 /* Hook functions begin here. */
 
-static int selinux_ptrace_may_access(struct task_struct *child,
+static int selinux_ptrace_access_check(struct task_struct *child,
                                     unsigned int mode)
 {
        int rc;
 
-       rc = cap_ptrace_may_access(child, mode);
+       rc = cap_ptrace_access_check(child, mode);
        if (rc)
                return rc;
 
@@ -2101,7 +2107,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
        const struct task_security_struct *old_tsec;
        struct task_security_struct *new_tsec;
        struct inode_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        struct inode *inode = bprm->file->f_path.dentry->d_inode;
        int rc;
 
@@ -2139,7 +2145,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
                        return rc;
        }
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path = bprm->file->f_path;
 
        if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
@@ -2232,7 +2238,7 @@ extern struct dentry *selinux_null;
 static inline void flush_unauthorized_files(const struct cred *cred,
                                            struct files_struct *files)
 {
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        struct file *file, *devnull = NULL;
        struct tty_struct *tty;
        struct fdtable *fdt;
@@ -2266,7 +2272,7 @@ static inline void flush_unauthorized_files(const struct cred *cred,
 
        /* Revalidate access to inherited open files. */
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
 
        spin_lock(&files->file_lock);
        for (;;) {
@@ -2515,7 +2521,7 @@ out:
 static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
        const struct cred *cred = current_cred();
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        int rc;
 
        rc = superblock_doinit(sb, data);
@@ -2526,7 +2532,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
        if (flags & MS_KERNMOUNT)
                return 0;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path.dentry = sb->s_root;
        return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad);
 }
@@ -2534,9 +2540,9 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
 static int selinux_sb_statfs(struct dentry *dentry)
 {
        const struct cred *cred = current_cred();
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path.dentry = dentry->d_sb->s_root;
        return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad);
 }
@@ -2711,12 +2717,18 @@ static int selinux_inode_permission(struct inode *inode, int mask)
 static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 {
        const struct cred *cred = current_cred();
+       unsigned int ia_valid = iattr->ia_valid;
+
+       /* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
+       if (ia_valid & ATTR_FORCE) {
+               ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE |
+                             ATTR_FORCE);
+               if (!ia_valid)
+                       return 0;
+       }
 
-       if (iattr->ia_valid & ATTR_FORCE)
-               return 0;
-
-       if (iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
-                              ATTR_ATIME_SET | ATTR_MTIME_SET))
+       if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
+                       ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
                return dentry_has_perm(cred, NULL, dentry, FILE__SETATTR);
 
        return dentry_has_perm(cred, NULL, dentry, FILE__WRITE);
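
The setattr hunk above stops treating ATTR_FORCE as a blanket bypass: the kernel-internal bits (ATTR_KILL_S[UG]ID, the ATTR_MODE that rides along with them, and ATTR_FORCE itself) are masked out first, the check is skipped only if nothing user-visible remains, and the surviving bits then choose between setattr and write permission. A standalone sketch of that decision; the ATTR_* values below are placeholders, not the kernel's definitions:

    #include <stdio.h>

    #define ATTR_MODE      0x001
    #define ATTR_UID       0x002
    #define ATTR_GID       0x004
    #define ATTR_ATIME_SET 0x008
    #define ATTR_MTIME_SET 0x010
    #define ATTR_FORCE     0x020
    #define ATTR_KILL_SUID 0x040
    #define ATTR_KILL_SGID 0x080
    #define ATTR_TIMES_SET 0x100

    static const char *setattr_check(unsigned int ia_valid)
    {
            if (ia_valid & ATTR_FORCE) {
                    ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_MODE |
                                  ATTR_FORCE);
                    if (!ia_valid)
                            return "skip";          /* pure suid/sgid kill */
            }
            if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID |
                            ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
                    return "FILE__SETATTR";
            return "FILE__WRITE";
    }

    int main(void)
    {
            printf("%s\n", setattr_check(ATTR_FORCE | ATTR_KILL_SUID)); /* skip */
            printf("%s\n", setattr_check(ATTR_FORCE | ATTR_UID));       /* FILE__SETATTR */
            return 0;
    }
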
@@ -2756,7 +2768,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
        struct inode *inode = dentry->d_inode;
        struct inode_security_struct *isec = inode->i_security;
        struct superblock_security_struct *sbsec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 newsid, sid = current_sid();
        int rc = 0;
 
@@ -2770,7 +2782,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
        if (!is_owner_or_cap(inode))
                return -EPERM;
 
-       AVC_AUDIT_DATA_INIT(&ad, FS);
+       COMMON_AUDIT_DATA_INIT(&ad, FS);
        ad.u.fs.path.dentry = dentry;
 
        rc = avc_has_perm(sid, isec->sid, isec->sclass,
@@ -2915,6 +2927,7 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name,
                return rc;
 
        isec->sid = newsid;
+       isec->initialized = 1;
        return 0;
 }
 
@@ -2939,11 +2952,6 @@ static int selinux_revalidate_file_permission(struct file *file, int mask)
        const struct cred *cred = current_cred();
        struct inode *inode = file->f_path.dentry->d_inode;
 
-       if (!mask) {
-               /* No permission to check.  Existence test. */
-               return 0;
-       }
-
        /* file_mask_to_av won't add FILE__WRITE if MAY_APPEND is set */
        if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE))
                mask |= MAY_APPEND;
@@ -2954,10 +2962,20 @@ static int selinux_revalidate_file_permission(struct file *file, int mask)
 
 static int selinux_file_permission(struct file *file, int mask)
 {
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct file_security_struct *fsec = file->f_security;
+       struct inode_security_struct *isec = inode->i_security;
+       u32 sid = current_sid();
+
        if (!mask)
                /* No permission to check.  Existence test. */
                return 0;
 
+       if (sid == fsec->sid && fsec->isid == isec->sid &&
+           fsec->pseqno == avc_policy_seqno())
+               /* No change since dentry_open check. */
+               return 0;
+
        return selinux_revalidate_file_permission(file, mask);
 }
 
@@ -3219,13 +3237,30 @@ static int selinux_task_create(unsigned long clone_flags)
        return current_has_perm(current, PROCESS__FORK);
 }
 
+/*
+ * allocate the SELinux part of blank credentials
+ */
+static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+       struct task_security_struct *tsec;
+
+       tsec = kzalloc(sizeof(struct task_security_struct), gfp);
+       if (!tsec)
+               return -ENOMEM;
+
+       cred->security = tsec;
+       return 0;
+}
+
 /*
  * detach and free the LSM part of a set of credentials
  */
 static void selinux_cred_free(struct cred *cred)
 {
        struct task_security_struct *tsec = cred->security;
-       cred->security = NULL;
+
+       BUG_ON((unsigned long) cred->security < PAGE_SIZE);
+       cred->security = (void *) 0x7UL;
        kfree(tsec);
 }
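
Instead of writing NULL into cred->security, the freed slot is now sanity-checked (BUG_ON for pointer values below PAGE_SIZE, which would indicate it was never a real allocation) and then poisoned with a small odd constant, so a later use of stale credentials faults immediately rather than slipping past NULL checks. The poison-on-free idiom in a plain userspace sketch:

    #include <stdlib.h>

    #define SECURITY_POISON ((void *)0x7UL)         /* non-NULL and never mapped */

    static void release_blob(void **slot)
    {
            void *blob = *slot;

            *slot = SECURITY_POISON;        /* stale users fault instead of seeing NULL */
            free(blob);
    }
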
 
@@ -3248,6 +3283,17 @@ static int selinux_cred_prepare(struct cred *new, const struct cred *old,
        return 0;
 }
 
+/*
+ * transfer the SELinux data to a blank set of creds
+ */
+static void selinux_cred_transfer(struct cred *new, const struct cred *old)
+{
+       const struct task_security_struct *old_tsec = old->security;
+       struct task_security_struct *tsec = new->security;
+
+       *tsec = *old_tsec;
+}
+
 /*
  * set the security data for a kernel service
  * - all the creation contexts are set to unlabelled
@@ -3292,6 +3338,11 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode)
        return 0;
 }
 
+static int selinux_kernel_module_request(void)
+{
+       return task_has_system(current, SYSTEM__MODULE_REQUEST);
+}
+
 static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
 {
        return current_has_perm(p, PROCESS__SETPGID);
@@ -3409,7 +3460,7 @@ static void selinux_task_to_inode(struct task_struct *p,
 
 /* Returns error only if unable to parse addresses */
 static int selinux_parse_skb_ipv4(struct sk_buff *skb,
-                       struct avc_audit_data *ad, u8 *proto)
+                       struct common_audit_data *ad, u8 *proto)
 {
        int offset, ihlen, ret = -EINVAL;
        struct iphdr _iph, *ih;
@@ -3490,7 +3541,7 @@ out:
 
 /* Returns error only if unable to parse addresses */
 static int selinux_parse_skb_ipv6(struct sk_buff *skb,
-                       struct avc_audit_data *ad, u8 *proto)
+                       struct common_audit_data *ad, u8 *proto)
 {
        u8 nexthdr;
        int ret = -EINVAL, offset;
@@ -3561,7 +3612,7 @@ out:
 
 #endif /* IPV6 */
 
-static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad,
+static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad,
                             char **_addrp, int src, u8 *proto)
 {
        char *addrp;
@@ -3643,7 +3694,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock,
                           u32 perms)
 {
        struct inode_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid;
        int err = 0;
 
@@ -3653,7 +3704,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock,
                goto out;
        sid = task_sid(task);
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.sk = sock->sk;
        err = avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
 
@@ -3740,7 +3791,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
        if (family == PF_INET || family == PF_INET6) {
                char *addrp;
                struct inode_security_struct *isec;
-               struct avc_audit_data ad;
+               struct common_audit_data ad;
                struct sockaddr_in *addr4 = NULL;
                struct sockaddr_in6 *addr6 = NULL;
                unsigned short snum;
@@ -3769,7 +3820,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                                                      snum, &sid);
                                if (err)
                                        goto out;
-                               AVC_AUDIT_DATA_INIT(&ad, NET);
+                               COMMON_AUDIT_DATA_INIT(&ad, NET);
                                ad.u.net.sport = htons(snum);
                                ad.u.net.family = family;
                                err = avc_has_perm(isec->sid, sid,
@@ -3802,7 +3853,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
                if (err)
                        goto out;
 
-               AVC_AUDIT_DATA_INIT(&ad, NET);
+               COMMON_AUDIT_DATA_INIT(&ad, NET);
                ad.u.net.sport = htons(snum);
                ad.u.net.family = family;
 
@@ -3836,7 +3887,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
        isec = SOCK_INODE(sock)->i_security;
        if (isec->sclass == SECCLASS_TCP_SOCKET ||
            isec->sclass == SECCLASS_DCCP_SOCKET) {
-               struct avc_audit_data ad;
+               struct common_audit_data ad;
                struct sockaddr_in *addr4 = NULL;
                struct sockaddr_in6 *addr6 = NULL;
                unsigned short snum;
@@ -3861,7 +3912,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address,
                perm = (isec->sclass == SECCLASS_TCP_SOCKET) ?
                       TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT;
 
-               AVC_AUDIT_DATA_INIT(&ad, NET);
+               COMMON_AUDIT_DATA_INIT(&ad, NET);
                ad.u.net.dport = htons(snum);
                ad.u.net.family = sk->sk_family;
                err = avc_has_perm(isec->sid, sid, isec->sclass, perm, &ad);
@@ -3951,13 +4002,13 @@ static int selinux_socket_unix_stream_connect(struct socket *sock,
        struct sk_security_struct *ssec;
        struct inode_security_struct *isec;
        struct inode_security_struct *other_isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        int err;
 
        isec = SOCK_INODE(sock)->i_security;
        other_isec = SOCK_INODE(other)->i_security;
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.sk = other->sk;
 
        err = avc_has_perm(isec->sid, other_isec->sid,
@@ -3983,13 +4034,13 @@ static int selinux_socket_unix_may_send(struct socket *sock,
 {
        struct inode_security_struct *isec;
        struct inode_security_struct *other_isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        int err;
 
        isec = SOCK_INODE(sock)->i_security;
        other_isec = SOCK_INODE(other)->i_security;
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.sk = other->sk;
 
        err = avc_has_perm(isec->sid, other_isec->sid,
@@ -4002,7 +4053,7 @@ static int selinux_socket_unix_may_send(struct socket *sock,
 
 static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family,
                                    u32 peer_sid,
-                                   struct avc_audit_data *ad)
+                                   struct common_audit_data *ad)
 {
        int err;
        u32 if_sid;
@@ -4030,10 +4081,10 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
        struct sk_security_struct *sksec = sk->sk_security;
        u32 peer_sid;
        u32 sk_sid = sksec->sid;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        char *addrp;
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.netif = skb->iif;
        ad.u.net.family = family;
        err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
@@ -4071,7 +4122,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
        struct sk_security_struct *sksec = sk->sk_security;
        u16 family = sk->sk_family;
        u32 sk_sid = sksec->sid;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        char *addrp;
        u8 secmark_active;
        u8 peerlbl_active;
@@ -4095,7 +4146,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
        if (!secmark_active && !peerlbl_active)
                return 0;
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.netif = skb->iif;
        ad.u.net.family = family;
        err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL);
@@ -4309,6 +4360,59 @@ static void selinux_req_classify_flow(const struct request_sock *req,
        fl->secid = req->secid;
 }
 
+static int selinux_tun_dev_create(void)
+{
+       u32 sid = current_sid();
+
+       /* we aren't taking into account the "sockcreate" SID since the socket
+        * that is being created here is not a socket in the traditional sense;
+        * instead it is a private sock, accessible only to the kernel, and
+        * representing a wide range of network traffic spanning multiple
+        * connections, unlike traditional sockets - check the TUN driver to
+        * get a better understanding of why this socket is special */
+
+       return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE,
+                           NULL);
+}
+
+static void selinux_tun_dev_post_create(struct sock *sk)
+{
+       struct sk_security_struct *sksec = sk->sk_security;
+
+       /* we don't currently perform any NetLabel based labeling here and it
+        * isn't clear that we would want to do so anyway; while we could apply
+        * labeling without the support of the TUN user the resulting labeled
+        * traffic from the other end of the connection would almost certainly
+        * cause confusion to the TUN user that had no idea network labeling
+        * protocols were being used */
+
+       /* see the comments in selinux_tun_dev_create() about why we don't use
+        * the sockcreate SID here */
+
+       sksec->sid = current_sid();
+       sksec->sclass = SECCLASS_TUN_SOCKET;
+}
+
+static int selinux_tun_dev_attach(struct sock *sk)
+{
+       struct sk_security_struct *sksec = sk->sk_security;
+       u32 sid = current_sid();
+       int err;
+
+       err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
+                          TUN_SOCKET__RELABELFROM, NULL);
+       if (err)
+               return err;
+       err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET,
+                          TUN_SOCKET__RELABELTO, NULL);
+       if (err)
+               return err;
+
+       sksec->sid = sid;
+
+       return 0;
+}
+
 static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
 {
        int err = 0;
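
The three TUN hooks introduced above split the lifetime of the driver's internal sock: tun_dev_create gates creating a new device against the caller's own SID, tun_dev_post_create labels the freshly allocated sock, and tun_dev_attach performs a relabelfrom/relabelto check when a caller attaches to a persistent device that someone else labelled. A hedged sketch of the order in which the TUN driver would be expected to invoke them; the security_tun_dev_*() wrapper names are inferred from the hook names and are not shown in this diff:

    /* illustrative only: error handling and the real tun_struct are omitted */
    static int example_tun_set_iff(bool device_exists, struct sock *sk)
    {
            int err;

            if (!device_exists) {
                    err = security_tun_dev_create();        /* tun_socket { create } */
                    if (err)
                            return err;
                    /* ... allocate the device and its private sock ... */
                    security_tun_dev_post_create(sk);       /* sock inherits current SID */
                    return 0;
            }

            /* persistent device, created and labelled earlier by someone else */
            return security_tun_dev_attach(sk);             /* relabelfrom + relabelto */
    }
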
@@ -4353,7 +4457,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
        int err;
        char *addrp;
        u32 peer_sid;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u8 secmark_active;
        u8 netlbl_active;
        u8 peerlbl_active;
@@ -4370,7 +4474,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
        if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0)
                return NF_DROP;
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.netif = ifindex;
        ad.u.net.family = family;
        if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0)
@@ -4458,7 +4562,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
 {
        struct sock *sk = skb->sk;
        struct sk_security_struct *sksec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        char *addrp;
        u8 proto;
 
@@ -4466,7 +4570,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
                return NF_ACCEPT;
        sksec = sk->sk_security;
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.netif = ifindex;
        ad.u.net.family = family;
        if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto))
@@ -4490,7 +4594,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
        u32 secmark_perm;
        u32 peer_sid;
        struct sock *sk;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        char *addrp;
        u8 secmark_active;
        u8 peerlbl_active;
@@ -4549,7 +4653,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
                secmark_perm = PACKET__SEND;
        }
 
-       AVC_AUDIT_DATA_INIT(&ad, NET);
+       COMMON_AUDIT_DATA_INIT(&ad, NET);
        ad.u.net.netif = ifindex;
        ad.u.net.family = family;
        if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL))
@@ -4619,13 +4723,13 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
 static int selinux_netlink_recv(struct sk_buff *skb, int capability)
 {
        int err;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
 
        err = cap_netlink_recv(skb, capability);
        if (err)
                return err;
 
-       AVC_AUDIT_DATA_INIT(&ad, CAP);
+       COMMON_AUDIT_DATA_INIT(&ad, CAP);
        ad.u.cap = capability;
 
        return avc_has_perm(NETLINK_CB(skb).sid, NETLINK_CB(skb).sid,
@@ -4684,12 +4788,12 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms,
                        u32 perms)
 {
        struct ipc_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
 
        isec = ipc_perms->security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = ipc_perms->key;
 
        return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);
@@ -4709,7 +4813,7 @@ static void selinux_msg_msg_free_security(struct msg_msg *msg)
 static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
 {
        struct ipc_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
        int rc;
 
@@ -4719,7 +4823,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq)
 
        isec = msq->q_perm.security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = msq->q_perm.key;
 
        rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
@@ -4739,12 +4843,12 @@ static void selinux_msg_queue_free_security(struct msg_queue *msq)
 static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg)
 {
        struct ipc_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
 
        isec = msq->q_perm.security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = msq->q_perm.key;
 
        return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ,
@@ -4783,7 +4887,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
 {
        struct ipc_security_struct *isec;
        struct msg_security_struct *msec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
        int rc;
 
@@ -4804,7 +4908,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
                        return rc;
        }
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = msq->q_perm.key;
 
        /* Can this process write to the queue? */
@@ -4828,14 +4932,14 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 {
        struct ipc_security_struct *isec;
        struct msg_security_struct *msec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = task_sid(target);
        int rc;
 
        isec = msq->q_perm.security;
        msec = msg->security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = msq->q_perm.key;
 
        rc = avc_has_perm(sid, isec->sid,
@@ -4850,7 +4954,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
 static int selinux_shm_alloc_security(struct shmid_kernel *shp)
 {
        struct ipc_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
        int rc;
 
@@ -4860,7 +4964,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp)
 
        isec = shp->shm_perm.security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = shp->shm_perm.key;
 
        rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM,
@@ -4880,12 +4984,12 @@ static void selinux_shm_free_security(struct shmid_kernel *shp)
 static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg)
 {
        struct ipc_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
 
        isec = shp->shm_perm.security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = shp->shm_perm.key;
 
        return avc_has_perm(sid, isec->sid, SECCLASS_SHM,
@@ -4942,7 +5046,7 @@ static int selinux_shm_shmat(struct shmid_kernel *shp,
 static int selinux_sem_alloc_security(struct sem_array *sma)
 {
        struct ipc_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
        int rc;
 
@@ -4952,7 +5056,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma)
 
        isec = sma->sem_perm.security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = sma->sem_perm.key;
 
        rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM,
@@ -4972,12 +5076,12 @@ static void selinux_sem_free_security(struct sem_array *sma)
 static int selinux_sem_associate(struct sem_array *sma, int semflg)
 {
        struct ipc_security_struct *isec;
-       struct avc_audit_data ad;
+       struct common_audit_data ad;
        u32 sid = current_sid();
 
        isec = sma->sem_perm.security;
 
-       AVC_AUDIT_DATA_INIT(&ad, IPC);
+       COMMON_AUDIT_DATA_INIT(&ad, IPC);
        ad.u.ipc_id = sma->sem_perm.key;
 
        return avc_has_perm(sid, isec->sid, SECCLASS_SEM,
@@ -5195,7 +5299,7 @@ static int selinux_setprocattr(struct task_struct *p,
 
                /* Only allow single threaded processes to change context */
                error = -EPERM;
-               if (!is_single_threaded(p)) {
+               if (!current_is_single_threaded()) {
                        error = security_bounded_transition(tsec->sid, sid);
                        if (error)
                                goto abort_change;
@@ -5252,6 +5356,32 @@ static void selinux_release_secctx(char *secdata, u32 seclen)
        kfree(secdata);
 }
 
+/*
+ *     called with inode->i_mutex locked
+ */
+static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+       return selinux_inode_setsecurity(inode, XATTR_SELINUX_SUFFIX, ctx, ctxlen, 0);
+}
+
+/*
+ *     called with inode->i_mutex locked
+ */
+static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+       return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0);
+}
+
+static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+       int len = 0;
+       len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX,
+                                               ctx, true);
+       if (len < 0)
+               return len;
+       *ctxlen = len;
+       return 0;
+}
 #ifdef CONFIG_KEYS
 
 static int selinux_key_alloc(struct key *k, const struct cred *cred,
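
The inode_{notify,set,get}secctx hooks registered further up give filesystems that keep labels somewhere else (labeled NFS appears to be the intended consumer) a way to push a server-supplied context into the in-core inode, write one back without repeating the setxattr permission pass, and read the current one out for transmission. A hedged sketch of the expected usage from the filesystem side; the security_inode_*secctx() wrapper names are inferred from the hook names and are not part of this hunk:

    /* illustrative only */
    static int example_label_arrived(struct inode *inode, void *ctx, u32 ctxlen)
    {
            /* attach the context the server handed us to the in-core inode */
            return security_inode_notifysecctx(inode, ctx, ctxlen);
    }

    static int example_label_persist(struct dentry *dentry, void *ctx, u32 ctxlen)
    {
            /* store a context without the normal setxattr permission checks */
            return security_inode_setsecctx(dentry, ctx, ctxlen);
    }
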
@@ -5323,7 +5453,7 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer)
 static struct security_operations selinux_ops = {
        .name =                         "selinux",
 
-       .ptrace_may_access =            selinux_ptrace_may_access,
+       .ptrace_access_check =          selinux_ptrace_access_check,
        .ptrace_traceme =               selinux_ptrace_traceme,
        .capget =                       selinux_capget,
        .capset =                       selinux_capset,
@@ -5396,10 +5526,13 @@ static struct security_operations selinux_ops = {
        .dentry_open =                  selinux_dentry_open,
 
        .task_create =                  selinux_task_create,
+       .cred_alloc_blank =             selinux_cred_alloc_blank,
        .cred_free =                    selinux_cred_free,
        .cred_prepare =                 selinux_cred_prepare,
+       .cred_transfer =                selinux_cred_transfer,
        .kernel_act_as =                selinux_kernel_act_as,
        .kernel_create_files_as =       selinux_kernel_create_files_as,
+       .kernel_module_request =        selinux_kernel_module_request,
        .task_setpgid =                 selinux_task_setpgid,
        .task_getpgid =                 selinux_task_getpgid,
        .task_getsid =                  selinux_task_getsid,
@@ -5448,6 +5581,9 @@ static struct security_operations selinux_ops = {
        .secid_to_secctx =              selinux_secid_to_secctx,
        .secctx_to_secid =              selinux_secctx_to_secid,
        .release_secctx =               selinux_release_secctx,
+       .inode_notifysecctx =           selinux_inode_notifysecctx,
+       .inode_setsecctx =              selinux_inode_setsecctx,
+       .inode_getsecctx =              selinux_inode_getsecctx,
 
        .unix_stream_connect =          selinux_socket_unix_stream_connect,
        .unix_may_send =                selinux_socket_unix_may_send,
@@ -5477,6 +5613,9 @@ static struct security_operations selinux_ops = {
        .inet_csk_clone =               selinux_inet_csk_clone,
        .inet_conn_established =        selinux_inet_conn_established,
        .req_classify_flow =            selinux_req_classify_flow,
+       .tun_dev_create =               selinux_tun_dev_create,
+       .tun_dev_post_create =          selinux_tun_dev_post_create,
+       .tun_dev_attach =               selinux_tun_dev_attach,
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
        .xfrm_policy_alloc_security =   selinux_xfrm_policy_alloc,
@@ -5691,6 +5830,9 @@ int selinux_disable(void)
        selinux_disabled = 1;
        selinux_enabled = 0;
 
+       /* Try to destroy the avc node cache */
+       avc_disable();
+
        /* Reset security_ops to the secondary module, dummy or capability. */
        security_ops = secondary_ops;
 
index 8377a4ba3b954434f98a6aefbc269c3747162347..abedcd704daef7c47ef2f392799dc80024fd7b4e 100644 (file)
@@ -15,6 +15,7 @@
    S_(SECCLASS_KEY_SOCKET, socket, 0x00400000UL)
    S_(SECCLASS_UNIX_STREAM_SOCKET, socket, 0x00400000UL)
    S_(SECCLASS_UNIX_DGRAM_SOCKET, socket, 0x00400000UL)
+   S_(SECCLASS_TUN_SOCKET, socket, 0x00400000UL)
    S_(SECCLASS_IPC, ipc, 0x00000200UL)
    S_(SECCLASS_SEM, ipc, 0x00000200UL)
    S_(SECCLASS_MSGQ, ipc, 0x00000200UL)
index 31df1d7c1aee2c0ead5536846a3ab7a46a074278..2b683ad83d21ceb42b8d16fe48f9f610398d30e2 100644 (file)
    S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, "syslog_read")
    S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, "syslog_mod")
    S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, "syslog_console")
+   S_(SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, "module_request")
    S_(SECCLASS_CAPABILITY, CAPABILITY__CHOWN, "chown")
    S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_OVERRIDE, "dac_override")
    S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_READ_SEARCH, "dac_read_search")
index d645192ee950aaf74f2693943e38cd7dda9bfdbf..0546d616ccacc2fae2fd8b5ca003d6cdc091a54f 100644 (file)
 #define UNIX_DGRAM_SOCKET__RECV_MSG               0x00080000UL
 #define UNIX_DGRAM_SOCKET__SEND_MSG               0x00100000UL
 #define UNIX_DGRAM_SOCKET__NAME_BIND              0x00200000UL
+#define TUN_SOCKET__IOCTL                         0x00000001UL
+#define TUN_SOCKET__READ                          0x00000002UL
+#define TUN_SOCKET__WRITE                         0x00000004UL
+#define TUN_SOCKET__CREATE                        0x00000008UL
+#define TUN_SOCKET__GETATTR                       0x00000010UL
+#define TUN_SOCKET__SETATTR                       0x00000020UL
+#define TUN_SOCKET__LOCK                          0x00000040UL
+#define TUN_SOCKET__RELABELFROM                   0x00000080UL
+#define TUN_SOCKET__RELABELTO                     0x00000100UL
+#define TUN_SOCKET__APPEND                        0x00000200UL
+#define TUN_SOCKET__BIND                          0x00000400UL
+#define TUN_SOCKET__CONNECT                       0x00000800UL
+#define TUN_SOCKET__LISTEN                        0x00001000UL
+#define TUN_SOCKET__ACCEPT                        0x00002000UL
+#define TUN_SOCKET__GETOPT                        0x00004000UL
+#define TUN_SOCKET__SETOPT                        0x00008000UL
+#define TUN_SOCKET__SHUTDOWN                      0x00010000UL
+#define TUN_SOCKET__RECVFROM                      0x00020000UL
+#define TUN_SOCKET__SENDTO                        0x00040000UL
+#define TUN_SOCKET__RECV_MSG                      0x00080000UL
+#define TUN_SOCKET__SEND_MSG                      0x00100000UL
+#define TUN_SOCKET__NAME_BIND                     0x00200000UL
 #define PROCESS__FORK                             0x00000001UL
 #define PROCESS__TRANSITION                       0x00000002UL
 #define PROCESS__SIGCHLD                          0x00000004UL
 #define SYSTEM__SYSLOG_READ                       0x00000002UL
 #define SYSTEM__SYSLOG_MOD                        0x00000004UL
 #define SYSTEM__SYSLOG_CONSOLE                    0x00000008UL
+#define SYSTEM__MODULE_REQUEST                    0x00000010UL
 #define CAPABILITY__CHOWN                         0x00000001UL
 #define CAPABILITY__DAC_OVERRIDE                  0x00000002UL
 #define CAPABILITY__DAC_READ_SEARCH               0x00000004UL
index d12ff1a9c0aa5e383347af29f9f03fe96e626ed2..e94e82f738188381c582d59e26aca94b867ac064 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/init.h>
 #include <linux/audit.h>
+#include <linux/lsm_audit.h>
 #include <linux/in6.h>
 #include <linux/path.h>
 #include <asm/system.h>
@@ -36,48 +37,6 @@ struct inode;
 struct sock;
 struct sk_buff;
 
-/* Auxiliary data to use in generating the audit record. */
-struct avc_audit_data {
-       char    type;
-#define AVC_AUDIT_DATA_FS   1
-#define AVC_AUDIT_DATA_NET  2
-#define AVC_AUDIT_DATA_CAP  3
-#define AVC_AUDIT_DATA_IPC  4
-       struct task_struct *tsk;
-       union   {
-               struct {
-                       struct path path;
-                       struct inode *inode;
-               } fs;
-               struct {
-                       int netif;
-                       struct sock *sk;
-                       u16 family;
-                       __be16 dport;
-                       __be16 sport;
-                       union {
-                               struct {
-                                       __be32 daddr;
-                                       __be32 saddr;
-                               } v4;
-                               struct {
-                                       struct in6_addr daddr;
-                                       struct in6_addr saddr;
-                               } v6;
-                       } fam;
-               } net;
-               int cap;
-               int ipc_id;
-       } u;
-};
-
-#define v4info fam.v4
-#define v6info fam.v6
-
-/* Initialize an AVC audit data structure. */
-#define AVC_AUDIT_DATA_INIT(_d,_t) \
-       { memset((_d), 0, sizeof(struct avc_audit_data)); (_d)->type = AVC_AUDIT_DATA_##_t; }
-
 /*
  * AVC statistics
  */
@@ -98,7 +57,9 @@ void __init avc_init(void);
 
 void avc_audit(u32 ssid, u32 tsid,
               u16 tclass, u32 requested,
-              struct av_decision *avd, int result, struct avc_audit_data *auditdata);
+              struct av_decision *avd,
+              int result,
+              struct common_audit_data *a);
 
 #define AVC_STRICT 1 /* Ignore permissive mode. */
 int avc_has_perm_noaudit(u32 ssid, u32 tsid,
@@ -108,7 +69,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
 
 int avc_has_perm(u32 ssid, u32 tsid,
                 u16 tclass, u32 requested,
-                struct avc_audit_data *auditdata);
+                struct common_audit_data *auditdata);
 
 u32 avc_policy_seqno(void);
 
@@ -127,13 +88,13 @@ int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid,
                     u32 events, u32 ssid, u32 tsid,
                     u16 tclass, u32 perms);
 
-/* Shows permission in human readable form */
-void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av);
-
 /* Exported to selinuxfs */
 int avc_get_hash_stats(char *page);
 extern unsigned int avc_cache_threshold;
 
+/* Attempt to free avc node cache */
+void avc_disable(void);
+
 #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS
 DECLARE_PER_CPU(struct avc_cache_stats, avc_cache_stats);
 #endif
index 21ec786611d411ea1bfd52c9a0add689c0951237..7ab9299bfb6bb3b18c6d941effcd88e6a0d5524d 100644 (file)
@@ -77,3 +77,4 @@
     S_(NULL)
     S_(NULL)
     S_("kernel_service")
+    S_("tun_socket")
index 882f27d66facdbc780cb7ad6468af5d6b0796b78..f248500a1e3c3c197efa6eb5dfa6e6b0567f7960 100644 (file)
@@ -53,6 +53,7 @@
 #define SECCLASS_PEER                                    68
 #define SECCLASS_CAPABILITY2                             69
 #define SECCLASS_KERNEL_SERVICE                          74
+#define SECCLASS_TUN_SOCKET                              75
 
 /*
  * Security identifier indices for initial entities
index b4b5b9b2f0be1758babefede244375e604b08ccb..8d7384280a7a2cb39840fc3347ebde30b9352aef 100644 (file)
@@ -59,7 +59,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family);
 int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
                                struct sk_buff *skb,
                                u16 family,
-                               struct avc_audit_data *ad);
+                               struct common_audit_data *ad);
 int selinux_netlbl_socket_setsockopt(struct socket *sock,
                                     int level,
                                     int optname);
@@ -129,7 +129,7 @@ static inline int selinux_netlbl_socket_post_create(struct sock *sk,
 static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
                                              struct sk_buff *skb,
                                              u16 family,
-                                             struct avc_audit_data *ad)
+                                             struct common_audit_data *ad)
 {
        return 0;
 }
index 289e24b39e3ea0a79fb8ade6ba586734b59c2c5c..13128f9a3e5aef56b7453af34ce86aa0e99f0513 100644 (file)
@@ -41,9 +41,9 @@ static inline int selinux_xfrm_enabled(void)
 }
 
 int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb,
-                       struct avc_audit_data *ad);
+                       struct common_audit_data *ad);
 int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct avc_audit_data *ad, u8 proto);
+                       struct common_audit_data *ad, u8 proto);
 int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall);
 
 static inline void selinux_xfrm_notify_policyload(void)
@@ -57,13 +57,13 @@ static inline int selinux_xfrm_enabled(void)
 }
 
 static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                       struct avc_audit_data *ad)
+                       struct common_audit_data *ad)
 {
        return 0;
 }
 
 static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                       struct avc_audit_data *ad, u8 proto)
+                       struct common_audit_data *ad, u8 proto)
 {
        return 0;
 }
index 2e984413c7b2daaa5313e7bf8fa1b3b193064252..e68823741ad5836a54f38f34e98a86193dbc70eb 100644 (file)
@@ -342,7 +342,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family)
 int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec,
                                struct sk_buff *skb,
                                u16 family,
-                               struct avc_audit_data *ad)
+                               struct common_audit_data *ad)
 {
        int rc;
        u32 nlbl_sid;
index 500e6f78e1159e1d568355ce74a113b4b49859c1..ff17820d35ec73bedfab174d3adfb7c87b762420 100644 (file)
  *
  *  Added validation of kernel classes and permissions
  *
+ * Updated: KaiGai Kohei <kaigai@ak.jp.nec.com>
+ *
+ *  Added support for bounds domain and audit messaged on masked permissions
+ *
+ * Copyright (C) 2008, 2009 NEC Corporation
  * Copyright (C) 2006, 2007 Hewlett-Packard Development Company, L.P.
  * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  * Copyright (C) 2003 - 2004, 2006 Tresys Technology, LLC
@@ -278,6 +283,95 @@ mls_ops:
        return s[0];
 }
 
+/*
+ * security_dump_masked_av - dumps masked permissions during
+ * security_compute_av due to RBAC, MLS/Constraint and Type bounds.
+ */
+static int dump_masked_av_helper(void *k, void *d, void *args)
+{
+       struct perm_datum *pdatum = d;
+       char **permission_names = args;
+
+       BUG_ON(pdatum->value < 1 || pdatum->value > 32);
+
+       permission_names[pdatum->value - 1] = (char *)k;
+
+       return 0;
+}
+
+static void security_dump_masked_av(struct context *scontext,
+                                   struct context *tcontext,
+                                   u16 tclass,
+                                   u32 permissions,
+                                   const char *reason)
+{
+       struct common_datum *common_dat;
+       struct class_datum *tclass_dat;
+       struct audit_buffer *ab;
+       char *tclass_name;
+       char *scontext_name = NULL;
+       char *tcontext_name = NULL;
+       char *permission_names[32];
+       int index, length;
+       bool need_comma = false;
+
+       if (!permissions)
+               return;
+
+       tclass_name = policydb.p_class_val_to_name[tclass - 1];
+       tclass_dat = policydb.class_val_to_struct[tclass - 1];
+       common_dat = tclass_dat->comdatum;
+
+       /* init permission_names */
+       if (common_dat &&
+           hashtab_map(common_dat->permissions.table,
+                       dump_masked_av_helper, permission_names) < 0)
+               goto out;
+
+       if (hashtab_map(tclass_dat->permissions.table,
+                       dump_masked_av_helper, permission_names) < 0)
+               goto out;
+
+       /* get scontext/tcontext in text form */
+       if (context_struct_to_string(scontext,
+                                    &scontext_name, &length) < 0)
+               goto out;
+
+       if (context_struct_to_string(tcontext,
+                                    &tcontext_name, &length) < 0)
+               goto out;
+
+       /* audit a message */
+       ab = audit_log_start(current->audit_context,
+                            GFP_ATOMIC, AUDIT_SELINUX_ERR);
+       if (!ab)
+               goto out;
+
+       audit_log_format(ab, "op=security_compute_av reason=%s "
+                        "scontext=%s tcontext=%s tclass=%s perms=",
+                        reason, scontext_name, tcontext_name, tclass_name);
+
+       for (index = 0; index < 32; index++) {
+               u32 mask = (1 << index);
+
+               if ((mask & permissions) == 0)
+                       continue;
+
+               audit_log_format(ab, "%s%s",
+                                need_comma ? "," : "",
+                                permission_names[index]
+                                ? permission_names[index] : "????");
+               need_comma = true;
+       }
+       audit_log_end(ab);
+out:
+       /* release scontext/tcontext */
+       kfree(tcontext_name);
+       kfree(scontext_name);
+
+       return;
+}
+
 /*
  * security_boundary_permission - drops violated permissions
  * on boundary constraint.
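
security_dump_masked_av() turns the masked bit vector back into permission names by letting hashtab_map() fill a 32-slot table indexed by each perm_datum's value, then walking the mask and printing one name (or "????") per set bit. That bit-to-name expansion in isolation, as a self-contained sketch with made-up names:

    #include <stdio.h>

    /* normally filled from the policy hash tables; names here are invented */
    static const char *permission_names[32] = {
            [0] = "ioctl", [1] = "read", [2] = "write", [3] = "create",
    };

    static void dump_masked(unsigned int masked)
    {
            int index;
            int need_comma = 0;

            for (index = 0; index < 32; index++) {
                    unsigned int mask = 1u << index;

                    if (!(masked & mask))
                            continue;
                    printf("%s%s", need_comma ? "," : "",
                           permission_names[index] ? permission_names[index] : "????");
                    need_comma = 1;
            }
            putchar('\n');
    }

    int main(void)
    {
            dump_masked(0x5);       /* prints "ioctl,write" */
            return 0;
    }
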
@@ -347,28 +441,12 @@ static void type_attribute_bounds_av(struct context *scontext,
        }
 
        if (masked) {
-               struct audit_buffer *ab;
-               char *stype_name
-                       = policydb.p_type_val_to_name[source->value - 1];
-               char *ttype_name
-                       = policydb.p_type_val_to_name[target->value - 1];
-               char *tclass_name
-                       = policydb.p_class_val_to_name[tclass - 1];
-
                /* mask violated permissions */
                avd->allowed &= ~masked;
 
-               /* notice to userspace via audit message */
-               ab = audit_log_start(current->audit_context,
-                                    GFP_ATOMIC, AUDIT_SELINUX_ERR);
-               if (!ab)
-                       return;
-
-               audit_log_format(ab, "av boundary violation: "
-                                "source=%s target=%s tclass=%s",
-                                stype_name, ttype_name, tclass_name);
-               avc_dump_av(ab, tclass, masked);
-               audit_log_end(ab);
+               /* audit masked permissions */
+               security_dump_masked_av(scontext, tcontext,
+                                       tclass, masked, "bounds");
        }
 }
 
@@ -480,7 +558,7 @@ static int context_struct_compute_av(struct context *scontext,
                if ((constraint->permissions & (avd->allowed)) &&
                    !constraint_expr_eval(scontext, tcontext, NULL,
                                          constraint->expr)) {
-                       avd->allowed = (avd->allowed) & ~(constraint->permissions);
+                       avd->allowed &= ~(constraint->permissions);
                }
                constraint = constraint->next;
        }
@@ -499,8 +577,8 @@ static int context_struct_compute_av(struct context *scontext,
                                break;
                }
                if (!ra)
-                       avd->allowed = (avd->allowed) & ~(PROCESS__TRANSITION |
-                                                       PROCESS__DYNTRANSITION);
+                       avd->allowed &= ~(PROCESS__TRANSITION |
+                                         PROCESS__DYNTRANSITION);
        }
 
        /*
@@ -687,6 +765,26 @@ int security_bounded_transition(u32 old_sid, u32 new_sid)
                }
                index = type->bounds;
        }
+
+       if (rc) {
+               char *old_name = NULL;
+               char *new_name = NULL;
+               int length;
+
+               if (!context_struct_to_string(old_context,
+                                             &old_name, &length) &&
+                   !context_struct_to_string(new_context,
+                                             &new_name, &length)) {
+                       audit_log(current->audit_context,
+                                 GFP_ATOMIC, AUDIT_SELINUX_ERR,
+                                 "op=security_bounded_transition "
+                                 "result=denied "
+                                 "oldcontext=%s newcontext=%s",
+                                 old_name, new_name);
+               }
+               kfree(new_name);
+               kfree(old_name);
+       }
 out:
        read_unlock(&policy_rwlock);
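
The change above makes a refused bounded transition visible: when the walk over the new type's bounds chain (the index = type->bounds step in the surrounding context) never reaches the old type, an AUDIT_SELINUX_ERR record naming both contexts is now emitted instead of failing silently. A toy model of that chain walk, using an invented bounds array in which 0 ends the chain just as an unset type->bounds does:

    static int bounded_transition(const unsigned short *bounds,
                                  unsigned short old_type,
                                  unsigned short new_type)
    {
            unsigned short index = new_type;

            if (new_type == old_type)
                    return 0;               /* unchanged domain is trivially bounded */

            while (bounds[index]) {
                    if (bounds[index] == old_type)
                            return 0;       /* transitively bounded by the old domain */
                    index = bounds[index];
            }
            return -1;                      /* no parent matched: deny and audit it */
    }
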
 
index 72b18452e1a12035f20e4269e0ed6d4474e135f2..f3cb9ed731a9d927319c37bfc3a2e0d63f330db9 100644 (file)
@@ -401,7 +401,7 @@ int selinux_xfrm_state_delete(struct xfrm_state *x)
  * gone thru the IPSec process.
  */
 int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
-                               struct avc_audit_data *ad)
+                               struct common_audit_data *ad)
 {
        int i, rc = 0;
        struct sec_path *sp;
@@ -442,7 +442,7 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb,
  * checked in the selinux_xfrm_state_pol_flow_match hook above.
  */
 int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb,
-                                       struct avc_audit_data *ad, u8 proto)
+                                       struct common_audit_data *ad, u8 proto)
 {
        struct dst_entry *dst;
        int rc = 0;
index 243bec175be050f930189a25d4a4bb6d5dc85ef3..c6e9acae72e4b74b9e75f037845e54b61fce6163 100644 (file)
@@ -275,7 +275,7 @@ static inline void smk_ad_init(struct smk_audit_info *a, const char *func,
 {
        memset(a, 0, sizeof(*a));
        a->a.type = type;
-       a->a.function = func;
+       a->a.smack_audit_data.function = func;
 }
 
 static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a,
index 513dc1aa16dd1ff5594745b463dd67118648705a..0f9ac81469001d30d91a05af27974b116823d330 100644
@@ -240,8 +240,9 @@ static inline void smack_str_from_perm(char *string, int access)
 static void smack_log_callback(struct audit_buffer *ab, void *a)
 {
        struct common_audit_data *ad = a;
-       struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data;
-       audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function,
+       struct smack_audit_data *sad = &ad->smack_audit_data;
+       audit_log_format(ab, "lsm=SMACK fn=%s action=%s",
+                        ad->smack_audit_data.function,
                         sad->result ? "denied" : "granted");
        audit_log_format(ab, " subject=");
        audit_log_untrustedstring(ab, sad->subject);
@@ -274,11 +275,11 @@ void smack_log(char *subject_label, char *object_label, int request,
        if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0)
                return;
 
-       if (a->function == NULL)
-               a->function = "unknown";
+       if (a->smack_audit_data.function == NULL)
+               a->smack_audit_data.function = "unknown";
 
        /* end preparing the audit data */
-       sad = &a->lsm_priv.smack_audit_data;
+       sad = &a->smack_audit_data;
        smack_str_from_perm(request_buffer, request);
        sad->subject = subject_label;
        sad->object  = object_label;
index 0023182078c726797de5ccab784a1922eba17859..acae7ef4092dd02872b54d7861ad233f8a74978e 100644
@@ -91,7 +91,7 @@ struct inode_smack *new_inode_smack(char *smack)
  */
 
 /**
- * smack_ptrace_may_access - Smack approval on PTRACE_ATTACH
+ * smack_ptrace_access_check - Smack approval on PTRACE_ATTACH
  * @ctp: child task pointer
  * @mode: ptrace attachment mode
  *
@@ -99,13 +99,13 @@ struct inode_smack *new_inode_smack(char *smack)
  *
  * Do the capability checks, and require read and write.
  */
-static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode)
+static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
 {
        int rc;
        struct smk_audit_info ad;
        char *sp, *tsp;
 
-       rc = cap_ptrace_may_access(ctp, mode);
+       rc = cap_ptrace_access_check(ctp, mode);
        if (rc != 0)
                return rc;
 
@@ -1079,6 +1079,22 @@ static int smack_file_receive(struct file *file)
  * Task hooks
  */
 
+/**
+ * smack_cred_alloc_blank - "allocate" blank task-level security credentials
+ * @cred: the new credentials
+ * @gfp: the atomicity of any memory allocations
+ *
+ * Prepare a blank set of credentials for modification.  This must allocate all
+ * the memory the LSM module might require such that cred_transfer() can
+ * complete without error.
+ */
+static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
+{
+       cred->security = NULL;
+       return 0;
+}
+
+
 /**
  * smack_cred_free - "free" task-level security credentials
  * @cred: the credentials in question
@@ -1116,6 +1132,18 @@ static void smack_cred_commit(struct cred *new, const struct cred *old)
 {
 }
 
+/**
+ * smack_cred_transfer - Transfer the old credentials to the new credentials
+ * @new: the new credentials
+ * @old: the original credentials
+ *
+ * Fill in a set of blank credentials from another set of credentials.
+ */
+static void smack_cred_transfer(struct cred *new, const struct cred *old)
+{
+       new->security = old->security;
+}
+
 /**
  * smack_kernel_act_as - Set the subjective context in a set of credentials
  * @new: points to the set of credentials to be modified.
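Smack keeps only a shared label pointer in cred->security, so its blank allocation is a NULL assignment and its transfer is a pointer copy. For an LSM that keeps heap-allocated per-credential state, the contract described in the comment above (cred_alloc_blank must pre-allocate everything so that cred_transfer cannot fail) would look roughly like this hypothetical sketch; struct foo_cred and the foo_* hook names are illustrative only:

        #include <linux/cred.h>
        #include <linux/slab.h>

        struct foo_cred {
                u32 sid;
        };

        static int foo_cred_alloc_blank(struct cred *cred, gfp_t gfp)
        {
                struct foo_cred *blob = kzalloc(sizeof(*blob), gfp);

                if (!blob)
                        return -ENOMEM;
                cred->security = blob;
                return 0;
        }

        static void foo_cred_transfer(struct cred *new, const struct cred *old)
        {
                /* No allocation here: cred_alloc_blank() already provided
                 * the blob, so this hook cannot fail. */
                *(struct foo_cred *)new->security =
                        *(const struct foo_cred *)old->security;
        }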
@@ -1638,6 +1666,7 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name,
 
        if (strcmp(name, XATTR_SMACK_SUFFIX) == 0) {
                nsp->smk_inode = sp;
+               nsp->smk_flags |= SMK_INODE_INSTANT;
                return 0;
        }
        /*
@@ -2464,7 +2493,7 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg,
        /*
         * Perfectly reasonable for this to be NULL
         */
-       if (sip == NULL || sip->sin_family != PF_INET)
+       if (sip == NULL || sip->sin_family != AF_INET)
                return 0;
 
        return smack_netlabel_send(sock->sk, sip);
@@ -3029,10 +3058,31 @@ static void smack_release_secctx(char *secdata, u32 seclen)
 {
 }
 
+static int smack_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
+{
+       return smack_inode_setsecurity(inode, XATTR_SMACK_SUFFIX, ctx, ctxlen, 0);
+}
+
+static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
+{
+       return __vfs_setxattr_noperm(dentry, XATTR_NAME_SMACK, ctx, ctxlen, 0);
+}
+
+static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
+{
+       int len = 0;
+       len = smack_inode_getsecurity(inode, XATTR_SMACK_SUFFIX, ctx, true);
+
+       if (len < 0)
+               return len;
+       *ctxlen = len;
+       return 0;
+}
+
 struct security_operations smack_ops = {
        .name =                         "smack",
 
-       .ptrace_may_access =            smack_ptrace_may_access,
+       .ptrace_access_check =          smack_ptrace_access_check,
        .ptrace_traceme =               smack_ptrace_traceme,
        .syslog =                       smack_syslog,
 
@@ -3073,9 +3123,11 @@ struct security_operations smack_ops = {
        .file_send_sigiotask =          smack_file_send_sigiotask,
        .file_receive =                 smack_file_receive,
 
+       .cred_alloc_blank =             smack_cred_alloc_blank,
        .cred_free =                    smack_cred_free,
        .cred_prepare =                 smack_cred_prepare,
        .cred_commit =                  smack_cred_commit,
+       .cred_transfer =                smack_cred_transfer,
        .kernel_act_as =                smack_kernel_act_as,
        .kernel_create_files_as =       smack_kernel_create_files_as,
        .task_setpgid =                 smack_task_setpgid,
@@ -3155,6 +3207,9 @@ struct security_operations smack_ops = {
        .secid_to_secctx =              smack_secid_to_secctx,
        .secctx_to_secid =              smack_secctx_to_secid,
        .release_secctx =               smack_release_secctx,
+       .inode_notifysecctx =           smack_inode_notifysecctx,
+       .inode_setsecctx =              smack_inode_setsecctx,
+       .inode_getsecctx =              smack_inode_getsecctx,
 };
 
 
index fdd1f4b8c448e4c1fa6611bb9456f5e9c9d30070..3c8bd8ee0b95e70e17e801913eb7481030ebcfae 100644
@@ -1284,6 +1284,36 @@ static bool tomoyo_is_select_one(struct tomoyo_io_buffer *head,
        return true;
 }
 
+/**
+ * tomoyo_delete_domain - Delete a domain.
+ *
+ * @domainname: The name of domain.
+ *
+ * Returns 0.
+ */
+static int tomoyo_delete_domain(char *domainname)
+{
+       struct tomoyo_domain_info *domain;
+       struct tomoyo_path_info name;
+
+       name.name = domainname;
+       tomoyo_fill_path_info(&name);
+       down_write(&tomoyo_domain_list_lock);
+       /* Is there an active domain? */
+       list_for_each_entry(domain, &tomoyo_domain_list, list) {
+               /* Never delete tomoyo_kernel_domain */
+               if (domain == &tomoyo_kernel_domain)
+                       continue;
+               if (domain->is_deleted ||
+                   tomoyo_pathcmp(domain->domainname, &name))
+                       continue;
+               domain->is_deleted = true;
+               break;
+       }
+       up_write(&tomoyo_domain_list_lock);
+       return 0;
+}
+
 /**
  * tomoyo_write_domain_policy - Write domain policy.
  *
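Note that tomoyo_delete_domain() deletes logically: the entry keeps its place on tomoyo_domain_list and is only flagged is_deleted, so domain pointers already stashed in other credentials remain valid and lookups simply skip flagged entries. A sketch of the matching lookup side under the same locking scheme (the helper name and exact checks are assumptions for illustration):

        static struct tomoyo_domain_info *example_find_domain(const char *domainname)
        {
                struct tomoyo_domain_info *domain;
                struct tomoyo_path_info name;

                name.name = domainname;
                tomoyo_fill_path_info(&name);
                down_read(&tomoyo_domain_list_lock);
                list_for_each_entry(domain, &tomoyo_domain_list, list) {
                        if (domain->is_deleted ||
                            tomoyo_pathcmp(domain->domainname, &name))
                                continue;
                        up_read(&tomoyo_domain_list_lock);
                        return domain;
                }
                up_read(&tomoyo_domain_list_lock);
                return NULL;
        }

Returning the pointer after dropping the lock is safe precisely because deleted domains are never freed, only flagged.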
index 6d6ba09af4576b27d6aba92edad7d3c7b03515b3..31df541911f76f3d5cc39c92508eb37a25fb8545 100644
@@ -339,8 +339,6 @@ const char *tomoyo_get_last_name(const struct tomoyo_domain_info *domain);
 const char *tomoyo_get_msg(const bool is_enforce);
 /* Convert single path operation to operation name. */
 const char *tomoyo_sp2keyword(const u8 operation);
-/* Delete a domain. */
-int tomoyo_delete_domain(char *data);
 /* Create "alias" entry in exception policy. */
 int tomoyo_write_alias_policy(char *data, const bool is_delete);
 /*
index 1d8b16960576891615d7d523626d527030cf31bc..fcf52accce2b107798b7be675337eb53f9279a38 100644
@@ -717,38 +717,6 @@ int tomoyo_write_alias_policy(char *data, const bool is_delete)
        return tomoyo_update_alias_entry(data, cp, is_delete);
 }
 
-/* Domain create/delete handler. */
-
-/**
- * tomoyo_delete_domain - Delete a domain.
- *
- * @domainname: The name of domain.
- *
- * Returns 0.
- */
-int tomoyo_delete_domain(char *domainname)
-{
-       struct tomoyo_domain_info *domain;
-       struct tomoyo_path_info name;
-
-       name.name = domainname;
-       tomoyo_fill_path_info(&name);
-       down_write(&tomoyo_domain_list_lock);
-       /* Is there an active domain? */
-       list_for_each_entry(domain, &tomoyo_domain_list, list) {
-               /* Never delete tomoyo_kernel_domain */
-               if (domain == &tomoyo_kernel_domain)
-                       continue;
-               if (domain->is_deleted ||
-                   tomoyo_pathcmp(domain->domainname, &name))
-                       continue;
-               domain->is_deleted = true;
-               break;
-       }
-       up_write(&tomoyo_domain_list_lock);
-       return 0;
-}
-
 /**
  * tomoyo_find_or_assign_new_domain - Create a domain.
  *
@@ -818,13 +786,11 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char *
 /**
  * tomoyo_find_next_domain - Find a domain.
  *
- * @bprm:           Pointer to "struct linux_binprm".
- * @next_domain:    Pointer to pointer to "struct tomoyo_domain_info".
+ * @bprm: Pointer to "struct linux_binprm".
  *
  * Returns 0 on success, negative value otherwise.
  */
-int tomoyo_find_next_domain(struct linux_binprm *bprm,
-                           struct tomoyo_domain_info **next_domain)
+int tomoyo_find_next_domain(struct linux_binprm *bprm)
 {
        /*
         * This function assumes that the size of buffer returned by
@@ -946,9 +912,11 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm,
                tomoyo_set_domain_flag(old_domain, false,
                                       TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED);
  out:
+       if (!domain)
+               domain = old_domain;
+       bprm->cred->security = domain;
        tomoyo_free(real_program_name);
        tomoyo_free(symlink_program_name);
-       *next_domain = domain ? domain : old_domain;
        tomoyo_free(tmp);
        return retval;
 }
index 3194d09fe0f4ccd5311b819e526a1b696a8ecc33..9548a0984cc43674c8b5470ef5931bf71ac9cb1e 100644
 #include "tomoyo.h"
 #include "realpath.h"
 
+static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp)
+{
+       new->security = NULL;
+       return 0;
+}
+
 static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
                               gfp_t gfp)
 {
@@ -25,6 +31,15 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old,
        return 0;
 }
 
+static void tomoyo_cred_transfer(struct cred *new, const struct cred *old)
+{
+       /*
+        * Since "struct tomoyo_domain_info *" is a sharable pointer,
+        * we don't need to duplicate.
+        */
+       new->security = old->security;
+}
+
 static int tomoyo_bprm_set_creds(struct linux_binprm *bprm)
 {
        int rc;
@@ -61,14 +76,8 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
         * Execute permission is checked against pathname passed to do_execve()
         * using current domain.
         */
-       if (!domain) {
-               struct tomoyo_domain_info *next_domain = NULL;
-               int retval = tomoyo_find_next_domain(bprm, &next_domain);
-
-               if (!retval)
-                       bprm->cred->security = next_domain;
-               return retval;
-       }
+       if (!domain)
+               return tomoyo_find_next_domain(bprm);
        /*
         * Read permission is checked against interpreters using next domain.
         * '1' is the result of open_to_namei_flags(O_RDONLY).
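With the next_domain out-parameter gone, tomoyo_find_next_domain() now leaves its result on bprm->cred->security itself, falling back to the old domain when no transition is found, so the hook can simply tail-call it on the first pass and reuse the stashed pointer afterwards. A hypothetical sketch of that stash-and-reuse shape; foo_domain() and the simplified second pass are illustrative, not TOMOYO code:

        static inline struct tomoyo_domain_info *foo_domain(const struct cred *cred)
        {
                return cred->security;
        }

        static int foo_bprm_check_security(struct linux_binprm *bprm)
        {
                /* first pass: no domain yet, resolve and stash it */
                if (!foo_domain(bprm->cred))
                        return tomoyo_find_next_domain(bprm);
                /* later passes: reuse the domain stashed on the cred */
                return 0;
        }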
@@ -268,7 +277,9 @@ static int tomoyo_dentry_open(struct file *f, const struct cred *cred)
  */
 static struct security_operations tomoyo_security_ops = {
        .name                = "tomoyo",
+       .cred_alloc_blank    = tomoyo_cred_alloc_blank,
        .cred_prepare        = tomoyo_cred_prepare,
+       .cred_transfer       = tomoyo_cred_transfer,
        .bprm_set_creds      = tomoyo_bprm_set_creds,
        .bprm_check_security = tomoyo_bprm_check_security,
 #ifdef CONFIG_SYSCTL
index 0fd588a629cf915ca6be0a47dbc300292d908570..cd6ba0bf7069a98128bdac2d562c9746decdcc6f 100644
@@ -31,8 +31,7 @@ int tomoyo_check_2path_perm(struct tomoyo_domain_info *domain,
                            struct path *path2);
 int tomoyo_check_rewrite_permission(struct tomoyo_domain_info *domain,
                                    struct file *filp);
-int tomoyo_find_next_domain(struct linux_binprm *bprm,
-                           struct tomoyo_domain_info **next_domain);
+int tomoyo_find_next_domain(struct linux_binprm *bprm);
 
 /* Index numbers for Access Controls. */