Merge branch 'upstream' of git://ftp.linux-mips.org/pub/scm/upstream-linus
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 19 Oct 2007 19:01:22 +0000 (12:01 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 19 Oct 2007 19:01:22 +0000 (12:01 -0700)
* 'upstream' of git://ftp.linux-mips.org/pub/scm/upstream-linus:
  [MIPS] Delete totally outdated Documentation/mips/time.README
  [MIPS] Kill duplicated setup_irq() for cp0 timer
  [MIPS] Sibyte: Finish conversion to modern time APIs.
  [MIPS] time: Helpers to compute clocksource/event shift and mult values.
  [MIPS] SMTC: Build fix.
  [MIPS] time: Delete dead code.
  [MIPS] MIPSsim: Strip defconfig file to the bones.

605 files changed:
Documentation/DocBook/kernel-api.tmpl
Documentation/accounting/cgroupstats.txt [new file with mode: 0644]
Documentation/cachetlb.txt
Documentation/cgroups.txt [new file with mode: 0644]
Documentation/cpu-hotplug.txt
Documentation/cpusets.txt
Documentation/input/input-programming.txt
Documentation/kdump/kdump.txt
Documentation/kernel-parameters.txt
Documentation/markers.txt [new file with mode: 0644]
Makefile
arch/alpha/Kconfig
arch/alpha/kernel/semaphore.c
arch/alpha/kernel/traps.c
arch/alpha/lib/fls.c
arch/alpha/mm/fault.c
arch/alpha/oprofile/Kconfig [deleted file]
arch/arm/Kconfig
arch/arm/kernel/process.c
arch/arm/kernel/ptrace.c
arch/arm/kernel/traps.c
arch/arm/mm/alignment.c
arch/arm/mm/fault.c
arch/arm/oprofile/Kconfig [deleted file]
arch/avr32/kernel/traps.c
arch/avr32/mm/fault.c
arch/blackfin/Kconfig
arch/blackfin/oprofile/Kconfig [deleted file]
arch/cris/Kconfig
arch/frv/Kconfig
arch/frv/kernel/irq-mb93091.c
arch/frv/kernel/irq-mb93093.c
arch/frv/kernel/irq-mb93493.c
arch/frv/kernel/irq.c
arch/h8300/Kconfig
arch/i386/Kconfig
arch/ia64/Kconfig
arch/ia64/configs/sn2_defconfig
arch/ia64/ia32/sys_ia32.c
arch/ia64/kernel/efi.c
arch/ia64/kernel/perfmon.c
arch/ia64/kernel/perfmon_default_smpl.c
arch/ia64/kernel/process.c
arch/ia64/kernel/setup.c
arch/ia64/kernel/signal.c
arch/ia64/kernel/traps.c
arch/ia64/kernel/unaligned.c
arch/ia64/mm/fault.c
arch/ia64/mm/init.c
arch/ia64/oprofile/Kconfig [deleted file]
arch/m32r/Kconfig
arch/m32r/kernel/traps.c
arch/m32r/mm/fault.c
arch/m32r/oprofile/Kconfig [deleted file]
arch/m68k/Kconfig
arch/m68k/kernel/traps.c
arch/m68k/mm/fault.c
arch/m68knommu/Kconfig
arch/mips/Kconfig
arch/mips/au1000/pb1200/irqmap.c
arch/mips/basler/excite/excite_irq.c
arch/mips/configs/ip27_defconfig
arch/mips/configs/sb1250-swarm_defconfig
arch/mips/kernel/irixelf.c
arch/mips/kernel/irixsig.c
arch/mips/kernel/sysirix.c
arch/mips/kernel/traps.c
arch/mips/mm/fault.c
arch/mips/oprofile/Kconfig [deleted file]
arch/mips/tx4938/common/setup.c
arch/parisc/Kconfig
arch/parisc/kernel/signal.c
arch/parisc/kernel/traps.c
arch/parisc/kernel/unaligned.c
arch/parisc/mm/fault.c
arch/parisc/oprofile/Kconfig [deleted file]
arch/powerpc/Kconfig
arch/powerpc/configs/cell_defconfig
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/configs/pseries_defconfig
arch/powerpc/kernel/machine_kexec.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/traps.c
arch/powerpc/mm/fault.c
arch/powerpc/oprofile/Kconfig [deleted file]
arch/powerpc/platforms/maple/setup.c
arch/powerpc/platforms/pseries/ras.c
arch/ppc/Kconfig
arch/ppc/kernel/traps.c
arch/ppc/mm/fault.c
arch/ppc/platforms/chestnut.c
arch/s390/Kconfig
arch/s390/kernel/process.c
arch/s390/lib/uaccess_pt.c
arch/s390/mm/fault.c
arch/s390/oprofile/Kconfig [deleted file]
arch/sh/Kconfig
arch/sh/kernel/machine_kexec.c
arch/sh/kernel/process.c
arch/sh/kernel/setup.c
arch/sh/kernel/signal.c
arch/sh/kernel/traps.c
arch/sh/mm/fault.c
arch/sh/oprofile/Kconfig [deleted file]
arch/sh64/Kconfig
arch/sh64/kernel/traps.c
arch/sh64/mm/fault.c
arch/sh64/oprofile/Kconfig [deleted file]
arch/sparc/Kconfig
arch/sparc/kernel/ptrace.c
arch/sparc/kernel/sys_sparc.c
arch/sparc/kernel/sys_sunos.c
arch/sparc/kernel/traps.c
arch/sparc/oprofile/Kconfig [deleted file]
arch/sparc64/Kconfig
arch/sparc64/kernel/sys_sunos32.c
arch/sparc64/kernel/traps.c
arch/sparc64/oprofile/Kconfig [deleted file]
arch/sparc64/solaris/misc.c
arch/um/Kconfig
arch/um/kernel/trap.c
arch/um/sys-x86_64/sysrq.c
arch/v850/Kconfig
arch/x86/kernel/crash_dump_32.c
arch/x86/kernel/e820_32.c
arch/x86/kernel/e820_64.c
arch/x86/kernel/machine_kexec_32.c
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/process_32.c
arch/x86/kernel/setup_32.c
arch/x86/kernel/setup_64.c
arch/x86/kernel/signal_32.c
arch/x86/kernel/traps_32.c
arch/x86/lib/usercopy_32.c
arch/x86/mm/fault_32.c
arch/x86/mm/fault_64.c
arch/x86/mm/pageattr_64.c
arch/x86_64/Kconfig
arch/xtensa/Kconfig
arch/xtensa/kernel/traps.c
arch/xtensa/mm/fault.c
block/ll_rw_blk.c
drivers/acpi/button.c
drivers/block/nbd.c
drivers/cdrom/cdrom.c
drivers/char/drm/drm_bufs.c
drivers/char/drm/drm_drv.c
drivers/char/drm/drm_fops.c
drivers/char/drm/drm_lock.c
drivers/char/drm/drm_os_linux.h
drivers/char/drm/i810_dma.c
drivers/char/drm/i830_dma.c
drivers/char/esp.c
drivers/char/keyboard.c
drivers/char/mxser.c
drivers/char/mxser_new.c
drivers/char/sonypi.c
drivers/char/sx.c
drivers/char/sysrq.c
drivers/char/tty_io.c
drivers/char/vt.c
drivers/edac/edac_core.h
drivers/edac/pasemi_edac.c
drivers/firmware/dcdbas.h
drivers/hid/hidraw.c
drivers/hid/usbhid/usbkbd.c
drivers/hid/usbhid/usbmouse.c
drivers/hwmon/applesmc.c
drivers/hwmon/hdaps.c
drivers/i2c/busses/i2c-pxa.c
drivers/ide/ide-io.c
drivers/infiniband/core/cma.c
drivers/input/evdev.c
drivers/input/input.c
drivers/input/joydev.c
drivers/input/joystick/a3d.c
drivers/input/joystick/adi.c
drivers/input/joystick/amijoy.c
drivers/input/joystick/analog.c
drivers/input/joystick/cobra.c
drivers/input/joystick/db9.c
drivers/input/joystick/gamecon.c
drivers/input/joystick/gf2k.c
drivers/input/joystick/grip.c
drivers/input/joystick/grip_mp.c
drivers/input/joystick/guillemot.c
drivers/input/joystick/iforce/iforce-main.c
drivers/input/joystick/iforce/iforce.h
drivers/input/joystick/interact.c
drivers/input/joystick/magellan.c
drivers/input/joystick/sidewinder.c
drivers/input/joystick/spaceball.c
drivers/input/joystick/spaceorb.c
drivers/input/joystick/stinger.c
drivers/input/joystick/tmdc.c
drivers/input/joystick/turbografx.c
drivers/input/joystick/twidjoy.c
drivers/input/joystick/warrior.c
drivers/input/joystick/xpad.c
drivers/input/keyboard/aaed2000_kbd.c
drivers/input/keyboard/amikbd.c
drivers/input/keyboard/atakbd.c
drivers/input/keyboard/atkbd.c
drivers/input/keyboard/corgikbd.c
drivers/input/keyboard/gpio_keys.c
drivers/input/keyboard/hil_kbd.c
drivers/input/keyboard/hilkbd.c
drivers/input/keyboard/locomokbd.c
drivers/input/keyboard/newtonkbd.c
drivers/input/keyboard/pxa27x_keyboard.c
drivers/input/keyboard/spitzkbd.c
drivers/input/keyboard/stowaway.c
drivers/input/keyboard/sunkbd.c
drivers/input/keyboard/xtkbd.c
drivers/input/misc/ati_remote.c
drivers/input/misc/ati_remote2.c
drivers/input/misc/atlas_btns.c
drivers/input/misc/cobalt_btns.c
drivers/input/misc/ixp4xx-beeper.c
drivers/input/misc/keyspan_remote.c
drivers/input/misc/m68kspkr.c
drivers/input/misc/pcspkr.c
drivers/input/misc/powermate.c
drivers/input/misc/sparcspkr.c
drivers/input/misc/yealink.c
drivers/input/mouse/alps.c
drivers/input/mouse/amimouse.c
drivers/input/mouse/atarimouse.c
drivers/input/mouse/hil_ptr.c
drivers/input/mouse/inport.c
drivers/input/mouse/lifebook.c
drivers/input/mouse/logibm.c
drivers/input/mouse/pc110pad.c
drivers/input/mouse/psmouse-base.c
drivers/input/mouse/rpcmouse.c
drivers/input/mouse/sermouse.c
drivers/input/mouse/touchkit_ps2.c
drivers/input/mousedev.c
drivers/input/tablet/acecad.c
drivers/input/tablet/gtco.c
drivers/input/tablet/kbtab.c
drivers/input/tablet/wacom_sys.c
drivers/input/touchscreen/ads7846.c
drivers/input/touchscreen/corgi_ts.c
drivers/input/touchscreen/elo.c
drivers/input/touchscreen/fujitsu_ts.c
drivers/input/touchscreen/gunze.c
drivers/input/touchscreen/h3600_ts_input.c
drivers/input/touchscreen/hp680_ts_input.c
drivers/input/touchscreen/mk712.c
drivers/input/touchscreen/mtouch.c
drivers/input/touchscreen/penmount.c
drivers/input/touchscreen/touchright.c
drivers/input/touchscreen/touchwin.c
drivers/input/touchscreen/ucb1400_ts.c
drivers/input/touchscreen/usbtouchscreen.c
drivers/isdn/hardware/avm/b1dma.c
drivers/isdn/hardware/avm/c4.c
drivers/isdn/hardware/avm/t1isa.c
drivers/isdn/sc/debug.h [deleted file]
drivers/isdn/sc/includes.h
drivers/isdn/sc/init.c
drivers/macintosh/adbhid.c
drivers/macintosh/mac_hid.c
drivers/md/md.c
drivers/media/dvb/cinergyT2/cinergyT2.c
drivers/media/dvb/dvb-usb/dvb-usb-remote.c
drivers/media/dvb/ttpci/av7110_ir.c
drivers/media/dvb/ttusb-dec/ttusb_dec.c
drivers/media/video/usbvideo/konicawc.c
drivers/media/video/usbvideo/quickcam_messenger.c
drivers/media/video/zoran_driver.c
drivers/misc/ibmasm/remote.c
drivers/misc/phantom.c
drivers/misc/sony-laptop.c
drivers/mtd/ubi/wl.c
drivers/net/bnx2.c
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_3ad.h
drivers/net/cris/eth_v10.c
drivers/net/cxgb3/adapter.h
drivers/net/eth16i.c
drivers/net/hamradio/dmascc.c
drivers/net/mac89x0.c
drivers/net/meth.h
drivers/net/s2io-regs.h
drivers/net/s2io.c
drivers/net/s2io.h
drivers/net/spider_net.c
drivers/net/tulip/uli526x.c
drivers/net/wireless/bcm43xx/bcm43xx_leds.c
drivers/net/wireless/hostap/hostap_common.h
drivers/net/wireless/hostap/hostap_ioctl.c
drivers/pcmcia/m32r_pcc.c
drivers/pcmcia/m8xx_pcmcia.c
drivers/ps3/ps3av.c
drivers/ps3/vuart.c
drivers/rtc/rtc-pl031.c
drivers/rtc/rtc-sa1100.c
drivers/s390/cio/idset.c
drivers/s390/net/claw.c
drivers/scsi/FlashPoint.c
drivers/scsi/Kconfig
drivers/scsi/ide-scsi.c
drivers/scsi/libsas/sas_discover.c
drivers/scsi/nsp32.h
drivers/scsi/pcmcia/nsp_cs.h
drivers/scsi/qla4xxx/ql4_fw.h
drivers/scsi/qla4xxx/ql4_iocb.c
drivers/serial/amba-pl011.c
drivers/serial/crisv10.c
drivers/usb/core/devio.c
drivers/usb/gadget/file_storage.c
drivers/video/cyber2000fb.c
drivers/video/pnx4008/sdum.h
drivers/watchdog/at91rm9200_wdt.c
drivers/watchdog/ks8695_wdt.c
drivers/watchdog/omap_wdt.c
drivers/watchdog/sa1100_wdt.c
fs/Kconfig
fs/autofs/inode.c
fs/autofs/root.c
fs/autofs4/autofs_i.h
fs/autofs4/inode.c
fs/autofs4/root.c
fs/binfmt_elf.c
fs/binfmt_elf_fdpic.c
fs/cifs/CHANGES
fs/cifs/Makefile
fs/cifs/asn1.c
fs/cifs/cifs_debug.c
fs/cifs/cifsacl.c [new file with mode: 0644]
fs/cifs/cifsacl.h
fs/cifs/cifsencrypt.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/cifsglob.h
fs/cifs/cifspdu.h
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/export.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/link.c
fs/cifs/misc.c
fs/cifs/netmisc.c
fs/cifs/readdir.c
fs/cifs/sess.c
fs/cifs/smberr.h
fs/cifs/transport.c
fs/cifs/xattr.c
fs/coda/upcall.c
fs/dlm/user.c
fs/eventpoll.c
fs/exec.c
fs/ext3/inode.c
fs/ext3/xattr.c
fs/fcntl.c
fs/file_table.c
fs/fs-writeback.c
fs/ioprio.c
fs/jbd/commit.c
fs/jbd/journal.c
fs/jbd/recovery.c
fs/jbd/transaction.c
fs/jffs2/debug.h
fs/namespace.c
fs/nfsd/vfs.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/cluster/masklog.h
fs/ocfs2/dlm/dlmrecovery.c
fs/proc/array.c
fs/proc/base.c
fs/proc/inode.c
fs/proc/proc_misc.c
fs/proc/root.c
fs/reiserfs/bitmap.c
fs/reiserfs/inode.c
fs/reiserfs/journal.c
fs/reiserfs/prints.c
fs/reiserfs/resize.c
fs/reiserfs/stree.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/select.c
fs/super.c
fs/xfs/linux-2.6/xfs_export.c
fs/xfs/linux-2.6/xfs_export.h
fs/xfs/linux-2.6/xfs_ioctl.c
fs/xfs/xfs_dmops.c
fs/xfs/xfs_fs.h
fs/xfs/xfs_qmops.c
fs/xfs/xfs_vfsops.c
fs/xfs/xfs_vfsops.h
fs/xfs/xfs_vnodeops.c
fs/xfs/xfs_vnodeops.h
include/asm-alpha/bitops.h
include/asm-alpha/tlbflush.h
include/asm-arm/arch-ixp4xx/io.h
include/asm-arm/bitops.h
include/asm-arm/tlbflush.h
include/asm-avr32/bitops.h
include/asm-avr32/tlbflush.h
include/asm-blackfin/bitops.h
include/asm-blackfin/tlbflush.h
include/asm-cris/bitops.h
include/asm-cris/posix_types.h
include/asm-cris/tlbflush.h
include/asm-frv/bitops.h
include/asm-frv/tlbflush.h
include/asm-generic/bitops.h
include/asm-generic/bitops/atomic.h
include/asm-generic/bitops/non-atomic.h
include/asm-generic/vmlinux.lds.h
include/asm-h8300/bitops.h
include/asm-h8300/tlbflush.h
include/asm-ia64/bitops.h
include/asm-ia64/cacheflush.h
include/asm-ia64/meminit.h
include/asm-ia64/pgtable.h
include/asm-ia64/smp.h
include/asm-ia64/spinlock.h
include/asm-ia64/tlbflush.h
include/asm-m32r/bitops.h
include/asm-m32r/pgtable.h
include/asm-m32r/tlbflush.h
include/asm-m68k/bitops.h
include/asm-m68k/tlbflush.h
include/asm-m68knommu/bitops.h
include/asm-m68knommu/tlbflush.h
include/asm-mips/bitops.h
include/asm-mips/fpu.h
include/asm-mips/ip32/crime.h
include/asm-mips/ip32/mace.h
include/asm-mips/tlbflush.h
include/asm-parisc/bitops.h
include/asm-parisc/pgtable.h
include/asm-parisc/tlbflush.h
include/asm-powerpc/bitops.h
include/asm-powerpc/iommu.h
include/asm-powerpc/mmu_context.h
include/asm-powerpc/tlbflush.h
include/asm-ppc/mmu_context.h
include/asm-s390/bitops.h
include/asm-s390/tlbflush.h
include/asm-sh/bitops.h
include/asm-sh/tlbflush.h
include/asm-sh64/bitops.h
include/asm-sh64/tlbflush.h
include/asm-sparc/bitops.h
include/asm-sparc/tlbflush.h
include/asm-sparc64/bitops.h
include/asm-sparc64/smp.h
include/asm-sparc64/tlbflush.h
include/asm-um/bitops.h
include/asm-um/tlbflush.h
include/asm-v850/bitops.h
include/asm-v850/tlbflush.h
include/asm-x86/bitops_32.h
include/asm-x86/bitops_64.h
include/asm-x86/pgtable_32.h
include/asm-x86/pgtable_64.h
include/asm-x86/smp_32.h
include/asm-x86/tlbflush_32.h
include/asm-x86/tlbflush_64.h
include/asm-x86/topology_64.h
include/asm-xtensa/bitops.h
include/asm-xtensa/tlbflush.h
include/linux/Kbuild
include/linux/bitmap.h
include/linux/bitops.h
include/linux/cgroup.h [new file with mode: 0644]
include/linux/cgroup_subsys.h [new file with mode: 0644]
include/linux/cgroupstats.h [new file with mode: 0644]
include/linux/cpu_acct.h [new file with mode: 0644]
include/linux/cpuset.h
include/linux/delayacct.h
include/linux/fs.h
include/linux/hid.h
include/linux/init_task.h
include/linux/input.h
include/linux/ipc.h
include/linux/jbd.h
include/linux/kexec.h
include/linux/keyboard.h
include/linux/list.h
include/linux/lockdep.h
include/linux/magic.h
include/linux/marker.h [new file with mode: 0644]
include/linux/mempolicy.h
include/linux/module.h
include/linux/msg.h
include/linux/notifier.h
include/linux/nsproxy.h
include/linux/of.h
include/linux/phantom.h
include/linux/pid.h
include/linux/pid_namespace.h
include/linux/prio_heap.h [new file with mode: 0644]
include/linux/proc_fs.h
include/linux/reiserfs_fs.h
include/linux/reiserfs_fs_sb.h
include/linux/sched.h
include/linux/sem.h
include/linux/shm.h
include/linux/types.h
include/linux/uinput.h
include/linux/vt.h
include/linux/workqueue.h
include/net/9p/9p.h
include/net/scm.h
include/video/sstfb.h
include/video/tdfx.h
init/Kconfig
init/main.c
ipc/mqueue.c
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
kernel/Kconfig.instrumentation [new file with mode: 0644]
kernel/Makefile
kernel/capability.c
kernel/cgroup.c [new file with mode: 0644]
kernel/cgroup_debug.c [new file with mode: 0644]
kernel/cpu.c
kernel/cpu_acct.c [new file with mode: 0644]
kernel/cpuset.c
kernel/die_notifier.c [deleted file]
kernel/exit.c
kernel/fork.c
kernel/futex.c
kernel/futex_compat.c
kernel/kexec.c
kernel/lockdep.c
kernel/marker.c [new file with mode: 0644]
kernel/module.c
kernel/notifier.c [new file with mode: 0644]
kernel/ns_cgroup.c [new file with mode: 0644]
kernel/nsproxy.c
kernel/pid.c
kernel/posix-cpu-timers.c
kernel/posix-timers.c
kernel/ptrace.c
kernel/rtmutex-debug.c
kernel/rtmutex.c
kernel/sched.c
kernel/signal.c
kernel/softlockup.c
kernel/sys.c
kernel/sysctl.c
kernel/taskstats.c
kernel/time/clocksource.c
kernel/timer.c
kernel/workqueue.c
lib/Kconfig.debug
lib/Makefile
lib/hweight.c
lib/percpu_counter.c
lib/prio_heap.c [new file with mode: 0644]
lib/spinlock_debug.c
mm/filemap.c
mm/memory.c
mm/mempolicy.c
mm/migrate.c
mm/mmap.c
mm/mprotect.c
mm/oom_kill.c
net/bluetooth/hidp/core.c
net/core/filter.c
net/core/gen_estimator.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/scm.c
net/core/sock.c
net/ipv4/fib_trie.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_hashtables.c
net/ipv4/ipvs/ip_vs_sync.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/udp.c
net/ipv6/inet6_hashtables.c
net/llc/af_llc.c
net/mac80211/ieee80211_i.h
net/mac80211/ieee80211_sta.c
net/netfilter/xt_connbytes.c
net/packet/af_packet.c
net/rfkill/rfkill-input.c
net/sched/Kconfig
net/sched/sch_generic.c
net/sunrpc/sched.c
net/unix/af_unix.c
samples/Kconfig [new file with mode: 0644]
samples/Makefile [new file with mode: 0644]
samples/markers/Makefile [new file with mode: 0644]
samples/markers/marker-example.c [new file with mode: 0644]
samples/markers/probe-example.c [new file with mode: 0644]
scripts/checkstack.pl
security/commoncap.c
sound/ppc/beep.c
sound/usb/caiaq/caiaq-input.c

diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index d3290c46af51c0068148752d8b6ddab4d8bf88ca..aa38cc5692a005fe4a48f59bdabff3689865c6db 100644
@@ -46,7 +46,7 @@
 
      <sect1><title>Atomic and pointer manipulation</title>
 !Iinclude/asm-x86/atomic_32.h
-!Iinclude/asm-x86/unaligned_32.h
+!Iinclude/asm-x86/unaligned.h
      </sect1>
 
      <sect1><title>Delaying, scheduling, and timer routines</title>
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
new file mode 100644
index 0000000..eda40fd
--- /dev/null
@@ -0,0 +1,27 @@
+Control Groupstats is inspired by the discussion at
+http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
+suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
+
+The per-cgroup statistics infrastructure re-uses code from the taskstats
+interface. A new set of cgroup operations is registered, with commands
+and attributes specific to cgroups. It should be very easy to
+extend per-cgroup statistics by adding members to the cgroupstats
+structure.
+
+The current model for cgroupstats is pull; a push model (to post
+statistics on interesting events) should be very easy to add. Currently
+user space requests statistics by passing the cgroup path.
+Statistics about the state of all the tasks in the cgroup are returned
+to user space.
+
+NOTE: We currently rely on delay accounting for extracting information
+about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
+information will not be available.
+
+To extract cgroup statistics, a utility very similar to getdelays.c
+has been developed; sample output of the utility is shown below:
+
+~/balbir/cgroupstats # ./getdelays  -C "/cgroup/a"
+sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
+~/balbir/cgroupstats # ./getdelays  -C "/cgroup"
+sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
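
The five counters in that output correspond one-to-one to per-state task
counts carried by the cgroupstats structure. A small user-side sketch of
such a structure and its printer -- the field names are illustrative
guesses, not necessarily those in include/linux/cgroupstats.h, and the
real data is delivered over the taskstats netlink interface rather than
filled in locally:

#include <stdio.h>

/* Illustrative mirror of the per-cgroup counters printed above. */
struct cgroupstats_sample {
	unsigned long long nr_sleeping;
	unsigned long long nr_blocked;		/* needs CONFIG_TASK_DELAY_ACCT */
	unsigned long long nr_running;
	unsigned long long nr_stopped;
	unsigned long long nr_uninterruptible;
};

static void print_cgroupstats(const struct cgroupstats_sample *s)
{
	printf("sleeping %llu, blocked %llu, running %llu, "
	       "stopped %llu, uninterruptible %llu\n",
	       s->nr_sleeping, s->nr_blocked, s->nr_running,
	       s->nr_stopped, s->nr_uninterruptible);
}

int main(void)
{
	struct cgroupstats_sample s = { 1, 0, 1, 0, 0 };

	print_cgroupstats(&s);	/* reproduces the first sample line above */
	return 0;
}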
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 552cabac06082c460c55befab72b637724c86cd5..da42ab414c4864db4bb9d35b0cca9005b27670d2 100644
@@ -87,30 +87,7 @@ changes occur:
 
        This is used primarily during fault processing.
 
-5) void flush_tlb_pgtables(struct mm_struct *mm,
-                          unsigned long start, unsigned long end)
-
-   The software page tables for address space 'mm' for virtual
-   addresses in the range 'start' to 'end-1' are being torn down.
-
-   Some platforms cache the lowest level of the software page tables
-   in a linear virtually mapped array, to make TLB miss processing
-   more efficient.  On such platforms, since the TLB is caching the
-   software page table structure, it needs to be flushed when parts
-   of the software page table tree are unlinked/freed.
-
-   Sparc64 is one example of a platform which does this.
-
-   Usually, when munmap()'ing an area of user virtual address
-   space, the kernel leaves the page table parts around and just
-   marks the individual pte's as invalid.  However, if very large
-   portions of the address space are unmapped, the kernel frees up
-   those portions of the software page tables to prevent potential
-   excessive kernel memory usage caused by erratic mmap/mmunmap
-   sequences.  It is at these times that flush_tlb_pgtables will
-   be invoked.
-
-6) void update_mmu_cache(struct vm_area_struct *vma,
+5) void update_mmu_cache(struct vm_area_struct *vma,
                         unsigned long address, pte_t pte)
 
        At the end of every page fault, this routine is invoked to
@@ -123,7 +100,7 @@ changes occur:
        translations for software managed TLB configurations.
        The sparc64 port currently does this.
 
-7) void tlb_migrate_finish(struct mm_struct *mm)
+6) void tlb_migrate_finish(struct mm_struct *mm)
 
        This interface is called at the end of an explicit
        process migration. This interface provides a hook
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt
new file mode 100644
index 0000000..98a26f8
--- /dev/null
@@ -0,0 +1,545 @@
+                               CGROUPS
+                               -------
+
+Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
+
+Original copyright statements from cpusets.txt:
+Portions Copyright (C) 2004 BULL SA.
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+Modified by Paul Jackson <pj@sgi.com>
+Modified by Christoph Lameter <clameter@sgi.com>
+
+CONTENTS:
+=========
+
+1. Control Groups
+  1.1 What are cgroups ?
+  1.2 Why are cgroups needed ?
+  1.3 How are cgroups implemented ?
+  1.4 What does notify_on_release do ?
+  1.5 How do I use cgroups ?
+2. Usage Examples and Syntax
+  2.1 Basic Usage
+  2.2 Attaching processes
+3. Kernel API
+  3.1 Overview
+  3.2 Synchronization
+  3.3 Subsystem API
+4. Questions
+
+1. Control Groups
+==========
+
+1.1 What are cgroups ?
+----------------------
+
+Control Groups provide a mechanism for aggregating/partitioning sets of
+tasks, and all their future children, into hierarchical groups with
+specialized behaviour.
+
+Definitions:
+
+A *cgroup* associates a set of tasks with a set of parameters for one
+or more subsystems.
+
+A *subsystem* is a module that makes use of the task grouping
+facilities provided by cgroups to treat groups of tasks in
+particular ways. A subsystem is typically a "resource controller" that
+schedules a resource or applies per-cgroup limits, but it may be
+anything that wants to act on a group of processes, e.g. a
+virtualization subsystem.
+
+A *hierarchy* is a set of cgroups arranged in a tree, such that
+every task in the system is in exactly one of the cgroups in the
+hierarchy, and a set of subsystems; each subsystem has system-specific
+state attached to each cgroup in the hierarchy.  Each hierarchy has
+an instance of the cgroup virtual filesystem associated with it.
+
+At any one time there may be multiple active hierarchies of task
+cgroups. Each hierarchy is a partition of all tasks in the system.
+
+User level code may create and destroy cgroups by name in an
+instance of the cgroup virtual file system, specify and query to
+which cgroup a task is assigned, and list the task pids assigned to
+a cgroup. Those creations and assignments only affect the hierarchy
+associated with that instance of the cgroup file system.
+
+On their own, the only use for cgroups is for simple job
+tracking. The intention is that other subsystems hook into the generic
+cgroup support to provide new attributes for cgroups, such as
+accounting/limiting the resources which processes in a cgroup can
+access. For example, cpusets (see Documentation/cpusets.txt) allows
+you to associate a set of CPUs and a set of memory nodes with the
+tasks in each cgroup.
+
+1.2 Why are cgroups needed ?
+----------------------------
+
+There are multiple efforts to provide process aggregations in the
+Linux kernel, mainly for resource tracking purposes. Such efforts
+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
+namespaces. These all require the basic notion of a
+grouping/partitioning of processes, with newly forked processes ending
+in the same group (cgroup) as their parent process.
+
+The kernel cgroup patch provides the minimum essential kernel
+mechanisms required to efficiently implement such groups. It has
+minimal impact on the system fast paths, and provides hooks for
+specific subsystems such as cpusets to provide additional behaviour as
+desired.
+
+Multiple hierarchy support is provided to allow for situations where
+the division of tasks into cgroups is distinctly different for
+different subsystems - having parallel hierarchies allows each
+hierarchy to be a natural division of tasks, without having to handle
+complex combinations of tasks that would be present if several
+unrelated subsystems needed to be forced into the same tree of
+cgroups.
+
+At one extreme, each resource controller or subsystem could be in a
+separate hierarchy; at the other extreme, all subsystems
+would be attached to the same hierarchy.
+
+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
+that can benefit from multiple hierarchies, consider a large
+university server with various users - students, professors, system
+tasks etc. The resource planning for this server could be along the
+following lines:
+
+       CPU :           Top cpuset
+                       /       \
+               CPUSet1         CPUSet2
+                  |              |
+               (Profs)         (Students)
+
+               In addition, system tasks are attached to topcpuset (so
+               that they can run anywhere) with a limit of 20%
+
+       Memory : Professors (50%), students (30%), system (20%)
+
+       Disk : Prof (50%), students (30%), system (20%)
+
+       Network : WWW browsing (20%), Network File System (60%), others (20%)
+                               / \
+                       Prof (15%) students (5%)
+
+Browsers like firefox/lynx go into the WWW network class, while (k)nfsd goes
+into the NFS network class.
+
+At the same time firefox/lynx will share an appropriate CPU/Memory class
+depending on who launched it (prof/student).
+
+With the ability to classify tasks differently for different resources
+(by putting those resource subsystems in different hierarchies), the
+admin can easily set up a script which receives exec notifications
+and, depending on who is launching the browser, simply does:
+
+       # echo browser_pid > /mnt/<restype>/<userclass>/tasks
+
+With only a single hierarchy, the admin would potentially have to create
+a separate cgroup for every browser launched and associate it with the
+appropriate network and other resource classes.  This may lead to a
+proliferation of such cgroups.
+
+Also, let's say that the administrator would like to give enhanced network
+access temporarily to a student's browser (since it is night and the user
+wants to do online gaming :) or give one of the student's simulation
+apps enhanced CPU power.
+
+With the ability to write pids directly to resource classes, it's just a
+matter of:
+
+       # echo pid > /mnt/network/<new_class>/tasks
+       (after some time)
+       # echo pid > /mnt/network/<orig_class>/tasks
+
+Without this ability, the admin would have to split the cgroup into
+multiple separate ones and then associate the new cgroups with the
+new resource classes.
+
+
+
+1.3 How are cgroups implemented ?
+---------------------------------
+
+Control Groups extends the kernel as follows:
+
+ - Each task in the system has a reference-counted pointer to a
+   css_set.
+
+ - A css_set contains a set of reference-counted pointers to
+   cgroup_subsys_state objects, one for each cgroup subsystem
+   registered in the system. There is no direct link from a task to
+   the cgroup of which it's a member in each hierarchy, but this
+   can be determined by following pointers through the
+   cgroup_subsys_state objects. This is because accessing the
+   subsystem state is something that's expected to happen frequently
+   and in performance-critical code, whereas operations that require a
+   task's actual cgroup assignments (in particular, moving between
+   cgroups) are less common. A linked list runs through the cg_list
+   field of each task_struct using the css_set, anchored at
+   css_set->tasks.
+
+ - A cgroup hierarchy filesystem can be mounted for browsing and
+   manipulation from user space.
+
+ - You can list all the tasks (by pid) attached to any cgroup.
+
+The implementation of cgroups requires a few simple hooks
+into the rest of the kernel, none in performance critical paths:
+
+ - in init/main.c, to initialize the root cgroups and initial
+   css_set at system boot.
+
+ - in fork and exit, to attach and detach a task from its css_set.
+
+In addition, a new file system of type "cgroup" may be mounted, to
+enable browsing and modifying the cgroups presently known to the
+kernel.  When mounting a cgroup hierarchy, you may specify a
+comma-separated list of subsystems to mount as the filesystem mount
+options.  By default, mounting the cgroup filesystem attempts to
+mount a hierarchy containing all registered subsystems.
+
+If an active hierarchy with exactly the same set of subsystems already
+exists, it will be reused for the new mount. If no existing hierarchy
+matches, and any of the requested subsystems are in use in an existing
+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
+is activated, associated with the requested subsystems.
+
+It's not currently possible to bind a new subsystem to an active
+cgroup hierarchy, or to unbind a subsystem from an active cgroup
+hierarchy. This may be possible in future, but is fraught with nasty
+error-recovery issues.
+
+When a cgroup filesystem is unmounted, if there are any
+child cgroups created below the top-level cgroup, that hierarchy
+will remain active even though unmounted; if there are no
+child cgroups then the hierarchy will be deactivated.
+
+No new system calls are added for cgroups - all support for
+querying and modifying cgroups is via this cgroup file system.
+
+Each task under /proc has an added file named 'cgroup' displaying,
+for each active hierarchy, the subsystem names and the cgroup name
+as the path relative to the root of the cgroup file system.
+
+Each cgroup is represented by a directory in the cgroup file system
+containing the following files describing that cgroup:
+
+ - tasks: list of tasks (by pid) attached to that cgroup
+ - notify_on_release flag: run /sbin/cgroup_release_agent on exit?
+
+Other subsystems such as cpusets may add additional files in each
+cgroup directory.
+
+New cgroups are created using the mkdir system call or shell
+command.  The properties of a cgroup, such as its flags, are
+modified by writing to the appropriate file in that cgroup's
+directory, as listed above.
+
+The named hierarchical structure of nested cgroups allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cgroup allows organizing the work load
+on a system into related sets of tasks.  A task may be re-attached to
+any other cgroup, if allowed by the permissions on the necessary
+cgroup file system directories.
+
+When a task is moved from one cgroup to another, it gets a new
+css_set pointer - if there's an already existing css_set with the
+desired collection of cgroups then that group is reused, else a new
+css_set is allocated. Note that the current implementation uses a
+linear search to locate an appropriate existing css_set, so isn't
+very efficient. A future version will use a hash table for better
+performance.
+
+To allow access from a cgroup to the css_sets (and hence tasks)
+that comprise it, a set of cg_cgroup_link objects form a lattice;
+each cg_cgroup_link is linked into a list of cg_cgroup_links for
+a single cgroup on its cont_link_list field, and a list of
+cg_cgroup_links for a single css_set on its cg_link_list.
+
+Thus the set of tasks in a cgroup can be listed by iterating over
+each css_set that references the cgroup, and sub-iterating over
+each css_set's task set.
+
+The use of a Linux virtual file system (vfs) to represent the
+cgroup hierarchy provides for a familiar permission and name space
+for cgroups, with a minimum of additional kernel code.
+
+1.4 What does notify_on_release do ?
+------------------------------------
+
+*** notify_on_release is disabled in the current patch set. It will be
+*** reactivated in a future patch in a less-intrusive manner
+
+If the notify_on_release flag is enabled (1) in a cgroup, then
+whenever the last task in the cgroup leaves (exits or attaches to
+some other cgroup) and the last child cgroup of that cgroup
+is removed, then the kernel runs the command specified by the contents
+of the "release_agent" file in that hierarchy's root directory,
+supplying the pathname (relative to the mount point of the cgroup
+file system) of the abandoned cgroup.  This enables automatic
+removal of abandoned cgroups.  The default value of
+notify_on_release in the root cgroup at system boot is disabled
+(0).  The default value of other cgroups at creation is the current
+value of their parent's notify_on_release setting. The default value of
+a cgroup hierarchy's release_agent path is empty.
+
+1.5 How do I use cgroups ?
+--------------------------
+
+To start a new job that is to be contained within a cgroup, using
+the "cpuset" cgroup subsystem, the steps are something like:
+
+ 1) mkdir /dev/cgroup
+ 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
+ 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
+    the /dev/cgroup virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+ 5) Attach that task to the new cgroup by writing its pid to the
+    /dev/cgroup tasks file for that cgroup.
+ 6) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will set up a cgroup
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cgroup:
+
+  mount -t cgroup -ocpuset cpuset /dev/cgroup
+  cd /dev/cgroup
+  mkdir Charlie
+  cd Charlie
+  /bin/echo 2-3 > cpus
+  /bin/echo 1 > mems
+  /bin/echo $$ > tasks
+  sh
+  # The subshell 'sh' is now running in cgroup Charlie
+  # The next line should display '/Charlie'
+  cat /proc/self/cgroup
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, and using cgroups can be done through the cgroup
+virtual filesystem.
+
+To mount a cgroup hierarchy with all available subsystems, type:
+# mount -t cgroup xxx /dev/cgroup
+
+The "xxx" is not interpreted by the cgroup code, but will appear in
+/proc/mounts, so it may be any useful identifying string that you like.
+
+To mount a cgroup hierarchy with just the cpuset and numtasks
+subsystems, type:
+# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
+
+To change the set of subsystems bound to a mounted hierarchy, just
+remount with different options:
+
+# mount -o remount,cpuset,ns  /dev/cgroup
+
+Note that changing the set of subsystems is currently only supported
+when the hierarchy consists of a single (root) cgroup. Supporting
+the ability to arbitrarily bind/unbind subsystems from an existing
+cgroup hierarchy is intended to be implemented in the future.
+
+Then under /dev/cgroup you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /dev/cgroup
+is the cgroup that holds the whole system.
+
+If you want to create a new cgroup under /dev/cgroup:
+# cd /dev/cgroup
+# mkdir my_cgroup
+
+Now you want to do something with this cgroup.
+# cd my_cgroup
+
+In this directory you can find several files:
+# ls
+notify_on_release release_agent tasks
+(plus whatever files are added by the attached subsystems)
+
+Now attach your shell to this cgroup:
+# /bin/echo $$ > tasks
+
+You can also create cgroups inside your cgroup by using mkdir in this
+directory.
+# mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir:
+# rmdir my_sub_cs
+
+This will fail if the cgroup is in use (has cgroups inside, or
+has processes attached, or is held alive by another subsystem-specific
+reference).
+
+2.2 Attaching processes
+-----------------------
+
+# /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another:
+
+# /bin/echo PID1 > tasks
+# /bin/echo PID2 > tasks
+       ...
+# /bin/echo PIDn > tasks
+
+3. Kernel API
+=============
+
+3.1 Overview
+------------
+
+Each kernel subsystem that wants to hook into the generic cgroup
+system needs to create a cgroup_subsys object. This contains
+various methods, which are callbacks from the cgroup system, along
+with a subsystem id which will be assigned by the cgroup system.
+
+Other fields in the cgroup_subsys object include:
+
+- subsys_id: a unique array index for the subsystem, indicating which
+  entry in cgroup->subsys[] this subsystem should be
+  managing. Initialized by cgroup_register_subsys(); prior to this
+  it should be initialized to -1
+
+- hierarchy: an index indicating which hierarchy, if any, this
+  subsystem is currently attached to. If this is -1, then the
+  subsystem is not attached to any hierarchy, and all tasks should be
+  considered to be members of the subsystem's top_cgroup. It should
+  be initialized to -1.
+
+- name: should be initialized to a unique subsystem name prior to
+  calling cgroup_register_subsystem. Should be no longer than
+  MAX_CGROUP_TYPE_NAMELEN
+
+Each cgroup object created by the system has an array of pointers,
+indexed by subsystem id; each pointer is entirely managed by the
+owning subsystem; the generic cgroup code will never touch these pointers.
+
+3.2 Synchronization
+-------------------
+
+There is a global mutex, cgroup_mutex, used by the cgroup
+system. This should be taken by anything that wants to modify a
+cgroup. It may also be taken to prevent cgroups from being
+modified, but more specific locks may be more appropriate in that
+situation.
+
+See kernel/cgroup.c for more details.
+
+Subsystems can take/release the cgroup_mutex via the functions
+cgroup_lock()/cgroup_unlock().
+
+Accessing a task's cgroup pointer may be done in the following ways:
+- while holding cgroup_mutex
+- while holding the task's alloc_lock (via task_lock())
+- inside an rcu_read_lock() section via rcu_dereference()
+
+3.3 Subsystem API
+--------------------------
+
+Each subsystem should:
+
+- add an entry in linux/cgroup_subsys.h
+- define a cgroup_subsys object called <name>_subsys
+
+Each subsystem may export the following methods. The only mandatory
+methods are create/destroy. Any others that are null are presumed to
+be successful no-ops.
+
+struct cgroup_subsys_state *create(struct cgroup *cont)
+LL=cgroup_mutex
+
+Called to create a subsystem state object for a cgroup. The
+subsystem should allocate its subsystem state object for the passed
+cgroup, returning a pointer to the new object on success or a
+negative error code. On success, the subsystem pointer should point to
+a structure of type cgroup_subsys_state (typically embedded in a
+larger subsystem-specific object), which will be initialized by the
+cgroup system. Note that this will be called at initialization to
+create the root subsystem state for this subsystem; this case can be
+identified by the passed cgroup object having a NULL parent (since
+it's the root of the hierarchy) and may be an appropriate place for
+initialization code.
+
+void destroy(struct cgroup *cont)
+LL=cgroup_mutex
+
+The cgroup system is about to destroy the passed cgroup; the
+subsystem should do any necessary cleanup.
+
+int can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+              struct task_struct *task)
+LL=cgroup_mutex
+
+Called prior to moving a task into a cgroup; if the subsystem
+returns an error, this will abort the attach operation.  If a NULL
+task is passed, then a successful result indicates that *any*
+unspecified task can be moved into the cgroup. Note that this isn't
+called on a fork. If this method returns 0 (success) then this should
+remain valid while the caller holds cgroup_mutex.
+
+void attach(struct cgroup_subsys *ss, struct cgroup *cont,
+           struct cgroup *old_cont, struct task_struct *task)
+LL=cgroup_mutex
+
+Called after the task has been attached to the cgroup, to allow any
+post-attachment activity that requires memory allocations or blocking.
+
+void fork(struct cgroup_subsys *ss, struct task_struct *task)
+LL=callback_mutex, maybe read_lock(tasklist_lock)
+
+Called when a task is forked into a cgroup. Also called during
+registration for all existing tasks.
+
+void exit(struct cgroup_subsys *ss, struct task_struct *task)
+LL=callback_mutex
+
+Called during task exit.
+
+int populate(struct cgroup_subsys *ss, struct cgroup *cont)
+LL=none
+
+Called after creation of a cgroup to allow a subsystem to populate
+the cgroup directory with file entries.  The subsystem should make
+calls to cgroup_add_file() with objects of type cftype (see
+include/linux/cgroup.h for details).  Note that although this
+method can return an error code, the error code is currently not
+always handled well.
+
+void post_clone(struct cgroup_subsys *ss, struct cgroup *cont)
+
+Called at the end of cgroup_clone() to do any parameter
+initialization which might be required before a task could attach.  For
+example in cpusets, no task may attach before 'cpus' and 'mems' are set
+up.
+
+void bind(struct cgroup_subsys *ss, struct cgroup *root)
+LL=callback_mutex
+
+Called when a cgroup subsystem is rebound to a different hierarchy
+and root cgroup. Currently this will only involve movement between
+the default hierarchy (which never has sub-cgroups) and a hierarchy
+that is being created/destroyed (and hence has no sub-cgroups).
+
+4. Questions
+============
+
+Q: what's up with this '/bin/echo' ?
+A: bash's builtin 'echo' command does not check calls to write() against
+   errors. If you use it in the cgroup file system, you won't be
+   able to tell whether a command succeeded or failed.
+
+Q: When I attach processes, only the first one on the line really gets attached !
+A: We can only return one error code per call to write(). So you should
+   write only ONE pid per call.
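
To make section 3 above concrete, the following is a compile-able sketch
of the minimal subsystem shape it describes. The cgroup_subsys layout is
abridged and the allocation is a userspace stand-in; only the
create/destroy signatures, the -1 initializations, and the <name>_subsys
naming convention are taken from the text:

#include <stdlib.h>

struct cgroup;					/* opaque here */
struct cgroup_subsys_state { struct cgroup *cgroup; };

/* Abridged: the real object also carries the other callbacks of 3.3. */
struct cgroup_subsys {
	struct cgroup_subsys_state *(*create)(struct cgroup *cont);
	void (*destroy)(struct cgroup *cont);
	int subsys_id;		/* assigned by the cgroup system */
	int hierarchy;		/* -1 while not attached to a hierarchy */
	const char *name;	/* unique, at most MAX_CGROUP_TYPE_NAMELEN */
};

/* Subsystem state embedded in a larger subsystem-specific object,
 * as the create() description suggests. */
struct example_state {
	struct cgroup_subsys_state css;
	unsigned long example_counter;
};

static struct cgroup_subsys_state *example_create(struct cgroup *cont)
{
	struct example_state *st = calloc(1, sizeof(*st));

	return st ? &st->css : NULL;	/* kernel code returns an error code */
}

static void example_destroy(struct cgroup *cont)
{
	/* free the example_state allocated in example_create() */
}

struct cgroup_subsys example_subsys = {
	.create    = example_create,
	.destroy   = example_destroy,
	.subsys_id = -1,
	.hierarchy = -1,
	.name      = "example",
};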
+
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index b6d24c22274b8dfb0ea28c3e841594779f5b20e7..a741f658a3c9d7393c9693bb4486d30473438222 100644
@@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-)
   CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
   CPU is being offlined while tasks are frozen due to a suspend operation in
   progress
-- All process is migrated away from this outgoing CPU to a new CPU
+- All processes are migrated away from this outgoing CPU to new CPUs.
+  The new CPU is chosen from each process' current cpuset, which may be
+  a subset of all online CPUs.
 - All interrupts targeted to this CPU is migrated to a new CPU
 - timers/bottom half/task lets are also migrated to a new CPU
 - Once all services are migrated, kernel calls an arch specific routine
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index ec9de6917f01f34c088cd612121caaadc3edf08b..141bef1c859903bdb31cada3b8e3f7745827dc57 100644
@@ -7,6 +7,7 @@ Written by Simon.Derr@bull.net
 Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
 Modified by Paul Jackson <pj@sgi.com>
 Modified by Christoph Lameter <clameter@sgi.com>
+Modified by Paul Menage <menage@google.com>
 
 CONTENTS:
 =========
@@ -16,9 +17,9 @@ CONTENTS:
   1.2 Why are cpusets needed ?
   1.3 How are cpusets implemented ?
   1.4 What are exclusive cpusets ?
-  1.5 What does notify_on_release do ?
-  1.6 What is memory_pressure ?
-  1.7 What is memory spread ?
+  1.5 What is memory_pressure ?
+  1.6 What is memory spread ?
+  1.7 What is sched_load_balance ?
   1.8 How do I use cpusets ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
@@ -44,18 +45,19 @@ hierarchy visible in a virtual file system.  These are the essential
 hooks, beyond what is already present, required to manage dynamic
 job placement on large systems.
 
-Each task has a pointer to a cpuset.  Multiple tasks may reference
-the same cpuset.  Requests by a task, using the sched_setaffinity(2)
-system call to include CPUs in its CPU affinity mask, and using the
-mbind(2) and set_mempolicy(2) system calls to include Memory Nodes
-in its memory policy, are both filtered through that tasks cpuset,
-filtering out any CPUs or Memory Nodes not in that cpuset.  The
-scheduler will not schedule a task on a CPU that is not allowed in
-its cpus_allowed vector, and the kernel page allocator will not
-allocate a page on a node that is not allowed in the requesting tasks
-mems_allowed vector.
-
-User level code may create and destroy cpusets by name in the cpuset
+Cpusets use the generic cgroup subsystem described in
+Documentation/cgroup.txt.
+
+Requests by a task, using the sched_setaffinity(2) system call to
+include CPUs in its CPU affinity mask, and using the mbind(2) and
+set_mempolicy(2) system calls to include Memory Nodes in its memory
+policy, are both filtered through that task's cpuset, filtering out any
+CPUs or Memory Nodes not in that cpuset.  The scheduler will not
+schedule a task on a CPU that is not allowed in its cpus_allowed
+vector, and the kernel page allocator will not allocate a page on a
+node that is not allowed in the requesting task's mems_allowed vector.
+
+User level code may create and destroy cpusets by name in the cgroup
 virtual file system, manage the attributes and permissions of these
 cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
 specify and query to which cpuset a task is assigned, and list the
@@ -115,7 +117,7 @@ Cpusets extends these two mechanisms as follows:
  - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
    kernel.
  - Each task in the system is attached to a cpuset, via a pointer
-   in the task structure to a reference counted cpuset structure.
+   in the task structure to a reference counted cgroup structure.
  - Calls to sched_setaffinity are filtered to just those CPUs
    allowed in that tasks cpuset.
  - Calls to mbind and set_mempolicy are filtered to just
@@ -145,15 +147,10 @@ into the rest of the kernel, none in performance critical paths:
  - in page_alloc.c, to restrict memory to allowed nodes.
  - in vmscan.c, to restrict page recovery to the current cpuset.
 
-In addition a new file system, of type "cpuset" may be mounted,
-typically at /dev/cpuset, to enable browsing and modifying the cpusets
-presently known to the kernel.  No new system calls are added for
-cpusets - all support for querying and modifying cpusets is via
-this cpuset file system.
-
-Each task under /proc has an added file named 'cpuset', displaying
-the cpuset name, as the path relative to the root of the cpuset file
-system.
+You should mount the "cgroup" filesystem type in order to enable
+browsing and modifying the cpusets presently known to the kernel.  No
+new system calls are added for cpusets - all support for querying and
+modifying cpusets is via this cpuset file system.
 
 The /proc/<pid>/status file for each task has two added lines,
 displaying the tasks cpus_allowed (on which CPUs it may be scheduled)
@@ -163,16 +160,15 @@ in the format seen in the following example:
   Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
   Mems_allowed:   ffffffff,ffffffff
 
-Each cpuset is represented by a directory in the cpuset file system
-containing the following files describing that cpuset:
+Each cpuset is represented by a directory in the cgroup file system
+containing (on top of the standard cgroup files) the following
+files describing that cpuset:
 
  - cpus: list of CPUs in that cpuset
  - mems: list of Memory Nodes in that cpuset
  - memory_migrate flag: if set, move pages to cpusets nodes
  - cpu_exclusive flag: is cpu placement exclusive?
  - mem_exclusive flag: is memory placement exclusive?
- - tasks: list of tasks (by pid) attached to that cpuset
- - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
  - memory_pressure: measure of how much paging pressure in cpuset
 
 In addition, the root cpuset only has the following file:
@@ -237,21 +233,7 @@ such as requests from interrupt handlers, is allowed to be taken
 outside even a mem_exclusive cpuset.
 
 
-1.5 What does notify_on_release do ?
-------------------------------------
-
-If the notify_on_release flag is enabled (1) in a cpuset, then whenever
-the last task in the cpuset leaves (exits or attaches to some other
-cpuset) and the last child cpuset of that cpuset is removed, then
-the kernel runs the command /sbin/cpuset_release_agent, supplying the
-pathname (relative to the mount point of the cpuset file system) of the
-abandoned cpuset.  This enables automatic removal of abandoned cpusets.
-The default value of notify_on_release in the root cpuset at system
-boot is disabled (0).  The default value of other cpusets at creation
-is the current value of their parents notify_on_release setting.
-
-
-1.6 What is memory_pressure ?
+1.5 What is memory_pressure ?
 -----------------------------
 The memory_pressure of a cpuset provides a simple per-cpuset metric
 of the rate that the tasks in a cpuset are attempting to free up in
@@ -308,7 +290,7 @@ the tasks in the cpuset, in units of reclaims attempted per second,
 times 1000.
 
 
-1.7 What is memory spread ?
+1.6 What is memory spread ?
 ---------------------------
 There are two boolean flag files per cpuset that control where the
 kernel allocates pages for the file system buffers and related in
@@ -378,6 +360,142 @@ policy, especially for jobs that might have one thread reading in the
 data set, the memory allocation across the nodes in the jobs cpuset
 can become very uneven.
 
+1.7 What is sched_load_balance ?
+--------------------------------
+
+The kernel scheduler (kernel/sched.c) automatically load balances
+tasks.  If one CPU is underutilized, kernel code running on that
+CPU will look for tasks on other more overloaded CPUs and move those
+tasks to itself, within the constraints of such placement mechanisms
+as cpusets and sched_setaffinity.
+
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the task list increases more than
+linearly with the number of CPUs being balanced.  So the scheduler
+has support to partition the system's CPUs into a number of sched
+domains such that it only load balances within each sched domain.
+Each sched domain covers some subset of the CPUs in the system;
+no two sched domains overlap; some CPUs might not be in any sched
+domain and hence won't be load balanced.
+
+Put simply, it costs less to balance between two smaller sched domains
+than one big one, but doing so means that overloads in one of the
+two domains won't be load balanced to the other one.
+
+By default, there is one sched domain covering all CPUs, except those
+marked isolated using the kernel boot time "isolcpus=" argument.
+
+This default load balancing across all CPUs is not well suited for
+the following two situations:
+ 1) On large systems, load balancing across many CPUs is expensive.
+    If the system is managed using cpusets to place independent jobs
+    on separate sets of CPUs, full load balancing is unnecessary.
+ 2) Systems supporting realtime on some CPUs need to minimize
+    system overhead on those CPUs, including avoiding task load
+    balancing if that is not needed.
+
+When the per-cpuset flag "sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpuset's allowed 'cpus'
+be contained in a single sched domain, ensuring that load balancing
+can move a task (not otherwise pinned, as by sched_setaffinity)
+from any CPU in that cpuset to any other.
+
+When the per-cpuset flag "sched_load_balance" is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+--except-- in so far as is necessary because some overlapping cpuset
+has "sched_load_balance" enabled.
+
+So, for example, if the top cpuset has the flag "sched_load_balance"
+enabled, then the scheduler will have one sched domain covering all
+CPUs, and the setting of the "sched_load_balance" flag in any other
+cpusets won't matter, as we're already fully load balancing.
+
+Therefore in the above two situations, the top cpuset flag
+"sched_load_balance" should be disabled, and only some of the smaller,
+child cpusets have this flag enabled.
+
+When doing this, you don't usually want to leave any unpinned tasks in
+the top cpuset that might use non-trivial amounts of CPU, as such tasks
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendant cpusets.  Even if
+such a task could use spare CPU cycles in some other CPUs, the kernel
+scheduler might not consider the possibility of load balancing that
+task to that underused CPU.
+
+Of course, tasks pinned to a particular CPU can be left in a cpuset
+that disables "sched_load_balance" as those tasks aren't going anywhere
+else anyway.
+
+There is an impedance mismatch here, between cpusets and sched domains.
+Cpusets are hierarchical and nest.  Sched domains are flat; they don't
+overlap and each CPU is in at most one sched domain.
+
+It is necessary for sched domains to be flat because load balancing
+across partially overlapping sets of CPUs would risk unstable dynamics
+that would be beyond our understanding.  So if each of two partially
+overlapping cpusets enables the flag 'sched_load_balance', then we
+form a single sched domain that is a superset of both.  We won't move
+a task to a CPU outside its cpuset, but the scheduler load balancing
+code might waste some compute cycles considering that possibility.
+
+This mismatch is why there is not a simple one-to-one relation
+between which cpusets have the flag "sched_load_balance" enabled,
+and the sched domain configuration.  If a cpuset enables the flag, it
+will get balancing across all its CPUs, but if it disables the flag,
+it will only be assured of no load balancing if no other overlapping
+cpuset enables the flag.
+
+If two cpusets have partially overlapping 'cpus' allowed, and only
+one of them has this flag enabled, then the other may find its
+tasks only partially load balanced, just on the overlapping CPUs.
+This is just the general case of the top_cpuset example given a few
+paragraphs above.  In the general case, as in the top cpuset case,
+don't leave tasks that might use non-trivial amounts of CPU in
+such partially load balanced cpusets, as they may be artificially
+constrained to some subset of the CPUs allowed to them, for lack of
+load balancing to the other CPUs.
+
+1.7.1 sched_load_balance implementation details.
+------------------------------------------------
+
+The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary
+to most cpuset flags.)  When enabled for a cpuset, the kernel will
+ensure that it can load balance across all the CPUs in that cpuset
+(makes sure that all the CPUs in the cpus_allowed of that cpuset are
+in the same sched domain.)
+
+If two overlapping cpusets both have 'sched_load_balance' enabled,
+then they will be (must be) both in the same sched domain.
+
+If, as is the default, the top cpuset has 'sched_load_balance' enabled,
+then by the above that means there is a single sched domain covering
+the whole system, regardless of any other cpuset settings.
+
+The kernel commits to user space that it will avoid load balancing
+where it can.  It will pick as fine a granularity partition of sched
+domains as it can while still providing load balancing for any set
+of CPUs allowed to a cpuset having 'sched_load_balance' enabled.
+
+The internal kernel cpuset to scheduler interface passes from the
+cpuset code to the scheduler code a partition of the load balanced
+CPUs in the system. This partition is a set of subsets (represented
+as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all
+the CPUs that must be load balanced.
+
+Whenever the 'sched_load_balance' flag changes, or CPUs come or go
+from a cpuset with this flag enabled, or a cpuset with this flag
+enabled is removed, the cpuset code builds a new such partition and
+passes it to the scheduler sched domain setup code, to have the sched
+domains rebuilt as necessary.
+
+This partition exactly defines what sched domains the scheduler should
+set up - one sched domain for each element (cpumask_t) in the partition.
+
+The scheduler remembers the currently active sched domain partitions.
+When the scheduler routine partition_sched_domains() is invoked from
+the cpuset code to update these sched domains, it compares the new
+partition requested with the current, and updates its sched domains,
+removing the old and adding the new, for each change.
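
As a toy model of the handoff described in 1.7.1 -- an array of
pairwise-disjoint CPU masks, one sched domain per element -- with
cpumask_t reduced to a plain bitmask (the in-kernel
partition_sched_domains() interface differs in detail):

#include <stdio.h>

typedef unsigned long cpumask_t;	/* toy stand-in: one bit per CPU */

/* One sched domain per element; masks must be pairwise disjoint and
 * together cover every CPU that still needs load balancing. */
static void partition_sched_domains_toy(int ndoms, const cpumask_t *doms)
{
	int i;

	for (i = 0; i < ndoms; i++)
		printf("sched domain %d: cpus 0x%lx\n", i, doms[i]);
}

int main(void)
{
	/* Two disjoint load-balanced cpusets: CPUs 0-1 and CPUs 2-3. */
	cpumask_t doms[] = { 0x3, 0xc };

	partition_sched_domains_toy(2, doms);
	return 0;
}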
 
 1.8 How do I use cpusets ?
 --------------------------
@@ -469,7 +587,7 @@ than stress the kernel.
 To start a new job that is to be contained within a cpuset, the steps are:
 
  1) mkdir /dev/cpuset
- 2) mount -t cpuset none /dev/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /dev/cpuset
  3) Create the new cpuset by doing mkdir's and write's (or echo's) in
     the /dev/cpuset virtual file system.
  4) Start a task that will be the "founding father" of the new job.
@@ -481,7 +599,7 @@ For example, the following sequence of commands will set up a cpuset
 named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
 and then start a subshell 'sh' in that cpuset:
 
-  mount -t cpuset none /dev/cpuset
+  mount -t cgroup -ocpuset cpuset /dev/cpuset
   cd /dev/cpuset
   mkdir Charlie
   cd Charlie
@@ -513,7 +631,7 @@ Creating, modifying, and using the cpusets can be done through the cpuset
 virtual filesystem.
 
 To mount it, type:
-# mount -t cpuset none /dev/cpuset
+# mount -t cgroup -o cpuset cpuset /dev/cpuset
 
 Then under /dev/cpuset you can find a tree that corresponds to the
 tree of the cpusets in the system. For instance, /dev/cpuset
@@ -556,6 +674,18 @@ To remove a cpuset, just use rmdir:
 This will fail if the cpuset is in use (has cpusets inside, or has
 processes attached).
 
+Note that for legacy reasons, the "cpuset" filesystem exists as a
+wrapper around the cgroup filesystem.
+
+The command
+
+mount -t cpuset X /dev/cpuset
+
+is equivalent to
+
+mount -t cgroup -ocpuset X /dev/cpuset
+echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
+
 2.2 Adding/removing cpus
 ------------------------
 
index d9d523099bb7d69d3dd3e4124167e5dd9bad1fc4..4d932dc660982d34fe488a2e9d8f10957a4d030b 100644 (file)
@@ -42,8 +42,8 @@ static int __init button_init(void)
                goto err_free_irq;
        }
 
-       button_dev->evbit[0] = BIT(EV_KEY);
-       button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+       button_dev->evbit[0] = BIT_MASK(EV_KEY);
+       button_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
        error = input_register_device(button_dev);
        if (error) {
@@ -217,14 +217,15 @@ If you don't need absfuzz and absflat, you can set them to zero, which means
 that the thing is precise and always returns to exactly the center position
 (if it has any).
 
-1.4 NBITS(), LONG(), BIT()
+1.4 BITS_TO_LONGS(), BIT_WORD(), BIT_MASK()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-These three macros from input.h help some bitfield computations:
+These three macros from bitops.h help with some bitfield computations:
 
-       NBITS(x) - returns the length of a bitfield array in longs for x bits
-       LONG(x)  - returns the index in the array in longs for bit x
-       BIT(x)   - returns the index in a long for bit x
+       BITS_TO_LONGS(x) - returns the length of a bitfield array in longs for
+                          x bits
+       BIT_WORD(x)      - returns the index of the long containing bit x
+       BIT_MASK(x)      - returns the bit mask, within one long, for bit x
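+
+For instance, a driver declaring its own key bitmap might use them as
+follows (a minimal sketch; the array name and the use of KEY_MAX + 1
+for its size are illustrative, not required by the API):
+
+       #include <linux/bitops.h>
+       #include <linux/input.h>
+
+       /* one bit per possible key code */
+       static unsigned long mykeybit[BITS_TO_LONGS(KEY_MAX + 1)];
+
+       static void example_setup(void)
+       {
+               /* mark BTN_0 as a key this device can report */
+               mykeybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_0);
+       }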
 
 1.5 The id* and name fields
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
index 1b37b28cc2343b82a7150d8c74724f4d2ecb8e7f..d0ac72cc19ff8e29667e7a2c58e21d2d0cfafcb5 100644 (file)
@@ -231,6 +231,32 @@ Dump-capture kernel config options (Arch Dependent, ia64)
   any space below the alignment point will be wasted.
 
 
+Extended crashkernel syntax
+===========================
+
+While the "crashkernel=size[@offset]" syntax is sufficient for most
+configurations, sometimes it's handy to have the reserved memory dependent
+on the value of System RAM -- that's mostly for distributors that pre-setup
+the kernel command line to avoid a unbootable system after some memory has
+been removed from the machine.
+
+The syntax is:
+
+    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+    range=start-[end]
+
+For example:
+
+    crashkernel=512M-2G:64M,2G-:128M
+
+This would mean:
+
+    1) if the RAM is smaller than 512M, then don't reserve anything
+       (this is the "rescue" case)
+    2) if the RAM size is between 512M and 2G, then reserve 64M
+    3) if the RAM size is larger than 2G, then reserve 128M
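+
+A small sketch in C may make the selection rule clearer (a hypothetical
+helper, not the kernel's actual parser; all names are illustrative):
+
+    struct ck_range {
+        unsigned long long start;   /* inclusive */
+        unsigned long long end;     /* exclusive; 0 means "no upper end" */
+        unsigned long long size;    /* bytes to reserve */
+    };
+
+    /* For crashkernel=512M-2G:64M,2G-:128M the table would hold
+     * { 512M, 2G, 64M } and { 2G, 0, 128M }. */
+    static unsigned long long pick_crash_size(unsigned long long system_ram,
+                                              const struct ck_range *r, int n)
+    {
+        int i;
+
+        for (i = 0; i < n; i++)
+            if (system_ram >= r[i].start &&
+                (r[i].end == 0 || system_ram < r[i].end))
+                return r[i].size;
+        return 0;   /* no match: reserve nothing (the "rescue" case) */
+    }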
+
+
 Boot into System Kernel
 =======================
 
index 189df0bcab99bfa1dfdeff83c1fd8d67a7fe83d3..0a3fed445249f84b8e42c9f904d0e0df16a75d42 100644 (file)
@@ -497,6 +497,13 @@ and is between 256 and 4096 characters. It is defined in the file
                        [KNL] Reserve a chunk of physical memory to
                        hold a kernel to switch to with kexec on panic.
 
+       crashkernel=range1:size1[,range2:size2,...][@offset]
+                       [KNL] Same as above, but depends on the memory
+                       in the running system. The syntax of range is
+                       start-[end] where start and end are both
+                       memory values (amount[KMG]). See also
+                       Documentation/kdump/kdump.txt for an example.
+
        cs4232=         [HW,OSS]
                        Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq>
 
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
new file mode 100644 (file)
index 0000000..295a71b
--- /dev/null
@@ -0,0 +1,81 @@
+                    Using the Linux Kernel Markers
+
+                           Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Markers and their use. It shows how
+to insert markers in the kernel, how to connect probe functions to them,
+and gives some examples of probe functions.
+
+
+* Purpose of markers
+
+A marker placed in code provides a hook to call a function (probe) that you can
+provide at runtime. A marker can be "on" (a probe is connected to it) or "off"
+(no probe is attached). When a marker is "off" it has no effect, except for
+adding a tiny time penalty (checking a condition for a branch) and a small
+space penalty (a few bytes for the function call at the end of the
+instrumented function, plus a data structure in a separate section).  When a
+marker is "on", the function you provide is called each time the marker is
+executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the marker site).
+
+You can put markers at important locations in the code. Markers are
+lightweight hooks that can pass an arbitrary number of parameters,
+described in a printk-like format string, to the attached probe function.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+In order to use the macro trace_mark, you should include linux/marker.h.
+
+#include <linux/marker.h>
+
+And,
+
+trace_mark(subsystem_event, "%d %s", someint, somestring);
+Where:
+- subsystem_event is an identifier unique to your event
+    - subsystem is the name of your subsystem.
+    - event is the name of the event to mark.
+- "%d %s" is the formatted string for the serializer.
+- someint is an integer.
+- somestring is a char pointer.
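+
+Put together, a marker inside a (purely hypothetical) subsystem function
+could look like this:
+
+#include <linux/marker.h>
+
+/* "fs_read_begin" and both arguments are made-up names for illustration */
+static void fs_do_read(int count, const char *filename)
+{
+	trace_mark(fs_read_begin, "%d %s", count, filename);
+	/* ... perform the actual read ... */
+}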
+
+Connecting a function (probe) to a marker is done by providing a probe
+(function to call) for the specific marker through marker_probe_register();
+the probe is then activated by calling marker_arm(). It can be deactivated
+by calling marker_disarm() as many times as marker_arm() has been called.
+Removing a probe
+is done through marker_probe_unregister(); it will disarm the probe and make
+sure there is no caller left using the probe when it returns. Probe removal is
+preempt-safe because preemption is disabled around the probe call. See the
+"Probe example" section below for a sample probe module.
+
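+Before that section, here is a minimal sketch of registering and arming
+such a probe from a module.  The probe signature used below (marker data
+pointer, private data pointer, format string, then the variadic
+arguments) is modeled on the sample modules of this kernel version and
+should be checked against linux/marker.h:
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/marker.h>
+
+/* runs in the caller's context each time the armed marker fires */
+static void probe_subsystem_event(const struct marker *mdata,
+	void *private, const char *format, ...)
+{
+	va_list ap;
+	int someint;
+	const char *somestring;
+
+	va_start(ap, format);
+	someint = va_arg(ap, int);
+	somestring = va_arg(ap, const char *);
+	va_end(ap);
+
+	printk(KERN_DEBUG "subsystem_event: %d %s\n", someint, somestring);
+}
+
+static int __init probe_init(void)
+{
+	int ret;
+
+	ret = marker_probe_register("subsystem_event", "%d %s",
+				    probe_subsystem_event, NULL);
+	if (ret)
+		return ret;
+	return marker_arm("subsystem_event");
+}
+
+static void __exit probe_exit(void)
+{
+	marker_disarm("subsystem_event");
+	marker_probe_unregister("subsystem_event");
+}
+
+module_init(probe_init);
+module_exit(probe_exit);
+MODULE_LICENSE("GPL");
+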
+The marker mechanism supports inserting multiple instances of the same marker.
+Markers can be put in inline functions, inlined static functions, and
+unrolled loops as well as regular functions.
+
+The naming scheme "subsystem_event" is suggested here as a convention
+intended to limit collisions. Marker names are global to the kernel: they
+are considered the same whether they appear in the core kernel image or
+in modules. If markers sharing a name have conflicting format strings,
+the markers detected to have a mismatched format string will not be
+armed, and a printk warning identifying the inconsistency will be output:
+
+"Format mismatch for probe probe_name (format), marker (format)"
+
+
+* Probe / marker example
+
+See the example provided in samples/markers/src
+
+Compile them with your kernel.
+
+Run, as root:
+modprobe marker-example (insmod order is not important)
+modprobe probe-example
+cat /proc/marker-example (returns an expected error)
+rmmod marker-example probe-example
+dmesg
index 529b9048d97e950d2ea58948d2c5b93e67c0a563..68ef80b30340371e13ff8e38777d0bf9d715dc33 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -773,6 +773,9 @@ endef
 vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) vmlinux.o FORCE
 ifdef CONFIG_HEADERS_CHECK
        $(Q)$(MAKE) -f $(srctree)/Makefile headers_check
+endif
+ifdef CONFIG_SAMPLES
+       $(Q)$(MAKE) $(build)=samples
 endif
        $(call vmlinux-modpost)
        $(call if_changed_rule,vmlinux__)
index 2a85dc33907c2c66f09c3e3ce61e96ef19e23d3a..4c002ba37e5076fe726826e3a2a73deb29d25176 100644 (file)
@@ -654,7 +654,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/alpha/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/alpha/Kconfig.debug"
 
index 8c8aaa205eae123f208163749efaabe4ad2df710..8d2982aa1b8db8f3fdf8fbbf29321cb3b4dea010 100644 (file)
@@ -69,7 +69,7 @@ __down_failed(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): down failed(%p)\n",
-              tsk->comm, tsk->pid, sem);
+              tsk->comm, task_pid_nr(tsk), sem);
 #endif
 
        tsk->state = TASK_UNINTERRUPTIBLE;
@@ -98,7 +98,7 @@ __down_failed(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): down acquired(%p)\n",
-              tsk->comm, tsk->pid, sem);
+              tsk->comm, task_pid_nr(tsk), sem);
 #endif
 }
 
@@ -111,7 +111,7 @@ __down_failed_interruptible(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): down failed(%p)\n",
-              tsk->comm, tsk->pid, sem);
+              tsk->comm, task_pid_nr(tsk), sem);
 #endif
 
        tsk->state = TASK_INTERRUPTIBLE;
@@ -139,7 +139,7 @@ __down_failed_interruptible(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): down %s(%p)\n",
-              current->comm, current->pid,
+              current->comm, task_pid_nr(current),
               (ret < 0 ? "interrupted" : "acquired"), sem);
 #endif
        return ret;
@@ -168,7 +168,7 @@ down(struct semaphore *sem)
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): down(%p) <count=%d> from %p\n",
-              current->comm, current->pid, sem,
+              current->comm, task_pid_nr(current), sem,
               atomic_read(&sem->count), __builtin_return_address(0));
 #endif
        __down(sem);
@@ -182,7 +182,7 @@ down_interruptible(struct semaphore *sem)
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): down(%p) <count=%d> from %p\n",
-              current->comm, current->pid, sem,
+              current->comm, task_pid_nr(current), sem,
               atomic_read(&sem->count), __builtin_return_address(0));
 #endif
        return __down_interruptible(sem);
@@ -201,7 +201,7 @@ down_trylock(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): down_trylock %s from %p\n",
-              current->comm, current->pid,
+              current->comm, task_pid_nr(current),
               ret ? "failed" : "acquired",
               __builtin_return_address(0));
 #endif
@@ -217,7 +217,7 @@ up(struct semaphore *sem)
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
        printk("%s(%d): up(%p) <count=%d> from %p\n",
-              current->comm, current->pid, sem,
+              current->comm, task_pid_nr(current), sem,
               atomic_read(&sem->count), __builtin_return_address(0));
 #endif
        __up(sem);
index ec0f05e0d8ffc169567c0f78cbde1e194b7105db..2dc7f9fed213704749e0e2b7d58e161b909fad7e 100644 (file)
@@ -182,7 +182,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
 #ifdef CONFIG_SMP
        printk("CPU %d ", hard_smp_processor_id());
 #endif
-       printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err);
+       printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
        dik_show_regs(regs, r9_15);
        add_taint(TAINT_DIE);
        dik_show_trace((unsigned long *)(regs+1));
@@ -646,7 +646,7 @@ got_exception:
        lock_kernel();
 
        printk("%s(%d): unhandled unaligned exception\n",
-              current->comm, current->pid);
+              current->comm, task_pid_nr(current));
 
        printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx\n",
               pc, una_reg(26), regs->ps);
@@ -786,7 +786,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
                }
                if (++cnt < 5) {
                        printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n",
-                              current->comm, current->pid,
+                              current->comm, task_pid_nr(current),
                               regs->pc - 4, va, opcode, reg);
                }
                last_time = jiffies;
index 7ad84ea0acf879e58aac56a5845c0ec8271566a4..32afaa3fa6860e65762a06bcf6b13c961739b12f 100644 (file)
@@ -3,7 +3,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 /* This is fls(x)-1, except zero is held to zero.  This allows most
    efficent input into extbl, plus it allows easy handling of fls(0)=0.  */
index 25154df3055abf7a501db5b5b00255d0bcbb410e..4829f96585b15a1fd59f0409750df4ed59d99d35 100644 (file)
@@ -188,13 +188,13 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
        /* We ran out of memory, or some other thing happened to us that
           made us unable to handle the page fault gracefully.  */
  out_of_memory:
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
        }
        printk(KERN_ALERT "VM: killing process %s(%d)\n",
-              current->comm, current->pid);
+              current->comm, task_pid_nr(current));
        if (!user_mode(regs))
                goto no_context;
        do_group_exit(SIGKILL);
diff --git a/arch/alpha/oprofile/Kconfig b/arch/alpha/oprofile/Kconfig
deleted file mode 100644 (file)
index 5ade198..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-       depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-         
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-endmenu
-
index 0a0c88d0039ca43a195d2b746fc7b6943aca882b..4cee938df01ef87e951613d54b6b23c890d6ade5 100644 (file)
@@ -1068,7 +1068,7 @@ endmenu
 
 source "fs/Kconfig"
 
-source "arch/arm/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/arm/Kconfig.debug"
 
index 93b7f8e22dcc1b3b69aa424c60d0a91bed19f772..4f1a03124a74ae4a543df710a8c3e6af3d0200ad 100644 (file)
@@ -265,7 +265,7 @@ void __show_regs(struct pt_regs *regs)
 void show_regs(struct pt_regs * regs)
 {
        printk("\n");
-       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+       printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
        __show_regs(regs);
        __backtrace();
 }
index 5feee722ea9878e08a9f6415fe7de8e45d7595f9..4b05dc5c1023666ce6efaa1c684776de98587875 100644 (file)
@@ -382,16 +382,16 @@ static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp)
 
                if (ret != 2 || old_insn.thumb != BREAKINST_THUMB)
                        printk(KERN_ERR "%s:%d: corrupted Thumb breakpoint at "
-                               "0x%08lx (0x%04x)\n", task->comm, task->pid,
-                               addr, old_insn.thumb);
+                               "0x%08lx (0x%04x)\n", task->comm,
+                               task_pid_nr(task), addr, old_insn.thumb);
        } else {
                ret = swap_insn(task, addr & ~3, &old_insn.arm,
                                &bp->insn.arm, 4);
 
                if (ret != 4 || old_insn.arm != BREAKINST_ARM)
                        printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at "
-                               "0x%08lx (0x%08x)\n", task->comm, task->pid,
-                               addr, old_insn.arm);
+                               "0x%08lx (0x%08x)\n", task->comm,
+                               task_pid_nr(task), addr, old_insn.arm);
        }
 }
 
index 8ad47619c07906b31dcd60c97490426f6330e34c..4764bd9ccee801ced5838d9043a5923c9e931cb8 100644 (file)
@@ -223,7 +223,7 @@ static void __die(const char *str, int err, struct thread_info *thread, struct p
        print_modules();
        __show_regs(regs);
        printk("Process %s (pid: %d, stack limit = 0x%p)\n",
-               tsk->comm, tsk->pid, thread + 1);
+               tsk->comm, task_pid_nr(tsk), thread + 1);
 
        if (!user_mode(regs) || in_interrupt()) {
                dump_mem("Stack: ", regs->ARM_sp,
@@ -337,7 +337,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_USER
        if (user_debug & UDBG_UNDEFINED) {
                printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
-                       current->comm, current->pid, pc);
+                       current->comm, task_pid_nr(current), pc);
                dump_instr(regs);
        }
 #endif
@@ -388,7 +388,7 @@ static int bad_syscall(int n, struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_USER
        if (user_debug & UDBG_SYSCALL) {
                printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
-                       current->pid, current->comm, n);
+                       task_pid_nr(current), current->comm, n);
                dump_instr(regs);
        }
 #endif
@@ -565,7 +565,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
         */
        if (user_debug & UDBG_SYSCALL) {
                printk("[%d] %s: arm syscall %d\n",
-                      current->pid, current->comm, no);
+                      task_pid_nr(current), current->comm, no);
                dump_instr(regs);
                if (user_mode(regs)) {
                        __show_regs(regs);
@@ -642,7 +642,7 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_USER
        if (user_debug & UDBG_BADABORT) {
                printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
-                       current->pid, current->comm, code, instr);
+                       task_pid_nr(current), current->comm, code, instr);
                dump_instr(regs);
                show_pte(current->mm, addr);
        }
index 074b7cb0774349d6a51e7c4d210e472a892c4e01..e162cca5917fb21c72e29ae127da355c36e7cd27 100644 (file)
@@ -757,7 +757,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        if (ai_usermode & 1)
                printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
                       "Address=0x%08lx FSR 0x%03x\n", current->comm,
-                       current->pid, instrptr,
+                       task_pid_nr(current), instrptr,
                        thumb_mode(regs) ? 4 : 8,
                        thumb_mode(regs) ? tinstr : instr,
                        addr, fsr);
index 59ed1d05b71bf6d858e515d928da4ad102276d91..a8a7dab757eb4977bfc88201da37dfe85713cdbc 100644 (file)
@@ -197,7 +197,7 @@ survive:
        return fault;
 
 out_of_memory:
-       if (!is_init(tsk))
+       if (!is_global_init(tsk))
                goto out;
 
        /*
diff --git a/arch/arm/oprofile/Kconfig b/arch/arm/oprofile/Kconfig
deleted file mode 100644 (file)
index afd93ad..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-
-menu "Profiling support"
-       depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-if OPROFILE
-
-config OPROFILE_ARMV6
-       bool
-       depends on CPU_V6 && !SMP
-       default y
-       select OPROFILE_ARM11_CORE
-
-config OPROFILE_MPCORE
-       bool
-       depends on CPU_V6 && SMP
-       default y
-       select OPROFILE_ARM11_CORE
-
-config OPROFILE_ARM11_CORE
-       bool
-
-endif
-
-endmenu
-
index 9a73ce7eb50fb1da8e6c04ca41c327108b602b6f..8a7caf8e7b454da39a915426a1d7ea363faf361f 100644 (file)
@@ -89,7 +89,7 @@ void _exception(long signr, struct pt_regs *regs, int code,
         * generate the same exception over and over again and we get
         * nowhere.  Better to kill it and let the kernel panic.
         */
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                __sighandler_t handler;
 
                spin_lock_irq(&current->sighand->siglock);
index 11472f8701bdf4a88202d81e723efef38208e743..6560cb18b4e3403e498cb9076ea7025b61cbb2d0 100644 (file)
@@ -160,7 +160,7 @@ bad_area:
                if (exception_trace && printk_ratelimit())
                        printk("%s%s[%d]: segfault at %08lx pc %08lx "
                               "sp %08lx ecr %lu\n",
-                              is_init(tsk) ? KERN_EMERG : KERN_INFO,
+                              is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
                               tsk->comm, tsk->pid, address, regs->pc,
                               regs->sp, ecr);
                _exception(SIGSEGV, regs, code, address);
@@ -209,7 +209,7 @@ no_context:
         */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
@@ -231,7 +231,7 @@ do_sigbus:
        if (exception_trace)
                printk("%s%s[%d]: bus error at %08lx pc %08lx "
                       "sp %08lx ecr %lu\n",
-                      is_init(tsk) ? KERN_EMERG : KERN_INFO,
+                      is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
                       tsk->comm, tsk->pid, address, regs->pc,
                       regs->sp, ecr);
 
index aa9db3073312c2d8c18ce1726e492a4f4eb61e3a..4c5ca9d5e40f74f0083e21b7dd2cc1f0bdf78467 100644 (file)
@@ -1012,7 +1012,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/blackfin/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 menu "Kernel hacking"
 
diff --git a/arch/blackfin/oprofile/Kconfig b/arch/blackfin/oprofile/Kconfig
deleted file mode 100644 (file)
index 0a2fd99..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-menu "Profiling support"
-depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-config HARDWARE_PM
-       tristate "Hardware Performance Monitor Profiling"
-       depends on PROFILING
-       help
-         take use of hardware performance monitor to profiling the kernel
-         and application.
-
-         If unsure, say N.
-
-endmenu
index 6b4d026a00a120e5728d8930bb6d4ad43a6b4006..21900a9378bbc04b4bb5fccd6161fb86a4d86ecd 100644 (file)
@@ -196,6 +196,8 @@ source "sound/Kconfig"
 
 source "drivers/usb/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/cris/Kconfig.debug"
 
 source "security/Kconfig"
index 74eef7111f2bd429e60d93e1a5d021dd3ca5f50d..43153e767bb1ab548f1edc52bb755233b38830c3 100644 (file)
@@ -375,6 +375,8 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/frv/Kconfig.debug"
 
 source "security/Kconfig"
index ad753c1e9b8fffa652e4e682f107f42ec400c5fc..9e38f99bbab89c7e0677e5da0f38f09d9d6590ea 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
index e0983f6926ed77be5b8874a7624051c258b63bc4..3c2752ca97754eb881d5fef6ceeeb576ee679df1 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
index c157eeff871d218348e12bcb48f2c3b951e1eaef..7754c7338e4b42cfaeb4c3a76158665f3b05d275 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
index c7e59dcadee47d58c7c44dac6d1cbce7a2134730..7ddb69089ed47c1373cad3e65a517cb2833d33e0 100644 (file)
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/module.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/delay.h>
index e35f74e6e50566d489852d5156013e5e65de9c5d..e2e9f57abe2e33c012961f49734dea1837a69834 100644 (file)
@@ -223,6 +223,8 @@ endmenu
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/h8300/Kconfig.debug"
 
 source "security/Kconfig"
index b84d5050e92ee7189d7aba36d3fd6e398b20e920..04be7a7d090fc5306371481d92f1908e8312ee6d 100644 (file)
@@ -1256,31 +1256,6 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-menuconfig INSTRUMENTATION
-       bool "Instrumentation Support"
-       default y
-       ---help---
-         Say Y here to get to see options related to performance measurement,
-         debugging, and testing. This option alone does not add any kernel code.
-
-         If you say N, all options in this submenu will be skipped and disabled.
-
-if INSTRUMENTATION
-
-source "arch/x86/oprofile/Kconfig"
-
-config KPROBES
-       bool "Kprobes"
-       depends on KALLSYMS && MODULES
-       help
-         Kprobes allows you to trap at almost any kernel address and
-         execute a callback function.  register_kprobe() establishes
-         a probepoint and specifies the callback.  Kprobes is useful
-         for kernel debugging, non-intrusive instrumentation and testing.
-         If in doubt, say "N".
-
-endif # INSTRUMENTATION
-
 source "arch/i386/Kconfig.debug"
 
 source "security/Kconfig"
index c60532d93c541714efd6111b871a530bedf3479a..c89108e9770dbea0c29a21a25ea8d422b995bf78 100644 (file)
@@ -592,20 +592,7 @@ config IRQ_PER_CPU
 
 source "arch/ia64/hp/sim/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/ia64/oprofile/Kconfig"
-
-config KPROBES
-       bool "Kprobes"
-       depends on KALLSYMS && MODULES
-       help
-         Kprobes allows you to trap at almost any kernel address and
-         execute a callback function.  register_kprobe() establishes
-         a probepoint and specifies the callback.  Kprobes is useful
-         for kernel debugging, non-intrusive instrumentation and testing.
-         If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/ia64/Kconfig.debug"
 
index 449d3e75bfc29153752a4f732ce1d07ee6719891..75fd90dc76a3e73165a3f75964b7ea1823445392 100644 (file)
@@ -26,6 +26,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=20
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
index a3405b3c1eefba2a0ba94ac0e4ecd9b647992d84..d025a22eb225f6bf455ec5a76b47434dc09a0977 100644 (file)
@@ -773,7 +773,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
                        if (flags & MAP_SHARED)
                                printk(KERN_INFO
                                       "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
-                                      current->comm, current->pid, start);
+                                      current->comm, task_pid_nr(current), start);
                        ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
                                           off);
                        if (IS_ERR((void *) ret))
@@ -786,7 +786,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
                        if (flags & MAP_SHARED)
                                printk(KERN_INFO
                                       "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
-                                      current->comm, current->pid, end);
+                                      current->comm, task_pid_nr(current), end);
                        ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
                                           (off + len) - offset_in_page(end));
                        if (IS_ERR((void *) ret))
@@ -816,7 +816,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
 
        if ((flags & MAP_SHARED) && !is_congruent)
                printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
-                      "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
+                      "(addr=0x%lx,off=0x%llx)\n", current->comm, task_pid_nr(current), start, off);
 
        DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
            is_congruent ? "congruent" : "not congruent", poff);
index 73ca86d03810429fe29df09926741131017d600f..8e4894b205e25dbd98cc1708b8504a1d1dc485dd 100644 (file)
@@ -967,7 +967,7 @@ find_memmap_space (void)
  * to use.  We can allocate partial granules only if the unavailable
  * parts exist, and are WB.
  */
-void
+unsigned long
 efi_memmap_init(unsigned long *s, unsigned long *e)
 {
        struct kern_memdesc *k, *prev = NULL;
@@ -1084,6 +1084,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
        /* reserve the memory we are using for kern_memmap */
        *s = (u64)kern_memmap;
        *e = (u64)++k;
+
+       return total_mem;
 }
 
 void
index f55fa07849c4c4383182d8b2acd6abfa62067d41..59169bf7145f69f955708f49c4a9debb5c2fa713 100644 (file)
  */
 #define PROTECT_CTX(c, f) \
        do {  \
-               DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
+               DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
                spin_lock_irqsave(&(c)->ctx_lock, f); \
-               DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
+               DPRINT(("spinlocked ctx %p  by [%d]\n", c, task_pid_nr(current))); \
        } while(0)
 
 #define UNPROTECT_CTX(c, f) \
        do { \
-               DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
+               DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
                spin_unlock_irqrestore(&(c)->ctx_lock, f); \
        } while(0)
 
 #ifdef PFM_DEBUGGING
 #define DPRINT(a) \
        do { \
-               if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+               if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
        } while (0)
 
 #define DPRINT_ovfl(a) \
        do { \
-               if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+               if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
        } while (0)
 #endif
 
@@ -913,7 +913,7 @@ pfm_mask_monitoring(struct task_struct *task)
        unsigned long mask, val, ovfl_mask;
        int i;
 
-       DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
+       DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
 
        ovfl_mask = pmu_conf->ovfl_val;
        /*
@@ -992,12 +992,12 @@ pfm_restore_monitoring(struct task_struct *task)
        ovfl_mask = pmu_conf->ovfl_val;
 
        if (task != current) {
-               printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
+               printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
                return;
        }
        if (ctx->ctx_state != PFM_CTX_MASKED) {
                printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
-                       task->pid, current->pid, ctx->ctx_state);
+                       task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
                return;
        }
        psr = pfm_get_psr();
@@ -1051,7 +1051,8 @@ pfm_restore_monitoring(struct task_struct *task)
                if ((mask & 0x1) == 0UL) continue;
                ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
                ia64_set_pmc(i, ctx->th_pmcs[i]);
-               DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i]));
+               DPRINT(("[%d] pmc[%d]=0x%lx\n",
+                                       task_pid_nr(task), i, ctx->th_pmcs[i]));
        }
        ia64_srlz_d();
 
@@ -1370,7 +1371,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 
 error_conflict:
        DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
-               pfm_sessions.pfs_sys_session[cpu]->pid,
+               task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
                cpu));
 abort:
        UNLOCK_PFS(flags);
@@ -1442,7 +1443,7 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
 
        /* sanity checks */
        if (task->mm == NULL || size == 0UL || vaddr == NULL) {
-               printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
+               printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
                return -EINVAL;
        }
 
@@ -1459,7 +1460,7 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
 
        up_write(&task->mm->mmap_sem);
        if (r !=0) {
-               printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
+               printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
        }
 
        DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
@@ -1501,7 +1502,7 @@ pfm_free_smpl_buffer(pfm_context_t *ctx)
        return 0;
 
 invalid_free:
-       printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
+       printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
        return -EINVAL;
 }
 #endif
@@ -1547,13 +1548,13 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
        unsigned long flags;
        DECLARE_WAITQUEUE(wait, current);
        if (PFM_IS_FILE(filp) == 0) {
-               printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
                return -EINVAL;
        }
 
        ctx = (pfm_context_t *)filp->private_data;
        if (ctx == NULL) {
-               printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
                return -EINVAL;
        }
 
@@ -1607,7 +1608,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 
                PROTECT_CTX(ctx, flags);
        }
-       DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
+       DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
 
@@ -1616,7 +1617,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
        ret = -EINVAL;
        msg = pfm_get_next_msg(ctx);
        if (msg == NULL) {
-               printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
+               printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
                goto abort_locked;
        }
 
@@ -1647,13 +1648,13 @@ pfm_poll(struct file *filp, poll_table * wait)
        unsigned int mask = 0;
 
        if (PFM_IS_FILE(filp) == 0) {
-               printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
                return 0;
        }
 
        ctx = (pfm_context_t *)filp->private_data;
        if (ctx == NULL) {
-               printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
                return 0;
        }
 
@@ -1692,7 +1693,7 @@ pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
        ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
 
        DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-               current->pid,
+               task_pid_nr(current),
                fd,
                on,
                ctx->ctx_async_queue, ret));
@@ -1707,13 +1708,13 @@ pfm_fasync(int fd, struct file *filp, int on)
        int ret;
 
        if (PFM_IS_FILE(filp) == 0) {
-               printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
                return -EBADF;
        }
 
        ctx = (pfm_context_t *)filp->private_data;
        if (ctx == NULL) {
-               printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
                return -EBADF;
        }
        /*
@@ -1759,7 +1760,7 @@ pfm_syswide_force_stop(void *info)
        if (owner != ctx->ctx_task) {
                printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
                        smp_processor_id(),
-                       owner->pid, ctx->ctx_task->pid);
+                       task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
                return;
        }
        if (GET_PMU_CTX() != ctx) {
@@ -1769,7 +1770,7 @@ pfm_syswide_force_stop(void *info)
                return;
        }
 
-       DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));       
+       DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
        /*
         * the context is already protected in pfm_close(), we simply
         * need to mask interrupts to avoid a PMU interrupt race on
@@ -1821,7 +1822,7 @@ pfm_flush(struct file *filp, fl_owner_t id)
 
        ctx = (pfm_context_t *)filp->private_data;
        if (ctx == NULL) {
-               printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
                return -EBADF;
        }
 
@@ -1969,7 +1970,7 @@ pfm_close(struct inode *inode, struct file *filp)
        
        ctx = (pfm_context_t *)filp->private_data;
        if (ctx == NULL) {
-               printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
+               printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
                return -EBADF;
        }
 
@@ -2066,7 +2067,7 @@ pfm_close(struct inode *inode, struct file *filp)
                 */
                ctx->ctx_state = PFM_CTX_ZOMBIE;
 
-               DPRINT(("zombie ctx for [%d]\n", task->pid));
+               DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
                /*
                 * cannot free the context on the spot. deferred until
                 * the task notices the ZOMBIE state
@@ -2472,7 +2473,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
        /* invoke and lock buffer format, if found */
        fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
        if (fmt == NULL) {
-               DPRINT(("[%d] cannot find buffer format\n", task->pid));
+               DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
                return -EINVAL;
        }
 
@@ -2483,7 +2484,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
 
        ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
 
-       DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
+       DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
 
        if (ret) goto error;
 
@@ -2605,23 +2606,23 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
         * no kernel task or task not owner by caller
         */
        if (task->mm == NULL) {
-               DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
+               DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task)));
                return -EPERM;
        }
        if (pfm_bad_permissions(task)) {
-               DPRINT(("no permission to attach to  [%d]\n", task->pid));
+               DPRINT(("no permission to attach to  [%d]\n", task_pid_nr(task)));
                return -EPERM;
        }
        /*
         * cannot block in self-monitoring mode
         */
        if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
-               DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
+               DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
                return -EINVAL;
        }
 
        if (task->exit_state == EXIT_ZOMBIE) {
-               DPRINT(("cannot attach to  zombie task [%d]\n", task->pid));
+               DPRINT(("cannot attach to  zombie task [%d]\n", task_pid_nr(task)));
                return -EBUSY;
        }
 
@@ -2631,7 +2632,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
        if (task == current) return 0;
 
        if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-               DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
+               DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
                return -EBUSY;
        }
        /*
@@ -3512,7 +3513,7 @@ pfm_use_debug_registers(struct task_struct *task)
 
        if (pmu_conf->use_rr_dbregs == 0) return 0;
 
-       DPRINT(("called for [%d]\n", task->pid));
+       DPRINT(("called for [%d]\n", task_pid_nr(task)));
 
        /*
         * do it only once
@@ -3543,7 +3544,7 @@ pfm_use_debug_registers(struct task_struct *task)
        DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
                  pfm_sessions.pfs_ptrace_use_dbregs,
                  pfm_sessions.pfs_sys_use_dbregs,
-                 task->pid, ret));
+                 task_pid_nr(task), ret));
 
        UNLOCK_PFS(flags);
 
@@ -3568,7 +3569,7 @@ pfm_release_debug_registers(struct task_struct *task)
 
        LOCK_PFS(flags);
        if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
-               printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
+               printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task));
                ret = -1;
        }  else {
                pfm_sessions.pfs_ptrace_use_dbregs--;
@@ -3620,7 +3621,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
        /* sanity check */
        if (unlikely(task == NULL)) {
-               printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
+               printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current));
                return -EINVAL;
        }
 
@@ -3629,7 +3630,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
                fmt = ctx->ctx_buf_fmt;
 
                DPRINT(("restarting self %d ovfl=0x%lx\n",
-                       task->pid,
+                       task_pid_nr(task),
                        ctx->ctx_ovfl_regs[0]));
 
                if (CTX_HAS_SMPL(ctx)) {
@@ -3653,11 +3654,11 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
                                pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
 
                        if (rst_ctrl.bits.mask_monitoring == 0) {
-                               DPRINT(("resuming monitoring for [%d]\n", task->pid));
+                               DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task)));
 
                                if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
                        } else {
-                               DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
+                               DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task)));
 
                                // cannot use pfm_stop_monitoring(task, regs);
                        }
@@ -3714,10 +3715,10 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
         * "self-monitoring".
         */
        if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
-               DPRINT(("unblocking [%d] \n", task->pid));
+               DPRINT(("unblocking [%d] \n", task_pid_nr(task)));
                complete(&ctx->ctx_restart_done);
        } else {
-               DPRINT(("[%d] armed exit trap\n", task->pid));
+               DPRINT(("[%d] armed exit trap\n", task_pid_nr(task)));
 
                ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
 
@@ -3805,7 +3806,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
         * don't bother if we are loaded and task is being debugged
         */
        if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
-               DPRINT(("debug registers already in use for [%d]\n", task->pid));
+               DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task)));
                return -EBUSY;
        }
 
@@ -3846,7 +3847,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
         * is shared by all processes running on it
         */
        if (first_time && can_access_pmu) {
-               DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
+               DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
                for (i=0; i < pmu_conf->num_ibrs; i++) {
                        ia64_set_ibr(i, 0UL);
                        ia64_dv_serialize_instruction();
@@ -4035,7 +4036,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
                return -EBUSY;
        }
        DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
-               PFM_CTX_TASK(ctx)->pid,
+               task_pid_nr(PFM_CTX_TASK(ctx)),
                state,
                is_system));
        /*
@@ -4093,7 +4094,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
                 * monitoring disabled in kernel at next reschedule
                 */
                ctx->ctx_saved_psr_up = 0;
-               DPRINT(("task=[%d]\n", task->pid));
+               DPRINT(("task=[%d]\n", task_pid_nr(task)));
        }
        return 0;
 }
@@ -4298,11 +4299,12 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
                if (is_system) {
                        if (pfm_sessions.pfs_ptrace_use_dbregs) {
-                               DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
+                               DPRINT(("cannot load [%d] dbregs in use\n",
+                                                       task_pid_nr(task)));
                                ret = -EBUSY;
                        } else {
                                pfm_sessions.pfs_sys_use_dbregs++;
-                               DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
+                               DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
                                set_dbregs = 1;
                        }
                }
@@ -4394,7 +4396,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
                        /* allow user level control */
                        ia64_psr(regs)->sp = 0;
-                       DPRINT(("clearing psr.sp for [%d]\n", task->pid));
+                       DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));
 
                        SET_LAST_CPU(ctx, smp_processor_id());
                        INC_ACTIVATION();
@@ -4429,7 +4431,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
                 */
                SET_PMU_OWNER(task, ctx);
 
-               DPRINT(("context loaded on PMU for [%d]\n", task->pid));
+               DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
        } else {
                /*
                 * when not current, task MUST be stopped, so this is safe
@@ -4493,7 +4495,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
        int prev_state, is_system;
        int ret;
 
-       DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
+       DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));
 
        prev_state = ctx->ctx_state;
        is_system  = ctx->ctx_fl_system;
@@ -4568,7 +4570,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
                 */
                ia64_psr(regs)->sp = 1;
 
-               DPRINT(("setting psr.sp for [%d]\n", task->pid));
+               DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
        }
        /*
         * save PMDs to context
@@ -4608,7 +4610,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
        ctx->ctx_fl_can_restart  = 0;
        ctx->ctx_fl_going_zombie = 0;
 
-       DPRINT(("disconnected [%d] from context\n", task->pid));
+       DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));
 
        return 0;
 }
@@ -4631,7 +4633,7 @@ pfm_exit_thread(struct task_struct *task)
 
        PROTECT_CTX(ctx, flags);
 
-       DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
+       DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));
 
        state = ctx->ctx_state;
        switch(state) {
@@ -4640,13 +4642,13 @@ pfm_exit_thread(struct task_struct *task)
                         * only comes to this function if pfm_context is not NULL, i.e., cannot
                         * be in unloaded state
                         */
-                       printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
+                       printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
                        break;
                case PFM_CTX_LOADED:
                case PFM_CTX_MASKED:
                        ret = pfm_context_unload(ctx, NULL, 0, regs);
                        if (ret) {
-                               printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+                               printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
                        }
                        DPRINT(("ctx unloaded for current state was %d\n", state));
 
@@ -4655,12 +4657,12 @@ pfm_exit_thread(struct task_struct *task)
                case PFM_CTX_ZOMBIE:
                        ret = pfm_context_unload(ctx, NULL, 0, regs);
                        if (ret) {
-                               printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+                               printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
                        }
                        free_ok = 1;
                        break;
                default:
-                       printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
+                       printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
                        break;
        }
        UNPROTECT_CTX(ctx, flags);
@@ -4744,7 +4746,7 @@ recheck:
        DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
                ctx->ctx_fd,
                state,
-               task->pid,
+               task_pid_nr(task),
                task->state, PFM_CMD_STOPPED(cmd)));
 
        /*
@@ -4791,7 +4793,7 @@ recheck:
         */
        if (PFM_CMD_STOPPED(cmd)) {
                if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-                       DPRINT(("[%d] task not in stopped state\n", task->pid));
+                       DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
                        return -EBUSY;
                }
                /*
@@ -4884,7 +4886,7 @@ restart_args:
         * limit abuse to min page size
         */
        if (unlikely(sz > PFM_MAX_ARGSIZE)) {
-               printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
+               printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
                return -E2BIG;
        }
 
@@ -5031,11 +5033,11 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
 {
        int ret;
 
-       DPRINT(("entering for [%d]\n", current->pid));
+       DPRINT(("entering for [%d]\n", task_pid_nr(current)));
 
        ret = pfm_context_unload(ctx, NULL, 0, regs);
        if (ret) {
-               printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret);
+               printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret);
        }
 
        /*
@@ -5072,7 +5074,7 @@ pfm_handle_work(void)
 
        ctx = PFM_GET_CTX(current);
        if (ctx == NULL) {
-               printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
+               printk(KERN_ERR "perfmon: [%d] has no PFM context\n", task_pid_nr(current));
                return;
        }
 
@@ -5269,7 +5271,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
        DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
                     "used_pmds=0x%lx\n",
                        pmc0,
-                       task ? task->pid: -1,
+                       task ? task_pid_nr(task): -1,
                        (regs ? regs->cr_iip : 0),
                        CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
                        ctx->ctx_used_pmds[0]));
@@ -5458,7 +5460,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
        }
 
        DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
-                       GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
+                       GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
                        PFM_GET_WORK_PENDING(task),
                        ctx->ctx_fl_trap_reason,
                        ovfl_pmds,
@@ -5483,7 +5485,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 sanity_check:
        printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
                        smp_processor_id(),
-                       task ? task->pid : -1,
+                       task ? task_pid_nr(task) : -1,
                        pmc0);
        return;
 
@@ -5516,7 +5518,7 @@ stop_monitoring:
         *
         * Overall pretty hairy stuff....
         */
-       DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
+       DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
        pfm_clear_psr_up();
        ia64_psr(regs)->up = 0;
        ia64_psr(regs)->sp = 1;
@@ -5577,13 +5579,13 @@ pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 
 report_spurious1:
        printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
-               this_cpu, task->pid);
+               this_cpu, task_pid_nr(task));
        pfm_unfreeze_pmu();
        return -1;
 report_spurious2:
        printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
                this_cpu, 
-               task->pid);
+               task_pid_nr(task));
        pfm_unfreeze_pmu();
        return -1;
 }
@@ -5870,7 +5872,8 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
        ia64_psr(regs)->sp = 1;
 
        if (GET_PMU_OWNER() == task) {
-               DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
+               DPRINT(("cleared ownership for [%d]\n",
+                                       task_pid_nr(ctx->ctx_task)));
                SET_PMU_OWNER(NULL, NULL);
        }
 
@@ -5882,7 +5885,7 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
        task->thread.pfm_context  = NULL;
        task->thread.flags       &= ~IA64_THREAD_PM_VALID;
 
-       DPRINT(("force cleanup for [%d]\n",  task->pid));
+       DPRINT(("force cleanup for [%d]\n",  task_pid_nr(task)));
 }
 
 
@@ -6426,7 +6429,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 
                if (PMD_IS_COUNTING(i)) {
                        DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
-                               task->pid,
+                               task_pid_nr(task),
                                i,
                                ctx->ctx_pmds[i].val,
                                val & ovfl_val));
@@ -6448,11 +6451,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
                         */
                        if (pmc0 & (1UL << i)) {
                                val += 1 + ovfl_val;
-                               DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
+                               DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
                        }
                }
 
-               DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
+               DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));
 
                if (is_self) ctx->th_pmds[i] = pmd_val;
 
@@ -6793,14 +6796,14 @@ dump_pmu_state(const char *from)
        printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 
                this_cpu, 
                from, 
-               current->pid, 
+               task_pid_nr(current),
                regs->cr_iip,
                current->comm);
 
        task = GET_PMU_OWNER();
        ctx  = GET_PMU_CTX();
 
-       printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
+       printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);
 
        psr = pfm_get_psr();
 
@@ -6848,7 +6851,7 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
 {
        struct thread_struct *thread;
 
-       DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
+       DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));
 
        thread = &task->thread;
 
index ff80eab83b38668b5d3a573f064e97b1d13f5715..a7af1cb419f921490991a2d29c3a6e6bc69d6012 100644 (file)
@@ -44,11 +44,11 @@ default_validate(struct task_struct *task, unsigned int flags, int cpu, void *da
        int ret = 0;
 
        if (data == NULL) {
-               DPRINT(("[%d] no argument passed\n", task->pid));
+               DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
                return -EINVAL;
        }
 
-       DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
+       DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
 
        /*
         * must hold at least the buffer header + one minimally sized entry
@@ -88,7 +88,7 @@ default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, v
        hdr->hdr_count        = 0UL;
 
        DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
-               task->pid,
+               task_pid_nr(task),
                buf,
                hdr->hdr_buf_size,
                sizeof(*hdr),
@@ -245,7 +245,7 @@ default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, stru
 static int
 default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
 {
-       DPRINT(("[%d] exit(%p)\n", task->pid, buf));
+       DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
        return 0;
 }
 
index c613fc0e91cc8bc5d1cdc6f204d4c3e76037b589..2418289ee5ca4a89e598f057c26f2f201497ea0e 100644 (file)
@@ -105,7 +105,8 @@ show_regs (struct pt_regs *regs)
        unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
 
        print_modules();
-       printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
+       printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
+                       smp_processor_id(), current->comm);
        printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
               regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
        print_symbol("ip is at %s\n", ip);
index c5cfcfa4c87c1ae92db64feeda71039cd529e245..cbf67f1aa2913f7bbc3ee342af979b0bba6fc503 100644 (file)
@@ -208,6 +208,48 @@ static int __init register_memory(void)
 
 __initcall(register_memory);
 
+
+#ifdef CONFIG_KEXEC
+static void __init setup_crashkernel(unsigned long total, int *n)
+{
+       unsigned long long base = 0, size = 0;
+       int ret;
+
+       ret = parse_crashkernel(boot_command_line, total,
+                       &size, &base);
+       if (ret == 0 && size > 0) {
+               if (!base) {
+                       sort_regions(rsvd_region, *n);
+                       base = kdump_find_rsvd_region(size,
+                                       rsvd_region, *n);
+               }
+               if (base != ~0UL) {
+                       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                                       "for crashkernel (System RAM: %ldMB)\n",
+                                       (unsigned long)(size >> 20),
+                                       (unsigned long)(base >> 20),
+                                       (unsigned long)(total >> 20));
+                       rsvd_region[*n].start =
+                               (unsigned long)__va(base);
+                       rsvd_region[*n].end =
+                               (unsigned long)__va(base + size);
+                       (*n)++;
+                       crashk_res.start = base;
+                       crashk_res.end = base + size - 1;
+               }
+       }
+       efi_memmap_res.start = ia64_boot_param->efi_memmap;
+       efi_memmap_res.end = efi_memmap_res.start +
+               ia64_boot_param->efi_memmap_size;
+       boot_param_res.start = __pa(ia64_boot_param);
+       boot_param_res.end = boot_param_res.start +
+               sizeof(*ia64_boot_param);
+}
+#else
+static inline void __init setup_crashkernel(unsigned long total, int *n)
+{}
+#endif
+
 /**
  * reserve_memory - setup reserved memory areas
  *
@@ -219,6 +261,7 @@ void __init
 reserve_memory (void)
 {
        int n = 0;
+       unsigned long total_memory;
 
        /*
         * none of the entries in this table overlap
@@ -254,50 +297,11 @@ reserve_memory (void)
                n++;
 #endif
 
-       efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+       total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
        n++;
 
-#ifdef CONFIG_KEXEC
-       /* crashkernel=size@offset specifies the size to reserve for a crash
-        * kernel. If offset is 0, then it is determined automatically.
-        * By reserving this memory we guarantee that linux never set's it
-        * up as a DMA target.Useful for holding code to do something
-        * appropriate after a kernel panic.
-        */
-       {
-               char *from = strstr(boot_command_line, "crashkernel=");
-               unsigned long base, size;
-               if (from) {
-                       size = memparse(from + 12, &from);
-                       if (*from == '@')
-                               base = memparse(from+1, &from);
-                       else
-                               base = 0;
-                       if (size) {
-                               if (!base) {
-                                       sort_regions(rsvd_region, n);
-                                       base = kdump_find_rsvd_region(size,
-                                                               rsvd_region, n);
-                                       }
-                               if (base != ~0UL) {
-                                       rsvd_region[n].start =
-                                               (unsigned long)__va(base);
-                                       rsvd_region[n].end =
-                                               (unsigned long)__va(base + size);
-                                       n++;
-                                       crashk_res.start = base;
-                                       crashk_res.end = base + size - 1;
-                               }
-                       }
-               }
-               efi_memmap_res.start = ia64_boot_param->efi_memmap;
-                efi_memmap_res.end = efi_memmap_res.start +
-                        ia64_boot_param->efi_memmap_size;
-                boot_param_res.start = __pa(ia64_boot_param);
-                boot_param_res.end = boot_param_res.start +
-                        sizeof(*ia64_boot_param);
-       }
-#endif
+       setup_crashkernel(total_memory, &n);
+
        /* end of memory marker */
        rsvd_region[n].start = ~0UL;
        rsvd_region[n].end   = ~0UL;
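
The hunk above (like the powerpc and sh hunks later in this diff) replaces open-coded "crashkernel=size@offset" string parsing with the common parse_crashkernel() helper. A condensed sketch of the calling convention, matching the prototype visible in the hunks; example_reserve_crashkernel() and the final reservation step are placeholders, not code from this commit:

#include <linux/init.h>
#include <linux/kexec.h>

static void __init example_reserve_crashkernel(unsigned long long system_ram)
{
	unsigned long long crash_size = 0, crash_base = 0;
	int ret;

	/* parses "crashkernel=size[@base]" out of the boot command line */
	ret = parse_crashkernel(boot_command_line, system_ram,
				&crash_size, &crash_base);
	if (ret != 0 || crash_size == 0)
		return;			/* option absent or malformed */

	if (crash_base == 0) {
		/* base 0 means the architecture picks a region itself */
	}

	/* ... reserve [crash_base, crash_base + crash_size) here ... */
}

Centralising the parsing is what lets every architecture share one option syntax and print the same "Reserving ...MB of memory at ...MB" message.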
index aeec8184e862535e6676b2cafae699f502fa56f0..cdb64cc4d9c85d93ad21b26756047d358387700a 100644 (file)
@@ -227,7 +227,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
        si.si_signo = SIGSEGV;
        si.si_errno = 0;
        si.si_code = SI_KERNEL;
-       si.si_pid = current->pid;
+       si.si_pid = task_pid_vnr(current);
        si.si_uid = current->uid;
        si.si_addr = sc;
        force_sig_info(SIGSEGV, &si, current);
@@ -332,7 +332,7 @@ force_sigsegv_info (int sig, void __user *addr)
        si.si_signo = SIGSEGV;
        si.si_errno = 0;
        si.si_code = SI_KERNEL;
-       si.si_pid = current->pid;
+       si.si_pid = task_pid_vnr(current);
        si.si_uid = current->uid;
        si.si_addr = addr;
        force_sig_info(SIGSEGV, &si, current);
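
Throughout this diff the direct reads of task->pid are replaced with namespace-aware helpers: printk()/DPRINT() diagnostics switch to task_pid_nr(), while the si_pid values handed back to userspace, as in the signal.c hunks above, switch to task_pid_vnr(). A minimal sketch of the distinction, not taken from this commit; report_task() is a hypothetical helper:

/*
 * Sketch only: task_pid_nr() yields the global pid as seen from the
 * initial namespace (stable for kernel logs); task_pid_vnr() yields
 * the pid as seen from the task's own namespace (what the task would
 * get from getpid(), hence suitable for siginfo reported to it).
 */
#include <linux/kernel.h>
#include <linux/sched.h>

static void report_task(struct task_struct *task)
{
	printk(KERN_INFO "global pid=%d virtual pid=%d\n",
	       task_pid_nr(task), task_pid_vnr(task));
}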
index 3aeaf15e468ba180381e79f7e19d94aad98e97e6..78d65cb947d29c4fb763c346efed30a307130bfc 100644 (file)
@@ -61,7 +61,7 @@ die (const char *str, struct pt_regs *regs, long err)
 
        if (++die.lock_owner_depth < 3) {
                printk("%s[%d]: %s %ld [%d]\n",
-                       current->comm, current->pid, str, err, ++die_counter);
+               current->comm, task_pid_nr(current), str, err, ++die_counter);
                (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
                show_regs(regs);
        } else
@@ -315,7 +315,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
                                last.time = current_jiffies + 5 * HZ;
                                printk(KERN_WARNING
                                        "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
-                                       current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
+                                       current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
                        }
                }
        }
@@ -453,7 +453,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
                if (code == 8) {
 # ifdef CONFIG_IA64_PRINT_HAZARDS
                        printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
-                              current->comm, current->pid,
+                              current->comm, task_pid_nr(current),
                               regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
 # endif
                        return;
index fe6aa5a9f8fa94edc851c59b4d1cd9d28839cf32..2173de9fe9173773a960dbd9347367242968011f 100644 (file)
@@ -1340,7 +1340,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
                        size_t len;
 
                        len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
-                                     "ip=0x%016lx\n\r", current->comm, current->pid,
+                                     "ip=0x%016lx\n\r", current->comm,
+                                     task_pid_nr(current),
                                      ifa, regs->cr_iip + ipsr->ri);
                        /*
                         * Don't call tty_write_message() if we're in the kernel; we might
@@ -1363,7 +1364,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
                                       "administrator\n"
                                       "echo 0 > /proc/sys/kernel/ignore-"
                                       "unaligned-usertrap to re-enable\n",
-                                      current->comm, current->pid);
+                                      current->comm, task_pid_nr(current));
                        }
                }
        } else {
index 32f26253c4e8cea6b938e8babdffb96e826e6ac8..7571076a16a1991742af9f502b2113f7d9bd35a4 100644 (file)
@@ -274,7 +274,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 
   out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
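
Several out-of-memory paths in this diff (ia64 above; m32r, m68k, mips, powerpc, ppc, s390, sh and sh64 below) swap is_init() for is_global_init(): with pid namespaces, only the namespace-global init keeps the retry-forever treatment, while a per-namespace init is OOM-killable like any other task. A sketch of the recurring pattern; oom_retry() is a hypothetical extraction, the real code stays open-coded in each fault handler:

#include <linux/sched.h>
#include <linux/rwsem.h>

static int oom_retry(struct task_struct *tsk, struct rw_semaphore *mmap_sem)
{
	up_read(mmap_sem);
	if (is_global_init(tsk)) {
		/* never give up on the real init: back off and retry */
		yield();
		down_read(mmap_sem);
		return 1;	/* caller does "goto survive" */
	}
	return 0;		/* normal OOM handling proceeds */
}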
index 3e10152abbf0f5873194e383cfcd8845f202b252..c6c19bf11becd573c38362462bf3c1ad0cce91bd 100644 (file)
@@ -127,8 +127,8 @@ ia64_init_addr_space (void)
                vma->vm_mm = current->mm;
                vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
                vma->vm_end = vma->vm_start + PAGE_SIZE;
-               vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
                vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
+               vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
                down_write(&current->mm->mmap_sem);
                if (insert_vm_struct(current->mm, vma)) {
                        up_write(&current->mm->mmap_sem);
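
The init.c hunk above also reorders the vma setup so that vm_page_prot is derived from the final vm_flags via vm_get_page_prot(), rather than indexing protection_map by hand with a magic 0x7 mask. A brief sketch of the idiom; setup_example_vma() is a placeholder name:

#include <linux/mm.h>

static void setup_example_vma(struct vm_area_struct *vma)
{
	/* set the flags first ... */
	vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP | VM_ACCOUNT;
	/* ... then derive the protection, so the two cannot drift apart */
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
}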
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
deleted file mode 100644 (file)
index 97271ab..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         Due to firmware bugs, you may need to use the "nohalt" boot
-         option if you're using OProfile with the hardware performance
-         counters.
-
-         If unsure, say N.
-
index bd5fe76401f18dea117b6042362fc70a759a4a6f..ab9a264cb1947cb6a938c523e677f4c64873b72e 100644 (file)
@@ -426,7 +426,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/m32r/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/m32r/Kconfig.debug"
 
index 97e0b1c0830e81cf218d4e61c4dbbc0a1de8e0ff..89ba4a0b5d5109d0be4569947e13591d115a2f91 100644 (file)
@@ -196,7 +196,7 @@ static void show_registers(struct pt_regs *regs)
                printk("SPI: %08lx\n", sp);
        }
        printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
-               current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
+               current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
 
        /*
         * When in-kernel, we also print out the stack and code at the
index 70a766aad3e0bdbcdc22b6dfd11033d3183846aa..4a71df4c1b3022b1c8f324b50d78dd2b79f8e51a 100644 (file)
@@ -271,7 +271,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
diff --git a/arch/m32r/oprofile/Kconfig b/arch/m32r/oprofile/Kconfig
deleted file mode 100644 (file)
index 19d3773..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-       depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-endmenu
-
index 20a9c08e59c34ee34b21adb100b33014c99a39ff..01dee84f840ac72608dae4fe0ea4997b3e911f81 100644 (file)
@@ -683,6 +683,8 @@ endmenu
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/m68k/Kconfig.debug"
 
 source "security/Kconfig"
index 4e2752a0e89b32b2dccd4e2576844378887bab16..97f556fa493290822dea1673986d1b269c23bcfe 100644 (file)
@@ -900,7 +900,7 @@ void show_registers(struct pt_regs *regs)
               regs->d4, regs->d5, regs->a0, regs->a1);
 
        printk("Process %s (pid: %d, task=%p)\n",
-               current->comm, current->pid, current);
+               current->comm, task_pid_nr(current), current);
        addr = (unsigned long)&fp->un;
        printk("Frame format=%X ", regs->format);
        switch (regs->format) {
@@ -1038,7 +1038,7 @@ void bad_super_trap (struct frame *fp)
                                fp->un.fmtb.daddr, space_names[ssw & DFC],
                                fp->ptregs.pc);
        }
-       printk ("Current process id is %d\n", current->pid);
+       printk ("Current process id is %d\n", task_pid_nr(current));
        die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
 }
 
index eaa618681159524ec1cf287b2ff1c91bf72d2da7..f493f03231d5ad3594d0813bd11a8855e9efcfbb 100644 (file)
@@ -180,7 +180,7 @@ good_area:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 185906b54cb03ff00bd94721a652c19c76a4dc1f..f52c627bdadddb94da22e4a0563b9ee65facb14a 100644 (file)
@@ -696,6 +696,8 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/m68knommu/Kconfig.debug"
 
 source "security/Kconfig"
index cb027580cd1d140d1a95cfb63a2e3626344af830..4dc142d394a348233c5c764c9105f61f5cee2c64 100644 (file)
@@ -2005,7 +2005,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/mips/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/mips/Kconfig.debug"
 
index 5f48b0603796bc077fb18fed96e33a1b1343ccbe..bdf00e2a35e47db2fd10c59cb4f29a30cefe7588 100644 (file)
@@ -36,8 +36,8 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/mipsregs.h>
index 1ecab6350421cdeef9101185aac4ce0a3e26179f..4903e067916b4b5badaca3f2b44bfb2d8c3cbe8d 100644 (file)
@@ -29,7 +29,7 @@
 #include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/irq.h>
index 49bcc58929ba3a0329a300e56ffe502b4e512a99..892d4c38fd0dd533a242fe45edc8eb632c50af1a 100644 (file)
@@ -175,6 +175,7 @@ CONFIG_POSIX_MQUEUE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
index 3ed991ae0ebe180d5942f42bab47f5573594620b..49dfcef2518ce3bf9ea5f282d1adf421be923df4 100644 (file)
@@ -196,6 +196,7 @@ CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
index b997af713eb393aa6d4322d8f1d3d326e5dd57e9..7852c7cdf29e7e5d575d049d5367a57ed4f21fe5 100644 (file)
@@ -1172,8 +1172,8 @@ static int irix_core_dump(long signr, struct pt_regs *regs, struct file *file, u
        prstatus.pr_sighold = current->blocked.sig[0];
        psinfo.pr_pid = prstatus.pr_pid = current->pid;
        psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
-       psinfo.pr_pgrp = prstatus.pr_pgrp = process_group(current);
-       psinfo.pr_sid = prstatus.pr_sid = process_session(current);
+       psinfo.pr_pgrp = prstatus.pr_pgrp = task_pgrp_nr(current);
+       psinfo.pr_sid = prstatus.pr_sid = task_session_nr(current);
        if (current->pid == current->tgid) {
                /*
                 * This is the record for the group leader.  Add in the
index 85c2e389edd6f1fea3f2c4465396ef73364604dd..a0a91056fda722472edd644e9fa73e379d697a3a 100644 (file)
@@ -609,7 +609,7 @@ repeat:
                p = list_entry(_p, struct task_struct, sibling);
                if ((type == IRIX_P_PID) && p->pid != pid)
                        continue;
-               if ((type == IRIX_P_PGID) && process_group(p) != pid)
+               if ((type == IRIX_P_PGID) && task_pgrp_nr(p) != pid)
                        continue;
                if ((p->exit_signal != SIGCHLD))
                        continue;
index ee7790d9debe2e249e0ebf3480aaebb885fa4fe8..4c477c7ff74a1388c21a85392ed19ac7033cbeec 100644 (file)
@@ -763,11 +763,11 @@ asmlinkage int irix_setpgrp(int flags)
        printk("[%s:%d] setpgrp(%d) ", current->comm, current->pid, flags);
 #endif
        if(!flags)
-               error = process_group(current);
+               error = task_pgrp_nr(current);
        else
                error = sys_setsid();
 #ifdef DEBUG_PROCGRPS
-       printk("returning %d\n", process_group(current));
+       printk("returning %d\n", task_pgrp_nr(current));
 #endif
 
        return error;
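
The IRIX compat hunks above retire the old process_group() and process_session() accessors in favour of task_pgrp_nr() and task_session_nr(). A one-function sketch of the replacement; show_job_ids() is hypothetical:

#include <linux/kernel.h>
#include <linux/sched.h>

static void show_job_ids(struct task_struct *p)
{
	printk(KERN_DEBUG "pgrp=%d sid=%d\n",
	       task_pgrp_nr(p),		/* was: process_group(p)   */
	       task_session_nr(p));	/* was: process_session(p) */
}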
index 7b78d137259fdca7a14ea2789b4e034a026a58fb..fa500787152d2cfbc40ffc38075f7ee7545ea103 100644 (file)
@@ -314,7 +314,7 @@ void show_registers(const struct pt_regs *regs)
        __show_regs(regs);
        print_modules();
        printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
-               current->comm, current->pid, current_thread_info(), current);
+               current->comm, task_pid_nr(current), current_thread_info(), current);
        show_stacktrace(current, regs);
        show_code((unsigned int __user *) regs->cp0_epc);
        printk("\n");
index 5699c7713e2f01c5b42f7803df82c61c4b48ec8c..fa636fc6b7b90403a2a8771ef3c5f023a1a82cd4 100644 (file)
@@ -173,7 +173,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
diff --git a/arch/mips/oprofile/Kconfig b/arch/mips/oprofile/Kconfig
deleted file mode 100644 (file)
index fb6f235..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-       depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING && !MIPS_MT_SMTC && EXPERIMENTAL
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-endmenu
-
index be3b88dd4c1fbee4a23fcd0ac7ca3b1fe660a760..3ba4101d141eda4360f6a438cf2fafbcb5572553 100644 (file)
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/irq.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/irq.h>
index 3d73545e8c484d158b96788be65a9a7fcab60f48..b8ef1787a191ce81337590391067b9dda811b89d 100644 (file)
@@ -267,7 +267,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/parisc/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/parisc/Kconfig.debug"
 
index fb35ebc0c4da241a5d6da6e7e3b00de10a581b81..2ce3806f02e1989d5157ac9c9a3a037f40a384fc 100644 (file)
@@ -181,7 +181,7 @@ give_sigsegv:
        si.si_signo = SIGSEGV;
        si.si_errno = 0;
        si.si_code = SI_KERNEL;
-       si.si_pid = current->pid;
+       si.si_pid = task_pid_vnr(current);
        si.si_uid = current->uid;
        si.si_addr = &frame->uc;
        force_sig_info(SIGSEGV, &si, current);
index bbf029a184acce9be221def228b2e575160be633..99fd56939afa73909f4aabbe9fe88a490afa3748 100644 (file)
@@ -219,7 +219,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
                        return; /* STFU */
 
                printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
-                       current->comm, current->pid, str, err, regs->iaoq[0]);
+                       current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
 #ifdef PRINT_USER_FAULTS
                /* XXX for debugging only */
                show_regs(regs);
@@ -252,7 +252,7 @@ KERN_CRIT "                     ||     ||\n");
        
        if (err)
                printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
-                       current->comm, current->pid, str, err);
+                       current->comm, task_pid_nr(current), str, err);
 
        /* Wot's wrong wif bein' racy? */
        if (current->thread.flags & PARISC_KERNEL_DEATH) {
@@ -317,7 +317,7 @@ static void handle_break(struct pt_regs *regs)
        if (unlikely(iir != GDB_BREAK_INSN)) {
                printk(KERN_DEBUG "break %d,%d: pid=%d command='%s'\n",
                        iir & 31, (iir>>13) & ((1<<13)-1),
-                       current->pid, current->comm);
+                       task_pid_nr(current), current->comm);
                show_regs(regs);
        }
 #endif
@@ -747,7 +747,7 @@ void handle_interruption(int code, struct pt_regs *regs)
                if (user_mode(regs)) {
 #ifdef PRINT_USER_FAULTS
                        printk(KERN_DEBUG "\nhandle_interruption() pid=%d command='%s'\n",
-                           current->pid, current->comm);
+                           task_pid_nr(current), current->comm);
                        show_regs(regs);
 #endif
                        /* SIGBUS, for lack of a better one. */
@@ -772,7 +772,7 @@ void handle_interruption(int code, struct pt_regs *regs)
                else
                        printk(KERN_DEBUG "User Fault (long pointer) (fault %d) ",
                               code);
-               printk("pid=%d command='%s'\n", current->pid, current->comm);
+               printk("pid=%d command='%s'\n", task_pid_nr(current), current->comm);
                show_regs(regs);
 #endif
                si.si_signo = SIGSEGV;
index 347bb922e6d077f717799dd2c642804dbe0f23dd..aebf3c1688719b0b6ce735f258c3eb068a68b845 100644 (file)
@@ -469,7 +469,7 @@ void handle_unaligned(struct pt_regs *regs)
                    && ++unaligned_count < 5) {
                        char buf[256];
                        sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
-                               current->comm, current->pid, regs->ior, regs->iaoq[0]);
+                               current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
                        printk(KERN_WARNING "%s", buf);
 #ifdef DEBUG_UNALIGNED
                        show_regs(regs);
index 1c091b415cd9b8375506c470527ae929b9900b9c..b2e3e9a8cecefce049d43ef1a5d36d61403cc9b9 100644 (file)
@@ -211,7 +211,7 @@ bad_area:
 #ifdef PRINT_USER_FAULTS
                printk(KERN_DEBUG "\n");
                printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n",
-                   tsk->pid, tsk->comm, code, address);
+                   task_pid_nr(tsk), tsk->comm, code, address);
                if (vma) {
                        printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n",
                                        vma->vm_start, vma->vm_end);
diff --git a/arch/parisc/oprofile/Kconfig b/arch/parisc/oprofile/Kconfig
deleted file mode 100644 (file)
index 5ade198..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-       depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-         
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-endmenu
-
index 3763f681ce4cfa0cc2e09e9332e226414ec8a66a..18f397ca05efbc27966ee2bfebd50c68dffe0448 100644 (file)
@@ -669,20 +669,7 @@ source "arch/powerpc/sysdev/qe_lib/Kconfig"
 
 source "lib/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/powerpc/oprofile/Kconfig"
-
-config KPROBES
-       bool "Kprobes"
-       depends on !BOOKE && !4xx && KALLSYMS && MODULES
-       help
-         Kprobes allows you to trap at almost any kernel address and
-         execute a callback function.  register_kprobe() establishes
-         a probepoint and specifies the callback.  Kprobes is useful
-         for kernel debugging, non-intrusive instrumentation and testing.
-         If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/powerpc/Kconfig.debug"
 
index 8b47c846421cba2cb064bbeff8233a31b485984b..dcd7c02727c2e713c1dac991faf7b94602322b8a 100644 (file)
@@ -68,6 +68,7 @@ CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
index bb8d4e46f0c509e6283ea4f09da521e4a8006c33..05582af50c5beb8c45494b18c916b5848ac88678 100644 (file)
@@ -71,6 +71,7 @@ CONFIG_TASK_DELAY_ACCT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
index c09eb8cfbe711471786509c91b2b026811add369..62a38406b62f8a6336e053f27df9c8d90e215a0c 100644 (file)
@@ -71,6 +71,7 @@ CONFIG_AUDITSYSCALL=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
index e60a0c544d6347684dda853203a30a9c362d3d8b..c0c8e8c3ced90a62b8c5996dcd75bb1a6bae0001 100644 (file)
@@ -61,45 +61,39 @@ NORET_TYPE void machine_kexec(struct kimage *image)
        for(;;);
 }
 
-static int __init early_parse_crashk(char *p)
-{
-       unsigned long size;
-
-       if (!p)
-               return 1;
-
-       size = memparse(p, &p);
-
-       if (*p == '@')
-               crashk_res.start = memparse(p + 1, &p);
-       else
-               crashk_res.start = KDUMP_KERNELBASE;
-
-       crashk_res.end = crashk_res.start + size - 1;
-
-       return 0;
-}
-early_param("crashkernel", early_parse_crashk);
-
 void __init reserve_crashkernel(void)
 {
-       unsigned long size;
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       /* this is necessary because of lmb_phys_mem_size() */
+       lmb_analyze();
+
+       /* use common parsing */
+       ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(),
+                       &crash_size, &crash_base);
+       if (ret == 0 && crash_size > 0) {
+               if (crash_base == 0)
+                       crash_base = KDUMP_KERNELBASE;
+               crashk_res.start = crash_base;
+       } else {
+               /* handle the device tree */
+               crash_size = crashk_res.end - crashk_res.start + 1;
+       }
 
-       if (crashk_res.start == 0)
+       if (crash_size == 0)
                return;
 
        /* We might have got these values via the command line or the
         * device tree, either way sanitise them now. */
 
-       size = crashk_res.end - crashk_res.start + 1;
-
        if (crashk_res.start != KDUMP_KERNELBASE)
                printk("Crash kernel location must be 0x%x\n",
                                KDUMP_KERNELBASE);
 
        crashk_res.start = KDUMP_KERNELBASE;
-       size = PAGE_ALIGN(size);
-       crashk_res.end = crashk_res.start + size - 1;
+       crash_size = PAGE_ALIGN(crash_size);
+       crashk_res.end = crashk_res.start + crash_size - 1;
 
        /* Crash kernel trumps memory limit */
        if (memory_limit && memory_limit <= crashk_res.end) {
@@ -108,7 +102,13 @@ void __init reserve_crashkernel(void)
                                memory_limit);
        }
 
-       lmb_reserve(crashk_res.start, size);
+       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                       "for crashkernel (System RAM: %ldMB)\n",
+                       (unsigned long)(crash_size >> 20),
+                       (unsigned long)(crashk_res.start >> 20),
+                       (unsigned long)(lmb_phys_mem_size() >> 20));
+
+       lmb_reserve(crashk_res.start, crash_size);
 }
 
 int overlaps_crashkernel(unsigned long start, unsigned long size)
index ea6ad7a2a7e3df329b9e56e4aaca83e524905148..b9d88374f14f07abe21e51367110e6c82129ba99 100644 (file)
@@ -459,7 +459,7 @@ void show_regs(struct pt_regs * regs)
                printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
 #endif
        printk("TASK = %p[%d] '%s' THREAD: %p",
-              current, current->pid, current->comm, task_thread_info(current));
+              current, task_pid_nr(current), current->comm, task_thread_info(current));
 
 #ifdef CONFIG_SMP
        printk(" CPU: %d", smp_processor_id());
index bf9e39c6e296209f0f6bc8f5f08c35286df279d4..59c464e26f38886ff0c6d9ed7f20488dc66b4740 100644 (file)
@@ -201,7 +201,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
         * generate the same exception over and over again and we get
         * nowhere.  Better to kill it and let the kernel panic.
         */
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                __sighandler_t handler;
 
                spin_lock_irq(&current->sighand->siglock);
@@ -881,7 +881,7 @@ void nonrecoverable_exception(struct pt_regs *regs)
 void trace_syscall(struct pt_regs *regs)
 {
        printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
-              current, current->pid, regs->nip, regs->link, regs->gpr[0],
+              current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
               regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
 }
 
index ab3546c5ac3a2a8f4b022ba8150c13a9c7686728..a18fda361cc0fe19103add8034d5b3485b012079 100644 (file)
@@ -375,7 +375,7 @@ bad_area_nosemaphore:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
deleted file mode 100644 (file)
index 7089e79..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-config OPROFILE_CELL
-       bool "OProfile for Cell Broadband Engine"
-       depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
-       default y
-       help
-         Profiling of Cell BE SPUs requires special support enabled
-         by this option.
index 354c058616299e7fdc7102a5c55fb89ee49fbe29..144177d77cf1075bf590afb8492141d932f59200 100644 (file)
 #include <linux/root_dev.h>
 #include <linux/serial.h>
 #include <linux/smp.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/kexec.h>
 #include <asm/pci-bridge.h>
index 3a393c7f390e1cce07e0ee0d1c4c7bfc24284057..a1ab25c7082f1031343d492445c37ce3845580dd 100644 (file)
@@ -332,7 +332,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
                   err->disposition == RTAS_DISP_NOT_RECOVERED &&
                   err->target == RTAS_TARGET_MEMORY &&
                   err->type == RTAS_TYPE_ECC_UNCORR &&
-                  !(current->pid == 0 || is_init(current))) {
+                  !(current->pid == 0 || is_global_init(current))) {
                /* Kill off a user process with an ECC error */
                printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
                       current->pid);
index 607925c8a99e050f0e1b042b531de6acc0290627..6473fa7cb4b95c43326def7b2dbe3a5d94e46d06 100644 (file)
@@ -1317,7 +1317,7 @@ endmenu
 
 source "lib/Kconfig"
 
-source "arch/powerpc/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/ppc/Kconfig.debug"
 
index 3f3b292eb773ba80d2d3f79bdfd231bbcea95ecb..c78568905c3b1c2b7676446748e57774a737399a 100644 (file)
@@ -121,7 +121,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
         * generate the same exception over and over again and we get
         * nowhere.  Better to kill it and let the kernel panic.
         */
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                __sighandler_t handler;
 
                spin_lock_irq(&current->sighand->siglock);
index 94913ddcf76e8524392dec4f5d9b53bccbc2011f..254c23b755e689006a7073a13a57cbf35db4643b 100644 (file)
@@ -290,7 +290,7 @@ bad_area:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 248684f50dd9e4c7754503951259b0639d2cdba6..dcd6070b85eb10efa6e644e5115e17def1ab8bc5 100644 (file)
@@ -49,7 +49,6 @@ extern void gen550_progress(char *, unsigned short);
 extern void gen550_init(int, struct uart_port *);
 extern void mv64360_pcibios_fixup(mv64x60_handle_t *bh);
 
-#define BIT(x) (1<<x)
 #define CHESTNUT_PRESERVE_MASK (BIT(MV64x60_CPU2DEV_0_WIN) | \
                                BIT(MV64x60_CPU2DEV_1_WIN) | \
                                BIT(MV64x60_CPU2DEV_2_WIN) | \
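
The chestnut.c hunk drops a board-local BIT() definition; together with the asm/bitops.h to linux/bitops.h include conversions elsewhere in this diff, the generic BIT() macro from linux/bitops.h takes over. A sketch with hypothetical window names, assuming the generic macro expands to 1UL << (nr):

#include <linux/bitops.h>	/* provides BIT(nr) */

#define EXAMPLE_WIN_0	0
#define EXAMPLE_WIN_1	1
#define EXAMPLE_MASK	(BIT(EXAMPLE_WIN_0) | BIT(EXAMPLE_WIN_1))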
index b71132166f606343f810e47387c7a26b4918fced..4ec716d8c1a623b4e123b8e2923d4443a3f3b42a 100644 (file)
@@ -529,21 +529,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/s390/oprofile/Kconfig"
-
-config KPROBES
-       bool "Kprobes (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && MODULES
-       help
-         Kprobes allows you to trap at almost any kernel address and
-         execute a callback function.  register_kprobe() establishes
-         a probepoint and specifies the callback.  Kprobes is useful
-         for kernel debugging, non-intrusive instrumentation and testing.
-         If in doubt, say "N".
-
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/s390/Kconfig.debug"
 
index abb447a3e472443b6925b1a60f08909a28d067f0..70c57378f426c3a8d7250479fd5e524ef68678fa 100644 (file)
@@ -166,7 +166,7 @@ void show_regs(struct pt_regs *regs)
 
         printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
         printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-              current->comm, current->pid, (void *) tsk,
+              current->comm, task_pid_nr(current), (void *) tsk,
               (void *) tsk->thread.ksp);
 
        show_registers(regs);
index 60604b2819b2a0d07c4664860ecb4c462ee92ee1..b159a9d656807f9283102e5cbeee2d9920ab9b27 100644 (file)
@@ -64,7 +64,7 @@ out:
 
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 14c241ccdd4d9aa47c8697cdb4f9492821c1e224..2456b52ed0687e4ac588fa47ae860cc0973636fa 100644 (file)
@@ -211,7 +211,7 @@ static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
        struct mm_struct *mm = tsk->mm;
 
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                return 1;
diff --git a/arch/s390/oprofile/Kconfig b/arch/s390/oprofile/Kconfig
deleted file mode 100644 (file)
index 208220a..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-
-menu "Profiling support"
-
-config PROFILING
-       bool "Profiling support"
-       help
-         Say Y here to enable profiling support mechanisms used by
-         profilers such as readprofile or OProfile.
-
-
-config OPROFILE
-       tristate "OProfile system profiling"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-endmenu
-
index 44982c1dfa23c7adc28b422d8b2d35fa6b6c92b4..247f8a65e7333c585c06566eb4069051327c2c87 100644 (file)
@@ -758,7 +758,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/sh/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sh/Kconfig.debug"
 
index 790ed69b866670de9f5ac6d63890f740e694ff8e..5c17de51987e59eed62b937e601372b824b7bc4e 100644 (file)
@@ -104,24 +104,3 @@ NORET_TYPE void machine_kexec(struct kimage *image)
        (*rnk)(page_list, reboot_code_buffer, image->start, vbr_reg);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
-{
-       unsigned long size, base;
-       size = memparse(arg, &arg);
-       if (*arg == '@') {
-               base = memparse(arg+1, &arg);
-               /* FIXME: Do I want a sanity check
-                * to validate the memory range?
-                */
-               crashk_res.start = base;
-               crashk_res.end   = base + size - 1;
-       }
-       return 0;
-}
-early_param("crashkernel", parse_crashkernel);
index b4469992d6b25aafab1e8fcc1f2cd089bee5f9e0..6d7f2b07e4917ee8ff7e25b7ca01bad63ff0f311 100644 (file)
@@ -121,7 +121,7 @@ void machine_power_off(void)
 void show_regs(struct pt_regs * regs)
 {
        printk("\n");
-       printk("Pid : %d, Comm: %20s\n", current->pid, current->comm);
+       printk("Pid : %d, Comm: %20s\n", task_pid_nr(current), current->comm);
        print_symbol("PC is at %s\n", instruction_pointer(regs));
        printk("PC  : %08lx SP  : %08lx SR  : %08lx ",
               regs->pc, regs->regs[15], regs->sr);
index b3027a6775b91106d8b5b61c85fb5ed0bbb8c5d1..b749403f6b382fe3c4108be924e5caa5e10afe80 100644 (file)
@@ -128,6 +128,37 @@ static void __init register_bootmem_low_pages(void)
        free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(pages));
 }
 
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+       unsigned long long free_mem;
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+       ret = parse_crashkernel(boot_command_line, free_mem,
+                       &crash_size, &crash_base);
+       if (ret == 0 && crash_size) {
+               if (crash_base > 0) {
+                       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                                       "for crashkernel (System RAM: %ldMB)\n",
+                                       (unsigned long)(crash_size >> 20),
+                                       (unsigned long)(crash_base >> 20),
+                                       (unsigned long)(free_mem >> 20));
+                       crashk_res.start = crash_base;
+                       crashk_res.end   = crash_base + crash_size - 1;
+                       reserve_bootmem(crash_base, crash_size);
+               } else
+                       printk(KERN_INFO "crashkernel reservation failed - "
+                                       "you have to specify a base address\n");
+       }
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 void __init setup_bootmem_allocator(unsigned long free_pfn)
 {
        unsigned long bootmap_size;
@@ -189,11 +220,8 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
                }
        }
 #endif
-#ifdef CONFIG_KEXEC
-       if (crashk_res.start != crashk_res.end)
-               reserve_bootmem(crashk_res.start,
-                       crashk_res.end - crashk_res.start + 1);
-#endif
+
+       reserve_crashkernel();
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
index 2f42442cf164e9d6921b22bec246596304d3d640..ca754fd4243734eed9d80ea90266f528c7469d6e 100644 (file)
@@ -382,7 +382,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
        set_fs(USER_DS);
 
        pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-                current->comm, current->pid, frame, regs->pc, regs->pr);
+                current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
        flush_cache_sigtramp(regs->pr);
 
@@ -462,7 +462,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
        set_fs(USER_DS);
 
        pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-                current->comm, current->pid, frame, regs->pc, regs->pr);
+                current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
        flush_cache_sigtramp(regs->pr);
 
index dcb46e71da1c6d23217c6d22a5221ff59a931ff6..cf99111cb33fd0a2d370dd62e2de694782cc2901 100644 (file)
@@ -95,8 +95,8 @@ void die(const char * str, struct pt_regs * regs, long err)
        print_modules();
        show_regs(regs);
 
-       printk("Process: %s (pid: %d, stack limit = %p)\n",
-              current->comm, current->pid, task_stack_page(current) + 1);
+       printk("Process: %s (pid: %d, stack limit = %p)\n", current->comm,
+                       task_pid_nr(current), task_stack_page(current) + 1);
 
        if (!user_mode(regs) || in_interrupt())
                dump_mem("Stack: ", regs->regs[15], THREAD_SIZE +
@@ -386,7 +386,8 @@ static int handle_unaligned_access(u16 instruction, struct pt_regs *regs)
 
                printk(KERN_NOTICE "Fixing up unaligned userspace access "
                       "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
-                      current->comm,current->pid,(u16*)regs->pc,instruction);
+                      current->comm, task_pid_nr(current),
+                      (u16 *)regs->pc, instruction);
        }
 
        ret = -EFAULT;
index 4729668ce5bf3b4ca261d689906ed14db30e01e5..f33cedb353fc6af215bc7f86b37120553d8e7532 100644 (file)
@@ -207,7 +207,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
diff --git a/arch/sh/oprofile/Kconfig b/arch/sh/oprofile/Kconfig
deleted file mode 100644 (file)
index 5ade198..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-       depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-         
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-endmenu
-
index b3327ce8e82f464f2cf81adf92c8f8e30d571d0c..ba204bac49dfa32536ff8561821f2cc3a5ffc2cb 100644 (file)
@@ -284,7 +284,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/sh64/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sh64/Kconfig.debug"
 
index 9d0d58fb29fae75f8bb45f73e5a2a22c044052ea..c03101fab467443a8b6d33e9cf0111d4d48972cb 100644 (file)
@@ -764,7 +764,7 @@ static int misaligned_fixup(struct pt_regs *regs)
                --user_mode_unaligned_fixup_count;
                /* Only do 'count' worth of these reports, to remove a potential DoS against syslog */
                printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
-                      current->comm, current->pid, (__u32)regs->pc, opcode);
+                      current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
        } else
 #endif
        if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
@@ -774,7 +774,7 @@ static int misaligned_fixup(struct pt_regs *regs)
                               (__u32)regs->pc, opcode);
                } else {
                        printk("Fixing up unaligned kernelspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
-                              current->comm, current->pid, (__u32)regs->pc, opcode);
+                              current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
                }
        }
 
index dd81c669c79b5b63df7c52447f320c96d8a15dd4..7c79a1ba8059347bd583913df653bbaea301535d 100644 (file)
@@ -81,7 +81,7 @@ static inline void print_vma(struct vm_area_struct *vma)
 
 static inline void print_task(struct task_struct *tsk)
 {
-       printk("Task pid %d\n", tsk->pid);
+       printk("Task pid %d\n", task_pid_nr(tsk));
 }
 
 static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
@@ -272,13 +272,13 @@ bad_area:
                         * usermode, so only need a few */
                        count++;
                        printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
-                               address, current->pid, current->comm,
+                               address, task_pid_nr(current), current->comm,
                                (unsigned long) regs->pc);
 #if 0
                        show_regs(regs);
 #endif
                }
-               if (is_init(tsk)) {
+               if (is_global_init(tsk)) {
                        panic("INIT had user mode bad_area\n");
                }
                tsk->thread.address = address;
@@ -320,14 +320,14 @@ no_context:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                panic("INIT out of memory\n");
                yield();
                goto survive;
        }
        printk("fault:Out of memory\n");
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
diff --git a/arch/sh64/oprofile/Kconfig b/arch/sh64/oprofile/Kconfig
deleted file mode 100644 (file)
index 19d3773..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-       depends on EXPERIMENTAL
-
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
-endmenu
-
index c0f4ba109daab7ef41e302d46de488fa29aa622b..527adc808ad6c8609bc81581d49bcceec1ceb0aa 100644 (file)
@@ -320,11 +320,7 @@ endmenu
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/sparc/oprofile/Kconfig"
-
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sparc/Kconfig.debug"
 
index 003f8eed32f413e8ba835bdcfea2e16d703789c5..fe562db475e9c454c42d930d9f64808410aa392c 100644 (file)
@@ -155,7 +155,7 @@ static inline void read_sunos_user(struct pt_regs *regs, unsigned long offset,
                /* Rest of them are completely unsupported. */
        default:
                printk("%s [%d]: Wants to read user offset %ld\n",
-                      current->comm, current->pid, offset);
+                      current->comm, task_pid_nr(current), offset);
                pt_error_return(regs, EIO);
                return;
        }
@@ -222,7 +222,7 @@ static inline void write_sunos_user(struct pt_regs *regs, unsigned long offset,
                /* Rest of them are completely unsupported or "no-touch". */
        default:
                printk("%s [%d]: Wants to write user offset %ld\n",
-                      current->comm, current->pid, offset);
+                      current->comm, task_pid_nr(current), offset);
                goto failure;
        }
 success:
index 6c0221e9a9f5a8abafefa9f8e26aa55e0c068b5c..42bf09db9a81ffaa11972289daffcb884fa22225 100644 (file)
@@ -357,7 +357,7 @@ c_sys_nis_syscall (struct pt_regs *regs)
        if (count++ > 5)
                return -ENOSYS;
        printk ("%s[%d]: Unimplemented SPARC system call %d\n",
-               current->comm, current->pid, (int)regs->u_regs[1]);
+               current->comm, task_pid_nr(current), (int)regs->u_regs[1]);
 #ifdef DEBUG_UNIMP_SYSCALL     
        show_regs (regs);
 #endif
index f807172cab0ea139d07ea70fb7c1e795708e013b..28c187c5d9fd68dc4eea2008fa253d63d8e0b9d8 100644 (file)
@@ -866,7 +866,7 @@ asmlinkage int sunos_killpg(int pgrp, int sig)
        rcu_read_lock();
        ret = -EINVAL;
        if (pgrp > 0)
-               ret = kill_pgrp(find_pid(pgrp), sig, 0);
+               ret = kill_pgrp(find_vpid(pgrp), sig, 0);
        rcu_read_unlock();
 
        return ret;
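find_pid() -> find_vpid() makes the lookup namespace-aware: the numeric pgrp supplied by userspace is resolved in the calling process's pid namespace instead of the global table. A sketch of the helper, assumed from the same series:

        /* kernel/pid.c, roughly: resolve a number in the caller's namespace */
        struct pid *find_vpid(int nr)
        {
                return find_pid_ns(nr, current->nsproxy->pid_ns);
        }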
index 3bc3bff51e08d284d376dbe061457db053111367..d404e7994527760cb6129654726ff4cd44ab145d 100644 (file)
@@ -38,7 +38,7 @@ struct trap_trace_entry trapbuf[1024];
 
 void syscall_trace_entry(struct pt_regs *regs)
 {
-       printk("%s[%d]: ", current->comm, current->pid);
+       printk("%s[%d]: ", current->comm, task_pid_nr(current));
        printk("scall<%d> (could be %d)\n", (int) regs->u_regs[UREG_G1],
               (int) regs->u_regs[UREG_I0]);
 }
@@ -99,7 +99,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 "              /_| \\__/ |_\\\n"
 "                 \\__U_/\n");
 
-       printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+       printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
        show_regs(regs);
        add_taint(TAINT_DIE);
 
diff --git a/arch/sparc/oprofile/Kconfig b/arch/sparc/oprofile/Kconfig
deleted file mode 100644 (file)
index d8a8408..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-         
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
index 59c4d752d286d5638e02e9e82e190c2b56457074..c7a74e376985874afa22cc8347d4dcf4c19df3d6 100644 (file)
@@ -460,20 +460,7 @@ source "drivers/fc4/Kconfig"
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/sparc64/oprofile/Kconfig"
-
-config KPROBES
-       bool "Kprobes (EXPERIMENTAL)"
-       depends on KALLSYMS && EXPERIMENTAL && MODULES
-       help
-         Kprobes allows you to trap at almost any kernel address and
-         execute a callback function.  register_kprobe() establishes
-         a probepoint and specifies the callback.  Kprobes is useful
-         for kernel debugging, non-intrusive instrumentation and testing.
-         If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sparc64/Kconfig.debug"
 
index 8f7a06e2c7e739dfec3f3206c2b34745f222e3d7..170d6ca8de6f0e4d36b26c81ffdd0592059acfe2 100644 (file)
@@ -831,7 +831,7 @@ asmlinkage int sunos_killpg(int pgrp, int sig)
        rcu_read_lock();
        ret = -EINVAL;
        if (pgrp > 0)
-               ret = kill_pgrp(find_pid(pgrp), sig, 0);
+               ret = kill_pgrp(find_vpid(pgrp), sig, 0);
        rcu_read_unlock();
 
        return ret;
index 34573a55b6e5a92d65a420ca2daf168c65bfda65..e9c7e4f07abfd99d9f9e361ce461a7417b341aa7 100644 (file)
@@ -2225,7 +2225,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 "              /_| \\__/ |_\\\n"
 "                 \\__U_/\n");
 
-       printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+       printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
        notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
        __asm__ __volatile__("flushw");
        __show_regs(regs);
diff --git a/arch/sparc64/oprofile/Kconfig b/arch/sparc64/oprofile/Kconfig
deleted file mode 100644 (file)
index d8a8408..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-config PROFILING
-       bool "Profiling support (EXPERIMENTAL)"
-       help
-         Say Y here to enable the extended profiling support mechanisms used
-         by profilers such as OProfile.
-         
-
-config OPROFILE
-       tristate "OProfile system profiling (EXPERIMENTAL)"
-       depends on PROFILING
-       help
-         OProfile is a profiling system capable of profiling the
-         whole system, include the kernel, kernel modules, libraries,
-         and applications.
-
-         If unsure, say N.
-
index 3b67de7455f15c45f53524cc03269e18fa987521..c86cb3091a8ea8f28201f9afc66b718f230a107a 100644 (file)
@@ -415,7 +415,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
        
        switch (cmd) {
        case 0: /* getpgrp */
-               return process_group(current);
+               return task_pgrp_nr(current);
        case 1: /* setpgrp */
                {
                        int (*sys_setpgid)(pid_t,pid_t) =
@@ -426,7 +426,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
                        ret = sys_setpgid(0, 0);
                        if (ret) return ret;
                        proc_clear_tty(current);
-                       return process_group(current);
+                       return task_pgrp_nr(current);
                }
        case 2: /* getsid */
                {
index 740d8a922e4867e457c30c513fb3fa37e912fc8c..d8925d285573c1ce67ce9acfd5a0cac9eadb72a1 100644 (file)
@@ -289,4 +289,6 @@ config INPUT
        bool
        default n
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/um/Kconfig.debug"
index bd060551e6190d6fc92b2148840f94c9e71098c9..cb3321f8e0a91924167b0f83ea7f34c30df55080 100644 (file)
@@ -108,7 +108,7 @@ out_nosemaphore:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                up_read(&mm->mmap_sem);
                yield();
                down_read(&mm->mmap_sem);
index ce3e07fcf283747bdb72216bcf2f7968d2abe0af..765444031819c55ce6c51f92f09e665c57448ccd 100644 (file)
@@ -15,8 +15,8 @@ void __show_regs(struct pt_regs * regs)
 {
        printk("\n");
        print_modules();
-       printk("Pid: %d, comm: %.20s %s %s\n",
-              current->pid, current->comm, print_tainted(), init_utsname()->release);
+       printk("Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
+               current->comm, print_tainted(), init_utsname()->release);
        printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
               PT_REGS_RIP(regs));
        printk("\nRSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
index ace479ab273ff329cc387ce536e8598f171054b4..b6a50b8b38de2c4edc2fdbc8b30012d772c3b672 100644 (file)
@@ -331,6 +331,8 @@ source "sound/Kconfig"
 
 source "drivers/usb/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/v850/Kconfig.debug"
 
 source "security/Kconfig"
index 32e75d0731a901681c118fb073944992a082fa1c..72d0c56c1b48eff12779b557a75f3aee376c5571 100644 (file)
@@ -47,6 +47,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
                if (!kdump_buf_page) {
                        printk(KERN_WARNING "Kdump: Kdump buffer page not"
                                " allocated\n");
+                       kunmap_atomic(vaddr, KM_PTE0);
                        return -EFAULT;
                }
                copy_page(kdump_buf_page, vaddr);
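The added kunmap_atomic() closes an error-path leak: the old-memory page was mapped into the KM_PTE0 slot earlier in copy_oldmem_page(), and bailing out with -EFAULT without unmapping would leave that atomic-kmap slot occupied. The balanced shape of the function, schematically (assuming the kmap_atomic_pfn() mapping used earlier in it):

        vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
        if (!kdump_buf_page) {
                kunmap_atomic(vaddr, KM_PTE0);  /* unmap on every exit path */
                return -EFAULT;
        }
        copy_page(kdump_buf_page, vaddr);
        kunmap_atomic(vaddr, KM_PTE0);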
index 3c86b979a40aed829b52797cd8e90c88a6c8ed81..d58039e8de7480163af7d153d4df3b2396948e1a 100644 (file)
@@ -288,7 +288,8 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
                        request_resource(res, code_resource);
                        request_resource(res, data_resource);
 #ifdef CONFIG_KEXEC
-                       request_resource(res, &crashk_res);
+                       if (crashk_res.start != crashk_res.end)
+                               request_resource(res, &crashk_res);
 #endif
                }
        }
index e422b8159f694a7fa2c8e3172d8d63f49c1d9c78..57616865d8a033595daff1a12be912151b17a6d7 100644 (file)
@@ -226,7 +226,8 @@ void __init e820_reserve_resources(void)
                        request_resource(res, &code_resource);
                        request_resource(res, &data_resource);
 #ifdef CONFIG_KEXEC
-                       request_resource(res, &crashk_res);
+                       if (crashk_res.start != crashk_res.end)
+                               request_resource(res, &crashk_res);
 #endif
                }
        }
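Both e820 paths now skip registering the crash-kernel resource when nothing was reserved. The guard works because the resource is statically zero-initialized, so start == end means "no region". Roughly, the definition this relies on (an assumption about kernel/kexec.c):

        struct resource crashk_res = {
                .name  = "Crash kernel",
                .start = 0,
                .end   = 0,             /* start == end: nothing reserved yet */
                .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
        };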
index 8459ca64bc2f9c089acca975de93434d57ad065f..11b935f4f886d34679dcf594ac8465ab2a57fe00 100644 (file)
@@ -149,28 +149,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
                        image->start, cpu_has_pae);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
-{
-       unsigned long size, base;
-       size = memparse(arg, &arg);
-       if (*arg == '@') {
-               base = memparse(arg+1, &arg);
-               /* FIXME: Do I want a sanity check
-                * to validate the memory range?
-                */
-               crashk_res.start = base;
-               crashk_res.end   = base + size - 1;
-       }
-       return 0;
-}
-early_param("crashkernel", parse_crashkernel);
-
 void arch_crash_save_vmcoreinfo(void)
 {
 #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
index 7450b69710b5557f2f81c4a5ebf187b9c00ac58e..0d8577f0542257c325031d45693e7458622166ad 100644 (file)
@@ -231,33 +231,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
                        image->start);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init setup_crashkernel(char *arg)
-{
-       unsigned long size, base;
-       char *p;
-       if (!arg)
-               return -EINVAL;
-       size = memparse(arg, &p);
-       if (arg == p)
-               return -EINVAL;
-       if (*p == '@') {
-               base = memparse(p+1, &p);
-               /* FIXME: Do I want a sanity check to validate the
-                * memory range?  Yes you do, but it's too early for
-                * e820 -AK */
-               crashk_res.start = base;
-               crashk_res.end   = base + size - 1;
-       }
-       return 0;
-}
-early_param("crashkernel", setup_crashkernel);
-
 void arch_crash_save_vmcoreinfo(void)
 {
 #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
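Both per-arch crashkernel= parsers are deleted because parsing moves into a generic helper that the reworked boot paths below call instead. Its shape, inferred from those call sites:

        /* kernel/kexec.c, roughly */
        int parse_crashkernel(char *cmdline,
                              unsigned long long system_ram,
                              unsigned long long *crash_size,
                              unsigned long long *crash_base);
        /* returns 0 on success; *crash_base is left 0 when no base was given */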
index 097aeafce5fff14bf9c02874f5ca69f68aaa4f42..044a47745a5cfe5eee6999297da3582e18681dc2 100644 (file)
@@ -301,7 +301,7 @@ void show_regs(struct pt_regs * regs)
        unsigned long d0, d1, d2, d3, d6, d7;
 
        printk("\n");
-       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+       printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
        printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
        print_symbol("EIP is at %s\n", regs->eip);
 
index b87a6fd5ba48739b7be30f6cbcb434be8027a5cd..978dc0196a0f1101ab77095b8d74948159cead4b 100644 (file)
@@ -378,6 +378,49 @@ extern unsigned long __init setup_memory(void);
 extern void zone_sizes_init(void);
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
+static inline unsigned long long get_total_mem(void)
+{
+       unsigned long long total;
+
+       total = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+       total += highend_pfn - highstart_pfn;
+#endif
+
+       return total << PAGE_SHIFT;
+}
+
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+       unsigned long long total_mem;
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       total_mem = get_total_mem();
+
+       ret = parse_crashkernel(boot_command_line, total_mem,
+                       &crash_size, &crash_base);
+       if (ret == 0 && crash_size > 0) {
+               if (crash_base > 0) {
+                       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                                       "for crashkernel (System RAM: %ldMB)\n",
+                                       (unsigned long)(crash_size >> 20),
+                                       (unsigned long)(crash_base >> 20),
+                                       (unsigned long)(total_mem >> 20));
+                       crashk_res.start = crash_base;
+                       crashk_res.end   = crash_base + crash_size - 1;
+                       reserve_bootmem(crash_base, crash_size);
+               } else
+                       printk(KERN_INFO "crashkernel reservation failed - "
+                                       "you have to specify a base address\n");
+       }
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 void __init setup_bootmem_allocator(void)
 {
        unsigned long bootmap_size;
@@ -453,11 +496,7 @@ void __init setup_bootmem_allocator(void)
                }
        }
 #endif
-#ifdef CONFIG_KEXEC
-       if (crashk_res.start != crashk_res.end)
-               reserve_bootmem(crashk_res.start,
-                       crashk_res.end - crashk_res.start + 1);
-#endif
+       reserve_crashkernel();
 }
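With the shared parser, the i386 and x86_64 reservation paths become identical in structure. Worked through with illustrative values: booting with crashkernel=64M@16M would yield

        crash_size = 64ULL << 20;       /* 64 MB requested */
        crash_base = 16ULL << 20;       /* at the 16 MB mark */
        /* crashk_res then spans [16M, 16M + 64M - 1] and reserve_bootmem()
         * keeps the region away from the regular allocator */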
 
 /*
index 5a19f0cc5b6766c8c0ea3eed1164ec3605ac7c32..cdcba69752263678c1d5337c5a8d75a000f30be7 100644 (file)
@@ -191,6 +191,37 @@ static inline void copy_edd(void)
 }
 #endif
 
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+       unsigned long long free_mem;
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+       ret = parse_crashkernel(boot_command_line, free_mem,
+                       &crash_size, &crash_base);
+       if (ret == 0 && crash_size) {
+               if (crash_base > 0) {
+                       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                                       "for crashkernel (System RAM: %ldMB)\n",
+                                       (unsigned long)(crash_size >> 20),
+                                       (unsigned long)(crash_base >> 20),
+                                       (unsigned long)(free_mem >> 20));
+                       crashk_res.start = crash_base;
+                       crashk_res.end   = crash_base + crash_size - 1;
+                       reserve_bootmem(crash_base, crash_size);
+               } else
+                       printk(KERN_INFO "crashkernel reservation failed - "
+                                       "you have to specify a base address\n");
+       }
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 #define EBDA_ADDR_POINTER 0x40E
 
 unsigned __initdata ebda_addr;
@@ -357,13 +388,7 @@ void __init setup_arch(char **cmdline_p)
                }
        }
 #endif
-#ifdef CONFIG_KEXEC
-       if (crashk_res.start != crashk_res.end) {
-               reserve_bootmem_generic(crashk_res.start,
-                       crashk_res.end - crashk_res.start + 1);
-       }
-#endif
-
+       reserve_crashkernel();
        paging_init();
 
 #ifdef CONFIG_PCI
index 0d79df3c5631e015886619d29870d9c9d3a3884e..6dc394b8725526dfc03c7fbefd04aedc64ec671f 100644 (file)
@@ -200,8 +200,8 @@ badframe:
        if (show_unhandled_signals && printk_ratelimit())
                printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
                       " esp:%lx oeax:%lx\n",
-                   current->pid > 1 ? KERN_INFO : KERN_EMERG,
-                   current->comm, current->pid, frame, regs->eip,
+                   task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+                   current->comm, task_pid_nr(current), frame, regs->eip,
                    regs->esp, regs->orig_eax);
 
        force_sig(SIGSEGV, current);
index b132d3957dfc48c88684cb5b3fe492238f02480f..1e9d57256eb11ff9061d3713f3844ce1fa8634aa 100644 (file)
@@ -316,7 +316,7 @@ void show_registers(struct pt_regs *regs)
        printk(KERN_EMERG "ds: %04x   es: %04x   fs: %04x  gs: %04x  ss: %04x\n",
               regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
        printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
-               TASK_COMM_LEN, current->comm, current->pid,
+               TASK_COMM_LEN, current->comm, task_pid_nr(current),
                current_thread_info(), current, task_thread_info(current));
        /*
         * When in-kernel, we also print out the stack and code at the
@@ -622,7 +622,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
            printk_ratelimit())
                printk(KERN_INFO
                    "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
-                   current->comm, current->pid,
+                   current->comm, task_pid_nr(current),
                    regs->eip, regs->esp, error_code);
 
        force_sig(SIGSEGV, current);
index 9f38b12b4af1bf103b1261b1ceee185c541a28cd..8bab2b2efaff86c2707ccc929b996f5a245cecd5 100644 (file)
@@ -748,7 +748,7 @@ survive:
                        retval = get_user_pages(current, current->mm,
                                        (unsigned long )to, 1, 1, 0, &pg, NULL);
 
-                       if (retval == -ENOMEM && is_init(current)) {
+                       if (retval == -ENOMEM && is_global_init(current)) {
                                up_read(&current->mm->mmap_sem);
                                congestion_wait(WRITE, HZ/50);
                                goto survive;
index 6555c3d143716c74dc97d466ffafd1baf1399f35..4d3e538c57ab55e81faa71bfec11a002c51d03a8 100644 (file)
@@ -471,8 +471,8 @@ bad_area_nosemaphore:
                    printk_ratelimit()) {
                        printk("%s%s[%d]: segfault at %08lx eip %08lx "
                            "esp %08lx error %lx\n",
-                           tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-                           tsk->comm, tsk->pid, address, regs->eip,
+                           task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+                           tsk->comm, task_pid_nr(tsk), address, regs->eip,
                            regs->esp, error_code);
                }
                tsk->thread.cr2 = address;
@@ -587,7 +587,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(tsk)) {
+       if (is_global_init(tsk)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 5e0e54906c488fe5b7266d013c072672a606ba29..5149ac136a5db0bcc25330b86b188b71e570b858 100644 (file)
@@ -554,7 +554,7 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                goto again;
        }
index 8a4f65bf956ea85ef45ecb6157eccceca82f92a5..c7b7dfe1d405153717fa8f91fa15ecbde809b2b7 100644 (file)
@@ -230,9 +230,14 @@ void global_flush_tlb(void)
        struct page *pg, *next;
        struct list_head l;
 
-       down_read(&init_mm.mmap_sem);
+       /*
+        * Write-protect the semaphore, to exclude two contexts
+        * doing a list_replace_init() call in parallel and to
+        * exclude new additions to the deferred_pages list:
+        */
+       down_write(&init_mm.mmap_sem);
        list_replace_init(&deferred_pages, &l);
-       up_read(&init_mm.mmap_sem);
+       up_write(&init_mm.mmap_sem);
 
        flush_map(&l);
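The read-to-write lock switch matters because list_replace_init() mutates the list head itself: two contexts both holding the semaphore for reading could run it concurrently and corrupt deferred_pages, exactly the races the new comment names. The pattern in isolation, as a sketch:

        /* drain a shared list exclusively; init_mm.mmap_sem plays this role above */
        down_write(&sem);                       /* excludes parallel drains and adds */
        list_replace_init(&deferred_pages, &l);
        up_write(&sem);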
 
index 78cb68f2ebbd7653c3b9d9395369fff7904e5761..d2521942e5bdca4213f2978319a0bf009f585469 100644 (file)
@@ -799,21 +799,6 @@ source "drivers/firmware/Kconfig"
 
 source fs/Kconfig
 
-menu "Instrumentation Support"
-
-source "arch/x86/oprofile/Kconfig"
-
-config KPROBES
-       bool "Kprobes"
-       depends on KALLSYMS && MODULES
-       help
-         Kprobes allows you to trap at almost any kernel address and
-         execute a callback function.  register_kprobe() establishes
-         a probepoint and specifies the callback.  Kprobes is useful
-         for kernel debugging, non-intrusive instrumentation and testing.
-         If in doubt, say "N".
-endmenu
-
 source "arch/x86_64/Kconfig.debug"
 
 source "security/Kconfig"
index 7fbb44bea37f85048ebb8c6c55e2a9063477083d..85ffbb491490b47f78b4347c4af35d375ca6c988 100644 (file)
@@ -251,6 +251,8 @@ config EMBEDDED_RAMDISK_IMAGE
          provide one yourself.
 endmenu
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/xtensa/Kconfig.debug"
 
 source "security/Kconfig"
index 8be99c777d9d2b55c69c39f8252630f82192a1b1..397bcd6ad08d4ca860e8d936862fd22f1f1629ac 100644 (file)
@@ -176,7 +176,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
        printk("Caught unhandled exception in '%s' "
               "(pid = %d, pc = %#010lx) - should not happen\n"
               "\tEXCCAUSE is %ld\n",
-              current->comm, current->pid, regs->pc, exccause);
+              current->comm, task_pid_nr(current), regs->pc, exccause);
        force_sig(SIGILL, current);
 }
 
@@ -228,7 +228,7 @@ do_illegal_instruction(struct pt_regs *regs)
        /* If in user mode, send SIGILL signal to current process. */
 
        printk("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n",
-           current->comm, current->pid, regs->pc);
+           current->comm, task_pid_nr(current), regs->pc);
        force_sig(SIGILL, current);
 }
 
@@ -254,7 +254,7 @@ do_unaligned_user (struct pt_regs *regs)
        current->thread.error_code = -3;
        printk("Unaligned memory access to %08lx in '%s' "
               "(pid = %d, pc = %#010lx)\n",
-              regs->excvaddr, current->comm, current->pid, regs->pc);
+              regs->excvaddr, current->comm, task_pid_nr(current), regs->pc);
        info.si_signo = SIGBUS;
        info.si_errno = 0;
        info.si_code = BUS_ADRALN;
index 2f842859948f5993780b83fae5d3e66390bf1d6d..33f366be323fc05f075519a0dde7826795f8c568 100644 (file)
@@ -145,7 +145,7 @@ bad_area:
         */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_init(current)) {
+       if (is_global_init(current)) {
                yield();
                down_read(&mm->mmap_sem);
                goto survive;
index 3935469e366222c793a429f65461bf4a0becf62e..8025d646ab3085ff5627fc0f082465eba8329ce7 100644 (file)
@@ -3367,7 +3367,7 @@ void submit_bio(int rw, struct bio *bio)
                if (unlikely(block_dump)) {
                        char b[BDEVNAME_SIZE];
                        printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-                               current->comm, current->pid,
+                               current->comm, task_pid_nr(current),
                                (rw & WRITE) ? "WRITE" : "READ",
                                (unsigned long long)bio->bi_sector,
                                bdevname(bio->bi_bdev,b));
@@ -3739,7 +3739,7 @@ EXPORT_SYMBOL(end_dequeued_request);
 
 /**
  * end_request - end I/O on the current segment of the request
- * @rq:                the request being processed
+ * @req:       the request being processed
  * @uptodate:  error value or 0/1 uptodate flag
  *
  * Description:
index 2e79a3395ecffca8641cc02e64b616ef88be5ba6..301e832e6961290912396f92fd54244f30b5ba7f 100644 (file)
@@ -434,18 +434,18 @@ static int acpi_button_add(struct acpi_device *device)
        switch (button->type) {
        case ACPI_BUTTON_TYPE_POWER:
        case ACPI_BUTTON_TYPE_POWERF:
-               input->evbit[0] = BIT(EV_KEY);
+               input->evbit[0] = BIT_MASK(EV_KEY);
                set_bit(KEY_POWER, input->keybit);
                break;
 
        case ACPI_BUTTON_TYPE_SLEEP:
        case ACPI_BUTTON_TYPE_SLEEPF:
-               input->evbit[0] = BIT(EV_KEY);
+               input->evbit[0] = BIT_MASK(EV_KEY);
                set_bit(KEY_SLEEP, input->keybit);
                break;
 
        case ACPI_BUTTON_TYPE_LID:
-               input->evbit[0] = BIT(EV_SW);
+               input->evbit[0] = BIT_MASK(EV_SW);
                set_bit(SW_LID, input->swbit);
                break;
        }
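Here begins the tree-wide switch from the input layer's BIT()/LONG() idiom to the generic bitmap helpers BIT_MASK()/BIT_WORD(). Their definitions are, roughly, from <linux/bitops.h>:

        #define BIT_MASK(nr)    (1UL << ((nr) % BITS_PER_LONG)) /* bit within its word */
        #define BIT_WORD(nr)    ((nr) / BITS_PER_LONG)          /* word holding the bit */

        /* e.g. marking a button in a multi-word keybit bitmap: */
        keybit[BIT_WORD(BTN_MIDDLE)] |= BIT_MASK(BTN_MIDDLE);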
index cb136a919f2a83f1f97e6936949cebe6e4fd9f72..ac4a0cb217ab33b75a6a41814df03d08f39ccca2 100644 (file)
@@ -188,7 +188,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
                if (signal_pending(current)) {
                        siginfo_t info;
                        printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
-                               current->pid, current->comm,
+                               task_pid_nr(current), current->comm,
                                dequeue_signal_lock(current, &current->blocked, &info));
                        result = -EINTR;
                        sock_shutdown(lo, !send);
index d70745c84250cb4b3f15d51adae267019a2d32c9..af0561053167cd177d89c499c3d9f2b108843d46 100644 (file)
@@ -1107,7 +1107,7 @@ int open_for_data(struct cdrom_device_info * cdi)
                       is the default case! */
                    cdinfo(CD_OPEN, "bummer. wrong media type.\n"); 
                    cdinfo(CD_WARNING, "pid %d must open device O_NONBLOCK!\n",
-                                       (unsigned int)current->pid); 
+                                       (unsigned int)task_pid_nr(current));
                    ret=-EMEDIUMTYPE;
                    goto clean_up_and_return;
                }
index 856774fbe025f17e6f7ed4b3014d2d37e8b08c22..d24a6c2c2c24e10f33d7498fccd30e16a8574ec0 100644 (file)
@@ -1456,7 +1456,7 @@ int drm_freebufs(struct drm_device *dev, void *data,
                buf = dma->buflist[idx];
                if (buf->file_priv != file_priv) {
                        DRM_ERROR("Process %d freeing buffer not owned\n",
-                                 current->pid);
+                                 task_pid_nr(current));
                        return -EINVAL;
                }
                drm_free_buffer(dev, buf);
index 72668b15e5ce2a5a8db7aedce9b73691762a8f05..44a46268b02b138c310c345b6ed6ecbdad7599f3 100644 (file)
@@ -463,7 +463,7 @@ int drm_ioctl(struct inode *inode, struct file *filp,
        ++file_priv->ioctl_count;
 
        DRM_DEBUG("pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n",
-                 current->pid, cmd, nr,
+                 task_pid_nr(current), cmd, nr,
                  (long)old_encode_dev(file_priv->head->device),
                  file_priv->authenticated);
 
index f383fc37190c1a1d2a3a18396adda4d130a6b3f4..3992f73299cc8e161ffd67db642040051a9c8243 100644 (file)
@@ -234,7 +234,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
        if (!drm_cpu_valid())
                return -EINVAL;
 
-       DRM_DEBUG("pid = %d, minor = %d\n", current->pid, minor);
+       DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor);
 
        priv = drm_alloc(sizeof(*priv), DRM_MEM_FILES);
        if (!priv)
@@ -244,7 +244,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
        filp->private_data = priv;
        priv->filp = filp;
        priv->uid = current->euid;
-       priv->pid = current->pid;
+       priv->pid = task_pid_nr(current);
        priv->minor = minor;
        priv->head = drm_heads[minor];
        priv->ioctl_count = 0;
@@ -339,7 +339,8 @@ int drm_release(struct inode *inode, struct file *filp)
         */
 
        DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n",
-                 current->pid, (long)old_encode_dev(file_priv->head->device),
+                 task_pid_nr(current),
+                 (long)old_encode_dev(file_priv->head->device),
                  dev->open_count);
 
        if (dev->driver->reclaim_buffers_locked && dev->lock.hw_lock) {
index c6b73e744d67baba2fcd96094e3e3748f8738df2..bea2a7d5b2b22f695d3c6131d2068450ed55f074 100644 (file)
@@ -58,12 +58,12 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
        if (lock->context == DRM_KERNEL_CONTEXT) {
                DRM_ERROR("Process %d using kernel context %d\n",
-                         current->pid, lock->context);
+                         task_pid_nr(current), lock->context);
                return -EINVAL;
        }
 
        DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n",
-                 lock->context, current->pid,
+                 lock->context, task_pid_nr(current),
                  dev->lock.hw_lock->lock, lock->flags);
 
        if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE))
@@ -153,7 +153,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
        if (lock->context == DRM_KERNEL_CONTEXT) {
                DRM_ERROR("Process %d using kernel context %d\n",
-                         current->pid, lock->context);
+                         task_pid_nr(current), lock->context);
                return -EINVAL;
        }
 
index 114e54e0f61bc8d5f0cc1c7d85d2c72cfa1f9750..76e44ac94fb52f4315a6c3498359d1e7d523ec69 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/delay.h>
 
 /** Current process ID */
-#define DRM_CURRENTPID                 current->pid
+#define DRM_CURRENTPID                 task_pid_nr(current)
 #define DRM_SUSER(p)                   capable(CAP_SYS_ADMIN)
 #define DRM_UDELAY(d)                  udelay(d)
 /** Read a byte from a MMIO region */
index 8e841bdee6dc219c067032c7add3b74152f5fa63..eb381a7c5beed40920fa577061ebec084331da86 100644 (file)
@@ -1024,7 +1024,7 @@ static int i810_getbuf(struct drm_device *dev, void *data,
        retcode = i810_dma_get_buffer(dev, d, file_priv);
 
        DRM_DEBUG("i810_dma: %d returning %d, granted = %d\n",
-                 current->pid, retcode, d->granted);
+                 task_pid_nr(current), retcode, d->granted);
 
        sarea_priv->last_dispatch = (int)hw_status[5];
 
index 43a1f78712d6cc784f24048d10b9ca71cbc060a9..69a363edb0d202b157d262ea0150aa185384eb1a 100644 (file)
@@ -1409,7 +1409,7 @@ static int i830_getbuf(struct drm_device *dev, void *data,
        retcode = i830_dma_get_buffer(dev, d, file_priv);
 
        DRM_DEBUG("i830_dma: %d returning %d, granted = %d\n",
-                 current->pid, retcode, d->granted);
+                 task_pid_nr(current), retcode, d->granted);
 
        sarea_priv->last_dispatch = (int)hw_status[5];
 
index 2e7ae42a5503be7b3120ee91acc2e9fb50e00cde..0f8fb135da53108f1015aee4ebd47e821373bb0c 100644 (file)
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 
 #include <asm/dma.h>
 #include <linux/slab.h>
index 212276affa1f5c887233cdd3c14e987a6941fd22..fc54d234507ace68d0470122f8c3dba967b977e5 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/sysrq.h>
 #include <linux/input.h>
 #include <linux/reboot.h>
+#include <linux/notifier.h>
 
 extern void ctrl_alt_del(void);
 
@@ -81,7 +82,8 @@ void compute_shiftstate(void);
 typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value,
                            char up_flag);
 static k_handler_fn K_HANDLERS;
-static k_handler_fn *k_handler[16] = { K_HANDLERS };
+k_handler_fn *k_handler[16] = { K_HANDLERS };
+EXPORT_SYMBOL_GPL(k_handler);
 
 #define FN_HANDLERS\
        fn_null,        fn_enter,       fn_show_ptregs, fn_show_mem,\
@@ -127,7 +129,7 @@ int shift_state = 0;
  */
 
 static struct input_handler kbd_handler;
-static unsigned long key_down[NBITS(KEY_MAX)];         /* keyboard key bitmap */
+static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)]; /* keyboard key bitmap */
 static unsigned char shift_down[NR_SHIFT];             /* shift state counters.. */
 static int dead_key_next;
 static int npadch = -1;                                        /* -1 or number assembled on pad */
@@ -159,6 +161,23 @@ static int sysrq_alt_use;
 #endif
 static int sysrq_alt;
 
+/*
+ * Notifier list for console keyboard events
+ */
+static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list);
+
+int register_keyboard_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_register(&keyboard_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_keyboard_notifier);
+
+int unregister_keyboard_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_keyboard_notifier);
+
 /*
  * Translation of scancodes to keycodes. We set them on only the first
  * keyboard in the list that accepts the scancode and keycode.
@@ -1130,6 +1149,7 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
        unsigned char type, raw_mode;
        struct tty_struct *tty;
        int shift_final;
+       struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down };
 
        tty = vc->vc_tty;
 
@@ -1217,10 +1237,11 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
                return;
        }
 
-       shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
+       param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
        key_map = key_maps[shift_final];
 
-       if (!key_map) {
+       if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYCODE, &param) == NOTIFY_STOP || !key_map) {
+               atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNBOUND_KEYCODE, &param);
                compute_shiftstate();
                kbd->slockstate = 0;
                return;
@@ -1237,6 +1258,9 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
        type = KTYP(keysym);
 
        if (type < 0xf0) {
+               param.value = keysym;
+               if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNICODE, &param) == NOTIFY_STOP)
+                       return;
                if (down && !raw_mode)
                        to_utf8(vc, keysym);
                return;
@@ -1244,9 +1268,6 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 
        type -= 0xf0;
 
-       if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
-               return;
-
        if (type == KT_LETTER) {
                type = KT_LATIN;
                if (vc_kbd_led(kbd, VC_CAPSLOCK)) {
@@ -1255,9 +1276,18 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
                                keysym = key_map[keycode];
                }
        }
+       param.value = keysym;
+
+       if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYSYM, &param) == NOTIFY_STOP)
+               return;
+
+       if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
+               return;
 
        (*k_handler[type])(vc, keysym & 0xff, !down);
 
+       atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param);
+
        if (type != KT_SLOCK)
                kbd->slockstate = 0;
 }
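The new notifier chain gives out-of-line consumers a hook at every stage of key processing -- KBD_KEYCODE, KBD_UNBOUND_KEYCODE, KBD_UNICODE, KBD_KEYSYM and KBD_POST_KEYSYM -- with NOTIFY_STOP suppressing normal handling of the event. A minimal consumer, sketched against the param fields this hunk populates (function and variable names are illustrative):

        static int example_key_notify(struct notifier_block *nb,
                                      unsigned long action, void *data)
        {
                struct keyboard_notifier_param *param = data;

                if (action == KBD_KEYSYM && param->down)
                        printk(KERN_DEBUG "keysym 0x%x on vc %p\n",
                               param->value, param->vc);
                return NOTIFY_OK;       /* NOTIFY_STOP would swallow the key */
        }

        static struct notifier_block example_key_nb = {
                .notifier_call = example_key_notify,
        };
        /* register_keyboard_notifier(&example_key_nb) from the consumer's init */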
@@ -1347,12 +1377,12 @@ static void kbd_start(struct input_handle *handle)
 static const struct input_device_id kbd_ids[] = {
        {
                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
-                .evbit = { BIT(EV_KEY) },
+                .evbit = { BIT_MASK(EV_KEY) },
         },
 
        {
                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
-                .evbit = { BIT(EV_SND) },
+                .evbit = { BIT_MASK(EV_SND) },
         },
 
        { },    /* Terminating entry */
index 661aca0e155da9a7140605069f52637abd1faa18..fd0abef7ee0867dfb86ccc0958e0a1c64402e947 100644 (file)
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include "mxser.h"
index 854dbf59eb68b036b82afab1e77dd58257a86e79..081c84c7b5489ae9b3e80fb2730d8622dc552790 100644 (file)
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include "mxser_new.h"
index 859858561ab6705e5f398117dc5e83e8866df77c..9782cb4d30dcaedbd0e5405dfc657c111b33c49b 100644 (file)
@@ -1178,9 +1178,9 @@ static int __devinit sonypi_create_input_devices(void)
        jog_dev->id.bustype = BUS_ISA;
        jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
-       jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE);
-       jog_dev->relbit[0] = BIT(REL_WHEEL);
+       jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
+       jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
 
        sonypi_device.input_key_dev = key_dev = input_allocate_device();
        if (!key_dev) {
@@ -1193,7 +1193,7 @@ static int __devinit sonypi_create_input_devices(void)
        key_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
        /* Initialize the Input Drivers: special keys */
-       key_dev->evbit[0] = BIT(EV_KEY);
+       key_dev->evbit[0] = BIT_MASK(EV_KEY);
        for (i = 0; sonypi_inputkeys[i].sonypiev; i++)
                if (sonypi_inputkeys[i].inputev)
                        set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit);
index 85a23283dff55144b594ae86b6a87100814fdf66..a6e1c9ba12174d769e1389d5329b70676e5e3b16 100644 (file)
@@ -1467,7 +1467,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp)
 
        line = tty->index;
        sx_dprintk(SX_DEBUG_OPEN, "%d: opening line %d. tty=%p ctty=%p, "
-                       "np=%d)\n", current->pid, line, tty,
+                       "np=%d)\n", task_pid_nr(current), line, tty,
                        current->signal->tty, sx_nports);
 
        if ((line < 0) || (line >= SX_NPORTS) || (line >= sx_nports))
index 78d14935f2b8c4b1dbb4678f0d918858c6a15832..de60e1ea4fb3830e01524b73381738d7e98485b6 100644 (file)
@@ -251,7 +251,7 @@ static void send_sig_all(int sig)
        struct task_struct *p;
 
        for_each_process(p) {
-               if (p->mm && !is_init(p))
+               if (p->mm && !is_global_init(p))
                        /* Not swapper, init nor kernel thread */
                        force_sig(sig, p);
        }
index 9c867cf6de6456293314eb1675e697c87a9f64d2..13a53575a0164bf82ea5716c7c1d5fbc30b8cbbf 100644 (file)
 #include <linux/selection.h>
 
 #include <linux/kmod.h>
+#include <linux/nsproxy.h>
 
 #undef TTY_DEBUG_HANGUP
 
@@ -3107,7 +3108,7 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
         */
        if (tty == real_tty && current->signal->tty != real_tty)
                return -ENOTTY;
-       return put_user(pid_nr(real_tty->pgrp), p);
+       return put_user(pid_vnr(real_tty->pgrp), p);
 }
 
 /**
@@ -3141,7 +3142,7 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
        if (pgrp_nr < 0)
                return -EINVAL;
        rcu_read_lock();
-       pgrp = find_pid(pgrp_nr);
+       pgrp = find_vpid(pgrp_nr);
        retval = -ESRCH;
        if (!pgrp)
                goto out_unlock;
@@ -3178,7 +3179,7 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _
                return -ENOTTY;
        if (!real_tty->session)
                return -ENOTTY;
-       return put_user(pid_nr(real_tty->session), p);
+       return put_user(pid_vnr(real_tty->session), p);
 }
 
 /**
@@ -3528,8 +3529,8 @@ void __do_SAK(struct tty_struct *tty)
        /* Kill the entire session */
        do_each_pid_task(session, PIDTYPE_SID, p) {
                printk(KERN_NOTICE "SAK: killed process %d"
-                       " (%s): process_session(p)==tty->session\n",
-                       p->pid, p->comm);
+                       " (%s): task_session_nr(p)==tty->session\n",
+                       task_pid_nr(p), p->comm);
                send_sig(SIGKILL, p, 1);
        } while_each_pid_task(session, PIDTYPE_SID, p);
        /* Now kill any processes that happen to have the
@@ -3538,8 +3539,8 @@ void __do_SAK(struct tty_struct *tty)
        do_each_thread(g, p) {
                if (p->signal->tty == tty) {
                        printk(KERN_NOTICE "SAK: killed process %d"
-                           " (%s): process_session(p)==tty->session\n",
-                           p->pid, p->comm);
+                           " (%s): task_session_nr(p)==tty->session\n",
+                           task_pid_nr(p), p->comm);
                        send_sig(SIGKILL, p, 1);
                        continue;
                }
@@ -3559,7 +3560,7 @@ void __do_SAK(struct tty_struct *tty)
                                    filp->private_data == tty) {
                                        printk(KERN_NOTICE "SAK: killed process %d"
                                            " (%s): fd#%d opened to the tty\n",
-                                           p->pid, p->comm, i);
+                                           task_pid_nr(p), p->comm, i);
                                        force_sig(SIGKILL, p);
                                        break;
                                }
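pid_nr() -> pid_vnr() is the reporting-side counterpart of the find_vpid() change: the pgrp and session numbers returned by TIOCGPGRP/TIOCGSID are translated into the caller's pid namespace rather than the global one. Roughly, the assumed helper:

        /* the namespace-relative counterpart of pid_nr(), roughly */
        static inline pid_t pid_vnr(struct pid *pid)
        {
                return pid_nr_ns(pid, current->nsproxy->pid_ns);
        }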
index 1764c67b585f6cc5fd045877b517bef5e3492f72..7a5badfb7d846e00d85dbbd2c082461264b7a660 100644 (file)
@@ -99,6 +99,7 @@
 #include <linux/pm.h>
 #include <linux/font.h>
 #include <linux/bitops.h>
+#include <linux/notifier.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -222,6 +223,35 @@ enum {
        blank_vesa_wait,
 };
 
+/*
+ * Notifier list for console events.
+ */
+static ATOMIC_NOTIFIER_HEAD(vt_notifier_list);
+
+int register_vt_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_register(&vt_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_vt_notifier);
+
+int unregister_vt_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_unregister(&vt_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_vt_notifier);
+
+static void notify_write(struct vc_data *vc, unsigned int unicode)
+{
+       struct vt_notifier_param param = { .vc = vc, .c = unicode };
+       atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param);
+}
+
+static void notify_update(struct vc_data *vc)
+{
+       struct vt_notifier_param param = { .vc = vc };
+       atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param);
+}
+
 /*
  *     Low-Level Functions
  */
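These are the output-side hooks matching the keyboard chain: VT_ALLOCATE, VT_DEALLOCATE, VT_WRITE and VT_UPDATE fire as consoles come and go and as characters are written. A minimal listener, sketched against the fields used above (names are illustrative):

        static int example_vt_notify(struct notifier_block *nb,
                                     unsigned long action, void *data)
        {
                struct vt_notifier_param *param = data;

                if (action == VT_WRITE)
                        printk(KERN_DEBUG "vc %p wrote U+%04x\n",
                               param->vc, param->c);
                return NOTIFY_OK;
        }

        static struct notifier_block example_vt_nb = {
                .notifier_call = example_vt_notify,
        };
        /* register_vt_notifier(&example_vt_nb) from the consumer's init */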
@@ -718,6 +748,7 @@ int vc_allocate(unsigned int currcons)      /* return 0 on success */
                return -ENXIO;
        if (!vc_cons[currcons].d) {
            struct vc_data *vc;
+           struct vt_notifier_param param;
 
            /* prevent users from taking too much memory */
            if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE))
@@ -729,7 +760,7 @@ int vc_allocate(unsigned int currcons)      /* return 0 on success */
            /* although the numbers above are not valid since long ago, the
               point is still up-to-date and the comment still has its value
               even if only as a historical artifact.  --mj, July 1998 */
-           vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
+           param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
            if (!vc)
                return -ENOMEM;
            vc_cons[currcons].d = vc;
@@ -746,6 +777,7 @@ int vc_allocate(unsigned int currcons)      /* return 0 on success */
            }
            vc->vc_kmalloced = 1;
            vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
+           atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param);
        }
        return 0;
 }
@@ -907,6 +939,8 @@ void vc_deallocate(unsigned int currcons)
 
        if (vc_cons_allocated(currcons)) {
                struct vc_data *vc = vc_cons[currcons].d;
+               struct vt_notifier_param param = { .vc = vc };
+               atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param);
                vc->vc_sw->con_deinit(vc);
                put_pid(vc->vt_pid);
                module_put(vc->vc_sw->owner);
@@ -1019,6 +1053,7 @@ static void lf(struct vc_data *vc)
                vc->vc_pos += vc->vc_size_row;
        }
        vc->vc_need_wrap = 0;
+       notify_write(vc, '\n');
 }
 
 static void ri(struct vc_data *vc)
@@ -1039,6 +1074,7 @@ static inline void cr(struct vc_data *vc)
 {
        vc->vc_pos -= vc->vc_x << 1;
        vc->vc_need_wrap = vc->vc_x = 0;
+       notify_write(vc, '\r');
 }
 
 static inline void bs(struct vc_data *vc)
@@ -1047,6 +1083,7 @@ static inline void bs(struct vc_data *vc)
                vc->vc_pos -= 2;
                vc->vc_x--;
                vc->vc_need_wrap = 0;
+               notify_write(vc, '\b');
        }
 }
 
@@ -1593,6 +1630,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
                                break;
                }
                vc->vc_pos += (vc->vc_x << 1);
+               notify_write(vc, '\t');
                return;
        case 10: case 11: case 12:
                lf(vc);
@@ -2252,6 +2290,7 @@ rescan_last_byte:
                                tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
                                if (tc < 0) tc = ' ';
                        }
+                       notify_write(vc, c);
 
                        if (inverse) {
                                FLUSH
@@ -2274,6 +2313,7 @@ rescan_last_byte:
        release_console_sem();
 
 out:
+       notify_update(vc);
        return n;
 #undef FLUSH
 }
@@ -2317,6 +2357,7 @@ static void console_callback(struct work_struct *ignored)
                do_blank_screen(0);
                blank_timer_expired = 0;
        }
+       notify_update(vc_cons[fg_console].d);
 
        release_console_sem();
 }
@@ -2418,6 +2459,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
                                continue;
                }
                scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
+               notify_write(vc, c);
                cnt++;
                if (myx == vc->vc_cols - 1) {
                        vc->vc_need_wrap = 1;
@@ -2436,6 +2478,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
                }
        }
        set_cursor(vc);
+       notify_update(vc);
 
 quit:
        clear_bit(0, &printing);
index e80af67664cc72829fbd0caad82a7ba50c60c5c2..2d23e304f5ec1083b9850e46fe817c74b4e78d00 100644 (file)
@@ -94,8 +94,6 @@ extern int edac_debug_level;
 
 #endif                         /* !CONFIG_EDAC_DEBUG */
 
-#define BIT(x) (1 << (x))
-
 #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
        PCI_DEVICE_ID_ ## vend ## _ ## dev
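With <linux/bitops.h> now pulled in widely, drivers can no longer keep private BIT macros: edac drops its copy here, and i2c-pxa renames its three-argument variant to PXA_BIT further down. The generic definition they would collide with is simply:

        /* <linux/bitops.h> */
        #define BIT(nr)         (1UL << (nr))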
 
index e66cdd42a392f5eadeb84bfc20038e3818b835f0..9007d0677220cf3aef8a524608d345452820ba6b 100644 (file)
@@ -270,6 +270,7 @@ static void __devexit pasemi_edac_remove(struct pci_dev *pdev)
 
 static const struct pci_device_id pasemi_edac_pci_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) },
+       { }
 };
 
 MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl);
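The added { } entry is the required sentinel: the PCI core and the module alias machinery walk a pci_device_id table until they hit an all-zero entry, so a table without one runs off the end of the array. Schematically (table name illustrative):

        static const struct pci_device_id example_tbl[] = {
                { PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) },
                { }     /* sentinel: matching stops at the zeroed entry */
        };
        MODULE_DEVICE_TABLE(pci, example_tbl);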
index dcdba0f1b32c835cdcd37d0d2cb975442ab31348..87bc3417de2745c62de30ddc88c9fc8cc6e5a8e1 100644 (file)
@@ -17,7 +17,6 @@
 #define _DCDBAS_H_
 
 #include <linux/device.h>
-#include <linux/input.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
index a702e2f6da7d953892a468574e8348e23397e735..1ca6f4635eeb3e0474abada594892ac5683ce7ba 100644 (file)
@@ -113,13 +113,13 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
 
        if (count > HID_MIN_BUFFER_SIZE) {
                printk(KERN_WARNING "hidraw: pid %d passed too large report\n",
-                               current->pid);
+                               task_pid_nr(current));
                return -EINVAL;
        }
 
        if (count < 2) {
                printk(KERN_WARNING "hidraw: pid %d passed too short report\n",
-                               current->pid);
+                               task_pid_nr(current));
                return -EINVAL;
        }
 
index b76b02f7b52de04a11e17d2a17e0e80b850d9922..775a1ef28a297fc3035f484845917b55ba6c5b29 100644 (file)
@@ -274,8 +274,11 @@ static int usb_kbd_probe(struct usb_interface *iface,
 
        input_set_drvdata(input_dev, kbd);
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP);
-       input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL) | BIT(LED_COMPOSE) | BIT(LED_KANA);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+               BIT_MASK(EV_REP);
+       input_dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+               BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_COMPOSE) |
+               BIT_MASK(LED_KANA);
 
        for (i = 0; i < 255; i++)
                set_bit(usb_kbd_keycode[i], input_dev->keybit);
index 5345c73bcf622a2918df1982ba24749e2dfd0c68..f8ad6910d3d920ccec4f8cf66dcd6b737cbf958f 100644 (file)
@@ -173,11 +173,13 @@ static int usb_mouse_probe(struct usb_interface *intf, const struct usb_device_i
        usb_to_input_id(dev, &input_dev->id);
        input_dev->dev.parent = &intf->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-       input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-       input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-       input_dev->relbit[0] |= BIT(REL_WHEEL);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+       input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+       input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
+               BIT_MASK(BTN_EXTRA);
+       input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
 
        input_set_drvdata(input_dev, mouse);
 
index 4879125b4cdc1963534928181aed8088a30f3900..1001d2e122a262eafa72bb80857260bcc1659472 100644 (file)
@@ -1099,7 +1099,7 @@ static int applesmc_create_accelerometer(void)
        idev->name = "applesmc";
        idev->id.bustype = BUS_HOST;
        idev->dev.parent = &pdev->dev;
-       idev->evbit[0] = BIT(EV_ABS);
+       idev->evbit[0] = BIT_MASK(EV_ABS);
        input_set_abs_params(idev, ABS_X,
                        -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
        input_set_abs_params(idev, ABS_Y,
index 8a7ae03aeee4e5aa1adb629ce77055b10c2a1f77..bab5fd2e4dfdafd4442fa13bdd90f3a3f2022d39 100644 (file)
@@ -574,7 +574,7 @@ static int __init hdaps_init(void)
        idev = hdaps_idev->input;
        idev->name = "hdaps";
        idev->dev.parent = &pdev->dev;
-       idev->evbit[0] = BIT(EV_ABS);
+       idev->evbit[0] = BIT_MASK(EV_ABS);
        input_set_abs_params(idev, ABS_X,
                        -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
        input_set_abs_params(idev, ABS_Y,
index 00fad11733ad09f4ea92370a2722d832ef426023..6426a61f8d4df16e19427cfca375d4313e1e09f4 100644 (file)
@@ -85,7 +85,7 @@ struct bits {
        const char *set;
        const char *unset;
 };
-#define BIT(m, s, u)   { .mask = m, .set = s, .unset = u }
+#define PXA_BIT(m, s, u)       { .mask = m, .set = s, .unset = u }
 
 static inline void
 decode_bits(const char *prefix, const struct bits *bits, int num, u32 val)
@@ -100,17 +100,17 @@ decode_bits(const char *prefix, const struct bits *bits, int num, u32 val)
 }
 
 static const struct bits isr_bits[] = {
-       BIT(ISR_RWM,    "RX",           "TX"),
-       BIT(ISR_ACKNAK, "NAK",          "ACK"),
-       BIT(ISR_UB,     "Bsy",          "Rdy"),
-       BIT(ISR_IBB,    "BusBsy",       "BusRdy"),
-       BIT(ISR_SSD,    "SlaveStop",    NULL),
-       BIT(ISR_ALD,    "ALD",          NULL),
-       BIT(ISR_ITE,    "TxEmpty",      NULL),
-       BIT(ISR_IRF,    "RxFull",       NULL),
-       BIT(ISR_GCAD,   "GenCall",      NULL),
-       BIT(ISR_SAD,    "SlaveAddr",    NULL),
-       BIT(ISR_BED,    "BusErr",       NULL),
+       PXA_BIT(ISR_RWM,        "RX",           "TX"),
+       PXA_BIT(ISR_ACKNAK,     "NAK",          "ACK"),
+       PXA_BIT(ISR_UB,         "Bsy",          "Rdy"),
+       PXA_BIT(ISR_IBB,        "BusBsy",       "BusRdy"),
+       PXA_BIT(ISR_SSD,        "SlaveStop",    NULL),
+       PXA_BIT(ISR_ALD,        "ALD",          NULL),
+       PXA_BIT(ISR_ITE,        "TxEmpty",      NULL),
+       PXA_BIT(ISR_IRF,        "RxFull",       NULL),
+       PXA_BIT(ISR_GCAD,       "GenCall",      NULL),
+       PXA_BIT(ISR_SAD,        "SlaveAddr",    NULL),
+       PXA_BIT(ISR_BED,        "BusErr",       NULL),
 };
 
 static void decode_ISR(unsigned int val)
@@ -120,21 +120,21 @@ static void decode_ISR(unsigned int val)
 }
 
 static const struct bits icr_bits[] = {
-       BIT(ICR_START,  "START",        NULL),
-       BIT(ICR_STOP,   "STOP",         NULL),
-       BIT(ICR_ACKNAK, "ACKNAK",       NULL),
-       BIT(ICR_TB,     "TB",           NULL),
-       BIT(ICR_MA,     "MA",           NULL),
-       BIT(ICR_SCLE,   "SCLE",         "scle"),
-       BIT(ICR_IUE,    "IUE",          "iue"),
-       BIT(ICR_GCD,    "GCD",          NULL),
-       BIT(ICR_ITEIE,  "ITEIE",        NULL),
-       BIT(ICR_IRFIE,  "IRFIE",        NULL),
-       BIT(ICR_BEIE,   "BEIE",         NULL),
-       BIT(ICR_SSDIE,  "SSDIE",        NULL),
-       BIT(ICR_ALDIE,  "ALDIE",        NULL),
-       BIT(ICR_SADIE,  "SADIE",        NULL),
-       BIT(ICR_UR,     "UR",           "ur"),
+       PXA_BIT(ICR_START,  "START",    NULL),
+       PXA_BIT(ICR_STOP,   "STOP",     NULL),
+       PXA_BIT(ICR_ACKNAK, "ACKNAK",   NULL),
+       PXA_BIT(ICR_TB,     "TB",       NULL),
+       PXA_BIT(ICR_MA,     "MA",       NULL),
+       PXA_BIT(ICR_SCLE,   "SCLE",     "scle"),
+       PXA_BIT(ICR_IUE,    "IUE",      "iue"),
+       PXA_BIT(ICR_GCD,    "GCD",      NULL),
+       PXA_BIT(ICR_ITEIE,  "ITEIE",    NULL),
+       PXA_BIT(ICR_IRFIE,  "IRFIE",    NULL),
+       PXA_BIT(ICR_BEIE,   "BEIE",     NULL),
+       PXA_BIT(ICR_SSDIE,  "SSDIE",    NULL),
+       PXA_BIT(ICR_ALDIE,  "ALDIE",    NULL),
+       PXA_BIT(ICR_SADIE,  "SADIE",    NULL),
+       PXA_BIT(ICR_UR,     "UR",               "ur"),
 };
 
 static void decode_ICR(unsigned int val)
index 5c8b008676fb3acab8272af5146a3b91597b8d26..32eaa3f8051592f4ecb132ccaa08fb8ced966d60 100644 (file)
 #include <linux/device.h>
 #include <linux/kmod.h>
 #include <linux/scatterlist.h>
+#include <linux/bitops.h>
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 
 static int __ide_end_request(ide_drive_t *drive, struct request *rq,
                             int uptodate, unsigned int nr_bytes)
index 93644f82592c426074a9beba068b4e23d4910b79..d08fb30768bcc73169884af2b2d011e6e07d695c 100644 (file)
@@ -2797,11 +2797,12 @@ static void cma_remove_one(struct ib_device *device)
 
 static int cma_init(void)
 {
-       int ret, low, high;
+       int ret, low, high, remaining;
 
        get_random_bytes(&next_port, sizeof next_port);
        inet_get_local_port_range(&low, &high);
-       next_port = ((unsigned int) next_port % (high - low)) + low;
+       remaining = (high - low) + 1;
+       next_port = ((unsigned int) next_port % remaining) + low;
 
        cma_wq = create_singlethread_workqueue("rdma_cm");
        if (!cma_wq)
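The remaining variable fixes an off-by-one: inet_get_local_port_range() hands back an inclusive [low, high] range, and reducing modulo (high - low) could never select high itself (and divides by zero for a one-port range). With the fix:

        remaining = high - low + 1;                     /* count of usable ports */
        next_port = (next_port % remaining) + low;      /* always in [low, high] */

For the common 32768-61000 default, remaining is 28233 and every port in the range is now reachable.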
index 1d62c8b88e121307427a491a204a023c9510c3d5..e5b4e9bfbdc5b044b968a70579f13eea513a12c6 100644 (file)
@@ -495,7 +495,7 @@ static unsigned int evdev_poll(struct file *file, poll_table *wait)
 #ifdef CONFIG_COMPAT
 
 #define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8)
-#define NBITS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1)
+#define BITS_TO_LONGS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1)
 
 #ifdef __BIG_ENDIAN
 static int bits_to_user(unsigned long *bits, unsigned int maxbit,
@@ -504,7 +504,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
        int len, i;
 
        if (compat) {
-               len = NBITS_COMPAT(maxbit) * sizeof(compat_long_t);
+               len = BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t);
                if (len > maxlen)
                        len = maxlen;
 
@@ -515,7 +515,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
                                         sizeof(compat_long_t)))
                                return -EFAULT;
        } else {
-               len = NBITS(maxbit) * sizeof(long);
+               len = BITS_TO_LONGS(maxbit) * sizeof(long);
                if (len > maxlen)
                        len = maxlen;
 
@@ -530,8 +530,8 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
                        unsigned int maxlen, void __user *p, int compat)
 {
        int len = compat ?
-                       NBITS_COMPAT(maxbit) * sizeof(compat_long_t) :
-                       NBITS(maxbit) * sizeof(long);
+                       BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t) :
+                       BITS_TO_LONGS(maxbit) * sizeof(long);
 
        if (len > maxlen)
                len = maxlen;
@@ -545,7 +545,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
 static int bits_to_user(unsigned long *bits, unsigned int maxbit,
                        unsigned int maxlen, void __user *p, int compat)
 {
-       int len = NBITS(maxbit) * sizeof(long);
+       int len = BITS_TO_LONGS(maxbit) * sizeof(long);
 
        if (len > maxlen)
                len = maxlen;
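Both the old NBITS()/NBITS_COMPAT() names and the BITS_TO_LONGS()-style replacements
compute the same quantity: how many longs are needed to carry a bitmap of x bits,
rounded up. The generic kernel form (as in <linux/bitops.h>, modulo exact spelling in
this tree) is:

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)

so on a 64-bit build BITS_TO_LONGS(65) is 2: sixty-five bits spill one bit into a
second word.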
index 2f2b020cd6294109da6e757e2968c12bca97aaef..307c7b5c2b33a59fa33825523cca639af4f4ccd2 100644 (file)
@@ -584,10 +584,10 @@ static int input_default_setkeycode(struct input_dev *dev,
 
 
 #define MATCH_BIT(bit, max) \
-               for (i = 0; i < NBITS(max); i++) \
+               for (i = 0; i < BITS_TO_LONGS(max); i++) \
                        if ((id->bit[i] & dev->bit[i]) != id->bit[i]) \
                                break; \
-               if (i != NBITS(max)) \
+               if (i != BITS_TO_LONGS(max)) \
                        continue;
 
 static const struct input_device_id *input_match_device(const struct input_device_id *id,
@@ -698,7 +698,7 @@ static void input_seq_print_bitmap(struct seq_file *seq, const char *name,
 {
        int i;
 
-       for (i = NBITS(max) - 1; i > 0; i--)
+       for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
                if (bitmap[i])
                        break;
 
@@ -892,7 +892,7 @@ static int input_print_modalias_bits(char *buf, int size,
 
        len += snprintf(buf, max(size, 0), "%c", name);
        for (i = min_bit; i < max_bit; i++)
-               if (bm[LONG(i)] & BIT(i))
+               if (bm[BIT_WORD(i)] & BIT_MASK(i))
                        len += snprintf(buf + len, max(size - len, 0), "%X,", i);
        return len;
 }
@@ -991,7 +991,7 @@ static int input_print_bitmap(char *buf, int buf_size, unsigned long *bitmap,
        int i;
        int len = 0;
 
-       for (i = NBITS(max) - 1; i > 0; i--)
+       for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
                if (bitmap[i])
                        break;
 
index 2b201f9aa02405fce891b9aee53c757307609a28..22b2789ef58ad17e30299f5fcbc7f5587d11dba9 100644 (file)
@@ -844,8 +844,8 @@ static const struct input_device_id joydev_blacklist[] = {
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_KEYBIT,
-               .evbit = { BIT(EV_KEY) },
-               .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
+               .evbit = { BIT_MASK(EV_KEY) },
+               .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
        },      /* Avoid itouchpads, touchscreens and tablets */
        { }     /* Terminating entry */
 };
@@ -854,20 +854,20 @@ static const struct input_device_id joydev_ids[] = {
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_ABSBIT,
-               .evbit = { BIT(EV_ABS) },
-               .absbit = { BIT(ABS_X) },
+               .evbit = { BIT_MASK(EV_ABS) },
+               .absbit = { BIT_MASK(ABS_X) },
        },
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_ABSBIT,
-               .evbit = { BIT(EV_ABS) },
-               .absbit = { BIT(ABS_WHEEL) },
+               .evbit = { BIT_MASK(EV_ABS) },
+               .absbit = { BIT_MASK(ABS_WHEEL) },
        },
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_ABSBIT,
-               .evbit = { BIT(EV_ABS) },
-               .absbit = { BIT(ABS_THROTTLE) },
+               .evbit = { BIT_MASK(EV_ABS) },
+               .absbit = { BIT_MASK(ABS_THROTTLE) },
        },
        { }     /* Terminating entry */
 };
index ff701ab10d74f71e0ffcee5f953fc2428d3ef817..52ba16f487c793a36acc464dc4c0e3cc80e77fbc 100644 (file)
@@ -326,14 +326,19 @@ static int a3d_connect(struct gameport *gameport, struct gameport_driver *drv)
 
                a3d->length = 33;
 
-               input_dev->evbit[0] |= BIT(EV_ABS) | BIT(EV_KEY) | BIT(EV_REL);
-               input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y);
-               input_dev->absbit[0] |= BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_THROTTLE) | BIT(ABS_RUDDER)
-                                       | BIT(ABS_HAT0X) | BIT(ABS_HAT0Y) | BIT(ABS_HAT1X) | BIT(ABS_HAT1Y);
-               input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE)
-                                                       | BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-               input_dev->keybit[LONG(BTN_JOYSTICK)] |= BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP)
-                                                       | BIT(BTN_PINKIE);
+               input_dev->evbit[0] |= BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY) |
+                       BIT_MASK(EV_REL);
+               input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+               input_dev->absbit[0] |= BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+                       BIT_MASK(ABS_THROTTLE) | BIT_MASK(ABS_RUDDER) |
+                       BIT_MASK(ABS_HAT0X) | BIT_MASK(ABS_HAT0Y) |
+                       BIT_MASK(ABS_HAT1X) | BIT_MASK(ABS_HAT1Y);
+               input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
+                       BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) |
+                       BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+               input_dev->keybit[BIT_WORD(BTN_JOYSTICK)] |=
+                       BIT_MASK(BTN_TRIGGER) | BIT_MASK(BTN_THUMB) |
+                       BIT_MASK(BTN_TOP) | BIT_MASK(BTN_PINKIE);
 
                a3d_read(a3d, data);
 
@@ -348,9 +353,10 @@ static int a3d_connect(struct gameport *gameport, struct gameport_driver *drv)
        } else {
                a3d->length = 29;
 
-               input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_REL);
-               input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y);
-               input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE);
+               input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+               input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+               input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
+                       BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE);
 
                a3d_read(a3d, data);
 
index 28140c4a110df399c713d7ad337dfd177885455e..d1ca8a14950f62f914e9d4f9f81ae34a7eabadfe 100644 (file)
@@ -431,7 +431,7 @@ static int adi_init_input(struct adi *adi, struct adi_port *port, int half)
        input_dev->open = adi_open;
        input_dev->close = adi_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = 0; i < adi->axes10 + adi->axes8 + (adi->hats + (adi->pad != -1)) * 2; i++)
                set_bit(adi->abs[i], input_dev->absbit);
index b0f5541ec3e6849725955c655cbb4c5ea1f6559c..5cf9f3610e67f3924b4a8eb71fcc9d9156b93bb3 100644 (file)
@@ -137,9 +137,10 @@ static int __init amijoy_init(void)
                amijoy_dev[i]->open = amijoy_open;
                amijoy_dev[i]->close = amijoy_close;
 
-               amijoy_dev[i]->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-               amijoy_dev[i]->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
-               amijoy_dev[i]->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+               amijoy_dev[i]->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+               amijoy_dev[i]->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
+               amijoy_dev[i]->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+                       BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
                for (j = 0; j < 2; j++) {
                        amijoy_dev[i]->absmin[ABS_X + j] = -1;
                        amijoy_dev[i]->absmax[ABS_X + j] = 1;
index bdd157c1ebf8929b763a9d56815ce209c5d906f2..15739880afc6af31829f8a3058c701efa0976207 100644 (file)
@@ -456,7 +456,7 @@ static int analog_init_device(struct analog_port *port, struct analog *analog, i
        input_dev->open = analog_open;
        input_dev->close = analog_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = j = 0; i < 4; i++)
                if (analog->mask & (1 << i)) {
index d3352a849b85f592b08df3140af53c0b594c0366..55646a6d89f586de8f35616a947b41c84bd854c2 100644 (file)
@@ -218,7 +218,7 @@ static int cobra_connect(struct gameport *gameport, struct gameport_driver *drv)
                input_dev->open = cobra_open;
                input_dev->close = cobra_close;
 
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
                input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
                input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
                for (j = 0; cobra_btn[j]; j++)
index b069ee18e35312078ec3cff1d6910d102dd851a7..27fc475bd3a19166eb42bcef23d09a0faa24b6f9 100644 (file)
@@ -631,7 +631,7 @@ static struct db9 __init *db9_probe(int parport, int mode)
                input_dev->open = db9_open;
                input_dev->close = db9_close;
 
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
                for (j = 0; j < db9_mode->n_buttons; j++)
                        set_bit(db9_mode->buttons[j], input_dev->keybit);
                for (j = 0; j < db9_mode->n_axis; j++) {
index 1a452e0e5f25788e37c972998912245747ec1657..df2a9d02ca6c1f32d8bf31fc24270883f912aa17 100644 (file)
@@ -653,12 +653,12 @@ static int __init gc_setup_pad(struct gc *gc, int idx, int pad_type)
        input_dev->close = gc_close;
 
        if (pad_type != GC_SNESMOUSE) {
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
                for (i = 0; i < 2; i++)
                        input_set_abs_params(input_dev, ABS_X + i, -1, 1, 0, 0);
        } else
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
 
        gc->pads[0] |= gc_status_bit[idx];
        gc->pads[pad_type] |= gc_status_bit[idx];
index d514aebf75541fc871b56b510f1c4410bef2990c..1f6302c0eb3fd8e47c5f8e5a5edfdd95836d5fe8 100644 (file)
@@ -315,7 +315,7 @@ static int gf2k_connect(struct gameport *gameport, struct gameport_driver *drv)
        input_dev->open = gf2k_open;
        input_dev->close = gf2k_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = 0; i < gf2k_axes[gf2k->id]; i++)
                set_bit(gf2k_abs[i], input_dev->absbit);
index 73eb5ab6f140caa4bc8b9dd5eb34346335f1b1c2..fd3853ab1aad4db5499cea82c62872d720c16edc 100644 (file)
@@ -370,7 +370,7 @@ static int grip_connect(struct gameport *gameport, struct gameport_driver *drv)
                input_dev->open = grip_open;
                input_dev->close = grip_close;
 
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
                for (j = 0; (t = grip_abs[grip->mode[i]][j]) >= 0; j++) {
 
index 4ed3a3eadf1964eaad388e4f603720910e675811..c57e21d68c003e2970949744120b3384b8fe98e6 100644 (file)
@@ -606,7 +606,7 @@ static int register_slot(int slot, struct grip_mp *grip)
        input_dev->open = grip_open;
        input_dev->close = grip_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (j = 0; (t = grip_abs[port->mode][j]) >= 0; j++)
                input_set_abs_params(input_dev, t, -1, 1, 0, 0);
index d4e8073caf27a98a603a33a9eb0323f7e300701d..aa6bfb3fb8cd2b135201e97b7ee7dc99d6263660 100644 (file)
@@ -238,7 +238,7 @@ static int guillemot_connect(struct gameport *gameport, struct gameport_driver *
        input_dev->open = guillemot_open;
        input_dev->close = guillemot_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = 0; (t = guillemot->type->abs[i]) >= 0; i++)
                input_set_abs_params(input_dev, t, 0, 255, 0, 0);
index 682244b1c0422610e9475020d83f67cb7fff51e4..6f826b37d9aa6274e470ed07493e42c2f95bcf8a 100644 (file)
@@ -389,7 +389,8 @@ int iforce_init_device(struct iforce *iforce)
  * Set input device bitfields and ranges.
  */
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_FF_STATUS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+               BIT_MASK(EV_FF_STATUS);
 
        for (i = 0; iforce->type->btn[i] >= 0; i++)
                set_bit(iforce->type->btn[i], input_dev->keybit);
index 40a853ac21c795c2e3db467be967f203a91f1d0b..a964a7cfd210cdc29b4fac6fcd9ee4c2c230d60b 100644 (file)
 #define FF_CORE_IS_PLAYED      3       /* Effect is currently being played */
 #define FF_CORE_SHOULD_PLAY    4       /* User wants the effect to be played */
 #define FF_CORE_UPDATE         5       /* Effect is being updated */
-#define FF_MODCORE_MAX         5
+#define FF_MODCORE_CNT         6
 
 struct iforce_core_effect {
        /* Information about where modifiers are stored in the device's memory */
        struct resource mod1_chunk;
        struct resource mod2_chunk;
-       unsigned long flags[NBITS(FF_MODCORE_MAX)];
+       unsigned long flags[BITS_TO_LONGS(FF_MODCORE_CNT)];
 };
 
 #define FF_CMD_EFFECT          0x010e
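The MAX -> CNT rename above is forced by the macro switch: NBITS() was fed the highest
flag index, whereas BITS_TO_LONGS() takes a count of bits, so six flags (indices 0
through 5) need FF_MODCORE_CNT == 6 for flags[] to be sized correctly. The array size
is unchanged in practice, since BITS_TO_LONGS(6) is (6 + 63) / 64 = 1 long on a 64-bit
build; only the meaning of the constant is corrected.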
index 1aec1e9d7c5942f2b58d090778a08516ccee79b4..bc8ea95dfd0e8328736097ec4b1b132256d978ca 100644 (file)
@@ -269,7 +269,7 @@ static int interact_connect(struct gameport *gameport, struct gameport_driver *d
        input_dev->open = interact_open;
        input_dev->close = interact_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = 0; (t = interact_type[interact->type].abs[i]) >= 0; i++) {
                set_bit(t, input_dev->absbit);
index b35604ee43aecd4318a4fb126a4c0d40a9db7279..54e676948ebb1caa777dee0195f942fb876478e3 100644 (file)
@@ -170,7 +170,7 @@ static int magellan_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = 0; i < 9; i++)
                set_bit(magellan_buttons[i], input_dev->keybit);
index 2adf73f63c94dd7898d7ef1091a4c5842bb832c4..7b4865fdee54cdcf48998cff18dc7bdd4316a9c0 100644 (file)
@@ -758,7 +758,7 @@ static int sw_connect(struct gameport *gameport, struct gameport_driver *drv)
                input_dev->open = sw_open;
                input_dev->close = sw_close;
 
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
                for (j = 0; (bits = sw_bit[sw->type][j]); j++) {
                        code = sw_abs[sw->type][j];
index abb7c4cf54ad50d07fe3bcff57d7eceea86f18d4..d4087fd496563367bea2c7894c1e4a71c8477853 100644 (file)
@@ -228,18 +228,23 @@ static int spaceball_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        switch (id) {
                case SPACEBALL_4000FLX:
                case SPACEBALL_4000FLX_L:
-                       input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_9);
-                       input_dev->keybit[LONG(BTN_A)] |= BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_MODE);
+                       input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_9);
+                       input_dev->keybit[BIT_WORD(BTN_A)] |= BIT_MASK(BTN_A) |
+                               BIT_MASK(BTN_B) | BIT_MASK(BTN_C) |
+                               BIT_MASK(BTN_MODE);
                default:
-                       input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_2) | BIT(BTN_3) | BIT(BTN_4)
-                               | BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7) | BIT(BTN_8);
+                       input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_2) |
+                               BIT_MASK(BTN_3) | BIT_MASK(BTN_4) |
+                               BIT_MASK(BTN_5) | BIT_MASK(BTN_6) |
+                               BIT_MASK(BTN_7) | BIT_MASK(BTN_8);
                case SPACEBALL_3003C:
-                       input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_1) | BIT(BTN_8);
+                       input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_1) |
+                               BIT_MASK(BTN_8);
        }
 
        for (i = 0; i < 3; i++) {
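Note that the switch above relies on deliberate case fall-through: a 4000FLX first
gains its model-specific buttons, then falls into the default group, which in turn
falls into the 3003C's two buttons, so each higher-end model accumulates every button
set below it. The conversion preserves that cascade while splitting the long
BIT_MASK() expressions across lines.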
index c4937f1e837c192dd41ab252d933ac81308e8e4b..f7ce4004f4ba1baf036766e5af0f90d7ef14c8e1 100644 (file)
@@ -185,7 +185,7 @@ static int spaceorb_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = 0; i < 6; i++)
                set_bit(spaceorb_buttons[i], input_dev->keybit);
index 8581ee991d4e8e90cff96d8dee1fcfc3f038855d..baa10b2f7ba1c74843b8d8bbf2060e3649b045f7 100644 (file)
@@ -156,10 +156,11 @@ static int stinger_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_A)] = BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_X) |
-                                        BIT(BTN_Y) | BIT(BTN_Z) | BIT(BTN_TL) | BIT(BTN_TR) |
-                                        BIT(BTN_START) | BIT(BTN_SELECT);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_A)] = BIT_MASK(BTN_A) | BIT_MASK(BTN_B) |
+               BIT_MASK(BTN_C) | BIT_MASK(BTN_X) | BIT_MASK(BTN_Y) |
+               BIT_MASK(BTN_Z) | BIT_MASK(BTN_TL) | BIT_MASK(BTN_TR) |
+               BIT_MASK(BTN_START) | BIT_MASK(BTN_SELECT);
        input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 4);
        input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 4);
 
index 3b36ee04f7261848e87c0c61c45b9e517e1bf218..0feeb8acb532a297a819d5713a3aa2bce3490af8 100644 (file)
@@ -333,7 +333,7 @@ static int tmdc_setup_port(struct tmdc *tmdc, int idx, unsigned char *data)
        input_dev->open = tmdc_open;
        input_dev->close = tmdc_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        for (i = 0; i < port->absc && i < TMDC_ABS; i++)
                if (port->abs[i] >= 0)
index 8381c6f143735f69af27b714cfcd7d79d2a37ed7..bbebd4e2ad7f35845770540f5c3a74f74fd3de1a 100644 (file)
@@ -229,7 +229,7 @@ static struct tgfx __init *tgfx_probe(int parport, int *n_buttons, int n_devs)
                input_dev->open = tgfx_open;
                input_dev->close = tgfx_close;
 
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
                input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
                input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
 
index c91504ec38ebf012423b46db735d90174697580a..1085c841fec48ca932263e898b0e759ca363fb62 100644 (file)
@@ -207,7 +207,7 @@ static int twidjoy_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
        input_set_abs_params(input_dev, ABS_X, -50, 50, 4, 4);
        input_set_abs_params(input_dev, ABS_Y, -50, 50, 4, 4);
 
index 4e85f72eefd7075b4a6a7c9ae339c64a8dec6d4d..e928b6e3724a528b632f1a3af472073eaf888419 100644 (file)
@@ -162,9 +162,11 @@ static int warrior_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TRIGGER)] = BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP) | BIT(BTN_TOP2);
-       input_dev->relbit[0] = BIT(REL_DIAL);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
+               BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TRIGGER)] = BIT_MASK(BTN_TRIGGER) |
+               BIT_MASK(BTN_THUMB) | BIT_MASK(BTN_TOP) | BIT_MASK(BTN_TOP2);
+       input_dev->relbit[0] = BIT_MASK(REL_DIAL);
        input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 8);
        input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 8);
        input_set_abs_params(input_dev, ABS_THROTTLE, -112, 112, 0, 0);
index 623629a69b03e9be1b2a000a98cff8ccede174ef..6dd375825a1403226c9c727da6d3fc460ed17b6f 100644 (file)
@@ -658,7 +658,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
        input_dev->open = xpad_open;
        input_dev->close = xpad_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
        /* set up buttons */
        for (i = 0; xpad_btn[i] >= 0; i++)
index 63d6ead6b877051baf18392140cd9e23a2df9f68..72abc196ce66580000b25e0800e5b4908fc03383 100644 (file)
@@ -125,7 +125,7 @@ static int __devinit aaedkbd_probe(struct platform_device *pdev)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &pdev->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        input_dev->keycode = aaedkbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
        input_dev->keycodemax = ARRAY_SIZE(aaedkbd_keycode);
index c67e84ec2d6a506b94696c1739dd9d9a76752181..81bf7562aca0d94c65c7ec75278c240055692916 100644 (file)
@@ -209,7 +209,7 @@ static int __init amikbd_init(void)
        amikbd_dev->id.product = 0x0001;
        amikbd_dev->id.version = 0x0100;
 
-       amikbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       amikbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 
        for (i = 0; i < 0x78; i++)
                set_bit(i, amikbd_dev->keybit);
index a1800151b6ce8dc045005138154b065055afe171..4e92100c56a8dddcc342c40c5c2867e46e6c0fcf 100644 (file)
@@ -237,7 +237,7 @@ static int __init atakbd_init(void)
        atakbd_dev->id.product = 0x0001;
        atakbd_dev->id.version = 0x0100;
 
-       atakbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       atakbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        atakbd_dev->keycode = atakbd_keycode;
        atakbd_dev->keycodesize = sizeof(unsigned char);
        atakbd_dev->keycodemax = ARRAY_SIZE(atakbd_keycode);
index 41fc3d03b6eb495759cb93fbb7611e2acf76b712..b39c5b31e6201155ece9d224ab7e013ed9cb157f 100644 (file)
@@ -900,27 +900,32 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd)
 
        input_set_drvdata(input_dev, atkbd);
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_MSC);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+               BIT_MASK(EV_MSC);
 
        if (atkbd->write) {
-               input_dev->evbit[0] |= BIT(EV_LED);
-               input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+               input_dev->evbit[0] |= BIT_MASK(EV_LED);
+               input_dev->ledbit[0] = BIT_MASK(LED_NUML) |
+                       BIT_MASK(LED_CAPSL) | BIT_MASK(LED_SCROLLL);
        }
 
        if (atkbd->extra)
-               input_dev->ledbit[0] |= BIT(LED_COMPOSE) | BIT(LED_SUSPEND) |
-                                       BIT(LED_SLEEP) | BIT(LED_MUTE) | BIT(LED_MISC);
+               input_dev->ledbit[0] |= BIT_MASK(LED_COMPOSE) |
+                       BIT_MASK(LED_SUSPEND) | BIT_MASK(LED_SLEEP) |
+                       BIT_MASK(LED_MUTE) | BIT_MASK(LED_MISC);
 
        if (!atkbd->softrepeat) {
                input_dev->rep[REP_DELAY] = 250;
                input_dev->rep[REP_PERIOD] = 33;
        }
 
-       input_dev->mscbit[0] = atkbd->softraw ? BIT(MSC_SCAN) : BIT(MSC_RAW) | BIT(MSC_SCAN);
+       input_dev->mscbit[0] = atkbd->softraw ? BIT_MASK(MSC_SCAN) :
+               BIT_MASK(MSC_RAW) | BIT_MASK(MSC_SCAN);
 
        if (atkbd->scroll) {
-               input_dev->evbit[0] |= BIT(EV_REL);
-               input_dev->relbit[0] = BIT(REL_WHEEL) | BIT(REL_HWHEEL);
+               input_dev->evbit[0] |= BIT_MASK(EV_REL);
+               input_dev->relbit[0] = BIT_MASK(REL_WHEEL) |
+                       BIT_MASK(REL_HWHEEL);
                set_bit(BTN_MIDDLE, input_dev->keybit);
        }
 
index 6578bfff644bdc5099f0da31d9876c451af4fa27..790fed368aae0ea144ddb053b6c20276d6717792 100644 (file)
@@ -325,7 +325,8 @@ static int __init corgikbd_probe(struct platform_device *pdev)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &pdev->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+               BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
        input_dev->keycode = corgikbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
        input_dev->keycodemax = ARRAY_SIZE(corgikbd_keycode);
index e2a3293bc67ed37b66949747de99d0e3d4d1dfd9..3eddf52a0bba5cabbe76a1f41cedee7c5fbf8ffc 100644 (file)
@@ -62,7 +62,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, input);
 
-       input->evbit[0] = BIT(EV_KEY);
+       input->evbit[0] = BIT_MASK(EV_KEY);
 
        input->name = pdev->name;
        input->phys = "gpio-keys/input0";
index cdd254f2e6c7970a93819d7966fead2454c56613..adbf29f0169d81f1f5a740243ade99756fc798cb 100644 (file)
@@ -323,8 +323,9 @@ static int hil_kbd_connect(struct serio *serio, struct serio_driver *drv)
                goto bail2;
        }
 
-       kbd->dev->evbit[0]      = BIT(EV_KEY) | BIT(EV_REP);
-       kbd->dev->ledbit[0]     = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+       kbd->dev->evbit[0]      = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+       kbd->dev->ledbit[0]     = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+               BIT_MASK(LED_SCROLLL);
        kbd->dev->keycodemax    = HIL_KEYCODES_SET1_TBLSIZE;
        kbd->dev->keycodesize   = sizeof(hil_kbd_set1[0]);
        kbd->dev->keycode       = hil_kbd_set1;
index 499b6974457f7039366941e1a4ae299fd0580d8b..50d80ecf0b80915ee81d2ab5c59b01be9cb1c4b7 100644 (file)
@@ -266,8 +266,9 @@ hil_keyb_init(void)
                if (hphilkeyb_keycode[i] != KEY_RESERVED)
                        set_bit(hphilkeyb_keycode[i], hil_dev.dev->keybit);
 
-       hil_dev.dev->evbit[0]   = BIT(EV_KEY) | BIT(EV_REP);
-       hil_dev.dev->ledbit[0]  = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+       hil_dev.dev->evbit[0]   = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+       hil_dev.dev->ledbit[0]  = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+               BIT_MASK(LED_SCROLLL);
        hil_dev.dev->keycodemax = HIL_KEYCODES_SET1_TBLSIZE;
        hil_dev.dev->keycodesize= sizeof(hphilkeyb_keycode[0]);
        hil_dev.dev->keycode    = hphilkeyb_keycode;
index 7a41b271f222cfb5bdf5b68cd6946e27d6c88bf8..5a0ca18d6755827b77d776af407364b1a8b2d30d 100644 (file)
@@ -233,7 +233,7 @@ static int locomokbd_probe(struct locomo_dev *dev)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &dev->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        input_dev->keycode = locomokbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
        input_dev->keycodemax = ARRAY_SIZE(locomokbd_keycode);
index b97a41e3ee56cef949835fdc9135f597aefa7850..48d1cab0aa1c7284dcc795391d1759303f39db7c 100644 (file)
@@ -106,7 +106,7 @@ static int nkbd_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        input_dev->keycode = nkbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
        input_dev->keycodemax = ARRAY_SIZE(nkbd_keycode);
index b7061aa38816d13b1a9469706cb34e9b4c3d5d88..bdd64ee4c5c8fa5187cfce8cb3f718dd7b5f4a3c 100644 (file)
@@ -183,8 +183,9 @@ static int __devinit pxakbd_probe(struct platform_device *pdev)
        input_dev->close = pxakbd_close;
        input_dev->dev.parent = &pdev->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL);
-       input_dev->relbit[LONG(REL_WHEEL)] = BIT(REL_WHEEL);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+               BIT_MASK(EV_REL);
+       input_dev->relbit[BIT_WORD(REL_WHEEL)] = BIT_MASK(REL_WHEEL);
        for (row = 0; row < pdata->nr_rows; row++) {
                for (col = 0; col < pdata->nr_cols; col++) {
                        int code = pdata->keycodes[row][col];
index 41b80385476c82fd67a170a12dae1eb5f6e2f34e..410d78a774d03c2eab0ce95a1fd608f73df09436 100644 (file)
@@ -381,7 +381,8 @@ static int __init spitzkbd_probe(struct platform_device *dev)
        input_dev->id.product = 0x0001;
        input_dev->id.version = 0x0100;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+               BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
        input_dev->keycode = spitzkbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
        input_dev->keycodemax = ARRAY_SIZE(spitzkbd_keycode);
index b44b0684d5439e7e18ce620f5fe65a6962997261..7437219370b1a22e0695003d7318f596e60350d1 100644 (file)
@@ -110,7 +110,7 @@ static int skbd_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        input_dev->keycode = skbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
        input_dev->keycodemax = ARRAY_SIZE(skbd_keycode);
index 1d4e39624cfecf08d1a4ef4f6a81fce8da3b5112..be0f5d19d0238590f2ffd219a5ef602e505fcb3c 100644 (file)
@@ -277,9 +277,11 @@ static int sunkbd_connect(struct serio *serio, struct serio_driver *drv)
 
        input_dev->event = sunkbd_event;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_SND) | BIT(EV_REP);
-       input_dev->ledbit[0] = BIT(LED_CAPSL) | BIT(LED_COMPOSE) | BIT(LED_SCROLLL) | BIT(LED_NUML);
-       input_dev->sndbit[0] = BIT(SND_CLICK) | BIT(SND_BELL);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+               BIT_MASK(EV_SND) | BIT_MASK(EV_REP);
+       input_dev->ledbit[0] = BIT_MASK(LED_CAPSL) | BIT_MASK(LED_COMPOSE) |
+               BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_NUML);
+       input_dev->sndbit[0] = BIT_MASK(SND_CLICK) | BIT_MASK(SND_BELL);
 
        input_dev->keycode = sunkbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
index f3a56eb58ed1fa05d6abf0c572cf34aef4019095..152a2c0705080f6432fcfb2f4fb4e27c7fa4d47e 100644 (file)
@@ -110,7 +110,7 @@ static int xtkbd_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        input_dev->keycode = xtkbd->keycode;
        input_dev->keycodesize = sizeof(unsigned char);
        input_dev->keycodemax = ARRAY_SIZE(xtkbd_keycode);
index 471aab206443bd8641d03e561aa58565ce5b3cb4..3a7937481ad8730d654835d4014557a364c70a3f 100644 (file)
@@ -662,10 +662,10 @@ static void ati_remote_input_init(struct ati_remote *ati_remote)
        struct input_dev *idev = ati_remote->idev;
        int i;
 
-       idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       idev->keybit[LONG(BTN_MOUSE)] = ( BIT(BTN_LEFT) | BIT(BTN_RIGHT) |
-                                         BIT(BTN_SIDE) | BIT(BTN_EXTRA) );
-       idev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+       idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
        for (i = 0; ati_remote_tbl[i].kind != KIND_END; i++)
                if (ati_remote_tbl[i].type == EV_KEY)
                        set_bit(ati_remote_tbl[i].code, idev->keybit);
index 1031543e5c3f3e1f71e4e15bcae5ae600aa0f087..f2709b82485c181067b8a8fccab942a83bd425ed 100644 (file)
@@ -346,9 +346,10 @@ static int ati_remote2_input_init(struct ati_remote2 *ar2)
        ar2->idev = idev;
        input_set_drvdata(idev, ar2);
 
-       idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL);
-       idev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
-       idev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_REL);
+       idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT);
+       idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
        for (i = 0; ati_remote2_key_table[i].key_code != KEY_RESERVED; i++)
                set_bit(ati_remote2_key_table[i].key_code, idev->keybit);
 
index e43e92fd9e2311f8a15c01f52b2eb769b003cb7c..4e3ad657ed8033b7ab917a1e4c886296b3722849 100644 (file)
@@ -81,7 +81,7 @@ static int atlas_acpi_button_add(struct acpi_device *device)
        input_dev->name = "Atlas ACPI button driver";
        input_dev->phys = "ASIM0000/atlas/input0";
        input_dev->id.bustype = BUS_HOST;
-       input_dev->evbit[LONG(EV_KEY)] = BIT(EV_KEY);
+       input_dev->evbit[BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY);
 
        set_bit(KEY_F1, input_dev->keybit);
        set_bit(KEY_F2, input_dev->keybit);
index 064b07936019c1f6465eb6aa1c1391666734a653..1aef97ed5e84575cc43225fd020038e28659f599 100644 (file)
@@ -104,7 +104,7 @@ static int __devinit cobalt_buttons_probe(struct platform_device *pdev)
        input->id.bustype = BUS_HOST;
        input->cdev.dev = &pdev->dev;
 
-       input->evbit[0] = BIT(EV_KEY);
+       input->evbit[0] = BIT_MASK(EV_KEY);
        for (i = 0; i < ARRAY_SIZE(buttons_map); i++) {
                set_bit(buttons_map[i].keycode, input->keybit);
                buttons_map[i].count = 0;
index e759944041abd7c3a92b0849df88e34e59a47b92..d2ade7443b7db832ba756992f16117ee19b63799 100644 (file)
@@ -109,8 +109,8 @@ static int __devinit ixp4xx_spkr_probe(struct platform_device *dev)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &dev->dev;
 
-       input_dev->evbit[0] = BIT(EV_SND);
-       input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+       input_dev->evbit[0] = BIT_MASK(EV_SND);
+       input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
        input_dev->event = ixp4xx_spkr_event;
 
        err = request_irq(IRQ_IXP4XX_TIMER2, &ixp4xx_spkr_interrupt,
index 1bffc9fa98c270f3c8898a70a423602867846d9e..fd74347047dde8315cec4457caaf560b00e02b2a 100644 (file)
@@ -497,7 +497,7 @@ static int keyspan_probe(struct usb_interface *interface, const struct usb_devic
        usb_to_input_id(udev, &input_dev->id);
        input_dev->dev.parent = &interface->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY);              /* We will only report KEY events. */
+       input_dev->evbit[0] = BIT_MASK(EV_KEY);         /* We will only report KEY events. */
        for (i = 0; i < ARRAY_SIZE(keyspan_key_table); i++)
                if (keyspan_key_table[i] != KEY_RESERVED)
                        set_bit(keyspan_key_table[i], input_dev->keybit);
index e9f26e766b4d84606a5d0d79dc3f255a825773e9..0c64d9bb718ecf9250b84137fa581d1c552339a0 100644 (file)
@@ -65,8 +65,8 @@ static int __devinit m68kspkr_probe(struct platform_device *dev)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &dev->dev;
 
-       input_dev->evbit[0] = BIT(EV_SND);
-       input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+       input_dev->evbit[0] = BIT_MASK(EV_SND);
+       input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
        input_dev->event = m68kspkr_event;
 
        err = input_register_device(input_dev);
index c19f77fbaf2a9ffb7646f03bfcb8493797830f93..4941a9e61e902967a410404f0b67171bdbdb51d5 100644 (file)
@@ -86,8 +86,8 @@ static int __devinit pcspkr_probe(struct platform_device *dev)
        pcspkr_dev->id.version = 0x0100;
        pcspkr_dev->dev.parent = &dev->dev;
 
-       pcspkr_dev->evbit[0] = BIT(EV_SND);
-       pcspkr_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+       pcspkr_dev->evbit[0] = BIT_MASK(EV_SND);
+       pcspkr_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
        pcspkr_dev->event = pcspkr_event;
 
        err = input_register_device(pcspkr_dev);
index 448a470d28f264d4ee6f666438d162a17225f694..7a7b8c7b96333b441bf9746b42e75d7a31f359d5 100644 (file)
@@ -363,10 +363,11 @@ static int powermate_probe(struct usb_interface *intf, const struct usb_device_i
 
        input_dev->event = powermate_input_event;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_MSC);
-       input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
-       input_dev->relbit[LONG(REL_DIAL)] = BIT(REL_DIAL);
-       input_dev->mscbit[LONG(MSC_PULSELED)] = BIT(MSC_PULSELED);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
+               BIT_MASK(EV_MSC);
+       input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
+       input_dev->relbit[BIT_WORD(REL_DIAL)] = BIT_MASK(REL_DIAL);
+       input_dev->mscbit[BIT_WORD(MSC_PULSELED)] = BIT_MASK(MSC_PULSELED);
 
        /* get a handle to the interrupt data pipe */
        pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
index e36ec1d92be895a4279e12f88d7654d2eddc466d..a3637d870880a22a8d92befb7e75aa9272096c1d 100644 (file)
@@ -115,8 +115,8 @@ static int __devinit sparcspkr_probe(struct device *dev)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = dev;
 
-       input_dev->evbit[0] = BIT(EV_SND);
-       input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+       input_dev->evbit[0] = BIT_MASK(EV_SND);
+       input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 
        input_dev->event = state->event;
 
index ab15880fd5663ab63cd0bbbc085dbea617eb7410..46279ef2b649b7f523006eeffa1bfd14e0173f77 100644 (file)
@@ -945,7 +945,7 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
        /* input_dev->event = input_ev; TODO */
 
        /* register available key events */
-       input_dev->evbit[0] = BIT(EV_KEY);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY);
        for (i = 0; i < 256; i++) {
                int k = map_p1k_to_key(i);
                if (k >= 0) {
index 64d70a9b714c9d84572f5c39d5f98b7a60f9d5b9..2b5ed119c9a90e9b5d66245eebfe9fcc1e11a197 100644 (file)
@@ -455,24 +455,25 @@ int alps_init(struct psmouse *psmouse)
        if (alps_hw_init(psmouse, &version))
                goto init_fail;
 
-       dev1->evbit[LONG(EV_KEY)] |= BIT(EV_KEY);
-       dev1->keybit[LONG(BTN_TOUCH)] |= BIT(BTN_TOUCH);
-       dev1->keybit[LONG(BTN_TOOL_FINGER)] |= BIT(BTN_TOOL_FINGER);
-       dev1->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+       dev1->evbit[BIT_WORD(EV_KEY)] |= BIT_MASK(EV_KEY);
+       dev1->keybit[BIT_WORD(BTN_TOUCH)] |= BIT_MASK(BTN_TOUCH);
+       dev1->keybit[BIT_WORD(BTN_TOOL_FINGER)] |= BIT_MASK(BTN_TOOL_FINGER);
+       dev1->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 
-       dev1->evbit[LONG(EV_ABS)] |= BIT(EV_ABS);
+       dev1->evbit[BIT_WORD(EV_ABS)] |= BIT_MASK(EV_ABS);
        input_set_abs_params(dev1, ABS_X, 0, 1023, 0, 0);
        input_set_abs_params(dev1, ABS_Y, 0, 767, 0, 0);
        input_set_abs_params(dev1, ABS_PRESSURE, 0, 127, 0, 0);
 
        if (priv->i->flags & ALPS_WHEEL) {
-               dev1->evbit[LONG(EV_REL)] |= BIT(EV_REL);
-               dev1->relbit[LONG(REL_WHEEL)] |= BIT(REL_WHEEL);
+               dev1->evbit[BIT_WORD(EV_REL)] |= BIT_MASK(EV_REL);
+               dev1->relbit[BIT_WORD(REL_WHEEL)] |= BIT_MASK(REL_WHEEL);
        }
 
        if (priv->i->flags & (ALPS_FW_BK_1 | ALPS_FW_BK_2)) {
-               dev1->keybit[LONG(BTN_FORWARD)] |= BIT(BTN_FORWARD);
-               dev1->keybit[LONG(BTN_BACK)] |= BIT(BTN_BACK);
+               dev1->keybit[BIT_WORD(BTN_FORWARD)] |= BIT_MASK(BTN_FORWARD);
+               dev1->keybit[BIT_WORD(BTN_BACK)] |= BIT_MASK(BTN_BACK);
        }
 
        snprintf(priv->phys, sizeof(priv->phys), "%s/input1", psmouse->ps2dev.serio->phys);
@@ -483,9 +484,10 @@ int alps_init(struct psmouse *psmouse)
        dev2->id.product = PSMOUSE_ALPS;
        dev2->id.version = 0x0000;
 
-       dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       dev2->relbit[LONG(REL_X)] |= BIT(REL_X) | BIT(REL_Y);
-       dev2->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+       dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       dev2->relbit[BIT_WORD(REL_X)] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+       dev2->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 
        if (input_register_device(priv->dev2))
                goto init_fail;
index 239a0e16d91abf215904584e72b3985c746e770b..a185ac78a42ca441a193229f2bd04ccf7cfd8079 100644 (file)
@@ -111,9 +111,10 @@ static int __init amimouse_init(void)
        amimouse_dev->id.product = 0x0002;
        amimouse_dev->id.version = 0x0100;
 
-       amimouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       amimouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-       amimouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+       amimouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       amimouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+       amimouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
        amimouse_dev->open = amimouse_open;
        amimouse_dev->close = amimouse_close;
 
index c8c7244b48a1bae505aab6b5e70f48657ad30dbb..98a3561d4b054f21633433b4f12d6e1367a94f44 100644 (file)
@@ -137,9 +137,10 @@ static int __init atamouse_init(void)
        atamouse_dev->id.product = 0x0002;
        atamouse_dev->id.version = 0x0100;
 
-       atamouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       atamouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-       atamouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+       atamouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       atamouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+       atamouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
        atamouse_dev->open = atamouse_open;
        atamouse_dev->close = atamouse_close;
 
index 449bf4dcbbcc726cc7546edbbeb122078d039a1c..27f88fbb7136c0a40f6257db82266f105fee0a7e 100644 (file)
@@ -298,12 +298,12 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver)
        idd = ptr->idd + 1;
        txt = "unknown";
        if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_REL) {
-               ptr->dev->evbit[0] = BIT(EV_REL);
+               ptr->dev->evbit[0] = BIT_MASK(EV_REL);
                txt = "relative";
        }
 
        if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_ABS) {
-               ptr->dev->evbit[0] = BIT(EV_ABS);
+               ptr->dev->evbit[0] = BIT_MASK(EV_ABS);
                txt = "absolute";
        }
        if (!ptr->dev->evbit[0])
@@ -311,7 +311,7 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver)
 
        ptr->nbtn = HIL_IDD_NUM_BUTTONS(idd);
        if (ptr->nbtn)
-               ptr->dev->evbit[0] |= BIT(EV_KEY);
+               ptr->dev->evbit[0] |= BIT_MASK(EV_KEY);
 
        naxsets = HIL_IDD_NUM_AXSETS(*idd);
        ptr->naxes = HIL_IDD_NUM_AXES_PER_SET(*idd);
index 79b624fe8994ff2d513b91a984840df869704e80..655a392174322605a37cf6d2fda29f3cf02c1d32 100644 (file)
@@ -163,9 +163,10 @@ static int __init inport_init(void)
        inport_dev->id.product = 0x0001;
        inport_dev->id.version = 0x0100;
 
-       inport_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       inport_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-       inport_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       inport_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       inport_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+       inport_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
        inport_dev->open  = inport_open;
        inport_dev->close = inport_close;
index d7de4c53b3d8cb8cd853c14ae382f5f47f2fda83..9ec57d80186e43b7f776ec3b9aed0c7c385ce625 100644 (file)
@@ -270,9 +270,10 @@ static int lifebook_create_relative_device(struct psmouse *psmouse)
        dev2->id.version = 0x0000;
        dev2->dev.parent = &psmouse->ps2dev.serio->dev;
 
-       dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       dev2->relbit[LONG(REL_X)] = BIT(REL_X) | BIT(REL_Y);
-       dev2->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
+       dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       dev2->relbit[BIT_WORD(REL_X)] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+       dev2->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT);
 
        error = input_register_device(priv->dev2);
        if (error)
@@ -295,9 +296,9 @@ int lifebook_init(struct psmouse *psmouse)
        if (lifebook_absolute_mode(psmouse))
                return -1;
 
-       dev1->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
+       dev1->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
        dev1->relbit[0] = 0;
-       dev1->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       dev1->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(dev1, ABS_X, 0, max_coord, 0, 0);
        input_set_abs_params(dev1, ABS_Y, 0, max_coord, 0, 0);
 
index 26c3b2e2ca94a3eff06a3ef800f59f78a19f5c58..b23a4f3ea5cdcf48d5e09cf789ba610940a3f68f 100644 (file)
@@ -156,9 +156,10 @@ static int __init logibm_init(void)
        logibm_dev->id.product = 0x0001;
        logibm_dev->id.version = 0x0100;
 
-       logibm_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       logibm_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-       logibm_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       logibm_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       logibm_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+       logibm_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
        logibm_dev->open  = logibm_open;
        logibm_dev->close = logibm_close;
index 05d992e514f0bd5452dc0a6f0c9161b01a01c753..8991ab0b4fe3b3caed329b0a32aea07d29c6d303 100644 (file)
@@ -144,9 +144,9 @@ static int __init pc110pad_init(void)
        pc110pad_dev->id.product = 0x0001;
        pc110pad_dev->id.version = 0x0100;
 
-       pc110pad_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       pc110pad_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
-       pc110pad_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       pc110pad_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       pc110pad_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
+       pc110pad_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
        pc110pad_dev->absmax[ABS_X] = 0x1ff;
        pc110pad_dev->absmax[ABS_Y] = 0x0ff;
index 0735257565329aa590096f1f9bc8fd0ab49e665b..da316d13d7f578cdd6b3b0aa8f1a122f8c055ccd 100644 (file)
@@ -1115,9 +1115,10 @@ static int psmouse_switch_protocol(struct psmouse *psmouse, const struct psmouse
 
        input_dev->dev.parent = &psmouse->ps2dev.serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-       input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+       input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
        psmouse->set_rate = psmouse_set_rate;
        psmouse->set_resolution = psmouse_set_resolution;
index 355efd0423e7df49cd5cbbf35b26eb3da774adf5..18a48636ba4a96f88c8b7f253fd9aaf894e593b6 100644 (file)
@@ -78,9 +78,10 @@ static int __init rpcmouse_init(void)
        rpcmouse_dev->id.product = 0x0001;
        rpcmouse_dev->id.version = 0x0100;
 
-       rpcmouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       rpcmouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-       rpcmouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       rpcmouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       rpcmouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+       rpcmouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
        rpcmouse_lastx = (short) iomd_readl(IOMD_MOUSEX);
        rpcmouse_lasty = (short) iomd_readl(IOMD_MOUSEY);
index 77b8ee2b9651750728fe47409042ab0710057cc1..ed917bfd086aa13dbafc56bf19a971ee906307e3 100644 (file)
@@ -268,9 +268,10 @@ static int sermouse_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
-       input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT);
+       input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
        if (c & 0x01) set_bit(BTN_MIDDLE, input_dev->keybit);
        if (c & 0x02) set_bit(BTN_SIDE, input_dev->keybit);
index 7b977fd23571737ee3e49cb70ee4174e4c083376..3fadb2accac0264baa3dc2790241fd5e4ae1783a 100644 (file)
@@ -85,7 +85,7 @@ int touchkit_ps2_detect(struct psmouse *psmouse, int set_properties)
                return -ENODEV;
 
        if (set_properties) {
-               dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+               dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
                set_bit(BTN_TOUCH, dev->keybit);
                input_set_abs_params(dev, ABS_X, 0, TOUCHKIT_MAX_XC, 0, 0);
                input_set_abs_params(dev, ABS_Y, 0, TOUCHKIT_MAX_YC, 0, 0);
index 79146d6ed2ab2d392988d196d64d4f9d6c0e92bf..78c3ea75da2a056b2799045c6c2329f5834a8705 100644 (file)
@@ -998,34 +998,36 @@ static const struct input_device_id mousedev_ids[] = {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_KEYBIT |
                                INPUT_DEVICE_ID_MATCH_RELBIT,
-               .evbit = { BIT(EV_KEY) | BIT(EV_REL) },
-               .keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) },
-               .relbit = { BIT(REL_X) | BIT(REL_Y) },
+               .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
+               .keybit = { [BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) },
+               .relbit = { BIT_MASK(REL_X) | BIT_MASK(REL_Y) },
        },      /* A mouse like device, at least one button,
                   two relative axes */
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_RELBIT,
-               .evbit = { BIT(EV_KEY) | BIT(EV_REL) },
-               .relbit = { BIT(REL_WHEEL) },
+               .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
+               .relbit = { BIT_MASK(REL_WHEEL) },
        },      /* A separate scrollwheel */
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_KEYBIT |
                                INPUT_DEVICE_ID_MATCH_ABSBIT,
-               .evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
-               .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
-               .absbit = { BIT(ABS_X) | BIT(ABS_Y) },
+               .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
+               .keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
+               .absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) },
        },      /* A tablet like device, at least touch detection,
                   two absolute axes */
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT |
                                INPUT_DEVICE_ID_MATCH_KEYBIT |
                                INPUT_DEVICE_ID_MATCH_ABSBIT,
-               .evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
-               .keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) },
-               .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) |
-                               BIT(ABS_TOOL_WIDTH) },
+               .evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
+               .keybit = { [BIT_WORD(BTN_TOOL_FINGER)] =
+                               BIT_MASK(BTN_TOOL_FINGER) },
+               .absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+                               BIT_MASK(ABS_PRESSURE) |
+                               BIT_MASK(ABS_TOOL_WIDTH) },
        },      /* A touchpad */
 
        { },    /* Terminating entry */
index dd2310458c46a87aafaf0d2873a54f535dde0781..b973d0ef6d16ae6d332838204a57a20bf184d575 100644 (file)
@@ -192,10 +192,14 @@ static int usb_acecad_probe(struct usb_interface *intf, const struct usb_device_
        input_dev->open = usb_acecad_open;
        input_dev->close = usb_acecad_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE);
-       input_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-       input_dev->keybit[LONG(BTN_DIGI)] = BIT(BTN_TOOL_PEN) |BIT(BTN_TOUCH) | BIT(BTN_STYLUS) | BIT(BTN_STYLUS2);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+               BIT_MASK(ABS_PRESSURE);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_TOOL_PEN) |
+               BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS) |
+               BIT_MASK(BTN_STYLUS2);
 
        switch (id->driver_info) {
                case 0:
index b2ca10f2fe0e48b74d8cf22739f8623561a09c7c..d2c6da264722576ddee43d1041ed641fe25af422 100644 (file)
@@ -573,10 +573,12 @@ static void gtco_setup_caps(struct input_dev *inputdev)
        struct gtco *device = input_get_drvdata(inputdev);
 
        /* Which events */
-       inputdev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC);
+       inputdev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+               BIT_MASK(EV_MSC);
 
        /* Misc event menu block */
-       inputdev->mscbit[0] = BIT(MSC_SCAN)|BIT(MSC_SERIAL)|BIT(MSC_RAW) ;
+       inputdev->mscbit[0] = BIT_MASK(MSC_SCAN) | BIT_MASK(MSC_SERIAL) |
+               BIT_MASK(MSC_RAW);
 
        /* Absolute values based on HID report info */
        input_set_abs_params(inputdev, ABS_X, device->min_X, device->max_X,
index 91e6d00d4a43ffdc4a8a26e21ea1ca9bd6e9df90..1182fc133167ac203e2d5e52cc71201ce4108c24 100644 (file)
@@ -153,10 +153,13 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i
        input_dev->open = kbtab_open;
        input_dev->close = kbtab_close;
 
-       input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC);
-       input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH);
-       input_dev->mscbit[0] |= BIT(MSC_SERIAL);
+       input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+               BIT_MASK(EV_MSC);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
+               BIT_MASK(BTN_TOUCH);
+       input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
        input_set_abs_params(input_dev, ABS_X, 0, 0x2000, 4, 0);
        input_set_abs_params(input_dev, ABS_Y, 0, 0x1750, 4, 0);
        input_set_abs_params(input_dev, ABS_PRESSURE, 0, 0xff, 0, 0);
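
For reference, the input_set_abs_params() calls that recur alongside these conversions configure an absolute axis in one shot; the prototype (from <linux/input.h> of this era, parameter names approximate) and the meaning of the kbtab arguments above:

    void input_set_abs_params(struct input_dev *dev, int axis,
                              int min, int max, int fuzz, int flat);

    /* min/max bound the reported range, fuzz is the tolerance used to
     * filter noise, flat is the dead zone around the centre.  So the
     * ABS_X call above declares a 0..0x2000 axis, fuzz 4, no flat zone. */
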
index 064e123c9b766170be132d2bb894b535d168b961..d64b1ea136b35c1a79ee18ca658f717e27f408f2 100644 (file)
@@ -140,48 +140,58 @@ static void wacom_close(struct input_dev *dev)
 
 void input_dev_mo(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_1) | BIT(BTN_5);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_1) |
+               BIT_MASK(BTN_5);
        input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
 }
 
 void input_dev_g4(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->evbit[0] |= BIT(EV_MSC);
-       input_dev->mscbit[0] |= BIT(MSC_SERIAL);
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER);
-       input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_4);
+       input_dev->evbit[0] |= BIT_MASK(EV_MSC);
+       input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
+               BIT_MASK(BTN_4);
 }
 
 void input_dev_g(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->evbit[0] |= BIT(EV_REL);
-       input_dev->relbit[0] |= BIT(REL_WHEEL);
-       input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE) | BIT(BTN_STYLUS2);
+       input_dev->evbit[0] |= BIT_MASK(EV_REL);
+       input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
+               BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_STYLUS2);
        input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
 }
 
 void input_dev_i3s(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER);
-       input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_1) | BIT(BTN_2) | BIT(BTN_3);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
+               BIT_MASK(BTN_1) | BIT_MASK(BTN_2) | BIT_MASK(BTN_3);
        input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
 }
 
 void input_dev_i3(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_4) | BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_4) |
+               BIT_MASK(BTN_5) | BIT_MASK(BTN_6) | BIT_MASK(BTN_7);
        input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
 }
 
 void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->evbit[0] |= BIT(EV_MSC) | BIT(EV_REL);
-       input_dev->mscbit[0] |= BIT(MSC_SERIAL);
-       input_dev->relbit[0] |= BIT(REL_WHEEL);
-       input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE) | BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE) | BIT(BTN_TOOL_BRUSH)
-               | BIT(BTN_TOOL_PENCIL) | BIT(BTN_TOOL_AIRBRUSH) | BIT(BTN_TOOL_LENS) | BIT(BTN_STYLUS2);
+       input_dev->evbit[0] |= BIT_MASK(EV_MSC) | BIT_MASK(EV_REL);
+       input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
+       input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
+       input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE) |
+               BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
+               BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_TOOL_BRUSH) |
+               BIT_MASK(BTN_TOOL_PENCIL) | BIT_MASK(BTN_TOOL_AIRBRUSH) |
+               BIT_MASK(BTN_TOOL_LENS) | BIT_MASK(BTN_STYLUS2);
        input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
        input_set_abs_params(input_dev, ABS_WHEEL, 0, 1023, 0, 0);
        input_set_abs_params(input_dev, ABS_TILT_X, 0, 127, 0, 0);
@@ -192,12 +202,13 @@ void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 
 void input_dev_pl(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_STYLUS2) | BIT(BTN_TOOL_RUBBER);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_STYLUS2) |
+               BIT_MASK(BTN_TOOL_RUBBER);
 }
 
 void input_dev_pt(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER);
 }
 
 static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *id)
@@ -243,12 +254,13 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i
        input_dev->open = wacom_open;
        input_dev->close = wacom_close;
 
-       input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH) | BIT(BTN_STYLUS);
+       input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
+               BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS);
        input_set_abs_params(input_dev, ABS_X, 0, wacom_wac->features->x_max, 4, 0);
        input_set_abs_params(input_dev, ABS_Y, 0, wacom_wac->features->y_max, 4, 0);
        input_set_abs_params(input_dev, ABS_PRESSURE, 0, wacom_wac->features->pressure_max, 0, 0);
-       input_dev->absbit[LONG(ABS_MISC)] |= BIT(ABS_MISC);
+       input_dev->absbit[BIT_WORD(ABS_MISC)] |= BIT_MASK(ABS_MISC);
 
        wacom_init_input_dev(input_dev, wacom_wac);
 
index 51ae4fb7d123b2882380f1ec78531949c70c59a9..f59aecf5ec1565fe4959308edc8cc034c3a0a9a3 100644 (file)
@@ -917,8 +917,8 @@ static int __devinit ads7846_probe(struct spi_device *spi)
        input_dev->phys = ts->phys;
        input_dev->dev.parent = &spi->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(input_dev, ABS_X,
                        pdata->x_min ? : 0,
                        pdata->x_max ? : MAX_12BIT,
index e6a31d118786c67c76913f7270e2761eecd69a8a..b1b2e07bf080b3cb348726e3c7a3aff935ad7725 100644 (file)
@@ -302,8 +302,8 @@ static int __init corgits_probe(struct platform_device *pdev)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &pdev->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(input_dev, ABS_X, X_AXIS_MIN, X_AXIS_MAX, 0, 0);
        input_set_abs_params(input_dev, ABS_Y, Y_AXIS_MIN, Y_AXIS_MAX, 0, 0);
        input_set_abs_params(input_dev, ABS_PRESSURE, PRESSURE_MIN, PRESSURE_MAX, 0, 0);
index 557d781719f12b631c6dd940b109c91be04cef9b..d20689cdbd5d63665601e9e00baca7ffd08eb42d 100644 (file)
@@ -320,8 +320,8 @@ static int elo_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
        serio_set_drvdata(serio, elo);
        err = serio_open(serio, drv);
index daf7a4afc9352e08afe3e04637a18bd8698382eb..80b21800355f10d255a8e8f06fdad8a3b37270fa 100644 (file)
@@ -122,8 +122,8 @@ static int fujitsu_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.vendor = SERIO_FUJITSU;
        input_dev->id.product = 0;
        input_dev->id.version = 0x0100;
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
        input_set_abs_params(input_dev, ABS_X, 0, 4096, 0, 0);
        input_set_abs_params(input_dev, ABS_Y, 0, 4096, 0, 0);
index 39d602600d7cfe1e7a89d5f9f4f23652d8ad90d9..a48a15868c4ade3b1067a97f955d819df739c831 100644 (file)
@@ -137,8 +137,8 @@ static int gunze_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.product = 0x0051;
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(input_dev, ABS_X, 24, 1000, 0, 0);
        input_set_abs_params(input_dev, ABS_Y, 24, 1000, 0, 0);
 
index 09ed7803cb8fcd07143bd3c1dfb3d571fb0efb58..2ae6c6016a8665d79eb4f6dd63d76a3675230f30 100644 (file)
@@ -373,8 +373,9 @@ static int h3600ts_connect(struct serio *serio, struct serio_driver *drv)
 
        input_dev->event = h3600ts_event;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_LED) | BIT(EV_PWR);
-       input_dev->ledbit[0] = BIT(LED_SLEEP);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+               BIT_MASK(EV_LED) | BIT_MASK(EV_PWR);
+       input_dev->ledbit[0] = BIT_MASK(LED_SLEEP);
        input_set_abs_params(input_dev, ABS_X, 60, 985, 0, 0);
        input_set_abs_params(input_dev, ABS_Y, 35, 1024, 0, 0);
 
index 1a15475aedfcc269d5bbfc033c15db3025a3dd5b..c38d4e0f95c6a093e7bf009fcb662160b97a3a8e 100644 (file)
@@ -81,8 +81,8 @@ static int __init hp680_ts_init(void)
        if (!hp680_ts_dev)
                return -ENOMEM;
 
-       hp680_ts_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
-       hp680_ts_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       hp680_ts_dev->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
+       hp680_ts_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
        input_set_abs_params(hp680_ts_dev, ABS_X,
                HP680_TS_ABS_X_MIN, HP680_TS_ABS_X_MAX, 0, 0);
index 44140feeffc503428a54399b591f1628f3842010..80a65886870624300be79bfb008f4add780d68e4 100644 (file)
@@ -186,8 +186,8 @@ static int __init mk712_init(void)
        mk712_dev->open    = mk712_open;
        mk712_dev->close   = mk712_close;
 
-       mk712_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       mk712_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       mk712_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       mk712_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(mk712_dev, ABS_X, 0, 0xfff, 88, 0);
        input_set_abs_params(mk712_dev, ABS_Y, 0, 0xfff, 88, 0);
 
index 4ec3b1f940c8f424560f4d5ebcc1bb060ce44162..9077228418b7028242caa03cc99292ee8f2c62cd 100644 (file)
@@ -151,8 +151,8 @@ static int mtouch_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.product = 0;
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(mtouch->dev, ABS_X, MTOUCH_MIN_XC, MTOUCH_MAX_XC, 0, 0);
        input_set_abs_params(mtouch->dev, ABS_Y, MTOUCH_MIN_YC, MTOUCH_MAX_YC, 0, 0);
 
index f2c0d3c7149cacc684a3cb30f20ba159dbd1ac00..c7f9cebebbb6dbe5376e53407b44d4c41370aee8 100644 (file)
@@ -113,8 +113,8 @@ static int pm_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
 
-        input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-        input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+        input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+        input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
         input_set_abs_params(pm->dev, ABS_X, 0, 0x3ff, 0, 0);
         input_set_abs_params(pm->dev, ABS_Y, 0, 0x3ff, 0, 0);
 
index 3def7bb1df44641c569880e64f389613e2ad3425..3a5c142c2a78a6fd6b0e55cbf79c5c57bf236999 100644 (file)
@@ -125,8 +125,8 @@ static int tr_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.product = 0;
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(tr->dev, ABS_X, TR_MIN_XC, TR_MAX_XC, 0, 0);
        input_set_abs_params(tr->dev, ABS_Y, TR_MIN_YC, TR_MAX_YC, 0, 0);
 
index ac4bdcf186660a8c040ea5604648a59438061808..763a656a59f8270929f2366f710147ef2339e43e 100644 (file)
@@ -132,8 +132,8 @@ static int tw_connect(struct serio *serio, struct serio_driver *drv)
        input_dev->id.product = 0;
        input_dev->id.version = 0x0100;
        input_dev->dev.parent = &serio->dev;
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(tw->dev, ABS_X, TW_MIN_XC, TW_MAX_XC, 0, 0);
        input_set_abs_params(tw->dev, ABS_Y, TW_MIN_YC, TW_MAX_YC, 0, 0);
 
index 89373b01d8f5694148dab2f6ccf6d67839c65ee6..7549939b9535634de87ddb09a829b186758303e0 100644 (file)
@@ -517,7 +517,7 @@ static int ucb1400_ts_probe(struct device *dev)
        idev->id.product        = id;
        idev->open              = ucb1400_ts_open;
        idev->close             = ucb1400_ts_close;
-       idev->evbit[0]          = BIT(EV_ABS);
+       idev->evbit[0]          = BIT_MASK(EV_ABS);
 
        ucb1400_adc_enable(ucb);
        x_res = ucb1400_ts_read_xres(ucb);
index 9fb3d5c309998162c5f14d40c6587e95c95395a5..5f34b78d5ddbef126193d13b1c2afc4e8a3600b8 100644 (file)
@@ -868,8 +868,8 @@ static int usbtouch_probe(struct usb_interface *intf,
        input_dev->open = usbtouch_open;
        input_dev->close = usbtouch_close;
 
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-       input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
        input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0);
        input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0);
        if (type->max_press)
index 428872b653e9b0cd0c1c06d33ff7d6cf834344c6..669f6f67449c46f26416a27c82939ad734256306 100644 (file)
@@ -486,11 +486,13 @@ static void b1dma_handle_rx(avmcard *card)
                                        card->name);
                } else {
                        memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
-                       if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF)
+                       if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF) {
+                               spin_lock(&card->lock);
                                capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
-                                                    CAPIMSG_NCCI(skb->data),
-                                                    CAPIMSG_MSGID(skb->data));
-
+                                       CAPIMSG_NCCI(skb->data),
+                                       CAPIMSG_MSGID(skb->data));
+                               spin_unlock(&card->lock);
+                       }
                        capi_ctr_handle_message(ctrl, ApplId, skb);
                }
                break;
@@ -500,9 +502,9 @@ static void b1dma_handle_rx(avmcard *card)
                ApplId = _get_word(&p);
                NCCI = _get_word(&p);
                WindowSize = _get_word(&p);
-
+               spin_lock(&card->lock);
                capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
-
+               spin_unlock(&card->lock);
                break;
 
        case RECEIVE_FREE_NCCI:
@@ -510,9 +512,11 @@ static void b1dma_handle_rx(avmcard *card)
                ApplId = _get_word(&p);
                NCCI = _get_word(&p);
 
-               if (NCCI != 0xffffffff)
+               if (NCCI != 0xffffffff) {
+                       spin_lock(&card->lock);
                        capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
-
+                       spin_unlock(&card->lock);
+               }
                break;
 
        case RECEIVE_START:
@@ -751,10 +755,10 @@ void b1dma_reset_ctr(struct capi_ctr *ctrl)
 
        spin_lock_irqsave(&card->lock, flags);
        b1dma_reset(card);
-       spin_unlock_irqrestore(&card->lock, flags);
 
        memset(cinfo->version, 0, sizeof(cinfo->version));
        capilib_release(&cinfo->ncci_head);
+       spin_unlock_irqrestore(&card->lock, flags);
        capi_ctr_reseted(ctrl);
 }
 
@@ -803,8 +807,11 @@ void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl)
        avmcard *card = cinfo->card;
        struct sk_buff *skb;
        void *p;
+       unsigned long flags;
 
+       spin_lock_irqsave(&card->lock, flags);
        capilib_release_appl(&cinfo->ncci_head, appl);
+       spin_unlock_irqrestore(&card->lock, flags);
 
        skb = alloc_skb(7, GFP_ATOMIC);
        if (!skb) {
@@ -832,10 +839,13 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
        u16 retval = CAPI_NOERROR;
 
        if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) {
+               unsigned long flags;
+               spin_lock_irqsave(&card->lock, flags);
                retval = capilib_data_b3_req(&cinfo->ncci_head,
                                             CAPIMSG_APPID(skb->data),
                                             CAPIMSG_NCCI(skb->data),
                                             CAPIMSG_MSGID(skb->data));
+               spin_unlock_irqrestore(&card->lock, flags);
        }
        if (retval == CAPI_NOERROR) 
                b1dma_queue_tx(card, skb);
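
The common thread in these b1dma hunks: every capilib_* manipulation of the shared NCCI list now runs under card->lock, with the irqsave variant in process context and the plain variant where interrupts are already disabled. The pattern, reduced to a sketch (lock name and bodies mine, not the driver's exact code):

    #include <linux/spinlock.h>
    #include <linux/interrupt.h>

    static DEFINE_SPINLOCK(list_lock);      /* stands in for card->lock */

    static void touch_list_from_syscall(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&list_lock, flags);   /* masks local IRQs too */
            /* ... add/remove/confirm NCCI entries ... */
            spin_unlock_irqrestore(&list_lock, flags);
    }

    static irqreturn_t rx_isr(int irq, void *data)
    {
            spin_lock(&list_lock);          /* hard-irq: IRQs already off */
            /* ... same list, now serialized against the path above ... */
            spin_unlock(&list_lock);
            return IRQ_HANDLED;
    }
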
index 8710cf6214d9fc9487841547a4139625948d51c1..4bbbbe688077558f1793c746e0f95380d8b65f17 100644 (file)
@@ -678,7 +678,9 @@ static irqreturn_t c4_handle_interrupt(avmcard *card)
                 for (i=0; i < card->nr_controllers; i++) {
                        avmctrl_info *cinfo = &card->ctrlinfo[i];
                        memset(cinfo->version, 0, sizeof(cinfo->version));
+                       spin_lock_irqsave(&card->lock, flags);
                        capilib_release(&cinfo->ncci_head);
+                       spin_unlock_irqrestore(&card->lock, flags);
                        capi_ctr_reseted(&cinfo->capi_ctrl);
                }
                card->nlogcontr = 0;
index c925020fe9b7874732383d8605994d30096f3c10..6130724e46e775a98e9f37621d9aa4533eb2c553 100644 (file)
@@ -180,8 +180,8 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
 
                        ApplId = (unsigned) b1_get_word(card->port);
                        MsgLen = t1_get_slice(card->port, card->msgbuf);
-                       spin_unlock_irqrestore(&card->lock, flags);
                        if (!(skb = alloc_skb(MsgLen, GFP_ATOMIC))) {
+                               spin_unlock_irqrestore(&card->lock, flags);
                                printk(KERN_ERR "%s: incoming packet dropped\n",
                                                card->name);
                        } else {
@@ -190,7 +190,7 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
                                        capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
                                                             CAPIMSG_NCCI(skb->data),
                                                             CAPIMSG_MSGID(skb->data));
-
+                               spin_unlock_irqrestore(&card->lock, flags);
                                capi_ctr_handle_message(ctrl, ApplId, skb);
                        }
                        break;
@@ -200,21 +200,17 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
                        ApplId = b1_get_word(card->port);
                        NCCI = b1_get_word(card->port);
                        WindowSize = b1_get_word(card->port);
-                       spin_unlock_irqrestore(&card->lock, flags);
-
                        capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
-
+                       spin_unlock_irqrestore(&card->lock, flags);
                        break;
 
                case RECEIVE_FREE_NCCI:
 
                        ApplId = b1_get_word(card->port);
                        NCCI = b1_get_word(card->port);
-                       spin_unlock_irqrestore(&card->lock, flags);
-
                        if (NCCI != 0xffffffff)
                                capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
-
+                       spin_unlock_irqrestore(&card->lock, flags);
                        break;
 
                case RECEIVE_START:
@@ -333,13 +329,16 @@ static void t1isa_reset_ctr(struct capi_ctr *ctrl)
        avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
        avmcard *card = cinfo->card;
        unsigned int port = card->port;
+       unsigned long flags;
 
        t1_disable_irq(port);
        b1_reset(port);
        b1_reset(port);
 
        memset(cinfo->version, 0, sizeof(cinfo->version));
+       spin_lock_irqsave(&card->lock, flags);
        capilib_release(&cinfo->ncci_head);
+       spin_unlock_irqrestore(&card->lock, flags);
        capi_ctr_reseted(ctrl);
 }
 
@@ -466,29 +465,26 @@ static u16 t1isa_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
        u8 subcmd = CAPIMSG_SUBCOMMAND(skb->data);
        u16 dlen, retval;
 
+       spin_lock_irqsave(&card->lock, flags);
        if (CAPICMD(cmd, subcmd) == CAPI_DATA_B3_REQ) {
                retval = capilib_data_b3_req(&cinfo->ncci_head,
                                             CAPIMSG_APPID(skb->data),
                                             CAPIMSG_NCCI(skb->data),
                                             CAPIMSG_MSGID(skb->data));
-               if (retval != CAPI_NOERROR) 
+               if (retval != CAPI_NOERROR) {
+                       spin_unlock_irqrestore(&card->lock, flags);
                        return retval;
-
+               }
                dlen = CAPIMSG_DATALEN(skb->data);
 
-               spin_lock_irqsave(&card->lock, flags);
                b1_put_byte(port, SEND_DATA_B3_REQ);
                t1_put_slice(port, skb->data, len);
                t1_put_slice(port, skb->data + len, dlen);
-               spin_unlock_irqrestore(&card->lock, flags);
        } else {
-
-               spin_lock_irqsave(&card->lock, flags);
                b1_put_byte(port, SEND_MESSAGE);
                t1_put_slice(port, skb->data, len);
-               spin_unlock_irqrestore(&card->lock, flags);
        }
-
+       spin_unlock_irqrestore(&card->lock, flags);
        dev_kfree_skb_any(skb);
        return CAPI_NOERROR;
 }
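
t1isa_send_message shows the companion discipline: once the lock is taken at the top of the function, every exit, including the early error return, must drop it. Reduced to a sketch (names and error value mine):

    static int send_locked(spinlock_t *lock, bool bad_request)
    {
            unsigned long flags;

            spin_lock_irqsave(lock, flags);
            if (bad_request) {
                    spin_unlock_irqrestore(lock, flags); /* the path that is easy to miss */
                    return -EINVAL;
            }
            /* ... push the message to the card ... */
            spin_unlock_irqrestore(lock, flags);
            return 0;
    }
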
diff --git a/drivers/isdn/sc/debug.h b/drivers/isdn/sc/debug.h
deleted file mode 100644 (file)
index e9db96e..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* $Id: debug.h,v 1.2.8.1 2001/09/23 22:24:59 kai Exp $
- *
- * Copyright (C) 1996  SpellCaster Telecommunications Inc.
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- * For more information, please contact gpl-info@spellcast.com or write:
- *
- *     SpellCaster Telecommunications Inc.
- *     5621 Finch Avenue East, Unit #3
- *     Scarborough, Ontario  Canada
- *     M1B 2T9
- *     +1 (416) 297-8565
- *     +1 (416) 297-6433 Facsimile
- */
-
-#define REQUEST_IRQ(a,b,c,d,e) request_irq(a,b,c,d,e)
-#define FREE_IRQ(a,b) free_irq(a,b)
index 5286e0c810a9f4c26254e9638a049bf2d38c501f..4766e5b77378369aa09f9d73084e83728d76a9f3 100644 (file)
@@ -14,4 +14,3 @@
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/isdnif.h>
-#include "debug.h"
index 0bf76344a0d54fcbd00752fc65d2a1b4f390332f..d09c854cfac7f819721cc91c5624c1bbc6a67c2a 100644 (file)
@@ -404,7 +404,7 @@ static void __exit sc_exit(void)
                /*
                 * Release the IRQ
                 */
-               FREE_IRQ(sc_adapter[i]->interrupt, NULL);
+               free_irq(sc_adapter[i]->interrupt, NULL);
 
                /*
                 * Reset for a clean start
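
With drivers/isdn/sc/debug.h deleted, the driver calls the genirq API directly instead of through the one-line REQUEST_IRQ/FREE_IRQ wrappers. For reference, the signatures being wrapped (per <linux/interrupt.h> of this kernel):

    int request_irq(unsigned int irq, irq_handler_t handler,
                    unsigned long irqflags, const char *devname, void *dev_id);
    void free_irq(unsigned int irq, void *dev_id);

    /* free_irq()'s dev_id must match the cookie passed to request_irq();
     * both sides use NULL here, which is only safe for non-shared IRQs. */
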
index 2766e4fc4ea82e36cfa7ce205076ad7f1c3cae44..883da72b5368d236d6c0246da16509b9a97bc42f 100644 (file)
@@ -791,8 +791,10 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
                        if (hid->keycode[i])
                                set_bit(hid->keycode[i], input_dev->keybit);
 
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP);
-               input_dev->ledbit[0] = BIT(LED_SCROLLL) | BIT(LED_CAPSL) | BIT(LED_NUML);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+                       BIT_MASK(EV_REP);
+               input_dev->ledbit[0] = BIT_MASK(LED_SCROLLL) |
+                       BIT_MASK(LED_CAPSL) | BIT_MASK(LED_NUML);
                input_dev->event = adbhid_kbd_event;
                input_dev->keycodemax = KEY_FN;
                input_dev->keycodesize = sizeof(hid->keycode[0]);
@@ -801,16 +803,18 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
        case ADB_MOUSE:
                sprintf(hid->name, "ADB mouse");
 
-               input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-               input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-               input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+               input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+               input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+                       BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+               input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
                break;
 
        case ADB_MISC:
                switch (original_handler_id) {
                case 0x02: /* Adjustable keyboard button device */
                        sprintf(hid->name, "ADB adjustable keyboard buttons");
-                       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+                       input_dev->evbit[0] = BIT_MASK(EV_KEY) |
+                               BIT_MASK(EV_REP);
                        set_bit(KEY_SOUND, input_dev->keybit);
                        set_bit(KEY_MUTE, input_dev->keybit);
                        set_bit(KEY_VOLUMEUP, input_dev->keybit);
@@ -818,7 +822,8 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
                        break;
                case 0x1f: /* Powerbook button device */
                        sprintf(hid->name, "ADB Powerbook buttons");
-                       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+                       input_dev->evbit[0] = BIT_MASK(EV_KEY) |
+                               BIT_MASK(EV_REP);
                        set_bit(KEY_MUTE, input_dev->keybit);
                        set_bit(KEY_VOLUMEUP, input_dev->keybit);
                        set_bit(KEY_VOLUMEDOWN, input_dev->keybit);
index 33dee3a773ed29ae4cad14fe9fa37203ba7cbfef..89302309da92d56f397096ad8412a4e7fa2ef1d4 100644 (file)
@@ -117,9 +117,10 @@ static int emumousebtn_input_register(void)
        emumousebtn->id.product = 0x0001;
        emumousebtn->id.version = 0x0100;
 
-       emumousebtn->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       emumousebtn->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-       emumousebtn->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+       emumousebtn->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       emumousebtn->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+       emumousebtn->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
        ret = input_register_device(emumousebtn);
        if (ret)
index c059ae6f37e5ae1fe789e3fad66ad0a91da3a264..808cd95494563d260456733e0d1e9ba80cbee21e 100644 (file)
@@ -4717,7 +4717,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
 
 void md_unregister_thread(mdk_thread_t *thread)
 {
-       dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
+       dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
 
        kthread_stop(thread->tsk);
        kfree(thread);
index 5a12b5679556a78a2fc70be51beaa3f0433d2fd7..154a7ce7cb826dd43443b4492e9f8ca92a322ce7 100644 (file)
@@ -820,7 +820,7 @@ static int cinergyt2_register_rc(struct cinergyt2 *cinergyt2)
 
        input_dev->name = DRIVER_NAME " remote control";
        input_dev->phys = cinergyt2->phys;
-       input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
        for (i = 0; i < ARRAY_SIZE(rc_keys); i += 3)
                set_bit(rc_keys[i + 2], input_dev->keybit);
        input_dev->keycodesize = 0;
index 7b9f35bfb4f062fe132f78de4fd69736bf293b2f..c0c2c22ddd835143d087ca450306e90f342310c4 100644 (file)
@@ -106,7 +106,7 @@ int dvb_usb_remote_init(struct dvb_usb_device *d)
        if (!input_dev)
                return -ENOMEM;
 
-       input_dev->evbit[0] = BIT(EV_KEY);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY);
        input_dev->name = "IR-receiver inside an USB DVB receiver";
        input_dev->phys = d->rc_phys;
        usb_to_input_id(d->udev, &input_dev->id);
index 5d19c402dad13fbf8096aa05147f28f684d93258..a283e1de83facf6edc5ebae02b6ae232d477b885 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 #include "av7110.h"
 #include "av7110_hw.h"
index 5e691fd79904e98cbbc8cc5b9a6ddacf1b6d0658..1ec981d98b91bf60da958e0530dccca725b68474 100644 (file)
@@ -1198,7 +1198,7 @@ static int ttusb_init_rc( struct ttusb_dec *dec)
 
        input_dev->name = "ttusb_dec remote control";
        input_dev->phys = dec->rc_phys;
-       input_dev->evbit[0] = BIT(EV_KEY);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY);
        input_dev->keycodesize = sizeof(u16);
        input_dev->keycodemax = 0x1a;
        input_dev->keycode = rc_keys;
index 491505d6fdeea14d4c7b9934aa656666d83eaa6e..3e93f80587708f2f118d150c8c9a0ac3cc2c3208 100644 (file)
@@ -238,8 +238,8 @@ static void konicawc_register_input(struct konicawc *cam, struct usb_device *dev
        usb_to_input_id(dev, &input_dev->id);
        input_dev->dev.parent = &dev->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY);
-       input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY);
+       input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
        input_dev->private = cam;
 
index dd1a6d6bbc9eef80a81f30162f9f20ca49eaf780..d847273eeba0c247c4f707a4fed3b00feef04b3e 100644 (file)
@@ -102,8 +102,8 @@ static void qcm_register_input(struct qcm *cam, struct usb_device *dev)
        usb_to_input_id(dev, &input_dev->id);
        input_dev->dev.parent = &dev->dev;
 
-       input_dev->evbit[0] = BIT(EV_KEY);
-       input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+       input_dev->evbit[0] = BIT_MASK(EV_KEY);
+       input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
        input_dev->private = cam;
 
index 1c14fa2bd4115623de441807e76688133f6664f3..419e5af785332925a997ec3224c867e92dffb7d4 100644 (file)
@@ -1285,7 +1285,7 @@ zoran_open (struct inode *inode,
        }
 
        dprintk(1, KERN_INFO "%s: zoran_open(%s, pid=[%d]), users(-)=%d\n",
-               ZR_DEVNAME(zr), current->comm, current->pid, zr->user);
+               ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
 
        /* now, create the open()-specific file_ops struct */
        fh = kzalloc(sizeof(struct zoran_fh), GFP_KERNEL);
@@ -1358,7 +1358,7 @@ zoran_close (struct inode *inode,
        struct zoran *zr = fh->zr;
 
        dprintk(1, KERN_INFO "%s: zoran_close(%s, pid=[%d]), users(+)=%d\n",
-               ZR_DEVNAME(zr), current->comm, current->pid, zr->user);
+               ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
 
        /* kernel locks (fs/device.c), so don't do that ourselves
         * (prevents deadlocks) */
index 0550ce075fc4f5cde48b3de4a12ea428288b7ad6..1d9defb1a10c04d442b3edde8920977d686616ad 100644 (file)
@@ -226,9 +226,9 @@ int ibmasm_init_remote_input_dev(struct service_processor *sp)
        mouse_dev->id.product = pdev->device;
        mouse_dev->id.version = 1;
        mouse_dev->dev.parent = sp->dev;
-       mouse_dev->evbit[0]  = BIT(EV_KEY) | BIT(EV_ABS);
-       mouse_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) |
-               BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
+       mouse_dev->evbit[0]  = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+       mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+               BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
        set_bit(BTN_TOUCH, mouse_dev->keybit);
        mouse_dev->name = "ibmasm RSA I remote mouse";
        input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0);
@@ -239,7 +239,7 @@ int ibmasm_init_remote_input_dev(struct service_processor *sp)
        keybd_dev->id.product = pdev->device;
        keybd_dev->id.version = 2;
        keybd_dev->dev.parent = sp->dev;
-       keybd_dev->evbit[0]  = BIT(EV_KEY);
+       keybd_dev->evbit[0]  = BIT_MASK(EV_KEY);
        keybd_dev->name = "ibmasm RSA I remote keyboard";
 
        for (i = 0; i < XLATE_SIZE; i++) {
index 5108b7c576dfba8fab2e5794188a04a0fdf7044e..cd221fd0fb94b89a4c54ac3cc52da09cf7a13dee 100644 (file)
@@ -9,6 +9,7 @@
  *  You need a userspace library to cooperate with this driver. It (and other
  *  info) may be obtained here:
  *  http://www.fi.muni.cz/~xslaby/phantom.html
+ *  or alternatively, you might use OpenHaptics provided by Sensable.
  */
 
 #include <linux/kernel.h>
 #include <asm/atomic.h>
 #include <asm/io.h>
 
-#define PHANTOM_VERSION                "n0.9.5"
+#define PHANTOM_VERSION                "n0.9.7"
 
 #define PHANTOM_MAX_MINORS     8
 
 #define PHN_IRQCTL             0x4c    /* irq control in caddr space */
 
 #define PHB_RUNNING            1
+#define PHB_NOT_OH             2
 
 static struct class *phantom_class;
 static int phantom_major;
@@ -47,7 +49,11 @@ struct phantom_device {
        struct cdev cdev;
 
        struct mutex open_lock;
-       spinlock_t ioctl_lock;
+       spinlock_t regs_lock;
+
+       /* used in NOT_OH mode */
+       struct phm_regs oregs;
+       u32 ctl_reg;
 };
 
 static unsigned char phantom_devices[PHANTOM_MAX_MINORS];
@@ -82,6 +88,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
        struct phm_regs rs;
        struct phm_reg r;
        void __user *argp = (void __user *)arg;
+       unsigned long flags;
        unsigned int i;
 
        if (_IOC_TYPE(cmd) != PH_IOC_MAGIC ||
@@ -96,32 +103,45 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
                if (r.reg > 7)
                        return -EINVAL;
 
-               spin_lock(&dev->ioctl_lock);
+               spin_lock_irqsave(&dev->regs_lock, flags);
                if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) &&
                                phantom_status(dev, dev->status | PHB_RUNNING)){
-                       spin_unlock(&dev->ioctl_lock);
+                       spin_unlock_irqrestore(&dev->regs_lock, flags);
                        return -ENODEV;
                }
 
                pr_debug("phantom: writing %x to %u\n", r.value, r.reg);
+
+               /* preserve amp bit (don't allow to change it when in NOT_OH) */
+               if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) {
+                       r.value &= ~PHN_CTL_AMP;
+                       r.value |= dev->ctl_reg & PHN_CTL_AMP;
+                       dev->ctl_reg = r.value;
+               }
+
                iowrite32(r.value, dev->iaddr + r.reg);
                ioread32(dev->iaddr); /* PCI posting */
 
                if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ))
                        phantom_status(dev, dev->status & ~PHB_RUNNING);
-               spin_unlock(&dev->ioctl_lock);
+               spin_unlock_irqrestore(&dev->regs_lock, flags);
                break;
        case PHN_SET_REGS:
                if (copy_from_user(&rs, argp, sizeof(rs)))
                        return -EFAULT;
 
                pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask);
-               spin_lock(&dev->ioctl_lock);
-               for (i = 0; i < min(rs.count, 8U); i++)
-                       if ((1 << i) & rs.mask)
-                               iowrite32(rs.values[i], dev->oaddr + i);
-               ioread32(dev->iaddr); /* PCI posting */
-               spin_unlock(&dev->ioctl_lock);
+               spin_lock_irqsave(&dev->regs_lock, flags);
+               if (dev->status & PHB_NOT_OH)
+                       memcpy(&dev->oregs, &rs, sizeof(rs));
+               else {
+                       u32 m = min(rs.count, 8U);
+                       for (i = 0; i < m; i++)
+                               if (rs.mask & BIT(i))
+                                       iowrite32(rs.values[i], dev->oaddr + i);
+                       ioread32(dev->iaddr); /* PCI posting */
+               }
+               spin_unlock_irqrestore(&dev->regs_lock, flags);
                break;
        case PHN_GET_REG:
                if (copy_from_user(&r, argp, sizeof(r)))
@@ -135,20 +155,35 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
                if (copy_to_user(argp, &r, sizeof(r)))
                        return -EFAULT;
                break;
-       case PHN_GET_REGS:
+       case PHN_GET_REGS: {
+               u32 m;
+
                if (copy_from_user(&rs, argp, sizeof(rs)))
                        return -EFAULT;
 
+               m = min(rs.count, 8U);
+
                pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask);
-               spin_lock(&dev->ioctl_lock);
-               for (i = 0; i < min(rs.count, 8U); i++)
-                       if ((1 << i) & rs.mask)
+               spin_lock_irqsave(&dev->regs_lock, flags);
+               for (i = 0; i < m; i++)
+                       if (rs.mask & BIT(i))
                                rs.values[i] = ioread32(dev->iaddr + i);
-               spin_unlock(&dev->ioctl_lock);
+               spin_unlock_irqrestore(&dev->regs_lock, flags);
 
                if (copy_to_user(argp, &rs, sizeof(rs)))
                        return -EFAULT;
                break;
+       } case PHN_NOT_OH:
+               spin_lock_irqsave(&dev->regs_lock, flags);
+               if (dev->status & PHB_RUNNING) {
+                       printk(KERN_ERR "phantom: you need to set NOT_OH "
+                                       "before you start the device!\n");
+                       spin_unlock_irqrestore(&dev->regs_lock, flags);
+                       return -EINVAL;
+               }
+               dev->status |= PHB_NOT_OH;
+               spin_unlock_irqrestore(&dev->regs_lock, flags);
+               break;
        default:
                return -ENOTTY;
        }
@@ -171,8 +206,11 @@ static int phantom_open(struct inode *inode, struct file *file)
                return -EINVAL;
        }
 
+       WARN_ON(dev->status & PHB_NOT_OH);
+
        file->private_data = dev;
 
+       atomic_set(&dev->counter, 0);
        dev->opened++;
        mutex_unlock(&dev->open_lock);
 
@@ -187,6 +225,7 @@ static int phantom_release(struct inode *inode, struct file *file)
 
        dev->opened = 0;
        phantom_status(dev, dev->status & ~PHB_RUNNING);
+       dev->status &= ~PHB_NOT_OH;
 
        mutex_unlock(&dev->open_lock);
 
@@ -220,12 +259,32 @@ static struct file_operations phantom_file_ops = {
 static irqreturn_t phantom_isr(int irq, void *data)
 {
        struct phantom_device *dev = data;
+       unsigned int i;
+       u32 ctl;
 
-       if (!(ioread32(dev->iaddr + PHN_CONTROL) & PHN_CTL_IRQ))
+       spin_lock(&dev->regs_lock);
+       ctl = ioread32(dev->iaddr + PHN_CONTROL);
+       if (!(ctl & PHN_CTL_IRQ)) {
+               spin_unlock(&dev->regs_lock);
                return IRQ_NONE;
+       }
 
        iowrite32(0, dev->iaddr);
        iowrite32(0xc0, dev->iaddr);
+
+       if (dev->status & PHB_NOT_OH) {
+               struct phm_regs *r = &dev->oregs;
+               u32 m = min(r->count, 8U);
+
+               for (i = 0; i < m; i++)
+                       if (r->mask & BIT(i))
+                               iowrite32(r->values[i], dev->oaddr + i);
+
+               dev->ctl_reg ^= PHN_CTL_AMP;
+               iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL);
+       }
+       spin_unlock(&dev->regs_lock);
+
        ioread32(dev->iaddr); /* PCI posting */
 
        atomic_inc(&dev->counter);
@@ -297,7 +356,7 @@ static int __devinit phantom_probe(struct pci_dev *pdev,
        }
 
        mutex_init(&pht->open_lock);
-       spin_lock_init(&pht->ioctl_lock);
+       spin_lock_init(&pht->regs_lock);
        init_waitqueue_head(&pht->wait);
        cdev_init(&pht->cdev, &phantom_file_ops);
        pht->cdev.owner = THIS_MODULE;
@@ -378,6 +437,8 @@ static int phantom_suspend(struct pci_dev *pdev, pm_message_t state)
        iowrite32(0, dev->caddr + PHN_IRQCTL);
        ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
 
+       synchronize_irq(pdev->irq);
+
        return 0;
 }
 
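Taken together, the phantom changes add a NOT_OH mode for clients that do not run Sensable's OpenHaptics stack: PHN_SET_REGS payloads are cached in dev->oregs and replayed from the ISR, which also toggles the amp control bit on each interrupt. A hypothetical userspace arming sequence (device node name and error handling are assumptions; ioctl numbers come from the phantom header):

    #include <fcntl.h>
    #include <sys/ioctl.h>

    static int phantom_arm_not_oh(void)
    {
            int fd = open("/dev/phantom0", O_RDWR);  /* node name assumed */

            if (fd < 0)
                    return -1;
            /* must happen before the device is started, otherwise the
             * driver returns -EINVAL (see the PHN_NOT_OH case above) */
            return ioctl(fd, PHN_NOT_OH);
    }
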
index e73a71f04bb41b26d10b31e4608f8e33adf8ddc6..86da96becd28101118d619d56cbe93b290005b8c 100644 (file)
@@ -411,9 +411,9 @@ static int sony_laptop_setup_input(void)
        jog_dev->id.bustype = BUS_ISA;
        jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
-       jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-       jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE);
-       jog_dev->relbit[0] = BIT(REL_WHEEL);
+       jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+       jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
+       jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
 
        error = input_register_device(jog_dev);
        if (error)
index a4f1bf33164a1426eb92ea59df1093d856986633..6330c8cc72b5521fa8574d7f4d3f492b7e9cb231 100644 (file)
@@ -1309,7 +1309,7 @@ static int ubi_thread(void *u)
        struct ubi_device *ubi = u;
 
        ubi_msg("background thread \"%s\" started, PID %d",
-               ubi->bgt_name, current->pid);
+               ubi->bgt_name, task_pid_nr(current));
 
        set_freezable();
        for (;;) {
index 96cee4badd289c4f4d78515d7db2588bd6f2ba1c..da767d3d5af5bb552a6d0dce0792112a3179fc6c 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/dma-mapping.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <linux/delay.h>
index 7a045a37056e0de7a93e6d44d296e1801f48db6b..084f0292ea6e13d029244564b112b1a0582a8c34 100644 (file)
@@ -126,7 +126,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
 
 // ================= main 802.3ad protocol functions ==================
 static int ad_lacpdu_send(struct port *port);
-static int ad_marker_send(struct port *port, struct marker *marker);
+static int ad_marker_send(struct port *port, struct bond_marker *marker);
 static void ad_mux_machine(struct port *port);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
@@ -139,8 +139,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
 static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
 static void ad_enable_collecting_distributing(struct port *port);
 static void ad_disable_collecting_distributing(struct port *port);
-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
-static void ad_marker_response_received(struct marker *marker, struct port *port);
+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
+static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
 
 
 /////////////////////////////////////////////////////////////////////////////////
@@ -889,12 +889,12 @@ static int ad_lacpdu_send(struct port *port)
  * Returns:   0 on success
  *          < 0 on error
  */
-static int ad_marker_send(struct port *port, struct marker *marker)
+static int ad_marker_send(struct port *port, struct bond_marker *marker)
 {
        struct slave *slave = port->slave;
        struct sk_buff *skb;
-       struct marker_header *marker_header;
-       int length = sizeof(struct marker_header);
+       struct bond_marker_header *marker_header;
+       int length = sizeof(struct bond_marker_header);
        struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
 
        skb = dev_alloc_skb(length + 16);
@@ -909,7 +909,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
        skb->network_header = skb->mac_header + ETH_HLEN;
        skb->protocol = PKT_TYPE_LACPDU;
 
-       marker_header = (struct marker_header *)skb_put(skb, length);
+       marker_header = (struct bond_marker_header *)skb_put(skb, length);
 
        marker_header->ad_header.destination_address = lacpdu_multicast_address;
        /* Note: source address is set to be the member's PERMANENT address, because we use it
@@ -1709,7 +1709,7 @@ static void ad_disable_collecting_distributing(struct port *port)
  */
 static void ad_marker_info_send(struct port *port)
 {
-       struct marker marker;
+       struct bond_marker marker;
        u16 index;
 
        // fill the marker PDU with the appropriate values
@@ -1742,13 +1742,14 @@ static void ad_marker_info_send(struct port *port)
  * @port: the port we're looking at
  *
  */
-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
+static void ad_marker_info_received(struct bond_marker *marker_info,
+       struct port *port)
 {
-       struct marker marker;
+       struct bond_marker marker;
 
        // copy the received marker data to the response marker
        //marker = *marker_info;
-       memcpy(&marker, marker_info, sizeof(struct marker));
+       memcpy(&marker, marker_info, sizeof(struct bond_marker));
        // change the marker subtype to marker response
        marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
        // send the marker response
@@ -1767,7 +1768,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
  * response for marker PDU's, in this stage, but only to respond to marker
  * information.
  */
-static void ad_marker_response_received(struct marker *marker, struct port *port)
+static void ad_marker_response_received(struct bond_marker *marker,
+       struct port *port)
 {
        marker=NULL; // just to satisfy the compiler
        port=NULL;  // just to satisfy the compiler
@@ -2164,15 +2166,15 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
                case AD_TYPE_MARKER:
                        // No need to convert fields to Little Endian since we don't use the marker's fields.
 
-                       switch (((struct marker *)lacpdu)->tlv_type) {
+                       switch (((struct bond_marker *)lacpdu)->tlv_type) {
                        case AD_MARKER_INFORMATION_SUBTYPE:
                                dprintk("Received Marker Information on port %d\n", port->actor_port_number);
-                               ad_marker_info_received((struct marker *)lacpdu, port);
+                               ad_marker_info_received((struct bond_marker *)lacpdu, port);
                                break;
 
                        case AD_MARKER_RESPONSE_SUBTYPE:
                                dprintk("Received Marker Response on port %d\n", port->actor_port_number);
-                               ad_marker_response_received((struct marker *)lacpdu, port);
+                               ad_marker_response_received((struct bond_marker *)lacpdu, port);
                                break;
 
                        default:
index 862952fa6fd99e84962d2187699c2c1255f1684d..f16557264944eab308bd7d778e035de1f6bf6cac 100644 (file)
@@ -92,7 +92,7 @@ typedef enum {
 typedef enum {
        AD_MARKER_INFORMATION_SUBTYPE = 1, // marker information subtype
        AD_MARKER_RESPONSE_SUBTYPE     // marker response subtype
-} marker_subtype_t;
+} bond_marker_subtype_t;
 
 // timers types(43.4.9 in the 802.3ad standard)
 typedef enum {
@@ -148,7 +148,7 @@ typedef struct lacpdu_header {
 } lacpdu_header_t;
 
 // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
-typedef struct marker {
+typedef struct bond_marker {
        u8 subtype;              //  = 0x02  (marker PDU)
        u8 version_number;       //  = 0x01
        u8 tlv_type;             //  = 0x01  (marker information)
@@ -161,12 +161,12 @@ typedef struct marker {
        u8 tlv_type_terminator;      //  = 0x00
        u8 terminator_length;        //  = 0x00
        u8 reserved_90[90];          //  = 0
-} marker_t;
+} bond_marker_t;
 
-typedef struct marker_header {
+typedef struct bond_marker_header {
        struct ad_header ad_header;
-       struct marker marker;
-} marker_header_t;
+       struct bond_marker marker;
+} bond_marker_header_t;
 
 #pragma pack()
 
index 314b2f68f78fc14f6140aa743aca4910c1da7a93..edd6828f0a78c594649b3f354836cffe56066939 100644 (file)
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #include <linux/if.h>
 #include <linux/mii.h>
 #include <asm/irq.h>
 #include <asm/dma.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/ethernet.h>
 #include <asm/cache.h>
 
index 044261703381a86439adcdab0f4808834cd1aa09..2a3df145850d8e5004a6485b1be976fe93b5fa8d 100644 (file)
@@ -41,9 +41,9 @@
 #include <linux/timer.h>
 #include <linux/cache.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 #include "t3cdev.h"
 #include <asm/semaphore.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 
 typedef irqreturn_t(*intr_handler_t) (int, void *);
index 243fc6b354b5effd99030e2fee51efaf9dc9b78b..e3dd8b1369086369f10f30be29a6b716ff2ec552 100644 (file)
@@ -170,7 +170,6 @@ static char *version =
 
 
 /* Few macros */
-#define BIT(a)                ( (1 << (a)) )
 #define BITSET(ioaddr, bnum)   ((outb(((inb(ioaddr)) | (bnum)), ioaddr)))
 #define BITCLR(ioaddr, bnum)   ((outb(((inb(ioaddr)) & (~(bnum))), ioaddr)))
 
index bc02e469480401274cee6e7f771f9ab62afe3c0e..11b83dae00ac762f8732ccefaf92ab92716a9959 100644 (file)
@@ -21,6 +21,7 @@
 
 
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/if_arp.h>
@@ -35,7 +36,6 @@
 #include <linux/sockios.h>
 #include <linux/workqueue.h>
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <asm/irq.h>
index 30854f094965c840fd11e8f8eb1bd55e26449305..a19b5958cee949e89548bbb50acf7768ac44de2f 100644 (file)
@@ -99,9 +99,9 @@ static char *version =
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/hwtest.h>
 #include <asm/macints.h>
index ea3b8fc86d1ef239cef9b6557f1468ad0cdf4a95..a78dc1ca8c29d16324f2fbf1b696e94bf1cef1b4 100644 (file)
@@ -28,9 +28,6 @@
 #define RX_BUFFER_OFFSET (sizeof(rx_status_vector)+2) /* status vector + 2 bytes of padding */
 #define RX_BUCKET_SIZE 256
 
-#undef BIT
-#define BIT(x) (1UL << (x))
-
 /* For more detailed explanations of what each field means,
    see Nick's great comments to #defines below (or docs, if
    you are lucky enough to get hold of them :) */
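
Deleting wavelan's and meth's private BIT() macros removes clashes with the generic BIT() now exported by <linux/bitops.h>; s2io, whose macro counted bits from the MSB of a 64-bit register word, renames to s2BIT instead (the definition below is an assumption inferred from the register layout, not quoted from s2io.h):

    #include <linux/bitops.h>

    unsigned long generic = BIT(3);   /* generic BIT() is LSB-first: 1UL << 3 */

    /* s2io-style, MSB-first over 64 bits (assumed):
     *   #define s2BIT(n)  (0x8000000000000000ULL >> (n))
     * so s2BIT(0) is the top bit of the 64-bit register. */
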
index aef66e2d98d2d36ba3d559f860efba434cb9c200..01f08d726ace1450fc7c1c51cef20e98c4fb420c 100644 (file)
@@ -20,17 +20,17 @@ struct XENA_dev_config {
 
 /* General Control-Status Registers */
        u64 general_int_status;
-#define GEN_INTR_TXPIC             BIT(0)
-#define GEN_INTR_TXDMA             BIT(1)
-#define GEN_INTR_TXMAC             BIT(2)
-#define GEN_INTR_TXXGXS            BIT(3)
-#define GEN_INTR_TXTRAFFIC         BIT(8)
-#define GEN_INTR_RXPIC             BIT(32)
-#define GEN_INTR_RXDMA             BIT(33)
-#define GEN_INTR_RXMAC             BIT(34)
-#define GEN_INTR_MC                BIT(35)
-#define GEN_INTR_RXXGXS            BIT(36)
-#define GEN_INTR_RXTRAFFIC         BIT(40)
+#define GEN_INTR_TXPIC             s2BIT(0)
+#define GEN_INTR_TXDMA             s2BIT(1)
+#define GEN_INTR_TXMAC             s2BIT(2)
+#define GEN_INTR_TXXGXS            s2BIT(3)
+#define GEN_INTR_TXTRAFFIC         s2BIT(8)
+#define GEN_INTR_RXPIC             s2BIT(32)
+#define GEN_INTR_RXDMA             s2BIT(33)
+#define GEN_INTR_RXMAC             s2BIT(34)
+#define GEN_INTR_MC                s2BIT(35)
+#define GEN_INTR_RXXGXS            s2BIT(36)
+#define GEN_INTR_RXTRAFFIC         s2BIT(40)
 #define GEN_ERROR_INTR             GEN_INTR_TXPIC | GEN_INTR_RXPIC | \
                                    GEN_INTR_TXDMA | GEN_INTR_RXDMA | \
                                    GEN_INTR_TXMAC | GEN_INTR_RXMAC | \
@@ -54,36 +54,36 @@ struct XENA_dev_config {
 
 
        u64 adapter_status;
-#define ADAPTER_STATUS_TDMA_READY          BIT(0)
-#define ADAPTER_STATUS_RDMA_READY          BIT(1)
-#define ADAPTER_STATUS_PFC_READY           BIT(2)
-#define ADAPTER_STATUS_TMAC_BUF_EMPTY      BIT(3)
-#define ADAPTER_STATUS_PIC_QUIESCENT       BIT(5)
-#define ADAPTER_STATUS_RMAC_REMOTE_FAULT   BIT(6)
-#define ADAPTER_STATUS_RMAC_LOCAL_FAULT    BIT(7)
+#define ADAPTER_STATUS_TDMA_READY          s2BIT(0)
+#define ADAPTER_STATUS_RDMA_READY          s2BIT(1)
+#define ADAPTER_STATUS_PFC_READY           s2BIT(2)
+#define ADAPTER_STATUS_TMAC_BUF_EMPTY      s2BIT(3)
+#define ADAPTER_STATUS_PIC_QUIESCENT       s2BIT(5)
+#define ADAPTER_STATUS_RMAC_REMOTE_FAULT   s2BIT(6)
+#define ADAPTER_STATUS_RMAC_LOCAL_FAULT    s2BIT(7)
 #define ADAPTER_STATUS_RMAC_PCC_IDLE       vBIT(0xFF,8,8)
 #define ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE  vBIT(0x0F,8,8)
 #define ADAPTER_STATUS_RC_PRC_QUIESCENT    vBIT(0xFF,16,8)
-#define ADAPTER_STATUS_MC_DRAM_READY       BIT(24)
-#define ADAPTER_STATUS_MC_QUEUES_READY     BIT(25)
-#define ADAPTER_STATUS_M_PLL_LOCK          BIT(30)
-#define ADAPTER_STATUS_P_PLL_LOCK          BIT(31)
+#define ADAPTER_STATUS_MC_DRAM_READY       s2BIT(24)
+#define ADAPTER_STATUS_MC_QUEUES_READY     s2BIT(25)
+#define ADAPTER_STATUS_M_PLL_LOCK          s2BIT(30)
+#define ADAPTER_STATUS_P_PLL_LOCK          s2BIT(31)
 
        u64 adapter_control;
-#define ADAPTER_CNTL_EN                    BIT(7)
-#define ADAPTER_EOI_TX_ON                  BIT(15)
-#define ADAPTER_LED_ON                     BIT(23)
+#define ADAPTER_CNTL_EN                    s2BIT(7)
+#define ADAPTER_EOI_TX_ON                  s2BIT(15)
+#define ADAPTER_LED_ON                     s2BIT(23)
 #define ADAPTER_UDPI(val)                  vBIT(val,36,4)
-#define ADAPTER_WAIT_INT                   BIT(48)
-#define ADAPTER_ECC_EN                     BIT(55)
+#define ADAPTER_WAIT_INT                   s2BIT(48)
+#define ADAPTER_ECC_EN                     s2BIT(55)
 
        u64 serr_source;
-#define SERR_SOURCE_PIC                        BIT(0)
-#define SERR_SOURCE_TXDMA              BIT(1)
-#define SERR_SOURCE_RXDMA              BIT(2)
-#define SERR_SOURCE_MAC                 BIT(3)
-#define SERR_SOURCE_MC                  BIT(4)
-#define SERR_SOURCE_XGXS                BIT(5)
+#define SERR_SOURCE_PIC                        s2BIT(0)
+#define SERR_SOURCE_TXDMA              s2BIT(1)
+#define SERR_SOURCE_RXDMA              s2BIT(2)
+#define SERR_SOURCE_MAC                 s2BIT(3)
+#define SERR_SOURCE_MC                  s2BIT(4)
+#define SERR_SOURCE_XGXS                s2BIT(5)
 #define        SERR_SOURCE_ANY                 (SERR_SOURCE_PIC        | \
                                        SERR_SOURCE_TXDMA       | \
                                        SERR_SOURCE_RXDMA       | \
@@ -101,41 +101,41 @@ struct XENA_dev_config {
 #define        PCI_MODE_PCIX_M2_66             0x5
 #define        PCI_MODE_PCIX_M2_100            0x6
 #define        PCI_MODE_PCIX_M2_133            0x7
-#define        PCI_MODE_UNSUPPORTED            BIT(0)
-#define        PCI_MODE_32_BITS                BIT(8)
-#define        PCI_MODE_UNKNOWN_MODE           BIT(9)
+#define        PCI_MODE_UNSUPPORTED            s2BIT(0)
+#define        PCI_MODE_32_BITS                s2BIT(8)
+#define        PCI_MODE_UNKNOWN_MODE           s2BIT(9)
 
        u8 unused_0[0x800 - 0x128];
 
 /* PCI-X Controller registers */
        u64 pic_int_status;
        u64 pic_int_mask;
-#define PIC_INT_TX                     BIT(0)
-#define PIC_INT_FLSH                   BIT(1)
-#define PIC_INT_MDIO                   BIT(2)
-#define PIC_INT_IIC                    BIT(3)
-#define PIC_INT_GPIO                   BIT(4)
-#define PIC_INT_RX                     BIT(32)
+#define PIC_INT_TX                     s2BIT(0)
+#define PIC_INT_FLSH                   s2BIT(1)
+#define PIC_INT_MDIO                   s2BIT(2)
+#define PIC_INT_IIC                    s2BIT(3)
+#define PIC_INT_GPIO                   s2BIT(4)
+#define PIC_INT_RX                     s2BIT(32)
 
        u64 txpic_int_reg;
        u64 txpic_int_mask;
-#define PCIX_INT_REG_ECC_SG_ERR                BIT(0)
-#define PCIX_INT_REG_ECC_DB_ERR                BIT(1)
-#define PCIX_INT_REG_FLASHR_R_FSM_ERR          BIT(8)
-#define PCIX_INT_REG_FLASHR_W_FSM_ERR          BIT(9)
-#define PCIX_INT_REG_INI_TX_FSM_SERR           BIT(10)
-#define PCIX_INT_REG_INI_TXO_FSM_ERR           BIT(11)
-#define PCIX_INT_REG_TRT_FSM_SERR              BIT(13)
-#define PCIX_INT_REG_SRT_FSM_SERR              BIT(14)
-#define PCIX_INT_REG_PIFR_FSM_SERR             BIT(15)
-#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR      BIT(21)
-#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR       BIT(23)
-#define PCIX_INT_REG_INI_RX_FSM_SERR           BIT(48)
-#define PCIX_INT_REG_RA_RX_FSM_SERR            BIT(50)
+#define PCIX_INT_REG_ECC_SG_ERR                s2BIT(0)
+#define PCIX_INT_REG_ECC_DB_ERR                s2BIT(1)
+#define PCIX_INT_REG_FLASHR_R_FSM_ERR          s2BIT(8)
+#define PCIX_INT_REG_FLASHR_W_FSM_ERR          s2BIT(9)
+#define PCIX_INT_REG_INI_TX_FSM_SERR           s2BIT(10)
+#define PCIX_INT_REG_INI_TXO_FSM_ERR           s2BIT(11)
+#define PCIX_INT_REG_TRT_FSM_SERR              s2BIT(13)
+#define PCIX_INT_REG_SRT_FSM_SERR              s2BIT(14)
+#define PCIX_INT_REG_PIFR_FSM_SERR             s2BIT(15)
+#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR      s2BIT(21)
+#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR       s2BIT(23)
+#define PCIX_INT_REG_INI_RX_FSM_SERR           s2BIT(48)
+#define PCIX_INT_REG_RA_RX_FSM_SERR            s2BIT(50)
 /*
-#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR      BIT(52)
-#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR       BIT(54)
-#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR     BIT(58)
+#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR      s2BIT(52)
+#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR       s2BIT(54)
+#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR     s2BIT(58)
 */
        u64 txpic_alarms;
        u64 rxpic_int_reg;
@@ -144,92 +144,92 @@ struct XENA_dev_config {
 
        u64 flsh_int_reg;
        u64 flsh_int_mask;
-#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR         BIT(63)
-#define PIC_FLSH_INT_REG_ERR                   BIT(62)
+#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR         s2BIT(63)
+#define PIC_FLSH_INT_REG_ERR                   s2BIT(62)
        u64 flash_alarms;
 
        u64 mdio_int_reg;
        u64 mdio_int_mask;
-#define MDIO_INT_REG_MDIO_BUS_ERR              BIT(0)
-#define MDIO_INT_REG_DTX_BUS_ERR               BIT(8)
-#define MDIO_INT_REG_LASI                      BIT(39)
+#define MDIO_INT_REG_MDIO_BUS_ERR              s2BIT(0)
+#define MDIO_INT_REG_DTX_BUS_ERR               s2BIT(8)
+#define MDIO_INT_REG_LASI                      s2BIT(39)
        u64 mdio_alarms;
 
        u64 iic_int_reg;
        u64 iic_int_mask;
-#define IIC_INT_REG_BUS_FSM_ERR                BIT(4)
-#define IIC_INT_REG_BIT_FSM_ERR                BIT(5)
-#define IIC_INT_REG_CYCLE_FSM_ERR              BIT(6)
-#define IIC_INT_REG_REQ_FSM_ERR                BIT(7)
-#define IIC_INT_REG_ACK_ERR                    BIT(8)
+#define IIC_INT_REG_BUS_FSM_ERR                s2BIT(4)
+#define IIC_INT_REG_BIT_FSM_ERR                s2BIT(5)
+#define IIC_INT_REG_CYCLE_FSM_ERR              s2BIT(6)
+#define IIC_INT_REG_REQ_FSM_ERR                s2BIT(7)
+#define IIC_INT_REG_ACK_ERR                    s2BIT(8)
        u64 iic_alarms;
 
        u8 unused4[0x08];
 
        u64 gpio_int_reg;
-#define GPIO_INT_REG_DP_ERR_INT                BIT(0)
-#define GPIO_INT_REG_LINK_DOWN                 BIT(1)
-#define GPIO_INT_REG_LINK_UP                   BIT(2)
+#define GPIO_INT_REG_DP_ERR_INT                s2BIT(0)
+#define GPIO_INT_REG_LINK_DOWN                 s2BIT(1)
+#define GPIO_INT_REG_LINK_UP                   s2BIT(2)
        u64 gpio_int_mask;
-#define GPIO_INT_MASK_LINK_DOWN                BIT(1)
-#define GPIO_INT_MASK_LINK_UP                  BIT(2)
+#define GPIO_INT_MASK_LINK_DOWN                s2BIT(1)
+#define GPIO_INT_MASK_LINK_UP                  s2BIT(2)
        u64 gpio_alarms;
 
        u8 unused5[0x38];
 
        u64 tx_traffic_int;
-#define TX_TRAFFIC_INT_n(n)                    BIT(n)
+#define TX_TRAFFIC_INT_n(n)                    s2BIT(n)
        u64 tx_traffic_mask;
 
        u64 rx_traffic_int;
-#define RX_TRAFFIC_INT_n(n)                    BIT(n)
+#define RX_TRAFFIC_INT_n(n)                    s2BIT(n)
        u64 rx_traffic_mask;
 
 /* PIC Control registers */
        u64 pic_control;
-#define PIC_CNTL_RX_ALARM_MAP_1                BIT(0)
+#define PIC_CNTL_RX_ALARM_MAP_1                s2BIT(0)
 #define PIC_CNTL_SHARED_SPLITS(n)              vBIT(n,11,5)
 
        u64 swapper_ctrl;
-#define SWAPPER_CTRL_PIF_R_FE                  BIT(0)
-#define SWAPPER_CTRL_PIF_R_SE                  BIT(1)
-#define SWAPPER_CTRL_PIF_W_FE                  BIT(8)
-#define SWAPPER_CTRL_PIF_W_SE                  BIT(9)
-#define SWAPPER_CTRL_TXP_FE                    BIT(16)
-#define SWAPPER_CTRL_TXP_SE                    BIT(17)
-#define SWAPPER_CTRL_TXD_R_FE                  BIT(18)
-#define SWAPPER_CTRL_TXD_R_SE                  BIT(19)
-#define SWAPPER_CTRL_TXD_W_FE                  BIT(20)
-#define SWAPPER_CTRL_TXD_W_SE                  BIT(21)
-#define SWAPPER_CTRL_TXF_R_FE                  BIT(22)
-#define SWAPPER_CTRL_TXF_R_SE                  BIT(23)
-#define SWAPPER_CTRL_RXD_R_FE                  BIT(32)
-#define SWAPPER_CTRL_RXD_R_SE                  BIT(33)
-#define SWAPPER_CTRL_RXD_W_FE                  BIT(34)
-#define SWAPPER_CTRL_RXD_W_SE                  BIT(35)
-#define SWAPPER_CTRL_RXF_W_FE                  BIT(36)
-#define SWAPPER_CTRL_RXF_W_SE                  BIT(37)
-#define SWAPPER_CTRL_XMSI_FE                   BIT(40)
-#define SWAPPER_CTRL_XMSI_SE                   BIT(41)
-#define SWAPPER_CTRL_STATS_FE                  BIT(48)
-#define SWAPPER_CTRL_STATS_SE                  BIT(49)
+#define SWAPPER_CTRL_PIF_R_FE                  s2BIT(0)
+#define SWAPPER_CTRL_PIF_R_SE                  s2BIT(1)
+#define SWAPPER_CTRL_PIF_W_FE                  s2BIT(8)
+#define SWAPPER_CTRL_PIF_W_SE                  s2BIT(9)
+#define SWAPPER_CTRL_TXP_FE                    s2BIT(16)
+#define SWAPPER_CTRL_TXP_SE                    s2BIT(17)
+#define SWAPPER_CTRL_TXD_R_FE                  s2BIT(18)
+#define SWAPPER_CTRL_TXD_R_SE                  s2BIT(19)
+#define SWAPPER_CTRL_TXD_W_FE                  s2BIT(20)
+#define SWAPPER_CTRL_TXD_W_SE                  s2BIT(21)
+#define SWAPPER_CTRL_TXF_R_FE                  s2BIT(22)
+#define SWAPPER_CTRL_TXF_R_SE                  s2BIT(23)
+#define SWAPPER_CTRL_RXD_R_FE                  s2BIT(32)
+#define SWAPPER_CTRL_RXD_R_SE                  s2BIT(33)
+#define SWAPPER_CTRL_RXD_W_FE                  s2BIT(34)
+#define SWAPPER_CTRL_RXD_W_SE                  s2BIT(35)
+#define SWAPPER_CTRL_RXF_W_FE                  s2BIT(36)
+#define SWAPPER_CTRL_RXF_W_SE                  s2BIT(37)
+#define SWAPPER_CTRL_XMSI_FE                   s2BIT(40)
+#define SWAPPER_CTRL_XMSI_SE                   s2BIT(41)
+#define SWAPPER_CTRL_STATS_FE                  s2BIT(48)
+#define SWAPPER_CTRL_STATS_SE                  s2BIT(49)
 
        u64 pif_rd_swapper_fb;
 #define IF_RD_SWAPPER_FB                            0x0123456789ABCDEF
 
        u64 scheduled_int_ctrl;
-#define SCHED_INT_CTRL_TIMER_EN                BIT(0)
-#define SCHED_INT_CTRL_ONE_SHOT                BIT(1)
+#define SCHED_INT_CTRL_TIMER_EN                s2BIT(0)
+#define SCHED_INT_CTRL_ONE_SHOT                s2BIT(1)
 #define SCHED_INT_CTRL_INT2MSI(val)            vBIT(val,10,6)
 #define SCHED_INT_PERIOD                       TBD
 
        u64 txreqtimeout;
 #define TXREQTO_VAL(val)                                               vBIT(val,0,32)
-#define TXREQTO_EN                                                             BIT(63)
+#define TXREQTO_EN                                                             s2BIT(63)
 
        u64 statsreqtimeout;
 #define STATREQTO_VAL(n)                       TBD
-#define STATREQTO_EN                           BIT(63)
+#define STATREQTO_EN                           s2BIT(63)
 
        u64 read_retry_delay;
        u64 read_retry_acceleration;
@@ -255,10 +255,10 @@ struct XENA_dev_config {
 
        /* Automated statistics collection */
        u64 stat_cfg;
-#define STAT_CFG_STAT_EN           BIT(0)
-#define STAT_CFG_ONE_SHOT_EN       BIT(1)
-#define STAT_CFG_STAT_NS_EN        BIT(8)
-#define STAT_CFG_STAT_RO           BIT(9)
+#define STAT_CFG_STAT_EN           s2BIT(0)
+#define STAT_CFG_ONE_SHOT_EN       s2BIT(1)
+#define STAT_CFG_STAT_NS_EN        s2BIT(8)
+#define STAT_CFG_STAT_RO           s2BIT(9)
 #define STAT_TRSF_PER(n)           TBD
 #define        PER_SEC                                    0x208d5
 #define        SET_UPDT_PERIOD(n)                 vBIT((PER_SEC*n),32,32)
@@ -290,18 +290,18 @@ struct XENA_dev_config {
 #define        I2C_CONTROL_DEV_ID(id)          vBIT(id,1,3)
 #define        I2C_CONTROL_ADDR(addr)          vBIT(addr,5,11)
 #define        I2C_CONTROL_BYTE_CNT(cnt)       vBIT(cnt,22,2)
-#define        I2C_CONTROL_READ                        BIT(24)
-#define        I2C_CONTROL_NACK                        BIT(25)
+#define        I2C_CONTROL_READ                        s2BIT(24)
+#define        I2C_CONTROL_NACK                        s2BIT(25)
 #define        I2C_CONTROL_CNTL_START          vBIT(0xE,28,4)
 #define        I2C_CONTROL_CNTL_END(val)       (val & vBIT(0x1,28,4))
 #define        I2C_CONTROL_GET_DATA(val)       (u32)(val & 0xFFFFFFFF)
 #define        I2C_CONTROL_SET_DATA(val)       vBIT(val,32,32)
 
        u64 gpio_control;
-#define GPIO_CTRL_GPIO_0               BIT(8)
+#define GPIO_CTRL_GPIO_0               s2BIT(8)
        u64 misc_control;
-#define FAULT_BEHAVIOUR                        BIT(0)
-#define EXT_REQ_EN                     BIT(1)
+#define FAULT_BEHAVIOUR                        s2BIT(0)
+#define EXT_REQ_EN                     s2BIT(1)
 #define MISC_LINK_STABILITY_PRD(val)   vBIT(val,29,3)
 
        u8 unused7_1[0x230 - 0x208];
@@ -317,29 +317,29 @@ struct XENA_dev_config {
 /* TxDMA registers */
        u64 txdma_int_status;
        u64 txdma_int_mask;
-#define TXDMA_PFC_INT                  BIT(0)
-#define TXDMA_TDA_INT                  BIT(1)
-#define TXDMA_PCC_INT                  BIT(2)
-#define TXDMA_TTI_INT                  BIT(3)
-#define TXDMA_LSO_INT                  BIT(4)
-#define TXDMA_TPA_INT                  BIT(5)
-#define TXDMA_SM_INT                   BIT(6)
+#define TXDMA_PFC_INT                  s2BIT(0)
+#define TXDMA_TDA_INT                  s2BIT(1)
+#define TXDMA_PCC_INT                  s2BIT(2)
+#define TXDMA_TTI_INT                  s2BIT(3)
+#define TXDMA_LSO_INT                  s2BIT(4)
+#define TXDMA_TPA_INT                  s2BIT(5)
+#define TXDMA_SM_INT                   s2BIT(6)
        u64 pfc_err_reg;
-#define PFC_ECC_SG_ERR                 BIT(7)
-#define PFC_ECC_DB_ERR                 BIT(15)
-#define PFC_SM_ERR_ALARM               BIT(23)
-#define PFC_MISC_0_ERR                 BIT(31)
-#define PFC_MISC_1_ERR                 BIT(32)
-#define PFC_PCIX_ERR                   BIT(39)
+#define PFC_ECC_SG_ERR                 s2BIT(7)
+#define PFC_ECC_DB_ERR                 s2BIT(15)
+#define PFC_SM_ERR_ALARM               s2BIT(23)
+#define PFC_MISC_0_ERR                 s2BIT(31)
+#define PFC_MISC_1_ERR                 s2BIT(32)
+#define PFC_PCIX_ERR                   s2BIT(39)
        u64 pfc_err_mask;
        u64 pfc_err_alarm;
 
        u64 tda_err_reg;
 #define TDA_Fn_ECC_SG_ERR              vBIT(0xff,0,8)
 #define TDA_Fn_ECC_DB_ERR              vBIT(0xff,8,8)
-#define TDA_SM0_ERR_ALARM              BIT(22)
-#define TDA_SM1_ERR_ALARM              BIT(23)
-#define TDA_PCIX_ERR                   BIT(39)
+#define TDA_SM0_ERR_ALARM              s2BIT(22)
+#define TDA_SM1_ERR_ALARM              s2BIT(23)
+#define TDA_PCIX_ERR                   s2BIT(39)
        u64 tda_err_mask;
        u64 tda_err_alarm;
 
@@ -351,40 +351,40 @@ struct XENA_dev_config {
 #define PCC_SM_ERR_ALARM               vBIT(0xff,32,8)
 #define PCC_WR_ERR_ALARM               vBIT(0xff,40,8)
 #define PCC_N_SERR                     vBIT(0xff,48,8)
-#define PCC_6_COF_OV_ERR               BIT(56)
-#define PCC_7_COF_OV_ERR               BIT(57)
-#define PCC_6_LSO_OV_ERR               BIT(58)
-#define PCC_7_LSO_OV_ERR               BIT(59)
+#define PCC_6_COF_OV_ERR               s2BIT(56)
+#define PCC_7_COF_OV_ERR               s2BIT(57)
+#define PCC_6_LSO_OV_ERR               s2BIT(58)
+#define PCC_7_LSO_OV_ERR               s2BIT(59)
 #define PCC_ENABLE_FOUR                        vBIT(0x0F,0,8)
        u64 pcc_err_mask;
        u64 pcc_err_alarm;
 
        u64 tti_err_reg;
-#define TTI_ECC_SG_ERR                 BIT(7)
-#define TTI_ECC_DB_ERR                 BIT(15)
-#define TTI_SM_ERR_ALARM               BIT(23)
+#define TTI_ECC_SG_ERR                 s2BIT(7)
+#define TTI_ECC_DB_ERR                 s2BIT(15)
+#define TTI_SM_ERR_ALARM               s2BIT(23)
        u64 tti_err_mask;
        u64 tti_err_alarm;
 
        u64 lso_err_reg;
-#define LSO6_SEND_OFLOW                        BIT(12)
-#define LSO7_SEND_OFLOW                        BIT(13)
-#define LSO6_ABORT                     BIT(14)
-#define LSO7_ABORT                     BIT(15)
-#define LSO6_SM_ERR_ALARM              BIT(22)
-#define LSO7_SM_ERR_ALARM              BIT(23)
+#define LSO6_SEND_OFLOW                        s2BIT(12)
+#define LSO7_SEND_OFLOW                        s2BIT(13)
+#define LSO6_ABORT                     s2BIT(14)
+#define LSO7_ABORT                     s2BIT(15)
+#define LSO6_SM_ERR_ALARM              s2BIT(22)
+#define LSO7_SM_ERR_ALARM              s2BIT(23)
        u64 lso_err_mask;
        u64 lso_err_alarm;
 
        u64 tpa_err_reg;
-#define TPA_TX_FRM_DROP                        BIT(7)
-#define TPA_SM_ERR_ALARM               BIT(23)
+#define TPA_TX_FRM_DROP                        s2BIT(7)
+#define TPA_SM_ERR_ALARM               s2BIT(23)
 
        u64 tpa_err_mask;
        u64 tpa_err_alarm;
 
        u64 sm_err_reg;
-#define SM_SM_ERR_ALARM                        BIT(15)
+#define SM_SM_ERR_ALARM                        s2BIT(15)
        u64 sm_err_mask;
        u64 sm_err_alarm;
 
@@ -397,7 +397,7 @@ struct XENA_dev_config {
 #define X_MAX_FIFOS                        8
 #define X_FIFO_MAX_LEN                     0x1FFF      /*8191 */
        u64 tx_fifo_partition_0;
-#define TX_FIFO_PARTITION_EN               BIT(0)
+#define TX_FIFO_PARTITION_EN               s2BIT(0)
 #define TX_FIFO_PARTITION_0_PRI(val)       vBIT(val,5,3)
 #define TX_FIFO_PARTITION_0_LEN(val)       vBIT(val,19,13)
 #define TX_FIFO_PARTITION_1_PRI(val)       vBIT(val,37,3)
@@ -437,16 +437,16 @@ struct XENA_dev_config {
        u64 tx_w_round_robin_4;
 
        u64 tti_command_mem;
-#define TTI_CMD_MEM_WE                     BIT(7)
-#define TTI_CMD_MEM_STROBE_NEW_CMD         BIT(15)
-#define TTI_CMD_MEM_STROBE_BEING_EXECUTED  BIT(15)
+#define TTI_CMD_MEM_WE                     s2BIT(7)
+#define TTI_CMD_MEM_STROBE_NEW_CMD         s2BIT(15)
+#define TTI_CMD_MEM_STROBE_BEING_EXECUTED  s2BIT(15)
 #define TTI_CMD_MEM_OFFSET(n)              vBIT(n,26,6)
 
        u64 tti_data1_mem;
 #define TTI_DATA1_MEM_TX_TIMER_VAL(n)      vBIT(n,6,26)
 #define TTI_DATA1_MEM_TX_TIMER_AC_CI(n)    vBIT(n,38,2)
-#define TTI_DATA1_MEM_TX_TIMER_AC_EN       BIT(38)
-#define TTI_DATA1_MEM_TX_TIMER_CI_EN       BIT(39)
+#define TTI_DATA1_MEM_TX_TIMER_AC_EN       s2BIT(38)
+#define TTI_DATA1_MEM_TX_TIMER_CI_EN       s2BIT(39)
 #define TTI_DATA1_MEM_TX_URNG_A(n)         vBIT(n,41,7)
 #define TTI_DATA1_MEM_TX_URNG_B(n)         vBIT(n,49,7)
 #define TTI_DATA1_MEM_TX_URNG_C(n)         vBIT(n,57,7)
@@ -459,11 +459,11 @@ struct XENA_dev_config {
 
 /* Tx Protocol assist */
        u64 tx_pa_cfg;
-#define TX_PA_CFG_IGNORE_FRM_ERR           BIT(1)
-#define TX_PA_CFG_IGNORE_SNAP_OUI          BIT(2)
-#define TX_PA_CFG_IGNORE_LLC_CTRL          BIT(3)
-#define        TX_PA_CFG_IGNORE_L2_ERR                    BIT(6)
-#define RX_PA_CFG_STRIP_VLAN_TAG               BIT(15)
+#define TX_PA_CFG_IGNORE_FRM_ERR           s2BIT(1)
+#define TX_PA_CFG_IGNORE_SNAP_OUI          s2BIT(2)
+#define TX_PA_CFG_IGNORE_LLC_CTRL          s2BIT(3)
+#define        TX_PA_CFG_IGNORE_L2_ERR                    s2BIT(6)
+#define RX_PA_CFG_STRIP_VLAN_TAG               s2BIT(15)
 
 /* Recent add, used only for debug purposes. */
        u64 pcc_enable;
@@ -477,31 +477,31 @@ struct XENA_dev_config {
 /* RxDMA Registers */
        u64 rxdma_int_status;
        u64 rxdma_int_mask;
-#define RXDMA_INT_RC_INT_M             BIT(0)
-#define RXDMA_INT_RPA_INT_M            BIT(1)
-#define RXDMA_INT_RDA_INT_M            BIT(2)
-#define RXDMA_INT_RTI_INT_M            BIT(3)
+#define RXDMA_INT_RC_INT_M             s2BIT(0)
+#define RXDMA_INT_RPA_INT_M            s2BIT(1)
+#define RXDMA_INT_RDA_INT_M            s2BIT(2)
+#define RXDMA_INT_RTI_INT_M            s2BIT(3)
 
        u64 rda_err_reg;
 #define RDA_RXDn_ECC_SG_ERR            vBIT(0xFF,0,8)
 #define RDA_RXDn_ECC_DB_ERR            vBIT(0xFF,8,8)
-#define RDA_FRM_ECC_SG_ERR             BIT(23)
-#define RDA_FRM_ECC_DB_N_AERR          BIT(31)
-#define RDA_SM1_ERR_ALARM              BIT(38)
-#define RDA_SM0_ERR_ALARM              BIT(39)
-#define RDA_MISC_ERR                   BIT(47)
-#define RDA_PCIX_ERR                   BIT(55)
-#define RDA_RXD_ECC_DB_SERR            BIT(63)
+#define RDA_FRM_ECC_SG_ERR             s2BIT(23)
+#define RDA_FRM_ECC_DB_N_AERR          s2BIT(31)
+#define RDA_SM1_ERR_ALARM              s2BIT(38)
+#define RDA_SM0_ERR_ALARM              s2BIT(39)
+#define RDA_MISC_ERR                   s2BIT(47)
+#define RDA_PCIX_ERR                   s2BIT(55)
+#define RDA_RXD_ECC_DB_SERR            s2BIT(63)
        u64 rda_err_mask;
        u64 rda_err_alarm;
 
        u64 rc_err_reg;
 #define RC_PRCn_ECC_SG_ERR             vBIT(0xFF,0,8)
 #define RC_PRCn_ECC_DB_ERR             vBIT(0xFF,8,8)
-#define RC_FTC_ECC_SG_ERR              BIT(23)
-#define RC_FTC_ECC_DB_ERR              BIT(31)
+#define RC_FTC_ECC_SG_ERR              s2BIT(23)
+#define RC_FTC_ECC_DB_ERR              s2BIT(31)
 #define RC_PRCn_SM_ERR_ALARM           vBIT(0xFF,32,8)
-#define RC_FTC_SM_ERR_ALARM            BIT(47)
+#define RC_FTC_SM_ERR_ALARM            s2BIT(47)
 #define RC_RDA_FAIL_WR_Rn              vBIT(0xFF,48,8)
        u64 rc_err_mask;
        u64 rc_err_alarm;
@@ -517,18 +517,18 @@ struct XENA_dev_config {
        u64 prc_pcix_err_alarm;
 
        u64 rpa_err_reg;
-#define RPA_ECC_SG_ERR                 BIT(7)
-#define RPA_ECC_DB_ERR                 BIT(15)
-#define RPA_FLUSH_REQUEST              BIT(22)
-#define RPA_SM_ERR_ALARM               BIT(23)
-#define RPA_CREDIT_ERR                 BIT(31)
+#define RPA_ECC_SG_ERR                 s2BIT(7)
+#define RPA_ECC_DB_ERR                 s2BIT(15)
+#define RPA_FLUSH_REQUEST              s2BIT(22)
+#define RPA_SM_ERR_ALARM               s2BIT(23)
+#define RPA_CREDIT_ERR                 s2BIT(31)
        u64 rpa_err_mask;
        u64 rpa_err_alarm;
 
        u64 rti_err_reg;
-#define RTI_ECC_SG_ERR                 BIT(7)
-#define RTI_ECC_DB_ERR                 BIT(15)
-#define RTI_SM_ERR_ALARM               BIT(23)
+#define RTI_ECC_SG_ERR                 s2BIT(7)
+#define RTI_ECC_DB_ERR                 s2BIT(15)
+#define RTI_SM_ERR_ALARM               s2BIT(23)
        u64 rti_err_mask;
        u64 rti_err_alarm;
 
@@ -568,49 +568,49 @@ struct XENA_dev_config {
 #endif
        u64 prc_rxd0_n[RX_MAX_RINGS];
        u64 prc_ctrl_n[RX_MAX_RINGS];
-#define PRC_CTRL_RC_ENABLED                    BIT(7)
-#define PRC_CTRL_RING_MODE                     (BIT(14)|BIT(15))
+#define PRC_CTRL_RC_ENABLED                    s2BIT(7)
+#define PRC_CTRL_RING_MODE                     (s2BIT(14)|s2BIT(15))
 #define PRC_CTRL_RING_MODE_1                   vBIT(0,14,2)
 #define PRC_CTRL_RING_MODE_3                   vBIT(1,14,2)
 #define PRC_CTRL_RING_MODE_5                   vBIT(2,14,2)
 #define PRC_CTRL_RING_MODE_x                   vBIT(3,14,2)
-#define PRC_CTRL_NO_SNOOP                      (BIT(22)|BIT(23))
-#define PRC_CTRL_NO_SNOOP_DESC                 BIT(22)
-#define PRC_CTRL_NO_SNOOP_BUFF                 BIT(23)
-#define PRC_CTRL_BIMODAL_INTERRUPT             BIT(37)
-#define PRC_CTRL_GROUP_READS                   BIT(38)
+#define PRC_CTRL_NO_SNOOP                      (s2BIT(22)|s2BIT(23))
+#define PRC_CTRL_NO_SNOOP_DESC                 s2BIT(22)
+#define PRC_CTRL_NO_SNOOP_BUFF                 s2BIT(23)
+#define PRC_CTRL_BIMODAL_INTERRUPT             s2BIT(37)
+#define PRC_CTRL_GROUP_READS                   s2BIT(38)
 #define PRC_CTRL_RXD_BACKOFF_INTERVAL(val)     vBIT(val,40,24)
 
        u64 prc_alarm_action;
-#define PRC_ALARM_ACTION_RR_R0_STOP            BIT(3)
-#define PRC_ALARM_ACTION_RW_R0_STOP            BIT(7)
-#define PRC_ALARM_ACTION_RR_R1_STOP            BIT(11)
-#define PRC_ALARM_ACTION_RW_R1_STOP            BIT(15)
-#define PRC_ALARM_ACTION_RR_R2_STOP            BIT(19)
-#define PRC_ALARM_ACTION_RW_R2_STOP            BIT(23)
-#define PRC_ALARM_ACTION_RR_R3_STOP            BIT(27)
-#define PRC_ALARM_ACTION_RW_R3_STOP            BIT(31)
-#define PRC_ALARM_ACTION_RR_R4_STOP            BIT(35)
-#define PRC_ALARM_ACTION_RW_R4_STOP            BIT(39)
-#define PRC_ALARM_ACTION_RR_R5_STOP            BIT(43)
-#define PRC_ALARM_ACTION_RW_R5_STOP            BIT(47)
-#define PRC_ALARM_ACTION_RR_R6_STOP            BIT(51)
-#define PRC_ALARM_ACTION_RW_R6_STOP            BIT(55)
-#define PRC_ALARM_ACTION_RR_R7_STOP            BIT(59)
-#define PRC_ALARM_ACTION_RW_R7_STOP            BIT(63)
+#define PRC_ALARM_ACTION_RR_R0_STOP            s2BIT(3)
+#define PRC_ALARM_ACTION_RW_R0_STOP            s2BIT(7)
+#define PRC_ALARM_ACTION_RR_R1_STOP            s2BIT(11)
+#define PRC_ALARM_ACTION_RW_R1_STOP            s2BIT(15)
+#define PRC_ALARM_ACTION_RR_R2_STOP            s2BIT(19)
+#define PRC_ALARM_ACTION_RW_R2_STOP            s2BIT(23)
+#define PRC_ALARM_ACTION_RR_R3_STOP            s2BIT(27)
+#define PRC_ALARM_ACTION_RW_R3_STOP            s2BIT(31)
+#define PRC_ALARM_ACTION_RR_R4_STOP            s2BIT(35)
+#define PRC_ALARM_ACTION_RW_R4_STOP            s2BIT(39)
+#define PRC_ALARM_ACTION_RR_R5_STOP            s2BIT(43)
+#define PRC_ALARM_ACTION_RW_R5_STOP            s2BIT(47)
+#define PRC_ALARM_ACTION_RR_R6_STOP            s2BIT(51)
+#define PRC_ALARM_ACTION_RW_R6_STOP            s2BIT(55)
+#define PRC_ALARM_ACTION_RR_R7_STOP            s2BIT(59)
+#define PRC_ALARM_ACTION_RW_R7_STOP            s2BIT(63)
 
 /* Receive traffic interrupts */
        u64 rti_command_mem;
-#define RTI_CMD_MEM_WE                          BIT(7)
-#define RTI_CMD_MEM_STROBE                      BIT(15)
-#define RTI_CMD_MEM_STROBE_NEW_CMD              BIT(15)
-#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED   BIT(15)
+#define RTI_CMD_MEM_WE                          s2BIT(7)
+#define RTI_CMD_MEM_STROBE                      s2BIT(15)
+#define RTI_CMD_MEM_STROBE_NEW_CMD              s2BIT(15)
+#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED   s2BIT(15)
 #define RTI_CMD_MEM_OFFSET(n)                   vBIT(n,29,3)
 
        u64 rti_data1_mem;
 #define RTI_DATA1_MEM_RX_TIMER_VAL(n)      vBIT(n,3,29)
-#define RTI_DATA1_MEM_RX_TIMER_AC_EN       BIT(38)
-#define RTI_DATA1_MEM_RX_TIMER_CI_EN       BIT(39)
+#define RTI_DATA1_MEM_RX_TIMER_AC_EN       s2BIT(38)
+#define RTI_DATA1_MEM_RX_TIMER_CI_EN       s2BIT(39)
 #define RTI_DATA1_MEM_RX_URNG_A(n)         vBIT(n,41,7)
 #define RTI_DATA1_MEM_RX_URNG_B(n)         vBIT(n,49,7)
 #define RTI_DATA1_MEM_RX_URNG_C(n)         vBIT(n,57,7)
@@ -622,10 +622,10 @@ struct XENA_dev_config {
 #define RTI_DATA2_MEM_RX_UFC_D(n)          vBIT(n,48,16)
 
        u64 rx_pa_cfg;
-#define RX_PA_CFG_IGNORE_FRM_ERR           BIT(1)
-#define RX_PA_CFG_IGNORE_SNAP_OUI          BIT(2)
-#define RX_PA_CFG_IGNORE_LLC_CTRL          BIT(3)
-#define RX_PA_CFG_IGNORE_L2_ERR            BIT(6)
+#define RX_PA_CFG_IGNORE_FRM_ERR           s2BIT(1)
+#define RX_PA_CFG_IGNORE_SNAP_OUI          s2BIT(2)
+#define RX_PA_CFG_IGNORE_LLC_CTRL          s2BIT(3)
+#define RX_PA_CFG_IGNORE_L2_ERR            s2BIT(6)
 
        u64 unused_11_1;
 
@@ -641,64 +641,64 @@ struct XENA_dev_config {
 /* Media Access Controller Register */
        u64 mac_int_status;
        u64 mac_int_mask;
-#define MAC_INT_STATUS_TMAC_INT            BIT(0)
-#define MAC_INT_STATUS_RMAC_INT            BIT(1)
+#define MAC_INT_STATUS_TMAC_INT            s2BIT(0)
+#define MAC_INT_STATUS_RMAC_INT            s2BIT(1)
 
        u64 mac_tmac_err_reg;
-#define TMAC_ECC_SG_ERR                                BIT(7)
-#define TMAC_ECC_DB_ERR                                BIT(15)
-#define TMAC_TX_BUF_OVRN                       BIT(23)
-#define TMAC_TX_CRI_ERR                                BIT(31)
-#define TMAC_TX_SM_ERR                         BIT(39)
-#define TMAC_DESC_ECC_SG_ERR                   BIT(47)
-#define TMAC_DESC_ECC_DB_ERR                   BIT(55)
+#define TMAC_ECC_SG_ERR                                s2BIT(7)
+#define TMAC_ECC_DB_ERR                                s2BIT(15)
+#define TMAC_TX_BUF_OVRN                       s2BIT(23)
+#define TMAC_TX_CRI_ERR                                s2BIT(31)
+#define TMAC_TX_SM_ERR                         s2BIT(39)
+#define TMAC_DESC_ECC_SG_ERR                   s2BIT(47)
+#define TMAC_DESC_ECC_DB_ERR                   s2BIT(55)
 
        u64 mac_tmac_err_mask;
        u64 mac_tmac_err_alarm;
 
        u64 mac_rmac_err_reg;
-#define RMAC_RX_BUFF_OVRN                      BIT(0)
-#define RMAC_FRM_RCVD_INT                      BIT(1)
-#define RMAC_UNUSED_INT                                BIT(2)
-#define RMAC_RTS_PNUM_ECC_SG_ERR               BIT(5)
-#define RMAC_RTS_DS_ECC_SG_ERR                 BIT(6)
-#define RMAC_RD_BUF_ECC_SG_ERR                 BIT(7)
-#define RMAC_RTH_MAP_ECC_SG_ERR                        BIT(8)
-#define RMAC_RTH_SPDM_ECC_SG_ERR               BIT(9)
-#define RMAC_RTS_VID_ECC_SG_ERR                        BIT(10)
-#define RMAC_DA_SHADOW_ECC_SG_ERR              BIT(11)
-#define RMAC_RTS_PNUM_ECC_DB_ERR               BIT(13)
-#define RMAC_RTS_DS_ECC_DB_ERR                 BIT(14)
-#define RMAC_RD_BUF_ECC_DB_ERR                 BIT(15)
-#define RMAC_RTH_MAP_ECC_DB_ERR                        BIT(16)
-#define RMAC_RTH_SPDM_ECC_DB_ERR               BIT(17)
-#define RMAC_RTS_VID_ECC_DB_ERR                        BIT(18)
-#define RMAC_DA_SHADOW_ECC_DB_ERR              BIT(19)
-#define RMAC_LINK_STATE_CHANGE_INT             BIT(31)
-#define RMAC_RX_SM_ERR                         BIT(39)
-#define RMAC_SINGLE_ECC_ERR                    (BIT(5) | BIT(6) | BIT(7) |\
-                                               BIT(8)  | BIT(9) | BIT(10)|\
-                                               BIT(11))
-#define RMAC_DOUBLE_ECC_ERR                    (BIT(13) | BIT(14) | BIT(15) |\
-                                               BIT(16)  | BIT(17) | BIT(18)|\
-                                               BIT(19))
+#define RMAC_RX_BUFF_OVRN                      s2BIT(0)
+#define RMAC_FRM_RCVD_INT                      s2BIT(1)
+#define RMAC_UNUSED_INT                                s2BIT(2)
+#define RMAC_RTS_PNUM_ECC_SG_ERR               s2BIT(5)
+#define RMAC_RTS_DS_ECC_SG_ERR                 s2BIT(6)
+#define RMAC_RD_BUF_ECC_SG_ERR                 s2BIT(7)
+#define RMAC_RTH_MAP_ECC_SG_ERR                        s2BIT(8)
+#define RMAC_RTH_SPDM_ECC_SG_ERR               s2BIT(9)
+#define RMAC_RTS_VID_ECC_SG_ERR                        s2BIT(10)
+#define RMAC_DA_SHADOW_ECC_SG_ERR              s2BIT(11)
+#define RMAC_RTS_PNUM_ECC_DB_ERR               s2BIT(13)
+#define RMAC_RTS_DS_ECC_DB_ERR                 s2BIT(14)
+#define RMAC_RD_BUF_ECC_DB_ERR                 s2BIT(15)
+#define RMAC_RTH_MAP_ECC_DB_ERR                        s2BIT(16)
+#define RMAC_RTH_SPDM_ECC_DB_ERR               s2BIT(17)
+#define RMAC_RTS_VID_ECC_DB_ERR                        s2BIT(18)
+#define RMAC_DA_SHADOW_ECC_DB_ERR              s2BIT(19)
+#define RMAC_LINK_STATE_CHANGE_INT             s2BIT(31)
+#define RMAC_RX_SM_ERR                         s2BIT(39)
+#define RMAC_SINGLE_ECC_ERR                    (s2BIT(5) | s2BIT(6) | s2BIT(7) |\
+                                               s2BIT(8)  | s2BIT(9) | s2BIT(10)|\
+                                               s2BIT(11))
+#define RMAC_DOUBLE_ECC_ERR                    (s2BIT(13) | s2BIT(14) | s2BIT(15) |\
+                                               s2BIT(16)  | s2BIT(17) | s2BIT(18)|\
+                                               s2BIT(19))
        u64 mac_rmac_err_mask;
        u64 mac_rmac_err_alarm;
 
        u8 unused14[0x100 - 0x40];
 
        u64 mac_cfg;
-#define MAC_CFG_TMAC_ENABLE             BIT(0)
-#define MAC_CFG_RMAC_ENABLE             BIT(1)
-#define MAC_CFG_LAN_NOT_WAN             BIT(2)
-#define MAC_CFG_TMAC_LOOPBACK           BIT(3)
-#define MAC_CFG_TMAC_APPEND_PAD         BIT(4)
-#define MAC_CFG_RMAC_STRIP_FCS          BIT(5)
-#define MAC_CFG_RMAC_STRIP_PAD          BIT(6)
-#define MAC_CFG_RMAC_PROM_ENABLE        BIT(7)
-#define MAC_RMAC_DISCARD_PFRM           BIT(8)
-#define MAC_RMAC_BCAST_ENABLE           BIT(9)
-#define MAC_RMAC_ALL_ADDR_ENABLE        BIT(10)
+#define MAC_CFG_TMAC_ENABLE             s2BIT(0)
+#define MAC_CFG_RMAC_ENABLE             s2BIT(1)
+#define MAC_CFG_LAN_NOT_WAN             s2BIT(2)
+#define MAC_CFG_TMAC_LOOPBACK           s2BIT(3)
+#define MAC_CFG_TMAC_APPEND_PAD         s2BIT(4)
+#define MAC_CFG_RMAC_STRIP_FCS          s2BIT(5)
+#define MAC_CFG_RMAC_STRIP_PAD          s2BIT(6)
+#define MAC_CFG_RMAC_PROM_ENABLE        s2BIT(7)
+#define MAC_RMAC_DISCARD_PFRM           s2BIT(8)
+#define MAC_RMAC_BCAST_ENABLE           s2BIT(9)
+#define MAC_RMAC_ALL_ADDR_ENABLE        s2BIT(10)
 #define MAC_RMAC_INVLD_IPG_THR(val)     vBIT(val,16,8)
 
        u64 tmac_avg_ipg;
@@ -710,14 +710,14 @@ struct XENA_dev_config {
 #define RMAC_MAX_PYLD_LEN_JUMBO_DEF vBIT(9600,2,14)
 
        u64 rmac_err_cfg;
-#define RMAC_ERR_FCS                    BIT(0)
-#define RMAC_ERR_FCS_ACCEPT             BIT(1)
-#define RMAC_ERR_TOO_LONG               BIT(1)
-#define RMAC_ERR_TOO_LONG_ACCEPT        BIT(1)
-#define RMAC_ERR_RUNT                   BIT(2)
-#define RMAC_ERR_RUNT_ACCEPT            BIT(2)
-#define RMAC_ERR_LEN_MISMATCH           BIT(3)
-#define RMAC_ERR_LEN_MISMATCH_ACCEPT    BIT(3)
+#define RMAC_ERR_FCS                    s2BIT(0)
+#define RMAC_ERR_FCS_ACCEPT             s2BIT(1)
+#define RMAC_ERR_TOO_LONG               s2BIT(1)
+#define RMAC_ERR_TOO_LONG_ACCEPT        s2BIT(1)
+#define RMAC_ERR_RUNT                   s2BIT(2)
+#define RMAC_ERR_RUNT_ACCEPT            s2BIT(2)
+#define RMAC_ERR_LEN_MISMATCH           s2BIT(3)
+#define RMAC_ERR_LEN_MISMATCH_ACCEPT    s2BIT(3)
 
        u64 rmac_cfg_key;
 #define RMAC_CFG_KEY(val)               vBIT(val,0,16)
@@ -728,15 +728,15 @@ struct XENA_dev_config {
 #define MAC_MC_ADDR_START_OFFSET    16
 #define MAC_MC_ALL_MC_ADDR_OFFSET   63 /* enables all multicast pkts */
        u64 rmac_addr_cmd_mem;
-#define RMAC_ADDR_CMD_MEM_WE                    BIT(7)
+#define RMAC_ADDR_CMD_MEM_WE                    s2BIT(7)
 #define RMAC_ADDR_CMD_MEM_RD                    0
-#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD        BIT(15)
-#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING  BIT(15)
+#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD        s2BIT(15)
+#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING  s2BIT(15)
 #define RMAC_ADDR_CMD_MEM_OFFSET(n)             vBIT(n,26,6)
 
        u64 rmac_addr_data0_mem;
 #define RMAC_ADDR_DATA0_MEM_ADDR(n)    vBIT(n,0,48)
-#define RMAC_ADDR_DATA0_MEM_USER       BIT(48)
+#define RMAC_ADDR_DATA0_MEM_USER       s2BIT(48)
 
        u64 rmac_addr_data1_mem;
 #define RMAC_ADDR_DATA1_MEM_MASK(n)    vBIT(n,0,48)
@@ -753,10 +753,10 @@ struct XENA_dev_config {
        u64 tmac_ipg_cfg;
 
        u64 rmac_pause_cfg;
-#define RMAC_PAUSE_GEN             BIT(0)
-#define RMAC_PAUSE_GEN_ENABLE      BIT(0)
-#define RMAC_PAUSE_RX              BIT(1)
-#define RMAC_PAUSE_RX_ENABLE       BIT(1)
+#define RMAC_PAUSE_GEN             s2BIT(0)
+#define RMAC_PAUSE_GEN_ENABLE      s2BIT(0)
+#define RMAC_PAUSE_RX              s2BIT(1)
+#define RMAC_PAUSE_RX_ENABLE       s2BIT(1)
 #define RMAC_PAUSE_HG_PTIME_DEF    vBIT(0xFFFF,16,16)
 #define RMAC_PAUSE_HG_PTIME(val)    vBIT(val,16,16)
 
@@ -787,29 +787,29 @@ struct XENA_dev_config {
 #define MAX_DIX_MAP                         4
        u64 rts_dix_map_n[MAX_DIX_MAP];
 #define RTS_DIX_MAP_ETYPE(val)             vBIT(val,0,16)
-#define RTS_DIX_MAP_SCW(val)               BIT(val,21)
+#define RTS_DIX_MAP_SCW(val)               s2BIT(val,21)
 
        u64 rts_q_alternates;
        u64 rts_default_q;
 
        u64 rts_ctrl;
-#define RTS_CTRL_IGNORE_SNAP_OUI           BIT(2)
-#define RTS_CTRL_IGNORE_LLC_CTRL           BIT(3)
+#define RTS_CTRL_IGNORE_SNAP_OUI           s2BIT(2)
+#define RTS_CTRL_IGNORE_LLC_CTRL           s2BIT(3)
 
        u64 rts_pn_cam_ctrl;
-#define RTS_PN_CAM_CTRL_WE                 BIT(7)
-#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD     BIT(15)
-#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED   BIT(15)
+#define RTS_PN_CAM_CTRL_WE                 s2BIT(7)
+#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD     s2BIT(15)
+#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED   s2BIT(15)
 #define RTS_PN_CAM_CTRL_OFFSET(n)          vBIT(n,24,8)
        u64 rts_pn_cam_data;
-#define RTS_PN_CAM_DATA_TCP_SELECT         BIT(7)
+#define RTS_PN_CAM_DATA_TCP_SELECT         s2BIT(7)
 #define RTS_PN_CAM_DATA_PORT(val)          vBIT(val,8,16)
 #define RTS_PN_CAM_DATA_SCW(val)           vBIT(val,24,8)
 
        u64 rts_ds_mem_ctrl;
-#define RTS_DS_MEM_CTRL_WE                 BIT(7)
-#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD     BIT(15)
-#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED   BIT(15)
+#define RTS_DS_MEM_CTRL_WE                 s2BIT(7)
+#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD     s2BIT(15)
+#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED   s2BIT(15)
 #define RTS_DS_MEM_CTRL_OFFSET(n)          vBIT(n,26,6)
        u64 rts_ds_mem_data;
 #define RTS_DS_MEM_DATA(n)                 vBIT(n,0,8)
@@ -823,23 +823,23 @@ struct XENA_dev_config {
 
 /* memory controller registers */
        u64 mc_int_status;
-#define MC_INT_STATUS_MC_INT               BIT(0)
+#define MC_INT_STATUS_MC_INT               s2BIT(0)
        u64 mc_int_mask;
-#define MC_INT_MASK_MC_INT                 BIT(0)
+#define MC_INT_MASK_MC_INT                 s2BIT(0)
 
        u64 mc_err_reg;
-#define MC_ERR_REG_ECC_DB_ERR_L            BIT(14)
-#define MC_ERR_REG_ECC_DB_ERR_U            BIT(15)
-#define MC_ERR_REG_MIRI_ECC_DB_ERR_0       BIT(18)
-#define MC_ERR_REG_MIRI_ECC_DB_ERR_1       BIT(20)
-#define MC_ERR_REG_MIRI_CRI_ERR_0          BIT(22)
-#define MC_ERR_REG_MIRI_CRI_ERR_1          BIT(23)
-#define MC_ERR_REG_SM_ERR                  BIT(31)
-#define MC_ERR_REG_ECC_ALL_SNG            (BIT(2) | BIT(3) | BIT(4) | BIT(5) |\
-                                       BIT(17) | BIT(19))
-#define MC_ERR_REG_ECC_ALL_DBL            (BIT(10) | BIT(11) | BIT(12) |\
-                                       BIT(13) | BIT(18) | BIT(20))
-#define PLL_LOCK_N                     BIT(39)
+#define MC_ERR_REG_ECC_DB_ERR_L            s2BIT(14)
+#define MC_ERR_REG_ECC_DB_ERR_U            s2BIT(15)
+#define MC_ERR_REG_MIRI_ECC_DB_ERR_0       s2BIT(18)
+#define MC_ERR_REG_MIRI_ECC_DB_ERR_1       s2BIT(20)
+#define MC_ERR_REG_MIRI_CRI_ERR_0          s2BIT(22)
+#define MC_ERR_REG_MIRI_CRI_ERR_1          s2BIT(23)
+#define MC_ERR_REG_SM_ERR                  s2BIT(31)
+#define MC_ERR_REG_ECC_ALL_SNG            (s2BIT(2) | s2BIT(3) | s2BIT(4) | s2BIT(5) |\
+                                       s2BIT(17) | s2BIT(19))
+#define MC_ERR_REG_ECC_ALL_DBL            (s2BIT(10) | s2BIT(11) | s2BIT(12) |\
+                                       s2BIT(13) | s2BIT(18) | s2BIT(20))
+#define PLL_LOCK_N                     s2BIT(39)
        u64 mc_err_mask;
        u64 mc_err_alarm;
 
@@ -857,8 +857,8 @@ struct XENA_dev_config {
 #define RX_QUEUE_CFG_Q7_SZ(n)              vBIT(n,56,8)
 
        u64 mc_rldram_mrs;
-#define        MC_RLDRAM_QUEUE_SIZE_ENABLE                     BIT(39)
-#define        MC_RLDRAM_MRS_ENABLE                            BIT(47)
+#define        MC_RLDRAM_QUEUE_SIZE_ENABLE                     s2BIT(39)
+#define        MC_RLDRAM_MRS_ENABLE                            s2BIT(47)
 
        u64 mc_rldram_interleave;
 
@@ -871,11 +871,11 @@ struct XENA_dev_config {
        u64 mc_rldram_ref_per;
        u8 unused20[0x220 - 0x208];
        u64 mc_rldram_test_ctrl;
-#define MC_RLDRAM_TEST_MODE            BIT(47)
-#define MC_RLDRAM_TEST_WRITE   BIT(7)
-#define MC_RLDRAM_TEST_GO              BIT(15)
-#define MC_RLDRAM_TEST_DONE            BIT(23)
-#define MC_RLDRAM_TEST_PASS            BIT(31)
+#define MC_RLDRAM_TEST_MODE            s2BIT(47)
+#define MC_RLDRAM_TEST_WRITE   s2BIT(7)
+#define MC_RLDRAM_TEST_GO              s2BIT(15)
+#define MC_RLDRAM_TEST_DONE            s2BIT(23)
+#define MC_RLDRAM_TEST_PASS            s2BIT(31)
 
        u8 unused21[0x240 - 0x228];
        u64 mc_rldram_test_add;
@@ -888,7 +888,7 @@ struct XENA_dev_config {
 
        u8 unused24_1[0x360 - 0x308];
        u64 mc_rldram_ctrl;
-#define        MC_RLDRAM_ENABLE_ODT            BIT(7)
+#define        MC_RLDRAM_ENABLE_ODT            s2BIT(7)
 
        u8 unused24_2[0x640 - 0x368];
        u64 mc_rldram_ref_per_herc;
@@ -906,24 +906,24 @@ struct XENA_dev_config {
        /* XGXS control registers */
 
        u64 xgxs_int_status;
-#define XGXS_INT_STATUS_TXGXS              BIT(0)
-#define XGXS_INT_STATUS_RXGXS              BIT(1)
+#define XGXS_INT_STATUS_TXGXS              s2BIT(0)
+#define XGXS_INT_STATUS_RXGXS              s2BIT(1)
        u64 xgxs_int_mask;
-#define XGXS_INT_MASK_TXGXS                BIT(0)
-#define XGXS_INT_MASK_RXGXS                BIT(1)
+#define XGXS_INT_MASK_TXGXS                s2BIT(0)
+#define XGXS_INT_MASK_RXGXS                s2BIT(1)
 
        u64 xgxs_txgxs_err_reg;
-#define TXGXS_ECC_SG_ERR               BIT(7)
-#define TXGXS_ECC_DB_ERR               BIT(15)
-#define TXGXS_ESTORE_UFLOW             BIT(31)
-#define TXGXS_TX_SM_ERR                        BIT(39)
+#define TXGXS_ECC_SG_ERR               s2BIT(7)
+#define TXGXS_ECC_DB_ERR               s2BIT(15)
+#define TXGXS_ESTORE_UFLOW             s2BIT(31)
+#define TXGXS_TX_SM_ERR                        s2BIT(39)
 
        u64 xgxs_txgxs_err_mask;
        u64 xgxs_txgxs_err_alarm;
 
        u64 xgxs_rxgxs_err_reg;
-#define RXGXS_ESTORE_OFLOW             BIT(7)
-#define RXGXS_RX_SM_ERR                        BIT(39)
+#define RXGXS_ESTORE_OFLOW             s2BIT(7)
+#define RXGXS_RX_SM_ERR                        s2BIT(39)
        u64 xgxs_rxgxs_err_mask;
        u64 xgxs_rxgxs_err_alarm;
 
@@ -942,10 +942,10 @@ struct XENA_dev_config {
 #define SPI_CONTROL_BYTECNT(cnt)       vBIT(cnt,29,3)
 #define SPI_CONTROL_CMD(cmd)           vBIT(cmd,32,8)
 #define SPI_CONTROL_ADDR(addr)         vBIT(addr,40,24)
-#define SPI_CONTROL_SEL1               BIT(4)
-#define SPI_CONTROL_REQ                        BIT(7)
-#define SPI_CONTROL_NACK               BIT(5)
-#define SPI_CONTROL_DONE               BIT(6)
+#define SPI_CONTROL_SEL1               s2BIT(4)
+#define SPI_CONTROL_REQ                        s2BIT(7)
+#define SPI_CONTROL_NACK               s2BIT(5)
+#define SPI_CONTROL_DONE               s2BIT(6)
        u64 spi_data;
 #define SPI_DATA_WRITE(data,len)       vBIT(data,0,len)
 };
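
Every BIT() in this register block becomes s2BIT() because the Xframe hardware
numbers bits MSB-first within a 64-bit word -- the opposite of the generic
BIT() convention -- and the rename avoids a clash with <linux/bitops.h>. A
side-by-side sketch of the two conventions (generic definition assumed from
<linux/bitops.h>):

    #define BIT(nr)     (1UL << (nr))                     /* generic: bit 0 = LSB */
    #define s2BIT(loc)  (0x8000000000000000ULL >> (loc))  /* s2io:    bit 0 = MSB */

    /* e.g. GEN_INTR_RXTRAFFIC = s2BIT(40) = 1ULL << 23, not 1ULL << 40 */
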
index 22e4054d4fcb249feac3331eab47b544f85920a8..b8c0e7b4ca1cdd0e0d4d3a9251af47cff784a326 100644 (file)
@@ -1716,7 +1716,7 @@ static int init_nic(struct s2io_nic *nic)
                        MISC_LINK_STABILITY_PRD(3);
                writeq(val64, &bar0->misc_control);
                val64 = readq(&bar0->pic_control2);
-               val64 &= ~(BIT(13)|BIT(14)|BIT(15));
+               val64 &= ~(s2BIT(13)|s2BIT(14)|s2BIT(15));
                writeq(val64, &bar0->pic_control2);
        }
        if (strstr(nic->product_name, "CX4")) {
@@ -2427,7 +2427,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                }
                if ((rxdp->Control_1 & RXD_OWN_XENA) &&
                        ((nic->rxd_mode == RXD_MODE_3B) &&
-                               (rxdp->Control_2 & BIT(0)))) {
+                               (rxdp->Control_2 & s2BIT(0)))) {
                        mac_control->rings[ring_no].rx_curr_put_info.
                                        offset = off;
                        goto end;
@@ -2540,7 +2540,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
                                rxdp->Control_2 |= SET_BUFFER2_SIZE_3
                                                                (dev->mtu + 4);
                        }
-                       rxdp->Control_2 |= BIT(0);
+                       rxdp->Control_2 |= s2BIT(0);
                }
                rxdp->Host_Control = (unsigned long) (skb);
                if (alloc_tab & ((1 << rxsync_frequency) - 1))
@@ -3377,7 +3377,7 @@ static void s2io_reset(struct s2io_nic * sp)
                pci_write_config_dword(sp->pdev, 0x68, 0x7C);
 
                /* Clearing PCI_STATUS error reflected here */
-               writeq(BIT(62), &bar0->txpic_int_reg);
+               writeq(s2BIT(62), &bar0->txpic_int_reg);
        }
 
        /* Reset device statistics maintained by OS */
@@ -3575,7 +3575,7 @@ static int wait_for_msix_trans(struct s2io_nic *nic, int i)
 
        do {
                val64 = readq(&bar0->xmsi_access);
-               if (!(val64 & BIT(15)))
+               if (!(val64 & s2BIT(15)))
                        break;
                mdelay(1);
                cnt++;
@@ -3597,7 +3597,7 @@ static void restore_xmsi_data(struct s2io_nic *nic)
        for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
                writeq(nic->msix_info[i].addr, &bar0->xmsi_address);
                writeq(nic->msix_info[i].data, &bar0->xmsi_data);
-               val64 = (BIT(7) | BIT(15) | vBIT(i, 26, 6));
+               val64 = (s2BIT(7) | s2BIT(15) | vBIT(i, 26, 6));
                writeq(val64, &bar0->xmsi_access);
                if (wait_for_msix_trans(nic, i)) {
                        DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -3614,7 +3614,7 @@ static void store_xmsi_data(struct s2io_nic *nic)
 
        /* Store and display */
        for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
-               val64 = (BIT(15) | vBIT(i, 26, 6));
+               val64 = (s2BIT(15) | vBIT(i, 26, 6));
                writeq(val64, &bar0->xmsi_access);
                if (wait_for_msix_trans(nic, i)) {
                        DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -4634,7 +4634,7 @@ static void s2io_updt_stats(struct s2io_nic *sp)
                do {
                        udelay(100);
                        val64 = readq(&bar0->stat_cfg);
-                       if (!(val64 & BIT(0)))
+                       if (!(val64 & s2BIT(0)))
                                break;
                        cnt++;
                        if (cnt == 5)
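
These .c hunks show why the rename is not cosmetic: the polling loops test
MSB-first flag bits, which the generic BIT() would get wrong. A fragment
sketching the stat_cfg poll from the hunk above:

    /* STAT_CFG_STAT_EN lives in the most significant bit of stat_cfg */
    val64 = readq(&bar0->stat_cfg);
    if (!(val64 & s2BIT(0)))        /* tests 0x8000000000000000ULL */
            break;                  /* generic BIT(0) would test 0x1 */
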
index f6b45565304f07c2bf3b9f3b5b49d50bb50a9a12..cc1797a071aa0822617efa6b96f03a3795d1a25c 100644 (file)
@@ -14,7 +14,7 @@
 #define _S2IO_H
 
 #define TBD 0
-#define BIT(loc)               (0x8000000000000000ULL >> (loc))
+#define s2BIT(loc)             (0x8000000000000000ULL >> (loc))
 #define vBIT(val, loc, sz)     (((u64)val) << (64-loc-sz))
 #define INV(d)  ((d&0xff)<<24) | (((d>>8)&0xff)<<16) | (((d>>16)&0xff)<<8)| ((d>>24)&0xff)
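
vBIT() is the multi-bit companion to s2BIT(): it places an sz-bit value so
that its first (most significant) bit sits at MSB-first position loc. A short
worked example using definitions from this header:

    /* vBIT(3,14,2) = 3 << (64-14-2) = 3 << 48, i.e. bits 14 and 15 set
     * (MSB numbering) -- the same mask as s2BIT(14) | s2BIT(15).      */
    #define TX_FIFO_FIRSTNLAST_LIST    vBIT(3,14,2)
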
 
@@ -473,42 +473,42 @@ struct TxFIFO_element {
 
        u64 List_Control;
 #define TX_FIFO_LAST_TXD_NUM( val)     vBIT(val,0,8)
-#define TX_FIFO_FIRST_LIST             BIT(14)
-#define TX_FIFO_LAST_LIST              BIT(15)
+#define TX_FIFO_FIRST_LIST             s2BIT(14)
+#define TX_FIFO_LAST_LIST              s2BIT(15)
 #define TX_FIFO_FIRSTNLAST_LIST        vBIT(3,14,2)
-#define TX_FIFO_SPECIAL_FUNC           BIT(23)
-#define TX_FIFO_DS_NO_SNOOP            BIT(31)
-#define TX_FIFO_BUFF_NO_SNOOP          BIT(30)
+#define TX_FIFO_SPECIAL_FUNC           s2BIT(23)
+#define TX_FIFO_DS_NO_SNOOP            s2BIT(31)
+#define TX_FIFO_BUFF_NO_SNOOP          s2BIT(30)
 };
 
 /* Tx descriptor structure */
 struct TxD {
        u64 Control_1;
 /* bit mask */
-#define TXD_LIST_OWN_XENA       BIT(7)
-#define TXD_T_CODE              (BIT(12)|BIT(13)|BIT(14)|BIT(15))
+#define TXD_LIST_OWN_XENA       s2BIT(7)
+#define TXD_T_CODE              (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
 #define TXD_T_CODE_OK(val)      (!(val & TXD_T_CODE))
 #define GET_TXD_T_CODE(val)     ((val & TXD_T_CODE)<<12)
-#define TXD_GATHER_CODE         (BIT(22) | BIT(23))
-#define TXD_GATHER_CODE_FIRST   BIT(22)
-#define TXD_GATHER_CODE_LAST    BIT(23)
-#define TXD_TCP_LSO_EN          BIT(30)
-#define TXD_UDP_COF_EN          BIT(31)
-#define TXD_UFO_EN             BIT(31) | BIT(30)
+#define TXD_GATHER_CODE         (s2BIT(22) | s2BIT(23))
+#define TXD_GATHER_CODE_FIRST   s2BIT(22)
+#define TXD_GATHER_CODE_LAST    s2BIT(23)
+#define TXD_TCP_LSO_EN          s2BIT(30)
+#define TXD_UDP_COF_EN          s2BIT(31)
+#define TXD_UFO_EN             s2BIT(31) | s2BIT(30)
 #define TXD_TCP_LSO_MSS(val)    vBIT(val,34,14)
 #define TXD_UFO_MSS(val)       vBIT(val,34,14)
 #define TXD_BUFFER0_SIZE(val)   vBIT(val,48,16)
 
        u64 Control_2;
-#define TXD_TX_CKO_CONTROL      (BIT(5)|BIT(6)|BIT(7))
-#define TXD_TX_CKO_IPV4_EN      BIT(5)
-#define TXD_TX_CKO_TCP_EN       BIT(6)
-#define TXD_TX_CKO_UDP_EN       BIT(7)
-#define TXD_VLAN_ENABLE         BIT(15)
+#define TXD_TX_CKO_CONTROL      (s2BIT(5)|s2BIT(6)|s2BIT(7))
+#define TXD_TX_CKO_IPV4_EN      s2BIT(5)
+#define TXD_TX_CKO_TCP_EN       s2BIT(6)
+#define TXD_TX_CKO_UDP_EN       s2BIT(7)
+#define TXD_VLAN_ENABLE         s2BIT(15)
 #define TXD_VLAN_TAG(val)       vBIT(val,16,16)
 #define TXD_INT_NUMBER(val)     vBIT(val,34,6)
-#define TXD_INT_TYPE_PER_LIST   BIT(47)
-#define TXD_INT_TYPE_UTILZ      BIT(46)
+#define TXD_INT_TYPE_PER_LIST   s2BIT(47)
+#define TXD_INT_TYPE_UTILZ      s2BIT(46)
 #define TXD_SET_MARKER         vBIT(0x6,0,4)
 
        u64 Buffer_Pointer;
@@ -525,14 +525,14 @@ struct list_info_hold {
 struct RxD_t {
        u64 Host_Control;       /* reserved for host */
        u64 Control_1;
-#define RXD_OWN_XENA            BIT(7)
-#define RXD_T_CODE              (BIT(12)|BIT(13)|BIT(14)|BIT(15))
+#define RXD_OWN_XENA            s2BIT(7)
+#define RXD_T_CODE              (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
 #define RXD_FRAME_PROTO         vBIT(0xFFFF,24,8)
-#define RXD_FRAME_PROTO_IPV4    BIT(27)
-#define RXD_FRAME_PROTO_IPV6    BIT(28)
-#define RXD_FRAME_IP_FRAG      BIT(29)
-#define RXD_FRAME_PROTO_TCP     BIT(30)
-#define RXD_FRAME_PROTO_UDP     BIT(31)
+#define RXD_FRAME_PROTO_IPV4    s2BIT(27)
+#define RXD_FRAME_PROTO_IPV6    s2BIT(28)
+#define RXD_FRAME_IP_FRAG      s2BIT(29)
+#define RXD_FRAME_PROTO_TCP     s2BIT(30)
+#define RXD_FRAME_PROTO_UDP     s2BIT(31)
 #define TCP_OR_UDP_FRAME        (RXD_FRAME_PROTO_TCP | RXD_FRAME_PROTO_UDP)
 #define RXD_GET_L3_CKSUM(val)   ((u16)(val>> 16) & 0xFFFF)
 #define RXD_GET_L4_CKSUM(val)   ((u16)(val) & 0xFFFF)
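
A hedged fragment showing how a receive path would decode Control_1 with the
masks above (names from this hunk; declarations, locking and error handling
elided):

    if (!(rxdp->Control_1 & RXD_OWN_XENA)) {       /* descriptor back with host */
            u64 proto = rxdp->Control_1 & RXD_FRAME_PROTO;
            if (proto & TCP_OR_UDP_FRAME)          /* TCP or UDP frame received */
                    l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
    }
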
@@ -998,26 +998,26 @@ static inline void SPECIAL_REG_WRITE(u64 val, void __iomem *addr, int order)
 /*  Interrupt masks for the general interrupt mask register */
 #define DISABLE_ALL_INTRS   0xFFFFFFFFFFFFFFFFULL
 
-#define TXPIC_INT_M         BIT(0)
-#define TXDMA_INT_M         BIT(1)
-#define TXMAC_INT_M         BIT(2)
-#define TXXGXS_INT_M        BIT(3)
-#define TXTRAFFIC_INT_M     BIT(8)
-#define PIC_RX_INT_M        BIT(32)
-#define RXDMA_INT_M         BIT(33)
-#define RXMAC_INT_M         BIT(34)
-#define MC_INT_M            BIT(35)
-#define RXXGXS_INT_M        BIT(36)
-#define RXTRAFFIC_INT_M     BIT(40)
+#define TXPIC_INT_M         s2BIT(0)
+#define TXDMA_INT_M         s2BIT(1)
+#define TXMAC_INT_M         s2BIT(2)
+#define TXXGXS_INT_M        s2BIT(3)
+#define TXTRAFFIC_INT_M     s2BIT(8)
+#define PIC_RX_INT_M        s2BIT(32)
+#define RXDMA_INT_M         s2BIT(33)
+#define RXMAC_INT_M         s2BIT(34)
+#define MC_INT_M            s2BIT(35)
+#define RXXGXS_INT_M        s2BIT(36)
+#define RXTRAFFIC_INT_M     s2BIT(40)
 
 /*  PIC level Interrupts TODO*/
 
 /*  DMA level Interrupts */
-#define TXDMA_PFC_INT_M     BIT(0)
-#define TXDMA_PCC_INT_M     BIT(2)
+#define TXDMA_PFC_INT_M     s2BIT(0)
+#define TXDMA_PCC_INT_M     s2BIT(2)
 
 /*  PFC block interrupts */
-#define PFC_MISC_ERR_1      BIT(0)     /* Interrupt to indicate FIFO full */
+#define PFC_MISC_ERR_1      s2BIT(0)   /* Interrupt to indicate FIFO full */
 
 /* PCC block interrupts. */
 #define        PCC_FB_ECC_ERR     vBIT(0xff, 16, 8)    /* Interrupt to indicate
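
The general-mask bits above gate whole interrupt groups. A sketch of unmasking
only the Tx/Rx traffic groups, assuming the usual bar0->general_int_mask
register and starting from everything masked:

    u64 mask = DISABLE_ALL_INTRS;
    mask &= ~(TXTRAFFIC_INT_M | RXTRAFFIC_INT_M);  /* s2BIT(8) and s2BIT(40) */
    writeq(mask, &bar0->general_int_mask);
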
index fab055ffcc9037070442469a34cc4d0fa1675237..571060a3c91e98ea930be958c8523e4716fd66af 100644 (file)
@@ -46,7 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/pci-bridge.h>
 #include <net/checksum.h>
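
This include swap repeats across the remaining drivers in this commit:
<linux/bitops.h> is the sanctioned header -- it pulls in <asm/bitops.h> itself
and layers the generic helpers such as BIT() on top -- so drivers should not
include the asm header directly. The pattern, in brief:

    #include <linux/bitops.h>   /* provides BIT() and includes <asm/bitops.h> */
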
 
index 76e55612430bae38ffc8c352d798bb112fcf61b7..a7afeea156bd5f80229b6b8a55b3952f3c764299 100644 (file)
@@ -34,9 +34,9 @@
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/dma-mapping.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/uaccess.h>
index 8f198befba390e1f325ee03f77bdc04b5896bc44..cb51dc51cce6d36f73ad2efef9d1672436c48ce3 100644 (file)
@@ -29,7 +29,7 @@
 #include "bcm43xx_radio.h"
 #include "bcm43xx.h"
 
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 
 static void bcm43xx_led_changestate(struct bcm43xx_led *led)
index ceb7f1e5e9e096d50012a3999e21f1014ee2e48d..517f8984514411bfddb0c602df00599bcdeb011c 100644 (file)
@@ -4,9 +4,6 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
-#define BIT(x) (1 << (x))
-
-
 /* IEEE 802.11 defines */
 
 /* Information Element IDs */
index 40f516d42c5e39894f4c14f45582852ce116fb7b..d8f5efcfcab96915d0f41ecfb22958ba556a3936 100644 (file)
@@ -2920,7 +2920,7 @@ static int prism2_ioctl_priv_monitor(struct net_device *dev, int *i)
 
        printk(KERN_DEBUG "%s: process %d (%s) used deprecated iwpriv monitor "
               "- update software to use iwconfig mode monitor\n",
-              dev->name, current->pid, current->comm);
+              dev->name, task_pid_nr(current), current->comm);
 
        /* Backward compatibility code - this can be removed at some point */
 
index 67d28ee80f220bc4e119b1ac2ebe9639e2459c46..c5e0d89c3ecefecf09cc03e5558b4c0aff4ca315 100644 (file)
@@ -22,9 +22,9 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/bitops.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/system.h>
 #include <asm/addrspace.h>
 
index b01985498460f926d0f6e920d900b10fb1b59cb8..d182760f035b1e7f8fed9d516ec4d02bdf9baa0b 100644 (file)
@@ -48,9 +48,9 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/fsl_devices.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/system.h>
 #include <asm/time.h>
 #include <asm/mpc8xx.h>
index 397f4ce849dc6afa7eddcd66379b627c34902c84..87b3493d88e5bea0f93b3fc0ef412bbb1232a722 100644 (file)
@@ -729,7 +729,7 @@ static void ps3av_monitor_info_dump(const struct ps3av_pkt_av_get_monitor_info *
 
 static const struct ps3av_monitor_quirk {
        const char *monitor_name;
-       u32 clear_60, clear_50, clear_vesa;
+       u32 clear_60;
 } ps3av_monitor_quirks[] = {
        {
                .monitor_name   = "DELL 2007WFP",
@@ -757,10 +757,6 @@ static void ps3av_fixup_monitor_info(struct ps3av_info_monitor *info)
                                quirk->monitor_name);
                        info->res_60.res_bits &= ~quirk->clear_60;
                        info->res_60.native &= ~quirk->clear_60;
-                       info->res_50.res_bits &= ~quirk->clear_50;
-                       info->res_50.native &= ~quirk->clear_50;
-                       info->res_vesa.res_bits &= ~quirk->clear_vesa;
-                       info->res_vesa.native &= ~quirk->clear_vesa;
                        break;
                }
        }
index bea25a1391ee3adc0475be28aeb236c716eb1324..9dea585ef8064f656ebeed605dc834179206ee2c 100644 (file)
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
+#include <linux/bitops.h>
 #include <asm/ps3.h>
 
 #include <asm/firmware.h>
 #include <asm/lv1call.h>
-#include <asm/bitops.h>
 
 #include "vuart.h"
 
index e4bf68ca96f7b2b237888f0a32b7f1b3fc86b449..2fd49edcc712117a0da38adfa60268595d00fd4e 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/bitops.h>
 
 #include <linux/amba/bus.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/hardware.h>
 #include <asm/irq.h>
 #include <asm/rtc.h>
index 0918b787c4dd033671a0bd1902594b1ae858b653..6f1e9a9804bc9a7b0edbee3e83c256d5c1572e07 100644 (file)
@@ -29,8 +29,8 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/hardware.h>
 #include <asm/irq.h>
 #include <asm/rtc.h>
index 16ea828e99f7a0b39228cda81f78247348e01966..ef7bc0a125efde63b0c06dd7fb64ce52d9ec5a92 100644 (file)
@@ -6,7 +6,7 @@
  */
 
 #include <linux/slab.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include "idset.h"
 #include "css.h"
 
index 399695f7b1af147e083d60026bb49a6f11bd2389..3561982749e36d943d927c80227215cf838285d0 100644 (file)
  *    1.15  Changed for 2.6 Kernel  No longer compiles on 2.4 or lower
  *    1.25  Added Packing support
  */
-#include <asm/bitops.h>
 #include <asm/ccwdev.h>
 #include <asm/ccwgroup.h>
 #include <asm/debug.h>
 #include <asm/idals.h>
 #include <asm/io.h>
 
+#include <linux/bitops.h>
 #include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
index a7f916c0c9cdcc618476eecf3dcca083065d8d9c..1c9078191d9ecb4f76819007e4569bda09061bb4 100644 (file)
@@ -25,9 +25,6 @@
 
 #define FAILURE         0xFFFFFFFFL
 
-#define BIT(x)          ((unsigned char)(1<<(x)))      /* single-bit mask in bit position x */
-#define BITW(x)          ((unsigned short)(1<<(x)))    /* single-bit mask in bit position x */
-
 struct sccb;
 typedef void (*CALL_BK_FN) (struct sccb *);
 
@@ -374,9 +371,9 @@ typedef struct SCCBscam_info {
 #define  SCAM_ENABLED   BIT(2)
 #define  SCAM_LEVEL2    BIT(3)
 
-#define        RENEGO_ENA              BITW(10)
-#define        CONNIO_ENA              BITW(11)
-#define  GREEN_PC_ENA   BITW(12)
+#define        RENEGO_ENA              BIT(10)
+#define        CONNIO_ENA              BIT(11)
+#define  GREEN_PC_ENA   BIT(12)
 
 #define  AUTO_RATE_00   00
 #define  AUTO_RATE_05   01
@@ -511,23 +508,23 @@ typedef struct SCCBscam_info {
 
 #define  hp_intena              0x40
 
-#define  RESET          BITW(7)
-#define  PROG_HLT               BITW(6)
-#define  PARITY                 BITW(5)
-#define  FIFO           BITW(4)
-#define  SEL            BITW(3)
-#define  SCAM_SEL               BITW(2)
-#define  RSEL           BITW(1)
-#define  TIMEOUT                BITW(0)
-#define  BUS_FREE               BITW(15)
-#define  XFER_CNT_0     BITW(14)
-#define  PHASE          BITW(13)
-#define  IUNKWN                 BITW(12)
-#define  ICMD_COMP      BITW(11)
-#define  ITICKLE                BITW(10)
-#define  IDO_STRT               BITW(9)
-#define  ITAR_DISC      BITW(8)
-#define  AUTO_INT               (BITW(12)+BITW(11)+BITW(10)+BITW(9)+BITW(8))
+#define  RESET          BIT(7)
+#define  PROG_HLT               BIT(6)
+#define  PARITY                 BIT(5)
+#define  FIFO           BIT(4)
+#define  SEL            BIT(3)
+#define  SCAM_SEL               BIT(2)
+#define  RSEL           BIT(1)
+#define  TIMEOUT                BIT(0)
+#define  BUS_FREE               BIT(15)
+#define  XFER_CNT_0     BIT(14)
+#define  PHASE          BIT(13)
+#define  IUNKWN                 BIT(12)
+#define  ICMD_COMP      BIT(11)
+#define  ITICKLE                BIT(10)
+#define  IDO_STRT               BIT(9)
+#define  ITAR_DISC      BIT(8)
+#define  AUTO_INT               (BIT(12)+BIT(11)+BIT(10)+BIT(9)+BIT(8))
 #define  CLR_ALL_INT    0xFFFF
 #define  CLR_ALL_INT_1  0xFF00
 
@@ -674,37 +671,37 @@ typedef struct SCCBscam_info {
 #define  BIOS_DATA_OFFSET     0x60
 #define  BIOS_RELATIVE_CARD   0x64
 
-#define  AR3      (BITW(9) + BITW(8))
-#define  SDATA    BITW(10)
+#define  AR3      (BIT(9) + BIT(8))
+#define  SDATA    BIT(10)
 
-#define  CRD_OP   BITW(11)     /* Cmp Reg. w/ Data */
+#define  CRD_OP   BIT(11)      /* Cmp Reg. w/ Data */
 
-#define  CRR_OP   BITW(12)     /* Cmp Reg. w. Reg. */
+#define  CRR_OP   BIT(12)      /* Cmp Reg. w. Reg. */
 
-#define  CPE_OP   (BITW(14)+BITW(11))  /* Cmp SCSI phs & Branch EQ */
+#define  CPE_OP   (BIT(14)+BIT(11))    /* Cmp SCSI phs & Branch EQ */
 
-#define  CPN_OP   (BITW(14)+BITW(12))  /* Cmp SCSI phs & Branch NOT EQ */
+#define  CPN_OP   (BIT(14)+BIT(12))    /* Cmp SCSI phs & Branch NOT EQ */
 
 #define  ADATA_OUT   0x00
-#define  ADATA_IN    BITW(8)
-#define  ACOMMAND    BITW(10)
-#define  ASTATUS     (BITW(10)+BITW(8))
-#define  AMSG_OUT    (BITW(10)+BITW(9))
-#define  AMSG_IN     (BITW(10)+BITW(9)+BITW(8))
+#define  ADATA_IN    BIT(8)
+#define  ACOMMAND    BIT(10)
+#define  ASTATUS     (BIT(10)+BIT(8))
+#define  AMSG_OUT    (BIT(10)+BIT(9))
+#define  AMSG_IN     (BIT(10)+BIT(9)+BIT(8))
 
-#define  BRH_OP   BITW(13)     /* Branch */
+#define  BRH_OP   BIT(13)      /* Branch */
 
 #define  ALWAYS   0x00
-#define  EQUAL    BITW(8)
-#define  NOT_EQ   BITW(9)
+#define  EQUAL    BIT(8)
+#define  NOT_EQ   BIT(9)
 
-#define  TCB_OP   (BITW(13)+BITW(11))  /* Test condition & branch */
+#define  TCB_OP   (BIT(13)+BIT(11))    /* Test condition & branch */
 
-#define  FIFO_0      BITW(10)
+#define  FIFO_0      BIT(10)
 
-#define  MPM_OP   BITW(15)     /* Match phase and move data */
+#define  MPM_OP   BIT(15)      /* Match phase and move data */
 
-#define  MRR_OP   BITW(14)     /* Move DReg. to Reg. */
+#define  MRR_OP   BIT(14)      /* Move DReg. to Reg. */
 
 #define  S_IDREG  (BIT(2)+BIT(1)+BIT(0))
 
@@ -712,9 +709,9 @@ typedef struct SCCBscam_info {
 #define  D_AR1    BIT(0)
 #define  D_BUCKET (BIT(2) + BIT(1) + BIT(0))
 
-#define  RAT_OP      (BITW(14)+BITW(13)+BITW(11))
+#define  RAT_OP      (BIT(14)+BIT(13)+BIT(11))
 
-#define  SSI_OP      (BITW(15)+BITW(11))
+#define  SSI_OP      (BIT(15)+BIT(11))
 
 #define  SSI_ITAR_DISC (ITAR_DISC >> 8)
 #define  SSI_IDO_STRT  (IDO_STRT >> 8)
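
FlashPoint deletes its private BIT() and BITW() macros and leans on the generic BIT() from <linux/bitops.h>. Because the generic macro expands to (1UL << (x)), it subsumes the 16-bit BITW() cases: the register masks are unchanged in the low 16 bits. A quick stand-alone check of that claim (plain C, macro bodies copied from the old and new definitions):

/* OLD_BITW reproduces the deleted driver-local macro; BIT matches
 * the generic definition.  For x in 0..15 the low 16 bits agree,
 * so every mask in this header keeps its value. */
#include <assert.h>

#define OLD_BITW(x) ((unsigned short)(1 << (x)))
#define BIT(x)      (1UL << (x))

int main(void)
{
        int x;

        for (x = 0; x < 16; x++)
                assert((unsigned short)BIT(x) == OLD_BITW(x));
        return 0;
}
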
index 30905cebefbb4d876f8c249f9334e291ccce5751..a5763c6e936232e8445be6de3e2bb048d2040c67 100644 (file)
@@ -521,7 +521,7 @@ config SCSI_DPT_I2O
 
 config SCSI_ADVANSYS
        tristate "AdvanSys SCSI support"
-       depends on SCSI
+       depends on SCSI && VIRT_TO_BUS
        depends on ISA || EISA || PCI
        help
          This is a driver for all SCSI host adapters manufactured by
index fa7ba64483fb4c3976518f909365f1156bef9262..252d1806467fbd94caa949d47cbd83797660f679 100644 (file)
@@ -47,9 +47,9 @@
 #include <linux/scatterlist.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include <scsi/scsi.h>
index 7ef0afc3cd68b7f848a7b635af3c30fb8a6690ad..5f3a0d7b18de9db796d4ada15df39d8ef29c1b61 100644 (file)
@@ -285,7 +285,7 @@ static void sas_discover_domain(struct work_struct *work)
        dev = port->port_dev;
 
        SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id,
-                   current->pid);
+                   task_pid_nr(current));
 
        switch (dev->dev_type) {
        case SAS_END_DEV:
@@ -320,7 +320,7 @@ static void sas_discover_domain(struct work_struct *work)
        }
 
        SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
-                   current->pid, error);
+                   task_pid_nr(current), error);
 }
 
 static void sas_revalidate_domain(struct work_struct *work)
@@ -334,12 +334,12 @@ static void sas_revalidate_domain(struct work_struct *work)
                        &port->disc.pending);
 
        SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
-                   current->pid);
+                   task_pid_nr(current));
        if (port->port_dev)
                res = sas_ex_revalidate_domain(port->port_dev);
 
        SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
-                   port->id, current->pid, res);
+                   port->id, task_pid_nr(current), res);
 }
 
 /* ---------- Events ---------- */
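
The libsas hunks swap direct current->pid reads for task_pid_nr(current). With PID namespaces being wired up, ->pid is no longer meant to be read directly; the accessor returns the global PID, while the _vnr variants seen later in this merge return the namespace-local one. A minimal sketch of the pattern (the function is hypothetical, the accessor is the real one):

#include <linux/kernel.h>
#include <linux/sched.h>

static void demo_log_pid(void)
{
        printk(KERN_INFO "running in task %d (%s)\n",
               task_pid_nr(current), current->comm);
}
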
index a976e8193d163d1ff983fb405d16d7a981a8e8c1..6715ecb3bfcaf2f1a3bce11c94b826ac72893979 100644 (file)
@@ -68,11 +68,6 @@ static char * nsp32_model[] = {
 typedef u32 u32_le;
 typedef u16 u16_le;
 
-/*
- * MACRO
- */
-#define BIT(x)      (1UL << (x))
-
 /*
  * BASIC Definitions
  */
index b7f0fa24641396ca147a6ba894045adaa13e5327..98397559c53ba8a2532417d512ff78a3e318e3e4 100644 (file)
@@ -24,7 +24,6 @@
 /************************************
  * Some useful macros...
  */
-#define BIT(x)      (1L << (x))
 
 /* SCSI initiator must be ID 7 */
 #define NSP_INITIATOR_ID  7
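
nsp32 and nsp_cs drop their driver-local BIT(x) definitions for the same reason: <linux/bitops.h> now supplies one, and keeping a private copy (1UL in one driver, 1L in the other) invites redefinition warnings and a sign-extension surprise at bit 31 on 32-bit builds. A stand-alone illustration of the surviving, unsigned definition (plain C):

#include <stdio.h>

#define BIT(x) (1UL << (x))     /* matches the generic header */

int main(void)
{
        /* unsigned long, so bit 31 is 0x80000000, never negative */
        printf("BIT(31) = 0x%lx\n", BIT(31));
        return 0;
}
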
index 9bb3d1d2a925a0b6a28ecd90e4af02c184c71803..fe415ec8565592924853f0d5c57da5f05453d502 100644 (file)
@@ -671,7 +671,7 @@ struct continuation_t1_entry {
 #define ET_CONTINUE    ET_CONT_T1
 
 /* Marker entry structure*/
-struct marker_entry {
+struct qla4_marker_entry {
        struct qla4_header hdr; /* 00-03 */
 
        uint32_t system_defined; /* 04-07 */
index 5006ecb3ef5ea935e55a069eef9d8c9218b70a79..e4461b5d767a2305c0a1c6067e08f53db22235eb 100644 (file)
@@ -69,7 +69,7 @@ static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha,
 static int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha,
                                    struct ddb_entry *ddb_entry, int lun)
 {
-       struct marker_entry *marker_entry;
+       struct qla4_marker_entry *marker_entry;
        unsigned long flags = 0;
        uint8_t status = QLA_SUCCESS;
 
index 72229df9dc111754a74de5b1e3fb4eb896d8768e..40604a092921dac7352161ffeed1820caa11216f 100644 (file)
@@ -263,15 +263,15 @@ static unsigned int pl01x_get_mctrl(struct uart_port *port)
        unsigned int result = 0;
        unsigned int status = readw(uap->port.membase + UART01x_FR);
 
-#define BIT(uartbit, tiocmbit)         \
+#define TIOCMBIT(uartbit, tiocmbit)    \
        if (status & uartbit)           \
                result |= tiocmbit
 
-       BIT(UART01x_FR_DCD, TIOCM_CAR);
-       BIT(UART01x_FR_DSR, TIOCM_DSR);
-       BIT(UART01x_FR_CTS, TIOCM_CTS);
-       BIT(UART011_FR_RI, TIOCM_RNG);
-#undef BIT
+       TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR);
+       TIOCMBIT(UART01x_FR_DSR, TIOCM_DSR);
+       TIOCMBIT(UART01x_FR_CTS, TIOCM_CTS);
+       TIOCMBIT(UART011_FR_RI, TIOCM_RNG);
+#undef TIOCMBIT
        return result;
 }
 
@@ -282,18 +282,18 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl)
 
        cr = readw(uap->port.membase + UART011_CR);
 
-#define        BIT(tiocmbit, uartbit)          \
+#define        TIOCMBIT(tiocmbit, uartbit)             \
        if (mctrl & tiocmbit)           \
                cr |= uartbit;          \
        else                            \
                cr &= ~uartbit
 
-       BIT(TIOCM_RTS, UART011_CR_RTS);
-       BIT(TIOCM_DTR, UART011_CR_DTR);
-       BIT(TIOCM_OUT1, UART011_CR_OUT1);
-       BIT(TIOCM_OUT2, UART011_CR_OUT2);
-       BIT(TIOCM_LOOP, UART011_CR_LBE);
-#undef BIT
+       TIOCMBIT(TIOCM_RTS, UART011_CR_RTS);
+       TIOCMBIT(TIOCM_DTR, UART011_CR_DTR);
+       TIOCMBIT(TIOCM_OUT1, UART011_CR_OUT1);
+       TIOCMBIT(TIOCM_OUT2, UART011_CR_OUT2);
+       TIOCMBIT(TIOCM_LOOP, UART011_CR_LBE);
+#undef TIOCMBIT
 
        writew(cr, uap->port.membase + UART011_CR);
 }
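
amba-pl011's scoped helper macro was itself named BIT(), which now collides with the generic one; renaming it TIOCMBIT() keeps the define/use/#undef trick while staying out of the way. The pattern, sketched with placeholder register and flag values:

/* Sketch: the real driver maps UART01x_FR_* status bits to TIOCM_*
 * flags; the values below are placeholders.  Defining the macro just
 * before use and #undef-ing it after keeps it local to the function. */
static unsigned int demo_get_mctrl(unsigned int status)
{
        unsigned int result = 0;

#define TIOCMBIT(uartbit, tiocmbit)     \
        if (status & (uartbit))         \
                result |= (tiocmbit)

        TIOCMBIT(0x01, 0x040);          /* e.g. DCD -> TIOCM_CAR */
        TIOCMBIT(0x02, 0x100);          /* e.g. DSR -> TIOCM_DSR */
#undef TIOCMBIT

        return result;
}
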
index 7e8724d3571f3f53219075ddabbd667f40517d34..f523cdf4b02b52a3a439a0a80d8cf52af4612842 100644 (file)
@@ -442,11 +442,11 @@ static char *serial_version = "$Revision: 1.25 $";
 #include <asm/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <linux/delay.h>
 
 #include <asm/arch/svinto.h>
index f013b4012c9a18a4199f0764d0a8218233fdb577..1f4f6d02fe25598e22a62b785b1b7140913b7c12 100644 (file)
@@ -460,7 +460,7 @@ static int checkintf(struct dev_state *ps, unsigned int ifnum)
                return 0;
        /* if not yet claimed, claim it for the driver */
        dev_warn(&ps->dev->dev, "usbfs: process %d (%s) did not claim interface %u before use\n",
-              current->pid, current->comm, ifnum);
+              task_pid_nr(current), current->comm, ifnum);
        return claimintf(ps, ifnum);
 }
 
index 73726c570a6eeb01cc41d91cb5230a256e57729e..1d174dcb3ac9e6a4ea5ce8133d6e4519d32d9783 100644 (file)
@@ -4006,7 +4006,7 @@ static int __init fsg_bind(struct usb_gadget *gadget)
        DBG(fsg, "removable=%d, stall=%d, buflen=%u\n",
                        mod_data.removable, mod_data.can_stall,
                        mod_data.buflen);
-       DBG(fsg, "I/O thread pid: %d\n", fsg->thread_task->pid);
+       DBG(fsg, "I/O thread pid: %d\n", task_pid_nr(fsg->thread_task));
 
        set_bit(REGISTERED, &fsg->atomic_bitflags);
 
index 9bb2cbfe4a3db65506dc6ff781ea73b7263c194a..5fb8675e0d6b9278a3c31b9ec732cd3c918578db 100644 (file)
@@ -62,7 +62,7 @@ struct cfb_info {
        struct display_switch   *dispsw;
        struct display          *display;
        struct pci_dev          *dev;
-       unsigned char           __iomem *region;
+       unsigned char           __iomem *region;
        unsigned char           __iomem *regs;
        u_int                   id;
        int                     func_use_count;
@@ -97,11 +97,11 @@ MODULE_PARM_DESC(default_font, "Default font name");
 /*
  * Our access methods.
  */
-#define cyber2000fb_writel(val,reg,cfb)        writel(val, (cfb)->regs + (reg))
-#define cyber2000fb_writew(val,reg,cfb)        writew(val, (cfb)->regs + (reg))
-#define cyber2000fb_writeb(val,reg,cfb)        writeb(val, (cfb)->regs + (reg))
+#define cyber2000fb_writel(val, reg, cfb)      writel(val, (cfb)->regs + (reg))
+#define cyber2000fb_writew(val, reg, cfb)      writew(val, (cfb)->regs + (reg))
+#define cyber2000fb_writeb(val, reg, cfb)      writeb(val, (cfb)->regs + (reg))
 
-#define cyber2000fb_readb(reg,cfb)     readb((cfb)->regs + (reg))
+#define cyber2000fb_readb(reg, cfb)            readb((cfb)->regs + (reg))
 
 static inline void
 cyber2000_crtcw(unsigned int reg, unsigned int val, struct cfb_info *cfb)
@@ -221,12 +221,8 @@ cyber2000fb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 static void
 cyber2000fb_imageblit(struct fb_info *info, const struct fb_image *image)
 {
-//     struct cfb_info *cfb = (struct cfb_info *)info;
-
-//     if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT)) {
-               cfb_imageblit(info, image);
-               return;
-//     }
+       cfb_imageblit(info, image);
+       return;
 }
 
 static int cyber2000fb_sync(struct fb_info *info)
@@ -277,12 +273,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 
        /*
         * Pseudocolour:
-        *         8     8
+        *         8     8
         * pixel --/--+--/-->  red lut  --> red dac
-        *            |  8
-        *            +--/--> green lut --> green dac
-        *            |  8
-        *            +--/-->  blue lut --> blue dac
+        *            |  8
+        *            +--/--> green lut --> green dac
+        *            |  8
+        *            +--/-->  blue lut --> blue dac
         */
        case FB_VISUAL_PSEUDOCOLOR:
                if (regno >= NR_PALETTE)
@@ -292,9 +288,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
                green >>= 8;
                blue >>= 8;
 
-               cfb->palette[regno].red   = red;
+               cfb->palette[regno].red = red;
                cfb->palette[regno].green = green;
-               cfb->palette[regno].blue  = blue;
+               cfb->palette[regno].blue = blue;
 
                cyber2000fb_writeb(regno, 0x3c8, cfb);
                cyber2000fb_writeb(red, 0x3c9, cfb);
@@ -304,12 +300,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 
        /*
         * Direct colour:
-        *          n     rl
-        *  pixel --/--+--/-->  red lut  --> red dac
-        *             |  gl
-        *             +--/--> green lut --> green dac
-        *             |  bl
-        *             +--/-->  blue lut --> blue dac
+        *         n     rl
+        * pixel --/--+--/-->  red lut  --> red dac
+        *            |  gl
+        *            +--/--> green lut --> green dac
+        *            |  bl
+        *            +--/-->  blue lut --> blue dac
         * n = bpp, rl = red length, gl = green length, bl = blue length
         */
        case FB_VISUAL_DIRECTCOLOR:
@@ -325,9 +321,11 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
                         * to the high 6 bits of the LUT.
                         */
                        cyber2000fb_writeb(regno << 2, 0x3c8, cfb);
-                       cyber2000fb_writeb(cfb->palette[regno >> 1].red, 0x3c9, cfb);
+                       cyber2000fb_writeb(cfb->palette[regno >> 1].red,
+                                          0x3c9, cfb);
                        cyber2000fb_writeb(green, 0x3c9, cfb);
-                       cyber2000fb_writeb(cfb->palette[regno >> 1].blue, 0x3c9, cfb);
+                       cyber2000fb_writeb(cfb->palette[regno >> 1].blue,
+                                          0x3c9, cfb);
 
                        green = cfb->palette[regno << 3].green;
 
@@ -335,9 +333,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
                }
 
                if (var->green.length >= 5 && regno < 32) {
-                       cfb->palette[regno << 3].red   = red;
+                       cfb->palette[regno << 3].red = red;
                        cfb->palette[regno << 3].green = green;
-                       cfb->palette[regno << 3].blue  = blue;
+                       cfb->palette[regno << 3].blue = blue;
 
                        /*
                         * The 5 bits of each colour component are
@@ -351,9 +349,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
                }
 
                if (var->green.length == 4 && regno < 16) {
-                       cfb->palette[regno << 4].red   = red;
+                       cfb->palette[regno << 4].red = red;
                        cfb->palette[regno << 4].green = green;
-                       cfb->palette[regno << 4].blue  = blue;
+                       cfb->palette[regno << 4].blue = blue;
 
                        /*
                         * The 5 bits of each colour component are
@@ -377,12 +375,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 
        /*
         * True colour:
-        *          n     rl
-        *  pixel --/--+--/--> red dac
-        *             |  gl
-        *             +--/--> green dac
-        *             |  bl
-        *             +--/--> blue dac
+        *         n     rl
+        * pixel --/--+--/--> red dac
+        *            |  gl
+        *            +--/--> green dac
+        *            |  bl
+        *            +--/--> blue dac
         * n = bpp, rl = red length, gl = green length, bl = blue length
         */
        case FB_VISUAL_TRUECOLOR:
@@ -494,9 +492,9 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
 
        /* PLL registers */
        cyber2000_grphw(EXT_DCLK_MULT, hw->clock_mult, cfb);
-       cyber2000_grphw(EXT_DCLK_DIV,  hw->clock_div, cfb);
+       cyber2000_grphw(EXT_DCLK_DIV, hw->clock_div, cfb);
        cyber2000_grphw(EXT_MCLK_MULT, cfb->mclk_mult, cfb);
-       cyber2000_grphw(EXT_MCLK_DIV,  cfb->mclk_div, cfb);
+       cyber2000_grphw(EXT_MCLK_DIV, cfb->mclk_div, cfb);
        cyber2000_grphw(0x90, 0x01, cfb);
        cyber2000_grphw(0xb9, 0x80, cfb);
        cyber2000_grphw(0xb9, 0x00, cfb);
@@ -515,8 +513,8 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
        /*
         * Set up accelerator registers
         */
-       cyber2000fb_writew(hw->width,     CO_REG_SRC_WIDTH,  cfb);
-       cyber2000fb_writew(hw->width,     CO_REG_DEST_WIDTH, cfb);
+       cyber2000fb_writew(hw->width, CO_REG_SRC_WIDTH, cfb);
+       cyber2000fb_writew(hw->width, CO_REG_DEST_WIDTH, cfb);
        cyber2000fb_writeb(hw->co_pixfmt, CO_REG_PIXFMT, cfb);
 }
 
@@ -549,15 +547,15 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
 {
        u_int Htotal, Hblankend, Hsyncend;
        u_int Vtotal, Vdispend, Vblankstart, Vblankend, Vsyncstart, Vsyncend;
-#define BIT(v,b1,m,b2) (((v >> b1) & m) << b2)
+#define ENCODE_BIT(v, b1, m, b2) ((((v) >> (b1)) & (m)) << (b2))
 
        hw->crtc[13] = hw->pitch;
        hw->crtc[17] = 0xe3;
        hw->crtc[14] = 0;
        hw->crtc[8]  = 0;
 
-       Htotal      = var->xres + var->right_margin +
-                     var->hsync_len + var->left_margin;
+       Htotal     = var->xres + var->right_margin +
+                    var->hsync_len + var->left_margin;
 
        if (Htotal > 2080)
                return -EINVAL;
@@ -567,15 +565,15 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
        hw->crtc[2] = var->xres >> 3;
        hw->crtc[4] = (var->xres + var->right_margin) >> 3;
 
-       Hblankend   = (Htotal - 4*8) >> 3;
+       Hblankend   = (Htotal - 4 * 8) >> 3;
 
-       hw->crtc[3] = BIT(Hblankend,  0, 0x1f,  0) |
-                     BIT(1,          0, 0x01,  7);
+       hw->crtc[3] = ENCODE_BIT(Hblankend,  0, 0x1f,  0) |
+                     ENCODE_BIT(1,          0, 0x01,  7);
 
        Hsyncend    = (var->xres + var->right_margin + var->hsync_len) >> 3;
 
-       hw->crtc[5] = BIT(Hsyncend,   0, 0x1f,  0) |
-                     BIT(Hblankend,  5, 0x01,  7);
+       hw->crtc[5] = ENCODE_BIT(Hsyncend,   0, 0x1f,  0) |
+                     ENCODE_BIT(Hblankend,  5, 0x01,  7);
 
        Vdispend    = var->yres - 1;
        Vsyncstart  = var->yres + var->lower_margin;
@@ -590,20 +588,20 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
        Vblankend   = Vtotal - 10;
 
        hw->crtc[6]  = Vtotal;
-       hw->crtc[7]  = BIT(Vtotal,     8, 0x01,  0) |
-                       BIT(Vdispend,   8, 0x01,  1) |
-                       BIT(Vsyncstart, 8, 0x01,  2) |
-                       BIT(Vblankstart,8, 0x01,  3) |
-                       BIT(1,          0, 0x01,  4) |
-                       BIT(Vtotal,     9, 0x01,  5) |
-                       BIT(Vdispend,   9, 0x01,  6) |
-                       BIT(Vsyncstart, 9, 0x01,  7);
-       hw->crtc[9]  = BIT(0,          0, 0x1f,  0) |
-                       BIT(Vblankstart,9, 0x01,  5) |
-                       BIT(1,          0, 0x01,  6);
+       hw->crtc[7]  = ENCODE_BIT(Vtotal,     8, 0x01,  0) |
+                       ENCODE_BIT(Vdispend,   8, 0x01,  1) |
+                       ENCODE_BIT(Vsyncstart, 8, 0x01,  2) |
+                       ENCODE_BIT(Vblankstart, 8, 0x01,  3) |
+                       ENCODE_BIT(1,          0, 0x01,  4) |
+                       ENCODE_BIT(Vtotal,     9, 0x01,  5) |
+                       ENCODE_BIT(Vdispend,   9, 0x01,  6) |
+                       ENCODE_BIT(Vsyncstart, 9, 0x01,  7);
+       hw->crtc[9]  = ENCODE_BIT(0,          0, 0x1f,  0) |
+                       ENCODE_BIT(Vblankstart, 9, 0x01,  5) |
+                       ENCODE_BIT(1,          0, 0x01,  6);
        hw->crtc[10] = Vsyncstart;
-       hw->crtc[11] = BIT(Vsyncend,   0, 0x0f,  0) |
-                      BIT(1,          0, 0x01,  7);
+       hw->crtc[11] = ENCODE_BIT(Vsyncend,   0, 0x0f,  0) |
+                      ENCODE_BIT(1,          0, 0x01,  7);
        hw->crtc[12] = Vdispend;
        hw->crtc[15] = Vblankstart;
        hw->crtc[16] = Vblankend;
@@ -615,10 +613,10 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
         * 4=LINECOMP:10 5-IVIDEO 6=FIXCNT
         */
        hw->crtc_ofl =
-               BIT(Vtotal,     10, 0x01,  0) |
-               BIT(Vdispend,   10, 0x01,  1) |
-               BIT(Vsyncstart, 10, 0x01,  2) |
-               BIT(Vblankstart,10, 0x01,  3) |
+               ENCODE_BIT(Vtotal, 10, 0x01, 0) |
+               ENCODE_BIT(Vdispend, 10, 0x01, 1) |
+               ENCODE_BIT(Vsyncstart, 10, 0x01, 2) |
+               ENCODE_BIT(Vblankstart, 10, 0x01, 3) |
                EXT_CRT_VRTOFL_LINECOMP10;
 
        /* woody: set the interlaced bit... */
@@ -750,11 +748,11 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
        var->red.msb_right      = 0;
        var->green.msb_right    = 0;
        var->blue.msb_right     = 0;
+       var->transp.offset      = 0;
+       var->transp.length      = 0;
 
        switch (var->bits_per_pixel) {
        case 8: /* PSEUDOCOLOUR, 256 */
-               var->transp.offset      = 0;
-               var->transp.length      = 0;
                var->red.offset         = 0;
                var->red.length         = 8;
                var->green.offset       = 0;
@@ -766,8 +764,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
        case 16:/* DIRECTCOLOUR, 64k or 32k */
                switch (var->green.length) {
                case 6: /* RGB565, 64k */
-                       var->transp.offset      = 0;
-                       var->transp.length      = 0;
                        var->red.offset         = 11;
                        var->red.length         = 5;
                        var->green.offset       = 5;
@@ -778,8 +774,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 
                default:
                case 5: /* RGB555, 32k */
-                       var->transp.offset      = 0;
-                       var->transp.length      = 0;
                        var->red.offset         = 10;
                        var->red.length         = 5;
                        var->green.offset       = 5;
@@ -802,8 +796,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
                break;
 
        case 24:/* TRUECOLOUR, 16m */
-               var->transp.offset      = 0;
-               var->transp.length      = 0;
                var->red.offset         = 16;
                var->red.length         = 8;
                var->green.offset       = 8;
@@ -830,7 +822,7 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
        mem = var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8);
        if (mem > cfb->fb.fix.smem_len)
                var->yres_virtual = cfb->fb.fix.smem_len * 8 /
-                       (var->bits_per_pixel * var->xres_virtual);
+                                   (var->bits_per_pixel * var->xres_virtual);
 
        if (var->yres > var->yres_virtual)
                var->yres = var->yres_virtual;
@@ -921,7 +913,7 @@ static int cyber2000fb_set_par(struct fb_info *info)
                hw.fetch <<= 1;
        hw.fetch += 1;
 
-       cfb->fb.fix.line_length = var->xres_virtual * var->bits_per_pixel / 8;
+       cfb->fb.fix.line_length = var->xres_virtual * var->bits_per_pixel / 8;
 
        /*
         * Same here - if the size of the video mode exceeds the
@@ -952,7 +944,6 @@ static int cyber2000fb_set_par(struct fb_info *info)
        return 0;
 }
 
-
 /*
  *    Pan or Wrap the Display
  */
@@ -1002,15 +993,15 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
        switch (blank) {
        case FB_BLANK_POWERDOWN:        /* powerdown - both sync lines down */
                sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_0;
-               break;  
+               break;
        case FB_BLANK_HSYNC_SUSPEND:    /* hsync off */
                sync = EXT_SYNC_CTL_VS_NORMAL | EXT_SYNC_CTL_HS_0;
-               break;  
+               break;
        case FB_BLANK_VSYNC_SUSPEND:    /* vsync off */
                sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_NORMAL;
                break;
-       case FB_BLANK_NORMAL:           /* soft blank */
-       default: /* unblank */
+       case FB_BLANK_NORMAL:           /* soft blank */
+       default:                        /* unblank */
                break;
        }
 
@@ -1018,7 +1009,8 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
 
        if (blank <= 1) {
                /* turn on ramdacs */
-               cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN);
+               cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS |
+                                          RAMDAC_RAMPWRDN);
                cyber2000fb_write_ramdac_ctrl(cfb);
        }
 
@@ -1043,7 +1035,8 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
 
        if (blank >= 2) {
                /* turn off ramdacs */
-               cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN;
+               cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS |
+                                        RAMDAC_RAMPWRDN;
                cyber2000fb_write_ramdac_ctrl(cfb);
        }
 
@@ -1068,7 +1061,7 @@ static struct fb_ops cyber2000fb_ops = {
  * of this driver.  It is here solely at the moment to support the other
  * CyberPro modules external to this driver.
  */
-static struct cfb_info         *int_cfb_info;
+static struct cfb_info *int_cfb_info;
 
 /*
  * Enable access to the extended registers
@@ -1085,6 +1078,7 @@ void cyber2000fb_enable_extregs(struct cfb_info *cfb)
                cyber2000_grphw(EXT_FUNC_CTL, old, cfb);
        }
 }
+EXPORT_SYMBOL(cyber2000fb_enable_extregs);
 
 /*
  * Disable access to the extended registers
@@ -1104,11 +1098,13 @@ void cyber2000fb_disable_extregs(struct cfb_info *cfb)
        else
                cfb->func_use_count -= 1;
 }
+EXPORT_SYMBOL(cyber2000fb_disable_extregs);
 
 void cyber2000fb_get_fb_var(struct cfb_info *cfb, struct fb_var_screeninfo *var)
 {
        memcpy(var, &cfb->fb.var, sizeof(struct fb_var_screeninfo));
 }
+EXPORT_SYMBOL(cyber2000fb_get_fb_var);
 
 /*
  * Attach a capture/tv driver to the core CyberX0X0 driver.
@@ -1122,13 +1118,15 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx)
                info->fb_size         = int_cfb_info->fb.fix.smem_len;
                info->enable_extregs  = cyber2000fb_enable_extregs;
                info->disable_extregs = cyber2000fb_disable_extregs;
-               info->info            = int_cfb_info;
+               info->info            = int_cfb_info;
 
-               strlcpy(info->dev_name, int_cfb_info->fb.fix.id, sizeof(info->dev_name));
+               strlcpy(info->dev_name, int_cfb_info->fb.fix.id,
+                       sizeof(info->dev_name));
        }
 
        return int_cfb_info != NULL;
 }
+EXPORT_SYMBOL(cyber2000fb_attach);
 
 /*
  * Detach a capture/tv driver from the core CyberX0X0 driver.
@@ -1136,12 +1134,7 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx)
 void cyber2000fb_detach(int idx)
 {
 }
-
-EXPORT_SYMBOL(cyber2000fb_attach);
 EXPORT_SYMBOL(cyber2000fb_detach);
-EXPORT_SYMBOL(cyber2000fb_enable_extregs);
-EXPORT_SYMBOL(cyber2000fb_disable_extregs);
-EXPORT_SYMBOL(cyber2000fb_get_fb_var);
 
 /*
  * These parameters give
@@ -1205,7 +1198,7 @@ static void cyberpro_init_hw(struct cfb_info *cfb)
        int i;
 
        for (i = 0; i < sizeof(igs_regs); i += 2)
-               cyber2000_grphw(igs_regs[i], igs_regs[i+1], cfb);
+               cyber2000_grphw(igs_regs[i], igs_regs[i + 1], cfb);
 
        if (cfb->id == ID_CYBERPRO_5000) {
                unsigned char val;
@@ -1215,8 +1208,8 @@ static void cyberpro_init_hw(struct cfb_info *cfb)
        }
 }
 
-static struct cfb_info * __devinit
-cyberpro_alloc_fb_info(unsigned int id, char *name)
+static struct cfb_info __devinit *cyberpro_alloc_fb_info(unsigned int id,
+                                                        char *name)
 {
        struct cfb_info *cfb;
 
@@ -1228,9 +1221,9 @@ cyberpro_alloc_fb_info(unsigned int id, char *name)
        cfb->id                 = id;
 
        if (id == ID_CYBERPRO_5000)
-               cfb->ref_ps     = 40690; // 24.576 MHz
+               cfb->ref_ps     = 40690; /* 24.576 MHz */
        else
-               cfb->ref_ps     = 69842; // 14.31818 MHz (69841?)
+               cfb->ref_ps     = 69842; /* 14.31818 MHz (69841?) */
 
        cfb->divisors[0]        = 1;
        cfb->divisors[1]        = 2;
@@ -1282,8 +1275,7 @@ cyberpro_alloc_fb_info(unsigned int id, char *name)
        return cfb;
 }
 
-static void
-cyberpro_free_fb_info(struct cfb_info *cfb)
+static void cyberpro_free_fb_info(struct cfb_info *cfb)
 {
        if (cfb) {
                /*
@@ -1300,8 +1292,7 @@ cyberpro_free_fb_info(struct cfb_info *cfb)
  *  video=cyber2000:font:fontname
  */
 #ifndef MODULE
-static int
-cyber2000fb_setup(char *options)
+static int cyber2000fb_setup(char *options)
 {
        char *opt;
 
@@ -1315,7 +1306,8 @@ cyber2000fb_setup(char *options)
                if (strncmp(opt, "font:", 5) == 0) {
                        static char default_font_storage[40];
 
-                       strlcpy(default_font_storage, opt + 5, sizeof(default_font_storage));
+                       strlcpy(default_font_storage, opt + 5,
+                               sizeof(default_font_storage));
                        default_font = default_font_storage;
                        continue;
                }
@@ -1354,10 +1346,18 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
         * Determine the size of the memory.
         */
        switch (cfb->mem_ctl2 & MEM_CTL2_SIZE_MASK) {
-       case MEM_CTL2_SIZE_4MB: smem_size = 0x00400000; break;
-       case MEM_CTL2_SIZE_2MB: smem_size = 0x00200000; break;
-       case MEM_CTL2_SIZE_1MB: smem_size = 0x00100000; break;
-       default:                smem_size = 0x00100000; break;
+       case MEM_CTL2_SIZE_4MB:
+               smem_size = 0x00400000;
+               break;
+       case MEM_CTL2_SIZE_2MB:
+               smem_size = 0x00200000;
+               break;
+       case MEM_CTL2_SIZE_1MB:
+               smem_size = 0x00100000;
+               break;
+       default:
+               smem_size = 0x00100000;
+               break;
        }
 
        cfb->fb.fix.smem_len   = smem_size;
@@ -1366,8 +1366,8 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
 
        err = -EINVAL;
        if (!fb_find_mode(&cfb->fb.var, &cfb->fb, NULL, NULL, 0,
-                         &cyber2000fb_default_mode, 8)) {
-               printk("%s: no valid mode found\n", cfb->fb.fix.id);
+                         &cyber2000fb_default_mode, 8)) {
+               printk(KERN_ERR "%s: no valid mode found\n", cfb->fb.fix.id);
                goto failed;
        }
 
@@ -1377,7 +1377,7 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
        if (cfb->fb.var.yres_virtual < cfb->fb.var.yres)
                cfb->fb.var.yres_virtual = cfb->fb.var.yres;
 
-//     fb_set_var(&cfb->fb.var, -1, &cfb->fb);
+/*     fb_set_var(&cfb->fb.var, -1, &cfb->fb); */
 
        /*
         * Calculate the hsync and vsync frequencies.  Note that
@@ -1425,20 +1425,20 @@ static void cyberpro_common_resume(struct cfb_info *cfb)
 
 #include <asm/arch/hardware.h>
 
-static int __devinit
-cyberpro_vl_probe(void)
+static int __devinit cyberpro_vl_probe(void)
 {
        struct cfb_info *cfb;
        int err = -ENOMEM;
 
-       if (!request_mem_region(FB_START,FB_SIZE,"CyberPro2010")) return err;
+       if (!request_mem_region(FB_START, FB_SIZE, "CyberPro2010"))
+               return err;
 
        cfb = cyberpro_alloc_fb_info(ID_CYBERPRO_2010, "CyberPro2010");
        if (!cfb)
                goto failed_release;
 
        cfb->dev = NULL;
-       cfb->region = ioremap(FB_START,FB_SIZE);
+       cfb->region = ioremap(FB_START, FB_SIZE);
        if (!cfb->region)
                goto failed_ioremap;
 
@@ -1475,7 +1475,7 @@ failed:
 failed_ioremap:
        cyberpro_free_fb_info(cfb);
 failed_release:
-       release_mem_region(FB_START,FB_SIZE);
+       release_mem_region(FB_START, FB_SIZE);
 
        return err;
 }
@@ -1538,7 +1538,8 @@ static int cyberpro_pci_enable_mmio(struct cfb_info *cfb)
         * Allow the CyberPro to accept PCI burst accesses
         */
        if (cfb->id == ID_CYBERPRO_2010) {
-               printk(KERN_INFO "%s: NOT enabling PCI bursts\n", cfb->fb.fix.id);
+               printk(KERN_INFO "%s: NOT enabling PCI bursts\n",
+                      cfb->fb.fix.id);
        } else {
                val = cyber2000_grphr(EXT_BUS_CTL, cfb);
                if (!(val & EXT_BUS_CTL_PCIBURST_WRITE)) {
@@ -1688,9 +1689,10 @@ static int cyberpro_pci_resume(struct pci_dev *dev)
 }
 
 static struct pci_device_id cyberpro_pci_table[] = {
-//     Not yet
-//     { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682,
-//             PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 },
+/*     Not yet
+ *     { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682,
+ *             PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 },
+ */
        { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2000,
                PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_CYBERPRO_2000 },
        { PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2010,
@@ -1700,7 +1702,7 @@ static struct pci_device_id cyberpro_pci_table[] = {
        { 0, }
 };
 
-MODULE_DEVICE_TABLE(pci,cyberpro_pci_table);
+MODULE_DEVICE_TABLE(pci, cyberpro_pci_table);
 
 static struct pci_driver cyberpro_driver = {
        .name           = "CyberPro",
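
The key rename in the long cyber2000fb cleanup above is the CRTC field-packing macro: BIT(v,b1,m,b2) becomes ENCODE_BIT(v, b1, m, b2) and gains full parenthesization, so it no longer shadows the generic BIT() and is safe with expression arguments. What it computes, checked stand-alone (plain C, macro body copied from the patch):

/* ENCODE_BIT(v, b1, m, b2): take v's bits starting at b1, mask with
 * m, and place the result at bit position b2. */
#include <assert.h>

#define ENCODE_BIT(v, b1, m, b2) ((((v) >> (b1)) & (m)) << (b2))

int main(void)
{
        /* bit 8 of 0x1ff, repositioned at bit 0 */
        assert(ENCODE_BIT(0x1ff, 8, 0x01, 0) == 1);
        /* parenthesized (v), so expression arguments behave */
        assert(ENCODE_BIT(1 + 0x1fe, 8, 0x01, 0) == 1);
        return 0;
}
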
index e8c5dcdd8813fe2d02343e8a368799222ec5b235..189c3d64138307488010037c06e325255acd344a 100644 (file)
@@ -77,9 +77,6 @@
 #define CONF_DIRTYDETECTION_OFF        (0x600)
 #define CONF_DIRTYDETECTION_ON (0x601)
 
-/* Set the corresponding bit. */
-#define BIT(n) (0x1U << (n))
-
 struct dumchannel_uf {
        int channelnr;
        u32 *dirty;
index 38bd3737259973fcaebf683a2a5810283e46ffd8..a684b1e873723732836b638b3c52cece15cc0f92 100644 (file)
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -19,7 +20,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 #include <asm/arch/at91_st.h>
 
index 7150fb945eaf77c4a2774fdbb884fe3dec8a7d9e..e3a29c3023095ed8ca8e53bf667f16ce865bf095 100644 (file)
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -18,7 +19,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/arch/regs-timer.h>
index 719b066f73c41cd8cbf655d7f2afa0c7ad81e774..635ca454f56bafc6f52f4752a6be5ebdddc17940 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/moduleparam.h>
 #include <linux/clk.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/hardware.h>
-#include <asm/bitops.h>
 
 #include <asm/arch/prcm.h>
 
index 3475f47aaa45766cc48f3cf7361683ddfede5aee..34a2b3b81800785d8e2967ddd388cfa01905658a 100644 (file)
 #include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #ifdef CONFIG_ARCH_PXA
 #include <asm/arch/pxa-regs.h>
 #endif
 
 #include <asm/hardware.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #define OSCR_FREQ              CLOCK_TICK_RATE
index e31f3691b151248ee8d3e58d3c24948a61dbe637..cc28a69246a7dc8536de2e9c85bb256ad8bbb8e7 100644 (file)
@@ -220,7 +220,7 @@ config JBD
 
 config JBD_DEBUG
        bool "JBD (ext3) debugging support"
-       depends on JBD
+       depends on JBD && DEBUG_FS
        help
          If you are using the ext3 journaled file system (or potentially any
          other file system/device using JBD), this option allows you to
@@ -229,10 +229,10 @@ config JBD_DEBUG
          debugging output will be turned off.
 
          If you select Y here, then you will be able to turn on debugging
-         with "echo N > /proc/sys/fs/jbd-debug", where N is a number between
-         1 and 5, the higher the number, the more debugging output is
-         generated.  To turn debugging off again, do
-         "echo 0 > /proc/sys/fs/jbd-debug".
+         with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
+         number between 1 and 5, the higher the number, the more debugging
+         output is generated.  To turn debugging off again, do
+         "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
 
 config JBD2
        tristate
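
JBD_DEBUG now depends on DEBUG_FS because the jbd-debug knob moved from /proc/sys/fs to debugfs, and the help text follows it to /sys/kernel/debug/jbd/jbd-debug. For flavour, a hedged sketch of how such a tunable is typically exported (this is not the jbd code itself; it assumes debugfs is mounted at /sys/kernel/debug):

#include <linux/debugfs.h>
#include <linux/module.h>

static u8 demo_debug_level;
static struct dentry *demo_dir, *demo_file;

static int __init demo_init(void)
{
        demo_dir = debugfs_create_dir("demo", NULL);
        demo_file = debugfs_create_u8("demo-debug", 0644,
                                      demo_dir, &demo_debug_level);
        return 0;
}

static void __exit demo_exit(void)
{
        debugfs_remove(demo_file);      /* file before directory */
        debugfs_remove(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Writing to /sys/kernel/debug/demo/demo-debug would then adjust demo_debug_level, much as echoing to jbd/jbd-debug adjusts the journal debug level.
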
index e7204d71acc94660f4a50a3d2c10eb2a6a816e63..45f5992a0957173bc0c7e744028ef40e5743aa8e 100644 (file)
@@ -80,7 +80,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 
        *uid = current->uid;
        *gid = current->gid;
-       *pgrp = process_group(current);
+       *pgrp = task_pgrp_nr(current);
 
        *minproto = *maxproto = AUTOFS_PROTO_VERSION;
 
index c1489533277ae87d1f1d5358d7248e35ef5d6eb5..5efff3c0d886985351bbe1588da8245992974525 100644 (file)
@@ -214,8 +214,8 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
 
        oz_mode = autofs_oz_mode(sbi);
        DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
-                               "oz_mode = %d\n", pid_nr(task_pid(current)),
-                               process_group(current), sbi->catatonic,
+                               "oz_mode = %d\n", task_pid_nr(current),
+                               task_pgrp_nr(current), sbi->catatonic,
                                oz_mode));
 
        /*
@@ -536,7 +536,7 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
        struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
        void __user *argp = (void __user *)arg;
 
-       DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current)));
+       DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
 
        if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
             _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
index d85f42fa92060003b7000f3e686a5070e3da908e..2d4ae40718d9ecdc35119a82a10d264ce5c178ea 100644 (file)
@@ -131,7 +131,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-       return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+       return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
index cd81f0836671d436f5a55049812b8d854e78233c..7f05d6ccdb130c8171a80af52c7266ebb14bc2f6 100644 (file)
@@ -226,7 +226,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 
        *uid = current->uid;
        *gid = current->gid;
-       *pgrp = process_group(current);
+       *pgrp = task_pgrp_nr(current);
 
        *minproto = AUTOFS_MIN_PROTO_VERSION;
        *maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -323,7 +323,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
        sbi->pipe = NULL;
        sbi->catatonic = 1;
        sbi->exp_timeout = 0;
-       sbi->oz_pgrp = process_group(current);
+       sbi->oz_pgrp = task_pgrp_nr(current);
        sbi->sb = s;
        sbi->version = 0;
        sbi->sub_version = 0;
index 45ff3d63b758bd09feea0162cc0a0cbeca0473ad..2bbcc8151dc343eae48914aa95748fef401238f7 100644 (file)
@@ -582,7 +582,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
        oz_mode = autofs4_oz_mode(sbi);
 
        DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
-                current->pid, process_group(current), sbi->catatonic, oz_mode);
+                current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
        unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
        if (!unhashed) {
@@ -976,7 +976,7 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
        void __user *p = (void __user *)arg;
 
        DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u",
-               cmd,arg,sbi,process_group(current));
+               cmd,arg,sbi,task_pgrp_nr(current));
 
        if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
             _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
index 6e2f3b8dde7f2c9dd636101b86b6049580d8143d..ba8de7ca260bea7e0cf456b4a7bfe56434ad502f 100644 (file)
@@ -1383,10 +1383,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
-       prstatus->pr_pid = p->pid;
-       prstatus->pr_ppid = p->parent->pid;
-       prstatus->pr_pgrp = process_group(p);
-       prstatus->pr_sid = process_session(p);
+       prstatus->pr_pid = task_pid_vnr(p);
+       prstatus->pr_ppid = task_pid_vnr(p->parent);
+       prstatus->pr_pgrp = task_pgrp_vnr(p);
+       prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                /*
                 * This is the record for the group leader.  Add in the
@@ -1429,10 +1429,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;
 
-       psinfo->pr_pid = p->pid;
-       psinfo->pr_ppid = p->parent->pid;
-       psinfo->pr_pgrp = process_group(p);
-       psinfo->pr_sid = process_session(p);
+       psinfo->pr_pid = task_pid_vnr(p);
+       psinfo->pr_ppid = task_pid_vnr(p->parent);
+       psinfo->pr_pgrp = task_pgrp_vnr(p);
+       psinfo->pr_sid = task_session_vnr(p);
 
        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
index 033861c6b8f13028905826bcd6424bd3121f8eb2..32649f2a16544502c8affe2d147ac89d1e02ae50 100644 (file)
@@ -1342,10 +1342,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
-       prstatus->pr_pid = p->pid;
-       prstatus->pr_ppid = p->parent->pid;
-       prstatus->pr_pgrp = process_group(p);
-       prstatus->pr_sid = process_session(p);
+       prstatus->pr_pid = task_pid_vnr(p);
+       prstatus->pr_ppid = task_pid_vnr(p->parent);
+       prstatus->pr_pgrp = task_pgrp_vnr(p);
+       prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                /*
                 * This is the record for the group leader.  Add in the
@@ -1391,10 +1391,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;
 
-       psinfo->pr_pid = p->pid;
-       psinfo->pr_ppid = p->parent->pid;
-       psinfo->pr_pgrp = process_group(p);
-       psinfo->pr_sid = process_session(p);
+       psinfo->pr_pid = task_pid_vnr(p);
+       psinfo->pr_ppid = task_pid_vnr(p->parent);
+       psinfo->pr_pgrp = task_pgrp_vnr(p);
+       psinfo->pr_sid = task_session_vnr(p);
 
        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
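
The core-dump writers in binfmt_elf and binfmt_elf_fdpic switch to the _vnr accessor family so a dump taken inside a PID namespace records the IDs as the dumped process saw them: task_pid_vnr(), task_pgrp_vnr() and task_session_vnr() return namespace-local values, where the _nr forms used elsewhere return global ones. A sketch of filling a hypothetical record with the same calls:

#include <linux/sched.h>

struct demo_ids { pid_t pid, ppid, pgrp, sid; };

static void demo_fill_ids(struct demo_ids *ids, struct task_struct *p)
{
        ids->pid  = task_pid_vnr(p);
        ids->ppid = task_pid_vnr(p->parent);
        ids->pgrp = task_pgrp_vnr(p);
        ids->sid  = task_session_vnr(p);
}
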
index bed6215c0794ef8d9264b113aff767dc85ef4025..3d419163c3d3a345f0a3bd06b230f0186038d60b 100644 (file)
@@ -1,3 +1,19 @@
+Version 1.51
+------------
+Fix memory leak in statfs when mounted to very old servers (e.g.
+Windows 9x).  Add new feature "POSIX open", which allows servers
+that support the current POSIX Extensions to provide better semantics
+(e.g. delete for open files opened with posix open).  Take into
+account umask on posix mkdir, not just older-style mkdir.  Add the
+ability to mount an IPC$ share (which allows CIFS named pipes to be
+opened, read and written as if they were files).  When the first tree
+connect fails (e.g. due to a signing negotiation failure), fix a
+leak that caused cifsd not to stop and rmmod to fail to clean up
+the cifs_request_buffers pool.  Fix a problem with POSIX Open/Mkdir on
+big-endian architectures.  Fix possible memory corruption when
+EAGAIN is returned on kern_recvmsg.  Return a better error if the
+server requires packet signing but the client has disabled it.
+
 Version 1.50
 ------------
 Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
@@ -6,7 +22,10 @@ done with "serverino" mount option).  Add support for POSIX Unlink
 Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
 mount option to allow disabling the CIFS Unix Extensions for just
 that mount. Fix hang on spinlock in find_writable_file (race when
-reopening file after session crash).
+reopening file after session crash).  A byte-range unlock request to
+a Windows server could unlock more bytes (in the server's copy of the
+file) than intended if the start of the unlock request was well before
+the start of a previous byte-range lock that we issued.
 
 Version 1.49
 ------------
index 6ecd9d6ba3f3734a572ee757faf1773190069d35..ff6ba8d823f03a91b3ffad88202c75708d1c9eba 100644 (file)
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_CIFS) += cifs.o
 
-cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o
+cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o cifsacl.o
index f50a88d58f782ee18d06a15650ade6eac3661e6c..2a01f3ef96a0503332da509b81e4c788f07113c2 100644 (file)
@@ -385,10 +385,9 @@ asn1_oid_decode(struct asn1_ctx *ctx,
        unsigned long *optr;
 
        size = eoc - ctx->pointer + 1;
-       *oid = kmalloc(size * sizeof (unsigned long), GFP_ATOMIC);
-       if (*oid == NULL) {
+       *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
+       if (*oid == NULL)
                return 0;
-       }
 
        optr = *oid;
 
@@ -581,9 +580,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
                        return 0;
                } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
                           || (tag != ASN1_SEQ)) {
-                       cFYI(1,
-                            ("Exit 6 cls = %d con = %d tag = %d end = %p (%d)",
-                             cls, con, tag, end, *end));
+                       cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
+                               cls, con, tag, end, *end));
                }
 
                if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
index 1bf8cf522ad668a2832f095f1b75aaf40a7fde0a..73c4c419663c1dc1a3558d2df405fcb58bc07e66 100644 (file)
@@ -209,13 +209,16 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
                i++;
                tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
                dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
-               length =
-                   sprintf(buf,
-                           "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x "
-                           "Attributes: 0x%x\nPathComponentMax: %d Status: %d",
-                           i, tcon->treeName,
-                           atomic_read(&tcon->useCount),
-                           tcon->nativeFileSystem,
+               length = sprintf(buf, "\n%d) %s Uses: %d ", i,
+                                tcon->treeName, atomic_read(&tcon->useCount));
+               buf += length;
+               if (tcon->nativeFileSystem) {
+                       length = sprintf(buf, "Type: %s ",
+                                        tcon->nativeFileSystem);
+                       buf += length;
+               }
+               length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x"
+                                "\nPathComponentMax: %d Status: %d",
                            le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
                            le32_to_cpu(tcon->fsAttrInfo.Attributes),
                            le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
@@ -876,11 +879,16 @@ security_flags_write(struct file *file, const char __user *buffer,
        if (count < 3) {
                /* single char or single char followed by null */
                c = flags_string[0];
-               if (c == '0' || c == 'n' || c == 'N')
+               if (c == '0' || c == 'n' || c == 'N') {
                        extended_security = CIFSSEC_DEF; /* default */
-               else if (c == '1' || c == 'y' || c == 'Y')
+                       return count;
+               } else if (c == '1' || c == 'y' || c == 'Y') {
                        extended_security = CIFSSEC_MAX;
-               return count;
+                       return count;
+               } else if (!isdigit(c)) {
+                       cERROR(1, ("invalid flag %c", c));
+                       return -EINVAL;
+               }
        }
        /* else we have a number */
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
new file mode 100644 (file)
index 0000000..e8e5635
--- /dev/null
@@ -0,0 +1,333 @@
+/*
+ *   fs/cifs/cifsacl.c
+ *
+ *   Copyright (C) International Business Machines  Corp., 2007
+ *   Author(s): Steve French (sfrench@us.ibm.com)
+ *
+ *   Contains the routines for mapping CIFS/NTFS ACLs
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/fs.h>
+#include "cifspdu.h"
+#include "cifsglob.h"
+#include "cifsacl.h"
+#include "cifsproto.h"
+#include "cifs_debug.h"
+
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+
+static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
+       {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
+       {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
+       {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
+       {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
+       {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"},
+       {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"},
+       {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"}
+};
+
+
+/* security id for everyone */
+static const struct cifs_sid sid_everyone =
+               {1, 1, {0, 0, 0, 0, 0, 0}, {} };
+/* group users */
+static const struct cifs_sid sid_user =
+               {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
+
+
+int match_sid(struct cifs_sid *ctsid)
+{
+       int i, j;
+       int num_subauth, num_sat, num_saw;
+       struct cifs_sid *cwsid;
+
+       if (!ctsid)
+               return (-1);
+
+       for (i = 0; i < NUM_WK_SIDS; ++i) {
+               cwsid = &(wksidarr[i].cifssid);
+
+               /* compare the revision */
+               if (ctsid->revision != cwsid->revision)
+                       continue;
+
+               /* compare all of the six auth values */
+               for (j = 0; j < 6; ++j) {
+                       if (ctsid->authority[j] != cwsid->authority[j])
+                               break;
+               }
+               if (j < 6)
+                       continue; /* all of the auth values did not match */
+
+               /* compare all of the subauth values if any */
+               num_sat = ctsid->num_subauth;
+               num_saw = cwsid->num_subauth;
+               num_subauth = num_sat < num_saw ? num_sat : num_saw;
+               if (num_subauth) {
+                       for (j = 0; j < num_subauth; ++j) {
+                               if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
+                                       break;
+                       }
+                       if (j < num_subauth)
+                               continue; /* all sub_auth values do not match */
+               }
+
+               cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
+               return (0); /* sids compare/match */
+       }
+
+       cFYI(1, ("No matching sid"));
+       return (-1);
+}
+
+/* If the two SIDs (roughly equivalent to a UUID for a user or group)
+   are the same, return 1; if they do not match, return 0. */
+int compare_sids(struct cifs_sid *ctsid, struct cifs_sid *cwsid)
+{
+       int i;
+       int num_subauth, num_sat, num_saw;
+
+       if ((!ctsid) || (!cwsid))
+               return (0);
+
+       /* compare the revision */
+       if (ctsid->revision != cwsid->revision)
+               return (0);
+
+       /* compare all of the six auth values */
+       for (i = 0; i < 6; ++i) {
+               if (ctsid->authority[i] != cwsid->authority[i])
+                       return (0);
+       }
+
+       /* compare all of the subauth values if any */
+       num_sat = ctsid->num_subauth;
+       num_saw = cwsid->num_subauth;
+       num_subauth = num_sat < num_saw ? num_sat : num_saw;
+       if (num_subauth) {
+               for (i = 0; i < num_subauth; ++i) {
+                       if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
+                               return (0);
+               }
+       }
+
+       return (1); /* sids compare/match */
+}
+
+
+static void parse_ace(struct cifs_ace *pace, char *end_of_acl)
+{
+       int num_subauth;
+
+       /* validate that we do not go past end of acl */
+
+       /* XXX this if statement can be removed
+       if (end_of_acl < (char *)pace + sizeof(struct cifs_ace)) {
+               cERROR(1, ("ACL too small to parse ACE"));
+               return;
+       } */
+
+       num_subauth = pace->num_subauth;
+       if (num_subauth) {
+#ifdef CONFIG_CIFS_DEBUG2
+               int i;
+               cFYI(1, ("ACE revision %d num_subauth %d",
+                       pace->revision, pace->num_subauth));
+               for (i = 0; i < num_subauth; ++i) {
+                       cFYI(1, ("ACE sub_auth[%d]: 0x%x", i,
+                               le32_to_cpu(pace->sub_auth[i])));
+               }
+
+               /* BB add length check to make sure that we do not have huge
+                       num auths and therefore go off the end */
+
+               cFYI(1, ("RID %d", le32_to_cpu(pace->sub_auth[num_subauth-1])));
+#endif
+       }
+
+       return;
+}
+
+static void parse_ntace(struct cifs_ntace *pntace, char *end_of_acl)
+{
+       /* validate that we do not go past end of acl */
+       if (end_of_acl < (char *)pntace + sizeof(struct cifs_ntace)) {
+               cERROR(1, ("ACL too small to parse NT ACE"));
+               return;
+       }
+
+#ifdef CONFIG_CIFS_DEBUG2
+       cFYI(1, ("NTACE type %d flags 0x%x size %d, access Req 0x%x",
+               pntace->type, pntace->flags, pntace->size,
+               pntace->access_req));
+#endif
+       return;
+}
+
+
+
+static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
+                      struct cifs_sid *pownersid, struct cifs_sid *pgrpsid)
+{
+       int i;
+       int num_aces = 0;
+       int acl_size;
+       char *acl_base;
+       struct cifs_ntace **ppntace;
+       struct cifs_ace **ppace;
+
+       /* BB need to add parm so we can store the SID BB */
+
+       /* validate that we do not go past end of acl */
+       if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
+               cERROR(1, ("ACL too small to parse DACL"));
+               return;
+       }
+
+#ifdef CONFIG_CIFS_DEBUG2
+       cFYI(1, ("DACL revision %d size %d num aces %d",
+               le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
+               le32_to_cpu(pdacl->num_aces)));
+#endif
+
+       acl_base = (char *)pdacl;
+       acl_size = sizeof(struct cifs_acl);
+
+       num_aces = le32_to_cpu(pdacl->num_aces);
+       if (num_aces  > 0) {
+               ppntace = kmalloc(num_aces * sizeof(struct cifs_ntace *),
+                               GFP_KERNEL);
+               ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
+                               GFP_KERNEL);
+               if (!ppntace || !ppace) {
+                       /* allocation failed; bail out rather than oops */
+                       kfree(ppntace);
+                       kfree(ppace);
+                       return;
+               }
+
+/*             cifscred->cecount = pdacl->num_aces;
+               cifscred->ntaces = kmalloc(num_aces *
+                       sizeof(struct cifs_ntace *), GFP_KERNEL);
+               cifscred->aces = kmalloc(num_aces *
+                       sizeof(struct cifs_ace *), GFP_KERNEL);*/
+
+               for (i = 0; i < num_aces; ++i) {
+                       ppntace[i] = (struct cifs_ntace *)
+                                       (acl_base + acl_size);
+                       ppace[i] = (struct cifs_ace *) ((char *)ppntace[i] +
+                                       sizeof(struct cifs_ntace));
+
+                       parse_ntace(ppntace[i], end_of_acl);
+                       if (end_of_acl < ((char *)ppace[i] +
+                                       (le16_to_cpu(ppntace[i]->size) -
+                                       sizeof(struct cifs_ntace)))) {
+                               cERROR(1, ("ACL too small to parse ACE"));
+                               break;
+                       } else
+                               parse_ace(ppace[i], end_of_acl);
+
+/*                     memcpy((void *)(&(cifscred->ntaces[i])),
+                               (void *)ppntace[i],
+                               sizeof(struct cifs_ntace));
+                       memcpy((void *)(&(cifscred->aces[i])),
+                               (void *)ppace[i],
+                               sizeof(struct cifs_ace)); */
+
+                       acl_base = (char *)ppntace[i];
+                       acl_size = le16_to_cpu(ppntace[i]->size);
+               }
+
+               kfree(ppace);
+               kfree(ppntace);
+       }
+
+       return;
+}
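
The loop above assumes the on-the-wire DACL layout: a cifs_acl header followed by packed ACEs, where each ACE is a fixed cifs_ntace (type/flags/size/access mask) immediately followed by its variable-length cifs_ace SID part, and the ntace size field is the stride to the next ACE. A small hedged helper restating that stride arithmetic:

/* Illustrative restatement of the pointer walk in parse_dacl():
 * given the current ACE header, le16_to_cpu(size) bytes later is the
 * next ACE header (size covers both the cifs_ntace and cifs_ace parts). */
static struct cifs_ntace *next_ntace(struct cifs_ntace *cur)
{
	return (struct cifs_ntace *)((char *)cur + le16_to_cpu(cur->size));
}
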
+
+
+static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
+{
+
+       /* BB need to add parm so we can store the SID BB */
+
+       /* validate that we do not go past end of acl */
+       if (end_of_acl < (char *)psid + sizeof(struct cifs_sid)) {
+               cERROR(1, ("ACL too small to parse SID"));
+               return -EINVAL;
+       }
+
+       if (psid->num_subauth) {
+#ifdef CONFIG_CIFS_DEBUG2
+               int i;
+               cFYI(1, ("SID revision %d num_auth %d First subauth 0x%x",
+                       psid->revision, psid->num_subauth, psid->sub_auth[0]));
+
+               for (i = 0; i < psid->num_subauth; i++) {
+                       cFYI(1, ("SID sub_auth[%d]: 0x%x ", i,
+                               le32_to_cpu(psid->sub_auth[i])));
+               }
+
+               /* BB add length check to make sure that we do not have huge
+                       num auths and therefore go off the end */
+               cFYI(1, ("RID 0x%x",
+                       le32_to_cpu(psid->sub_auth[psid->num_subauth-1])));
+#endif
+       }
+
+       return 0;
+}
+
+
+/* Convert CIFS ACL to POSIX form */
+int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len)
+{
+       int rc;
+       struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
+       struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
+       char *end_of_acl = ((char *)pntsd) + acl_len;
+
+       owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+                               le32_to_cpu(pntsd->osidoffset));
+       group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+                               le32_to_cpu(pntsd->gsidoffset));
+       dacl_ptr = (struct cifs_acl *)((char *)pntsd +
+                               le32_to_cpu(pntsd->dacloffset));
+#ifdef CONFIG_CIFS_DEBUG2
+       cFYI(1, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
+                "sacloffset 0x%x dacloffset 0x%x",
+                pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
+                le32_to_cpu(pntsd->gsidoffset),
+                le32_to_cpu(pntsd->sacloffset),
+                le32_to_cpu(pntsd->dacloffset)));
+#endif
+       rc = parse_sid(owner_sid_ptr, end_of_acl);
+       if (rc)
+               return rc;
+
+       rc = parse_sid(group_sid_ptr, end_of_acl);
+       if (rc)
+               return rc;
+
+       parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr);
+
+/*     cifscred->uid = owner_sid_ptr->rid;
+       cifscred->gid = group_sid_ptr->rid;
+       memcpy((void *)(&(cifscred->osid)), (void *)owner_sid_ptr,
+                       sizeof (struct cifs_sid));
+       memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
+                       sizeof (struct cifs_sid)); */
+
+
+       return (0);
+}
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
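
The descriptor parsed above is self-relative: osidoffset, gsidoffset, sacloffset and dacloffset are all byte offsets from the start of the cifs_ntsd header. Restated as a hedged helper (the function name is illustrative):

/* Resolve an offset field of a self-relative security descriptor,
 * exactly as parse_sec_desc() does inline for owner, group and DACL. */
static void *ntsd_field(struct cifs_ntsd *pntsd, __le32 offset)
{
	return (char *)pntsd + le32_to_cpu(offset);
}

/* e.g.: owner = (struct cifs_sid *)ntsd_field(pntsd, pntsd->osidoffset); */
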
index 5eff35d6e564ac95550697f7b884a9612ac667ca..420f87813647b9b9307ce484b3813f59180247a4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/cifsacl.h
  *
- *   Copyright (c) International Business Machines  Corp., 2005
+ *   Copyright (c) International Business Machines  Corp., 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
 #ifndef _CIFSACL_H
 #define _CIFSACL_H
 
+
+#define NUM_AUTHS 6 /* number of authority fields */
+#define NUM_SUBAUTHS 5 /* number of sub authority fields */
+#define NUM_WK_SIDS 7 /* number of well known sids */
+#define SIDNAMELENGTH 20 /* long enough for the ones we care about */
+
+#define READ_BIT        0x4
+#define WRITE_BIT       0x2
+#define EXEC_BIT        0x1
+
+#define UBITSHIFT      6
+#define GBITSHIFT      3
+
+struct cifs_ntsd {
+       __le16 revision; /* revision level */
+       __le16 type;
+       __le32 osidoffset;
+       __le32 gsidoffset;
+       __le32 sacloffset;
+       __le32 dacloffset;
+} __attribute__((packed));
+
 struct cifs_sid {
        __u8 revision; /* revision level */
-       __u8 num_subauths;
+       __u8 num_subauth;
+       __u8 authority[6];
+       __le32 sub_auth[5]; /* sub_auth[num_subauth] */ /* BB FIXME endianness BB */
+} __attribute__((packed));
+
+struct cifs_acl {
+       __le16 revision; /* revision level */
+       __le16 size;
+       __le32 num_aces;
+} __attribute__((packed));
+
+struct cifs_ntace { /* first part of ACE which contains perms */
+       __u8 type;
+       __u8 flags;
+       __le16 size;
+       __le32 access_req;
+} __attribute__((packed));
+
+struct cifs_ace { /* last part of ACE which includes user info */
+       __u8 revision; /* revision level */
+       __u8 num_subauth;
        __u8 authority[6];
-       __u32 sub_auth[4];
-       /* next sub_auth if any ... */
+       __le32 sub_auth[5];
 } __attribute__((packed));
 
-/* everyone */
-/* extern const struct cifs_sid sid_everyone;*/
-/* group users */
-/* extern const struct cifs_sid sid_user;*/
+struct cifs_wksid {
+       struct cifs_sid cifssid;
+       char sidname[SIDNAMELENGTH];
+} __attribute__((packed));
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+
+extern int match_sid(struct cifs_sid *);
+extern int compare_sids(struct cifs_sid *, struct cifs_sid *);
+
+#endif /*  CONFIG_CIFS_EXPERIMENTAL */
 
 #endif /* _CIFSACL_H */
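
Tying the structures together: a textual SID S-R-I-S1-...-Sn maps to struct cifs_sid as revision R, the 48-bit identifier authority I stored big-endian in authority[NUM_AUTHS], and up to NUM_SUBAUTHS little-endian sub-authorities. A debug-only sketch of a printer built on that layout (not part of the patch; assumes num_subauth has already been validated):

static void sketch_print_sid(const struct cifs_sid *sid)
{
	__u64 auth = 0;
	int i;

	/* the six authority bytes form one big-endian integer */
	for (i = 0; i < NUM_AUTHS; i++)
		auth = (auth << 8) | sid->authority[i];
	printk("S-%u-%llu", sid->revision, (unsigned long long)auth);
	for (i = 0; i < sid->num_subauth && i < NUM_SUBAUTHS; i++)
		printk("-%u", le32_to_cpu(sid->sub_auth[i])); /* host order */
	printk("\n");
}
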
index 36272293027d41902231b0725aba64ba680e49dc..632070b4275d91ba46baa1d735b578aeca555462 100644 (file)
@@ -345,7 +345,7 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
        user = kmalloc(2 + (len * 2), GFP_KERNEL);
        if (user == NULL)
                goto calc_exit_2;
-       len = cifs_strtoUCS(user, ses->userName, len, nls_cp);
+       len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
        UniStrupr(user);
        hmac_md5_update((char *)user, 2*len, pctxt);
 
@@ -356,7 +356,8 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
                domain = kmalloc(2 + (len * 2), GFP_KERNEL);
                if (domain == NULL)
                        goto calc_exit_1;
-               len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp);
+               len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
+                                       nls_cp);
                /* the following line was removed since it did not work well
                   with a lower-cased domain name that was passed as an option.
                   Maybe converting the domain name earlier makes sense */
index ba8f7868cb23c84a5e713910f7b7d5a72bfc6f8a..a6fbea57c4b1e6d895375ed10e8ef636995d763e 100644 (file)
 static struct quotactl_ops cifs_quotactl_ops;
 #endif /* QUOTA */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-extern struct export_operations cifs_export_ops;
-#endif /* EXPERIMENTAL */
-
 int cifsFYI = 0;
 int cifsERROR = 1;
 int traceSMB = 0;
@@ -240,9 +236,9 @@ static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
 
        cifs_sb = CIFS_SB(inode->i_sb);
 
-       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
                return 0;
-       else /* file mode might have been restricted at mount time
+       else /* file mode might have been restricted at mount time
                on the client (above and beyond ACL on servers) for
                servers which do not support setting and viewing mode bits,
                so allowing client to check permissions is useful */
@@ -312,15 +308,15 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
                                        seq_printf(s, ",domain=%s",
                                           cifs_sb->tcon->ses->domainName);
                        }
+                       if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
+                          !(cifs_sb->tcon->unix_ext))
+                               seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
+                       if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
+                          !(cifs_sb->tcon->unix_ext))
+                               seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
                }
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
                        seq_printf(s, ",posixpaths");
-               if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
-                  !(cifs_sb->tcon->unix_ext))
-                       seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
-               if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
-                  !(cifs_sb->tcon->unix_ext))
-                       seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
                seq_printf(s, ",rsize=%d", cifs_sb->rsize);
                seq_printf(s, ",wsize=%d", cifs_sb->wsize);
        }
@@ -346,7 +342,7 @@ int cifs_xquota_set(struct super_block *sb, int quota_type, qid_t qid,
        if (pTcon) {
                cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
        } else {
-               return -EIO;
+               rc = -EIO;
        }
 
        FreeXid(xid);
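
The `return -EIO` to `rc = -EIO` change is more than style: an xid has already been allocated at this point, so returning early would skip FreeXid() and unbalance the xid accounting. The single-exit pattern in isolation (a sketch with illustrative structure):

	int xid, rc = 0;

	xid = GetXid();
	if (!pTcon)
		rc = -EIO;	/* record the error; do not return early */
	/* ... normal work ... */
	FreeXid(xid);		/* cleanup always runs */
	return rc;
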
@@ -716,7 +712,7 @@ static int
 cifs_init_inodecache(void)
 {
        cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
-                                             sizeof (struct cifsInodeInfo),
+                                             sizeof(struct cifsInodeInfo),
                                              0, (SLAB_RECLAIM_ACCOUNT|
                                                SLAB_MEM_SPREAD),
                                              cifs_init_once);
@@ -816,8 +812,8 @@ static int
 cifs_init_mids(void)
 {
        cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids",
-                               sizeof (struct mid_q_entry), 0,
-                               SLAB_HWCACHE_ALIGN, NULL);
+                                           sizeof(struct mid_q_entry), 0,
+                                           SLAB_HWCACHE_ALIGN, NULL);
        if (cifs_mid_cachep == NULL)
                return -ENOMEM;
 
@@ -829,8 +825,8 @@ cifs_init_mids(void)
        }
 
        cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs",
-                               sizeof (struct oplock_q_entry), 0,
-                               SLAB_HWCACHE_ALIGN, NULL);
+                                       sizeof(struct oplock_q_entry), 0,
+                                       SLAB_HWCACHE_ALIGN, NULL);
        if (cifs_oplock_cachep == NULL) {
                mempool_destroy(cifs_mid_poolp);
                kmem_cache_destroy(cifs_mid_cachep);
@@ -882,7 +878,8 @@ static int cifs_oplock_thread(void *dummyarg)
                                the call */
                                /* mutex_lock(&inode->i_mutex);*/
                                if (S_ISREG(inode->i_mode)) {
-                                       rc = filemap_fdatawrite(inode->i_mapping);
+                                       rc =
+                                          filemap_fdatawrite(inode->i_mapping);
                                        if (CIFS_I(inode)->clientCanCacheRead
                                                                         == 0) {
                                                filemap_fdatawait(inode->i_mapping);
@@ -907,8 +904,7 @@ static int cifs_oplock_thread(void *dummyarg)
                                            0 /* len */ , 0 /* offset */, 0,
                                            0, LOCKING_ANDX_OPLOCK_RELEASE,
                                            0 /* wait flag */);
-                                       cFYI(1, 
-                                             ("Oplock release rc = %d ", rc));
+                                       cFYI(1, ("Oplock release rc = %d", rc));
                                }
                        } else
                                spin_unlock(&GlobalMid_Lock);
index a20de77a3856d91d49ff590e17f6b27d214a7c14..0a3ee5a322b02a4329c1448dea61713a1c422f24 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/cifsfs.h
  *
- *   Copyright (c) International Business Machines  Corp., 2002, 2005
+ *   Copyright (c) International Business Machines  Corp., 2002, 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -99,7 +99,12 @@ extern int   cifs_setxattr(struct dentry *, const char *, const void *,
                        size_t, int);
 extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
-extern int cifs_ioctl (struct inode *inode, struct file *filep,
+extern int cifs_ioctl(struct inode *inode, struct file *filep,
                       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.50"
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+extern struct export_operations cifs_export_ops;
+#endif /* EXPERIMENTAL */
+
+#define CIFS_VERSION   "1.51"
 #endif                         /* _CIFSFS_H */
index b98742fc3b5aea0d325a395edd750eff093e115c..87f51f23276f42f23d054679042fb92b1ace313f 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include "cifs_fs_sb.h"
+#include "cifsacl.h"
 /*
  * The sizes of various internal tables and strings
  */
@@ -89,7 +90,8 @@ enum statusEnum {
 };
 
 enum securityEnum {
-       LANMAN = 0,             /* Legacy LANMAN auth */
+       PLAINTXT = 0,           /* Legacy with Plaintext passwords */
+       LANMAN,                 /* Legacy LANMAN auth */
        NTLM,                   /* Legacy NTLM012 auth with NTLM hash */
        NTLMv2,                 /* Legacy NTLM auth with NTLMv2 hash */
        RawNTLMSSP,             /* NTLMSSP without SPNEGO */
@@ -115,6 +117,17 @@ struct mac_key {
        } data;
 };
 
+struct cifs_cred {
+       int uid;
+       int gid;
+       int mode;
+       int cecount;
+       struct cifs_sid osid;
+       struct cifs_sid gsid;
+       struct cifs_ntace *ntaces;
+       struct cifs_ace *aces;
+};
+
 /*
  *****************************************************************
  * Except the CIFS PDUs themselves all the
@@ -279,6 +292,7 @@ struct cifsTconInfo {
        FILE_SYSTEM_DEVICE_INFO fsDevInfo;
        FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
        FILE_SYSTEM_UNIX_INFO fsUnixInfo;
+       unsigned ipc:1;         /* set if connection to IPC$ eg for RPC/PIPES */
        unsigned retry:1;
        unsigned nocase:1;
        unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol
@@ -329,6 +343,7 @@ struct cifsFileInfo {
        struct list_head llist; /* list of byte range locks we have. */
        unsigned closePend:1;   /* file is marked to close */
        unsigned invalidHandle:1;  /* file closed via session abend */
+       unsigned messageMode:1;    /* for pipes: message vs byte mode */
        atomic_t wrtPending;   /* handle in use - defer close */
        struct semaphore fh_sem; /* prevents reopen race after dead ses*/
        char *search_resume_name; /* BB removeme BB */
@@ -464,6 +479,9 @@ struct dir_notify_req {
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 #define   CIFSSEC_MAY_LANMAN   0x00010
 #define   CIFSSEC_MAY_PLNTXT   0x00020
+#else
+#define   CIFSSEC_MAY_LANMAN    0
+#define   CIFSSEC_MAY_PLNTXT    0
 #endif /* weak passwords */
 #define   CIFSSEC_MAY_SEAL     0x00040 /* not supported yet */
 
@@ -477,14 +495,23 @@ require use of the stronger protocol */
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 #define   CIFSSEC_MUST_LANMAN  0x10010
 #define   CIFSSEC_MUST_PLNTXT  0x20020
+#ifdef CONFIG_CIFS_UPCALL
+#define   CIFSSEC_MASK          0x3F03F /* allows weak security but also krb5 */
+#else
 #define   CIFSSEC_MASK          0x37037 /* current flags supported if weak */
+#endif /* UPCALL */
+#else /* do not allow weak pw hash */
+#ifdef CONFIG_CIFS_UPCALL
+#define   CIFSSEC_MASK          0x0F00F /* flags supported if no weak allowed */
 #else
-#define          CIFSSEC_MASK          0x07007 /* flags supported if no weak config */
+#define          CIFSSEC_MASK          0x07007 /* flags supported if no weak allowed */
+#endif /* UPCALL */
 #endif /* WEAK_PW_HASH */
 #define   CIFSSEC_MUST_SEAL    0x40040 /* not supported yet */
 
 #define   CIFSSEC_DEF  CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
 #define   CIFSSEC_MAX  CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
+#define   CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
 /*
  *****************************************************************
  * All constants go here
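
The four CIFSSEC_MASK variants fall straight out of OR-ing the individual flags: each MUST_* value is its MAY_* bit OR'd with the same bit shifted left 12 (e.g. CIFSSEC_MUST_LANMAN 0x10010 = 0x00010 | 0x00010 << 12), and each mask is simply the union of every flag the configuration allows. A self-contained check of the arithmetic (the KRB5 values 0x00008/0x08008 are assumed from the unchanged part of this header):

#include <assert.h>

#define MUST(may) ((may) | ((may) << 12))

int main(void)
{
	/* SIGN, NTLM, NTLMV2: always available */
	unsigned base = 0x001 | 0x002 | 0x004 | MUST(0x001 | 0x002 | 0x004);
	unsigned krb5 = 0x008 | MUST(0x008);            /* CONFIG_CIFS_UPCALL */
	unsigned weak = 0x010 | 0x020 | MUST(0x010 | 0x020); /* WEAK_PW_HASH */

	assert(base == 0x07007);                 /* no weak, no upcall */
	assert((base | krb5) == 0x0F00F);        /* upcall only        */
	assert((base | weak) == 0x37037);        /* weak only          */
	assert((base | weak | krb5) == 0x3F03F); /* weak + upcall      */
	return 0;
}
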
index 6a2056e58ceb63af6857ee69af4759227f8ffbf1..c41ff74e9128b91af914930334b241fd3c0d313f 100644 (file)
                                         /* file_execute, file_read_attributes*/
                                         /* write_dac, and delete.           */
 
+#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
+#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
+                               | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
+#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
+
+
 /*
  * Invalid readdir handle
  */
@@ -360,10 +366,10 @@ struct smb_hdr {
        __u8 WordCount;
 } __attribute__((packed));
 /* given a pointer to an smb_hdr retrieve the value of byte count */
-#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
-#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
+#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
-#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) + 2 )
+#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
 
 /*
  * Computer Name Length (since Netbios name was length 16 with last byte 0x20)
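
The tidied BCC macros encode the standard SMB layout: the fixed smb_hdr ends with WordCount, then come WordCount 16-bit parameter words, then the 16-bit byte count, then the byte area. The same arithmetic as a hedged inline function (illustrative only; the kernel keeps the macro form):

static inline __u16 smb_byte_count(struct smb_hdr *smb)
{
	/* skip the fixed header plus WordCount 16-bit parameter words */
	return *(__u16 *)((char *)smb + sizeof(struct smb_hdr)
			  + 2 * smb->WordCount);
}
/* the byte area then starts 2 bytes later, past the count itself */
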
@@ -716,6 +722,14 @@ typedef struct smb_com_findclose_req {
 #define REQ_OPENDIRONLY    0x00000008
 #define REQ_EXTENDED_INFO  0x00000010
 
+/* File type */
+#define DISK_TYPE              0x0000
+#define BYTE_PIPE_TYPE         0x0001
+#define MESSAGE_PIPE_TYPE      0x0002
+#define PRINTER_TYPE           0x0003
+#define COMM_DEV_TYPE          0x0004
+#define UNKNOWN_TYPE           0xFFFF
+
 typedef struct smb_com_open_req {      /* also handles create */
        struct smb_hdr hdr;     /* wct = 24 */
        __u8 AndXCommand;
index 04a69dafedba006c281f98ec9d92c1bd99c9a0bc..1a883663b22dfcc51eac71da6fc8930bf5b7d3e6 100644 (file)
@@ -50,7 +50,8 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
                        int * /* bytes returned */ , const int long_op);
 extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
                        struct kvec *, int /* nvec to send */,
-                       int * /* type of buf returned */ , const int long_op);
+                       int * /* type of buf returned */ , const int long_op,
+                       const int logError /* whether to log status code*/ );
 extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
                                        struct cifsTconInfo *,
                                struct smb_hdr * /* input */ ,
@@ -65,7 +66,7 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
                        enum securityEnum *secType);
 extern int cifs_inet_pton(int, char *source, void *dst);
-extern int map_smb_to_linux_error(struct smb_hdr *smb);
+extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
                            const struct cifsTconInfo *, int /* length of
                            fixed section (word count) in two byte units */);
@@ -304,12 +305,13 @@ extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
                                 const char *pass);
 extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
                        const struct nls_table *);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * );
+extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
 extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
                             const struct nls_table *);
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key);
 #endif /* CIFS_WEAK_PW_HASH */
+extern int parse_sec_desc(struct cifs_ntsd *, int);
 extern int CIFSSMBCopy(int xid,
                        struct cifsTconInfo *source_tcon,
                        const char *fromName,
index 8eb102f940d433537ac2219521d692f567301439..f0d9a485d0951f29c3f140ebda33391d7471a53d 100644 (file)
 #include <asm/uaccess.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
+#include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_unicode.h"
 #include "cifs_debug.h"
-#include "cifsacl.h"
 
 #ifdef CONFIG_CIFS_POSIX
 static struct {
@@ -94,9 +94,8 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
        write_lock(&GlobalSMBSeslock);
        list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
-               if (open_file) {
+               if (open_file)
                        open_file->invalidHandle = TRUE;
-               }
        }
        write_unlock(&GlobalSMBSeslock);
        /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -439,8 +438,13 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 
        pSMB->hdr.Mid = GetNextMid(server);
        pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
+
        if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
                pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
+       else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
+               cFYI(1, ("Kerberos only mechanism, enable extended security"));
+               pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
+       }
 
        count = 0;
        for (i = 0; i < CIFS_NUM_PROT; i++) {
@@ -513,7 +517,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                                (int)ts.tv_sec, (int)utc.tv_sec,
                                (int)(utc.tv_sec - ts.tv_sec)));
                        val = (int)(utc.tv_sec - ts.tv_sec);
-                       seconds = val < 0 ? -val : val;
+                       seconds = abs(val);
                        result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
                        remain = seconds % MIN_TZ_ADJ;
                        if (remain >= (MIN_TZ_ADJ / 2))
@@ -574,7 +578,20 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                server->secType = NTLM;
        else if (secFlags & CIFSSEC_MAY_NTLMV2)
                server->secType = NTLMv2;
-       /* else krb5 ... any others ... */
+       else if (secFlags & CIFSSEC_MAY_KRB5)
+               server->secType = Kerberos;
+       else if (secFlags & CIFSSEC_MAY_LANMAN)
+               server->secType = LANMAN;
+/* #ifdef CONFIG_CIFS_EXPERIMENTAL
+       else if (secFlags & CIFSSEC_MAY_PLNTXT)
+               server->secType = ??
+#endif */
+       else {
+               rc = -EOPNOTSUPP;
+               cERROR(1, ("Invalid security type"));
+               goto neg_err_exit;
+       }
+       /* else ... any others ...? */
 
        /* one byte, so no need to convert this or EncryptionKeyLen from
           little endian */
@@ -604,22 +621,26 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
        if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
                (server->capabilities & CAP_EXTENDED_SECURITY)) {
                count = pSMBr->ByteCount;
-               if (count < 16)
+               if (count < 16) {
                        rc = -EIO;
-               else if (count == 16) {
-                       server->secType = RawNTLMSSP;
-                       if (server->socketUseCount.counter > 1) {
-                               if (memcmp(server->server_GUID,
-                                          pSMBr->u.extended_response.
-                                          GUID, 16) != 0) {
-                                       cFYI(1, ("server UID changed"));
-                                       memcpy(server->server_GUID,
-                                               pSMBr->u.extended_response.GUID,
-                                               16);
-                               }
-                       } else
+                       goto neg_err_exit;
+               }
+
+               if (server->socketUseCount.counter > 1) {
+                       if (memcmp(server->server_GUID,
+                                  pSMBr->u.extended_response.
+                                  GUID, 16) != 0) {
+                               cFYI(1, ("server UID changed"));
                                memcpy(server->server_GUID,
-                                      pSMBr->u.extended_response.GUID, 16);
+                                       pSMBr->u.extended_response.GUID,
+                                       16);
+                       }
+               } else
+                       memcpy(server->server_GUID,
+                              pSMBr->u.extended_response.GUID, 16);
+
+               if (count == 16) {
+                       server->secType = RawNTLMSSP;
                } else {
                        rc = decode_negTokenInit(pSMBr->u.extended_response.
                                                 SecurityBlob,
@@ -642,10 +663,12 @@ signing_check:
                /* MUST_SIGN already includes the MAY_SIGN FLAG
                   so if this is zero it means that signing is disabled */
                cFYI(1, ("Signing disabled"));
-               if (server->secMode & SECMODE_SIGN_REQUIRED)
+               if (server->secMode & SECMODE_SIGN_REQUIRED) {
                        cERROR(1, ("Server requires "
-                                  "/proc/fs/cifs/PacketSigningEnabled "
-                                  "to be on"));
+                                  "packet signing to be enabled in "
+                                  "/proc/fs/cifs/SecurityFlags."));
+                       rc = -EOPNOTSUPP;
+               }
                server->secMode &=
                        ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
        } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
@@ -1052,7 +1075,7 @@ PsxCreat:
                                InformationLevel) - 4;
        offset = param_offset + params;
        pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
-       pdata->Level = SMB_QUERY_FILE_UNIX_BASIC;
+       pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
        pdata->Permissions = cpu_to_le64(mode);
        pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
        pdata->OpenFlags =  cpu_to_le32(*pOplock);
@@ -1098,8 +1121,8 @@ PsxCreat:
        if (cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction)
                *pOplock |= CIFS_CREATE_ACTION;
        /* check to make sure response data is there */
-       if (psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC) {
-               pRetData->Type = -1; /* unknown */
+       if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
+               pRetData->Type = cpu_to_le32(-1); /* unknown */
 #ifdef CONFIG_CIFS_DEBUG2
                cFYI(1, ("unknown type"));
 #endif
@@ -1107,12 +1130,12 @@ PsxCreat:
                if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
                                        + sizeof(FILE_UNIX_BASIC_INFO)) {
                        cERROR(1, ("Open response data too small"));
-                       pRetData->Type = -1;
+                       pRetData->Type = cpu_to_le32(-1);
                        goto psx_create_err;
                }
                memcpy((char *) pRetData,
                        (char *)psx_rsp + sizeof(OPEN_PSX_RSP),
-                       sizeof (FILE_UNIX_BASIC_INFO));
+                       sizeof(FILE_UNIX_BASIC_INFO));
        }
 
 psx_create_err:
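
The cpu_to_le16()/cpu_to_le32() conversions added throughout this hunk all apply one rule: wire fields stay in little-endian form, so comparisons and stores convert the host-order constant rather than the field. The pattern in isolation:

/* Buggy on big-endian hosts (compares wire order against host order):
 *	if (psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC)
 * Correct: convert the constant once, compare in wire order.
 */
if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC))
	pRetData->Type = cpu_to_le32(-1);  /* store "unknown" in wire order */
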
@@ -1193,9 +1216,9 @@ OldOpenRetry:
        }
        if (*pOplock & REQ_OPLOCK)
                pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK);
-       else if (*pOplock & REQ_BATCHOPLOCK) {
+       else if (*pOplock & REQ_BATCHOPLOCK)
                pSMB->OpenFlags = cpu_to_le16(REQ_BATCHOPLOCK);
-       }
+
        pSMB->OpenFlags |= cpu_to_le16(REQ_MORE_INFO);
        /* BB fixme add conversion for access_flags to bits 0 - 2 of mode */
        /* 0 = read
@@ -1310,9 +1333,8 @@ openRetry:
        }
        if (*pOplock & REQ_OPLOCK)
                pSMB->OpenFlags = cpu_to_le32(REQ_OPLOCK);
-       else if (*pOplock & REQ_BATCHOPLOCK) {
+       else if (*pOplock & REQ_BATCHOPLOCK)
                pSMB->OpenFlags = cpu_to_le32(REQ_BATCHOPLOCK);
-       }
        pSMB->DesiredAccess = cpu_to_le32(access_flags);
        pSMB->AllocationSize = 0;
        /* set file as system file if special file such
@@ -1424,9 +1446,8 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 
        iov[0].iov_base = (char *)pSMB;
        iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
-       rc = SendReceive2(xid, tcon->ses, iov,
-                         1 /* num iovecs */,
-                         &resp_buf_type, 0);
+       rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
+                        &resp_buf_type, 0 /* not long op */, 1 /* log err */ );
        cifs_stats_inc(&tcon->num_reads);
        pSMBr = (READ_RSP *)iov[0].iov_base;
        if (rc) {
@@ -1446,11 +1467,11 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
                        *nbytes = 0;
                } else {
                        pReadData = (char *) (&pSMBr->hdr.Protocol) +
-                           le16_to_cpu(pSMBr->DataOffset);
-/*                      if (rc = copy_to_user(buf, pReadData, data_length)) {
+                                       le16_to_cpu(pSMBr->DataOffset);
+/*                     if (rc = copy_to_user(buf, pReadData, data_length)) {
                                cERROR(1,("Faulting on read rc = %d",rc));
                                rc = -EFAULT;
-                        }*/ /* can not use copy_to_user when using page cache*/
+                       }*/ /* can not use copy_to_user when using page cache*/
                        if (*buf)
                                memcpy(*buf, pReadData, data_length);
                }
@@ -1645,7 +1666,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 
 
        rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type,
-                         long_op);
+                         long_op, 0 /* do not log STATUS code */ );
        cifs_stats_inc(&tcon->num_writes);
        if (rc) {
                cFYI(1, ("Send error Write2 = %d", rc));
@@ -2538,7 +2559,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
                cFYI(1, ("data starts after end of smb"));
                return -EINVAL;
        } else if (data_count + *ppdata > end_of_smb) {
-               cFYI(1,("data %p + count %d (%p) ends after end of smb %p start %p",
+               cFYI(1, ("data %p + count %d (%p) ends after end of smb %p start %p",
                        *ppdata, data_count, (data_count + *ppdata),
                        end_of_smb, pSMBr));
                return -EINVAL;
@@ -2615,7 +2636,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
                                        reparse_buf->TargetNameOffset +
                                        reparse_buf->TargetNameLen) >
                                                end_of_smb) {
-                                       cFYI(1,("reparse buf goes beyond SMB"));
+                                       cFYI(1, ("reparse buf beyond SMB"));
                                        rc = -EIO;
                                        goto qreparse_out;
                                }
@@ -3042,25 +3063,12 @@ GetExtAttrOut:
 
 #endif /* CONFIG_POSIX */
 
-
-/* security id for everyone */
-static const struct cifs_sid sid_everyone =
-               {1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
-/* group users */
-static const struct cifs_sid sid_user =
-               {1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
-
-/* Convert CIFS ACL to POSIX form */
-static int parse_sec_desc(struct cifs_sid *psec_desc, int acl_len)
-{
-       return 0;
-}
-
+#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* Get Security Descriptor (by handle) from remote server for a file or dir */
 int
 CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
                /* BB fix up return info */ char *acl_inf, const int buflen,
-                 const int acl_type /* ACCESS/DEFAULT not sure implication */)
+                 const int acl_type)
 {
        int rc = 0;
        int buf_type = 0;
@@ -3085,12 +3093,13 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
        iov[0].iov_base = (char *)pSMB;
        iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
 
-       rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0);
+       rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
+                        0 /* not long op */, 0 /* do not log STATUS codes */ );
        cifs_stats_inc(&tcon->num_acl_get);
        if (rc) {
                cFYI(1, ("Send error in QuerySecDesc = %d", rc));
        } else {                /* decode response */
-               struct cifs_sid *psec_desc;
+               struct cifs_ntsd *psec_desc;
                __le32 * parm;
                int parm_len;
                int data_len;
@@ -3105,8 +3114,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
                        goto qsec_out;
                pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
 
-               cERROR(1, ("smb %p parm %p data %p",
-                         pSMBr, parm, psec_desc));  /* BB removeme BB */
+               cFYI(1, ("smb %p parm %p data %p", pSMBr, parm, psec_desc));
 
                if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
                        rc = -EIO;      /* bad smb */
@@ -3115,7 +3123,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 
 /* BB check that data area is minimum length and as big as acl_len */
 
-               acl_len = le32_to_cpu(*(__le32 *)parm);
+               acl_len = le32_to_cpu(*parm);
                /* BB check if (acl_len > bufsize) */
 
                parse_sec_desc(psec_desc, acl_len);
@@ -3128,6 +3136,7 @@ qsec_out:
 /*     cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
        return rc;
 }
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
 
 /* Legacy Query Path Information call for lookup to old servers such
    as Win9x/WinME */
@@ -3363,6 +3372,9 @@ UnixQPathInfoRetry:
                rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
                if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+                       cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n"
+                                  "Unix Extensions can be disabled on mount "
+                                  "by specifying the nosfu mount option."));
                        rc = -EIO;      /* bad smb */
                } else {
                        __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3883,12 +3895,10 @@ getDFSRetry:
        pSMB->hdr.Mid = GetNextMid(ses->server);
        pSMB->hdr.Tid = ses->ipc_tid;
        pSMB->hdr.Uid = ses->Suid;
-       if (ses->capabilities & CAP_STATUS32) {
+       if (ses->capabilities & CAP_STATUS32)
                pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS;
-       }
-       if (ses->capabilities & CAP_DFS) {
+       if (ses->capabilities & CAP_DFS)
                pSMB->hdr.Flags2 |= SMBFLG2_DFS;
-       }
 
        if (ses->capabilities & CAP_UNICODE) {
                pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
@@ -4060,10 +4070,6 @@ oldQFSInfoRetry:
                (void **) &pSMBr);
        if (rc)
                return rc;
-       rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-                     (void **) &pSMBr);
-       if (rc)
-               return rc;
 
        params = 2;     /* level */
        pSMB->TotalDataCount = 0;
@@ -4265,7 +4271,7 @@ QFSAttributeRetry:
                             *) (((char *) &pSMBr->hdr.Protocol) +
                                 data_offset);
                        memcpy(&tcon->fsAttrInfo, response_data,
-                              sizeof (FILE_SYSTEM_ATTRIBUTE_INFO));
+                              sizeof(FILE_SYSTEM_ATTRIBUTE_INFO));
                }
        }
        cifs_buf_release(pSMB);
@@ -4334,7 +4340,7 @@ QFSDeviceRetry:
                                (((char *) &pSMBr->hdr.Protocol) +
                                 data_offset);
                        memcpy(&tcon->fsDevInfo, response_data,
-                              sizeof (FILE_SYSTEM_DEVICE_INFO));
+                              sizeof(FILE_SYSTEM_DEVICE_INFO));
                }
        }
        cifs_buf_release(pSMB);
@@ -4402,7 +4408,7 @@ QFSUnixRetry:
                             *) (((char *) &pSMBr->hdr.Protocol) +
                                 data_offset);
                        memcpy(&tcon->fsUnixInfo, response_data,
-                              sizeof (FILE_SYSTEM_UNIX_INFO));
+                              sizeof(FILE_SYSTEM_UNIX_INFO));
                }
        }
        cifs_buf_release(pSMB);
@@ -4612,7 +4618,7 @@ SetEOFRetry:
                strncpy(pSMB->FileName, fileName, name_len);
        }
        params = 6 + name_len;
-       data_count = sizeof (struct file_end_of_file_info);
+       data_count = sizeof(struct file_end_of_file_info);
        pSMB->MaxParameterCount = cpu_to_le16(2);
        pSMB->MaxDataCount = cpu_to_le16(4100);
        pSMB->MaxSetupCount = 0;
@@ -4800,7 +4806,7 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
 
        data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
 
-       count = sizeof (FILE_BASIC_INFO);
+       count = sizeof(FILE_BASIC_INFO);
        pSMB->MaxParameterCount = cpu_to_le16(2);
        pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */
        pSMB->SetupCount = 1;
@@ -4871,7 +4877,7 @@ SetTimesRetry:
        }
 
        params = 6 + name_len;
-       count = sizeof (FILE_BASIC_INFO);
+       count = sizeof(FILE_BASIC_INFO);
        pSMB->MaxParameterCount = cpu_to_le16(2);
        pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */
        pSMB->MaxSetupCount = 0;
@@ -4900,7 +4906,7 @@ SetTimesRetry:
                pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
        pSMB->Reserved4 = 0;
        pSMB->hdr.smb_buf_length += byte_count;
-       memcpy(data_offset, data, sizeof (FILE_BASIC_INFO));
+       memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
        pSMB->ByteCount = cpu_to_le16(byte_count);
        rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5003,7 +5009,7 @@ setPermsRetry:
        }
 
        params = 6 + name_len;
-       count = sizeof (FILE_UNIX_BASIC_INFO);
+       count = sizeof(FILE_UNIX_BASIC_INFO);
        pSMB->MaxParameterCount = cpu_to_le16(2);
        pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find exact max SMB PDU from sess structure BB */
        pSMB->MaxSetupCount = 0;
index 4af3588c1a9615b39976bac9fadc3487a64025eb..19ee11f7f35ad6188207d25edccb0af3b8cdad11 100644 (file)
@@ -124,7 +124,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
        struct mid_q_entry *mid_entry;
 
        spin_lock(&GlobalMid_Lock);
-       if ( kthread_should_stop() ) {
+       if (kthread_should_stop()) {
                /* the demux thread will exit normally
                next time through the loop */
                spin_unlock(&GlobalMid_Lock);
@@ -151,9 +151,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
        }
        list_for_each(tmp, &GlobalTreeConnectionList) {
                tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-               if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) {
+               if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
                        tcon->tidStatus = CifsNeedReconnect;
-               }
        }
        read_unlock(&GlobalSMBSeslock);
        /* do not want to be sending data on a socket we are freeing */
@@ -187,7 +186,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
        spin_unlock(&GlobalMid_Lock);
        up(&server->tcpSem);
 
-       while ( (!kthread_should_stop()) && (server->tcpStatus != CifsGood)) {
+       while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) {
                try_to_freeze();
                if (server->protocolType == IPV6) {
                        rc = ipv6_connect(&server->addr.sockAddr6,
@@ -204,7 +203,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                } else {
                        atomic_inc(&tcpSesReconnectCount);
                        spin_lock(&GlobalMid_Lock);
-                       if ( !kthread_should_stop() )
+                       if (!kthread_should_stop())
                                server->tcpStatus = CifsGood;
                        server->sequence_number = 0;
                        spin_unlock(&GlobalMid_Lock);
@@ -352,17 +351,15 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 
        current->flags |= PF_MEMALLOC;
        server->tsk = current;  /* save process info to wake at shutdown */
-       cFYI(1, ("Demultiplex PID: %d", current->pid));
+       cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
        write_lock(&GlobalSMBSeslock);
        atomic_inc(&tcpSesAllocCount);
        length = tcpSesAllocCount.counter;
        write_unlock(&GlobalSMBSeslock);
        complete(&cifsd_complete);
-       if (length  > 1) {
-               mempool_resize(cifs_req_poolp,
-                       length + cifs_min_rcv,
-                       GFP_KERNEL);
-       }
+       if (length  > 1)
+               mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
+                               GFP_KERNEL);
 
        set_freezable();
        while (!kthread_should_stop()) {
@@ -378,7 +375,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                        }
                } else if (isLargeBuf) {
                        /* we are reusing a dirty large buf, clear its start */
-                       memset(bigbuf, 0, sizeof (struct smb_hdr));
+                       memset(bigbuf, 0, sizeof(struct smb_hdr));
                }
 
                if (smallbuf == NULL) {
@@ -391,7 +388,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                        }
                        /* beginning of smb buffer is cleared in our buf_get */
                } else /* if existing small buf clear beginning */
-                       memset(smallbuf, 0, sizeof (struct smb_hdr));
+                       memset(smallbuf, 0, sizeof(struct smb_hdr));
 
                isLargeBuf = FALSE;
                isMultiRsp = FALSE;
@@ -400,11 +397,13 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                iov.iov_len = 4;
                smb_msg.msg_control = NULL;
                smb_msg.msg_controllen = 0;
+               pdu_length = 4; /* enough to get RFC1001 header */
+incomplete_rcv:
                length =
                    kernel_recvmsg(csocket, &smb_msg,
-                                &iov, 1, 4, 0 /* BB see socket.h flags */);
+                               &iov, 1, pdu_length, 0 /* BB other flags? */);
 
-               if ( kthread_should_stop() ) {
+               if (kthread_should_stop()) {
                        break;
                } else if (server->tcpStatus == CifsNeedReconnect) {
                        cFYI(1, ("Reconnect after server stopped responding"));
@@ -416,7 +415,10 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                        msleep(1); /* minimum sleep to prevent looping
                                allowing socket to clear and app threads to set
                                tcpStatus CifsNeedReconnect if server hung */
-                       continue;
+                       if (pdu_length < 4)
+                               goto incomplete_rcv;
+                       else
+                               continue;
                } else if (length <= 0) {
                        if (server->tcpStatus == CifsNew) {
                                cFYI(1, ("tcp session abend after SMBnegprot"));
@@ -437,13 +439,11 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                        wake_up(&server->response_q);
                        continue;
                } else if (length < 4) {
-                       cFYI(1,
-                           ("Frame under four bytes received (%d bytes long)",
+                       cFYI(1, ("less than four bytes received (%d bytes)",
                              length));
-                       cifs_reconnect(server);
-                       csocket = server->ssocket;
-                       wake_up(&server->response_q);
-                       continue;
+                       pdu_length -= length;
+                       msleep(1);
+                       goto incomplete_rcv;
                }
 
                /* The right amount was read from socket - 4 bytes */
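
The new incomplete_rcv path makes the demultiplex thread tolerant of short reads on the 4-byte RFC1001 length header: rather than tearing the session down, it subtracts whatever arrived and retries until the header is complete. In outline (a sketch of the control flow added above, not the full loop):

	pdu_length = 4;		/* enough to get RFC1001 header */
incomplete_rcv:
	length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, pdu_length, 0);
	if (length > 0 && length < 4) {
		pdu_length -= length;	/* partial header: read the rest */
		msleep(1);
		goto incomplete_rcv;
	}
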
@@ -504,7 +504,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 
                /* else we have an SMB response */
                if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) ||
-                           (pdu_length < sizeof (struct smb_hdr) - 1 - 4)) {
+                           (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) {
                        cERROR(1, ("Invalid size SMB length %d pdu_length %d",
                                        length, pdu_length+4));
                        cifs_reconnect(server);
@@ -528,7 +528,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                     total_read += length) {
                        length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
                                                pdu_length - total_read, 0);
-                       if ( kthread_should_stop() ||
+                       if (kthread_should_stop() ||
                            (length == -EINTR)) {
                                /* then will exit */
                                reconnect = 2;
@@ -546,6 +546,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
                                              allowing socket to clear and app
                                              threads to set tcpStatus
                                              CifsNeedReconnect if server hung*/
+                               length = 0;
                                continue;
                        } else if (length <= 0) {
                                cERROR(1, ("Received no data, expecting %d",
@@ -631,9 +632,9 @@ multi_t2_fnd:
                        /* Was previous buf put in mpx struct for multi-rsp? */
                        if (!isMultiRsp) {
                                /* smb buffer will be freed by user thread */
-                               if (isLargeBuf) {
+                               if (isLargeBuf)
                                        bigbuf = NULL;
-                               else
+                               else
                                        smallbuf = NULL;
                        }
                        wake_up_process(task_to_wake);
@@ -676,9 +677,8 @@ multi_t2_fnd:
                server->ssocket = NULL;
        }
        /* buffer usually freed in free_mid - need to free it here on exit */
-       if (bigbuf != NULL)
-               cifs_buf_release(bigbuf);
-       if (smallbuf != NULL)
+       cifs_buf_release(bigbuf);
+       if (smallbuf) /* no sense logging a debug message if NULL */
                cifs_small_buf_release(smallbuf);
 
        read_lock(&GlobalSMBSeslock);
@@ -702,9 +702,8 @@ multi_t2_fnd:
                list_for_each(tmp, &GlobalSMBSessionList) {
                        ses = list_entry(tmp, struct cifsSesInfo,
                                         cifsSessionList);
-                       if (ses->server == server) {
+                       if (ses->server == server)
                                ses->status = CifsExiting;
-                       }
                }
 
                spin_lock(&GlobalMid_Lock);
@@ -714,9 +713,8 @@ multi_t2_fnd:
                                cFYI(1, ("Clearing Mid 0x%x - waking up ",
                                         mid_entry->mid));
                                task_to_wake = mid_entry->tsk;
-                               if (task_to_wake) {
+                               if (task_to_wake)
                                        wake_up_process(task_to_wake);
-                               }
                        }
                }
                spin_unlock(&GlobalMid_Lock);
@@ -749,18 +747,15 @@ multi_t2_fnd:
        list_for_each(tmp, &GlobalSMBSessionList) {
                ses = list_entry(tmp, struct cifsSesInfo,
                                cifsSessionList);
-               if (ses->server == server) {
+               if (ses->server == server)
                        ses->server = NULL;
-               }
        }
        write_unlock(&GlobalSMBSeslock);
 
        kfree(server);
-       if (length  > 0) {
-               mempool_resize(cifs_req_poolp,
-                       length + cifs_min_rcv,
-                       GFP_KERNEL);
-       }
+       if (length  > 0)
+               mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
+                               GFP_KERNEL);
 
        return 0;
 }
@@ -1477,7 +1472,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
        if (psin_server->sin_port) { /* user overrode default port */
                rc = (*csocket)->ops->connect(*csocket,
                                (struct sockaddr *) psin_server,
-                               sizeof (struct sockaddr_in), 0);
+                               sizeof(struct sockaddr_in), 0);
                if (rc >= 0)
                        connected = 1;
        }
@@ -1493,7 +1488,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
 
                        rc = (*csocket)->ops->connect(*csocket,
                                        (struct sockaddr *) psin_server,
-                                       sizeof (struct sockaddr_in), 0);
+                                       sizeof(struct sockaddr_in), 0);
                        if (rc >= 0)
                                connected = 1;
                }
@@ -1502,7 +1497,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
                psin_server->sin_port = htons(RFC1001_PORT);
                rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
                                              psin_server,
-                                             sizeof (struct sockaddr_in), 0);
+                                             sizeof(struct sockaddr_in), 0);
                if (rc >= 0)
                        connected = 1;
        }
@@ -1610,7 +1605,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
        if (psin_server->sin6_port) { /* user overrode default port */
                rc = (*csocket)->ops->connect(*csocket,
                                (struct sockaddr *) psin_server,
-                               sizeof (struct sockaddr_in6), 0);
+                               sizeof(struct sockaddr_in6), 0);
                if (rc >= 0)
                        connected = 1;
        }
@@ -1626,7 +1621,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
 
                        rc = (*csocket)->ops->connect(*csocket,
                                        (struct sockaddr *) psin_server,
-                                       sizeof (struct sockaddr_in6), 0);
+                                       sizeof(struct sockaddr_in6), 0);
                        if (rc >= 0)
                                connected = 1;
                }
@@ -1634,7 +1629,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
        if (!connected) {
                psin_server->sin6_port = htons(RFC1001_PORT);
                rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
-                                psin_server, sizeof (struct sockaddr_in6), 0);
+                                psin_server, sizeof(struct sockaddr_in6), 0);
                if (rc >= 0)
                        connected = 1;
        }
@@ -1750,7 +1745,16 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
                        cFYI(1, ("very large write cap"));
 #endif /* CIFS_DEBUG2 */
                if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
-                       cFYI(1, ("setting capabilities failed"));
+                       if (vol_info == NULL) {
+                               cFYI(1, ("resetting capabilities failed"));
+                       } else {
+                               cERROR(1, ("Negotiating Unix capabilities "
+                                          "with the server failed.  Consider "
+                                          "mounting with the Unix Extensions "
+                                          "disabled if problems are found, "
+                                          "by specifying the nounix mount "
+                                          "option."));
+                       }
                }
        }
 }
@@ -1909,8 +1913,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        return rc;
                }
 
-               srvTcp = kmalloc(sizeof (struct TCP_Server_Info), GFP_KERNEL);
-               if (srvTcp == NULL) {
+               srvTcp = kzalloc(sizeof(struct TCP_Server_Info), GFP_KERNEL);
+               if (!srvTcp) {
                        rc = -ENOMEM;
                        sock_release(csocket);
                        kfree(volume_info.UNC);
@@ -1919,9 +1923,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                        FreeXid(xid);
                        return rc;
                } else {
-                       memset(srvTcp, 0, sizeof (struct TCP_Server_Info));
                        memcpy(&srvTcp->addr.sockAddr, &sin_server,
-                               sizeof (struct sockaddr_in));
+                               sizeof(struct sockaddr_in));
                        atomic_set(&srvTcp->inFlight, 0);
                        /* BB Add code for ipv6 case too */
                        srvTcp->ssocket = csocket;
@@ -2173,8 +2176,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                                                if (tsk)
                                                        kthread_stop(tsk);
                                        }
-                               } else
+                               } else {
                                        cFYI(1, ("No session or bad tcon"));
+                                       if ((pSesInfo->server) &&
+                                           (pSesInfo->server->tsk)) {
+                                               struct task_struct *tsk;
+                                               force_sig(SIGKILL,
+                                                       pSesInfo->server->tsk);
+                                               tsk = pSesInfo->server->tsk;
+                                               if (tsk)
+                                                       kthread_stop(tsk);
+                                       }
+                               }
                                sesInfoFree(pSesInfo);
                                /* pSesInfo = NULL; */
                        }
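The cleanup duplicated into the else branch above signals the blocked demultiplex thread with SIGKILL before kthread_stop(), since a kthread sleeping in a blocking receive will not otherwise notice the stop request. A rough user-space analogue of the wake-then-join pattern (a sketch only; stop_worker and the SIGUSR1 choice are illustrative, and in practice the handler would be installed before the worker ever blocks):

    #include <pthread.h>
    #include <signal.h>
    #include <stddef.h>

    static void wake_handler(int sig)
    {
            (void)sig;      /* exists only so the blocking call returns EINTR */
    }

    static void stop_worker(pthread_t tid)
    {
            signal(SIGUSR1, wake_handler);  /* ideally done before tid blocks */
            pthread_kill(tid, SIGUSR1);     /* knock it out of a blocking call */
            pthread_join(tid, NULL);        /* then wait for it to exit */
    }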
@@ -2185,8 +2198,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
                tcon->ses = pSesInfo;
 
                /* do not care if following two calls succeed - informational */
-               CIFSSMBQFSDeviceInfo(xid, tcon);
-               CIFSSMBQFSAttributeInfo(xid, tcon);
+               if (!tcon->ipc) {
+                       CIFSSMBQFSDeviceInfo(xid, tcon);
+                       CIFSSMBQFSAttributeInfo(xid, tcon);
+               }
 
                /* tell server which Unix caps we support */
                if (tcon->ses->capabilities & CAP_UNIX)
@@ -2526,8 +2541,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 sesssetup_nomem:       /* do not return an error on nomem for the info strings,
                           since that could make reconnection harder, and
                           reconnection might be needed to free memory */
-       if (smb_buffer)
-               cifs_buf_release(smb_buffer);
+       cifs_buf_release(smb_buffer);
 
        return rc;
 }
@@ -2547,7 +2561,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
        int remaining_words = 0;
        int bytes_returned = 0;
        int len;
-       int SecurityBlobLength = sizeof (NEGOTIATE_MESSAGE);
+       int SecurityBlobLength = sizeof(NEGOTIATE_MESSAGE);
        PNEGOTIATE_MESSAGE SecurityBlob;
        PCHALLENGE_MESSAGE SecurityBlob2;
        __u32 negotiate_flags, capabilities;
@@ -2865,15 +2879,14 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
                rc = -EIO;
        }
 
-       if (smb_buffer)
-               cifs_buf_release(smb_buffer);
+       cifs_buf_release(smb_buffer);
 
        return rc;
 }
 static int
 CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
-               char *ntlm_session_key, int ntlmv2_flag,
-               const struct nls_table *nls_codepage)
+                       char *ntlm_session_key, int ntlmv2_flag,
+                       const struct nls_table *nls_codepage)
 {
        struct smb_hdr *smb_buffer;
        struct smb_hdr *smb_buffer_response;
@@ -2886,7 +2899,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
        int remaining_words = 0;
        int bytes_returned = 0;
        int len;
-       int SecurityBlobLength = sizeof (AUTHENTICATE_MESSAGE);
+       int SecurityBlobLength = sizeof(AUTHENTICATE_MESSAGE);
        PAUTHENTICATE_MESSAGE SecurityBlob;
        __u32 negotiate_flags, capabilities;
        __u16 count;
@@ -2901,8 +2914,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                return -ENOMEM;
        }
        smb_buffer_response = smb_buffer;
-       pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
-       pSMBr = (SESSION_SETUP_ANDX *) smb_buffer_response;
+       pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
+       pSMBr = (SESSION_SETUP_ANDX *)smb_buffer_response;
 
        /* send SMBsessionSetup here */
        header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
@@ -2921,7 +2934,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
 
        capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
-           CAP_EXTENDED_SECURITY;
+                       CAP_EXTENDED_SECURITY;
        if (ses->capabilities & CAP_UNICODE) {
                smb_buffer->Flags2 |= SMBFLG2_UNICODE;
                capabilities |= CAP_UNICODE;
@@ -2936,15 +2949,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
        }
        pSMB->req.Capabilities = cpu_to_le32(capabilities);
 
-       bcc_ptr = (char *) &pSMB->req.SecurityBlob;
-       SecurityBlob = (PAUTHENTICATE_MESSAGE) bcc_ptr;
+       bcc_ptr = (char *)&pSMB->req.SecurityBlob;
+       SecurityBlob = (PAUTHENTICATE_MESSAGE)bcc_ptr;
        strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
        SecurityBlob->MessageType = NtLmAuthenticate;
        bcc_ptr += SecurityBlobLength;
-       negotiate_flags =
-           NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
-           NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
-           0x80000000 | NTLMSSP_NEGOTIATE_128;
+       negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
+                       NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
+                       0x80000000 | NTLMSSP_NEGOTIATE_128;
        if (sign_CIFS_PDUs)
                negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN;
        if (ntlmv2_flag)
@@ -2979,36 +2991,32 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                        SecurityBlob->DomainName.Length = 0;
                        SecurityBlob->DomainName.MaximumLength = 0;
                } else {
-                       __u16 len =
-                           cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
+                       __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
                                          nls_codepage);
-                       len *= 2;
+                       ln *= 2;
                        SecurityBlob->DomainName.MaximumLength =
-                           cpu_to_le16(len);
+                           cpu_to_le16(ln);
                        SecurityBlob->DomainName.Buffer =
                            cpu_to_le32(SecurityBlobLength);
-                       bcc_ptr += len;
-                       SecurityBlobLength += len;
-                       SecurityBlob->DomainName.Length =
-                           cpu_to_le16(len);
+                       bcc_ptr += ln;
+                       SecurityBlobLength += ln;
+                       SecurityBlob->DomainName.Length = cpu_to_le16(ln);
                }
                if (user == NULL) {
                        SecurityBlob->UserName.Buffer = 0;
                        SecurityBlob->UserName.Length = 0;
                        SecurityBlob->UserName.MaximumLength = 0;
                } else {
-                       __u16 len =
-                           cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
+                       __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
                                          nls_codepage);
-                       len *= 2;
+                       ln *= 2;
                        SecurityBlob->UserName.MaximumLength =
-                           cpu_to_le16(len);
+                           cpu_to_le16(ln);
                        SecurityBlob->UserName.Buffer =
                            cpu_to_le32(SecurityBlobLength);
-                       bcc_ptr += len;
-                       SecurityBlobLength += len;
-                       SecurityBlob->UserName.Length =
-                           cpu_to_le16(len);
+                       bcc_ptr += ln;
+                       SecurityBlobLength += ln;
+                       SecurityBlob->UserName.Length = cpu_to_le16(ln);
                }
 
                /* SecurityBlob->WorkstationName.Length =
@@ -3052,33 +3060,32 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                        SecurityBlob->DomainName.Length = 0;
                        SecurityBlob->DomainName.MaximumLength = 0;
                } else {
-                       __u16 len;
+                       __u16 ln;
                        negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED;
                        strncpy(bcc_ptr, domain, 63);
-                       len = strnlen(domain, 64);
+                       ln = strnlen(domain, 64);
                        SecurityBlob->DomainName.MaximumLength =
-                           cpu_to_le16(len);
+                           cpu_to_le16(ln);
                        SecurityBlob->DomainName.Buffer =
                            cpu_to_le32(SecurityBlobLength);
-                       bcc_ptr += len;
-                       SecurityBlobLength += len;
-                       SecurityBlob->DomainName.Length = cpu_to_le16(len);
+                       bcc_ptr += ln;
+                       SecurityBlobLength += ln;
+                       SecurityBlob->DomainName.Length = cpu_to_le16(ln);
                }
                if (user == NULL) {
                        SecurityBlob->UserName.Buffer = 0;
                        SecurityBlob->UserName.Length = 0;
                        SecurityBlob->UserName.MaximumLength = 0;
                } else {
-                       __u16 len;
+                       __u16 ln;
                        strncpy(bcc_ptr, user, 63);
-                       len = strnlen(user, 64);
-                       SecurityBlob->UserName.MaximumLength =
-                           cpu_to_le16(len);
+                       ln = strnlen(user, 64);
+                       SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln);
                        SecurityBlob->UserName.Buffer =
-                           cpu_to_le32(SecurityBlobLength);
-                       bcc_ptr += len;
-                       SecurityBlobLength += len;
-                       SecurityBlob->UserName.Length = cpu_to_le16(len);
+                                               cpu_to_le32(SecurityBlobLength);
+                       bcc_ptr += ln;
+                       SecurityBlobLength += ln;
+                       SecurityBlob->UserName.Length = cpu_to_le16(ln);
                }
                /* BB fill in our workstation name if known BB */
 
@@ -3100,12 +3107,11 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
        rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
                         &bytes_returned, 1);
        if (rc) {
-/*    rc = map_smb_to_linux_error(smb_buffer_response);  *//* done in SendReceive now */
-       } else if ((smb_buffer_response->WordCount == 3)
-                  || (smb_buffer_response->WordCount == 4)) {
+/*   rc = map_smb_to_linux_error(smb_buffer_response) done in SendReceive now */
+       } else if ((smb_buffer_response->WordCount == 3) ||
+                  (smb_buffer_response->WordCount == 4)) {
                __u16 action = le16_to_cpu(pSMBr->resp.Action);
-               __u16 blob_len =
-                   le16_to_cpu(pSMBr->resp.SecurityBlobLength);
+               __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
                if (action & GUEST_LOGIN)
                        cFYI(1, (" Guest login")); /* BB Should we set anything
                                                         in SesInfo struct ? */
@@ -3145,8 +3151,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                                        } else {
                                                remaining_words = BCC(smb_buffer_response) / 2;
                                        }
-                                       len =
-                                           UniStrnlen((wchar_t *) bcc_ptr,remaining_words - 1);
+                                       len = UniStrnlen((wchar_t *) bcc_ptr,
+                                                       remaining_words - 1);
/* We look for an obviously messed up bcc or strings in the response so we do
   not go off the end, since (at least) WIN2K and Windows XP have a major bug
   of not null terminating the last Unicode string in the response  */
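The comment above is why every Unicode string in these replies is measured with UniStrnlen against remaining_words instead of being scanned to a terminator. A standalone sketch of such a bounded scan (uni_strnlen is an illustrative name, not the kernel helper):

    #include <stddef.h>
    #include <stdint.h>

    static size_t uni_strnlen(const uint16_t *s, size_t max_words)
    {
            size_t n = 0;

            /* never trust the terminator: some servers omit the final NUL */
            while (n < max_words && s[n] != 0)
                    n++;
            return n;       /* equals max_words when no NUL was found in range */
    }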
@@ -3230,7 +3236,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                                                <= BCC(smb_buffer_response)) {
                                                if (ses->serverOS)
                                                        kfree(ses->serverOS);
-                                               ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
+                                               ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
                                                strncpy(ses->serverOS,bcc_ptr, len);
 
                                                bcc_ptr += len;
@@ -3259,28 +3265,24 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                                                bcc_ptr[0] = 0;
                                                bcc_ptr++;
                                        } else
-                                               cFYI(1,
-                                                    ("field of length %d "
+                                               cFYI(1, ("field of length %d "
                                                   "extends beyond end of smb ",
                                                      len));
                                }
                        } else {
-                               cERROR(1,
-                                      (" Security Blob extends beyond end "
+                               cERROR(1, ("Security Blob extends beyond end "
                                        "of SMB"));
                        }
                } else {
                        cERROR(1, ("No session structure passed in."));
                }
        } else {
-               cERROR(1,
-                      (" Invalid Word count %d: ",
+               cERROR(1, ("Invalid Word count %d: ",
                        smb_buffer_response->WordCount));
                rc = -EIO;
        }
 
-       if (smb_buffer)
-               cifs_buf_release(smb_buffer);
+       cifs_buf_release(smb_buffer);
 
        return rc;
 }
@@ -3389,6 +3391,18 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
                bcc_ptr = pByteArea(smb_buffer_response);
                length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2);
                /* skip service field (NB: this field is always ASCII) */
+               if (length == 3) {
+                       if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
+                           (bcc_ptr[2] == 'C')) {
+                               cFYI(1, ("IPC connection"));
+                               tcon->ipc = 1;
+                       }
+               } else if (length == 2) {
+                       if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
+                               /* the most common case */
+                               cFYI(1, ("disk share connection"));
+                       }
+               }
                bcc_ptr += length + 1;
                strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
                if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
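The added check peeks at the service field of the Tree Connect response to flag IPC$ (named pipe) shares, letting later code skip disk-only calls; per the existing comment the field is always ASCII. The classification in isolation (classify_service and the enum are illustrative names):

    #include <string.h>

    enum share_type { SHARE_DISK, SHARE_IPC, SHARE_OTHER };

    static enum share_type classify_service(const char *svc)
    {
            if (strcmp(svc, "IPC") == 0)
                    return SHARE_IPC;   /* named-pipe share such as IPC$ */
            if (strcmp(svc, "A:") == 0)
                    return SHARE_DISK;  /* disk share, the most common case */
            return SHARE_OTHER;         /* e.g. a printer or COMM device */
    }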
@@ -3399,9 +3413,11 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
                                kfree(tcon->nativeFileSystem);
                                tcon->nativeFileSystem =
                                    kzalloc(length + 2, GFP_KERNEL);
-                               cifs_strfromUCS_le(tcon->nativeFileSystem,
-                                                  (__le16 *) bcc_ptr,
-                                                  length, nls_codepage);
+                               if (tcon->nativeFileSystem)
+                                       cifs_strfromUCS_le(
+                                               tcon->nativeFileSystem,
+                                               (__le16 *) bcc_ptr,
+                                               length, nls_codepage);
                                bcc_ptr += 2 * length;
                                bcc_ptr[0] = 0; /* null terminate the string */
                                bcc_ptr[1] = 0;
@@ -3416,8 +3432,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
                                kfree(tcon->nativeFileSystem);
                                tcon->nativeFileSystem =
                                    kzalloc(length + 1, GFP_KERNEL);
-                               strncpy(tcon->nativeFileSystem, bcc_ptr,
-                                       length);
+                               if (tcon->nativeFileSystem)
+                                       strncpy(tcon->nativeFileSystem, bcc_ptr,
+                                               length);
                        }
                        /* else do not bother copying these information fields */
                }
@@ -3433,8 +3450,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
                ses->ipc_tid = smb_buffer_response->Tid;
        }
 
-       if (smb_buffer)
-               cifs_buf_release(smb_buffer);
+       cifs_buf_release(smb_buffer);
        return rc;
 }
 
index 4830acc86d747740c9e2cd96c3d8465beb1287d9..793404b109252b7f589a1d5f11da8667e76379c2 100644 (file)
@@ -3,7 +3,7 @@
  *
  *   vfs operations that deal with dentries
  *
- *   Copyright (C) International Business Machines  Corp., 2002,2005
+ *   Copyright (C) International Business Machines  Corp., 2002,2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -269,7 +269,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
                        CIFSSMBClose(xid, pTcon, fileHandle);
                } else if (newinode) {
                        pCifsFile =
-                          kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL);
+                          kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 
                        if (pCifsFile == NULL)
                                goto cifs_create_out;
@@ -397,7 +397,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
                                /* BB Do not bother to decode buf since no
                                   local inode yet to put timestamps in,
                                   but we can reuse it safely */
-                               int bytes_written;
+                               unsigned int bytes_written;
                                struct win_dev *pdev;
                                pdev = (struct win_dev *)buf;
                                if (S_ISCHR(mode)) {
@@ -450,8 +450,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 
        xid = GetXid();
 
-       cFYI(1,
-            (" parent inode = 0x%p name is: %s and dentry = 0x%p",
+       cFYI(1, (" parent inode = 0x%p name is: %s and dentry = 0x%p",
              parent_dir_inode, direntry->d_name.name, direntry));
 
        /* check whether path exists */
index 893fd0aebff82160ec04ecd6b4de5825c7bfa956..d614b91caeca5f53ad188dd5f20735f1a88d9501 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/exportfs.h>
 #include "cifsglob.h"
 #include "cifs_debug.h"
+#include "cifsfs.h"
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 static struct dentry *cifs_get_parent(struct dentry *dentry)
index 894b1f7b299d5d5dc679b391bb54ca6e1abc092e..1e7e4c06d9e3f8771f813064fcdc2bcce29596cc 100644 (file)
@@ -467,7 +467,7 @@ reopen_error_exit:
 int cifs_close(struct inode *inode, struct file *file)
 {
        int rc = 0;
-       int xid;
+       int xid, timeout;
        struct cifs_sb_info *cifs_sb;
        struct cifsTconInfo *pTcon;
        struct cifsFileInfo *pSMBFile =
@@ -485,9 +485,9 @@ int cifs_close(struct inode *inode, struct file *file)
                        /* no sense reconnecting to close a file that is
                           already closed */
                        if (pTcon->tidStatus != CifsNeedReconnect) {
-                               int timeout = 2;
+                               timeout = 2;
                                while ((atomic_read(&pSMBFile->wrtPending) != 0)
-                                        && (timeout < 1000) ) {
+                                       && (timeout <= 2048)) {
                                        /* Give write a better chance to get to
                                        server ahead of the close.  We do not
                                        want to add a wait_q here as it would
@@ -522,12 +522,30 @@ int cifs_close(struct inode *inode, struct file *file)
                list_del(&pSMBFile->flist);
                list_del(&pSMBFile->tlist);
                write_unlock(&GlobalSMBSeslock);
+               timeout = 10;
+               /* We waited above to give the SMBWrite a chance to issue
+                  on the wire (so we do not get SMBWrite returning EBADF
+                  if writepages is racing with close).  Note that writepages
+                  does not specify a file handle, so it is possible for a file
+                  to be opened twice, and the application to close the "wrong"
+                  file handle - in these cases we delay long enough to allow
+                  the SMBWrite to get on the wire before the SMB Close.
+                  We allow a total wait here of over 45 seconds, more than
+                  the oplock break time, and more than enough to allow any
+                  write to complete on the server, or to time out on the client */
+               while ((atomic_read(&pSMBFile->wrtPending) != 0)
+                               && (timeout <= 50000)) {
+                       cERROR(1, ("writes pending, delay free of handle"));
+                       msleep(timeout);
+                       timeout *= 8;
+               }
                kfree(pSMBFile->search_resume_name);
                kfree(file->private_data);
                file->private_data = NULL;
        } else
                rc = -EBADF;
 
+       read_lock(&GlobalSMBSeslock);
        if (list_empty(&(CIFS_I(inode)->openFileList))) {
                cFYI(1, ("closing last open instance for inode %p", inode));
                /* if the file is not open we do not know if we can cache info
@@ -535,6 +553,7 @@ int cifs_close(struct inode *inode, struct file *file)
                CIFS_I(inode)->clientCanCacheRead = FALSE;
                CIFS_I(inode)->clientCanCacheAll  = FALSE;
        }
+       read_unlock(&GlobalSMBSeslock);
        if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
                rc = CIFS_I(inode)->write_behind_rc;
        FreeXid(xid);
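The new wait loop is a bounded exponential backoff: it sleeps 10, 80, 640, 5120 and 40960 ms, just under 47 seconds in total, which matches the "over 45 seconds" promised by the comment. The same loop in isolation (a user-space sketch; drain_pending_writes is an illustrative name):

    #include <stdatomic.h>
    #include <time.h>

    static void drain_pending_writes(atomic_int *pending)
    {
            unsigned int timeout = 10;      /* milliseconds */

            /* sleep 10, 80, 640, 5120, 40960 ms, then stop waiting */
            while (atomic_load(pending) != 0 && timeout <= 50000) {
                    struct timespec ts = { timeout / 1000,
                                           (timeout % 1000) * 1000000L };
                    nanosleep(&ts, NULL);
                    timeout *= 8;
            }
    }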
@@ -767,7 +786,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
                        mutex_lock(&fid->lock_mutex);
                        list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
                                if (pfLock->fl_start <= li->offset &&
-                                               length >= li->length) {
+                                               (pfLock->fl_start + length) >=
+                                               (li->offset + li->length)) {
                                        stored_rc = CIFSSMBLock(xid, pTcon,
                                                        netfid,
                                                        li->length, li->offset,
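The old test compared the unlock start against the lock start but only the lengths against each other, so an unlock could release a stored byte-range lock it did not actually cover. The corrected condition is plain interval containment (sketch; the name is illustrative):

    /* does unlock [ustart, ustart+ulen) cover lock [lstart, lstart+llen)? */
    static int unlock_covers_lock(unsigned long long ustart,
                                  unsigned long long ulen,
                                  unsigned long long lstart,
                                  unsigned long long llen)
    {
            return ustart <= lstart && ustart + ulen >= lstart + llen;
    }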
@@ -1022,6 +1042,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
        }
 
        read_lock(&GlobalSMBSeslock);
+refind_writable:
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (open_file->closePend)
                        continue;
@@ -1029,24 +1050,49 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
                    ((open_file->pfile->f_flags & O_RDWR) ||
                     (open_file->pfile->f_flags & O_WRONLY))) {
                        atomic_inc(&open_file->wrtPending);
+
+                       if (!open_file->invalidHandle) {
+                               /* found a good writable file */
+                               read_unlock(&GlobalSMBSeslock);
+                               return open_file;
+                       }
+
                        read_unlock(&GlobalSMBSeslock);
-                       if ((open_file->invalidHandle) &&
-                          (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) {
-                               rc = cifs_reopen_file(open_file->pfile, FALSE);
-                               /* if it fails, try another handle - might be */
-                               /* dangerous to hold up writepages with retry */
-                               if (rc) {
-                                       cFYI(1,
-                                             ("failed on reopen file in wp"));
+                       /* Had to unlock since following call can block */
+                       rc = cifs_reopen_file(open_file->pfile, FALSE);
+                       if (!rc) {
+                               if (!open_file->closePend)
+                                       return open_file;
+                               else { /* start over in case this was deleted */
+                                      /* since the list could be modified */
                                        read_lock(&GlobalSMBSeslock);
-                                       /* can not use this handle, no write
-                                       pending on this one after all */
-                                       atomic_dec
-                                            (&open_file->wrtPending);
-                                       continue;
+                                       atomic_dec(&open_file->wrtPending);
+                                       goto refind_writable;
                                }
                        }
-                       return open_file;
+
+                       /* if it fails, try another handle if possible -
+                       we can not do this if closePend is set, since
+                       the list could be modified, in which case we
+                       would have to start at the beginning of the
+                       list again. Note that it would be bad
+                       to hold up writepages here (rather than
+                       in caller) with continuous retries */
+                       cFYI(1, ("wp failed on reopen file"));
+                       read_lock(&GlobalSMBSeslock);
+                       /* can not use this handle, no write
+                          pending on this one after all */
+                       atomic_dec(&open_file->wrtPending);
+
+                       if (open_file->closePend) /* list could have changed */
+                               goto refind_writable;
+                       /* else we simply continue to the next entry. Thus
+                          we do not loop on reopen errors.  If we
+                          can not reopen the file, for example if we
+                          reconnected to a server with another client
+                          racing to delete or lock the file, we would not
+                          make progress if we restarted at the beginning
+                          of the loop here. */
                }
        }
        read_unlock(&GlobalSMBSeslock);
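The rewritten loop follows a standard pattern: a lock-protected list walk must not block, so the lock is dropped around the blocking reopen, and the walk restarts from the head whenever the entry might have been unlinked in the meantime. A simplified user-space sketch with a mutex standing in for the reader-writer lock (all names illustrative; the stub reopen always succeeds, and entries are assumed never freed so following h->next stays safe):

    #include <pthread.h>
    #include <stddef.h>

    struct handle { struct handle *next; int invalid; int close_pend; int refs; };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct handle *head;

    static int reopen(struct handle *h) { (void)h; return 0; }  /* stub */

    static struct handle *find_usable(void)
    {
            struct handle *h;

            pthread_mutex_lock(&list_lock);
    restart:
            for (h = head; h != NULL; h = h->next) {
                    if (h->close_pend)
                            continue;
                    h->refs++;
                    if (!h->invalid) {                 /* good handle, done */
                            pthread_mutex_unlock(&list_lock);
                            return h;
                    }
                    pthread_mutex_unlock(&list_lock);  /* reopen may block */
                    if (reopen(h) == 0 && !h->close_pend)
                            return h;
                    pthread_mutex_lock(&list_lock);
                    h->refs--;
                    if (h->close_pend)
                            goto restart;  /* list may have been modified */
                    /* else keep walking from h; we hold the lock again */
            }
            pthread_mutex_unlock(&list_lock);
            return NULL;
    }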
@@ -1709,7 +1755,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct page *page;
        struct cifs_sb_info *cifs_sb;
        struct cifsTconInfo *pTcon;
-       int bytes_read = 0;
+       unsigned int bytes_read = 0;
        unsigned int read_size, i;
        char *smb_read_data = NULL;
        struct smb_com_read_rsp *pSMBr;
@@ -1803,7 +1849,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 
                        i +=  bytes_read >> PAGE_CACHE_SHIFT;
                        cifs_stats_bytes_read(pTcon, bytes_read);
-                       if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
+                       if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
                                i++; /* account for partial page */
 
                                /* server copy of file can have smaller size
index 279f3c5e0ce316bf72e9ddb166a50586da206cfb..5e8b388be3b672a7130e9e26f8bb954ed8321534 100644 (file)
@@ -115,7 +115,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
                inode->i_mode = le64_to_cpu(findData.Permissions);
                /* since we set the inode type below we need to mask off
                   to avoid strange results if bits set above */
-                       inode->i_mode &= ~S_IFMT;
+               inode->i_mode &= ~S_IFMT;
                if (type == UNIX_FILE) {
                        inode->i_mode |= S_IFREG;
                } else if (type == UNIX_SYMLINK) {
@@ -575,19 +575,33 @@ int cifs_get_inode_info(struct inode **pinode,
        return rc;
 }
 
+static const struct inode_operations cifs_ipc_inode_ops = {
+       .lookup = cifs_lookup,
+};
+
 /* gets root inode */
 void cifs_read_inode(struct inode *inode)
 {
-       int xid;
+       int xid, rc;
        struct cifs_sb_info *cifs_sb;
 
        cifs_sb = CIFS_SB(inode->i_sb);
        xid = GetXid();
 
        if (cifs_sb->tcon->unix_ext)
-               cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
+               rc = cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
        else
-               cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid);
+               rc = cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid);
+       if (rc && cifs_sb->tcon->ipc) {
+               cFYI(1, ("ipc connection - fake read inode"));
+               inode->i_mode |= S_IFDIR;
+               inode->i_nlink = 2;
+               inode->i_op = &cifs_ipc_inode_ops;
+               inode->i_fop = &simple_dir_operations;
+               inode->i_uid = cifs_sb->mnt_uid;
+               inode->i_gid = cifs_sb->mnt_gid;
+       }
+
        /* can not call macro FreeXid here since in a void func */
        _FreeXid(xid);
 }
@@ -919,18 +933,25 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
                        goto mkdir_out;
                }
 
+               mode &= ~current->fs->umask;
                rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT,
                                mode, NULL /* netfid */, pInfo, &oplock,
                                full_path, cifs_sb->local_nls,
                                cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
-               if (rc) {
+               if (rc == -EOPNOTSUPP) {
+                       kfree(pInfo);
+                       goto mkdir_retry_old;
+               } else if (rc) {
                        cFYI(1, ("posix mkdir returned 0x%x", rc));
                        d_drop(direntry);
                } else {
                        int obj_type;
-                       if (pInfo->Type == -1) /* no return info - go query */
+                       if (pInfo->Type == cpu_to_le32(-1)) {
+                               /* no return info, go query for it */
+                               kfree(pInfo);
                                goto mkdir_get_info;
+                       }
 /*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
        to set uid/gid */
                        inc_nlink(inode);
@@ -940,8 +961,10 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
                                direntry->d_op = &cifs_dentry_ops;
 
                        newinode = new_inode(inode->i_sb);
-                       if (newinode == NULL)
+                       if (newinode == NULL) {
+                               kfree(pInfo);
                                goto mkdir_get_info;
+                       }
                        /* Is an i_ino of zero legal? */
                        /* Are there sanity checks we can use to ensure that
                           the server is really filling in that field? */
@@ -972,7 +995,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
                kfree(pInfo);
                goto mkdir_out;
        }
-
+mkdir_retry_old:
        /* BB add setting the equivalent of mode via CreateX w/ACLs */
        rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
                          cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
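Two fixes land here: every early exit from the POSIX-mkdir path now frees pInfo, closing a leak, and -EOPNOTSUPP falls through to the legacy SMB mkdir via the new mkdir_retry_old label instead of failing the whole operation. The control flow in outline (user-space sketch; posix_mkdir and legacy_mkdir are illustrative stubs):

    #include <errno.h>
    #include <stdlib.h>

    /* stubs standing in for the POSIX-extension and legacy server calls */
    static int posix_mkdir(const char *p, void *info)
    { (void)p; (void)info; return -EOPNOTSUPP; }
    static int legacy_mkdir(const char *p) { (void)p; return 0; }

    static int do_mkdir(const char *path)
    {
            void *info = malloc(64);
            int rc;

            if (info == NULL)
                    return -ENOMEM;
            rc = posix_mkdir(path, info);
            if (rc == -EOPNOTSUPP) {
                    free(info);                 /* free on every early exit */
                    return legacy_mkdir(path);  /* server lacks the extension */
            }
            free(info);
            return rc;
    }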
@@ -1377,8 +1400,17 @@ static int cifs_vmtruncate(struct inode *inode, loff_t offset)
        }
        i_size_write(inode, offset);
        spin_unlock(&inode->i_lock);
+       /*
+        * unmap_mapping_range is called twice, first simply for efficiency
+        * so that truncate_inode_pages does fewer single-page unmaps. However
+        * after this first call, and before truncate_inode_pages finishes,
+        * it is possible for private pages to be COWed, which remain after
+        * truncate_inode_pages finishes, hence the second unmap_mapping_range
+        * call must be made for correctness.
+        */
        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
        truncate_inode_pages(mapping, offset);
+       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
        goto out_truncate;
 
 do_expand:
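The new comment deserves a demonstration: a page dirtied through a MAP_PRIVATE mapping becomes an anonymous COW copy, so truncating the backing file does not take it away, and that is why the second unmap_mapping_range call is required for correctness. A user-space illustration of the underlying behaviour (demo only; the path and sizes are arbitrary and error checks are omitted):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/tmp/cow-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
            char *p;

            ftruncate(fd, 4096);
            p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
            p[0] = 'x';               /* the write forces a private COW copy */
            ftruncate(fd, 0);         /* truncate the backing file */
            printf("%c\n", p[0]);     /* still 'x': the COW page survived */
            munmap(p, 4096);
            close(fd);
            return unlink("/tmp/cow-demo");
    }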
@@ -1469,7 +1501,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                        atomic_dec(&open_file->wrtPending);
                        cFYI(1, ("SetFSize for attrs rc = %d", rc));
                        if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-                               int bytes_written;
+                               unsigned int bytes_written;
                                rc = CIFSSMBWrite(xid, pTcon,
                                                  nfid, 0, attrs->ia_size,
                                                  &bytes_written, NULL, NULL,
@@ -1502,7 +1534,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
                                        cifs_sb->mnt_cifs_flags &
                                                CIFS_MOUNT_MAP_SPECIAL_CHR);
                                if (rc == 0) {
-                                       int bytes_written;
+                                       unsigned int bytes_written;
                                        rc = CIFSSMBWrite(xid, pTcon,
                                                        netfid, 0,
                                                        attrs->ia_size,
index 6a85ef7b879752ec04e093784e3eef61b676dd29..11f265726db780e968e8084359696bc8a4c6446b 100644 (file)
@@ -237,7 +237,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
        char *tmp_path = NULL;
        char *tmpbuffer;
        unsigned char *referrals = NULL;
-       int num_referrals = 0;
+       unsigned int num_referrals = 0;
        int len;
        __u16 fid;
 
index 0bcec0844bee8830099f9ae264e8cd65860bf012..51ec681fe74a2e32c19d90e2f45de591bb3ab048 100644 (file)
@@ -169,7 +169,6 @@ cifs_buf_get(void)
 void
 cifs_buf_release(void *buf_to_free)
 {
-
        if (buf_to_free == NULL) {
                /* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/
                return;
index 2bfed3f45d0f97c4d9765bd6842c07e1e39f515f..f06359cb22ee96645e7dfd7b0aa62cc0de216b2b 100644 (file)
@@ -114,10 +114,16 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
        {ERRusempx, -EIO},
        {ERRusestd, -EIO},
        {ERR_NOTIFY_ENUM_DIR, -ENOBUFS},
-       {ERRaccountexpired, -EACCES},
+       {ERRnoSuchUser, -EACCES},
+/*     {ERRaccountexpired, -EACCES},
        {ERRbadclient, -EACCES},
        {ERRbadLogonTime, -EACCES},
-       {ERRpasswordExpired, -EACCES},
+       {ERRpasswordExpired, -EACCES},*/
+       {ERRaccountexpired, -EKEYEXPIRED},
+       {ERRbadclient, -EACCES},
+       {ERRbadLogonTime, -EACCES},
+       {ERRpasswordExpired, -EKEYEXPIRED},
+
        {ERRnosupport, -EINVAL},
        {0, 0}
 };
@@ -270,7 +276,7 @@ static const struct {
         from NT_STATUS_NO_SUCH_USER to NT_STATUS_LOGON_FAILURE
         during the session setup } */
        {
-       ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, {
+       ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, { /* could map to 2238 */
        ERRHRD, ERRgeneral, NT_STATUS_GROUP_EXISTS}, {
        ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_GROUP}, {
        ERRHRD, ERRgeneral, NT_STATUS_MEMBER_IN_GROUP}, {
@@ -285,10 +291,10 @@ static const struct {
        ERRHRD, ERRgeneral, NT_STATUS_PASSWORD_RESTRICTION}, {
        ERRDOS, ERRnoaccess, NT_STATUS_LOGON_FAILURE}, {
        ERRHRD, ERRgeneral, NT_STATUS_ACCOUNT_RESTRICTION}, {
-       ERRSRV, 2241, NT_STATUS_INVALID_LOGON_HOURS}, {
-       ERRSRV, 2240, NT_STATUS_INVALID_WORKSTATION}, {
+       ERRSRV, ERRbadLogonTime, NT_STATUS_INVALID_LOGON_HOURS}, {
+       ERRSRV, ERRbadclient, NT_STATUS_INVALID_WORKSTATION}, {
        ERRSRV, ERRpasswordExpired, NT_STATUS_PASSWORD_EXPIRED}, {
-       ERRSRV, 2239, NT_STATUS_ACCOUNT_DISABLED}, {
+       ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_DISABLED}, {
        ERRHRD, ERRgeneral, NT_STATUS_NONE_MAPPED}, {
        ERRHRD, ERRgeneral, NT_STATUS_TOO_MANY_LUIDS_REQUESTED}, {
        ERRHRD, ERRgeneral, NT_STATUS_LUIDS_EXHAUSTED}, {
@@ -585,7 +591,7 @@ static const struct {
        ERRDOS, ERRnoaccess, NT_STATUS_TRUST_FAILURE}, {
        ERRHRD, ERRgeneral, NT_STATUS_MUTANT_LIMIT_EXCEEDED}, {
        ERRDOS, ERRnetlogonNotStarted, NT_STATUS_NETLOGON_NOT_STARTED}, {
-       ERRSRV, 2239, NT_STATUS_ACCOUNT_EXPIRED}, {
+       ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_EXPIRED}, {
        ERRHRD, ERRgeneral, NT_STATUS_POSSIBLE_DEADLOCK}, {
        ERRHRD, ERRgeneral, NT_STATUS_NETWORK_CREDENTIAL_CONFLICT}, {
        ERRHRD, ERRgeneral, NT_STATUS_REMOTE_SESSION_LIMIT}, {
@@ -754,7 +760,7 @@ ntstatus_to_dos(__u32 ntstatus, __u8 * eclass, __u16 * ecode)
 }
 
 int
-map_smb_to_linux_error(struct smb_hdr *smb)
+map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
 {
        unsigned int i;
        int rc = -EIO;  /* if transport error smb error may not be set */
@@ -771,7 +777,9 @@ map_smb_to_linux_error(struct smb_hdr *smb)
                /* translate the newer STATUS codes to old style SMB errors
                 * and then to POSIX errors */
                __u32 err = le32_to_cpu(smb->Status.CifsError);
-               if (cifsFYI & CIFS_RC)
+               if (logErr && (err != (NT_STATUS_MORE_PROCESSING_REQUIRED)))
+                       cifs_print_status(err);
+               else if (cifsFYI & CIFS_RC)
                        cifs_print_status(err);
                ntstatus_to_dos(err, &smberrclass, &smberrcode);
        } else {
@@ -813,7 +821,7 @@ map_smb_to_linux_error(struct smb_hdr *smb)
        }
        /* else ERRHRD class errors or junk  - return EIO */
 
-       cFYI(1, (" !!Mapping smb error code %d to POSIX err %d !!",
+       cFYI(1, ("Mapping smb error code %d to POSIX err %d",
                 smberrcode, rc));
 
        /* generic corrective action e.g. reconnect SMB session on
@@ -899,8 +907,11 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
                cERROR(1, ("illegal hours %d", st->Hours));
        days = sd->Day;
        month = sd->Month;
-       if ((days > 31) || (month > 12))
+       if ((days > 31) || (month > 12)) {
                cERROR(1, ("illegal date, month %d day: %d", month, days));
+               if (month > 12)
+                       month = 12;
+       }
        month -= 1;
        days += total_days_of_prev_months[month];
        days += 3652; /* account for difference in days between 1980 and 1970 */
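Clamping month matters because, one line later, it is decremented and used to index total_days_of_prev_months; a corrupt month from the server would otherwise read past the per-month table. For reference, the DOS date word being decoded packs its fields like this (decode_dos_date is an illustrative name):

    static void decode_dos_date(unsigned short d, int *day, int *month, int *year)
    {
            *day   =  d        & 0x1f;          /* bits 0-4:  1..31 */
            *month = (d >> 5)  & 0x0f;          /* bits 5-8:  1..12 */
            *year  = ((d >> 9) & 0x7f) + 1980;  /* bits 9-15: years since 1980 */
    }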
index 916df9431336734b8cab6495659ef4d345133498..3746580e97016f390a491684ecfae3a14cbd5796 100644 (file)
@@ -121,7 +121,7 @@ static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode)
 
 
 static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
-                         char *buf, int *pobject_type, int isNewInode)
+                         char *buf, unsigned int *pobject_type, int isNewInode)
 {
        loff_t local_size;
        struct timespec local_mtime;
@@ -294,7 +294,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 }
 
 static void unix_fill_in_inode(struct inode *tmp_inode,
-       FILE_UNIX_INFO *pfindData, int *pobject_type, int isNewInode)
+       FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode)
 {
        loff_t local_size;
        struct timespec local_mtime;
@@ -826,7 +826,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
        int rc = 0;
        struct qstr qstring;
        struct cifsFileInfo *pCifsF;
-       unsigned obj_type;
+       unsigned int obj_type;
        ino_t  inum;
        struct cifs_sb_info *cifs_sb;
        struct inode *tmp_inode;
@@ -1067,7 +1067,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
                for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
                        if (current_entry == NULL) {
                                /* evaluate whether this case is an error */
-                               cERROR(1,("past end of SMB num to fill %d i %d",
+                               cERROR(1, ("past SMB end, num to fill %d i %d",
                                          num_to_fill, i));
                                break;
                        }
index 892be9b4d1f362351de1c0075b2f9b3086aff514..899dc6078d9ab7848b3ffda2547161c212c2e9e5 100644 (file)
@@ -67,14 +67,59 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
                pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
                capabilities |= CAP_DFS;
        }
-       if (ses->capabilities & CAP_UNIX) {
+       if (ses->capabilities & CAP_UNIX)
                capabilities |= CAP_UNIX;
-       }
 
        /* BB check whether to init vcnum BB */
        return capabilities;
 }
 
+static void
+unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
+{
+       char *bcc_ptr = *pbcc_area;
+       int bytes_ret = 0;
+
+       /* Copy OS version */
+       bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
+                                 nls_cp);
+       bcc_ptr += 2 * bytes_ret;
+       bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
+                                 32, nls_cp);
+       bcc_ptr += 2 * bytes_ret;
+       bcc_ptr += 2; /* trailing null */
+
+       bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
+                                 32, nls_cp);
+       bcc_ptr += 2 * bytes_ret;
+       bcc_ptr += 2; /* trailing null */
+
+       *pbcc_area = bcc_ptr;
+}
+
+static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses,
+                                  const struct nls_table *nls_cp)
+{
+       char *bcc_ptr = *pbcc_area;
+       int bytes_ret = 0;
+
+       /* copy domain */
+       if (ses->domainName == NULL) {
+               /* Sending a null domain is better than using a bogus domain name
+               (as we did briefly in 2.6.18) since the server will use its default */
+               *bcc_ptr = 0;
+               *(bcc_ptr+1) = 0;
+               bytes_ret = 0;
+       } else
+               bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
+                                         256, nls_cp);
+       bcc_ptr += 2 * bytes_ret;
+       bcc_ptr += 2;  /* account for null terminator */
+
+       *pbcc_area = bcc_ptr;
+}
+
+
 static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
                                   const struct nls_table *nls_cp)
 {
@@ -100,32 +145,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
        }
        bcc_ptr += 2 * bytes_ret;
        bcc_ptr += 2; /* account for null termination */
-       /* copy domain */
-       if (ses->domainName == NULL) {
-               /* Sending null domain better than using a bogus domain name (as
-               we did briefly in 2.6.18) since server will use its default */
-               *bcc_ptr = 0;
-               *(bcc_ptr+1) = 0;
-               bytes_ret = 0;
-       } else
-               bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
-                                         256, nls_cp);
-       bcc_ptr += 2 * bytes_ret;
-       bcc_ptr += 2;  /* account for null terminator */
-
-       /* Copy OS version */
-       bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
-                                 nls_cp);
-       bcc_ptr += 2 * bytes_ret;
-       bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
-                                 32, nls_cp);
-       bcc_ptr += 2 * bytes_ret;
-       bcc_ptr += 2; /* trailing null */
 
-       bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
-                                 32, nls_cp);
-       bcc_ptr += 2 * bytes_ret;
-       bcc_ptr += 2; /* trailing null */
+       unicode_domain_string(&bcc_ptr, ses, nls_cp);
+       unicode_oslm_strings(&bcc_ptr, nls_cp);
 
        *pbcc_area = bcc_ptr;
 }
@@ -203,14 +225,11 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
        if (len >= words_left)
                return rc;
 
-       if (ses->serverOS)
-               kfree(ses->serverOS);
+       kfree(ses->serverOS);
        /* UTF-8 string will not grow more than four times as big as UCS-16 */
        ses->serverOS = kzalloc(4 * len, GFP_KERNEL);
-       if (ses->serverOS != NULL) {
-               cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len,
-                                  nls_cp);
-       }
+       if (ses->serverOS != NULL)
+               cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp);
        data += 2 * (len + 1);
        words_left -= len + 1;
 
@@ -220,8 +239,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
        if (len >= words_left)
                return rc;
 
-       if (ses->serverNOS)
-               kfree(ses->serverNOS);
+       kfree(ses->serverNOS);
        ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */
        if (ses->serverNOS != NULL) {
                cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
@@ -240,8 +258,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
        if (len > words_left)
                return rc;
 
-       if (ses->serverDomain)
-               kfree(ses->serverDomain);
+       kfree(ses->serverDomain);
        ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
        if (ses->serverDomain != NULL) {
                cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
@@ -271,8 +288,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
        if (len >= bleft)
                return rc;
 
-       if (ses->serverOS)
-               kfree(ses->serverOS);
+       kfree(ses->serverOS);
 
        ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
        if (ses->serverOS)
@@ -289,8 +305,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
        if (len >= bleft)
                return rc;
 
-       if (ses->serverNOS)
-               kfree(ses->serverNOS);
+       kfree(ses->serverNOS);
 
        ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
        if (ses->serverNOS)
@@ -479,7 +494,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
                if (ses->capabilities & CAP_UNICODE) {
                        if (iov[0].iov_len % 2) {
                                *bcc_ptr = 0;
-                       }       bcc_ptr++;
+                               bcc_ptr++;
+                       }
                        unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
                } else
                        ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
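The bug being fixed: the old code closed the brace before bcc_ptr++, so the pointer advanced even when no pad byte had been written, corrupting the strings that follow. Only the odd-offset case needs the pad, because the UTF-16 strings must start on a 2-byte boundary (sketch; align_for_ucs2 is an illustrative name):

    static char *align_for_ucs2(char *p, unsigned long bytes_used)
    {
            if (bytes_used % 2) {   /* odd offset: emit one NUL pad byte */
                    *p = 0;
                    p++;
            }
            return p;
    }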
@@ -497,7 +513,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 
        iov[1].iov_base = str_area;
        iov[1].iov_len = count;
-       rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0);
+       rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type,
+                         0 /* not long op */, 1 /* log NT STATUS if any */ );
        /* SMB request buf freed in SendReceive2 */
 
        cFYI(1, ("ssetup rc from sendrecv2 is %d", rc));
index 2ef0be28882033e560925957a19307d99c52685c..7f50e8577c1c144c8e28e443b54ac6996db87fd6 100644 (file)
 #define ERRusestd              251     /* temporarily unable to use either raw
                                           or mpx */
 #define ERR_NOTIFY_ENUM_DIR    1024
+#define ERRnoSuchUser          2238    /* user account does not exist */
 #define ERRaccountexpired      2239
-#define ERRbadclient           2240
-#define ERRbadLogonTime                2241
+#define ERRbadclient           2240    /* can not logon from this client */
+#define ERRbadLogonTime                2241    /* logon hours do not allow this */
 #define ERRpasswordExpired     2242
 #define ERRnetlogonNotStarted  2455
 #define ERRnosupport           0xFFFF
index 746bc9405db15ef8e8d7a3a01f50ca21b5337db7..7ed32b3cb781a0011a0560b7fe3898825f5c9e4f 100644 (file)
@@ -55,7 +55,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
        if (temp == NULL)
                return temp;
        else {
-               memset(temp, 0, sizeof (struct mid_q_entry));
+               memset(temp, 0, sizeof(struct mid_q_entry));
                temp->mid = smb_buffer->Mid;    /* always LE */
                temp->pid = current->pid;
                temp->command = smb_buffer->Command;
@@ -158,7 +158,7 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
        iov.iov_len = len;
 
        smb_msg.msg_name = sin;
-       smb_msg.msg_namelen = sizeof (struct sockaddr);
+       smb_msg.msg_namelen = sizeof(struct sockaddr);
        smb_msg.msg_control = NULL;
        smb_msg.msg_controllen = 0;
        smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -228,7 +228,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
                return -ENOTSOCK; /* BB eventually add reconnect code here */
 
        smb_msg.msg_name = sin;
-       smb_msg.msg_namelen = sizeof (struct sockaddr);
+       smb_msg.msg_namelen = sizeof(struct sockaddr);
        smb_msg.msg_control = NULL;
        smb_msg.msg_controllen = 0;
        smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -363,9 +363,8 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
                } /* else ok - we are setting up session */
        }
        *ppmidQ = AllocMidQEntry(in_buf, ses);
-       if (*ppmidQ == NULL) {
+       if (*ppmidQ == NULL)
                return -ENOMEM;
-       }
        return 0;
 }
 
@@ -419,7 +418,7 @@ static int wait_for_response(struct cifsSesInfo *ses,
 int
 SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
             struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
-            const int long_op)
+            const int long_op, const int logError)
 {
        int rc = 0;
        unsigned int receive_len;
@@ -465,7 +464,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                wake_up(&ses->server->request_q);
                return rc;
        }
-
        rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
 
        midQ->midState = MID_REQUEST_SUBMITTED;
@@ -568,13 +566,11 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                        }
 
                        /* BB special case reconnect tid and uid here? */
-                       /* BB special case Errbadpassword and pwdexpired here */
-                       rc = map_smb_to_linux_error(midQ->resp_buf);
+                       rc = map_smb_to_linux_error(midQ->resp_buf, logError);
 
                        /* convert ByteCount if necessary */
-                       if (receive_len >=
-                           sizeof (struct smb_hdr) -
-                           4 /* do not count RFC1001 header */  +
+                       if (receive_len >= sizeof(struct smb_hdr) - 4
+                           /* do not count RFC1001 header */  +
                            (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
                                BCC(midQ->resp_buf) =
                                        le16_to_cpu(BCC_LE(midQ->resp_buf));
@@ -749,12 +745,11 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
                        *pbytes_returned = out_buf->smb_buf_length;
 
                        /* BB special case reconnect tid and uid here? */
-                       rc = map_smb_to_linux_error(out_buf);
+                       rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
 
                        /* convert ByteCount if necessary */
-                       if (receive_len >=
-                           sizeof (struct smb_hdr) -
-                           4 /* do not count RFC1001 header */  +
+                       if (receive_len >= sizeof(struct smb_hdr) - 4
+                           /* do not count RFC1001 header */  +
                            (2 * out_buf->WordCount) + 2 /* bcc */ )
                                BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
                } else {
@@ -993,12 +988,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
                        *pbytes_returned = out_buf->smb_buf_length;
 
                        /* BB special case reconnect tid and uid here? */
-                       rc = map_smb_to_linux_error(out_buf);
+                       rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
 
                        /* convert ByteCount if necessary */
-                       if (receive_len >=
-                           sizeof (struct smb_hdr) -
-                           4 /* do not count RFC1001 header */  +
+                       if (receive_len >= sizeof(struct smb_hdr) - 4
+                           /* do not count RFC1001 header */  +
                            (2 * out_buf->WordCount) + 2 /* bcc */ )
                                BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
                } else {
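
The net effect of the new final argument is that callers of SendReceive2() can now tell map_smb_to_linux_error() whether to log the mapped SMB error. A minimal caller sketch, assuming the usual CIFS-internal headers are in scope; the helper name and the long_op value of 0 are hypothetical, not from this patch:

    /* Hypothetical helper: issue a request whose expected failure should
     * not be logged; logError = 0 is threaded down to
     * map_smb_to_linux_error(). */
    static int send_recv_quietly(const unsigned int xid,
                                 struct cifsSesInfo *ses,
                                 struct kvec *iov, int n_vec)
    {
            int resp_buf_type;

            return SendReceive2(xid, ses, iov, n_vec, &resp_buf_type,
                                0 /* long_op */, 0 /* logError: silent */);
    }
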
index f61e433d281c908c6b106a8f3c95ed8bf267ec9b..369e838bebd3142554b72d58e7333547079b3e41 100644 (file)
@@ -261,21 +261,26 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
                                cifs_sb->local_nls,
                                cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
-/*             else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+               else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
                        __u16 fid;
                        int oplock = FALSE;
-                       rc = CIFSSMBOpen(xid, pTcon, full_path,
-                                        FILE_OPEN, GENERIC_READ, 0, &fid,
-                                        &oplock, NULL, cifs_sb->local_nls,
-                                        cifs_sb->mnt_cifs_flags &
-                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
+                       if (experimEnabled) 
+                               rc = CIFSSMBOpen(xid, pTcon, full_path,
+                                       FILE_OPEN, GENERIC_READ, 0, &fid,
+                                       &oplock, NULL, cifs_sb->local_nls,
+                                       cifs_sb->mnt_cifs_flags &
+                                       CIFS_MOUNT_MAP_SPECIAL_CHR);
+                       /* else rc is EOPNOTSUPP from above */
+
                        if(rc == 0) {
                                rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
                                        ea_value, buf_size,
                                        ACL_TYPE_ACCESS);
                                CIFSSMBClose(xid, pTcon, fid);
                        }
-               } */  /* BB enable after fixing up return data */
+               }
+#endif /* EXPERIMENTAL */
 #else
                cFYI(1, ("query POSIX ACL not supported yet"));
 #endif /* CONFIG_CIFS_POSIX */
index cdb4c07a78700a93e34739a4aadddda96bfce8cc..359e531094ddddb74a88de2dbb74e11dde27d528 100644 (file)
@@ -51,7 +51,7 @@ static void *alloc_upcall(int opcode, int size)
 
         inp->ih.opcode = opcode;
        inp->ih.pid = current->pid;
-       inp->ih.pgid = process_group(current);
+       inp->ih.pgid = task_pgrp_nr(current);
 #ifdef CONFIG_CODA_FS_OLD_API
        memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
        inp->ih.cred.cr_fsuid = current->fsuid;
index 6438941ab1f8baf21e4ca88ce5d05b120b525f8b..4f741546f4bbce873079bee9060eac437934d8b6 100644 (file)
@@ -456,7 +456,7 @@ static int check_version(struct dlm_write_request *req)
                printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
                       "user (%d.%d.%d) kernel (%d.%d.%d)\n",
                       current->comm,
-                      current->pid,
+                      task_pid_nr(current),
                       req->version[0],
                       req->version[1],
                       req->version[2],
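
These current->pid conversions are part of the pid-namespace work: task_pid_nr() returns the task's global pid and stays well-defined once a task can carry a different pid per namespace. A minimal sketch of the accessors in isolation; the function below is hypothetical:

    #include <linux/kernel.h>
    #include <linux/sched.h>

    /* Prefer the accessors over raw ->pid / ->tgid field access. */
    static void log_current_ids(void)
    {
            printk(KERN_DEBUG "%s(%d): tgid %d\n", current->comm,
                   task_pid_nr(current), task_tgid_nr(current));
    }
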
index de61892919549d84d2add0d7db5b8c16de1ca521..34f68f3a069a1b7d4924dc8be718927f9e6c791d 100644 (file)
@@ -325,15 +325,14 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
        int wake_nests = 0;
        unsigned long flags;
        struct task_struct *this_task = current;
-       struct list_head *lsthead = &psw->wake_task_list, *lnk;
+       struct list_head *lsthead = &psw->wake_task_list;
        struct wake_task_node *tncur;
        struct wake_task_node tnode;
 
        spin_lock_irqsave(&psw->lock, flags);
 
        /* Try to see if the current task is already inside this wakeup call */
-       list_for_each(lnk, lsthead) {
-               tncur = list_entry(lnk, struct wake_task_node, llink);
+       list_for_each_entry(tncur, lsthead, llink) {
 
                if (tncur->wq == wq ||
                    (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
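
Switching from list_for_each() plus list_entry() to list_for_each_entry() removes the scratch struct list_head pointer and the per-iteration container_of step. The same idiom in isolation, with a hypothetical search helper over the structure used above:

    #include <linux/list.h>
    #include <linux/sched.h>
    #include <linux/wait.h>

    struct wake_task_node {
            struct list_head llink;
            struct task_struct *task;
            wait_queue_head_t *wq;
    };

    /* Hypothetical: is tsk already on the wakeup list? */
    static int task_on_wake_list(struct list_head *head,
                                 struct task_struct *tsk)
    {
            struct wake_task_node *tncur;

            /* the iterator is typed; no list_entry() needed in the body */
            list_for_each_entry(tncur, head, llink)
                    if (tncur->task == tsk)
                            return 1;
            return 0;
    }
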
index 070ddf13cb719d62ead6233961c2bb9716beed02..2c942e2d14ea2b3ebf2fb51bcf692e3a37565ae5 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -234,7 +234,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
        vma->vm_start = vma->vm_end - PAGE_SIZE;
 
        vma->vm_flags = VM_STACK_FLAGS;
-       vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+       vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        err = insert_vm_struct(mm, vma);
        if (err) {
                up_write(&mm->mmap_sem);
@@ -775,8 +775,8 @@ static int de_thread(struct task_struct *tsk)
         * Reparenting needs write_lock on tasklist_lock,
         * so it is safe to do it under read_lock.
         */
-       if (unlikely(tsk->group_leader == child_reaper(tsk)))
-               tsk->nsproxy->pid_ns->child_reaper = tsk;
+       if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
+               task_active_pid_ns(tsk)->child_reaper = tsk;
 
        zap_other_threads(tsk);
        read_unlock(&tasklist_lock);
@@ -841,8 +841,8 @@ static int de_thread(struct task_struct *tsk)
                 */
                tsk->start_time = leader->start_time;
 
-               BUG_ON(leader->tgid != tsk->tgid);
-               BUG_ON(tsk->pid == tsk->tgid);
+               BUG_ON(!same_thread_group(leader, tsk));
+               BUG_ON(has_group_leader_pid(tsk));
                /*
                 * An exec() starts a new thread group with the
                 * TGID of the previous thread group. Rehash the
@@ -857,7 +857,7 @@ static int de_thread(struct task_struct *tsk)
                 */
                detach_pid(tsk, PIDTYPE_PID);
                tsk->pid = leader->pid;
-               attach_pid(tsk, PIDTYPE_PID,  find_pid(tsk->pid));
+               attach_pid(tsk, PIDTYPE_PID,  task_pid(leader));
                transfer_pid(leader, tsk, PIDTYPE_PGID);
                transfer_pid(leader, tsk, PIDTYPE_SID);
                list_replace_rcu(&leader->tasks, &tsk->tasks);
@@ -1433,7 +1433,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
                        case 'p':
                                pid_in_pattern = 1;
                                rc = snprintf(out_ptr, out_end - out_ptr,
-                                             "%d", current->tgid);
+                                             "%d", task_tgid_vnr(current));
                                if (rc > out_end - out_ptr)
                                        goto out;
                                out_ptr += rc;
@@ -1513,7 +1513,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
        if (!ispipe && !pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
                rc = snprintf(out_ptr, out_end - out_ptr,
-                             ".%d", current->tgid);
+                             ".%d", task_tgid_vnr(current));
                if (rc > out_end - out_ptr)
                        goto out;
                out_ptr += rc;
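
task_tgid_vnr() reports the thread-group id relative to the caller's own pid namespace, so a core name expanded inside a container matches the pids visible there. A reduced sketch of the "%p" expansion; the helper name is hypothetical:

    #include <linux/kernel.h>
    #include <linux/sched.h>

    /* Expand a pid field using the namespace-relative tgid rather than
     * the raw current->tgid. */
    static int expand_core_pid(char *out, size_t len)
    {
            return snprintf(out, len, "%d", task_tgid_vnr(current));
    }
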
index 3dec003b773edb069b1269a7e4927fc19aa56040..9b162cd6c16c170ce7c35c3fc2f993ccc648cbc3 100644 (file)
@@ -2954,7 +2954,7 @@ int ext3_write_inode(struct inode *inode, int wait)
                return 0;
 
        if (ext3_journal_current_handle()) {
-               jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+               jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
                dump_stack();
                return -EIO;
        }
index f58cbb26323e1ea666d78eba4cb4d3bd4c16490a..408373819e34eddb7dec372b9d518189d54b4242 100644 (file)
@@ -741,12 +741,11 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
                }
        } else {
                /* Allocate a buffer where we construct the new block. */
-               s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+               s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
                /* assert(header == s->base) */
                error = -ENOMEM;
                if (s->base == NULL)
                        goto cleanup;
-               memset(s->base, 0, sb->s_blocksize);
                header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
                header(s->base)->h_blocks = cpu_to_le32(1);
                header(s->base)->h_refcount = cpu_to_le32(1);
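
This is the recurring kmalloc()+memset() to kzalloc() cleanup, which appears again in the jbd hunks below: one call both allocates and zeroes. A minimal sketch with a hypothetical wrapper name:

    #include <linux/slab.h>

    static void *alloc_zeroed(size_t size)
    {
            /* kzalloc(n, f) behaves like kmalloc(n, f) followed by
             * memset(p, 0, n) on success */
            return kzalloc(size, GFP_KERNEL);
    }
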
index c9db73fc5e3d2d2e137bab66724d4c91f3d7544c..8685263ccc4a7aa51d6666136c285687393bfabb 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/ptrace.h>
 #include <linux/signal.h>
 #include <linux/rcupdate.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -292,7 +293,7 @@ int f_setown(struct file *filp, unsigned long arg, int force)
                who = -who;
        }
        rcu_read_lock();
-       pid = find_pid(who);
+       pid = find_vpid(who);
        result = __f_setown(filp, pid, type, force);
        rcu_read_unlock();
        return result;
@@ -308,7 +309,7 @@ pid_t f_getown(struct file *filp)
 {
        pid_t pid;
        read_lock(&filp->f_owner.lock);
-       pid = pid_nr(filp->f_owner.pid);
+       pid = pid_nr_ns(filp->f_owner.pid, current->nsproxy->pid_ns);
        if (filp->f_owner.pid_type == PIDTYPE_PGID)
                pid = -pid;
        read_unlock(&filp->f_owner.lock);
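
F_SETOWN now resolves the user-supplied number in the caller's pid namespace via find_vpid(), and F_GETOWN translates the stored struct pid back with pid_nr_ns(). A sketch of that lookup/translate pair under RCU; the helper names are hypothetical:

    #include <linux/nsproxy.h>
    #include <linux/pid.h>
    #include <linux/pid_namespace.h>
    #include <linux/rcupdate.h>
    #include <linux/sched.h>

    /* Resolve a number from userspace in the caller's namespace. */
    static struct pid *lookup_user_pid(pid_t who)
    {
            struct pid *pid;

            rcu_read_lock();
            pid = get_pid(find_vpid(who));
            rcu_read_unlock();
            return pid;             /* caller must put_pid(); may be NULL */
    }

    /* Translate a struct pid into the caller's numbering. */
    static pid_t pid_nr_for_current(struct pid *pid)
    {
            return pid_nr_ns(pid, current->nsproxy->pid_ns);
    }
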
index 3176fefc92e139771a9c9065524c26e8e437e7fa..664e3f2309b89811b028603da0f17a6e266ee9fd 100644 (file)
@@ -323,12 +323,11 @@ void file_kill(struct file *file)
 
 int fs_may_remount_ro(struct super_block *sb)
 {
-       struct list_head *p;
+       struct file *file;
 
        /* Check that no files are currently opened for writing. */
        file_list_lock();
-       list_for_each(p, &sb->s_files) {
-               struct file *file = list_entry(p, struct file, f_u.fu_list);
+       list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
                struct inode *inode = file->f_path.dentry->d_inode;
 
                /* File with pending delete? */
index 686734ff973d947f6f393d2e5fe14427040e978c..0fca82021d7652df2d2fc8e1b1f33204e8712651 100644 (file)
@@ -89,7 +89,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
                        printk(KERN_DEBUG
                               "%s(%d): dirtied inode %lu (%s) on %s\n",
-                              current->comm, current->pid, inode->i_ino,
+                              current->comm, task_pid_nr(current), inode->i_ino,
                               name, inode->i_sb->s_id);
        }
 
index 10d2c211d18b28414aa8b0f26dcfe489fa0ddfe9..d6ff77e8e7ec563b273bd94d2458ba89b1355cee 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/capability.h>
 #include <linux/syscalls.h>
 #include <linux/security.h>
+#include <linux/pid_namespace.h>
 
 static int set_task_ioprio(struct task_struct *task, int ioprio)
 {
@@ -93,7 +94,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
                        if (!who)
                                p = current;
                        else
-                               p = find_task_by_pid(who);
+                               p = find_task_by_vpid(who);
                        if (p)
                                ret = set_task_ioprio(p, ioprio);
                        break;
@@ -101,7 +102,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
                        if (!who)
                                pgrp = task_pgrp(current);
                        else
-                               pgrp = find_pid(who);
+                               pgrp = find_vpid(who);
                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                                ret = set_task_ioprio(p, ioprio);
                                if (ret)
@@ -180,7 +181,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
                        if (!who)
                                p = current;
                        else
-                               p = find_task_by_pid(who);
+                               p = find_task_by_vpid(who);
                        if (p)
                                ret = get_task_ioprio(p);
                        break;
@@ -188,7 +189,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
                        if (!who)
                                pgrp = task_pgrp(current);
                        else
-                               pgrp = find_pid(who);
+                               pgrp = find_vpid(who);
                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                                tmpio = get_task_ioprio(p);
                                if (tmpio < 0)
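
The pgrp branches pair find_vpid() with the do_each_pid_task() iterator. A condensed sketch of that loop shape; the helper is hypothetical, and like sys_ioprio_set() above the caller is assumed to hold tasklist_lock or rcu_read_lock():

    #include <linux/pid.h>
    #include <linux/sched.h>

    /* Apply op() to every task in the process group userspace named. */
    static int for_each_pgrp_task(pid_t who,
                                  int (*op)(struct task_struct *))
    {
            struct pid *pgrp;
            struct task_struct *p;
            int ret = 0;

            pgrp = who ? find_vpid(who) : task_pgrp(current);
            do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                    ret = op(p);
                    if (ret)
                            break;
            } while_each_pid_task(pgrp, PIDTYPE_PGID, p);

            return ret;
    }
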
index a263d82761dfdc3fcf63be7900ed47a803b9b3b2..8f1f2aa5fb39b2df5b744c2f7e495145cf1b2df8 100644 (file)
@@ -466,7 +466,7 @@ void journal_commit_transaction(journal_t *journal)
        spin_unlock(&journal->j_list_lock);
 
        if (err)
-               __journal_abort_hard(journal);
+               journal_abort(journal, err);
 
        journal_write_revoke_records(journal, commit_transaction);
 
@@ -524,7 +524,7 @@ void journal_commit_transaction(journal_t *journal)
 
                        descriptor = journal_get_descriptor_buffer(journal);
                        if (!descriptor) {
-                               __journal_abort_hard(journal);
+                               journal_abort(journal, -EIO);
                                continue;
                        }
 
@@ -557,7 +557,7 @@ void journal_commit_transaction(journal_t *journal)
                   and repeat this loop: we'll fall into the
                   refile-on-abort condition above. */
                if (err) {
-                       __journal_abort_hard(journal);
+                       journal_abort(journal, err);
                        continue;
                }
 
@@ -748,7 +748,7 @@ wait_for_iobuf:
                err = -EIO;
 
        if (err)
-               __journal_abort_hard(journal);
+               journal_abort(journal, err);
 
        /* End of a transaction!  Finally, we can do checkpoint
            processing: any buffers committed as a result of this
index 5d9fec0b7ebd561501ed7973ff0c226c1db1aa2d..5d14243499d47d117fa662620f34da9d3bd76ed8 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/kthread.h>
 #include <linux/poison.h>
 #include <linux/proc_fs.h>
+#include <linux/debugfs.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -654,10 +655,9 @@ static journal_t * journal_init_common (void)
        journal_t *journal;
        int err;
 
-       journal = kmalloc(sizeof(*journal), GFP_KERNEL);
+       journal = kzalloc(sizeof(*journal), GFP_KERNEL);
        if (!journal)
                goto fail;
-       memset(journal, 0, sizeof(*journal));
 
        init_waitqueue_head(&journal->j_wait_transaction_locked);
        init_waitqueue_head(&journal->j_wait_logspace);
@@ -1852,64 +1852,41 @@ void journal_put_journal_head(struct journal_head *jh)
 }
 
 /*
- * /proc tunables
+ * debugfs tunables
  */
-#if defined(CONFIG_JBD_DEBUG)
-int journal_enable_debug;
-EXPORT_SYMBOL(journal_enable_debug);
-#endif
+#ifdef CONFIG_JBD_DEBUG
 
-#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
+u8 journal_enable_debug __read_mostly;
+EXPORT_SYMBOL(journal_enable_debug);
 
-static struct proc_dir_entry *proc_jbd_debug;
+static struct dentry *jbd_debugfs_dir;
+static struct dentry *jbd_debug;
 
-static int read_jbd_debug(char *page, char **start, off_t off,
-                         int count, int *eof, void *data)
+static void __init jbd_create_debugfs_entry(void)
 {
-       int ret;
-
-       ret = sprintf(page + off, "%d\n", journal_enable_debug);
-       *eof = 1;
-       return ret;
+       jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
+       if (jbd_debugfs_dir)
+               jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
+                                              jbd_debugfs_dir,
+                                              &journal_enable_debug);
 }
 
-static int write_jbd_debug(struct file *file, const char __user *buffer,
-                          unsigned long count, void *data)
+static void __exit jbd_remove_debugfs_entry(void)
 {
-       char buf[32];
-
-       if (count > ARRAY_SIZE(buf) - 1)
-               count = ARRAY_SIZE(buf) - 1;
-       if (copy_from_user(buf, buffer, count))
-               return -EFAULT;
-       buf[ARRAY_SIZE(buf) - 1] = '\0';
-       journal_enable_debug = simple_strtoul(buf, NULL, 10);
-       return count;
+       debugfs_remove(jbd_debug);
+       debugfs_remove(jbd_debugfs_dir);
 }
 
-#define JBD_PROC_NAME "sys/fs/jbd-debug"
+#else
 
-static void __init create_jbd_proc_entry(void)
+static inline void jbd_create_debugfs_entry(void)
 {
-       proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-       if (proc_jbd_debug) {
-               /* Why is this so hard? */
-               proc_jbd_debug->read_proc = read_jbd_debug;
-               proc_jbd_debug->write_proc = write_jbd_debug;
-       }
 }
 
-static void __exit remove_jbd_proc_entry(void)
+static inline void jbd_remove_debugfs_entry(void)
 {
-       if (proc_jbd_debug)
-               remove_proc_entry(JBD_PROC_NAME, NULL);
 }
 
-#else
-
-#define create_jbd_proc_entry() do {} while (0)
-#define remove_jbd_proc_entry() do {} while (0)
-
 #endif
 
 struct kmem_cache *jbd_handle_cache;
@@ -1966,7 +1943,7 @@ static int __init journal_init(void)
        ret = journal_init_caches();
        if (ret != 0)
                journal_destroy_caches();
-       create_jbd_proc_entry();
+       jbd_create_debugfs_entry();
        return ret;
 }
 
@@ -1977,7 +1954,7 @@ static void __exit journal_exit(void)
        if (n)
                printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
 #endif
-       remove_jbd_proc_entry();
+       jbd_remove_debugfs_entry();
        journal_destroy_caches();
 }
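
The /proc/sys/fs/jbd-debug read/write handlers collapse into two debugfs calls; the knob then lives at <debugfs>/jbd/jbd-debug, typically /sys/kernel/debug/jbd/jbd-debug. The same pattern as a self-contained sketch; all names other than the debugfs API are hypothetical, and this variant also makes the knob writable:

    #include <linux/cache.h>
    #include <linux/debugfs.h>
    #include <linux/init.h>
    #include <linux/stat.h>

    static u8 my_debug_level __read_mostly;
    static struct dentry *my_dir, *my_knob;

    static int __init my_debugfs_init(void)
    {
            my_dir = debugfs_create_dir("example", NULL);
            if (my_dir)
                    my_knob = debugfs_create_u8("level", S_IRUGO | S_IWUSR,
                                                my_dir, &my_debug_level);
            return 0;
    }

    static void __exit my_debugfs_exit(void)
    {
            /* debugfs_remove() tolerates NULL dentries */
            debugfs_remove(my_knob);
            debugfs_remove(my_dir);
    }
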
 
index 2a5f4b833e353626d6b3d6199e6ea13f2989e3de..c5d9694b6a2ff2df7f9311942674321c05147c88 100644 (file)
@@ -250,10 +250,10 @@ int journal_recover(journal_t *journal)
        if (!err)
                err = do_one_pass(journal, &info, PASS_REPLAY);
 
-       jbd_debug(0, "JBD: recovery, exit status %d, "
+       jbd_debug(1, "JBD: recovery, exit status %d, "
                  "recovered transactions %u to %u\n",
                  err, info.start_transaction, info.end_transaction);
-       jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+       jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
                  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
        /* Restart the log at the next transaction ID, thus invalidating
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
 #ifdef CONFIG_JBD_DEBUG
                int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
 #endif
-               jbd_debug(0,
+               jbd_debug(1,
                          "JBD: ignoring %d transaction%s from the journal.\n",
                          dropped, (dropped == 1) ? "" : "s");
                journal->j_transaction_sequence = ++info.end_transaction;
index 9841b1e5af0367dbfc9cb751421425317e1a59df..08ff6c7028cc585b13cab54d01abb8ca1c6ea15d 100644 (file)
@@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 
 alloc_transaction:
        if (!journal->j_running_transaction) {
-               new_transaction = kmalloc(sizeof(*new_transaction),
+               new_transaction = kzalloc(sizeof(*new_transaction),
                                                GFP_NOFS|__GFP_NOFAIL);
                if (!new_transaction) {
                        ret = -ENOMEM;
                        goto out;
                }
-               memset(new_transaction, 0, sizeof(*new_transaction));
        }
 
        jbd_debug(3, "New handle %p going live.\n", handle);
index 2a49f2c51a9f8a11e42d7664a7dffc7ceb60060b..4130adabd76e6d9a9d496d1e6d176a43cb905783 100644 (file)
 #define JFFS2_ERROR(fmt, ...)                                          \
        do {                                                            \
                printk(JFFS2_ERR_MSG_PREFIX                             \
-                       " (%d) %s: " fmt, current->pid,                 \
+                       " (%d) %s: " fmt, task_pid_nr(current),         \
                        __FUNCTION__ , ##__VA_ARGS__);                  \
        } while(0)
 
 #define JFFS2_WARNING(fmt, ...)                                                \
        do {                                                            \
                printk(JFFS2_WARN_MSG_PREFIX                            \
-                       " (%d) %s: " fmt, current->pid,                 \
+                       " (%d) %s: " fmt, task_pid_nr(current),         \
                        __FUNCTION__ , ##__VA_ARGS__);                  \
        } while(0)
 
 #define JFFS2_NOTICE(fmt, ...)                                         \
        do {                                                            \
                printk(JFFS2_NOTICE_MSG_PREFIX                          \
-                       " (%d) %s: " fmt, current->pid,                 \
+                       " (%d) %s: " fmt, task_pid_nr(current),         \
                        __FUNCTION__ , ##__VA_ARGS__);                  \
        } while(0)
 
 #define JFFS2_DEBUG(fmt, ...)                                          \
        do {                                                            \
                printk(JFFS2_DBG_MSG_PREFIX                             \
-                       " (%d) %s: " fmt, current->pid,                 \
+                       " (%d) %s: " fmt, task_pid_nr(current),         \
                        __FUNCTION__ , ##__VA_ARGS__);                  \
        } while(0)
 
index 07daa797259184f1c4cd181cf09f04156bd4dabe..860752998fb3172d725a642ed94221b2ac455f27 100644 (file)
@@ -1411,7 +1411,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                mnt_flags |= MNT_RELATIME;
 
        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
+                  MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_KERNMOUNT);
 
        /* ... and get the mountpoint */
        retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
index 46934c97f8f7fe30c915aa24e0cee144c390657b..d0199189924cee7d5ea49aafd52dcc81fab21df2 100644 (file)
@@ -1029,13 +1029,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                if (EX_WGATHER(exp)) {
                        if (atomic_read(&inode->i_writecount) > 1
                            || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-                               dprintk("nfsd: write defer %d\n", current->pid);
+                               dprintk("nfsd: write defer %d\n", task_pid_nr(current));
                                msleep(10);
-                               dprintk("nfsd: write resume %d\n", current->pid);
+                               dprintk("nfsd: write resume %d\n", task_pid_nr(current));
                        }
 
                        if (inode->i_state & I_DIRTY) {
-                               dprintk("nfsd: write sync %d\n", current->pid);
+                               dprintk("nfsd: write sync %d\n", task_pid_nr(current));
                                host_err=nfsd_sync(file);
                        }
 #if 0
index f14b541fab951d01aae5155a4cbd224c6c807b2e..9cc7c0418b70eb53b04d1904a197feb82b196b24 100644 (file)
@@ -1372,7 +1372,7 @@ static ssize_t o2hb_region_pid_read(struct o2hb_region *reg,
 
        spin_lock(&o2hb_live_lock);
        if (reg->hr_task)
-               pid = reg->hr_task->pid;
+               pid = task_pid_nr(reg->hr_task);
        spin_unlock(&o2hb_live_lock);
 
        if (!pid)
index 75cd877f6d42e17d3687674158a37764826e0633..cd046060114ef4790a0039919bb68e3ff3ab86fd 100644 (file)
@@ -192,7 +192,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
  * previous token if args expands to nothing.
  */
 #define __mlog_printk(level, fmt, args...)                             \
-       printk(level "(%u,%lu):%s:%d " fmt, current->pid,               \
+       printk(level "(%u,%lu):%s:%d " fmt, task_pid_nr(current),       \
               __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ ,        \
               ##args)
 
index a2c33160bfd6838dd17487b49492f6a85e5b3539..2fde7bf91434b0e95faddaf188d193877d803b39 100644 (file)
@@ -259,7 +259,7 @@ static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
        struct dlm_lock_resource *res;
 
        mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
-            dlm->name, dlm->dlm_reco_thread_task->pid,
+            dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
             dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
             dlm->reco.dead_node, dlm->reco.new_master);
 
@@ -420,7 +420,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
        if (dlm_in_recovery(dlm)) {
                mlog(0, "%s: reco thread %d in recovery: "
                     "state=%d, master=%u, dead=%u\n",
-                    dlm->name, dlm->dlm_reco_thread_task->pid,
+                    dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
                     dlm->reco.state, dlm->reco.new_master,
                     dlm->reco.dead_node);
        }
@@ -483,7 +483,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
                return 0;
        }
        mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
-            dlm->name, dlm->dlm_reco_thread_task->pid,
+            dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
             dlm->reco.dead_node);
        spin_unlock(&dlm->spinlock);
 
@@ -507,7 +507,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
                mlog(0, "another node will master this recovery session.\n");
        }
        mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n",
-            dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master,
+            dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master,
             dlm->node_num, dlm->reco.dead_node);
 
        /* it is safe to start everything back up here
@@ -520,7 +520,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 
 master_here:
        mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
-            dlm->dlm_reco_thread_task->pid,
+            task_pid_nr(dlm->dlm_reco_thread_task),
             dlm->name, dlm->reco.dead_node, dlm->node_num);
 
        status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
index 27b59f5f3bd1f382ab16401671b5316dd867841d..7a34571203bc46b335b2c03d52e48ef4fde9d4ea 100644 (file)
@@ -77,6 +77,7 @@
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
 #include <linux/delayacct.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -145,8 +146,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
                                            TASK_UNINTERRUPTIBLE |
                                            TASK_STOPPED |
                                            TASK_TRACED)) |
-                       (tsk->exit_state & (EXIT_ZOMBIE |
-                                           EXIT_DEAD));
+                                          tsk->exit_state;
        const char **p = &task_state_array[0];
 
        while (state) {
@@ -161,8 +161,15 @@ static inline char *task_state(struct task_struct *p, char *buffer)
        struct group_info *group_info;
        int g;
        struct fdtable *fdt = NULL;
+       struct pid_namespace *ns;
+       pid_t ppid, tpid;
 
+       ns = current->nsproxy->pid_ns;
        rcu_read_lock();
+       ppid = pid_alive(p) ?
+               task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
+       tpid = pid_alive(p) && p->ptrace ?
+               task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
        buffer += sprintf(buffer,
                "State:\t%s\n"
                "Tgid:\t%d\n"
@@ -172,9 +179,9 @@ static inline char *task_state(struct task_struct *p, char *buffer)
                "Uid:\t%d\t%d\t%d\t%d\n"
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
-               p->tgid, p->pid,
-               pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
-               pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
+               task_tgid_nr_ns(p, ns),
+               task_pid_nr_ns(p, ns),
+               ppid, tpid,
                p->uid, p->euid, p->suid, p->fsuid,
                p->gid, p->egid, p->sgid, p->fsgid);
 
@@ -394,6 +401,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
        unsigned long rsslim = 0;
        char tcomm[sizeof(task->comm)];
        unsigned long flags;
+       struct pid_namespace *ns;
+
+       ns = current->nsproxy->pid_ns;
 
        state = *get_task_state(task);
        vsize = eip = esp = 0;
@@ -416,7 +426,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
                struct signal_struct *sig = task->signal;
 
                if (sig->tty) {
-                       tty_pgrp = pid_nr(sig->tty->pgrp);
+                       tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns);
                        tty_nr = new_encode_dev(tty_devnum(sig->tty));
                }
 
@@ -449,9 +459,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
                        gtime += cputime_add(gtime, sig->gtime);
                }
 
-               sid = signal_session(sig);
-               pgid = process_group(task);
-               ppid = rcu_dereference(task->real_parent)->tgid;
+               sid = task_session_nr_ns(task, ns);
+               pgid = task_pgrp_nr_ns(task, ns);
+               ppid = task_ppid_nr_ns(task, ns);
 
                unlock_task_sighand(task, &flags);
        }
@@ -483,7 +493,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
        res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
 %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
-               task->pid,
+               task_pid_nr_ns(task, ns),
                tcomm,
                state,
                ppid,
index 4fe74d156416f0936b686d57dc6ee7bd2518dba5..39a3d7c969c5de96496e418eb397b3dec4ed789b 100644 (file)
 #include <linux/mm.h>
 #include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
+#include <linux/resource.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/nsproxy.h>
 #include <linux/oom.h>
 #include <linux/elf.h>
+#include <linux/pid_namespace.h>
 #include "internal.h"
 
 /* NOTE:
@@ -301,6 +304,78 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
        return sprintf(buffer, "%lu\n", points);
 }
 
+struct limit_names {
+       char *name;
+       char *unit;
+};
+
+static const struct limit_names lnames[RLIM_NLIMITS] = {
+       [RLIMIT_CPU] = {"Max cpu time", "ms"},
+       [RLIMIT_FSIZE] = {"Max file size", "bytes"},
+       [RLIMIT_DATA] = {"Max data size", "bytes"},
+       [RLIMIT_STACK] = {"Max stack size", "bytes"},
+       [RLIMIT_CORE] = {"Max core file size", "bytes"},
+       [RLIMIT_RSS] = {"Max resident set", "bytes"},
+       [RLIMIT_NPROC] = {"Max processes", "processes"},
+       [RLIMIT_NOFILE] = {"Max open files", "files"},
+       [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
+       [RLIMIT_AS] = {"Max address space", "bytes"},
+       [RLIMIT_LOCKS] = {"Max file locks", "locks"},
+       [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
+       [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
+       [RLIMIT_NICE] = {"Max nice priority", NULL},
+       [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
+};
+
+/* Display limits for a process */
+static int proc_pid_limits(struct task_struct *task, char *buffer)
+{
+       unsigned int i;
+       int count = 0;
+       unsigned long flags;
+       char *bufptr = buffer;
+
+       struct rlimit rlim[RLIM_NLIMITS];
+
+       rcu_read_lock();
+       if (!lock_task_sighand(task, &flags)) {
+               rcu_read_unlock();
+               return 0;
+       }
+       memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
+       unlock_task_sighand(task, &flags);
+       rcu_read_unlock();
+
+       /*
+        * print the file header
+        */
+       count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+                       "Limit", "Soft Limit", "Hard Limit", "Units");
+
+       for (i = 0; i < RLIM_NLIMITS; i++) {
+               if (rlim[i].rlim_cur == RLIM_INFINITY)
+                       count += sprintf(&bufptr[count], "%-25s %-20s ",
+                                        lnames[i].name, "unlimited");
+               else
+                       count += sprintf(&bufptr[count], "%-25s %-20lu ",
+                                        lnames[i].name, rlim[i].rlim_cur);
+
+               if (rlim[i].rlim_max == RLIM_INFINITY)
+                       count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+               else
+                       count += sprintf(&bufptr[count], "%-20lu ",
+                                        rlim[i].rlim_max);
+
+               if (lnames[i].unit)
+                       count += sprintf(&bufptr[count], "%-10s\n",
+                                        lnames[i].unit);
+               else
+                       count += sprintf(&bufptr[count], "\n");
+       }
+
+       return count;
+}
+
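
For illustration, the resulting /proc/<pid>/limits file renders one row per RLIMIT_* under the header produced above; an abridged sample, with assumed typical values:

    Limit                     Soft Limit           Hard Limit           Units
    Max cpu time              unlimited            unlimited            ms
    Max file size             unlimited            unlimited            bytes
    Max open files            1024                 1024                 files
    Max pending signals       8192                 8192                 signals
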
 /************************************************************************/
 /*                       Here the fs part begins                        */
 /************************************************************************/
@@ -349,18 +424,21 @@ struct proc_mounts {
 static int mounts_open(struct inode *inode, struct file *file)
 {
        struct task_struct *task = get_proc_task(inode);
+       struct nsproxy *nsp;
        struct mnt_namespace *ns = NULL;
        struct proc_mounts *p;
        int ret = -EINVAL;
 
        if (task) {
-               task_lock(task);
-               if (task->nsproxy) {
-                       ns = task->nsproxy->mnt_ns;
+               rcu_read_lock();
+               nsp = task_nsproxy(task);
+               if (nsp) {
+                       ns = nsp->mnt_ns;
                        if (ns)
                                get_mnt_ns(ns);
                }
-               task_unlock(task);
+               rcu_read_unlock();
+
                put_task_struct(task);
        }
 
@@ -423,16 +501,20 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
        if (!ret) {
                struct seq_file *m = file->private_data;
+               struct nsproxy *nsp;
                struct mnt_namespace *mnt_ns = NULL;
                struct task_struct *task = get_proc_task(inode);
 
                if (task) {
-                       task_lock(task);
-                       if (task->nsproxy)
-                               mnt_ns = task->nsproxy->mnt_ns;
-                       if (mnt_ns)
-                               get_mnt_ns(mnt_ns);
-                       task_unlock(task);
+                       rcu_read_lock();
+                       nsp = task_nsproxy(task);
+                       if (nsp) {
+                               mnt_ns = nsp->mnt_ns;
+                               if (mnt_ns)
+                                       get_mnt_ns(mnt_ns);
+                       }
+                       rcu_read_unlock();
+
                        put_task_struct(task);
                }
 
@@ -1437,7 +1519,7 @@ static int proc_readfd_common(struct file * filp, void * dirent,
        struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct task_struct *p = get_proc_task(inode);
-       unsigned int fd, tid, ino;
+       unsigned int fd, ino;
        int retval;
        struct files_struct * files;
        struct fdtable *fdt;
@@ -1446,7 +1528,6 @@ static int proc_readfd_common(struct file * filp, void * dirent,
        if (!p)
                goto out_no_task;
        retval = 0;
-       tid = p->pid;
 
        fd = filp->f_pos;
        switch (fd) {
@@ -1681,7 +1762,6 @@ static int proc_pident_readdir(struct file *filp,
                const struct pid_entry *ents, unsigned int nents)
 {
        int i;
-       int pid;
        struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct task_struct *task = get_proc_task(inode);
@@ -1694,7 +1774,6 @@ static int proc_pident_readdir(struct file *filp,
                goto out_no_task;
 
        ret = 0;
-       pid = task->pid;
        i = filp->f_pos;
        switch (i) {
        case 0:
@@ -1928,14 +2007,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
                              int buflen)
 {
        char tmp[PROC_NUMBUF];
-       sprintf(tmp, "%d", current->tgid);
+       sprintf(tmp, "%d", task_tgid_vnr(current));
        return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        char tmp[PROC_NUMBUF];
-       sprintf(tmp, "%d", current->tgid);
+       sprintf(tmp, "%d", task_tgid_vnr(current));
        return ERR_PTR(vfs_follow_link(nd,tmp));
 }
 
@@ -2101,6 +2180,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("environ",    S_IRUSR, environ),
        INF("auxv",       S_IRUSR, pid_auxv),
        INF("status",     S_IRUGO, pid_status),
+       INF("limits",     S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
 #endif
@@ -2130,8 +2210,11 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_SCHEDSTATS
        INF("schedstat",  S_IRUGO, pid_schedstat),
 #endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
        REG("cpuset",     S_IRUGO, cpuset),
+#endif
+#ifdef CONFIG_CGROUPS
+       REG("cgroup",  S_IRUGO, cgroup),
 #endif
        INF("oom_score",  S_IRUGO, oom_score),
        REG("oom_adj",    S_IRUGO|S_IWUSR, oom_adjust),
@@ -2193,27 +2276,27 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
  *       that no dcache entries will exist at process exit time it
  *       just makes it very unlikely that any will persist.
  */
-void proc_flush_task(struct task_struct *task)
+static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 {
        struct dentry *dentry, *leader, *dir;
        char buf[PROC_NUMBUF];
        struct qstr name;
 
        name.name = buf;
-       name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
-       dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+       name.len = snprintf(buf, sizeof(buf), "%d", pid);
+       dentry = d_hash_and_lookup(mnt->mnt_root, &name);
        if (dentry) {
                shrink_dcache_parent(dentry);
                d_drop(dentry);
                dput(dentry);
        }
 
-       if (thread_group_leader(task))
+       if (tgid == 0)
                goto out;
 
        name.name = buf;
-       name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
-       leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+       name.len = snprintf(buf, sizeof(buf), "%d", tgid);
+       leader = d_hash_and_lookup(mnt->mnt_root, &name);
        if (!leader)
                goto out;
 
@@ -2224,7 +2307,7 @@ void proc_flush_task(struct task_struct *task)
                goto out_put_leader;
 
        name.name = buf;
-       name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
+       name.len = snprintf(buf, sizeof(buf), "%d", pid);
        dentry = d_hash_and_lookup(dir, &name);
        if (dentry) {
                shrink_dcache_parent(dentry);
@@ -2239,6 +2322,36 @@ out:
        return;
 }
 
+/*
+ * When flushing dentries from proc, one needs to flush them from the
+ * global proc (proc_mnt) and from the proc mounts of all the namespaces
+ * this task was seen in; this call does all of that work.
+ */
+
+void proc_flush_task(struct task_struct *task)
+{
+       int i, leader;
+       struct pid *pid, *tgid;
+       struct upid *upid;
+
+       leader = thread_group_leader(task);
+       proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0);
+       pid = task_pid(task);
+       if (pid->level == 0)
+               return;
+
+       tgid = task_tgid(task);
+       for (i = 1; i <= pid->level; i++) {
+               upid = &pid->numbers[i];
+               proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
+                               leader ? 0 : tgid->numbers[i].nr);
+       }
+
+       upid = &pid->numbers[pid->level];
+       if (upid->nr == 1)
+               pid_ns_release_proc(upid->ns);
+}
+
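
proc_flush_task() works because a struct pid now carries one (nr, ns) pair per namespace level it is visible in. A sketch of that iteration on its own, with a hypothetical helper; the pid->level and pid->numbers[] fields are as introduced by this patch series:

    #include <linux/kernel.h>
    #include <linux/pid.h>
    #include <linux/sched.h>

    /* Walk every pid number this task has, from the init pid namespace
     * (level 0) down to its own. */
    static void print_all_upids(struct task_struct *task)
    {
            struct pid *pid = task_pid(task);
            int i;

            for (i = 0; i <= pid->level; i++) {
                    struct upid *upid = &pid->numbers[i];
                    printk(KERN_DEBUG "level %d: nr %d\n", i, upid->nr);
            }
    }
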
 static struct dentry *proc_pid_instantiate(struct inode *dir,
                                           struct dentry * dentry,
                                           struct task_struct *task, const void *ptr)
@@ -2274,6 +2387,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
        struct dentry *result = ERR_PTR(-ENOENT);
        struct task_struct *task;
        unsigned tgid;
+       struct pid_namespace *ns;
 
        result = proc_base_lookup(dir, dentry);
        if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
@@ -2283,8 +2397,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
        if (tgid == ~0U)
                goto out;
 
+       ns = dentry->d_sb->s_fs_info;
        rcu_read_lock();
-       task = find_task_by_pid(tgid);
+       task = find_task_by_pid_ns(tgid, ns);
        if (task)
                get_task_struct(task);
        rcu_read_unlock();
@@ -2301,7 +2416,8 @@ out:
  * Find the first task with tgid >= tgid
  *
  */
-static struct task_struct *next_tgid(unsigned int tgid)
+static struct task_struct *next_tgid(unsigned int tgid,
+               struct pid_namespace *ns)
 {
        struct task_struct *task;
        struct pid *pid;
@@ -2309,9 +2425,9 @@ static struct task_struct *next_tgid(unsigned int tgid)
        rcu_read_lock();
 retry:
        task = NULL;
-       pid = find_ge_pid(tgid);
+       pid = find_ge_pid(tgid, ns);
        if (pid) {
-               tgid = pid->nr + 1;
+               tgid = pid_nr_ns(pid, ns) + 1;
                task = pid_task(pid, PIDTYPE_PID);
                /* What we want to know is whether the pid we found is the
                 * pid of a thread_group_leader.  Testing for task
@@ -2351,6 +2467,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
        struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
        struct task_struct *task;
        int tgid;
+       struct pid_namespace *ns;
 
        if (!reaper)
                goto out_no_task;
@@ -2361,11 +2478,12 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
                        goto out;
        }
 
+       ns = filp->f_dentry->d_sb->s_fs_info;
        tgid = filp->f_pos - TGID_OFFSET;
-       for (task = next_tgid(tgid);
+       for (task = next_tgid(tgid, ns);
             task;
-            put_task_struct(task), task = next_tgid(tgid + 1)) {
-               tgid = task->pid;
+            put_task_struct(task), task = next_tgid(tgid + 1, ns)) {
+               tgid = task_pid_nr_ns(task, ns);
                filp->f_pos = tgid + TGID_OFFSET;
                if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
                        put_task_struct(task);
@@ -2388,6 +2506,7 @@ static const struct pid_entry tid_base_stuff[] = {
        REG("environ",   S_IRUSR, environ),
        INF("auxv",      S_IRUSR, pid_auxv),
        INF("status",    S_IRUGO, pid_status),
+       INF("limits",    S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
 #endif
@@ -2416,8 +2535,11 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SCHEDSTATS
        INF("schedstat", S_IRUGO, pid_schedstat),
 #endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
        REG("cpuset",    S_IRUGO, cpuset),
+#endif
+#ifdef CONFIG_CGROUPS
+       REG("cgroup",  S_IRUGO, cgroup),
 #endif
        INF("oom_score", S_IRUGO, oom_score),
        REG("oom_adj",   S_IRUGO|S_IWUSR, oom_adjust),
@@ -2486,6 +2608,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
        struct task_struct *task;
        struct task_struct *leader = get_proc_task(dir);
        unsigned tid;
+       struct pid_namespace *ns;
 
        if (!leader)
                goto out_no_task;
@@ -2494,14 +2617,15 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
        if (tid == ~0U)
                goto out;
 
+       ns = dentry->d_sb->s_fs_info;
        rcu_read_lock();
-       task = find_task_by_pid(tid);
+       task = find_task_by_pid_ns(tid, ns);
        if (task)
                get_task_struct(task);
        rcu_read_unlock();
        if (!task)
                goto out;
-       if (leader->tgid != task->tgid)
+       if (!same_thread_group(leader, task))
                goto out_drop_task;
 
        result = proc_task_instantiate(dir, dentry, task, NULL);
@@ -2526,14 +2650,14 @@ out_no_task:
  * threads past it.
  */
 static struct task_struct *first_tid(struct task_struct *leader,
-                                       int tid, int nr)
+               int tid, int nr, struct pid_namespace *ns)
 {
        struct task_struct *pos;
 
        rcu_read_lock();
        /* Attempt to start with the pid of a thread */
        if (tid && (nr > 0)) {
-               pos = find_task_by_pid(tid);
+               pos = find_task_by_pid_ns(tid, ns);
                if (pos && (pos->group_leader == leader))
                        goto found;
        }
@@ -2602,6 +2726,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
        ino_t ino;
        int tid;
        unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
+       struct pid_namespace *ns;
 
        task = get_proc_task(inode);
        if (!task)
@@ -2635,12 +2760,13 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
        /* f_version caches the tgid value that the last readdir call couldn't
         * return. lseek aka telldir automagically resets f_version to 0.
         */
+       ns = filp->f_dentry->d_sb->s_fs_info;
        tid = (int)filp->f_version;
        filp->f_version = 0;
-       for (task = first_tid(leader, tid, pos - 2);
+       for (task = first_tid(leader, tid, pos - 2, ns);
             task;
             task = next_tid(task), pos++) {
-               tid = task->pid;
+               tid = task_pid_nr_ns(task, ns);
                if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
                        /* returning this tgid failed, save it as the first
                         * pid for the next readdir call */
index 99ca00485fc307ff52014dc303b60fd918f87de4..abe6a3f04368490aa9776dbb919cd1cc5c0dd075 100644 (file)
@@ -448,7 +448,7 @@ out_mod:
        return NULL;
 }                      
 
-int proc_fill_super(struct super_block *s, void *data, int silent)
+int proc_fill_super(struct super_block *s)
 {
        struct inode * root_inode;
 
index d6dc72c78bc1bd9a1d36fb29e6728450fcbb34e5..e0d064e9764ef55422cf79119b7624b3e6b87373 100644 (file)
@@ -91,7 +91,8 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
                LOAD_INT(a), LOAD_FRAC(a),
                LOAD_INT(b), LOAD_FRAC(b),
                LOAD_INT(c), LOAD_FRAC(c),
-               nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid);
+               nr_running(), nr_threads,
+               task_active_pid_ns(current)->last_pid);
        return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
index cf3046638b09406947198ba81ac4c93aa01ec7f8..ec9cb3b6c93bf777e8c6cb8ed4a6f8405898fa87 100644 (file)
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
 #include <linux/mount.h>
+#include <linux/pid_namespace.h>
 
 #include "internal.h"
 
 struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
 
+static int proc_test_super(struct super_block *sb, void *data)
+{
+       return sb->s_fs_info == data;
+}
+
+static int proc_set_super(struct super_block *sb, void *data)
+{
+       struct pid_namespace *ns;
+
+       ns = (struct pid_namespace *)data;
+       sb->s_fs_info = get_pid_ns(ns);
+       return set_anon_super(sb, NULL);
+}
+
 static int proc_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
+       int err;
+       struct super_block *sb;
+       struct pid_namespace *ns;
+       struct proc_inode *ei;
+
        if (proc_mnt) {
                /* Seed the root directory with a pid so it doesn't need
                 * to be special in base.c.  I would do this earlier but
                 * the only task alive when /proc is mounted the first time
                 * is the init_task and it doesn't have any pids.
                 */
-               struct proc_inode *ei;
                ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
                if (!ei->pid)
                        ei->pid = find_get_pid(1);
        }
-       return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
+
+       if (flags & MS_KERNMOUNT)
+               ns = (struct pid_namespace *)data;
+       else
+               ns = current->nsproxy->pid_ns;
+
+       sb = sget(fs_type, proc_test_super, proc_set_super, ns);
+       if (IS_ERR(sb))
+               return PTR_ERR(sb);
+
+       if (!sb->s_root) {
+               sb->s_flags = flags;
+               err = proc_fill_super(sb);
+               if (err) {
+                       up_write(&sb->s_umount);
+                       deactivate_super(sb);
+                       return err;
+               }
+
+               ei = PROC_I(sb->s_root->d_inode);
+               if (!ei->pid) {
+                       rcu_read_lock();
+                       ei->pid = get_pid(find_pid_ns(1, ns));
+                       rcu_read_unlock();
+               }
+
+               sb->s_flags |= MS_ACTIVE;
+               ns->proc_mnt = mnt;
+       }
+
+       return simple_set_mnt(mnt, sb);
+}
+
+static void proc_kill_sb(struct super_block *sb)
+{
+       struct pid_namespace *ns;
+
+       ns = (struct pid_namespace *)sb->s_fs_info;
+       kill_anon_super(sb);
+       put_pid_ns(ns);
 }
 
 static struct file_system_type proc_fs_type = {
        .name           = "proc",
        .get_sb         = proc_get_sb,
-       .kill_sb        = kill_anon_super,
+       .kill_sb        = proc_kill_sb,
 };
 
 void __init proc_root_init(void)
@@ -54,12 +112,13 @@ void __init proc_root_init(void)
        err = register_filesystem(&proc_fs_type);
        if (err)
                return;
-       proc_mnt = kern_mount(&proc_fs_type);
+       proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
        err = PTR_ERR(proc_mnt);
        if (IS_ERR(proc_mnt)) {
                unregister_filesystem(&proc_fs_type);
                return;
        }
+
        proc_misc_init();
 
        proc_net_init();
@@ -153,6 +212,22 @@ struct proc_dir_entry proc_root = {
        .parent         = &proc_root,
 };
 
+int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+       struct vfsmount *mnt;
+
+       mnt = kern_mount_data(&proc_fs_type, ns);
+       if (IS_ERR(mnt))
+               return PTR_ERR(mnt);
+
+       return 0;
+}
+
+void pid_ns_release_proc(struct pid_namespace *ns)
+{
+       mntput(ns->proc_mnt);
+}
+
 EXPORT_SYMBOL(proc_symlink);
 EXPORT_SYMBOL(proc_mkdir);
 EXPORT_SYMBOL(create_proc_entry);
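
With sget() keyed on the pid namespace, each namespace gets its own proc superblock, and kern_mount_data() carries the namespace into proc_get_sb(). A sketch of the lifecycle from a namespace's point of view, using the two hooks added above; the wrapper function is hypothetical and error handling is abridged:

    #include <linux/pid_namespace.h>
    #include <linux/proc_fs.h>

    static int example_ns_proc_lifetime(struct pid_namespace *ns)
    {
            int err;

            err = pid_ns_prepare_proc(ns); /* kern_mount_data(&proc_fs_type, ns) */
            if (err)
                    return err;

            /* ... tasks in `ns' resolve /proc against ns->proc_mnt ... */

            pid_ns_release_proc(ns);       /* mntput(ns->proc_mnt) */
            return 0;
    }

As the proc_flush_task() hunk above shows, the release side actually fires when pid 1 of the namespace is flushed.
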
index 2a5dd34649b34eb2c81275c57ef7afdf9a6ec781..16b331dd9913a93f6575d8ec38dd9b63dcf05251 100644 (file)
@@ -47,7 +47,9 @@
     test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
 
 static inline void get_bit_address(struct super_block *s,
-                                  b_blocknr_t block, int *bmap_nr, int *offset)
+                                  b_blocknr_t block,
+                                  unsigned int *bmap_nr,
+                                  unsigned int *offset)
 {
        /* The block lives in the bitmap whose number equals the block
         * number divided by the number of bits in a block. */
@@ -56,10 +58,10 @@ static inline void get_bit_address(struct super_block *s,
        *offset = block & ((s->s_blocksize << 3) - 1);
 }
 
-#ifdef CONFIG_REISERFS_CHECK
 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 {
-       int bmap, offset;
+       unsigned int bmap, offset;
+       unsigned int bmap_count = reiserfs_bmap_count(s);
 
        if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
                reiserfs_warning(s,
@@ -75,25 +77,26 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
        if (unlikely(test_bit(REISERFS_OLD_FORMAT,
                              &(REISERFS_SB(s)->s_properties)))) {
                b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
-               if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) {
+               if (block >= bmap1 &&
+                   block <= bmap1 + bmap_count) {
                        reiserfs_warning(s, "vs: 4019: is_reusable: "
                                         "bitmap block %lu(%u) can't be freed or reused",
-                                        block, SB_BMAP_NR(s));
+                                        block, bmap_count);
                        return 0;
                }
        } else {
                if (offset == 0) {
                        reiserfs_warning(s, "vs: 4020: is_reusable: "
                                         "bitmap block %lu(%u) can't be freed or reused",
-                                        block, SB_BMAP_NR(s));
+                                        block, bmap_count);
                        return 0;
                }
        }
 
-       if (bmap >= SB_BMAP_NR(s)) {
+       if (bmap >= bmap_count) {
                reiserfs_warning(s,
                                 "vs-4030: is_reusable: there is no so many bitmap blocks: "
-                                "block=%lu, bitmap_nr=%d", block, bmap);
+                                "block=%lu, bitmap_nr=%u", block, bmap);
                return 0;
        }
 
@@ -106,12 +109,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 
        return 1;
 }
-#endif                         /* CONFIG_REISERFS_CHECK */
 
 /* searches in journal structures for a given block number (bmap, off). If block
    is found in reiserfs journal it suggests next free block candidate to test. */
-static inline int is_block_in_journal(struct super_block *s, int bmap, int
-                                     off, int *next)
+static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
+                                     int off, int *next)
 {
        b_blocknr_t tmp;
 
@@ -132,8 +134,8 @@ static inline int is_block_in_journal(struct super_block *s, int bmap, int
 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
  * block; */
 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
-                            int bmap_n, int *beg, int boundary, int min,
-                            int max, int unfm)
+                            unsigned int bmap_n, int *beg, int boundary,
+                            int min, int max, int unfm)
 {
        struct super_block *s = th->t_super;
        struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
@@ -143,8 +145,8 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 
        BUG_ON(!th->t_trans_id);
 
-       RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
-              bmap_n, SB_BMAP_NR(s) - 1);
+       RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
+              "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
        PROC_INFO_INC(s, scan_bitmap.bmap);
 /* this is unclear and lacks comments, explain how journal bitmaps
    work here for the reader.  Convey a sense of the design here. What
@@ -249,12 +251,12 @@ static int bmap_hash_id(struct super_block *s, u32 id)
        } else {
                hash_in = (char *)(&id);
                hash = keyed_hash(hash_in, 4);
-               bm = hash % SB_BMAP_NR(s);
+               bm = hash % reiserfs_bmap_count(s);
                if (!bm)
                        bm = 1;
        }
        /* this can only be true when SB_BMAP_NR = 1 */
-       if (bm >= SB_BMAP_NR(s))
+       if (bm >= reiserfs_bmap_count(s))
                bm = 0;
        return bm;
 }
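
The selection logic above, restated as a standalone sketch (keyed_hash() is the real hash routine; the wrapper name here is hypothetical):

    static unsigned int pick_bitmap(unsigned int hash, unsigned int bmap_count)
    {
            unsigned int bm = hash % bmap_count;

            if (!bm)                  /* bitmap group 0 is never hashed to */
                    bm = 1;
            if (bm >= bmap_count)     /* only reachable when bmap_count == 1 */
                    bm = 0;
            return bm;
    }
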
@@ -273,7 +275,7 @@ static inline int block_group_used(struct super_block *s, u32 id)
         * to make a better decision. This favors long-term performance gain
         * with a better on-disk layout vs. a short-term gain of skipping the
         * read and potentially having a bad placement. */
-       if (info->first_zero_hint == 0) {
+       if (info->free_count == UINT_MAX) {
                struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
                brelse(bh);
        }
@@ -309,16 +311,16 @@ __le32 reiserfs_choose_packing(struct inode * dir)
  * bitmap and place new blocks there. Returns number of allocated blocks. */
 static int scan_bitmap(struct reiserfs_transaction_handle *th,
                       b_blocknr_t * start, b_blocknr_t finish,
-                      int min, int max, int unfm, unsigned long file_block)
+                      int min, int max, int unfm, sector_t file_block)
 {
        int nr_allocated = 0;
        struct super_block *s = th->t_super;
        /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
         * - Hans, it is not a block number - Zam. */
 
-       int bm, off;
-       int end_bm, end_off;
-       int off_max = s->s_blocksize << 3;
+       unsigned int bm, off;
+       unsigned int end_bm, end_off;
+       unsigned int off_max = s->s_blocksize << 3;
 
        BUG_ON(!th->t_trans_id);
 
@@ -328,10 +330,10 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
 
        get_bit_address(s, *start, &bm, &off);
        get_bit_address(s, finish, &end_bm, &end_off);
-       if (bm > SB_BMAP_NR(s))
+       if (bm > reiserfs_bmap_count(s))
                return 0;
-       if (end_bm > SB_BMAP_NR(s))
-               end_bm = SB_BMAP_NR(s);
+       if (end_bm > reiserfs_bmap_count(s))
+               end_bm = reiserfs_bmap_count(s);
 
        /* When the bitmap is more than 10% free, anyone can allocate.
         * When it's less than 10% free, only files that already use the
@@ -385,7 +387,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
        struct reiserfs_super_block *rs;
        struct buffer_head *sbh, *bmbh;
        struct reiserfs_bitmap_info *apbi;
-       int nr, offset;
+       unsigned int nr, offset;
 
        BUG_ON(!th->t_trans_id);
 
@@ -397,10 +399,12 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 
        get_bit_address(s, block, &nr, &offset);
 
-       if (nr >= sb_bmap_nr(rs)) {
+       if (nr >= reiserfs_bmap_count(s)) {
                reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
-                                "block %lu is out of range on %s",
-                                block, reiserfs_bdevname(s));
+                                "block %lu is out of range on %s "
+                                "(nr=%u,max=%u)", block,
+                                reiserfs_bdevname(s), nr,
+                                reiserfs_bmap_count(s));
                return;
        }
 
@@ -434,12 +438,19 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
                         int for_unformatted)
 {
        struct super_block *s = th->t_super;
-
        BUG_ON(!th->t_trans_id);
 
        RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
-       RFALSE(is_reusable(s, block, 1) == 0,
-              "vs-4071: can not free such block");
+       if (!is_reusable(s, block, 1))
+               return;
+
+       if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
+               reiserfs_panic(th->t_super, "bitmap-4072",
+                              "Trying to free block outside file system "
+                              "boundaries (%lu > %lu)",
+                              block, sb_block_count(REISERFS_SB(s)->s_rs));
+               return;
+       }
        /* mark it before we clear it, just in case */
        journal_mark_freed(th, s, block);
        _reiserfs_free_block(th, inode, block, for_unformatted);
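
The RFALSE() assertion this hunk retires only has teeth on CONFIG_REISERFS_CHECK builds; paraphrased from the reiserfs headers (exact wording may differ):

    #ifdef CONFIG_REISERFS_CHECK
    #define RFALSE(cond, format, args...)                              \
            do {                                                       \
                    if (cond)                                          \
                            reiserfs_panic(NULL,                       \
                                "reiserfs: " format "\n", ##args);     \
            } while (0)
    #else
    #define RFALSE(cond, format, args...) do { } while (0)
    #endif

Replacing it with unconditional is_reusable() and sb_block_count() checks means bad block numbers are caught on production builds as well.
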
@@ -449,11 +460,11 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
                                         struct inode *inode, b_blocknr_t block)
 {
+       BUG_ON(!th->t_trans_id);
        RFALSE(!th->t_super,
               "vs-4060: trying to free block on nonexistent device");
-       RFALSE(is_reusable(th->t_super, block, 1) == 0,
-              "vs-4070: can not free such block");
-       BUG_ON(!th->t_trans_id);
+       if (!is_reusable(th->t_super, block, 1))
+               return;
        _reiserfs_free_block(th, inode, block, 1);
 }
 
@@ -1207,27 +1218,22 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb,
 {
        unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
 
-       info->first_zero_hint = 1 << (sb->s_blocksize_bits + 3);
+       /* The first bit must ALWAYS be 1 */
+       BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data));
+
+       info->free_count = 0;
 
        while (--cur >= (unsigned long *)bh->b_data) {
-               int base = ((char *)cur - bh->b_data) << 3;
+               int i;
 
                /* 0 and ~0 are special, we can optimize for them */
-               if (*cur == 0) {
-                       info->first_zero_hint = base;
+               if (*cur == 0)
                        info->free_count += BITS_PER_LONG;
-               } else if (*cur != ~0L) {       /* A mix, investigate */
-                       int b;
-                       for (b = BITS_PER_LONG - 1; b >= 0; b--) {
-                               if (!reiserfs_test_le_bit(b, cur)) {
-                                       info->first_zero_hint = base + b;
+               else if (*cur != ~0L)   /* A mix, investigate */
+                       for (i = BITS_PER_LONG - 1; i >= 0; i--)
+                               if (!reiserfs_test_le_bit(i, cur))
                                        info->free_count++;
-                               }
-                       }
-               }
        }
-       /* The first bit must ALWAYS be 1 */
-       BUG_ON(info->first_zero_hint == 0);
 }
 
 struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
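
The rewritten loop counts zero (i.e. free) bits word by word, keeping fast paths for all-zero and all-one words. An equivalent count via the kernel's popcount helper, as a sketch that trades those fast paths for brevity (assumes <linux/bitops.h> for hweight_long() and BITS_PER_LONG):

    static unsigned int count_free_bits(const unsigned long *map,
                                        unsigned int words)
    {
            unsigned int i, free = 0;

            for (i = 0; i < words; i++)
                    free += BITS_PER_LONG - hweight_long(map[i]);
            return free;
    }
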
@@ -1257,7 +1263,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
                BUG_ON(!buffer_uptodate(bh));
                BUG_ON(atomic_read(&bh->b_count) == 0);
 
-               if (info->first_zero_hint == 0)
+               if (info->free_count == UINT_MAX)
                        reiserfs_cache_bitmap_metadata(sb, bh, info);
        }
 
@@ -1267,12 +1273,13 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
 int reiserfs_init_bitmap_cache(struct super_block *sb)
 {
        struct reiserfs_bitmap_info *bitmap;
+       unsigned int bmap_nr = reiserfs_bmap_count(sb);
 
-       bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb));
+       bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
        if (bitmap == NULL)
                return -ENOMEM;
 
-       memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb));
+       memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
 
        SB_AP_BITMAP(sb) = bitmap;
 
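
Filling the array with 0xff instead of zeroing it makes every free_count start out as UINT_MAX, the same "metadata not cached yet" sentinel tested in reiserfs_read_bitmap_block() above. A userspace-style sketch of why the byte pattern works (the struct is a stand-in for reiserfs_bitmap_info):

    #include <assert.h>
    #include <limits.h>
    #include <string.h>

    struct bitmap_info_sketch {
            unsigned int free_count;
    };

    int main(void)
    {
            struct bitmap_info_sketch info;

            memset(&info, 0xff, sizeof(info));
            assert(info.free_count == UINT_MAX);  /* reads as "uncached" */
            return 0;
    }
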
index 0804289d355d9dc94f7d2630c0ae812bb24e9563..a991af96f3f053ecf9a513b93662d1a1ef47bf1f 100644 (file)
@@ -199,7 +199,7 @@ static inline void set_block_dev_mapped(struct buffer_head *bh,
 // files which were created in the earlier version cannot be longer
 // than 2 GB
 //
-static int file_capable(struct inode *inode, long block)
+static int file_capable(struct inode *inode, sector_t block)
 {
        if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||      // it is new file.
            block < (1 << (31 - inode->i_sb->s_blocksize_bits)))        // old file, but 'block' is inside of 2gb
@@ -242,7 +242,7 @@ static int restart_transaction(struct reiserfs_transaction_handle *th,
 // Please improve the english/clarity in the comment above, as it is
 // hard to understand.
 
-static int _get_block_create_0(struct inode *inode, long block,
+static int _get_block_create_0(struct inode *inode, sector_t block,
                               struct buffer_head *bh_result, int args)
 {
        INITIALIZE_PATH(path);
@@ -250,7 +250,7 @@ static int _get_block_create_0(struct inode *inode, long block,
        struct buffer_head *bh;
        struct item_head *ih, tmp_ih;
        int fs_gen;
-       int blocknr;
+       b_blocknr_t blocknr;
        char *p = NULL;
        int chars;
        int ret;
@@ -569,7 +569,7 @@ static int convert_tail_for_hole(struct inode *inode,
 }
 
 static inline int _allocate_block(struct reiserfs_transaction_handle *th,
-                                 long block,
+                                 sector_t block,
                                  struct inode *inode,
                                  b_blocknr_t * allocated_block_nr,
                                  struct treepath *path, int flags)
index 4cad9e75ef560f6238087bf2cc49ccd5cc571412..bb05a3e51b93ba709a01135d940e8c3c0fa8f320 100644 (file)
@@ -219,11 +219,12 @@ static void allocate_bitmap_nodes(struct super_block *p_s_sb)
        }
 }
 
-static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
+static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
+                                 b_blocknr_t block,
                                  struct reiserfs_list_bitmap *jb)
 {
-       int bmap_nr = block / (p_s_sb->s_blocksize << 3);
-       int bit_nr = block % (p_s_sb->s_blocksize << 3);
+       unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
+       unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
 
        if (!jb->bitmaps[bmap_nr]) {
                jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
@@ -239,7 +240,7 @@ static void cleanup_bitmap_list(struct super_block *p_s_sb,
        if (jb->bitmaps == NULL)
                return;
 
-       for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) {
+       for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
                if (jb->bitmaps[i]) {
                        free_bitmap_node(p_s_sb, jb->bitmaps[i]);
                        jb->bitmaps[i] = NULL;
@@ -289,7 +290,7 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
 */
 int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
                                   struct reiserfs_list_bitmap *jb_array,
-                                  int bmap_nr)
+                                  unsigned int bmap_nr)
 {
        int i;
        int failed = 0;
@@ -483,7 +484,7 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
 **
 */
 int reiserfs_in_journal(struct super_block *p_s_sb,
-                       int bmap_nr, int bit_nr, int search_all,
+                       unsigned int bmap_nr, int bit_nr, int search_all,
                        b_blocknr_t * next_zero_bit)
 {
        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
@@ -1013,7 +1014,7 @@ static int flush_commit_list(struct super_block *s,
                             struct reiserfs_journal_list *jl, int flushall)
 {
        int i;
-       int bn;
+       b_blocknr_t bn;
        struct buffer_head *tbh = NULL;
        unsigned long trans_id = jl->j_trans_id;
        struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2307,8 +2308,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
    Right now it is only used from journal code. But later we might use it
    from other places.
    Note: Do not use journal_getblk/sb_getblk functions here! */
-static struct buffer_head *reiserfs_breada(struct block_device *dev, int block,
-                                          int bufsize, unsigned int max_block)
+static struct buffer_head *reiserfs_breada(struct block_device *dev,
+                                          b_blocknr_t block, int bufsize,
+                                          b_blocknr_t max_block)
 {
        struct buffer_head *bhlist[BUFNR];
        unsigned int blocks = BUFNR;
@@ -2732,7 +2734,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
        journal->j_persistent_trans = 0;
        if (reiserfs_allocate_list_bitmaps(p_s_sb,
                                           journal->j_list_bitmap,
-                                          SB_BMAP_NR(p_s_sb)))
+                                          reiserfs_bmap_count(p_s_sb)))
                goto free_and_return;
        allocate_bitmap_nodes(p_s_sb);
 
@@ -2740,7 +2742,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
        SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
                                                 REISERFS_OLD_DISK_OFFSET_IN_BYTES
                                                 / p_s_sb->s_blocksize +
-                                                SB_BMAP_NR(p_s_sb) +
+                                                reiserfs_bmap_count(p_s_sb) +
                                                 1 :
                                                 REISERFS_DISK_OFFSET_IN_BYTES /
                                                 p_s_sb->s_blocksize + 2);
index bc808a91eeaa75fba63c211667b3869a295ce1ed..5e7388b32d020f17b6b22fb515bbe04a8a3ee67c 100644 (file)
@@ -356,13 +356,11 @@ extern struct tree_balance *cur_tb;
 void reiserfs_panic(struct super_block *sb, const char *fmt, ...)
 {
        do_reiserfs_warning(fmt);
-       printk(KERN_EMERG "REISERFS: panic (device %s): %s\n",
-              reiserfs_bdevname(sb), error_buf);
-       BUG();
 
-       /* this is not actually called, but makes reiserfs_panic() "noreturn" */
-       panic("REISERFS: panic (device %s): %s\n",
-             reiserfs_bdevname(sb), error_buf);
+       dump_stack();
+
+       panic(KERN_EMERG "REISERFS: panic (device %s): %s\n",
+              reiserfs_bdevname(sb), error_buf);
 }
 
 void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
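
Since panic() never returns, the old BUG()-plus-unreachable-panic() arrangement is no longer needed to make the function "noreturn". The prototype in the reiserfs headers is presumably annotated along these lines (an assumption; the header is not part of this diff):

    void reiserfs_panic(struct super_block *sb, const char *fmt, ...)
            __attribute__ ((noreturn));
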
index 976cc7887a0dfe60c4f5933e7d7757acfb6cf8bf..f71c3948edef8324ac68a71042fbc35209cb5ea3 100644 (file)
@@ -61,7 +61,8 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
        }
 
        /* count used bits in last bitmap block */
-       block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8;
+       block_r = SB_BLOCK_COUNT(s) -
+                       (reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8;
 
        /* count bitmap blocks in new fs */
        bmap_nr_new = block_count_new / (s->s_blocksize * 8);
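
A worked example of the arithmetic above, assuming 4 KiB blocks (32768 bits per bitmap block), SB_BLOCK_COUNT(s) == 100000 and therefore 4 bitmap blocks:

    block_r = 100000 - (4 - 1) * 32768 = 1696

so only the first 1696 bits of the last bitmap block describe real blocks; the padding bits past block_r are cleared later in this function before the map grows.
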
@@ -73,7 +74,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 
        /* save old values */
        block_count = SB_BLOCK_COUNT(s);
-       bmap_nr = SB_BMAP_NR(s);
+       bmap_nr = reiserfs_bmap_count(s);
 
        /* resizing of reiserfs bitmaps (journal and real), if needed */
        if (bmap_nr_new > bmap_nr) {
@@ -119,7 +120,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
                        return -ENOMEM;
                }
                memset(bitmap, 0,
-                      sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
+                      sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
                for (i = 0; i < bmap_nr; i++)
                        bitmap[i] = old_bitmap[i];
 
@@ -143,7 +144,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
                        mark_buffer_dirty(bh);
                        sync_dirty_buffer(bh);
                        // update bitmap_info stuff
-                       bitmap[i].first_zero_hint = 1;
                        bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
                        brelse(bh);
                }
@@ -173,8 +173,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
        for (i = block_r; i < s->s_blocksize * 8; i++)
                reiserfs_test_and_clear_le_bit(i, bh->b_data);
        info->free_count += s->s_blocksize * 8 - block_r;
-       if (!info->first_zero_hint)
-               info->first_zero_hint = block_r;
 
        journal_mark_dirty(&th, s, bh);
        brelse(bh);
@@ -196,9 +194,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
        brelse(bh);
 
        info->free_count -= s->s_blocksize * 8 - block_r_new;
-       /* Extreme case where last bitmap is the only valid block in itself. */
-       if (!info->free_count)
-               info->first_zero_hint = 0;
        /* update super */
        reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
        free_blocks = SB_FREE_BLOCKS(s);
@@ -206,7 +201,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
                           free_blocks + (block_count_new - block_count -
                                          (bmap_nr_new - bmap_nr)));
        PUT_SB_BLOCK_COUNT(s, block_count_new);
-       PUT_SB_BMAP_NR(s, bmap_nr_new);
+       PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new);
        s->s_dirt = 1;
 
        journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
index 981027d1187bb49f33b0798cd9cfb790a2d8e549..ca41567d7890b8fecbec571d59f357e66eeb171e 100644 (file)
@@ -559,7 +559,7 @@ static int is_tree_node(struct buffer_head *bh, int level)
 /* The function is NOT SCHEDULE-SAFE! */
 static void search_by_key_reada(struct super_block *s,
                                struct buffer_head **bh,
-                               unsigned long *b, int num)
+                               b_blocknr_t *b, int num)
 {
        int i, j;
 
@@ -611,7 +611,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,        /*
                                           DISK_LEAF_NODE_LEVEL */
     )
 {
-       int n_block_number;
+       b_blocknr_t n_block_number;
        int expected_level;
        struct buffer_head *p_s_bh;
        struct path_element *p_s_last_element;
@@ -619,7 +619,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,        /*
        int right_neighbor_of_leaf_node;
        int fs_gen;
        struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
-       unsigned long reada_blocks[SEARCH_BY_KEY_READA];
+       b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
        int reada_count = 0;
 
 #ifdef CONFIG_REISERFS_CHECK
index b82897ae090bdba89331029216c3ce33893415fd..57adfe90d5aed2e8f64b0674bb9bf104c13b0bce 100644 (file)
@@ -1725,6 +1725,21 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
                set_sb_umount_state(rs, REISERFS_ERROR_FS);
                set_sb_fs_state(rs, 0);
 
+               /* Clear out s_bmap_nr if it would wrap. We can handle this
+                * case, but older revisions can't. This will cause the
+                * file system to fail mount on those older implementations,
+                * avoiding corruption. -jeffm */
+               if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
+                   sb_bmap_nr(rs) != 0) {
+                       reiserfs_warning(s, "super-2030: This file system "
+                                       "claims to use %u bitmap blocks in "
+                                       "its super block, but requires %u. "
+                                       "Clearing to zero.", sb_bmap_nr(rs),
+                                       reiserfs_bmap_count(s));
+
+                       set_sb_bmap_nr(rs, 0);
+               }
+
                if (old_format_only(s)) {
                        /* filesystem of format 3.5 either with standard or non-standard
                           journal */
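
bmap_would_wrap() itself is not shown in this diff; since the on-disk s_bmap_nr field is 16 bits wide, it is presumably a one-liner of this shape:

    static inline int bmap_would_wrap(unsigned int bmap_nr)
    {
            return bmap_nr > ((1LL << 16) - 1);   /* count won't fit in u16 */
    }
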
index fab4b9b2664fff2d460e93ef87f5d606be32a082..1597f6b649e040a5aad306e5cde5330f2e953f73 100644 (file)
@@ -484,7 +484,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
        /* Resize it so we're ok to write there */
        newattrs.ia_size = buffer_size;
        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-       mutex_lock(&xinode->i_mutex);
+       mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR);
        err = notify_change(fp->f_path.dentry, &newattrs);
        if (err)
                goto out_filp;
@@ -1223,7 +1223,8 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                if (!IS_ERR(dentry)) {
                        if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
                                struct inode *inode = dentry->d_parent->d_inode;
-                               mutex_lock(&inode->i_mutex);
+                               mutex_lock_nested(&inode->i_mutex,
+                                                 I_MUTEX_XATTR);
                                err = inode->i_op->mkdir(inode, dentry, 0700);
                                mutex_unlock(&inode->i_mutex);
                                if (err) {
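
mutex_lock_nested() tells lockdep which subclass of i_mutex is being taken, so acquiring the xattr directory's mutex while another inode's i_mutex is already held is not reported as recursive locking. The subclass values come from the inode_i_mutex_lock_class enum, roughly (paraphrased from include/linux/fs.h of this era):

    enum inode_i_mutex_lock_class {
            I_MUTEX_NORMAL,
            I_MUTEX_PARENT,
            I_MUTEX_CHILD,
            I_MUTEX_XATTR,
            I_MUTEX_QUOTA
    };
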
index 7dede89658f535964e8808682d6d7fe784281d4e..47f47925aea2bead2a4491d672476a7a63e9be1b 100644 (file)
@@ -177,11 +177,6 @@ get_max:
        return max;
 }
 
-#define BIT(i)         (1UL << ((i)&(__NFDBITS-1)))
-#define MEM(i,m)       ((m)+(unsigned)(i)/__NFDBITS)
-#define ISSET(i,m)     (((i)&*(m)) != 0)
-#define SET(i,m)       (*(m) |= (i))
-
 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 #define POLLEX_SET (POLLPRI)
index 1bfcca2104be91ac9a8233cd94bf0e0e3ef6a182..d28fde7e1cfbf96329c02ce34e38cf1310d0f6f4 100644 (file)
 #include <asm/uaccess.h>
 
 
-void get_filesystem(struct file_system_type *fs);
-void put_filesystem(struct file_system_type *fs);
-struct file_system_type *get_fs_type(const char *name);
-
 LIST_HEAD(super_blocks);
 DEFINE_SPINLOCK(sb_lock);
 
@@ -336,21 +332,21 @@ struct super_block *sget(struct file_system_type *type,
                        void *data)
 {
        struct super_block *s = NULL;
-       struct list_head *p;
+       struct super_block *old;
        int err;
 
 retry:
        spin_lock(&sb_lock);
-       if (test) list_for_each(p, &type->fs_supers) {
-               struct super_block *old;
-               old = list_entry(p, struct super_block, s_instances);
-               if (!test(old, data))
-                       continue;
-               if (!grab_super(old))
-                       goto retry;
-               if (s)
-                       destroy_super(s);
-               return old;
+       if (test) {
+               list_for_each_entry(old, &type->fs_supers, s_instances) {
+                       if (!test(old, data))
+                               continue;
+                       if (!grab_super(old))
+                               goto retry;
+                       if (s)
+                               destroy_super(s);
+                       return old;
+               }
        }
        if (!s) {
                spin_unlock(&sb_lock);
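
The rewrite swaps the open-coded list_for_each()/list_entry() pair for the typed iterator, which folds the container_of() step into the loop header. Side by side, as a sketch:

    /* before: manual cursor plus a list_entry() per iteration */
    struct list_head *p;
    list_for_each(p, &type->fs_supers) {
            struct super_block *old =
                    list_entry(p, struct super_block, s_instances);
            /* ... */
    }

    /* after: the iterator yields the typed element directly */
    struct super_block *old;
    list_for_each_entry(old, &type->fs_supers, s_instances) {
            /* ... */
    }
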
@@ -948,9 +944,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
        return mnt;
 }
 
-struct vfsmount *kern_mount(struct file_system_type *type)
+struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 {
-       return vfs_kern_mount(type, 0, type->name, NULL);
+       return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
 }
 
-EXPORT_SYMBOL(kern_mount);
+EXPORT_SYMBOL_GPL(kern_mount_data);
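
Existing kern_mount() callers presumably keep compiling through a trivial wrapper in include/linux/fs.h along these lines (an assumption; the header change is not part of this hunk):

    #define kern_mount(type) kern_mount_data(type, NULL)
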
index 726449d4fd223fc80817968e50c77d1e92f3977f..3586c7a28d2cb1cce5d631b2dd385a8144e3ea6e 100644 (file)
@@ -54,8 +54,8 @@ xfs_fs_decode_fh(
                struct dentry   *de),
        void                    *context)
 {
-       xfs_fid2_t              ifid;
-       xfs_fid2_t              pfid;
+       xfs_fid_t               ifid;
+       xfs_fid_t               pfid;
        void                    *parent = NULL;
        int                     is64 = 0;
        __u32                   *p = fh;
@@ -144,7 +144,7 @@ xfs_fs_get_dentry(
        struct dentry           *result;
        int                     error;
 
-       error = xfs_vget(XFS_M(sb), &vp, (fid_t *)data);
+       error = xfs_vget(XFS_M(sb), &vp, data);
        if (error || vp == NULL)
                return ERR_PTR(-ESTALE) ;
 
index e794ca4efc761b28edd3813f6c9dea16ca03d957..2f36071a86f76b289c4e5976405058bb26ac80b0 100644 (file)
@@ -71,13 +71,13 @@ xfs_fileid_length(int hasparent, int is64)
 
 /*
  * Decode encoded inode information (either for the inode itself
- * or the parent) into an xfs_fid2_t structure.  Advances and
+ * or the parent) into an xfs_fid_t structure.  Advances and
  * returns the new data pointer
  */
 static inline __u32 *
-xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64)
+xfs_fileid_decode_fid2(__u32 *p, xfs_fid_t *fid, int is64)
 {
-       fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len);
+       fid->fid_len = sizeof(xfs_fid_t) - sizeof(fid->fid_len);
        fid->fid_pad = 0;
        fid->fid_ino = *p++;
 #if XFS_BIG_INUMS
index ffec630e7db71743ecc2b63089992be081b8e830..2b34bad48b079c73da80bc47c696d92a51198bd4 100644 (file)
@@ -152,11 +152,11 @@ xfs_find_handle(
                lock_mode = xfs_ilock_map_shared(ip);
 
                /* fill in fid section of handle from inode */
-               handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
-                                           sizeof(handle.ha_fid.xfs_fid_len);
-               handle.ha_fid.xfs_fid_pad = 0;
-               handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
-               handle.ha_fid.xfs_fid_ino = ip->i_ino;
+               handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
+                                       sizeof(handle.ha_fid.fid_len);
+               handle.ha_fid.fid_pad = 0;
+               handle.ha_fid.fid_gen = ip->i_d.di_gen;
+               handle.ha_fid.fid_ino = ip->i_ino;
 
                xfs_iunlock_map_shared(ip, lock_mode);
 
@@ -222,10 +222,10 @@ xfs_vget_fsop_handlereq(
        if (hlen < sizeof(*handlep))
                memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
        if (hlen > sizeof(handlep->ha_fsid)) {
-               if (handlep->ha_fid.xfs_fid_len !=
-                               (hlen - sizeof(handlep->ha_fsid)
-                                       - sizeof(handlep->ha_fid.xfs_fid_len))
-                   || handlep->ha_fid.xfs_fid_pad)
+               if (handlep->ha_fid.fid_len !=
+                   (hlen - sizeof(handlep->ha_fsid) -
+                           sizeof(handlep->ha_fid.fid_len)) ||
+                   handlep->ha_fid.fid_pad)
                        return XFS_ERROR(EINVAL);
        }
 
@@ -233,9 +233,9 @@ xfs_vget_fsop_handlereq(
         * Crack the handle, obtain the inode # & generation #
         */
        xfid = (struct xfs_fid *)&handlep->ha_fid;
-       if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
-               ino  = xfid->xfs_fid_ino;
-               igen = xfid->xfs_fid_gen;
+       if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
+               ino  = xfid->fid_ino;
+               igen = xfid->fid_gen;
        } else {
                return XFS_ERROR(EINVAL);
        }
index 6cd5704258a29f1584ec63faa9a7a000e71eb05e..a1e55fb9d5dde43c95565c1aece67d4345232391 100644 (file)
@@ -41,29 +41,16 @@ int
 xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 {
        if (args->flags & XFSMNT_DMAPI) {
-               struct xfs_dmops *ops;
-
-               ops = symbol_get(xfs_dmcore_xfs);
-               if (!ops) {
-                       request_module("xfs_dmapi");
-                       ops = symbol_get(xfs_dmcore_xfs);
-               }
-
-               if (!ops) {
-                       cmn_err(CE_WARN, "XFS: no dmapi support available.");
-                       return EINVAL;
-               }
-               mp->m_dm_ops = ops;
-       } else {
-               mp->m_dm_ops = &xfs_dmcore_stub;
+               cmn_err(CE_WARN,
+                       "XFS: dmapi support not available in this kernel.");
+               return EINVAL;
        }
 
+       mp->m_dm_ops = &xfs_dmcore_stub;
        return 0;
 }
 
 void
 xfs_dmops_put(struct xfs_mount *mp)
 {
-       if (mp->m_dm_ops != &xfs_dmcore_stub)
-               symbol_put(xfs_dmcore_xfs);
 }
index ec3c9c27e0de699fe66e6fc1aaf48428549b072b..aab9662765170b556bdf82cd623c2642dce8aa45 100644 (file)
@@ -389,30 +389,13 @@ typedef struct xfs_fsop_attrmulti_handlereq {
  */
 typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */
 
-
-#ifndef HAVE_FID
-#define MAXFIDSZ       46
-
-typedef struct fid {
-       __u16           fid_len;                /* length of data in bytes */
-       unsigned char   fid_data[MAXFIDSZ];     /* data (fid_len worth)  */
-} fid_t;
-#endif
-
 typedef struct xfs_fid {
-       __u16   xfs_fid_len;            /* length of remainder  */
-       __u16   xfs_fid_pad;
-       __u32   xfs_fid_gen;            /* generation number    */
-       __u64   xfs_fid_ino;            /* 64 bits inode number */
+       __u16   fid_len;                /* length of remainder  */
+       __u16   fid_pad;
+       __u32   fid_gen;                /* generation number    */
+       __u64   fid_ino;                /* 64 bits inode number */
 } xfs_fid_t;
 
-typedef struct xfs_fid2 {
-       __u16   fid_len;        /* length of remainder */
-       __u16   fid_pad;        /* padding, must be zero */
-       __u32   fid_gen;        /* generation number */
-       __u64   fid_ino;        /* inode number */
-} xfs_fid2_t;
-
 typedef struct xfs_handle {
        union {
                __s64       align;      /* force alignment of ha_fid     */
@@ -422,9 +405,9 @@ typedef struct xfs_handle {
 } xfs_handle_t;
 #define ha_fsid ha_u._ha_fsid
 
-#define XFS_HSIZE(handle)      (((char *) &(handle).ha_fid.xfs_fid_pad  \
+#define XFS_HSIZE(handle)      (((char *) &(handle).ha_fid.fid_pad      \
                                 - (char *) &(handle))                    \
-                                + (handle).ha_fid.xfs_fid_len)
+                                + (handle).ha_fid.fid_len)
 
 /*
  * Flags for going down operation
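
A worked size check of XFS_HSIZE under the renamed fields, assuming the obvious layout with no padding surprises:

    fid_len = sizeof(xfs_fid_t) - sizeof(fid_len)  = 16 - 2 = 14
    offset of ha_fid.fid_pad = 8 (ha_fsid) + 2 (fid_len) = 10
    XFS_HSIZE = 10 + 14 = 24 bytes for a full 64-bit handle
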
index c266a0184b42e2e38eac37059a1b8ee0997dd1ab..2ec1d8a27352e6a8595af8cbaecbfc9390e16cdd 100644 (file)
@@ -135,19 +135,13 @@ int
 xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 {
        if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) {
-               struct xfs_qmops *ops;
-
-               ops = symbol_get(xfs_qmcore_xfs);
-               if (!ops) {
-                       request_module("xfs_quota");
-                       ops = symbol_get(xfs_qmcore_xfs);
-               }
-
-               if (!ops) {
-                       cmn_err(CE_WARN, "XFS: no quota support available.");
-                       return EINVAL;
-               }
-               mp->m_qm_ops = ops;
+#ifdef CONFIG_XFS_QUOTA
+               mp->m_qm_ops = &xfs_qmcore_xfs;
+#else
+               cmn_err(CE_WARN,
+                       "XFS: quota support not available in this kernel.");
+               return EINVAL;
+#endif
        } else {
                mp->m_qm_ops = &xfs_qmcore_stub;
        }
@@ -158,6 +152,4 @@ xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 void
 xfs_qmops_put(struct xfs_mount *mp)
 {
-       if (mp->m_qm_ops != &xfs_qmcore_stub)
-               symbol_put(xfs_qmcore_xfs);
 }
index a5a8454f2a631684de389d911a54055c5e64701a..a1544597bcd318c7f35a80551321c1f103b55f13 100644 (file)
@@ -1635,9 +1635,8 @@ int
 xfs_vget(
        xfs_mount_t     *mp,
        bhv_vnode_t     **vpp,
-       fid_t           *fidp)
+       xfs_fid_t       *xfid)
 {
-       xfs_fid_t       *xfid = (struct xfs_fid *)fidp;
        xfs_inode_t     *ip;
        int             error;
        xfs_ino_t       ino;
@@ -1647,11 +1646,11 @@ xfs_vget(
         * Invalid.  Since handles can be created in user space and passed in
         * via gethandle(), this is not cause for a panic.
         */
-       if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len))
+       if (xfid->fid_len != sizeof(*xfid) - sizeof(xfid->fid_len))
                return XFS_ERROR(EINVAL);
 
-       ino  = xfid->xfs_fid_ino;
-       igen = xfid->xfs_fid_gen;
+       ino  = xfid->fid_ino;
+       igen = xfid->fid_gen;
 
        /*
         * NFS can sometimes send requests for ino 0.  Fail them gracefully.
index bc99e3eb7dbba0291d139c4e0a6340e18f64b87b..a592fe02a3393367150447d4dae6af4ce36870dc 100644 (file)
@@ -2,7 +2,7 @@
 #define _XFS_VFSOPS_H 1
 
 struct cred;
-struct fid;
+struct xfs_fid;
 struct inode;
 struct kstatfs;
 struct xfs_mount;
@@ -17,7 +17,7 @@ int xfs_root(struct xfs_mount *mp, bhv_vnode_t **vpp);
 int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp,
                bhv_vnode_t *vp);
 int xfs_sync(struct xfs_mount *mp, int flags);
-int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct fid *fidp);
+int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct xfs_fid *xfid);
 int xfs_parseargs(struct xfs_mount *mp, char *options,
                struct xfs_mount_args *args, int update);
 int xfs_showargs(struct xfs_mount *mp, struct seq_file *m);
index 5e3c57ca9981e6573b4a968e91f3a18e8ee68b42..efd5aff9eaf6aa1d149fa6431ca6355e944c5b1a 100644 (file)
@@ -3466,23 +3466,14 @@ std_return:
 }
 
 
-/*
- * xfs_fid2
- *
- * A fid routine that takes a pointer to a previously allocated
- * fid structure (like xfs_fast_fid) but uses a 64 bit inode number.
- */
 int
 xfs_fid2(
        xfs_inode_t     *ip,
-       fid_t           *fidp)
+       xfs_fid_t       *xfid)
 {
-       xfs_fid2_t      *xfid = (xfs_fid2_t *)fidp;
-
        vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-       ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t));
 
-       xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
+       xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len);
        xfid->fid_pad = 0;
        /*
         * use memcpy because the inode is a long long and there's no
index f36e74f2f0c2a18719fcaca6b585fefe97ce940a..b7e461c40cfbace8e0289ab1d131a67f8b813a86 100644 (file)
@@ -39,7 +39,7 @@ int xfs_readdir(struct xfs_inode      *dp, void *dirent, size_t bufsize,
 int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
                char *target_path, mode_t mode, bhv_vnode_t **vpp,
                struct cred *credp);
-int xfs_fid2(struct xfs_inode *ip, fid_t       *fidp);
+int xfs_fid2(struct xfs_inode *ip, struct xfs_fid *xfid);
 int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);
index 381b4f5b4d5dfb9c25a5ce84ca43d651710da9f2..9e19a704d4840693a31bd88413060024e3623266 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef _ALPHA_BITOPS_H
 #define _ALPHA_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/compiler.h>
 #include <asm/barrier.h>
 
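
The guard works because <linux/bitops.h> defines the macro before it pulls in the architecture header, so that is the only include path that compiles. Roughly (paraphrased from include/linux/bitops.h):

    #ifndef _LINUX_BITOPS_H
    #define _LINUX_BITOPS_H

    #include <asm/bitops.h>

    /* generic helpers built on the per-arch primitives follow */

    #endif /* _LINUX_BITOPS_H */
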
index 1ca3ed3bd6d38f1c2461e5d83980758affeac76e..eefab3fb51ae87f76050c34118fabe6d31c87078 100644 (file)
@@ -92,17 +92,6 @@ flush_tlb_other(struct mm_struct *mm)
        if (*mmc) *mmc = 0;
 }
 
-/* Flush a specified range of user mapping page tables from TLB.
-   Although Alpha uses VPTE caches, this can be a nop, as Alpha does
-   not have finegrained tlb flushing, so it will flush VPTE stuff
-   during next flush_tlb_range.  */
-
-static inline void
-flush_tlb_pgtables(struct mm_struct *mm, unsigned long start,
-                  unsigned long end)
-{
-}
-
 #ifndef CONFIG_SMP
 /* Flush everything (kernel mapping may also have changed
    due to vmalloc/vfree).  */
index c72f9d79417cbc3208bdc0574bb6784685605523..eeeea90cd5a96f1b08a12acc6b1d00d68a588c4f 100644 (file)
@@ -17,9 +17,6 @@
 
 #define IO_SPACE_LIMIT 0xffff0000
 
-#define        BIT(x)  ((1)<<(x))
-
-
 extern int (*ixp4xx_pci_read)(u32 addr, u32 cmd, u32* data);
 extern int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data);
 
index 52fe05895debb77159a13f1eb68f05f907799aee..47a6b086eee27fd3ef5dc9a9a04a09db8c5488ee 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/system.h>
 
index 71be4fded7e20f1b069f2dbe7ad3c6a5d63456ca..8c6bc1bb9d1a5cb83c0abe7359841c1c48011bf0 100644 (file)
@@ -463,11 +463,6 @@ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
  */
 extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte);
 
-/*
- * ARM processors do not cache TLB tables in RAM.
- */
-#define flush_tlb_pgtables(mm,start,end)       do { } while (0)
-
 #endif
 
 #endif /* CONFIG_MMU */
index f3faddfd46a8b924bd40ef7eb544797fc56bd229..1a50b69b1a192de23bd6bdf88881a5181082faeb 100644 (file)
@@ -8,6 +8,10 @@
 #ifndef __ASM_AVR32_BITOPS_H
 #define __ASM_AVR32_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/byteorder.h>
 #include <asm/system.h>
 
index 730e268f81f383083573c424a3eaeddb966f1dd8..5bc7c88a5770a9c211d8aa6d32c2b04ed4186166 100644 (file)
@@ -19,7 +19,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void flush_tlb(void);
 extern void flush_tlb_all(void);
@@ -29,12 +28,6 @@ extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 extern void __flush_tlb_page(unsigned long asid, unsigned long page);
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       /* Nothing to do */
-}
-
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #endif /* __ASM_AVR32_TLBFLUSH_H */
index 03ecedc1f2a74089838768efc21ba495b62f2a67..b39a175c79c1bcaba34f4decfefc337166992698 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/sched.h>
index 10a07ba1e011b6f66bff7d34931d9fe97a2d31d1..277b400924b86f9c647e25b9d837fe4353cd643b 100644 (file)
@@ -53,10 +53,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
        BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       BUG();
-}
-
 #endif
index 617151b9b72bb52cde9433c825ac0829b879ba7b..e2f49c27ed297ab40dc7187de1639809fa1c9640 100644 (file)
 /* Currently this is unsuitable for consumption outside the kernel.  */
 #ifdef __KERNEL__ 
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/arch/bitops.h>
 #include <asm/system.h>
 #include <asm/atomic.h>
index 7b9ed22ab5dd6accf38fc93f79dc461c04cfdb08..92000d0c3f9753a645fdf14dbfd875cd22e9b91b 100644 (file)
@@ -52,7 +52,7 @@ typedef struct {
 } __kernel_fsid_t;
 
 #ifdef __KERNEL__
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 #undef __FD_SET
 #define __FD_SET(fd,fdsetp) set_bit(fd, (void *)(fdsetp))
index 0569612477e3eddc1df7cbd7132b5238f19a14be..20697e7ef4f2091d594bb187b512cba4f4b88017 100644 (file)
@@ -38,13 +38,6 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st
        flush_tlb_mm(vma->vm_mm);
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-        /* CRIS does not keep any page table caches in TLB */
-}
-
-
 static inline void flush_tlb(void)
 {
        flush_tlb_mm(current->mm);
index 8dba74b1a2549e10a520ae9766151f4e613c77a6..e29de7131b79dcca7250aad73b3d603fb1f53458 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffz.h>
 
 /*
index 8370f97e41ee57d0c782592db6f5d27263eac7f1..7ac5eafc5d9839c34c23c459075aa8c8b450629c 100644 (file)
@@ -57,7 +57,6 @@ do {                                                          \
 #define __flush_tlb_global()                   flush_tlb_all()
 #define flush_tlb()                            flush_tlb_all()
 #define flush_tlb_kernel_range(start, end)     flush_tlb_all()
-#define flush_tlb_pgtables(mm,start,end)       do { } while(0)
 
 #else
 
@@ -66,7 +65,6 @@ do {                                                          \
 #define flush_tlb_mm(mm)                       BUG()
 #define flush_tlb_page(vma,addr)               BUG()
 #define flush_tlb_range(mm,start,end)          BUG()
-#define flush_tlb_pgtables(mm,start,end)       BUG()
 #define flush_tlb_kernel_range(start, end)     BUG()
 
 #endif
index e022a0f59e6bb8707268466922adcb849e9e284e..15e6f253dda4090622d93c0d688e4350da4411ad 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/hweight.h>
index cd8a9641bd668e5e0ae2da5bebac5d1e7bf8704f..4657f3e410fce4a71ca7e7903f97e92a92676bd9 100644 (file)
@@ -3,9 +3,6 @@
 
 #include <asm/types.h>
 
-#define BITOP_MASK(nr)         (1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr)         ((nr) / BITS_PER_LONG)
-
 #ifdef CONFIG_SMP
 #include <asm/spinlock.h>
 #include <asm/cache.h>         /* we use L1_CACHE_BYTES */
@@ -66,8 +63,8 @@ extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
  */
 static inline void set_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long flags;
 
        _atomic_spin_lock_irqsave(p, flags);
@@ -87,8 +84,8 @@ static inline void set_bit(int nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long flags;
 
        _atomic_spin_lock_irqsave(p, flags);
@@ -108,8 +105,8 @@ static inline void clear_bit(int nr, volatile unsigned long *addr)
  */
 static inline void change_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long flags;
 
        _atomic_spin_lock_irqsave(p, flags);
@@ -128,8 +125,8 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
  */
 static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long old;
        unsigned long flags;
 
@@ -152,8 +149,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
  */
 static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long old;
        unsigned long flags;
 
@@ -175,8 +172,8 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
  */
 static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long old;
        unsigned long flags;
 
index 46a825cf2ae12c5723106a098dc3228de1339df4..697cc2b7e0f0d90189aa3e1e9d6d8c8c44bb42be 100644 (file)
@@ -3,9 +3,6 @@
 
 #include <asm/types.h>
 
-#define BITOP_MASK(nr)         (1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr)         ((nr) / BITS_PER_LONG)
-
 /**
  * __set_bit - Set a bit in memory
  * @nr: the bit to set
  */
 static inline void __set_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
        *p  |= mask;
 }
 
 static inline void __clear_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
        *p &= ~mask;
 }
@@ -42,8 +39,8 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
  */
 static inline void __change_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
        *p ^= mask;
 }
@@ -59,8 +56,8 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
  */
 static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long old = *p;
 
        *p = old | mask;
@@ -78,8 +75,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
  */
 static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long old = *p;
 
        *p = old & ~mask;
@@ -90,8 +87,8 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 static inline int __test_and_change_bit(int nr,
                                            volatile unsigned long *addr)
 {
-       unsigned long mask = BITOP_MASK(nr);
-       unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
        unsigned long old = *p;
 
        *p = old ^ mask;
@@ -105,7 +102,7 @@ static inline int __test_and_change_bit(int nr,
  */
 static inline int test_bit(int nr, const volatile unsigned long *addr)
 {
-       return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+       return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
 
 #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
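
BIT_MASK() and BIT_WORD() replace the per-header BITOP_MASK()/BITOP_WORD() copies removed above. Their shared definitions in <linux/bitops.h> are, paraphrased:

    #define BIT(nr)         (1UL << (nr))
    #define BIT_MASK(nr)    (1UL << ((nr) % BITS_PER_LONG))
    #define BIT_WORD(nr)    ((nr) / BITS_PER_LONG)
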
index 5615440027ec545b069f6477e61d85273b4a674a..9f584cc5c5fb48f9491b1cdbe8ceca5e01d844ca 100644 (file)
 /* .data section */
 #define DATA_DATA                                                      \
        *(.data)                                                        \
-       *(.data.init.refok)
+       *(.data.init.refok)                                             \
+       . = ALIGN(8);                                                   \
+       VMLINUX_SYMBOL(__start___markers) = .;                          \
+       *(__markers)                                                    \
+       VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)                                                 \
        . = ALIGN((align));                                             \
@@ -20,6 +24,7 @@
                VMLINUX_SYMBOL(__start_rodata) = .;                     \
                *(.rodata) *(.rodata.*)                                 \
                *(__vermagic)           /* Kernel version magic */      \
+               *(__markers_strings)    /* Markers: strings */          \
        }                                                               \
                                                                        \
        .rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {          \
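
Bracketing the __markers section with start/stop symbols lets the marker code walk every compiled-in marker. The consumer side presumably looks like this sketch, modeled on kernel/marker.c:

    #include <linux/marker.h>   /* for struct marker */

    extern struct marker __start___markers[];
    extern struct marker __stop___markers[];

    static void marker_for_each(void (*fn)(struct marker *m))
    {
            struct marker *iter;

            for (iter = __start___markers; iter < __stop___markers; iter++)
                    fn(iter);
    }
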
index e64ad315656dfea57dbe90bfb14eb775b25657a2..cb18e3b0aa948520b8f7321e6ead3d00eb1d5cf2 100644 (file)
 #include <asm/system.h>
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 /*
  * Function prototypes to keep gcc -Wall happy
  */
index 9a2c5c9fd700346ef514f37c57edba3f68a0dd57..41c148a9208ee202c74ab4f1ea7c444388d5a493 100644 (file)
@@ -52,10 +52,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
        BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       BUG();
-}
-
 #endif /* _H8300_TLBFLUSH_H */
index 2144f1a8ed6f7f89793858c9cab1d4f73b37be95..a977affaebeca34b02b1e9bb991f4ad83cd35281 100644 (file)
@@ -9,6 +9,10 @@
  * O(1) scheduler patch
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/intrinsics.h>
index 4906916d715b5d3ea95d53659218234afe0f42f3..afcfbda76e20f3f5914b579d16e0dae9c7f83249 100644 (file)
@@ -7,8 +7,8 @@
  */
 
 #include <linux/page-flags.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/page.h>
 
 /*
index 3a62878e84f3f9d1d0e3310302bf4b773f490261..f93308f54b615ea673a669db653beca9c18c4ae6 100644 (file)
@@ -35,7 +35,7 @@ extern void find_memory (void);
 extern void reserve_memory (void);
 extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
-extern void efi_memmap_init(unsigned long *, unsigned long *);
+extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
 extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
 
 extern unsigned long vmcore_find_descriptor_size(unsigned long address);
index 0971ec90807e77201f2b77b93533c1ab7254341f..e6204f14f6142222cc3b7b06d469ffbf5a1d4a6e 100644 (file)
 # ifndef __ASSEMBLY__
 
 #include <linux/sched.h>       /* for mm_struct */
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
index 1703c9d885bd0723b186f2ca8a673741b1d666cf..471cc2ee9ac4c10be30a3b6d772f53912593d90e 100644 (file)
@@ -14,8 +14,8 @@
 #include <linux/threads.h>
 #include <linux/kernel.h>
 #include <linux/cpumask.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/param.h>
 #include <asm/processor.h>
index ff857e31738a00dfbed4534ee1832528e231fb33..0229fb95fb3824f657452dc4adbd1bd4e52c48e1 100644 (file)
@@ -11,9 +11,9 @@
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/intrinsics.h>
 #include <asm/system.h>
 
index e37f9fbf33af8babd7406b648219a1dca48bc152..80bcb0a38e8adfbb7022ea9fff1a8e0f08534cc8 100644 (file)
@@ -83,19 +83,6 @@ flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
 #endif
 }
 
-/*
- * Flush the TLB entries mapping the virtually mapped linear page
- * table corresponding to address range [START-END).
- */
-static inline void
-flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-       /*
-        * Deprecated.  The virtual page table is now flushed via the normal gather/flush
-        * interface (see tlb.h).
-        */
-}
-
 /*
  * Flush the local TLB. Invoked from another cpu using an IPI.
  */
index 313a02c4a88923cbd1003833835df45ee980993d..6dc9b81bf9f36544eeacfee071866629f220ce5c 100644 (file)
  *    Copyright (C) 2004  Hirokazu Takata <takata at linux-m32r.org>
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/assembler.h>
 #include <asm/system.h>
index 92d7266783fd1b9486818d678f1ae20d82090f06..86505387be0872b305cefdaa89796e3f67d04548 100644 (file)
@@ -21,9 +21,9 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/threads.h>
+#include <linux/bitops.h>
 #include <asm/processor.h>
 #include <asm/addrspace.h>
-#include <asm/bitops.h>
 #include <asm/page.h>
 
 struct mm_struct;
index 3d37ac002bcc6e2a9329de1df0a644297aee9249..0ef95307784e0d9143cba8d2608d9f6cada5ca11 100644 (file)
@@ -12,7 +12,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 extern void local_flush_tlb_all(void);
@@ -93,8 +92,6 @@ static __inline__ void __flush_tlb_all(void)
        );
 }
 
-#define flush_tlb_pgtables(mm, start, end)     do { } while (0)
-
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
 #endif /* _ASM_M32R_TLBFLUSH_H */
index da151f70cdc6389a245590245863029129e12075..2976b5d68e96cbb5189cb1a348312939a699dab8 100644 (file)
@@ -8,6 +8,10 @@
  * for more details.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 
 /*
index 31678831ee470cb8409e5d24c194704a82943d02..17707ec315e2b0a93466aec670259e22befd23f4 100644 (file)
@@ -92,11 +92,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
        flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-}
-
 #else
 
 
@@ -219,11 +214,6 @@ static inline void flush_tlb_kernel_page (unsigned long addr)
        sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-}
-
 #endif
 
 #endif /* _M68K_TLBFLUSH_H */
index b8b2770d6870965de40edcf112b405aadd2f881e..f8dfb7ba2e2573566a58cf97d01942adc48d4785 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/sched.h>
index de858db28b00584a529ab81229559da8a2ad6b03..a470cfb803eba31bccfb27a8d28f3eb6bceaa91e 100644 (file)
@@ -52,10 +52,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
        BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       BUG();
-}
-
 #endif /* _M68KNOMMU_TLBFLUSH_H */
index 77ed0c79830b6335dbb1c86fb4244091d63e7608..ec75ce4cdb8c3e90307e51350c2d7dc4b950a4d5 100644 (file)
@@ -9,6 +9,10 @@
 #ifndef _ASM_BITOPS_H
 #define _ASM_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <linux/irqflags.h>
 #include <linux/types.h>
index 483685b1592e0f5d3a970db9cb352f10529e8745..e59d4c039661278a469df9caa5e2a74f6cee1a50 100644 (file)
 
 #include <linux/sched.h>
 #include <linux/thread_info.h>
+#include <linux/bitops.h>
 
 #include <asm/mipsregs.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
 #include <asm/hazards.h>
-#include <asm/bitops.h>
 #include <asm/processor.h>
 #include <asm/current.h>
 
index a13702fafa8580839917a7d8d963860f58971367..7c36b0e5b1c64b5879ea5546fd832de782887749 100644 (file)
@@ -17,9 +17,6 @@
  */
 #define CRIME_BASE     0x14000000      /* physical */
 
-#undef BIT
-#define BIT(x) (1UL << (x))
-
 struct sgi_crime {
        volatile unsigned long id;
 #define CRIME_ID_MASK                  0xff
index 990082c81f3957f04abab2b5f240647216bf4742..d08d7c6721394ea269eb0f41c29da264b3638a70 100644 (file)
@@ -17,9 +17,6 @@
  */
 #define MACE_BASE      0x1f000000      /* physical */
 
-#undef BIT
-#define BIT(x) (1UL << (x))
-
 /*
  * PCI interface
  */
index 730e841fb08a82d10427d881a8fd69b6434749b0..86b21de12e91314a68c30ddf4cb6d449bca86674 100644 (file)
@@ -11,7 +11,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void local_flush_tlb_all(void);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -45,10 +44,4 @@ extern void flush_tlb_one(unsigned long vaddr);
 
 #endif /* CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-       unsigned long start, unsigned long end)
-{
-       /* Nothing to do on MIPS.  */
-}
-
 #endif /* __ASM_TLBFLUSH_H */
index 03ae287baf89ccd6bd81d10692d14616f9e18ca5..f8eebcbad01f81d57e0a222d4573d9044f114e96 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef _PARISC_BITOPS_H
 #define _PARISC_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/types.h>         /* for BITS_PER_LONG/SHIFT_PER_LONG */
 #include <asm/byteorder.h>
index e88cacd63724f89674f45e8f69f18db1e9f78a16..9ab79c8e5a417b12401be1f639f11a3cd43b8724 100644 (file)
@@ -11,9 +11,9 @@
  */
 
 #include <linux/mm.h>          /* for vm_area_struct */
+#include <linux/bitops.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
-#include <asm/bitops.h>
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
index 270cf309772bef396dfbe8d1531eab269367554d..b72ec66db699e56f0985ebd6d88d82a7ceb14411 100644 (file)
@@ -57,10 +57,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 #endif
 }
 
-extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-}
 static inline void flush_tlb_page(struct vm_area_struct *vma,
        unsigned long addr)
 {
index e85c3e078ba29be59f7d4f0425844de39d24461a..733b4af7f4f1e7e49269b17856bbf7e47924d7b6 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/asm-compat.h>
 #include <asm/synch.h>
index 870967e47204e32642106b61702eb4580be1e69e..4a82fdccee92a8fd57e5bde18439a05c5e48c02e 100644 (file)
@@ -26,9 +26,9 @@
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/bitops.h>
 #include <asm/machdep.h>
 #include <asm/types.h>
-#include <asm/bitops.h>
 
 #define IOMMU_PAGE_SHIFT      12
 #define IOMMU_PAGE_SIZE       (ASM_CONST(1) << IOMMU_PAGE_SHIFT)
index f863ac21409e7b4166add8e1fea65c4e8ab05ac7..9102b8bf0ead02863ac30fcc0f055fa1cf41c743 100644 (file)
@@ -8,7 +8,7 @@
 
 #ifndef CONFIG_PPC64
 #include <asm/atomic.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 /*
  * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
index a022f806bb21eba7eea83420b01442cef25f4872..b6b036ccee34f5fd5abbea69c712e4f273219d5a 100644 (file)
@@ -8,7 +8,6 @@
  *  - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
@@ -174,15 +173,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
  */
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
-/*
- * This is called in munmap when we have freed up some page-table
- * pages.  We don't need to do anything here, there's nothing special
- * about our page-table pages.  -- paulus
- */
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-}
-
 #endif /*__KERNEL__ */
 #endif /* _ASM_POWERPC_TLBFLUSH_H */
index a6441a063e5d7c20d1553e79aa898814052fe1d9..b2e25d8997bffdce4dc14f635646df600c1fce52 100644 (file)
@@ -2,8 +2,9 @@
 #ifndef __PPC_MMU_CONTEXT_H
 #define __PPC_MMU_CONTEXT_H
 
+#include <linux/bitops.h>
+
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/mmu.h>
 #include <asm/cputable.h>
 #include <asm-generic/mm_hooks.h>
index d756b34d25f309555d91631d1aead2a0a3e13578..34d9a6357c38941c70d7b638b2d2f19045291872 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 
 /*
index 66793f55c8b2dea7b61f8a7aeada359e252afa22..6de2632a3e4f5ac1fb6c63431c4de4bf51148a9c 100644 (file)
@@ -14,7 +14,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 /*
@@ -152,10 +151,4 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 #endif
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-        /* S/390 does not keep any page table caches in TLB */
-}
-
 #endif /* _S390_TLBFLUSH_H */
index 9d7021723a25363c0828f4b3422ef9fc1b52d145..df805f20b26786a3d74478a0c2fd6e10bf1777e5 100644 (file)
@@ -2,6 +2,11 @@
 #define __ASM_SH_BITOPS_H
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/system.h>
 /* For __swab32 */
 #include <asm/byteorder.h>
index 455fb8da441e6786abe3895c83faf73e633469a6..e0ac97221ae6bb6a6351331ce72b884fc229ecd1 100644 (file)
@@ -9,7 +9,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void local_flush_tlb_all(void);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -47,9 +46,4 @@ extern void flush_tlb_one(unsigned long asid, unsigned long page);
 
 #endif /* CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       /* Nothing to do */
-}
 #endif /* __ASM_SH_TLBFLUSH_H */
index 444d5ea92ce9452c8f70abc46539330c8c9f2fd1..600c59efb4c29a6df194d7bcd574ed4537dc9b07 100644 (file)
  */
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/system.h>
 /* For __swab32 */
index e45beadc29ee860066996c8989893c497acfb550..16a164a237547631459e2a386bf447d9db0f88f0 100644 (file)
@@ -20,10 +20,6 @@ extern void flush_tlb_mm(struct mm_struct *mm);
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                            unsigned long end);
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-}
 
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
index 00bd0a679d702f0a07737cd66f766442cb70f09d..cb3cefab6e098e56dc1c1835fdb2a44212fea376 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 extern unsigned long ___set_bit(unsigned long *addr, unsigned long mask);
 extern unsigned long ___clear_bit(unsigned long *addr, unsigned long mask);
 extern unsigned long ___change_bit(unsigned long *addr, unsigned long mask);
index a619da5cfaa9f5d0e1b9321859692df34a9566b9..b957e29d2ae186510f627094e69699ea3ec41f18 100644 (file)
@@ -13,7 +13,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 #ifdef CONFIG_SMP
@@ -42,11 +41,6 @@ BTFIXUPDEF_CALL(void, flush_tlb_mm, struct mm_struct *)
 BTFIXUPDEF_CALL(void, flush_tlb_range, struct vm_area_struct *, unsigned long, unsigned long)
 BTFIXUPDEF_CALL(void, flush_tlb_page, struct vm_area_struct *, unsigned long)
 
-// Thanks to Anton Blanchard, our pagetables became uncached in 2.4. Wee!
-// extern void flush_tlb_pgtables(struct mm_struct *mm,
-//     unsigned long start, unsigned long end);
-#define flush_tlb_pgtables(mm, start, end)     do{ }while(0)
-
 #define flush_tlb_all() BTFIXUP_CALL(flush_tlb_all)()
 #define flush_tlb_mm(mm) BTFIXUP_CALL(flush_tlb_mm)(mm)
 #define flush_tlb_range(vma,start,end) BTFIXUP_CALL(flush_tlb_range)(vma,start,end)
index dd4bfe993b619160ba8e7e9629cacc6342a8930b..982ce8992b91fcae6a2d9e790235b718d7cfa1b1 100644 (file)
@@ -7,6 +7,10 @@
 #ifndef _SPARC64_BITOPS_H
 #define _SPARC64_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/byteorder.h>
 
index 42c09949526cd53f3b259b4912463a77851edbee..1c1c5ea5cea5dea69e40cf4c4a96932f4d24d58b 100644 (file)
@@ -26,7 +26,7 @@
  *     Private routines/data
  */
  
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/atomic.h>
 #include <asm/percpu.h>
 
index 3487328570edc1b5e86886ec29de371a72055136..fbb675dbe0c92abe8bead2c0a46c58451fa4d0e1 100644 (file)
@@ -41,11 +41,4 @@ do { flush_tsb_kernel_range(start,end); \
 
 #endif /* ! CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-       /* We don't use virtual page tables for TLB miss processing
-        * any more.  Nowadays we use the TSB.
-        */
-}
-
 #endif /* _SPARC64_TLBFLUSH_H */
index 46d781953d3ad7c5fa42e92f3c0489bf5eeccf64..e4d38d437b97c4baefc2d1dc47bf34d61fc9ccf1 100644 (file)
@@ -1,6 +1,10 @@
 #ifndef __UM_BITOPS_H
 #define __UM_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include "asm/arch/bitops.h"
 
 #endif
index 9d647c55350b0774f1ea9b546d63ade3dbe13d34..614f2c0911781aebf9a62d3912272f4b640f99d1 100644 (file)
@@ -17,7 +17,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_kernel_vm() flushes the kernel vm area
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 extern void flush_tlb_all(void);
@@ -29,9 +28,4 @@ extern void flush_tlb_kernel_vm(void);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void __flush_tlb_one(unsigned long addr);
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-}
-
 #endif
index 8eafdb1c08ba96b3939b30fdd2b38b250c3584ff..f82f5b4a56e0d0526d3d2e53023e90ab65aeaab0 100644 (file)
@@ -13,6 +13,9 @@
 #ifndef __V850_BITOPS_H__
 #define __V850_BITOPS_H__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
 
 #include <linux/compiler.h>    /* unlikely  */
 #include <asm/byteorder.h>     /* swab32 */
index 5f2f85f636ea74bc0fe019ef51c53e978ac9a348..c44aa64449c83779df8b05212e46799936264ad2 100644 (file)
@@ -61,10 +61,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
        BUG ();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       BUG ();
-}
-
 #endif /* __V850_TLBFLUSH_H__ */
index c96641f7502298fca9aa60c5f511c9e8df50dfb3..3268a341cf495177de5653c066dacfc81b7b52da 100644 (file)
@@ -5,6 +5,10 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/alternative.h>
 
index 525edf2ce5c263454158fb8a724f9b75083f63f5..dacaa5f1febcb713d38cbe6114759ff61127dc11 100644 (file)
@@ -5,6 +5,10 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/alternative.h>
 
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
index acd4b339c49b411b273949c0c70b799696844bd8..ed3e70d8d04bfc3a94a56f38b177850d756cad1a 100644 (file)
 #include <linux/threads.h>
 #include <asm/paravirt.h>
 
-#ifndef _I386_BITOPS_H
-#include <asm/bitops.h>
-#endif
-
+#include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
index a79f5355e3b06191287c6142291cfab3923a7743..9b0ff477b39e7dc2021196619148969bffe2b355 100644 (file)
@@ -9,7 +9,7 @@
  * the x86-64 page table tree.
  */
 #include <asm/processor.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/threads.h>
 #include <asm/pda.h>
 
index ee46038d126c07fac09499d7acdf99ac2dfdc684..1f576a93368fc2bd44d6582f84052bac38e4a62b 100644 (file)
@@ -11,7 +11,7 @@
 #endif
 
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/mpspec.h>
 #include <asm/apic.h>
 #ifdef CONFIG_X86_IO_APIC
index a50fa6741486ae8cafb7525cd6335c7852d11930..2bd5b95e2048065ee2979319ef6c4bcf18e37b13 100644 (file)
@@ -78,7 +78,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *  - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus
  *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
@@ -166,10 +165,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
        flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       /* i386 does not keep any page table caches in TLB */
-}
-
 #endif /* _I386_TLBFLUSH_H */
index 888eb4abdd07a2fffa1c3ca3486962a3b56d7be5..7731fd23d572362fe08577649a0f33c6a5d5e308 100644 (file)
@@ -31,7 +31,6 @@ static inline void __flush_tlb_all(void)
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *
  * x86-64 can only flush individual pages or full VMs. For a range flush
  * we always do the full VM. Might be worth trying if for a small
@@ -98,12 +97,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
        flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                     unsigned long start, unsigned long end)
-{
-       /* x86_64 does not keep any page table caches in a software TLB.
-          The CPUs do in their hardware TLBs, but they are handled
-          by the normal TLB flushing algorithms. */
-}
-
 #endif /* _X8664_TLBFLUSH_H */
index 848c17f922262d0e7fd9733c0baa1b7a58e347a0..c0c93d744673664b1bcfeaa0602d8b3e3f7308d4 100644 (file)
@@ -5,7 +5,7 @@
 #ifdef CONFIG_NUMA
 
 #include <asm/mpspec.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 extern cpumask_t cpu_online_map;
 
index 78db04cf6e4859bf24d57ea6ed854760a7aa1201..23261e8f2e5ac4c15cf36a23921f3cd9cb654b5f 100644 (file)
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/processor.h>
 #include <asm/byteorder.h>
 #include <asm/system.h>
index 7c637b3c352cf0fcae1a903057e17ee30567d7c0..46d240074f747aa622711b35d0b130cedba6b812 100644 (file)
@@ -41,17 +41,6 @@ extern void flush_tlb_range(struct vm_area_struct*,unsigned long,unsigned long);
 
 #define flush_tlb_kernel_range(start,end) flush_tlb_all()
 
-
-/* This is called in munmap when we have freed up some page-table pages.
- * We don't need to do anything here, there's nothing special about our
- * page-table pages.
- */
-
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-}
-
 /* TLB operations. */
 
 static inline unsigned long itlb_probe(unsigned long addr)
index 7ac8303c8471d5cff03a82def3456af934f4c291..e3ffd14a3f0b412091828c7afb9fe088dbb56d1c 100644 (file)
@@ -47,6 +47,7 @@ header-y += coda_psdev.h
 header-y += coff.h
 header-y += comstats.h
 header-y += const.h
+header-y += cgroupstats.h
 header-y += cycx_cfm.h
 header-y += dlm_device.h
 header-y += dlm_netlink.h
index 64b4641904fee0415c169d3f20e3bd4cdba71f5c..acad1105d94287af0cba3322e3368efe1b4f38fb 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/string.h>
+#include <linux/kernel.h>
 
 /*
  * bitmaps provide bit arrays that consume one or more unsigned
index b9fb8ee3308bd6b2b4699194d47e2a6ecfeb7144..69c1edb9fe54a345f061cf983fe8311805c83154 100644 (file)
@@ -2,6 +2,14 @@
 #define _LINUX_BITOPS_H
 #include <asm/types.h>
 
+#ifdef __KERNEL__
+#define BIT(nr)                        (1UL << (nr))
+#define BIT_MASK(nr)           (1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)           ((nr) / BITS_PER_LONG)
+#define BITS_TO_LONGS(nr)      DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BITS_PER_BYTE          8
+#endif
+
 /*
  * Include this here because some architectures need generic_ffs/fls in
  * scope
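The BIT(), BIT_MASK(), BIT_WORD() and BITS_TO_LONGS() helpers hoisted into linux/bitops.h here replace the private copies various headers carried (the asm-mips crime.h/mace.h hunks above and the input.h hunk below delete theirs). A minimal sketch of the pattern they support, with hypothetical my_bitmap/my_set_flag names:

        #define MY_FLAG_MAX     71
        static unsigned long my_bitmap[BITS_TO_LONGS(MY_FLAG_MAX + 1)];

        static void my_set_flag(unsigned int nr)
        {
                /* pick the containing long, then OR in the in-word mask */
                my_bitmap[BIT_WORD(nr)] |= BIT_MASK(nr);
        }

        static int my_test_flag(unsigned int nr)
        {
                return (my_bitmap[BIT_WORD(nr)] & BIT_MASK(nr)) != 0;
        }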
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
new file mode 100644 (file)
index 0000000..8747932
--- /dev/null
@@ -0,0 +1,327 @@
+#ifndef _LINUX_CGROUP_H
+#define _LINUX_CGROUP_H
+/*
+ *  cgroup interface
+ *
+ *  Copyright (C) 2003 BULL SA
+ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/cpumask.h>
+#include <linux/nodemask.h>
+#include <linux/rcupdate.h>
+#include <linux/cgroupstats.h>
+
+#ifdef CONFIG_CGROUPS
+
+struct cgroupfs_root;
+struct cgroup_subsys;
+struct inode;
+
+extern int cgroup_init_early(void);
+extern int cgroup_init(void);
+extern void cgroup_init_smp(void);
+extern void cgroup_lock(void);
+extern void cgroup_unlock(void);
+extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork_callbacks(struct task_struct *p);
+extern void cgroup_post_fork(struct task_struct *p);
+extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern int cgroupstats_build(struct cgroupstats *stats,
+                               struct dentry *dentry);
+
+extern struct file_operations proc_cgroup_operations;
+
+/* Define the enumeration of all cgroup subsystems */
+#define SUBSYS(_x) _x ## _subsys_id,
+enum cgroup_subsys_id {
+#include <linux/cgroup_subsys.h>
+       CGROUP_SUBSYS_COUNT
+};
+#undef SUBSYS
+
+/* Per-subsystem/per-cgroup state maintained by the system. */
+struct cgroup_subsys_state {
+       /* The cgroup that this subsystem is attached to. Useful
+        * for subsystems that want to know about the cgroup
+        * hierarchy structure */
+       struct cgroup *cgroup;
+
+       /* State maintained by the cgroup system to allow
+        * subsystems to be "busy". Should be accessed via css_get()
+        * and css_put() */
+
+       atomic_t refcnt;
+
+       unsigned long flags;
+};
+
+/* bits in struct cgroup_subsys_state flags field */
+enum {
+       CSS_ROOT, /* This CSS is the root of the subsystem */
+};
+
+/*
+ * Call css_get() to hold a reference on the cgroup.
+ */
+
+static inline void css_get(struct cgroup_subsys_state *css)
+{
+       /* We don't need to reference count the root state */
+       if (!test_bit(CSS_ROOT, &css->flags))
+               atomic_inc(&css->refcnt);
+}
+/*
+ * css_put() should be called to release a reference taken by
+ * css_get().
+ */
+
+extern void __css_put(struct cgroup_subsys_state *css);
+static inline void css_put(struct cgroup_subsys_state *css)
+{
+       if (!test_bit(CSS_ROOT, &css->flags))
+               __css_put(css);
+}
+
+struct cgroup {
+       unsigned long flags;            /* "unsigned long" so bitops work */
+
+       /* count users of this cgroup. >0 means busy, but doesn't
+        * necessarily indicate the number of tasks in the
+        * cgroup */
+       atomic_t count;
+
+       /*
+        * We link our 'sibling' struct into our parent's 'children'.
+        * Our children link their 'sibling' into our 'children'.
+        */
+       struct list_head sibling;       /* my parent's children */
+       struct list_head children;      /* my children */
+
+       struct cgroup *parent;  /* my parent */
+       struct dentry *dentry;          /* cgroup fs entry */
+
+       /* Private pointers for each registered subsystem */
+       struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+       struct cgroupfs_root *root;
+       struct cgroup *top_cgroup;
+
+       /*
+        * List of cg_cgroup_links pointing at css_sets with
+        * tasks in this cgroup. Protected by css_set_lock
+        */
+       struct list_head css_sets;
+
+       /*
+        * Linked list running through all cgroups that can
+        * potentially be reaped by the release agent. Protected by
+        * release_list_lock
+        */
+       struct list_head release_list;
+};
+
+/* A css_set is a structure holding pointers to a set of
+ * cgroup_subsys_state objects. This saves space in the task struct
+ * object and speeds up fork()/exit(), since a single inc/dec and a
+ * list_add()/del() can bump the reference count on the entire
+ * cgroup set for a task.
+ */
+
+struct css_set {
+
+       /* Reference count */
+       struct kref ref;
+
+       /*
+        * List running through all cgroup groups. Protected by
+        * css_set_lock
+        */
+       struct list_head list;
+
+       /*
+        * List running through all tasks using this cgroup
+        * group. Protected by css_set_lock
+        */
+       struct list_head tasks;
+
+       /*
+        * List of cg_cgroup_link objects on link chains from
+        * cgroups referenced from this css_set. Protected by
+        * css_set_lock
+        */
+       struct list_head cg_links;
+
+       /*
+        * Set of subsystem states, one for each subsystem. This array
+        * is immutable after creation apart from the init_css_set
+        * during subsystem registration (at boot time).
+        */
+       struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+};
+
+/* struct cftype:
+ *
+ * The files in the cgroup filesystem mostly have very simple read/write
+ * handling; a common function takes care of them. Nevertheless, some cases
+ * (e.g. the tasks file) are special, so this structure is defined for
+ * every kind of file.
+ *
+ * When reading/writing to a file:
+ *     - the cgroup to use is in file->f_dentry->d_parent->d_fsdata
+ *     - the 'cftype' of the file is file->f_dentry->d_fsdata
+ */
+
+#define MAX_CFTYPE_NAME 64
+struct cftype {
+       /* By convention, the name should begin with the name of the
+        * subsystem, followed by a period */
+       char name[MAX_CFTYPE_NAME];
+       int private;
+       int (*open) (struct inode *inode, struct file *file);
+       ssize_t (*read) (struct cgroup *cont, struct cftype *cft,
+                        struct file *file,
+                        char __user *buf, size_t nbytes, loff_t *ppos);
+       /*
+        * read_uint() is a shortcut for the common case of returning a
+        * single integer. Use it in place of read().
+        */
+       u64 (*read_uint) (struct cgroup *cont, struct cftype *cft);
+       ssize_t (*write) (struct cgroup *cont, struct cftype *cft,
+                         struct file *file,
+                         const char __user *buf, size_t nbytes, loff_t *ppos);
+
+       /*
+        * write_uint() is a shortcut for the common case of accepting
+        * a single integer (as parsed by simple_strtoull) from
+        * userspace. Use in place of write(); return 0 or error.
+        */
+       int (*write_uint) (struct cgroup *cont, struct cftype *cft, u64 val);
+
+       int (*release) (struct inode *inode, struct file *file);
+};
+
+/* Add a new file to the given cgroup directory. Should only be
+ * called by subsystems from within a populate() method */
+int cgroup_add_file(struct cgroup *cont, struct cgroup_subsys *subsys,
+                      const struct cftype *cft);
+
+/* Add a set of new files to the given cgroup directory. Should
+ * only be called by subsystems from within a populate() method */
+int cgroup_add_files(struct cgroup *cont,
+                       struct cgroup_subsys *subsys,
+                       const struct cftype cft[],
+                       int count);
+
+int cgroup_is_removed(const struct cgroup *cont);
+
+int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
+
+int cgroup_task_count(const struct cgroup *cont);
+
+/* Return true if the cgroup is a descendant of the current cgroup */
+int cgroup_is_descendant(const struct cgroup *cont);
+
+/* Control Group subsystem type. See Documentation/cgroups.txt for details */
+
+struct cgroup_subsys {
+       struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
+                                                 struct cgroup *cont);
+       void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cont);
+       int (*can_attach)(struct cgroup_subsys *ss,
+                         struct cgroup *cont, struct task_struct *tsk);
+       void (*attach)(struct cgroup_subsys *ss, struct cgroup *cont,
+                       struct cgroup *old_cont, struct task_struct *tsk);
+       void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
+       void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
+       int (*populate)(struct cgroup_subsys *ss,
+                       struct cgroup *cont);
+       void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont);
+       void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
+       int subsys_id;
+       int active;
+       int early_init;
+#define MAX_CGROUP_TYPE_NAMELEN 32
+       const char *name;
+
+       /* Protected by RCU */
+       struct cgroupfs_root *root;
+
+       struct list_head sibling;
+
+       void *private;
+};
+
+#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+
+static inline struct cgroup_subsys_state *cgroup_subsys_state(
+       struct cgroup *cont, int subsys_id)
+{
+       return cont->subsys[subsys_id];
+}
+
+static inline struct cgroup_subsys_state *task_subsys_state(
+       struct task_struct *task, int subsys_id)
+{
+       return rcu_dereference(task->cgroups->subsys[subsys_id]);
+}
+
+static inline struct cgroup* task_cgroup(struct task_struct *task,
+                                              int subsys_id)
+{
+       return task_subsys_state(task, subsys_id)->cgroup;
+}
+
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+
+/* A cgroup_iter should be treated as an opaque object */
+struct cgroup_iter {
+       struct list_head *cg_link;
+       struct list_head *task;
+};
+
+/* To iterate across the tasks in a cgroup:
+ *
+ * 1) call cgroup_iter_start() to initialize an iterator
+ *
+ * 2) call cgroup_iter_next() to retrieve member tasks until it
+ *    returns NULL or until you want to end the iteration
+ *
+ * 3) call cgroup_iter_end() to destroy the iterator.
+ */
+void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it);
+struct task_struct *cgroup_iter_next(struct cgroup *cont,
+                                       struct cgroup_iter *it);
+void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
+
+#else /* !CONFIG_CGROUPS */
+
+static inline int cgroup_init_early(void) { return 0; }
+static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_smp(void) {}
+static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork_callbacks(struct task_struct *p) {}
+static inline void cgroup_post_fork(struct task_struct *p) {}
+static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
+
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
+static inline int cgroupstats_build(struct cgroupstats *stats,
+                                       struct dentry *dentry)
+{
+       return -EINVAL;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
+#endif /* _LINUX_CGROUP_H */
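Following the three-step protocol the header spells out for cgroup_iter, a hedged sketch of walking every task attached to a cgroup (the printk is illustrative only):

        static void example_dump_tasks(struct cgroup *cont)
        {
                struct cgroup_iter it;
                struct task_struct *tsk;

                cgroup_iter_start(cont, &it);
                while ((tsk = cgroup_iter_next(cont, &it)) != NULL)
                        printk(KERN_DEBUG "pid %d\n", tsk->pid);
                cgroup_iter_end(cont, &it);
        }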
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
new file mode 100644 (file)
index 0000000..0b9bfbd
--- /dev/null
@@ -0,0 +1,38 @@
+/* Add subsystem definitions of the form SUBSYS(<name>) in this
+ * file. Surround each one by a line of comment markers so that
+ * patches don't collide
+ */
+
+/* */
+
+/* */
+
+#ifdef CONFIG_CPUSETS
+SUBSYS(cpuset)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_CPUACCT
+SUBSYS(cpuacct)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_DEBUG
+SUBSYS(debug)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_NS
+SUBSYS(ns)
+#endif
+
+/* */
+
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+SUBSYS(cpu_cgroup)
+#endif
+
+/* */
diff --git a/include/linux/cgroupstats.h b/include/linux/cgroupstats.h
new file mode 100644 (file)
index 0000000..4f53abf
--- /dev/null
@@ -0,0 +1,70 @@
+/* cgroupstats.h - exporting per-cgroup statistics
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef _LINUX_CGROUPSTATS_H
+#define _LINUX_CGROUPSTATS_H
+
+#include <linux/taskstats.h>
+
+/*
+ * Data shared between user space and kernel space on a per cgroup
+ * basis. This data is shared using taskstats.
+ *
+ * Most of these states are derived by looking at the task->state value.
+ * For the nr_io_wait state, a flag in the delay accounting structure
+ * indicates that the task is waiting on IO.
+ *
+ * Each member is aligned to an 8-byte boundary.
+ */
+struct cgroupstats {
+       __u64   nr_sleeping;            /* Number of tasks sleeping */
+       __u64   nr_running;             /* Number of tasks running */
+       __u64   nr_stopped;             /* Number of tasks in stopped state */
+       __u64   nr_uninterruptible;     /* Number of tasks in uninterruptible */
+                                       /* state */
+       __u64   nr_io_wait;             /* Number of tasks waiting on IO */
+};
+
+/*
+ * Commands sent from userspace
+ * Not versioned. New commands should only be inserted at the enum's end
+ * prior to __CGROUPSTATS_CMD_MAX
+ */
+
+enum {
+       CGROUPSTATS_CMD_UNSPEC = __TASKSTATS_CMD_MAX,   /* Reserved */
+       CGROUPSTATS_CMD_GET,            /* user->kernel request/get-response */
+       CGROUPSTATS_CMD_NEW,            /* kernel->user event */
+       __CGROUPSTATS_CMD_MAX,
+};
+
+#define CGROUPSTATS_CMD_MAX (__CGROUPSTATS_CMD_MAX - 1)
+
+enum {
+       CGROUPSTATS_TYPE_UNSPEC = 0,    /* Reserved */
+       CGROUPSTATS_TYPE_CGROUP_STATS,  /* contains name + stats */
+       __CGROUPSTATS_TYPE_MAX,
+};
+
+#define CGROUPSTATS_TYPE_MAX (__CGROUPSTATS_TYPE_MAX - 1)
+
+enum {
+       CGROUPSTATS_CMD_ATTR_UNSPEC = 0,
+       CGROUPSTATS_CMD_ATTR_FD,
+       __CGROUPSTATS_CMD_ATTR_MAX,
+};
+
+#define CGROUPSTATS_CMD_ATTR_MAX (__CGROUPSTATS_CMD_ATTR_MAX - 1)
+
+#endif /* _LINUX_CGROUPSTATS_H */
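As the comment above says, most fields map directly onto task->state, and nr_io_wait comes from the delay-accounting flag (see the delayacct.h hunks below). A sketch of that classification, not the actual cgroupstats_build() body:

        static void example_account_task(struct cgroupstats *stats,
                                         struct task_struct *tsk)
        {
                switch (tsk->state) {
                case TASK_RUNNING:
                        stats->nr_running++;
                        break;
                case TASK_INTERRUPTIBLE:
                        stats->nr_sleeping++;
                        break;
                case TASK_UNINTERRUPTIBLE:
                        stats->nr_uninterruptible++;
                        break;
                case TASK_STOPPED:
                        stats->nr_stopped++;
                        break;
                }
                if (delayacct_is_task_waiting_on_io(tsk))
                        stats->nr_io_wait++;
        }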
diff --git a/include/linux/cpu_acct.h b/include/linux/cpu_acct.h
new file mode 100644 (file)
index 0000000..6b5fd8a
--- /dev/null
@@ -0,0 +1,14 @@
+
+#ifndef _LINUX_CPU_ACCT_H
+#define _LINUX_CPU_ACCT_H
+
+#include <linux/cgroup.h>
+#include <asm/cputime.h>
+
+#ifdef CONFIG_CGROUP_CPUACCT
+extern void cpuacct_charge(struct task_struct *, cputime_t cputime);
+#else
+static inline void cpuacct_charge(struct task_struct *p, cputime_t cputime) {}
+#endif
+
+#endif
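The stub keeps call sites unconditional: with CONFIG_CGROUP_CPUACCT off, the compiler drops the call entirely. A sketch of the intended call site (assuming p and cputime are in scope, as in the scheduler's time-accounting path):

        /* charge this tick's cputime to p's accounting cgroup;
         * a no-op unless CONFIG_CGROUP_CPUACCT is enabled */
        cpuacct_charge(p, cputime);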
index ea44d2e768a042902e566273173a83ee337b7f0b..ecae585ec3da97fce87fa5703e64d51519522d59 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
+#include <linux/cgroup.h>
 
 #ifdef CONFIG_CPUSETS
 
@@ -19,9 +20,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
 extern int cpuset_init_early(void);
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
-extern void cpuset_fork(struct task_struct *p);
-extern void cpuset_exit(struct task_struct *p);
 extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
+extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -76,18 +76,22 @@ static inline int cpuset_do_slab_mem_spread(void)
 
 extern void cpuset_track_online_nodes(void);
 
+extern int current_cpuset_is_being_rebound(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
-static inline void cpuset_fork(struct task_struct *p) {}
-static inline void cpuset_exit(struct task_struct *p) {}
 
 static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
 {
        return cpu_possible_map;
 }
+static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
+{
+       return cpu_possible_map;
+}
 
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 {
@@ -148,6 +152,11 @@ static inline int cpuset_do_slab_mem_spread(void)
 
 static inline void cpuset_track_online_nodes(void) {}
 
+static inline int current_cpuset_is_being_rebound(void)
+{
+       return 0;
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
index 55d1ca5e60f54f11efcfe7078a5e02cf3bb450fd..ab94bc083558c329c8a59e49ff3c7af6076627ca 100644 (file)
@@ -26,6 +26,7 @@
  * Used to set current->delays->flags
  */
 #define DELAYACCT_PF_SWAPIN    0x00000001      /* I am doing a swapin */
+#define DELAYACCT_PF_BLKIO     0x00000002      /* I am waiting on IO */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 
@@ -39,6 +40,14 @@ extern void __delayacct_blkio_end(void);
 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 
+static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
+{
+       if (p->delays)
+               return (p->delays->flags & DELAYACCT_PF_BLKIO);
+       else
+               return 0;
+}
+
 static inline void delayacct_set_flag(int flag)
 {
        if (current->delays)
@@ -71,6 +80,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 
 static inline void delayacct_blkio_start(void)
 {
+       delayacct_set_flag(DELAYACCT_PF_BLKIO);
        if (current->delays)
                __delayacct_blkio_start();
 }
@@ -79,6 +89,7 @@ static inline void delayacct_blkio_end(void)
 {
        if (current->delays)
                __delayacct_blkio_end();
+       delayacct_clear_flag(DELAYACCT_PF_BLKIO);
 }
 
 static inline int delayacct_add_tsk(struct taskstats *d,
@@ -116,6 +127,8 @@ static inline int delayacct_add_tsk(struct taskstats *d,
 { return 0; }
 static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 { return 0; }
+static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
+{ return 0; }
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
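With the hunks above, the block-I/O wrappers bracket the wait with DELAYACCT_PF_BLKIO, which is exactly what the new delayacct_is_task_waiting_on_io() tests. A sketch of the calling pattern (io_schedule-style; the schedule() placement is illustrative):

        delayacct_blkio_start();        /* sets DELAYACCT_PF_BLKIO, starts timing */
        schedule();                     /* ... task blocks on I/O ... */
        delayacct_blkio_end();          /* stops timing, clears the flag */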
index 6a4d170ad9a52d2f0a48c9765fdd73c836ae3f17..1657e995f72c18d21e2234ce9a2c74bf1fdf1c22 100644 (file)
@@ -123,6 +123,7 @@ extern int dir_notify_enable;
 #define MS_SLAVE       (1<<19) /* change to slave */
 #define MS_SHARED      (1<<20) /* change to shared */
 #define MS_RELATIME    (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
 #define MS_ACTIVE      (1<<30)
 #define MS_NOUSER      (1<<31)
 
@@ -1459,7 +1460,8 @@ void unnamed_dev_init(void);
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
-extern struct vfsmount *kern_mount(struct file_system_type *);
+extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
+#define kern_mount(type) kern_mount_data(type, NULL)
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
@@ -1922,6 +1924,8 @@ extern int vfs_fstat(unsigned int, struct kstat *);
 
 extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
 
+extern void get_filesystem(struct file_system_type *fs);
+extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
 extern struct super_block *user_get_super(dev_t);
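kern_mount() is now sugar for kern_mount_data(type, NULL), so filesystems that need private data for an internal mount can pass it. A hedged sketch (example_fs_type and the data pointer are hypothetical):

        static struct vfsmount *example_mnt;

        static int __init example_fs_init(void)
        {
                void *data = NULL;      /* fs-private mount data, if any */

                example_mnt = kern_mount_data(&example_fs_type, data);
                return IS_ERR(example_mnt) ? PTR_ERR(example_mnt) : 0;
        }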
index edb8024d744bff94c1fa4c3d3abc4adaaf863802..6e35b92b1d2caeb6ee4b24f8cf7a8c7c0f158899 100644 (file)
@@ -469,8 +469,8 @@ struct hid_device {                                                 /* device report descriptor */
        /* handler for raw output data, used by hidraw */
        int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t);
 #ifdef CONFIG_USB_HIDINPUT_POWERBOOK
-       unsigned long pb_pressed_fn[NBITS(KEY_MAX)];
-       unsigned long pb_pressed_numlock[NBITS(KEY_MAX)];
+       unsigned long pb_pressed_fn[BITS_TO_LONGS(KEY_CNT)];
+       unsigned long pb_pressed_numlock[BITS_TO_LONGS(KEY_CNT)];
 #endif
 };
 
index d4b2f1c76e12c528b8701111445bd864ef5ea062..cae35b6b9aecf9a849085a3c67a56da6c57d69cb 100644 (file)
@@ -67,9 +67,6 @@
        .posix_timers    = LIST_HEAD_INIT(sig.posix_timers),            \
        .cpu_timers     = INIT_CPU_TIMERS(sig.cpu_timers),              \
        .rlim           = INIT_RLIMITS,                                 \
-       .pgrp           = 0,                                            \
-       .tty_old_pgrp   = NULL,                                         \
-       { .__session      = 0},                                         \
 }
 
 extern struct nsproxy init_nsproxy;
@@ -94,15 +91,18 @@ extern struct group_info init_groups;
 
 #define INIT_STRUCT_PID {                                              \
        .count          = ATOMIC_INIT(1),                               \
-       .nr             = 0,                                            \
-       /* Don't put this struct pid in pid_hash */                     \
-       .pid_chain      = { .next = NULL, .pprev = NULL },              \
        .tasks          = {                                             \
                { .first = &init_task.pids[PIDTYPE_PID].node },         \
                { .first = &init_task.pids[PIDTYPE_PGID].node },        \
                { .first = &init_task.pids[PIDTYPE_SID].node },         \
        },                                                              \
        .rcu            = RCU_HEAD_INIT,                                \
+       .level          = 0,                                            \
+       .numbers        = { {                                           \
+               .nr             = 0,                                    \
+               .ns             = &init_pid_ns,                         \
+               .pid_chain      = { .next = NULL, .pprev = NULL },      \
+       }, }                                                            \
 }
 
 #define INIT_PID_LINK(type)                                    \
index f30da6fc08e3775dfa5b0945004711baae879a47..62268929856c9d26c8a9be4b134e4d7704796aac 100644 (file)
@@ -98,6 +98,7 @@ struct input_absinfo {
 #define EV_PWR                 0x16
 #define EV_FF_STATUS           0x17
 #define EV_MAX                 0x1f
+#define EV_CNT                 (EV_MAX+1)
 
 /*
  * Synchronization events.
@@ -567,6 +568,7 @@ struct input_absinfo {
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING    KEY_MUTE
 #define KEY_MAX                        0x1ff
+#define KEY_CNT                        (KEY_MAX+1)
 
 /*
  * Relative axes
@@ -583,6 +585,7 @@ struct input_absinfo {
 #define REL_WHEEL              0x08
 #define REL_MISC               0x09
 #define REL_MAX                        0x0f
+#define REL_CNT                        (REL_MAX+1)
 
 /*
  * Absolute axes
@@ -615,6 +618,7 @@ struct input_absinfo {
 #define ABS_VOLUME             0x20
 #define ABS_MISC               0x28
 #define ABS_MAX                        0x3f
+#define ABS_CNT                        (ABS_MAX+1)
 
 /*
  * Switch events
@@ -625,6 +629,7 @@ struct input_absinfo {
 #define SW_HEADPHONE_INSERT    0x02  /* set = inserted */
 #define SW_RADIO               0x03  /* set = radio enabled */
 #define SW_MAX                 0x0f
+#define SW_CNT                 (SW_MAX+1)
 
 /*
  * Misc events
@@ -636,6 +641,7 @@ struct input_absinfo {
 #define MSC_RAW                        0x03
 #define MSC_SCAN               0x04
 #define MSC_MAX                        0x07
+#define MSC_CNT                        (MSC_MAX+1)
 
 /*
  * LEDs
@@ -653,6 +659,7 @@ struct input_absinfo {
 #define LED_MAIL               0x09
 #define LED_CHARGING           0x0a
 #define LED_MAX                        0x0f
+#define LED_CNT                        (LED_MAX+1)
 
 /*
  * Autorepeat values
@@ -670,6 +677,7 @@ struct input_absinfo {
 #define SND_BELL               0x01
 #define SND_TONE               0x02
 #define SND_MAX                        0x07
+#define SND_CNT                        (SND_MAX+1)
 
 /*
  * IDs.
@@ -920,6 +928,7 @@ struct ff_effect {
 #define FF_AUTOCENTER  0x61
 
 #define FF_MAX         0x7f
+#define FF_CNT         (FF_MAX+1)
 
 #ifdef __KERNEL__
 
@@ -932,10 +941,6 @@ struct ff_effect {
 #include <linux/timer.h>
 #include <linux/mod_devicetable.h>
 
-#define NBITS(x) (((x)/BITS_PER_LONG)+1)
-#define BIT(x) (1UL<<((x)%BITS_PER_LONG))
-#define LONG(x) ((x)/BITS_PER_LONG)
-
 /**
  * struct input_dev - represents an input device
  * @name: name of the device
@@ -1005,28 +1010,30 @@ struct ff_effect {
  * @going_away: marks devices that are in a middle of unregistering and
  *     causes input_open_device*() fail with -ENODEV.
  * @dev: driver model's view of this device
+ * @cdev: union for struct device pointer
  * @h_list: list of input handles associated with the device. When
  *     accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
  */
 struct input_dev {
-
+       /* private: */
        void *private;  /* do not use */
+       /* public: */
 
        const char *name;
        const char *phys;
        const char *uniq;
        struct input_id id;
 
-       unsigned long evbit[NBITS(EV_MAX)];
-       unsigned long keybit[NBITS(KEY_MAX)];
-       unsigned long relbit[NBITS(REL_MAX)];
-       unsigned long absbit[NBITS(ABS_MAX)];
-       unsigned long mscbit[NBITS(MSC_MAX)];
-       unsigned long ledbit[NBITS(LED_MAX)];
-       unsigned long sndbit[NBITS(SND_MAX)];
-       unsigned long ffbit[NBITS(FF_MAX)];
-       unsigned long swbit[NBITS(SW_MAX)];
+       unsigned long evbit[BITS_TO_LONGS(EV_CNT)];
+       unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
+       unsigned long relbit[BITS_TO_LONGS(REL_CNT)];
+       unsigned long absbit[BITS_TO_LONGS(ABS_CNT)];
+       unsigned long mscbit[BITS_TO_LONGS(MSC_CNT)];
+       unsigned long ledbit[BITS_TO_LONGS(LED_CNT)];
+       unsigned long sndbit[BITS_TO_LONGS(SND_CNT)];
+       unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
+       unsigned long swbit[BITS_TO_LONGS(SW_CNT)];
 
        unsigned int keycodemax;
        unsigned int keycodesize;
@@ -1044,10 +1051,10 @@ struct input_dev {
        int abs[ABS_MAX + 1];
        int rep[REP_MAX + 1];
 
-       unsigned long key[NBITS(KEY_MAX)];
-       unsigned long led[NBITS(LED_MAX)];
-       unsigned long snd[NBITS(SND_MAX)];
-       unsigned long sw[NBITS(SW_MAX)];
+       unsigned long key[BITS_TO_LONGS(KEY_CNT)];
+       unsigned long led[BITS_TO_LONGS(LED_CNT)];
+       unsigned long snd[BITS_TO_LONGS(SND_CNT)];
+       unsigned long sw[BITS_TO_LONGS(SW_CNT)];
 
        int absmax[ABS_MAX + 1];
        int absmin[ABS_MAX + 1];
@@ -1291,7 +1298,7 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min
        dev->absfuzz[axis] = fuzz;
        dev->absflat[axis] = flat;
 
-       dev->absbit[LONG(axis)] |= BIT(axis);
+       dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis);
 }
 
 extern struct class input_class;
@@ -1332,7 +1339,7 @@ struct ff_device {
 
        void *private;
 
-       unsigned long ffbit[NBITS(FF_MAX)];
+       unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
 
        struct mutex mutex;
 
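For drivers, dropping input.h's private NBITS/BIT/LONG macros is a mechanical conversion, as the input_set_abs_params() hunk above shows. A sketch of capability setup before and after (mydev is hypothetical):

        /* old style, with input.h's private macros */
        mydev->evbit[0] = BIT(EV_KEY);
        mydev->keybit[LONG(KEY_ENTER)] |= BIT(KEY_ENTER);

        /* new style, with the generic linux/bitops.h helpers */
        mydev->evbit[0] = BIT_MASK(EV_KEY);
        mydev->keybit[BIT_WORD(KEY_ENTER)] |= BIT_MASK(KEY_ENTER);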
index ee111834091cf99752fc3679281ef9c86adbbf45..408696ea51892f0deaf7a866f7e5ec99619c5770 100644 (file)
@@ -89,6 +89,7 @@ struct kern_ipc_perm
 {
        spinlock_t      lock;
        int             deleted;
+       int             id;
        key_t           key;
        uid_t           uid;
        gid_t           gid;
@@ -110,6 +111,8 @@ struct ipc_namespace {
        int             msg_ctlmax;
        int             msg_ctlmnb;
        int             msg_ctlmni;
+       atomic_t        msg_bytes;
+       atomic_t        msg_hdrs;
 
        size_t          shm_ctlmax;
        size_t          shm_ctlall;
index a3abf51e488fca138eef37028c06fae4a800680c..16e7ed855a18d9a1bc7fafd58d70f01e0753ae19 100644 (file)
@@ -58,7 +58,7 @@
  * CONFIG_JBD_DEBUG is on.
  */
 #define JBD_EXPENSIVE_CHECKING
-extern int journal_enable_debug;
+extern u8 journal_enable_debug;
 
 #define jbd_debug(n, f, a...)                                          \
        do {                                                            \
@@ -248,17 +248,7 @@ typedef struct journal_superblock_s
 #include <linux/fs.h>
 #include <linux/sched.h>
 
-#define JBD_ASSERTIONS
-#ifdef JBD_ASSERTIONS
-#define J_ASSERT(assert)                                               \
-do {                                                                   \
-       if (!(assert)) {                                                \
-               printk (KERN_EMERG                                      \
-                       "Assertion failure in %s() at %s:%d: \"%s\"\n", \
-                       __FUNCTION__, __FILE__, __LINE__, # assert);    \
-               BUG();                                                  \
-       }                                                               \
-} while (0)
+#define J_ASSERT(assert)       BUG_ON(!(assert))
 
 #if defined(CONFIG_BUFFER_DEBUG)
 void buffer_assertion_failure(struct buffer_head *bh);
@@ -274,10 +264,6 @@ void buffer_assertion_failure(struct buffer_head *bh);
 #define J_ASSERT_JH(jh, expr)  J_ASSERT(expr)
 #endif
 
-#else
-#define J_ASSERT(assert)       do { } while (0)
-#endif         /* JBD_ASSERTIONS */
-
 #if defined(JBD_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)         J_ASSERT(expr)
 #define J_EXPECT_BH(bh, expr, why...)  J_ASSERT_BH(bh, expr)
index ad4b82ce84af13ffac6e2f3b381242f4bdde43d8..2d9c448d8c52f4c21f61e36890e1219dc074e591 100644 (file)
@@ -187,6 +187,8 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
+int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+               unsigned long long *crash_size, unsigned long long *crash_base);
 
 #else /* !CONFIG_KEXEC */
 struct pt_regs;
index 33b5c2e325b92b3d14800ad83b8e1c2406103654..65c2d70853e96557dc13c3f8e0a2e1420b4ccbf0 100644 (file)
 #define MAX_NR_OF_USER_KEYMAPS 256     /* should be at least 7 */
 
 #ifdef __KERNEL__
+struct notifier_block;
 extern const int NR_TYPES;
 extern const int max_vals[];
 extern unsigned short *key_maps[MAX_NR_KEYMAPS];
 extern unsigned short plain_map[NR_KEYS];
+
+struct keyboard_notifier_param {
+       struct vc_data *vc;     /* VC on which the keyboard press was done */
+       int down;               /* 1 when the key is pressed, 0 on release */
+       int shift;              /* Current shift mask */
+       unsigned int value;     /* keycode, unicode value or keysym */
+};
+
+extern int register_keyboard_notifier(struct notifier_block *nb);
+extern int unregister_keyboard_notifier(struct notifier_block *nb);
 #endif
 
 #define MAX_NR_FUNC    256     /* max nr of strings assigned to keys */
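register_keyboard_notifier() hooks a caller into the console keyboard path; the KBD_* action codes it reports are added to linux/notifier.h further down. A hedged sketch of a consumer (example_* names are hypothetical):

        static int example_kbd_event(struct notifier_block *nb,
                                     unsigned long action, void *data)
        {
                struct keyboard_notifier_param *param = data;

                if (action == KBD_KEYCODE && param->down)
                        printk(KERN_DEBUG "keycode %u pressed\n", param->value);
                return NOTIFY_OK;
        }

        static struct notifier_block example_kbd_nb = {
                .notifier_call = example_kbd_event,
        };

        /* at module init/exit: */
        register_keyboard_notifier(&example_kbd_nb);
        unregister_keyboard_notifier(&example_kbd_nb);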
index b0cf0135fe3edec0f7054e6602ca7f54572d592f..75ce2cb4ff6ebc92a8fcaa557a9bd7b6b9b9b1b6 100644 (file)
@@ -478,8 +478,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
                pos = n, n = pos->next)
 
 /**
- * list_for_each_prev_safe - iterate over a list backwards safe against removal
-                       of list entry
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
  * @pos:       the &struct list_head to use as a loop cursor.
  * @n:         another &struct list_head to use as temporary storage
  * @head:      the head for your list.
index f6279f68a8270c143196c1c0c09afbea2cff02be..4c4d236ded1885415066c774877abc9fc39592f7 100644 (file)
@@ -275,6 +275,14 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
                lockdep_init_map(&(lock)->dep_map, #lock, \
                                 (lock)->dep_map.key, sub)
 
+/*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+       { .name = (_name), .key = (void *)(_key), }
+
+
 /*
  * Acquire a lock.
  *
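A one-line example of the new static initializer (example_key/example_map are hypothetical; note the comment above: the name must not be NULL):

        static struct lock_class_key example_key;
        static struct lockdep_map example_map =
                STATIC_LOCKDEP_MAP_INIT("example_map", &example_key);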
index 722d4755060f7deee6f403ee7a5aafed5a914c25..1fa0c2ce4dec09456f5cb6c2480f6be5e5eb908a 100644 (file)
@@ -37,6 +37,7 @@
 
 #define SMB_SUPER_MAGIC                0x517B
 #define USBDEVICE_SUPER_MAGIC  0x9fa2
+#define CGROUP_SUPER_MAGIC     0x27e0eb
 
 #define FUTEXFS_SUPER_MAGIC    0xBAD1DEA
 #define INOTIFYFS_SUPER_MAGIC  0x2BAD1DEA
diff --git a/include/linux/marker.h b/include/linux/marker.h
new file mode 100644 (file)
index 0000000..5f36cf9
--- /dev/null
@@ -0,0 +1,129 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/markers.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+
+struct module;
+struct marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct marker *mdata,
+       void *private_data, const char *fmt, ...);
+
+struct marker {
+       const char *name;       /* Marker name */
+       const char *format;     /* Marker format string, describing the
+                                * variable argument list.
+                                */
+       char state;             /* Marker state. */
+       marker_probe_func *call;/* Probe handler function pointer */
+       void *private;          /* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Note: the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(name, call_data, format, args...)                 \
+       do {                                                            \
+               static const char __mstrtab_name_##name[]               \
+               __attribute__((section("__markers_strings")))           \
+               = #name;                                                \
+               static const char __mstrtab_format_##name[]             \
+               __attribute__((section("__markers_strings")))           \
+               = format;                                               \
+               static struct marker __mark_##name                      \
+               __attribute__((section("__markers"), aligned(8))) =     \
+               { __mstrtab_name_##name, __mstrtab_format_##name,       \
+               0, __mark_empty_function, NULL };                       \
+               __mark_check_format(format, ## args);                   \
+               if (unlikely(__mark_##name.state)) {                    \
+                       preempt_disable();                              \
+                       (*__mark_##name.call)                           \
+                               (&__mark_##name, call_data,             \
+                               format, ## args);                       \
+                       preempt_enable();                               \
+               }                                                       \
+       } while (0)
+
+extern void marker_update_probe_range(struct marker *begin,
+       struct marker *end, struct module *probe_module, int *refcount);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(name, call_data, format, args...) \
+               __mark_check_format(format, ## args)
+static inline void marker_update_probe_range(struct marker *begin,
+       struct marker *end, struct module *probe_module, int *refcount)
+{ }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker.
+ */
+#define trace_mark(name, format, args...) \
+       __trace_mark(name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN    1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...)
+{
+}
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a marker.
+ * private data pointer must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+                               marker_probe_func *probe, void *private);
+
+/*
+ * Returns the private data given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered private data.
+ */
+extern void *marker_probe_unregister_private_data(void *private);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+extern void *marker_get_private_data(const char *name);
+
+#endif
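End to end, the instrumentation side drops a marker and a tracer attaches a probe by name; a hedged sketch using only the declarations above (example_event, value and str are illustrative):

        /* instrumentation site; compiles down to the format check
         * when CONFIG_MARKERS is off */
        trace_mark(example_event, "%d %s", value, str);

        /* probe, matching the marker's format string */
        static void example_probe(const struct marker *mdata, void *private,
                                  const char *fmt, ...)
        {
                /* parse the variable arguments according to fmt */
        }

        /* connect, then enable */
        marker_probe_register("example_event", "%d %s", example_probe, NULL);
        marker_arm("example_event");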
index 38c04d61ee069f5d5443a6c514d6dc4eb23424f9..59c4865bc85f13fbe36beefd959ab499f1d5aaaf 100644 (file)
@@ -148,14 +148,6 @@ extern void mpol_rebind_task(struct task_struct *tsk,
                                        const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern void mpol_fix_fork_child_flag(struct task_struct *p);
-#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
-
-#ifdef CONFIG_CPUSETS
-#define current_cpuset_is_being_rebound() \
-                               (cpuset_being_rebound == current->cpuset)
-#else
-#define current_cpuset_is_being_rebound() 0
-#endif
 
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
@@ -173,8 +165,6 @@ static inline void check_highest_zone(enum zone_type k)
 int do_migrate_pages(struct mm_struct *mm,
        const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
 
-extern void *cpuset_being_rebound;     /* Trigger mpol_copy vma rebind */
-
 #else
 
 struct mempolicy {};
@@ -248,8 +238,6 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
 {
 }
 
-#define set_cpuset_being_rebound(x) do {} while (0)
-
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
                unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
 {
index 642f325e4917fe16cf95660be6ff8b733c7e599d..2cbc0b87e329d30d0ec72b03c83f5d7f123b8a05 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/stringify.h>
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -354,6 +355,10 @@ struct module
        /* The command line arguments (may be mangled).  People like
           keeping pointers to this stuff */
        char *args;
+#ifdef CONFIG_MARKERS
+       struct marker *markers;
+       unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
@@ -457,6 +462,8 @@ int unregister_module_notifier(struct notifier_block * nb);
 
 extern void print_modules(void);
 
+extern void module_update_markers(struct module *probe_module, int *refcount);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -556,6 +563,11 @@ static inline void print_modules(void)
 {
 }
 
+static inline void module_update_markers(struct module *probe_module,
+               int *refcount)
+{
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
index f1b60740d641c5a61daaed95f68b12e465b331ed..10a3d5a1abffb30fd9b158148b26379416cd1fe0 100644 (file)
@@ -77,7 +77,6 @@ struct msg_msg {
 /* one msq_queue structure for each present queue on the system */
 struct msg_queue {
        struct kern_ipc_perm q_perm;
-       int q_id;
        time_t q_stime;                 /* last msgsnd time */
        time_t q_rtime;                 /* last msgrcv time */
        time_t q_ctime;                 /* last change time */
index fad7ff17e468ded82fc2e797bc42f6bb4a2f95fe..0c40cc0b4a362034fbe444a929ab1d9a0ecdb749 100644 (file)
@@ -231,5 +231,22 @@ static inline int notifier_to_errno(int ret)
 #define PM_SUSPEND_PREPARE     0x0003 /* Going to suspend the system */
 #define PM_POST_SUSPEND                0x0004 /* Suspend finished */
 
+/* Console keyboard events.
+ * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and
+ * KBD_KEYSYM. */
+#define KBD_KEYCODE            0x0001 /* Keyboard keycode, called before any other */
+#define KBD_UNBOUND_KEYCODE    0x0002 /* Keyboard keycode which is not bound to any keysym */
+#define KBD_UNICODE            0x0003 /* Keyboard unicode */
+#define KBD_KEYSYM             0x0004 /* Keyboard keysym */
+#define KBD_POST_KEYSYM                0x0005 /* Called after keyboard keysym interpretation */
+
+extern struct blocking_notifier_head reboot_notifier_list;
+
+/* Virtual Terminal events. */
+#define VT_ALLOCATE            0x0001 /* Console got allocated */
+#define VT_DEALLOCATE          0x0002 /* Console will be deallocated */
+#define VT_WRITE               0x0003 /* A char got output */
+#define VT_UPDATE              0x0004 /* A bigger update occurred */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
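
For illustration, a consumer of the new keyboard events might look like the sketch below. The KBD_* action codes come from this hunk; struct keyboard_notifier_param and register_keyboard_notifier() are assumed to live in <linux/keyboard.h> in the same patch series, so treat those names as assumptions rather than part of this diff:

    #include <linux/notifier.h>
    #include <linux/keyboard.h>     /* assumed home of the param struct */

    static int kbd_watch(struct notifier_block *nb, unsigned long action,
                         void *data)
    {
            struct keyboard_notifier_param *param = data;   /* assumed type */

            /* key press whose keycode has no keysym bound to it */
            if (action == KBD_UNBOUND_KEYCODE && param->down)
                    printk(KERN_DEBUG "unbound keycode %u\n", param->value);
            return NOTIFY_OK;
    }

    static struct notifier_block kbd_nb = {
            .notifier_call = kbd_watch,
    };

    /* register_keyboard_notifier(&kbd_nb) at init time (assumed API) */
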
index 033a648709b620be45ad76db17c1bfa8fe31fcf1..0e66b57631fc767f202c32771f2ab5c8194c34cd 100644 (file)
@@ -32,8 +32,39 @@ struct nsproxy {
 };
 extern struct nsproxy init_nsproxy;
 
+/*
+ * the namespaces access rules are:
+ *
+ *  1. only current task is allowed to change tsk->nsproxy pointer or
+ *     any pointer on the nsproxy itself
+ *
+ *  2. when accessing (i.e. reading) current task's namespaces - no
+ *     precautions should be taken - just dereference the pointers
+ *
+ *  3. the access to other task namespaces is performed like this
+ *     rcu_read_lock();
+ *     nsproxy = task_nsproxy(tsk);
+ *     if (nsproxy != NULL) {
+ *             / *
+ *               * work with the namespaces here
+ *               * e.g. get the reference on one of them
+ *               * /
+ *     } / *
+ *         * NULL task_nsproxy() means that this task is
+ *         * almost dead (zombie)
+ *         * /
+ *     rcu_read_unlock();
+ *
+ */
+
+static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
+{
+       return rcu_dereference(tsk->nsproxy);
+}
+
 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
-void get_task_namespaces(struct task_struct *tsk);
+void exit_task_namespaces(struct task_struct *tsk);
+void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
        struct fs_struct *);
@@ -45,14 +76,15 @@ static inline void put_nsproxy(struct nsproxy *ns)
        }
 }
 
-static inline void exit_task_namespaces(struct task_struct *p)
+static inline void get_nsproxy(struct nsproxy *ns)
 {
-       struct nsproxy *ns = p->nsproxy;
-       if (ns) {
-               task_lock(p);
-               p->nsproxy = NULL;
-               task_unlock(p);
-               put_nsproxy(ns);
-       }
+       atomic_inc(&ns->count);
 }
+
+#ifdef CONFIG_CGROUP_NS
+int ns_cgroup_clone(struct task_struct *tsk);
+#else
+static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+#endif
+
 #endif
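
Rule 3 above, spelled out as code. A hedged sketch of taking a reference on another task's pid namespace (get_pid_ns() is from the pid_namespace.h hunk further down; the nsproxy->pid_ns member is visible in that hunk's task_child_reaper()):

    static struct pid_namespace *grab_task_pid_ns(struct task_struct *tsk)
    {
            struct nsproxy *nsproxy;
            struct pid_namespace *ns = NULL;

            rcu_read_lock();
            nsproxy = task_nsproxy(tsk);
            if (nsproxy != NULL)    /* NULL: tsk is almost dead (zombie) */
                    ns = get_pid_ns(nsproxy->pid_ns);
            rcu_read_unlock();

            return ns;      /* caller drops it with put_pid_ns() */
    }
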
index 6df80e9859147973ecd06d6c2863d400b838d71a..5c39b9270ff7007b359df90a7cfc407607ddb7ab 100644 (file)
@@ -16,8 +16,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/types.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/prom.h>
 
 /* flag descriptions */
index d3ebbfae6903ede052b71731536c0cc8d903a195..96f4048a6cc37bd1a8970c30a1037c1f5f27ad04 100644 (file)
@@ -30,7 +30,11 @@ struct phm_regs {
 #define PHN_SET_REG            _IOW (PH_IOC_MAGIC, 1, struct phm_reg *)
 #define PHN_GET_REGS           _IOWR(PH_IOC_MAGIC, 2, struct phm_regs *)
 #define PHN_SET_REGS           _IOW (PH_IOC_MAGIC, 3, struct phm_regs *)
-#define PH_IOC_MAXNR           3
+/* this ioctl tells the driver that the caller is not OpenHaptics and may
+ * use the improved register update (no more phantom switch-offs when
+ * using libphantom) */
+#define PHN_NOT_OH             _IO  (PH_IOC_MAGIC, 4)
+#define PH_IOC_MAXNR           4
 
 #define PHN_CONTROL            0x6     /* control byte in iaddr space */
 #define PHN_CTL_AMP            0x1     /*   switch after torques change */
index 1e0e4e3423a628a7983520762b7364038fbbe628..e29a900a84992f308ced20ed5ce4e9a4f2685f09 100644 (file)
@@ -40,15 +40,28 @@ enum pid_type
  * processes.
  */
 
-struct pid
-{
-       atomic_t count;
+
+/*
+ * struct upid is used to get the id of the struct pid, as it is
+ * seen in a particular namespace. Later the struct pid is found with
+ * find_pid_ns() using the int nr and struct pid_namespace *ns.
+ */
+
+struct upid {
        /* Try to keep pid_chain in the same cacheline as nr for find_pid */
        int nr;
+       struct pid_namespace *ns;
        struct hlist_node pid_chain;
+};
+
+struct pid
+{
+       atomic_t count;
        /* lists of tasks that use this pid */
        struct hlist_head tasks[PIDTYPE_MAX];
        struct rcu_head rcu;
+       int level;
+       struct upid numbers[1];
 };
 
 extern struct pid init_struct_pid;
@@ -83,26 +96,60 @@ extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
 extern void FASTCALL(transfer_pid(struct task_struct *old,
                                  struct task_struct *new, enum pid_type));
 
+struct pid_namespace;
+extern struct pid_namespace init_pid_ns;
+
 /*
  * look up a PID in the hash table. Must be called with the tasklist_lock
  * or rcu_read_lock() held.
+ *
+ * find_pid_ns() finds the pid in the namespace specified
+ * find_pid() finds the pid by its global id, i.e. in the init namespace
+ * find_vpid() finds the pid by its virtual id, i.e. in the current namespace
+ *
+ * see also find_task_by_pid() set in include/linux/sched.h
  */
-extern struct pid *FASTCALL(find_pid(int nr));
+extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns));
+extern struct pid *find_vpid(int nr);
+extern struct pid *find_pid(int nr);
 
 /*
  * Lookup a PID in the hash table, and return with its count elevated.
  */
 extern struct pid *find_get_pid(int nr);
-extern struct pid *find_ge_pid(int nr);
+extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 
-extern struct pid *alloc_pid(void);
+extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void FASTCALL(free_pid(struct pid *pid));
+extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
+
+/*
+ * the helpers to get the pid's id seen from different namespaces
+ *
+ * pid_nr()    : global id, i.e. the id seen from the init namespace;
+ * pid_vnr()   : virtual id, i.e. the id seen from the namespace this pid
+ *               belongs to. this only makes sense when called in the
+ *               context of the task that belongs to the same namespace;
+ * pid_nr_ns() : id seen from the ns specified.
+ *
+ * see also task_xid_nr() etc in include/linux/sched.h
+ */
 
 static inline pid_t pid_nr(struct pid *pid)
 {
        pid_t nr = 0;
        if (pid)
-               nr = pid->nr;
+               nr = pid->numbers[0].nr;
+       return nr;
+}
+
+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
+
+static inline pid_t pid_vnr(struct pid *pid)
+{
+       pid_t nr = 0;
+       if (pid)
+               nr = pid->numbers[pid->level].nr;
        return nr;
 }
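
A short sketch contrasting the three views of the same struct pid (task_active_pid_ns() comes from the pid_namespace.h hunk below; the printk is illustrative):

    static void report_pid(struct pid *pid)
    {
            printk(KERN_DEBUG "global %d, virtual %d, in current's ns %d\n",
                   pid_nr(pid),                         /* init ns view */
                   pid_vnr(pid),                        /* the pid's own ns */
                   pid_nr_ns(pid, task_active_pid_ns(current)));
    }
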
 
index b9a17e08ff0fa6cd24202defde59f46f31da351e..0135c76c76c6dc8005d4cb2d42353fd3320624b1 100644 (file)
@@ -4,7 +4,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/threads.h>
-#include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/kref.h>
 
@@ -20,13 +19,21 @@ struct pid_namespace {
        struct pidmap pidmap[PIDMAP_ENTRIES];
        int last_pid;
        struct task_struct *child_reaper;
+       struct kmem_cache *pid_cachep;
+       int level;
+       struct pid_namespace *parent;
+#ifdef CONFIG_PROC_FS
+       struct vfsmount *proc_mnt;
+#endif
 };
 
 extern struct pid_namespace init_pid_ns;
 
-static inline void get_pid_ns(struct pid_namespace *ns)
+static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 {
-       kref_get(&ns->kref);
+       if (ns != &init_pid_ns)
+               kref_get(&ns->kref);
+       return ns;
 }
 
 extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
@@ -34,12 +41,19 @@ extern void free_pid_ns(struct kref *kref);
 
 static inline void put_pid_ns(struct pid_namespace *ns)
 {
-       kref_put(&ns->kref, free_pid_ns);
+       if (ns != &init_pid_ns)
+               kref_put(&ns->kref, free_pid_ns);
 }
 
-static inline struct task_struct *child_reaper(struct task_struct *tsk)
+static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
 {
-       return init_pid_ns.child_reaper;
+       return tsk->nsproxy->pid_ns;
+}
+
+static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
+{
+       BUG_ON(tsk != current);
+       return tsk->nsproxy->pid_ns->child_reaper;
 }
 
 #endif /* _LINUX_PID_NS_H */
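
Note the special-casing of init_pid_ns in both helpers: the initial namespace is never freed, so its kref is left untouched. A minimal sketch of the resulting get/put discipline:

    static void inspect_active_pid_ns(void)
    {
            struct pid_namespace *ns = get_pid_ns(task_active_pid_ns(current));

            printk(KERN_DEBUG "pid ns at level %d\n", ns->level);
            put_pid_ns(ns);         /* paired drop; a no-op for &init_pid_ns */
    }
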
diff --git a/include/linux/prio_heap.h b/include/linux/prio_heap.h
new file mode 100644 (file)
index 0000000..0809435
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef _LINUX_PRIO_HEAP_H
+#define _LINUX_PRIO_HEAP_H
+
+/*
+ * Simple insertion-only static-sized priority heap containing
+ * pointers, based on CLR, chapter 7
+ */
+
+#include <linux/gfp.h>
+
+/**
+ * struct ptr_heap - simple static-sized priority heap
+ * @ptrs: pointer to data area
+ * @max: max number of elements that can be stored in @ptrs
+ * @size: current number of valid elements in @ptrs (in the range 0..@size-1)
+ * @gt: comparison operator, which should implement "greater than"
+ */
+struct ptr_heap {
+       void **ptrs;
+       int max;
+       int size;
+       int (*gt)(void *, void *);
+};
+
+/**
+ * heap_init - initialize an empty heap with a given memory size
+ * @heap: the heap structure to be initialized
+ * @size: amount of memory to use in bytes
+ * @gfp_mask: mask to pass to kmalloc()
+ * @gt: comparison operator, which should implement "greater than"
+ */
+extern int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
+                    int (*gt)(void *, void *));
+
+/**
+ * heap_free - release a heap's storage
+ * @heap: the heap structure whose data should be released
+ */
+void heap_free(struct ptr_heap *heap);
+
+/**
+ * heap_insert - insert a value into the heap and return any overflowed value
+ * @heap: the heap to be operated on
+ * @p: the pointer to be inserted
+ *
+ * Attempts to insert the given value into the priority heap. If the
+ * heap is full prior to the insertion, then the resulting heap will
+ * consist of the smallest @max elements of the original heap and the
+ * new element; the greatest element will be removed from the heap and
+ * returned. Note that the returned element will be the new element
+ * (i.e. no change to the heap) if the new element is greater than all
+ * elements currently in the heap.
+ */
+extern void *heap_insert(struct ptr_heap *heap, void *p);
+
+
+
+#endif /* _LINUX_PRIO_HEAP_H */
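
Because heap_insert() evicts the greatest element on overflow, a full heap converges on the @max smallest values seen so far. A hedged usage sketch (the comparison by raw address is purely illustrative; real users compare payload fields):

    #include <linux/prio_heap.h>

    static int ptr_gt(void *a, void *b)
    {
            return (unsigned long)a > (unsigned long)b;
    }

    static int keep_smallest(void **items, int n)
    {
            struct ptr_heap heap;
            int i, err;

            /* room for 16 pointers; @size is bytes, not an element count */
            err = heap_init(&heap, 16 * sizeof(void *), GFP_KERNEL, ptr_gt);
            if (err)
                    return err;

            for (i = 0; i < n; i++)
                    heap_insert(&heap, items[i]);   /* evicted value ignored */

            /* heap.ptrs[0..heap.size-1] now holds the 16 smallest items */
            heap_free(&heap);
            return 0;
    }
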
index 20741f668f7bebbba1cd018a07bab5810ae93428..1ff461672060ebc52d882156e0659250944d4110 100644 (file)
@@ -125,7 +125,8 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
-extern int proc_fill_super(struct super_block *,void *,int);
+struct pid_namespace;
+extern int proc_fill_super(struct super_block *);
 extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
 
 /*
@@ -142,6 +143,9 @@ extern const struct file_operations proc_kcore_operations;
 extern const struct file_operations proc_kmsg_operations;
 extern const struct file_operations ppc_htab_operations;
 
+extern int pid_ns_prepare_proc(struct pid_namespace *ns);
+extern void pid_ns_release_proc(struct pid_namespace *ns);
+
 /*
  * proc_tty.c
  */
@@ -207,7 +211,9 @@ extern void proc_net_remove(struct net *net, const char *name);
 #define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; })
 static inline void proc_net_remove(struct net *net, const char *name) {}
 
-static inline void proc_flush_task(struct task_struct *task) { }
+static inline void proc_flush_task(struct task_struct *task)
+{
+}
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
        mode_t mode, struct proc_dir_entry *parent) { return NULL; }
@@ -232,6 +238,15 @@ static inline void proc_tty_unregister_driver(struct tty_driver *driver) {};
 
 extern struct proc_dir_entry proc_root;
 
+static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+       return 0;
+}
+
+static inline void pid_ns_release_proc(struct pid_namespace *ns)
+{
+}
+
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
index 8dcf237d3386e901fedfb98553e11e2ebf796d32..72bfccd3da22bc91e831d77b0b00179fef338d61 100644 (file)
@@ -85,7 +85,7 @@ void reiserfs_warning(struct super_block *s, const char *fmt, ...);
 if( !( cond ) )                                                                \
   reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at " \
                  __FILE__ ":%i:%s: " format "\n",              \
-                 in_interrupt() ? -1 : current -> pid, __LINE__ , __FUNCTION__ , ##args )
+                 in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args )
 
 #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
 
@@ -283,6 +283,18 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
        return sb->s_fs_info;
 }
 
+/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
+ * which overflows on large file systems. */
+static inline u32 reiserfs_bmap_count(struct super_block *sb)
+{
+       return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
+}
+
+static inline int bmap_would_wrap(unsigned bmap_nr)
+{
+       return bmap_nr > ((1LL << 16) - 1);
+}
+
 /** this says about version of key of all items (but stat data) the
     object consists of */
 #define get_inode_item_key_version( inode )                                    \
@@ -1734,8 +1746,8 @@ int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
 int journal_mark_freed(struct reiserfs_transaction_handle *,
                       struct super_block *, b_blocknr_t blocknr);
 int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
-int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr,
-                       int searchall, b_blocknr_t * next);
+int reiserfs_in_journal(struct super_block *p_s_sb, unsigned int bmap_nr,
+                       int bit_nr, int searchall, b_blocknr_t *next);
 int journal_begin(struct reiserfs_transaction_handle *,
                  struct super_block *p_s_sb, unsigned long);
 int journal_join_abort(struct reiserfs_transaction_handle *,
@@ -1743,7 +1755,7 @@ int journal_join_abort(struct reiserfs_transaction_handle *,
 void reiserfs_journal_abort(struct super_block *sb, int errno);
 void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
 int reiserfs_allocate_list_bitmaps(struct super_block *s,
-                                  struct reiserfs_list_bitmap *, int);
+                                  struct reiserfs_list_bitmap *, unsigned int);
 
 void add_save_link(struct reiserfs_transaction_handle *th,
                   struct inode *inode, int truncate);
@@ -2041,7 +2053,7 @@ struct buffer_head *get_FEB(struct tree_balance *);
  * arguments, such as node, search path, transaction_handle, etc. */
 struct __reiserfs_blocknr_hint {
        struct inode *inode;    /* inode passed to allocator, if we allocate unf. nodes */
-       long block;             /* file offset, in blocks */
+       sector_t block;         /* file offset, in blocks */
        struct in_core_key key;
        struct treepath *path;  /* search path, used by allocator to determine search_start by
                                 * various ways */
@@ -2099,7 +2111,8 @@ static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
 static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
                                            *th, struct inode *inode,
                                            b_blocknr_t * new_blocknrs,
-                                           struct treepath *path, long block)
+                                           struct treepath *path,
+                                           sector_t block)
 {
        reiserfs_blocknr_hint_t hint = {
                .th = th,
@@ -2116,7 +2129,8 @@ static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
 static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
                                             *th, struct inode *inode,
                                             b_blocknr_t * new_blocknrs,
-                                            struct treepath *path, long block)
+                                            struct treepath *path,
+                                            sector_t block)
 {
        reiserfs_blocknr_hint_t hint = {
                .th = th,
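
The arithmetic behind the u16 overflow note above: with the default 4 KB block size, one bitmap block maps s_blocksize * 8 = 32768 blocks, so a 16-bit bitmap count tops out around 2^16 * 2^15 blocks * 4 KB, roughly 8 TB. A hedged sketch of using the two new helpers together:

    static void check_bmap(struct super_block *sb)
    {
            u32 count = reiserfs_bmap_count(sb);    /* recomputed, not s_bmap_nr */

            if (bmap_would_wrap(count))
                    reiserfs_warning(sb, "bitmap count %u would not fit in "
                                     "the on-disk u16 field", count);
    }
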
index ff9e9234f8ba64b77f2b7316e34fc69781919e2f..10fa0c832018c870a7bac82b428cdb976cb34449 100644 (file)
@@ -265,9 +265,7 @@ enum journal_state_bits {
 typedef __u32(*hashf_t) (const signed char *, int);
 
 struct reiserfs_bitmap_info {
-       // FIXME: Won't work with block sizes > 8K
-       __u16 first_zero_hint;
-       __u16 free_count;
+       __u32 free_count;
 };
 
 struct proc_dir_entry;
index 10a83d8d5775d370c6eac6ff6b8488e07e9b7579..13df99fb27693b159d1bbc272ed406fc87a10e3b 100644 (file)
@@ -25,6 +25,7 @@
 #define CLONE_NEWUTS           0x04000000      /* New utsname group? */
 #define CLONE_NEWIPC           0x08000000      /* New ipcs */
 #define CLONE_NEWUSER          0x10000000      /* New user namespace */
+#define CLONE_NEWPID           0x20000000      /* New pid namespace */
 #define CLONE_NEWNET           0x40000000      /* New network namespace */
 
 /*
@@ -428,7 +429,17 @@ struct signal_struct {
        cputime_t it_prof_incr, it_virt_incr;
 
        /* job control IDs */
-       pid_t pgrp;
+
+       /*
+        * pgrp and session fields are deprecated.
+        * use the task_session_Xnr and task_pgrp_Xnr routines below
+        */
+
+       union {
+               pid_t pgrp __deprecated;
+               pid_t __pgrp;
+       };
+
        struct pid *tty_old_pgrp;
 
        union {
@@ -736,6 +747,8 @@ struct sched_domain {
 #endif
 };
 
+extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -756,8 +769,6 @@ static inline int above_background_load(void)
 }
 
 struct io_context;                     /* See blkdev.h */
-struct cpuset;
-
 #define NGROUPS_SMALL          32
 #define NGROUPS_PER_BLOCK      ((int)(PAGE_SIZE / sizeof(gid_t)))
 struct group_info {
@@ -1125,11 +1136,16 @@ struct task_struct {
        short il_next;
 #endif
 #ifdef CONFIG_CPUSETS
-       struct cpuset *cpuset;
        nodemask_t mems_allowed;
        int cpuset_mems_generation;
        int cpuset_mem_spread_rotor;
 #endif
+#ifdef CONFIG_CGROUPS
+       /* Control Group info protected by css_set_lock */
+       struct css_set *cgroups;
+       /* cg_list protected by css_set_lock and tsk->alloc_lock */
+       struct list_head cg_list;
+#endif
 #ifdef CONFIG_FUTEX
        struct robust_list_head __user *robust_list;
 #ifdef CONFIG_COMPAT
@@ -1185,24 +1201,14 @@ static inline int rt_task(struct task_struct *p)
        return rt_prio(p->prio);
 }
 
-static inline pid_t process_group(struct task_struct *tsk)
+static inline void set_task_session(struct task_struct *tsk, pid_t session)
 {
-       return tsk->signal->pgrp;
+       tsk->signal->__session = session;
 }
 
-static inline pid_t signal_session(struct signal_struct *sig)
+static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
 {
-       return sig->__session;
-}
-
-static inline pid_t process_session(struct task_struct *tsk)
-{
-       return signal_session(tsk->signal);
-}
-
-static inline void set_signal_session(struct signal_struct *sig, pid_t session)
-{
-       sig->__session = session;
+       tsk->signal->__pgrp = pgrp;
 }
 
 static inline struct pid *task_pid(struct task_struct *task)
@@ -1225,6 +1231,88 @@ static inline struct pid *task_session(struct task_struct *task)
        return task->group_leader->pids[PIDTYPE_SID].pid;
 }
 
+struct pid_namespace;
+
+/*
+ * the helpers to get the task's different pids as they are seen
+ * from various namespaces
+ *
+ * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
+ * task_xid_vnr()    : virtual id, i.e. the id seen from the namespace the task
+ *                     belongs to. this only makes sence when called in the
+ *                     belongs to. this only makes sense when called in the
+ * task_xid_nr_ns()  : id seen from the ns specified;
+ *
+ * set_task_vxid()   : assigns a virtual id to a task;
+ *
+ * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
+ *                     the result depends on the namespace and whether the
+ *                     task in question is the namespace's init. e.g. for the
+ *                     namespace's init this will return 0 when called from
+ *                     the namespace of this init, or the appropriate id otherwise.
+ *
+ *
+ * see also pid_nr() etc in include/linux/pid.h
+ */
+
+static inline pid_t task_pid_nr(struct task_struct *tsk)
+{
+       return tsk->pid;
+}
+
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pid_vnr(struct task_struct *tsk)
+{
+       return pid_vnr(task_pid(tsk));
+}
+
+
+static inline pid_t task_tgid_nr(struct task_struct *tsk)
+{
+       return tsk->tgid;
+}
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+{
+       return pid_vnr(task_tgid(tsk));
+}
+
+
+static inline pid_t task_pgrp_nr(struct task_struct *tsk)
+{
+       return tsk->signal->__pgrp;
+}
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
+{
+       return pid_vnr(task_pgrp(tsk));
+}
+
+
+static inline pid_t task_session_nr(struct task_struct *tsk)
+{
+       return tsk->signal->__session;
+}
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_session_vnr(struct task_struct *tsk)
+{
+       return pid_vnr(task_session(tsk));
+}
+
+
+static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
+               struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
+}
+
 /**
  * pid_alive - check that a task structure is not stale
  * @p: Task structure to be checked.
@@ -1239,16 +1327,22 @@ static inline int pid_alive(struct task_struct *p)
 }
 
 /**
- * is_init - check if a task structure is init
+ * is_global_init - check if a task structure is init
  * @tsk: Task structure to be checked.
  *
  * Check if a task structure is the first user space task the kernel created.
  */
-static inline int is_init(struct task_struct *tsk)
+static inline int is_global_init(struct task_struct *tsk)
 {
        return tsk->pid == 1;
 }
 
+/*
+ * is_container_init:
+ * check whether the task is init in its own pid namespace.
+ */
+extern int is_container_init(struct task_struct *tsk);
+
 extern struct pid *cad_pid;
 
 extern void free_task(struct task_struct *tsk);
@@ -1420,8 +1514,32 @@ extern struct task_struct init_task;
 
 extern struct   mm_struct init_mm;
 
-#define find_task_by_pid(nr)   find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
+extern struct pid_namespace init_pid_ns;
+
+/*
+ * find a task by one of its numerical ids
+ *
+ * find_task_by_pid_type_ns():
+ *      it is the most generic call - it finds a task given the id,
+ *      pid type and namespace specified
+ * find_task_by_pid_ns():
+ *      finds a task by its pid in the specified namespace
+ * find_task_by_vpid():
+ *      finds a task by its virtual pid
+ * find_task_by_pid():
+ *      finds a task by its global pid
+ *
+ * see also find_pid() etc in include/linux/pid.h
+ */
+
+extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
+               struct pid_namespace *ns);
+
+extern struct task_struct *find_task_by_pid(pid_t nr);
+extern struct task_struct *find_task_by_vpid(pid_t nr);
+extern struct task_struct *find_task_by_pid_ns(pid_t nr,
+               struct pid_namespace *ns);
+
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
 /* per-UID process charging. */
@@ -1608,6 +1726,12 @@ static inline int has_group_leader_pid(struct task_struct *p)
        return p->pid == p->tgid;
 }
 
+static inline
+int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+{
+       return p1->tgid == p2->tgid;
+}
+
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
        return list_entry(rcu_dereference(p->thread_group.next),
@@ -1625,7 +1749,8 @@ static inline int thread_group_empty(struct task_struct *p)
 /*
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
- * pins the final release of task.io_context.  Also protects ->cpuset.
+ * pins the final release of task.io_context.  Also protects ->cpuset and
+ * ->cgroup.subsys[].
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
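
A sketch of the intended calling convention for the new lookups: a pid arriving from userspace is virtual, so it is resolved with find_task_by_vpid() under rcu_read_lock() (per the locking comment in pid.h above), and pinned before the lock is dropped:

    static struct task_struct *pin_task_by_user_pid(pid_t nr)
    {
            struct task_struct *tsk;

            rcu_read_lock();
            tsk = find_task_by_vpid(nr);    /* nr is in current's pid ns */
            if (tsk)
                    get_task_struct(tsk);   /* pin before rcu_read_unlock() */
            rcu_read_unlock();

            return tsk;     /* caller does put_task_struct() */
    }
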
index 9aaffb0b1d81e9ebfee3c9731209a23fac42223e..c8eaad9e4b72afacb1725204abbda308df0b3d45 100644 (file)
@@ -90,7 +90,6 @@ struct sem {
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
        struct kern_ipc_perm    sem_perm;       /* permissions .. see ipc.h */
-       int                     sem_id;
        time_t                  sem_otime;      /* last semop time */
        time_t                  sem_ctime;      /* last change time */
        struct sem              *sem_base;      /* ptr to first semaphore in array */
index bea65d9c93ef1c9da525bf5c6fb79a206f2e51f7..eeaed921a1dc02d02f3a61cec5ea0c62873ecfad 100644 (file)
@@ -79,7 +79,6 @@ struct shmid_kernel /* private to the kernel */
 {      
        struct kern_ipc_perm    shm_perm;
        struct file *           shm_file;
-       int                     id;
        unsigned long           shm_nattch;
        unsigned long           shm_segsz;
        time_t                  shm_atim;
index 0351bf2fac85779839842a60ae8012e3fbaf81c6..4f0dad21c91702a86acdcd22ca1b556e84ed1203 100644 (file)
@@ -3,12 +3,9 @@
 
 #ifdef __KERNEL__
 
-#define BITS_TO_LONGS(bits) \
-       (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
 #define DECLARE_BITMAP(name,bits) \
        unsigned long name[BITS_TO_LONGS(bits)]
 
-#define BITS_PER_BYTE 8
 #endif
 
 #include <linux/posix_types.h>
index a6c1e8eed2265e914f90b6611902974eed726fa0..15ddd4483b09b9b94b47a9130bc0579c31468c74 100644 (file)
@@ -162,10 +162,6 @@ struct uinput_ff_erase {
 #define UI_FF_UPLOAD           1
 #define UI_FF_ERASE            2
 
-#ifndef NBITS
-#define NBITS(x) ((((x)-1)/(sizeof(long)*8))+1)
-#endif /* NBITS */
-
 #define UINPUT_MAX_NAME_SIZE   80
 struct uinput_user_dev {
        char name[UINPUT_MAX_NAME_SIZE];
index ba806e8711beb42129534649e8270c9b4bd35ba9..02c1c02887703b6d044af1bc57685729cb242278 100644 (file)
@@ -1,6 +1,18 @@
 #ifndef _LINUX_VT_H
 #define _LINUX_VT_H
 
+#ifdef __KERNEL__
+struct notifier_block;
+
+struct vt_notifier_param {
+       struct vc_data *vc;     /* VC on which the update happened */
+       unsigned int c;         /* Printed char */
+};
+
+extern int register_vt_notifier(struct notifier_block *nb);
+extern int unregister_vt_notifier(struct notifier_block *nb);
+#endif
+
 /*
  * These constants are also useful for user-level apps (e.g., VC
  * resizing).
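
A hedged sketch of a consumer: the VT_* action codes come from the notifier.h hunk earlier in this diff, and each callback receives the vt_notifier_param declared above:

    #include <linux/notifier.h>
    #include <linux/vt.h>

    static int vt_watch(struct notifier_block *nb, unsigned long action,
                        void *data)
    {
            struct vt_notifier_param *param = data;

            if (action == VT_WRITE)
                    printk(KERN_DEBUG "vc %p printed char %u\n",
                           param->vc, param->c);
            return NOTIFY_OK;
    }

    static struct notifier_block vt_nb = {
            .notifier_call = vt_watch,
    };

    /* register_vt_notifier(&vt_nb) at init, unregister_vt_notifier(&vt_nb) at exit */
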
index ce6badc98f6dcb3f2b78615cd4aa382d6d956abd..7daafdc2514b9657a0e0d9d0325785cc65c934b2 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <linux/lockdep.h>
 #include <asm/atomic.h>
 
 struct workqueue_struct;
@@ -28,6 +29,9 @@ struct work_struct {
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
        struct list_head entry;
        work_func_t func;
+#ifdef CONFIG_LOCKDEP
+       struct lockdep_map lockdep_map;
+#endif
 };
 
 #define WORK_DATA_INIT()       ATOMIC_LONG_INIT(0)
@@ -41,10 +45,23 @@ struct execute_work {
        struct work_struct work;
 };
 
+#ifdef CONFIG_LOCKDEP
+/*
+ * NB: because we have to copy the lockdep_map, setting _key
+ * here is required, otherwise it could get initialised to the
+ * copy of the lockdep_map!
+ */
+#define __WORK_INIT_LOCKDEP_MAP(n, k) \
+       .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
+#else
+#define __WORK_INIT_LOCKDEP_MAP(n, k)
+#endif
+
 #define __WORK_INITIALIZER(n, f) {                             \
        .data = WORK_DATA_INIT(),                               \
        .entry  = { &(n).entry, &(n).entry },                   \
        .func = (f),                                            \
+       __WORK_INIT_LOCKDEP_MAP(#n, &(n))                       \
        }
 
 #define __DELAYED_WORK_INITIALIZER(n, f) {                     \
@@ -76,12 +93,24 @@ struct execute_work {
  * assignment of the work data initializer allows the compiler
  * to generate better code.
  */
+#ifdef CONFIG_LOCKDEP
+#define INIT_WORK(_work, _func)                                                \
+       do {                                                            \
+               static struct lock_class_key __key;                     \
+                                                                       \
+               (_work)->data = (atomic_long_t) WORK_DATA_INIT();       \
+               lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
+               INIT_LIST_HEAD(&(_work)->entry);                        \
+               PREPARE_WORK((_work), (_func));                         \
+       } while (0)
+#else
 #define INIT_WORK(_work, _func)                                                \
        do {                                                            \
                (_work)->data = (atomic_long_t) WORK_DATA_INIT();       \
                INIT_LIST_HEAD(&(_work)->entry);                        \
                PREPARE_WORK((_work), (_func));                         \
        } while (0)
+#endif
 
 #define INIT_DELAYED_WORK(_work, _func)                                \
        do {                                                    \
@@ -118,9 +147,23 @@ struct execute_work {
        clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 
-extern struct workqueue_struct *__create_workqueue(const char *name,
-                                                   int singlethread,
-                                                   int freezeable);
+extern struct workqueue_struct *
+__create_workqueue_key(const char *name, int singlethread,
+                      int freezeable, struct lock_class_key *key);
+
+#ifdef CONFIG_LOCKDEP
+#define __create_workqueue(name, singlethread, freezeable)     \
+({                                                             \
+       static struct lock_class_key __key;                     \
+                                                               \
+       __create_workqueue_key((name), (singlethread),          \
+                              (freezeable), &__key);           \
+})
+#else
+#define __create_workqueue(name, singlethread, freezeable)     \
+       __create_workqueue_key((name), (singlethread), (freezeable), NULL)
+#endif
+
 #define create_workqueue(name) __create_workqueue((name), 0, 0)
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)
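
The point of routing everything through __create_workqueue_key() and the static __key in INIT_WORK is that each call site becomes its own lockdep class, so flush-vs-lock inversions are reported per workqueue and per work item. A minimal sketch of the unchanged user-facing pattern:

    static struct workqueue_struct *demo_wq;
    static struct work_struct demo_work;

    static void demo_work_fn(struct work_struct *work)
    {
            /* locks taken here are checked against flushers by lockdep */
    }

    static int __init demo_wq_init(void)
    {
            demo_wq = create_singlethread_workqueue("demo");
            if (!demo_wq)
                    return -ENOMEM;
            INIT_WORK(&demo_work, demo_work_fn);    /* also sets up lockdep_map */
            queue_work(demo_wq, &demo_work);
            return 0;
    }
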
index 686425a97b0f63b1be45a6b59aa2a3c5c044068c..625346c47ee2e276dd6eb413a96672bbb75014fb 100644 (file)
@@ -44,7 +44,7 @@ extern unsigned int p9_debug_level;
 do {  \
        if ((p9_debug_level & level) == level) \
                printk(KERN_NOTICE "-- %s (%d): " \
-               format , __FUNCTION__, current->pid , ## arg); \
+               format , __FUNCTION__, task_pid_nr(current) , ## arg); \
 } while (0)
 
 #define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR,   \
@@ -59,7 +59,7 @@ do {  \
 #define P9_EPRINTK(level, format, arg...) \
 do { \
        printk(level "9p: %s (%d): " \
-               format , __FUNCTION__, current->pid , ## arg); \
+               format , __FUNCTION__, task_pid_nr(current), ## arg); \
 } while (0)
 
 
index 423cb1d5ac2530dcd4a742f286b7d416be6b8f6e..06df126103cab8224d6cf7ae64c7f00e75f06e12 100644 (file)
@@ -4,6 +4,8 @@
 #include <linux/limits.h>
 #include <linux/net.h>
 #include <linux/security.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
@@ -54,7 +56,7 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
        struct task_struct *p = current;
        scm->creds.uid = p->uid;
        scm->creds.gid = p->gid;
-       scm->creds.pid = p->tgid;
+       scm->creds.pid = task_tgid_vnr(p);
        scm->fp = NULL;
        scm->seq = 0;
        unix_get_peersec_dgram(sock, scm);
index baa163f770abd7ceba1d7aa273b8b5450bd4916d..b52f07381243efdcd125a1c5a6c3a8d090dbd4e5 100644 (file)
@@ -68,7 +68,6 @@
 #  define print_var(X,Y...)
 #endif
 
-#define BIT(x)         (1ul<<(x))
 #define POW2(x)                (1ul<<(x))
 
 /*
index 05b63c2a5abcfd8f061204e1c1f2021783b2fba7..7431d9681e57280b740db748257d168077eb3e1a 100644 (file)
@@ -79,8 +79,6 @@
 
 /* register bitfields (not all, only as needed) */
 
-#define BIT(x) (1UL << (x))
-
 /* COMMAND_2D reg. values */
 #define TDFX_ROP_COPY          0xcc    /* src */
 #define TDFX_ROP_INVERT                0x55    /* NOT dst */
index a29a688c47d3128dc63fed1c70e799351ca38449..541382d539add2184cadd8e58e1d73e92c4858fe 100644 (file)
@@ -270,9 +270,43 @@ config LOG_BUF_SHIFT
                     13 =>  8 KB
                     12 =>  4 KB
 
+config CGROUPS
+       bool "Control Group support"
+       help
+         This option will let you use process cgroup subsystems
+         such as Cpusets.
+
+         Say N if unsure.
+
+config CGROUP_DEBUG
+       bool "Example debug cgroup subsystem"
+       depends on CGROUPS
+       help
+         This option enables a simple cgroup subsystem that
+         exports useful debugging information about the cgroups
+         framework.
+
+         Say N if unsure.
+
+config CGROUP_NS
+       bool "Namespace cgroup subsystem"
+       depends on CGROUPS
+       help
+         Provides a simple namespace cgroup subsystem to
+         provide hierarchical naming of sets of namespaces,
+         for instance virtual servers and checkpoint/restart
+         jobs.
+
+config CGROUP_CPUACCT
+       bool "Simple CPU accounting cgroup subsystem"
+       depends on CGROUPS
+       help
+         Provides a simple Resource Controller for monitoring the
+         total CPU consumed by the tasks in a cgroup.
+
 config CPUSETS
        bool "Cpuset support"
-       depends on SMP
+       depends on SMP && CGROUPS
        help
          This option will let you create and manage CPUSETs which
          allow dynamically partitioning a system into sets of CPUs and
@@ -300,6 +334,16 @@ config FAIR_USER_SCHED
          This option will choose userid as the basis for grouping
          tasks, thus providing equal CPU bandwidth to each user.
 
+config FAIR_CGROUP_SCHED
+       bool "Control groups"
+       depends on CGROUPS
+       help
+         This option allows you to create arbitrary task groups
+         using the "cgroup" pseudo filesystem and control
+         the cpu bandwidth allocated to each such task group.
+         Refer to Documentation/cgroups.txt for more information
+         on the "cgroup" pseudo filesystem.
+
 endchoice
 
 config SYSFS_DEPRECATED
@@ -322,6 +366,11 @@ config SYSFS_DEPRECATED
          If you are using a distro that was released in 2006 or later,
          it should be safe to say N here.
 
+config PROC_PID_CPUSET
+       bool "Include legacy /proc/<pid>/cpuset file"
+       depends on CPUSETS
+       default y
+
 config RELAY
        bool "Kernel->user space relay support (formerly relayfs)"
        help
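
For reference, a .config fragment that switches on the whole new group (illustrative; FAIR_CGROUP_SCHED is one alternative within the fair-scheduler choice block):

    CONFIG_CGROUPS=y
    # CONFIG_CGROUP_DEBUG is not set
    CONFIG_CGROUP_NS=y
    CONFIG_CGROUP_CPUACCT=y
    CONFIG_CPUSETS=y
    CONFIG_PROC_PID_CPUSET=y
    CONFIG_FAIR_CGROUP_SCHED=y
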
index 9def935ab13a27c829e28bb2830cdbe2c6fd900d..0dd0e7a1f6329fab9bfecb88f1363530459281e8 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/writeback.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/efi.h>
 #include <linux/tick.h>
 #include <linux/interrupt.h>
@@ -523,6 +524,7 @@ asmlinkage void __init start_kernel(void)
         */
        unwind_init();
        lockdep_init();
+       cgroup_init_early();
 
        local_irq_disable();
        early_boot_irqs_off();
@@ -640,6 +642,7 @@ asmlinkage void __init start_kernel(void)
 #ifdef CONFIG_PROC_FS
        proc_root_init();
 #endif
+       cgroup_init();
        cpuset_init();
        taskstats_init_early();
        delayacct_init();
index 20f1fed8fa48f65f206eeb3124b56d602da3ca80..c0b26dc4617bd108925656d85743b80acd462654 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/audit.h>
 #include <linux/signal.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <linux/pid.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -330,7 +332,8 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
                        (info->notify_owner &&
                         info->notify.sigev_notify == SIGEV_SIGNAL) ?
                                info->notify.sigev_signo : 0,
-                       pid_nr(info->notify_owner));
+                       pid_nr_ns(info->notify_owner,
+                               current->nsproxy->pid_ns));
        spin_unlock(&info->lock);
        buffer[sizeof(buffer)-1] = '\0';
        slen = strlen(buffer)+1;
@@ -507,7 +510,7 @@ static void __do_notify(struct mqueue_inode_info *info)
                        sig_i.si_errno = 0;
                        sig_i.si_code = SI_MESGQ;
                        sig_i.si_value = info->notify.sigev_value;
-                       sig_i.si_pid = current->tgid;
+                       sig_i.si_pid = task_pid_vnr(current);
                        sig_i.si_uid = current->uid;
 
                        kill_pid_info(info->notify.sigev_signo,
index a03fcb522fff827df275ab75a7e2c76610e229b7..fdf3db5731ce8df2cfd038f40c69710243d85338 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -34,7 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/audit.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/current.h>
@@ -66,23 +66,15 @@ struct msg_sender {
 #define SEARCH_NOTEQUAL                3
 #define SEARCH_LESSEQUAL       4
 
-static atomic_t msg_bytes =    ATOMIC_INIT(0);
-static atomic_t msg_hdrs =     ATOMIC_INIT(0);
-
 static struct ipc_ids init_msg_ids;
 
 #define msg_ids(ns)    (*((ns)->ids[IPC_MSG_IDS]))
 
-#define msg_lock(ns, id)       ((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
 #define msg_unlock(msq)                ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(ns, id)       ((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
-#define msg_checkid(ns, msq, msgid)    \
-       ipc_checkid(&msg_ids(ns), &msq->q_perm, msgid)
-#define msg_buildid(ns, id, seq) \
-       ipc_buildid(&msg_ids(ns), id, seq)
-
-static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
+#define msg_buildid(id, seq)   ipc_buildid(id, seq)
+
+static void freeque(struct ipc_namespace *, struct msg_queue *);
+static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
@@ -93,7 +85,9 @@ static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
        ns->msg_ctlmax = MSGMAX;
        ns->msg_ctlmnb = MSGMNB;
        ns->msg_ctlmni = MSGMNI;
-       ipc_init_ids(ids, ns->msg_ctlmni);
+       atomic_set(&ns->msg_bytes, 0);
+       atomic_set(&ns->msg_hdrs, 0);
+       ipc_init_ids(ids);
 }
 
 int msg_init_ns(struct ipc_namespace *ns)
@@ -110,20 +104,25 @@ int msg_init_ns(struct ipc_namespace *ns)
 
 void msg_exit_ns(struct ipc_namespace *ns)
 {
-       int i;
        struct msg_queue *msq;
+       int next_id;
+       int total, in_use;
+
+       down_write(&msg_ids(ns).rw_mutex);
+
+       in_use = msg_ids(ns).in_use;
 
-       mutex_lock(&msg_ids(ns).mutex);
-       for (i = 0; i <= msg_ids(ns).max_id; i++) {
-               msq = msg_lock(ns, i);
+       for (total = 0, next_id = 0; total < in_use; next_id++) {
+               msq = idr_find(&msg_ids(ns).ipcs_idr, next_id);
                if (msq == NULL)
                        continue;
-
-               freeque(ns, msq, i);
+               ipc_lock_by_ptr(&msq->q_perm);
+               freeque(ns, msq);
+               total++;
        }
-       mutex_unlock(&msg_ids(ns).mutex);
 
-       ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
+       up_write(&msg_ids(ns).rw_mutex);
+
        kfree(ns->ids[IPC_MSG_IDS]);
        ns->ids[IPC_MSG_IDS] = NULL;
 }
@@ -136,10 +135,55 @@ void __init msg_init(void)
                                IPC_MSG_IDS, sysvipc_msg_proc_show);
 }
 
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
+/*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns,
+                                               int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id);
+
+       return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+/*
+ * msg_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);
+
+       return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
+                                               int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);
+
+       return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
+{
+       ipc_rmid(&msg_ids(ns), &s->q_perm);
+}
+
+/**
+ * newque - Create a new msg queue
+ * @ns: namespace
+ * @params: ptr to the structure that contains the key and msgflg
+ *
+ * Called with msg_ids.rw_mutex held (writer)
+ */
+static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
        struct msg_queue *msq;
        int id, retval;
+       key_t key = params->key;
+       int msgflg = params->flg;
 
        msq = ipc_rcu_alloc(sizeof(*msq));
        if (!msq)
@@ -155,14 +199,17 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
                return retval;
        }
 
+       /*
+        * ipc_addid() locks msq
+        */
        id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
-       if (id == -1) {
+       if (id < 0) {
                security_msg_queue_free(msq);
                ipc_rcu_putref(msq);
-               return -ENOSPC;
+               return id;
        }
 
-       msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
+       msq->q_perm.id = msg_buildid(id, msq->q_perm.seq);
        msq->q_stime = msq->q_rtime = 0;
        msq->q_ctime = get_seconds();
        msq->q_cbytes = msq->q_qnum = 0;
@@ -171,9 +218,10 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
        INIT_LIST_HEAD(&msq->q_messages);
        INIT_LIST_HEAD(&msq->q_receivers);
        INIT_LIST_HEAD(&msq->q_senders);
+
        msg_unlock(msq);
 
-       return msq->q_id;
+       return msq->q_perm.id;
 }
 
 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
@@ -224,19 +272,19 @@ static void expunge_all(struct msg_queue *msq, int res)
 
 /*
  * freeque() wakes up waiters on the sender and receiver waiting queue,
- * removes the message queue from message queue ID
- * array, and cleans up all the messages associated with this queue.
+ * removes the message queue from the message queue ID IDR, and cleans up all the
+ * messages associated with this queue.
  *
- * msg_ids.mutex and the spinlock for this message queue is hold
- * before freeque() is called. msg_ids.mutex remains locked on exit.
+ * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
  */
-static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
+static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
 {
        struct list_head *tmp;
 
        expunge_all(msq, -EIDRM);
        ss_wakeup(&msq->q_senders, 1);
-       msq = msg_rmid(ns, id);
+       msg_rmid(ns, msq);
        msg_unlock(msq);
 
        tmp = msq->q_messages.next;
@@ -244,49 +292,40 @@ static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
                struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
 
                tmp = tmp->next;
-               atomic_dec(&msg_hdrs);
+               atomic_dec(&ns->msg_hdrs);
                free_msg(msg);
        }
-       atomic_sub(msq->q_cbytes, &msg_bytes);
+       atomic_sub(msq->q_cbytes, &ns->msg_bytes);
        security_msg_queue_free(msq);
        ipc_rcu_putref(msq);
 }
 
+/*
+ * Called with msg_ids.rw_mutex and ipcp locked.
+ */
+static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
+{
+       struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
+
+       return security_msg_queue_associate(msq, msgflg);
+}
+
 asmlinkage long sys_msgget(key_t key, int msgflg)
 {
-       struct msg_queue *msq;
-       int id, ret = -EPERM;
        struct ipc_namespace *ns;
+       struct ipc_ops msg_ops;
+       struct ipc_params msg_params;
 
        ns = current->nsproxy->ipc_ns;
-       
-       mutex_lock(&msg_ids(ns).mutex);
-       if (key == IPC_PRIVATE) 
-               ret = newque(ns, key, msgflg);
-       else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
-               if (!(msgflg & IPC_CREAT))
-                       ret = -ENOENT;
-               else
-                       ret = newque(ns, key, msgflg);
-       } else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
-               ret = -EEXIST;
-       } else {
-               msq = msg_lock(ns, id);
-               BUG_ON(msq == NULL);
-               if (ipcperms(&msq->q_perm, msgflg))
-                       ret = -EACCES;
-               else {
-                       int qid = msg_buildid(ns, id, msq->q_perm.seq);
-
-                       ret = security_msg_queue_associate(msq, msgflg);
-                       if (!ret)
-                               ret = qid;
-               }
-               msg_unlock(msq);
-       }
-       mutex_unlock(&msg_ids(ns).mutex);
 
-       return ret;
+       msg_ops.getnew = newque;
+       msg_ops.associate = msg_security;
+       msg_ops.more_checks = NULL;
+
+       msg_params.key = key;
+       msg_params.flg = msgflg;
+
+       return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 }
 
 static inline unsigned long
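
The rewritten sys_msgget() above is the template for the whole series: per-mechanism create/associate logic is packed into callbacks, and the boilerplate key lookup moves into a common ipcget() (defined in ipc/util.c elsewhere in this patch set). A sketch of the pattern's shape, using the names from this hunk with designated initializers as a stylistic variation:

    static long demo_msgget(struct ipc_namespace *ns, key_t key, int msgflg)
    {
            struct ipc_ops ops = {
                    .getnew      = newque,          /* key new or IPC_PRIVATE */
                    .associate   = msg_security,    /* check an existing id */
                    .more_checks = NULL,            /* shm uses this for size */
            };
            struct ipc_params params = {
                    .key = key,
                    .flg = msgflg,
            };

            return ipcget(ns, &msg_ids(ns), &ops, &params);
    }
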
@@ -420,23 +459,23 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                msginfo.msgmnb = ns->msg_ctlmnb;
                msginfo.msgssz = MSGSSZ;
                msginfo.msgseg = MSGSEG;
-               mutex_lock(&msg_ids(ns).mutex);
+               down_read(&msg_ids(ns).rw_mutex);
                if (cmd == MSG_INFO) {
                        msginfo.msgpool = msg_ids(ns).in_use;
-                       msginfo.msgmap = atomic_read(&msg_hdrs);
-                       msginfo.msgtql = atomic_read(&msg_bytes);
+                       msginfo.msgmap = atomic_read(&ns->msg_hdrs);
+                       msginfo.msgtql = atomic_read(&ns->msg_bytes);
                } else {
                        msginfo.msgmap = MSGMAP;
                        msginfo.msgpool = MSGPOOL;
                        msginfo.msgtql = MSGTQL;
                }
-               max_id = msg_ids(ns).max_id;
-               mutex_unlock(&msg_ids(ns).mutex);
+               max_id = ipc_get_maxid(&msg_ids(ns));
+               up_read(&msg_ids(ns).rw_mutex);
                if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
                        return -EFAULT;
                return (max_id < 0) ? 0 : max_id;
        }
-       case MSG_STAT:
+       case MSG_STAT:  /* msqid is an index rather than a msg queue id */
        case IPC_STAT:
        {
                struct msqid64_ds tbuf;
@@ -444,21 +483,16 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 
                if (!buf)
                        return -EFAULT;
-               if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
-                       return -EINVAL;
-
-               memset(&tbuf, 0, sizeof(tbuf));
-
-               msq = msg_lock(ns, msqid);
-               if (msq == NULL)
-                       return -EINVAL;
 
                if (cmd == MSG_STAT) {
-                       success_return = msg_buildid(ns, msqid, msq->q_perm.seq);
+                       msq = msg_lock(ns, msqid);
+                       if (IS_ERR(msq))
+                               return PTR_ERR(msq);
+                       success_return = msq->q_perm.id;
                } else {
-                       err = -EIDRM;
-                       if (msg_checkid(ns, msq, msqid))
-                               goto out_unlock;
+                       msq = msg_lock_check(ns, msqid);
+                       if (IS_ERR(msq))
+                               return PTR_ERR(msq);
                        success_return = 0;
                }
                err = -EACCES;
@@ -469,6 +503,8 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                if (err)
                        goto out_unlock;
 
+               memset(&tbuf, 0, sizeof(tbuf));
+
                kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
                tbuf.msg_stime  = msq->q_stime;
                tbuf.msg_rtime  = msq->q_rtime;
@@ -495,15 +531,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                return  -EINVAL;
        }
 
-       mutex_lock(&msg_ids(ns).mutex);
-       msq = msg_lock(ns, msqid);
-       err = -EINVAL;
-       if (msq == NULL)
+       down_write(&msg_ids(ns).rw_mutex);
+       msq = msg_lock_check_down(ns, msqid);
+       if (IS_ERR(msq)) {
+               err = PTR_ERR(msq);
                goto out_up;
+       }
 
-       err = -EIDRM;
-       if (msg_checkid(ns, msq, msqid))
-               goto out_unlock_up;
        ipcp = &msq->q_perm;
 
        err = audit_ipc_obj(ipcp);
@@ -552,12 +586,12 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                break;
        }
        case IPC_RMID:
-               freeque(ns, msq, msqid);
+               freeque(ns, msq);
                break;
        }
        err = 0;
 out_up:
-       mutex_unlock(&msg_ids(ns).mutex);
+       up_write(&msg_ids(ns).rw_mutex);
        return err;
 out_unlock_up:
        msg_unlock(msq);
@@ -611,7 +645,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
                                msr->r_msg = ERR_PTR(-E2BIG);
                        } else {
                                msr->r_msg = NULL;
-                               msq->q_lrpid = msr->r_tsk->pid;
+                               msq->q_lrpid = task_pid_vnr(msr->r_tsk);
                                msq->q_rtime = get_seconds();
                                wake_up_process(msr->r_tsk);
                                smp_mb();
@@ -646,14 +680,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
        msg->m_type = mtype;
        msg->m_ts = msgsz;
 
-       msq = msg_lock(ns, msqid);
-       err = -EINVAL;
-       if (msq == NULL)
+       msq = msg_lock_check(ns, msqid);
+       if (IS_ERR(msq)) {
+               err = PTR_ERR(msq);
                goto out_free;
-
-       err= -EIDRM;
-       if (msg_checkid(ns, msq, msqid))
-               goto out_unlock_free;
+       }
 
        for (;;) {
                struct msg_sender s;
@@ -695,7 +726,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                }
        }
 
-       msq->q_lspid = current->tgid;
+       msq->q_lspid = task_tgid_vnr(current);
        msq->q_stime = get_seconds();
 
        if (!pipelined_send(msq, msg)) {
@@ -703,8 +734,8 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
                list_add_tail(&msg->m_list, &msq->q_messages);
                msq->q_cbytes += msgsz;
                msq->q_qnum++;
-               atomic_add(msgsz, &msg_bytes);
-               atomic_inc(&msg_hdrs);
+               atomic_add(msgsz, &ns->msg_bytes);
+               atomic_inc(&ns->msg_hdrs);
        }
 
        err = 0;
@@ -760,13 +791,9 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
        mode = convert_mode(&msgtyp, msgflg);
        ns = current->nsproxy->ipc_ns;
 
-       msq = msg_lock(ns, msqid);
-       if (msq == NULL)
-               return -EINVAL;
-
-       msg = ERR_PTR(-EIDRM);
-       if (msg_checkid(ns, msq, msqid))
-               goto out_unlock;
+       msq = msg_lock_check(ns, msqid);
+       if (IS_ERR(msq))
+               return PTR_ERR(msq);
 
        for (;;) {
                struct msg_receiver msr_d;
@@ -810,10 +837,10 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
                        list_del(&msg->m_list);
                        msq->q_qnum--;
                        msq->q_rtime = get_seconds();
-                       msq->q_lrpid = current->tgid;
+                       msq->q_lrpid = task_tgid_vnr(current);
                        msq->q_cbytes -= msg->m_ts;
-                       atomic_sub(msg->m_ts, &msg_bytes);
-                       atomic_dec(&msg_hdrs);
+                       atomic_sub(msg->m_ts, &ns->msg_bytes);
+                       atomic_dec(&ns->msg_hdrs);
                        ss_wakeup(&msq->q_senders, 0);
                        msg_unlock(msq);
                        break;
@@ -926,7 +953,7 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
        return seq_printf(s,
                        "%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
                        msq->q_perm.key,
-                       msq->q_id,
+                       msq->q_perm.id,
                        msq->q_perm.mode,
                        msq->q_cbytes,
                        msq->q_qnum,
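
The msg.c conversion above follows one pattern throughout: the lock helpers
now return an ERR_PTR()-encoded error instead of NULL, so a single
IS_ERR()/PTR_ERR() branch replaces the old NULL test plus msg_checkid()
pair while still distinguishing -EINVAL (no such id) from -EIDRM (stale
sequence number). A minimal caller-side sketch of the convention, with
msg_op() standing in as a hypothetical placeholder for whatever the caller
does with the locked queue:

        struct msg_queue *msq;
        int err;

        msq = msg_lock_check(ns, msqid);
        if (IS_ERR(msq))
                return PTR_ERR(msq);    /* -EINVAL or -EIDRM, as appropriate */

        err = msg_op(msq);              /* hypothetical work on the locked queue */
        msg_unlock(msq);
        return err;
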
index b676fef6d208b563dfe52929f6ef8a491860a928..35952c0bae4629cac017c4c62623c91a022ebee5 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -80,7 +80,7 @@
 #include <linux/audit.h>
 #include <linux/capability.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 
 #define sem_ids(ns)    (*((ns)->ids[IPC_SEM_IDS]))
 
-#define sem_lock(ns, id)       ((struct sem_array*)ipc_lock(&sem_ids(ns), id))
 #define sem_unlock(sma)                ipc_unlock(&(sma)->sem_perm)
-#define sem_rmid(ns, id)       ((struct sem_array*)ipc_rmid(&sem_ids(ns), id))
-#define sem_checkid(ns, sma, semid)    \
-       ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
-#define sem_buildid(ns, id, seq) \
-       ipc_buildid(&sem_ids(ns), id, seq)
+#define sem_checkid(sma, semid)        ipc_checkid(&sma->sem_perm, semid)
+#define sem_buildid(id, seq)   ipc_buildid(id, seq)
 
 static struct ipc_ids init_sem_ids;
 
-static int newary(struct ipc_namespace *, key_t, int, int);
-static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id);
+static int newary(struct ipc_namespace *, struct ipc_params *);
+static void freeary(struct ipc_namespace *, struct sem_array *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif
@@ -129,7 +125,7 @@ static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
        ns->sc_semopm = SEMOPM;
        ns->sc_semmni = SEMMNI;
        ns->used_sems = 0;
-       ipc_init_ids(ids, ns->sc_semmni);
+       ipc_init_ids(ids);
 }
 
 int sem_init_ns(struct ipc_namespace *ns)
@@ -146,20 +142,24 @@ int sem_init_ns(struct ipc_namespace *ns)
 
 void sem_exit_ns(struct ipc_namespace *ns)
 {
-       int i;
        struct sem_array *sma;
+       int next_id;
+       int total, in_use;
 
-       mutex_lock(&sem_ids(ns).mutex);
-       for (i = 0; i <= sem_ids(ns).max_id; i++) {
-               sma = sem_lock(ns, i);
+       down_write(&sem_ids(ns).rw_mutex);
+
+       in_use = sem_ids(ns).in_use;
+
+       for (total = 0, next_id = 0; total < in_use; next_id++) {
+               sma = idr_find(&sem_ids(ns).ipcs_idr, next_id);
                if (sma == NULL)
                        continue;
-
-               freeary(ns, sma, i);
+               ipc_lock_by_ptr(&sma->sem_perm);
+               freeary(ns, sma);
+               total++;
        }
-       mutex_unlock(&sem_ids(ns).mutex);
+       up_write(&sem_ids(ns).rw_mutex);
 
-       ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
        kfree(ns->ids[IPC_SEM_IDS]);
        ns->ids[IPC_SEM_IDS] = NULL;
 }
@@ -172,6 +172,42 @@ void __init sem_init (void)
                                IPC_SEM_IDS, sysvipc_sem_proc_show);
 }
 
+/*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns,
+                                               int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id);
+
+       return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+/*
+ * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
+
+       return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
+                                               int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
+
+       return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+{
+       ipc_rmid(&sem_ids(ns), &s->sem_perm);
+}
+
 /*
  * Lockless wakeup algorithm:
  * Without the check/retry algorithm a lockless wakeup is possible:
@@ -206,12 +242,23 @@ void __init sem_init (void)
  */
 #define IN_WAKEUP      1
 
-static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
+/**
+ * newary - Create a new semaphore set
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, semflg and nsems
+ *
+ * Called with sem_ids.rw_mutex held (as a writer)
+ */
+
+static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
        int id;
        int retval;
        struct sem_array *sma;
        int size;
+       key_t key = params->key;
+       int nsems = params->u.nsems;
+       int semflg = params->flg;
 
        if (!nsems)
                return -EINVAL;
@@ -236,14 +283,14 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
        }
 
        id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
-       if(id == -1) {
+       if (id < 0) {
                security_sem_free(sma);
                ipc_rcu_putref(sma);
-               return -ENOSPC;
+               return id;
        }
        ns->used_sems += nsems;
 
-       sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq);
+       sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq);
        sma->sem_base = (struct sem *) &sma[1];
        /* sma->sem_pending = NULL; */
        sma->sem_pending_last = &sma->sem_pending;
@@ -252,48 +299,56 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
        sma->sem_ctime = get_seconds();
        sem_unlock(sma);
 
-       return sma->sem_id;
+       return sma->sem_perm.id;
+}
+
+
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
+{
+       struct sem_array *sma;
+
+       sma = container_of(ipcp, struct sem_array, sem_perm);
+       return security_sem_associate(sma, semflg);
 }
 
-asmlinkage long sys_semget (key_t key, int nsems, int semflg)
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
+                               struct ipc_params *params)
 {
-       int id, err = -EINVAL;
        struct sem_array *sma;
+
+       sma = container_of(ipcp, struct sem_array, sem_perm);
+       if (params->u.nsems > sma->sem_nsems)
+               return -EINVAL;
+
+       return 0;
+}
+
+asmlinkage long sys_semget(key_t key, int nsems, int semflg)
+{
        struct ipc_namespace *ns;
+       struct ipc_ops sem_ops;
+       struct ipc_params sem_params;
 
        ns = current->nsproxy->ipc_ns;
 
        if (nsems < 0 || nsems > ns->sc_semmsl)
                return -EINVAL;
-       mutex_lock(&sem_ids(ns).mutex);
-       
-       if (key == IPC_PRIVATE) {
-               err = newary(ns, key, nsems, semflg);
-       } else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) {  /* key not used */
-               if (!(semflg & IPC_CREAT))
-                       err = -ENOENT;
-               else
-                       err = newary(ns, key, nsems, semflg);
-       } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
-               err = -EEXIST;
-       } else {
-               sma = sem_lock(ns, id);
-               BUG_ON(sma==NULL);
-               if (nsems > sma->sem_nsems)
-                       err = -EINVAL;
-               else if (ipcperms(&sma->sem_perm, semflg))
-                       err = -EACCES;
-               else {
-                       int semid = sem_buildid(ns, id, sma->sem_perm.seq);
-                       err = security_sem_associate(sma, semflg);
-                       if (!err)
-                               err = semid;
-               }
-               sem_unlock(sma);
-       }
 
-       mutex_unlock(&sem_ids(ns).mutex);
-       return err;
+       sem_ops.getnew = newary;
+       sem_ops.associate = sem_security;
+       sem_ops.more_checks = sem_more_checks;
+
+       sem_params.key = key;
+       sem_params.flg = semflg;
+       sem_params.u.nsems = nsems;
+
+       return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 }
 
 /* Manage the doubly linked list sma->sem_pending as a FIFO:
@@ -487,15 +542,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
        return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.mutex locked and
- * the spinlock for this semaphore set hold. sem_ids.mutex remains locked
- * on exit.
+/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
+ * as a writer and the spinlock for this semaphore set held. sem_ids.rw_mutex
+ * remains locked on exit.
  */
-static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
+static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
 {
        struct sem_undo *un;
        struct sem_queue *q;
-       int size;
 
        /* Invalidate the existing undo structures for this semaphore set.
         * (They will be freed without any further action in exit_sem()
@@ -518,12 +572,11 @@ static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
                q = n;
        }
 
-       /* Remove the semaphore set from the ID array*/
-       sma = sem_rmid(ns, id);
+       /* Remove the semaphore set from the IDR */
+       sem_rmid(ns, sma);
        sem_unlock(sma);
 
        ns->used_sems -= sma->sem_nsems;
-       size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
        security_sem_free(sma);
        ipc_rcu_putref(sma);
 }
@@ -576,7 +629,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
                seminfo.semmnu = SEMMNU;
                seminfo.semmap = SEMMAP;
                seminfo.semume = SEMUME;
-               mutex_lock(&sem_ids(ns).mutex);
+               down_read(&sem_ids(ns).rw_mutex);
                if (cmd == SEM_INFO) {
                        seminfo.semusz = sem_ids(ns).in_use;
                        seminfo.semaem = ns->used_sems;
@@ -584,8 +637,8 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
                        seminfo.semusz = SEMUSZ;
                        seminfo.semaem = SEMAEM;
                }
-               max_id = sem_ids(ns).max_id;
-               mutex_unlock(&sem_ids(ns).mutex);
+               max_id = ipc_get_maxid(&sem_ids(ns));
+               up_read(&sem_ids(ns).rw_mutex);
                if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 
                        return -EFAULT;
                return (max_id < 0) ? 0: max_id;
@@ -595,14 +648,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
                struct semid64_ds tbuf;
                int id;
 
-               if(semid >= sem_ids(ns).entries->size)
-                       return -EINVAL;
-
-               memset(&tbuf,0,sizeof(tbuf));
-
                sma = sem_lock(ns, semid);
-               if(sma == NULL)
-                       return -EINVAL;
+               if (IS_ERR(sma))
+                       return PTR_ERR(sma);
 
                err = -EACCES;
                if (ipcperms (&sma->sem_perm, S_IRUGO))
@@ -612,7 +660,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
                if (err)
                        goto out_unlock;
 
-               id = sem_buildid(ns, semid, sma->sem_perm.seq);
+               id = sma->sem_perm.id;
+
+               memset(&tbuf, 0, sizeof(tbuf));
 
                kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
                tbuf.sem_otime  = sma->sem_otime;
@@ -642,16 +692,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
        ushort* sem_io = fast_sem_io;
        int nsems;
 
-       sma = sem_lock(ns, semid);
-       if(sma==NULL)
-               return -EINVAL;
+       sma = sem_lock_check(ns, semid);
+       if (IS_ERR(sma))
+               return PTR_ERR(sma);
 
        nsems = sma->sem_nsems;
 
-       err=-EIDRM;
-       if (sem_checkid(ns,sma,semid))
-               goto out_unlock;
-
        err = -EACCES;
        if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
                goto out_unlock;
@@ -795,7 +841,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
                for (un = sma->undo; un; un = un->id_next)
                        un->semadj[semnum] = 0;
                curr->semval = val;
-               curr->sempid = current->tgid;
+               curr->sempid = task_tgid_vnr(current);
                sma->sem_ctime = get_seconds();
                /* maybe some queued-up processes were waiting for this */
                update_queue(sma);
@@ -863,14 +909,10 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
                if(copy_semid_from_user (&setbuf, arg.buf, version))
                        return -EFAULT;
        }
-       sma = sem_lock(ns, semid);
-       if(sma==NULL)
-               return -EINVAL;
+       sma = sem_lock_check_down(ns, semid);
+       if (IS_ERR(sma))
+               return PTR_ERR(sma);
 
-       if (sem_checkid(ns,sma,semid)) {
-               err=-EIDRM;
-               goto out_unlock;
-       }       
        ipcp = &sma->sem_perm;
 
        err = audit_ipc_obj(ipcp);
@@ -894,7 +936,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
 
        switch(cmd){
        case IPC_RMID:
-               freeary(ns, sma, semid);
+               freeary(ns, sma);
                err = 0;
                break;
        case IPC_SET:
@@ -948,45 +990,15 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
                return err;
        case IPC_RMID:
        case IPC_SET:
-               mutex_lock(&sem_ids(ns).mutex);
+               down_write(&sem_ids(ns).rw_mutex);
                err = semctl_down(ns,semid,semnum,cmd,version,arg);
-               mutex_unlock(&sem_ids(ns).mutex);
+               up_write(&sem_ids(ns).rw_mutex);
                return err;
        default:
                return -EINVAL;
        }
 }
 
-static inline void lock_semundo(void)
-{
-       struct sem_undo_list *undo_list;
-
-       undo_list = current->sysvsem.undo_list;
-       if (undo_list)
-               spin_lock(&undo_list->lock);
-}
-
-/* This code has an interaction with copy_semundo().
- * Consider; two tasks are sharing the undo_list. task1
- * acquires the undo_list lock in lock_semundo().  If task2 now
- * exits before task1 releases the lock (by calling
- * unlock_semundo()), then task1 will never call spin_unlock().
- * This leave the sem_undo_list in a locked state.  If task1 now creats task3
- * and once again shares the sem_undo_list, the sem_undo_list will still be
- * locked, and future SEM_UNDO operations will deadlock.  This case is
- * dealt with in copy_semundo() by having it reinitialize the spin lock when 
- * the refcnt goes from 1 to 2.
- */
-static inline void unlock_semundo(void)
-{
-       struct sem_undo_list *undo_list;
-
-       undo_list = current->sysvsem.undo_list;
-       if (undo_list)
-               spin_unlock(&undo_list->lock);
-}
-
-
 /* If the task doesn't already have a undo_list, then allocate one
  * here.  We guarantee there is only one thread using this undo list,
  * and current is THE ONE
@@ -1047,22 +1059,17 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
        if (error)
                return ERR_PTR(error);
 
-       lock_semundo();
+       spin_lock(&ulp->lock);
        un = lookup_undo(ulp, semid);
-       unlock_semundo();
+       spin_unlock(&ulp->lock);
        if (likely(un!=NULL))
                goto out;
 
        /* no undo structure around - allocate one. */
-       sma = sem_lock(ns, semid);
-       un = ERR_PTR(-EINVAL);
-       if(sma==NULL)
-               goto out;
-       un = ERR_PTR(-EIDRM);
-       if (sem_checkid(ns,sma,semid)) {
-               sem_unlock(sma);
-               goto out;
-       }
+       sma = sem_lock_check(ns, semid);
+       if (IS_ERR(sma))
+               return ERR_PTR(PTR_ERR(sma));
+
        nsems = sma->sem_nsems;
        ipc_rcu_getref(sma);
        sem_unlock(sma);
@@ -1077,10 +1084,10 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
        new->semadj = (short *) &new[1];
        new->semid = semid;
 
-       lock_semundo();
+       spin_lock(&ulp->lock);
        un = lookup_undo(ulp, semid);
        if (un) {
-               unlock_semundo();
+               spin_unlock(&ulp->lock);
                kfree(new);
                ipc_lock_by_ptr(&sma->sem_perm);
                ipc_rcu_putref(sma);
@@ -1091,7 +1098,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
        ipc_rcu_putref(sma);
        if (sma->sem_perm.deleted) {
                sem_unlock(sma);
-               unlock_semundo();
+               spin_unlock(&ulp->lock);
                kfree(new);
                un = ERR_PTR(-EIDRM);
                goto out;
@@ -1102,7 +1109,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
        sma->undo = new;
        sem_unlock(sma);
        un = new;
-       unlock_semundo();
+       spin_unlock(&ulp->lock);
 out:
        return un;
 }
@@ -1168,15 +1175,14 @@ retry_undos:
        } else
                un = NULL;
 
-       sma = sem_lock(ns, semid);
-       error=-EINVAL;
-       if(sma==NULL)
+       sma = sem_lock_check(ns, semid);
+       if (IS_ERR(sma)) {
+               error = PTR_ERR(sma);
                goto out_free;
-       error = -EIDRM;
-       if (sem_checkid(ns,sma,semid))
-               goto out_unlock_free;
+       }
+
        /*
-        * semid identifies are not unique - find_undo may have
+        * semid identifiers are not unique - find_undo may have
         * allocated an undo structure, it was invalidated by an RMID
         * and now a new array which received the same id. Check and retry.
         */
@@ -1196,7 +1202,7 @@ retry_undos:
        if (error)
                goto out_unlock_free;
 
-       error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
+       error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
        if (error <= 0) {
                if (alter && error == 0)
                        update_queue (sma);
@@ -1211,7 +1217,7 @@ retry_undos:
        queue.sops = sops;
        queue.nsops = nsops;
        queue.undo = un;
-       queue.pid = current->tgid;
+       queue.pid = task_tgid_vnr(current);
        queue.id = semid;
        queue.alter = alter;
        if (alter)
@@ -1242,7 +1248,7 @@ retry_undos:
        }
 
        sma = sem_lock(ns, semid);
-       if(sma==NULL) {
+       if (IS_ERR(sma)) {
                BUG_ON(queue.prev != NULL);
                error = -EIDRM;
                goto out_free;
@@ -1279,10 +1285,6 @@ asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsop
 
 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
  * parent and child tasks.
- *
- * See the notes above unlock_semundo() regarding the spin_lock_init()
- * in this code.  Initialize the undo_list->lock here instead of get_undo_list()
- * because of the reasoning in the comment above unlock_semundo.
  */
 
 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
@@ -1342,13 +1344,13 @@ void exit_sem(struct task_struct *tsk)
                if(semid == -1)
                        continue;
                sma = sem_lock(ns, semid);
-               if (sma == NULL)
+               if (IS_ERR(sma))
                        continue;
 
                if (u->semid == -1)
                        goto next_entry;
 
-               BUG_ON(sem_checkid(ns,sma,u->semid));
+               BUG_ON(sem_checkid(sma, u->semid));
 
                /* remove u from the sma->undo list */
                for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
@@ -1382,7 +1384,7 @@ found:
                                        semaphore->semval = 0;
                                if (semaphore->semval > SEMVMX)
                                        semaphore->semval = SEMVMX;
-                               semaphore->sempid = current->tgid;
+                               semaphore->sempid = task_tgid_vnr(current);
                        }
                }
                sma->sem_otime = get_seconds();
@@ -1402,7 +1404,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
        return seq_printf(s,
                          "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
                          sma->sem_perm.key,
-                         sma->sem_id,
+                         sma->sem_perm.id,
                          sma->sem_perm.mode,
                          sma->sem_nsems,
                          sma->sem_perm.uid,
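
With the ipc_ops/ipc_params pair, each of the three getters is reduced to
filling in a callback table and a parameter block and delegating to common
code. A sketch of the dispatcher they now share -- the real ipcget() is a
small helper elsewhere in this series, so the body below is an assumption
based on the ipcget_new()/ipcget_public() split visible in the ipc/util.c
hunks further down:

        static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
                        struct ipc_ops *ops, struct ipc_params *params)
        {
                if (params->key == IPC_PRIVATE)
                        return ipcget_new(ns, ids, ops, params);
                else
                        return ipcget_public(ns, ids, ops, params);
        }
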
index 5fc5cf50cf1b847fc630cf2e127f37005337bb94..3818fae625c5252363380fa9c7a521e8b5c1a7d1 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -35,7 +35,7 @@
 #include <linux/capability.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 
@@ -59,17 +59,11 @@ static struct ipc_ids init_shm_ids;
 
 #define shm_ids(ns)    (*((ns)->ids[IPC_SHM_IDS]))
 
-#define shm_lock(ns, id)               \
-       ((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
 #define shm_unlock(shp)                        \
        ipc_unlock(&(shp)->shm_perm)
-#define shm_get(ns, id)                        \
-       ((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
-#define shm_buildid(ns, id, seq)       \
-       ipc_buildid(&shm_ids(ns), id, seq)
+#define shm_buildid(id, seq)   ipc_buildid(id, seq)
 
-static int newseg (struct ipc_namespace *ns, key_t key,
-               int shmflg, size_t size);
+static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
 static void shm_close(struct vm_area_struct *vma);
 static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
@@ -84,9 +78,13 @@ static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
        ns->shm_ctlall = SHMALL;
        ns->shm_ctlmni = SHMMNI;
        ns->shm_tot = 0;
-       ipc_init_ids(ids, 1);
+       ipc_init_ids(ids);
 }
 
+/*
+ * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
+ * Only shm_ids.rw_mutex remains locked on exit.
+ */
 static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
        if (shp->shm_nattch){
@@ -112,20 +110,24 @@ int shm_init_ns(struct ipc_namespace *ns)
 
 void shm_exit_ns(struct ipc_namespace *ns)
 {
-       int i;
        struct shmid_kernel *shp;
+       int next_id;
+       int total, in_use;
+
+       down_write(&shm_ids(ns).rw_mutex);
 
-       mutex_lock(&shm_ids(ns).mutex);
-       for (i = 0; i <= shm_ids(ns).max_id; i++) {
-               shp = shm_lock(ns, i);
+       in_use = shm_ids(ns).in_use;
+
+       for (total = 0, next_id = 0; total < in_use; next_id++) {
+               shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
                if (shp == NULL)
                        continue;
-
+               ipc_lock_by_ptr(&shp->shm_perm);
                do_shm_rmid(ns, shp);
+               total++;
        }
-       mutex_unlock(&shm_ids(ns).mutex);
+       up_write(&shm_ids(ns).rw_mutex);
 
-       ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
        kfree(ns->ids[IPC_SHM_IDS]);
        ns->ids[IPC_SHM_IDS] = NULL;
 }
@@ -138,17 +140,49 @@ void __init shm_init (void)
                                IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
-static inline int shm_checkid(struct ipc_namespace *ns,
-               struct shmid_kernel *s, int id)
+/*
+ * shm_lock_(check_)down routines are called in the paths where the rw_mutex
+ * is held to protect access to the idr tree.
+ */
+static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
+                                               int id)
 {
-       if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id))
-               return -EIDRM;
-       return 0;
+       struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_lock_check_down(
+                                               struct ipc_namespace *ns,
+                                               int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock_check_down(&shm_ids(ns), id);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+/*
+ * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
+                                               int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
-static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id)
+static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
-       return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id);
+       ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
 static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -166,9 +200,9 @@ static void shm_open(struct vm_area_struct *vma)
        struct shmid_kernel *shp;
 
        shp = shm_lock(sfd->ns, sfd->id);
-       BUG_ON(!shp);
+       BUG_ON(IS_ERR(shp));
        shp->shm_atim = get_seconds();
-       shp->shm_lprid = current->tgid;
+       shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_nattch++;
        shm_unlock(shp);
 }
@@ -176,15 +210,16 @@ static void shm_open(struct vm_area_struct *vma)
 /*
  * shm_destroy - free the struct shmid_kernel
  *
+ * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.mutex locked,
+ * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
        ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       shm_rmid(ns, shp->id);
+       shm_rmid(ns, shp);
        shm_unlock(shp);
        if (!is_file_hugepages(shp->shm_file))
                shmem_lock(shp->shm_file, 0, shp->mlock_user);
@@ -209,11 +244,11 @@ static void shm_close(struct vm_area_struct *vma)
        struct shmid_kernel *shp;
        struct ipc_namespace *ns = sfd->ns;
 
-       mutex_lock(&shm_ids(ns).mutex);
+       down_write(&shm_ids(ns).rw_mutex);
        /* remove from the list of attaches of the shm segment */
-       shp = shm_lock(ns, sfd->id);
-       BUG_ON(!shp);
-       shp->shm_lprid = current->tgid;
+       shp = shm_lock_down(ns, sfd->id);
+       BUG_ON(IS_ERR(shp));
+       shp->shm_lprid = task_tgid_vnr(current);
        shp->shm_dtim = get_seconds();
        shp->shm_nattch--;
        if(shp->shm_nattch == 0 &&
@@ -221,7 +256,7 @@ static void shm_close(struct vm_area_struct *vma)
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       mutex_unlock(&shm_ids(ns).mutex);
+       up_write(&shm_ids(ns).rw_mutex);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -337,8 +372,19 @@ static struct vm_operations_struct shm_vm_ops = {
 #endif
 };
 
-static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
+/**
+ * newseg - Create a new shared memory segment
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, size and shmflg
+ *
+ * Called with shm_ids.rw_mutex held as a writer.
+ */
+
+static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 {
+       key_t key = params->key;
+       int shmflg = params->flg;
+       size_t size = params->u.size;
        int error;
        struct shmid_kernel *shp;
        int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
@@ -387,28 +433,30 @@ static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
        if (IS_ERR(file))
                goto no_file;
 
-       error = -ENOSPC;
        id = shm_addid(ns, shp);
-       if(id == -1) 
+       if (id < 0) {
+               error = id;
                goto no_id;
+       }
 
-       shp->shm_cprid = current->tgid;
+       shp->shm_cprid = task_tgid_vnr(current);
        shp->shm_lprid = 0;
        shp->shm_atim = shp->shm_dtim = 0;
        shp->shm_ctim = get_seconds();
        shp->shm_segsz = size;
        shp->shm_nattch = 0;
-       shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
+       shp->shm_perm.id = shm_buildid(id, shp->shm_perm.seq);
        shp->shm_file = file;
        /*
         * shmid gets reported as "inode#" in /proc/pid/maps.
         * proc-ps tools use this. Changing this will break them.
         */
-       file->f_dentry->d_inode->i_ino = shp->id;
+       file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
 
        ns->shm_tot += numpages;
+       error = shp->shm_perm.id;
        shm_unlock(shp);
-       return shp->id;
+       return error;
 
 no_id:
        fput(file);
@@ -418,42 +466,49 @@ no_file:
        return error;
 }
 
-asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
        struct shmid_kernel *shp;
-       int err, id = 0;
+
+       shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+       return security_shm_associate(shp, shmflg);
+}
+
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
+                               struct ipc_params *params)
+{
+       struct shmid_kernel *shp;
+
+       shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+       if (shp->shm_segsz < params->u.size)
+               return -EINVAL;
+
+       return 0;
+}
+
+asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+{
        struct ipc_namespace *ns;
+       struct ipc_ops shm_ops;
+       struct ipc_params shm_params;
 
        ns = current->nsproxy->ipc_ns;
 
-       mutex_lock(&shm_ids(ns).mutex);
-       if (key == IPC_PRIVATE) {
-               err = newseg(ns, key, shmflg, size);
-       } else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
-               if (!(shmflg & IPC_CREAT))
-                       err = -ENOENT;
-               else
-                       err = newseg(ns, key, shmflg, size);
-       } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
-               err = -EEXIST;
-       } else {
-               shp = shm_lock(ns, id);
-               BUG_ON(shp==NULL);
-               if (shp->shm_segsz < size)
-                       err = -EINVAL;
-               else if (ipcperms(&shp->shm_perm, shmflg))
-                       err = -EACCES;
-               else {
-                       int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
-                       err = security_shm_associate(shp, shmflg);
-                       if (!err)
-                               err = shmid;
-               }
-               shm_unlock(shp);
-       }
-       mutex_unlock(&shm_ids(ns).mutex);
+       shm_ops.getnew = newseg;
+       shm_ops.associate = shm_security;
+       shm_ops.more_checks = shm_more_checks;
 
-       return err;
+       shm_params.key = key;
+       shm_params.flg = shmflg;
+       shm_params.u.size = size;
+
+       return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
 }
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
@@ -547,20 +602,26 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
        }
 }
 
+/*
+ * Called with shm_ids.rw_mutex held as a reader
+ */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
                unsigned long *swp)
 {
-       int i;
+       int next_id;
+       int total, in_use;
 
        *rss = 0;
        *swp = 0;
 
-       for (i = 0; i <= shm_ids(ns).max_id; i++) {
+       in_use = shm_ids(ns).in_use;
+
+       for (total = 0, next_id = 0; total < in_use; next_id++) {
                struct shmid_kernel *shp;
                struct inode *inode;
 
-               shp = shm_get(ns, i);
-               if(!shp)
+               shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
+               if (shp == NULL)
                        continue;
 
                inode = shp->shm_file->f_path.dentry->d_inode;
@@ -575,6 +636,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
                        *swp += info->swapped;
                        spin_unlock(&info->lock);
                }
+
+               total++;
        }
 }
 
@@ -610,8 +673,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                shminfo.shmmin = SHMMIN;
                if(copy_shminfo_to_user (buf, &shminfo, version))
                        return -EFAULT;
-               /* reading a integer is always atomic */
-               err= shm_ids(ns).max_id;
+
+               down_read(&shm_ids(ns).rw_mutex);
+               err = ipc_get_maxid(&shm_ids(ns));
+               up_read(&shm_ids(ns).rw_mutex);
+
                if(err<0)
                        err = 0;
                goto out;
@@ -625,14 +691,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                        return err;
 
                memset(&shm_info,0,sizeof(shm_info));
-               mutex_lock(&shm_ids(ns).mutex);
+               down_read(&shm_ids(ns).rw_mutex);
                shm_info.used_ids = shm_ids(ns).in_use;
                shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
                shm_info.shm_tot = ns->shm_tot;
                shm_info.swap_attempts = 0;
                shm_info.swap_successes = 0;
-               err = shm_ids(ns).max_id;
-               mutex_unlock(&shm_ids(ns).mutex);
+               err = ipc_get_maxid(&shm_ids(ns));
+               up_read(&shm_ids(ns).rw_mutex);
                if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
                        err = -EFAULT;
                        goto out;
@@ -646,20 +712,25 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
        {
                struct shmid64_ds tbuf;
                int result;
-               memset(&tbuf, 0, sizeof(tbuf));
-               shp = shm_lock(ns, shmid);
-               if(shp==NULL) {
-                       err = -EINVAL;
+
+               if (!buf) {
+                       err = -EFAULT;
                        goto out;
-               } else if(cmd==SHM_STAT) {
-                       err = -EINVAL;
-                       if (shmid > shm_ids(ns).max_id)
-                               goto out_unlock;
-                       result = shm_buildid(ns, shmid, shp->shm_perm.seq);
+               }
+
+               if (cmd == SHM_STAT) {
+                       shp = shm_lock(ns, shmid);
+                       if (IS_ERR(shp)) {
+                               err = PTR_ERR(shp);
+                               goto out;
+                       }
+                       result = shp->shm_perm.id;
                } else {
-                       err = shm_checkid(ns, shp,shmid);
-                       if(err)
-                               goto out_unlock;
+                       shp = shm_lock_check(ns, shmid);
+                       if (IS_ERR(shp)) {
+                               err = PTR_ERR(shp);
+                               goto out;
+                       }
                        result = 0;
                }
                err=-EACCES;
@@ -668,6 +739,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                err = security_shm_shmctl(shp, cmd);
                if (err)
                        goto out_unlock;
+               memset(&tbuf, 0, sizeof(tbuf));
                kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
                tbuf.shm_segsz  = shp->shm_segsz;
                tbuf.shm_atime  = shp->shm_atim;
@@ -686,14 +758,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
        case SHM_LOCK:
        case SHM_UNLOCK:
        {
-               shp = shm_lock(ns, shmid);
-               if(shp==NULL) {
-                       err = -EINVAL;
+               shp = shm_lock_check(ns, shmid);
+               if (IS_ERR(shp)) {
+                       err = PTR_ERR(shp);
                        goto out;
                }
-               err = shm_checkid(ns, shp,shmid);
-               if(err)
-                       goto out_unlock;
 
                err = audit_ipc_obj(&(shp->shm_perm));
                if (err)
@@ -742,14 +811,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                 *      Instead we set a destroyed flag, and then blow
                 *      the name away when the usage hits zero.
                 */
-               mutex_lock(&shm_ids(ns).mutex);
-               shp = shm_lock(ns, shmid);
-               err = -EINVAL;
-               if (shp == NULL) 
+               down_write(&shm_ids(ns).rw_mutex);
+               shp = shm_lock_check_down(ns, shmid);
+               if (IS_ERR(shp)) {
+                       err = PTR_ERR(shp);
                        goto out_up;
-               err = shm_checkid(ns, shp, shmid);
-               if(err)
-                       goto out_unlock_up;
+               }
 
                err = audit_ipc_obj(&(shp->shm_perm));
                if (err)
@@ -767,24 +834,27 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                        goto out_unlock_up;
 
                do_shm_rmid(ns, shp);
-               mutex_unlock(&shm_ids(ns).mutex);
+               up_write(&shm_ids(ns).rw_mutex);
                goto out;
        }
 
        case IPC_SET:
        {
+               if (!buf) {
+                       err = -EFAULT;
+                       goto out;
+               }
+
                if (copy_shmid_from_user (&setbuf, buf, version)) {
                        err = -EFAULT;
                        goto out;
                }
-               mutex_lock(&shm_ids(ns).mutex);
-               shp = shm_lock(ns, shmid);
-               err=-EINVAL;
-               if(shp==NULL)
+               down_write(&shm_ids(ns).rw_mutex);
+               shp = shm_lock_check_down(ns, shmid);
+               if (IS_ERR(shp)) {
+                       err = PTR_ERR(shp);
                        goto out_up;
-               err = shm_checkid(ns, shp,shmid);
-               if(err)
-                       goto out_unlock_up;
+               }
                err = audit_ipc_obj(&(shp->shm_perm));
                if (err)
                        goto out_unlock_up;
@@ -819,7 +889,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 out_unlock_up:
        shm_unlock(shp);
 out_up:
-       mutex_unlock(&shm_ids(ns).mutex);
+       up_write(&shm_ids(ns).rw_mutex);
        goto out;
 out_unlock:
        shm_unlock(shp);
@@ -890,13 +960,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
         * additional creator id...
         */
        ns = current->nsproxy->ipc_ns;
-       shp = shm_lock(ns, shmid);
-       if(shp == NULL)
+       shp = shm_lock_check(ns, shmid);
+       if (IS_ERR(shp)) {
+               err = PTR_ERR(shp);
                goto out;
-
-       err = shm_checkid(ns, shp,shmid);
-       if (err)
-               goto out_unlock;
+       }
 
        err = -EACCES;
        if (ipcperms(&shp->shm_perm, acc_mode))
@@ -925,7 +993,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 
        file->private_data = sfd;
        file->f_mapping = shp->shm_file->f_mapping;
-       sfd->id = shp->id;
+       sfd->id = shp->shm_perm.id;
        sfd->ns = get_ipc_ns(ns);
        sfd->file = shp->shm_file;
        sfd->vm_ops = NULL;
@@ -955,16 +1023,16 @@ invalid:
        fput(file);
 
 out_nattch:
-       mutex_lock(&shm_ids(ns).mutex);
-       shp = shm_lock(ns, shmid);
-       BUG_ON(!shp);
+       down_write(&shm_ids(ns).rw_mutex);
+       shp = shm_lock_down(ns, shmid);
+       BUG_ON(IS_ERR(shp));
        shp->shm_nattch--;
        if(shp->shm_nattch == 0 &&
           shp->shm_perm.mode & SHM_DEST)
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       mutex_unlock(&shm_ids(ns).mutex);
+       up_write(&shm_ids(ns).rw_mutex);
 
 out:
        return err;
@@ -1094,7 +1162,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
                format = BIG_STRING;
        return seq_printf(s, format,
                          shp->shm_perm.key,
-                         shp->id,
+                         shp->shm_perm.id,
                          shp->shm_perm.mode,
                          shp->shm_segsz,
                          shp->shm_cprid,
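
shm_exit_ns(), shm_get_stat() and the new ipc_findkey() all walk the IDR
the same way: with no max_id left to bound the scan, they probe successive
slot numbers and stop once ids->in_use live entries have been visited.
The pattern, pulled out into a standalone sketch (visit() is a hypothetical
placeholder for the per-entry work):

        static void for_each_ipc(struct ipc_ids *ids)
        {
                struct kern_ipc_perm *ipc;
                int next_id;
                int total, in_use = ids->in_use;

                for (total = 0, next_id = 0; total < in_use; next_id++) {
                        ipc = idr_find(&ids->ipcs_idr, next_id);
                        if (ipc == NULL)        /* hole left by a removed id */
                                continue;
                        visit(ipc);             /* e.g. lock it and free it */
                        total++;
                }
        }
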
index 44e5135aee4785e2da26de92e0d3b52b02246842..1aa0ebf71bac7e5c30559eb0f60a89bdc6688efb 100644 (file)
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -32,6 +32,7 @@
 #include <linux/proc_fs.h>
 #include <linux/audit.h>
 #include <linux/nsproxy.h>
+#include <linux/rwsem.h>
 
 #include <asm/unistd.h>
 
@@ -129,23 +130,16 @@ __initcall(ipc_init);
 /**
  *     ipc_init_ids            -       initialise IPC identifiers
  *     @ids: Identifier set
- *     @size: Number of identifiers
  *
- *     Given a size for the ipc identifier range (limited below IPCMNI)
- *     set up the sequence range to use then allocate and initialise the
- *     array itself. 
+ *     Set up the sequence range to use for the ipc identifier range (limited
+ *     below IPCMNI) then initialise the ids idr.
  */
  
-void ipc_init_ids(struct ipc_ids* ids, int size)
+void ipc_init_ids(struct ipc_ids *ids)
 {
-       int i;
+       init_rwsem(&ids->rw_mutex);
 
-       mutex_init(&ids->mutex);
-
-       if(size > IPCMNI)
-               size = IPCMNI;
        ids->in_use = 0;
-       ids->max_id = -1;
        ids->seq = 0;
        {
                int seq_limit = INT_MAX/SEQ_MULTIPLIER;
@@ -155,17 +149,7 @@ void ipc_init_ids(struct ipc_ids* ids, int size)
                        ids->seq_max = seq_limit;
        }
 
-       ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
-                                    sizeof(struct ipc_id_ary));
-
-       if(ids->entries == NULL) {
-               printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
-               size = 0;
-               ids->entries = &ids->nullentry;
-       }
-       ids->entries->size = size;
-       for(i=0;i<size;i++)
-               ids->entries->p[i] = NULL;
+       idr_init(&ids->ipcs_idr);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -208,99 +192,96 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  *     @ids: Identifier set
  *     @key: The key to find
  *     
- *     Requires ipc_ids.mutex locked.
- *     Returns the identifier if found or -1 if not.
+ *     Requires ipc_ids.rw_mutex locked.
+ *     Returns the LOCKED pointer to the ipc structure if found, or NULL
+ *     if not. On success the returned kern_ipc_perm is the one embedded
+ *     in the owning ipc structure.
  */
  
-int ipc_findkey(struct ipc_ids* ids, key_t key)
+static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-       int id;
-       struct kern_ipc_perm* p;
-       int max_id = ids->max_id;
+       struct kern_ipc_perm *ipc;
+       int next_id;
+       int total;
 
-       /*
-        * rcu_dereference() is not needed here
-        * since ipc_ids.mutex is held
-        */
-       for (id = 0; id <= max_id; id++) {
-               p = ids->entries->p[id];
-               if(p==NULL)
+       for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
+               ipc = idr_find(&ids->ipcs_idr, next_id);
+
+               if (ipc == NULL)
+                       continue;
+
+               if (ipc->key != key) {
+                       total++;
                        continue;
-               if (key == p->key)
-                       return id;
+               }
+
+               ipc_lock_by_ptr(ipc);
+               return ipc;
        }
-       return -1;
+
+       return NULL;
 }
 
-/*
- * Requires ipc_ids.mutex locked
+/**
+ *     ipc_get_maxid   -       get the last assigned id
+ *     @ids: IPC identifier set
+ *
+ *     Called with ipc_ids.rw_mutex held.
  */
-static int grow_ary(struct ipc_ids* ids, int newsize)
-{
-       struct ipc_id_ary* new;
-       struct ipc_id_ary* old;
-       int i;
-       int size = ids->entries->size;
-
-       if(newsize > IPCMNI)
-               newsize = IPCMNI;
-       if(newsize <= size)
-               return newsize;
-
-       new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
-                           sizeof(struct ipc_id_ary));
-       if(new == NULL)
-               return size;
-       new->size = newsize;
-       memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
-       for(i=size;i<newsize;i++) {
-               new->p[i] = NULL;
-       }
-       old = ids->entries;
 
-       /*
-        * Use rcu_assign_pointer() to make sure the memcpyed contents
-        * of the new array are visible before the new array becomes visible.
-        */
-       rcu_assign_pointer(ids->entries, new);
+int ipc_get_maxid(struct ipc_ids *ids)
+{
+       struct kern_ipc_perm *ipc;
+       int max_id = -1;
+       int total, id;
+
+       if (ids->in_use == 0)
+               return -1;
 
-       __ipc_fini_ids(ids, old);
-       return newsize;
+       if (ids->in_use == IPCMNI)
+               return IPCMNI - 1;
+
+       /* Look for the last assigned id */
+       total = 0;
+       for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
+               ipc = idr_find(&ids->ipcs_idr, id);
+               if (ipc != NULL) {
+                       max_id = id;
+                       total++;
+               }
+       }
+       return max_id;
 }
 
 /**
  *     ipc_addid       -       add an IPC identifier
  *     @ids: IPC identifier set
  *     @new: new IPC permission set
- *     @size: new size limit for the id array
+ *     @size: limit for the number of used ids
  *
- *     Add an entry 'new' to the IPC arrays. The permissions object is
+ *     Add an entry 'new' to the IPC ids idr. The permissions object is
  *     initialised and the first free entry is set up and the id assigned
- *     is returned. The list is returned in a locked state on success.
- *     On failure the list is not locked and -1 is returned.
+ *     is returned. The 'new' entry is returned in a locked state on success.
+ *     On failure the entry is not locked and a negative error code is returned.
  *
- *     Called with ipc_ids.mutex held.
+ *     Called with ipc_ids.rw_mutex held as a writer.
  */
  
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 {
-       int id;
+       int id, err;
 
-       size = grow_ary(ids,size);
+       if (size > IPCMNI)
+               size = IPCMNI;
+
+       if (ids->in_use >= size)
+               return -ENOSPC;
+
+       err = idr_get_new(&ids->ipcs_idr, new, &id);
+       if (err)
+               return err;
 
-       /*
-        * rcu_dereference()() is not needed here since
-        * ipc_ids.mutex is held
-        */
-       for (id = 0; id < size; id++) {
-               if(ids->entries->p[id] == NULL)
-                       goto found;
-       }
-       return -1;
-found:
        ids->in_use++;
-       if (id > ids->max_id)
-               ids->max_id = id;
 
        new->cuid = new->uid = current->euid;
        new->gid = new->cgid = current->egid;
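
The id arithmetic itself is unchanged by the conversion, only relocated:
a user-visible id still encodes the IDR slot index plus a per-slot sequence
counter, and a sequence mismatch is what turns a reused slot into -EIDRM.
In outline (assuming SEQ_MULTIPLIER equals IPCMNI, which these hunks do
not show):

        id  = SEQ_MULTIPLIER * seq + lid;       /* ipc_buildid()  */
        lid = id % SEQ_MULTIPLIER;              /* ipcid_to_idx() */
        /* ipc_checkid(): the id is stale if id / SEQ_MULTIPLIER != ipcp->seq */
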
@@ -313,48 +294,153 @@ found:
        new->deleted = 0;
        rcu_read_lock();
        spin_lock(&new->lock);
-       ids->entries->p[id] = new;
        return id;
 }
 
+/**
+ *     ipcget_new      -       create a new ipc object
+ *     @ns: namespace
+ *     @ids: IPC identifier set
+ *     @ops: the actual creation routine to call
+ *     @params: its parameters
+ *
+ *     This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ *     when the key is IPC_PRIVATE.
+ */
+int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
+               struct ipc_ops *ops, struct ipc_params *params)
+{
+       int err;
+retry:
+       err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+       if (!err)
+               return -ENOMEM;
+
+       down_write(&ids->rw_mutex);
+       err = ops->getnew(ns, params);
+       up_write(&ids->rw_mutex);
+
+       if (err == -EAGAIN)
+               goto retry;
+
+       return err;
+}
+
+/**
+ *     ipc_check_perms -       check security and permissions for an IPC
+ *     @ipcp: ipc permission set
+ *     @ops: the actual security routine to call
+ *     @params: its parameters
+ *
+ *     This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ *     when the key is not IPC_PRIVATE and that key already exists in the
+ *     ids IDR.
+ *
+ *     On success, the IPC id is returned.
+ *
+ *     It is called with ipc_ids.rw_mutex and ipcp->lock held.
+ */
+static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops,
+                       struct ipc_params *params)
+{
+       int err;
+
+       if (ipcperms(ipcp, params->flg))
+               err = -EACCES;
+       else {
+               err = ops->associate(ipcp, params->flg);
+               if (!err)
+                       err = ipcp->id;
+       }
+
+       return err;
+}
+
+/**
+ *     ipcget_public   -       get an ipc object or create a new one
+ *     @ns: namespace
+ *     @ids: IPC identifier set
+ *     @ops: the actual creation routine to call
+ *     @params: its parameters
+ *
+ *     This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ *     when the key is not IPC_PRIVATE.
+ *     It adds a new entry if the key is not found and performs the
+ *     permission / security checks if the key is found.
+ *
+ *     On success, the ipc id is returned.
+ */
+int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
+               struct ipc_ops *ops, struct ipc_params *params)
+{
+       struct kern_ipc_perm *ipcp;
+       int flg = params->flg;
+       int err;
+retry:
+       err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+       /*
+        * Take the lock as a writer since we are potentially going to add
+        * a new entry + read locks are not "upgradable"
+        */
+       down_write(&ids->rw_mutex);
+       ipcp = ipc_findkey(ids, params->key);
+       if (ipcp == NULL) {
+               /* key not used */
+               if (!(flg & IPC_CREAT))
+                       err = -ENOENT;
+               else if (!err)
+                       err = -ENOMEM;
+               else
+                       err = ops->getnew(ns, params);
+       } else {
+               /* ipc object has been locked by ipc_findkey() */
+
+               if (flg & IPC_CREAT && flg & IPC_EXCL)
+                       err = -EEXIST;
+               else {
+                       err = 0;
+                       if (ops->more_checks)
+                               err = ops->more_checks(ipcp, params);
+                       if (!err)
+                               /*
+                                * ipc_check_perms returns the IPC id on
+                                * success
+                                */
+                               err = ipc_check_perms(ipcp, ops, params);
+               }
+               ipc_unlock(ipcp);
+       }
+       up_write(&ids->rw_mutex);
+
+       if (err == -EAGAIN)
+               goto retry;
+
+       return err;
+}
+
+
 /**
  *     ipc_rmid        -       remove an IPC identifier
- *     @ids: identifier set
- *     @id: Identifier to remove
+ *     @ids: IPC identifier set
+ *     @ipcp: ipc perm structure containing the identifier to remove
  *
- *     The identifier must be valid, and in use. The kernel will panic if
- *     fed an invalid identifier. The entry is removed and internal
- *     variables recomputed. The object associated with the identifier
- *     is returned.
- *     ipc_ids.mutex and the spinlock for this ID is hold before this function
- *     is called, and remain locked on the exit.
+ *     ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
+ *     before this function is called, and remain locked on the exit.
  */
  
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
+void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
-       struct kern_ipc_perm* p;
-       int lid = id % SEQ_MULTIPLIER;
-       BUG_ON(lid >= ids->entries->size);
+       int lid = ipcid_to_idx(ipcp->id);
+
+       idr_remove(&ids->ipcs_idr, lid);
 
-       /* 
-        * do not need a rcu_dereference()() here to force ordering
-        * on Alpha, since the ipc_ids.mutex is held.
-        */     
-       p = ids->entries->p[lid];
-       ids->entries->p[lid] = NULL;
-       BUG_ON(p==NULL);
        ids->in_use--;
 
-       if (lid == ids->max_id) {
-               do {
-                       lid--;
-                       if(lid == -1)
-                               break;
-               } while (ids->entries->p[lid] == NULL);
-               ids->max_id = lid;
-       }
-       p->deleted = 1;
-       return p;
+       ipcp->deleted = 1;
+
+       return;
 }
 
 /**
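
Both ipcget_new() and ipcget_public() above use the standard IDR preload
idiom: idr_pre_get() preallocates outside any lock, so by the time
idr_get_new() runs under rw_mutex a racing inserter may have consumed the
preallocation, in which case the insert fails with -EAGAIN and is retried
rather than reported. Reduced to its skeleton (assuming, as in the
ipc_addid() hunk further up, that the idr_get_new() return value is
propagated unchanged):

        retry:
                if (!idr_pre_get(&ids->ipcs_idr, GFP_KERNEL))
                        return -ENOMEM;         /* could not even preallocate */

                down_write(&ids->rw_mutex);
                err = idr_get_new(&ids->ipcs_idr, new, &id);
                up_write(&ids->rw_mutex);

                if (err == -EAGAIN)             /* preallocation raced away */
                        goto retry;
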
@@ -491,10 +577,12 @@ static void ipc_do_vfree(struct work_struct *work)
  */
 static void ipc_schedule_free(struct rcu_head *head)
 {
-       struct ipc_rcu_grace *grace =
-               container_of(head, struct ipc_rcu_grace, rcu);
-       struct ipc_rcu_sched *sched =
-                       container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
+       struct ipc_rcu_grace *grace;
+       struct ipc_rcu_sched *sched;
+
+       grace = container_of(head, struct ipc_rcu_grace, rcu);
+       sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
+                               data[0]);
 
        INIT_WORK(&sched->work, ipc_do_vfree);
        schedule_work(&sched->work);
@@ -583,7 +671,7 @@ void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
 }
 
 /**
- *     ipc64_perm_to_ipc_perm  -       convert old ipc permissions to new
+ *     ipc64_perm_to_ipc_perm  -       convert new ipc permissions to old
  *     @in: new style IPC permissions
  *     @out: old style IPC permissions
  *
@@ -602,44 +690,37 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
        out->seq        = in->seq;
 }
 
-/*
- * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get()
- * is called with shm_ids.mutex locked.  Since grow_ary() is also called with
- * shm_ids.mutex down(for Shared Memory), there is no need to add read
- * barriers here to gurantee the writes in grow_ary() are seen in order 
- * here (for Alpha).
+/**
+ * ipc_lock - Lock an ipc structure without rw_mutex held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
  *
- * However ipc_get() itself does not necessary require ipc_ids.mutex down. So
- * if in the future ipc_get() is used by other places without ipc_ids.mutex
- * down, then ipc_get() needs read memery barriers as ipc_lock() does.
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is not already
+ * held, i.e. when the idr tree is not protected: it takes the rw_mutex in
+ * read mode around the idr_find().
  */
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
-{
-       struct kern_ipc_perm* out;
-       int lid = id % SEQ_MULTIPLIER;
-       if(lid >= ids->entries->size)
-               return NULL;
-       out = ids->entries->p[lid];
-       return out;
-}
 
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 {
-       struct kern_ipc_perm* out;
-       int lid = id % SEQ_MULTIPLIER;
-       struct ipc_id_ary* entries;
+       struct kern_ipc_perm *out;
+       int lid = ipcid_to_idx(id);
+
+       down_read(&ids->rw_mutex);
 
        rcu_read_lock();
-       entries = rcu_dereference(ids->entries);
-       if(lid >= entries->size) {
-               rcu_read_unlock();
-               return NULL;
-       }
-       out = entries->p[lid];
-       if(out == NULL) {
+       out = idr_find(&ids->ipcs_idr, lid);
+       if (out == NULL) {
                rcu_read_unlock();
-               return NULL;
+               up_read(&ids->rw_mutex);
+               return ERR_PTR(-EINVAL);
        }
+
+       up_read(&ids->rw_mutex);
+
        spin_lock(&out->lock);
        
        /* ipc_rmid() may have already freed the ID while ipc_lock
@@ -648,33 +729,44 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
        if (out->deleted) {
                spin_unlock(&out->lock);
                rcu_read_unlock();
-               return NULL;
+               return ERR_PTR(-EINVAL);
        }
+
        return out;
 }
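Since ipc_lock() now reports failure as ERR_PTR(-EINVAL) rather than NULL, callers must test the result with IS_ERR(). A minimal sketch of the new calling convention (the surrounding function and the ids pointer are illustrative, not part of this patch):

    struct kern_ipc_perm *perm;

    perm = ipc_lock(ids, id);
    if (IS_ERR(perm))
            return PTR_ERR(perm);   /* -EINVAL: no object at this id */

    /* ... operate on the object under its spinlock ... */

    ipc_unlock(perm);               /* drops the spinlock and rcu_read_lock() */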
 
-void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
-{
-       rcu_read_lock();
-       spin_lock(&perm->lock);
-}
+/**
+ * ipc_lock_down - Lock an ipc structure with rw_mutex held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
+ *
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is already
+ * held, i.e. the idr tree is already protected.
+ */
 
-void ipc_unlock(struct kern_ipc_perm* perm)
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
 {
-       spin_unlock(&perm->lock);
-       rcu_read_unlock();
-}
+       struct kern_ipc_perm *out;
+       int lid = ipcid_to_idx(id);
 
-int ipc_buildid(struct ipc_ids* ids, int id, int seq)
-{
-       return SEQ_MULTIPLIER*seq + id;
-}
+       rcu_read_lock();
+       out = idr_find(&ids->ipcs_idr, lid);
+       if (out == NULL) {
+               rcu_read_unlock();
+               return ERR_PTR(-EINVAL);
+       }
 
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid)
-{
-       if(uid/SEQ_MULTIPLIER != ipcp->seq)
-               return 1;
-       return 0;
+       spin_lock(&out->lock);
+
+       /*
+        * No need to verify that the structure is still valid since the
+        * rw_mutex is held.
+        */
+       return out;
 }
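ipc_lock_down() pairs with ipc_rmid(), which needs both the rw_mutex taken as a writer and the object's spinlock. A hedged sketch of a removal path under those rules (the control flow is assumed for illustration; only the helpers are from this patch):

    struct kern_ipc_perm *perm;

    down_write(&ids->rw_mutex);             /* writer: the idr tree is ours */
    perm = ipc_lock_down(ids, id);          /* safe: rw_mutex already held */
    if (IS_ERR(perm)) {
            up_write(&ids->rw_mutex);
            return PTR_ERR(perm);
    }

    ipc_rmid(ids, perm);                    /* both locks held, as required */
    ipc_unlock(perm);
    up_write(&ids->rw_mutex);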
 
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
@@ -707,27 +799,30 @@ struct ipc_proc_iter {
        struct ipc_proc_iface *iface;
 };
 
-static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+/*
+ * This routine locks the first ipc structure found at or after position pos.
+ */
+struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
+                                       loff_t *new_pos)
 {
-       struct ipc_proc_iter *iter = s->private;
-       struct ipc_proc_iface *iface = iter->iface;
-       struct kern_ipc_perm *ipc = it;
-       loff_t p;
-       struct ipc_ids *ids;
+       struct kern_ipc_perm *ipc;
+       int total, id;
 
-       ids = iter->ns->ids[iface->ids];
+       total = 0;
+       for (id = 0; id < pos && total < ids->in_use; id++) {
+               ipc = idr_find(&ids->ipcs_idr, id);
+               if (ipc != NULL)
+                       total++;
+       }
 
-       /* If we had an ipc id locked before, unlock it */
-       if (ipc && ipc != SEQ_START_TOKEN)
-               ipc_unlock(ipc);
+       if (total >= ids->in_use)
+               return NULL;
 
-       /*
-        * p = *pos - 1 (because id 0 starts at position 1)
-        *          + 1 (because we increment the position by one)
-        */
-       for (p = *pos; p <= ids->max_id; p++) {
-               if ((ipc = ipc_lock(ids, p)) != NULL) {
-                       *pos = p + 1;
+       for ( ; pos < IPCMNI; pos++) {
+               ipc = idr_find(&ids->ipcs_idr, pos);
+               if (ipc != NULL) {
+                       *new_pos = pos + 1;
+                       ipc_lock_by_ptr(ipc);
                        return ipc;
                }
        }
@@ -736,16 +831,27 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
        return NULL;
 }
 
+static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+{
+       struct ipc_proc_iter *iter = s->private;
+       struct ipc_proc_iface *iface = iter->iface;
+       struct kern_ipc_perm *ipc = it;
+
+       /* If we had an ipc id locked before, unlock it */
+       if (ipc && ipc != SEQ_START_TOKEN)
+               ipc_unlock(ipc);
+
+       return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos);
+}
+
 /*
- * File positions: pos 0 -> header, pos n -> ipc id + 1.
- * SeqFile iterator: iterator value locked shp or SEQ_TOKEN_START.
+ * File positions: pos 0 -> header, pos n -> ipc id = n - 1.
+ * SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START.
  */
 static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 {
        struct ipc_proc_iter *iter = s->private;
        struct ipc_proc_iface *iface = iter->iface;
-       struct kern_ipc_perm *ipc;
-       loff_t p;
        struct ipc_ids *ids;
 
        ids = iter->ns->ids[iface->ids];
@@ -754,7 +860,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
         * Take the lock - this will be released by the corresponding
         * call to stop().
         */
-       mutex_lock(&ids->mutex);
+       down_read(&ids->rw_mutex);
 
        /* pos < 0 is invalid */
        if (*pos < 0)
@@ -765,13 +871,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
                return SEQ_START_TOKEN;
 
        /* Find the (pos-1)th ipc */
-       for (p = *pos - 1; p <= ids->max_id; p++) {
-               if ((ipc = ipc_lock(ids, p)) != NULL) {
-                       *pos = p + 1;
-                       return ipc;
-               }
-       }
-       return NULL;
+       return sysvipc_find_ipc(ids, *pos - 1, pos);
 }
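For context, the start/next/stop/show callbacks above would normally be collected into a struct seq_operations; a plausible wiring, placed after the functions it names (the variable name here is hypothetical), looks like:

    static struct seq_operations example_sysvipc_seqops = {
            .start = sysvipc_proc_start,
            .next  = sysvipc_proc_next,
            .stop  = sysvipc_proc_stop,
            .show  = sysvipc_proc_show,
    };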
 
 static void sysvipc_proc_stop(struct seq_file *s, void *it)
@@ -781,13 +881,13 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
        struct ipc_proc_iface *iface = iter->iface;
        struct ipc_ids *ids;
 
-       /* If we had a locked segment, release it */
+       /* If we had a locked structure, release it */
        if (ipc && ipc != SEQ_START_TOKEN)
                ipc_unlock(ipc);
 
        ids = iter->ns->ids[iface->ids];
        /* Release the lock we took in start() */
-       mutex_unlock(&ids->mutex);
+       up_read(&ids->rw_mutex);
 }
 
 static int sysvipc_proc_show(struct seq_file *s, void *it)
index 333e891bcaca838cf71bfb27d5d9c6296a6102bd..9ffea40457cedb17eca4cc9df13ed1b3a5f1a3f7 100644 (file)
@@ -10,6 +10,9 @@
 #ifndef _IPC_UTIL_H
 #define _IPC_UTIL_H
 
+#include <linux/idr.h>
+#include <linux/err.h>
+
 #define USHRT_MAX 0xffff
 #define SEQ_MULTIPLIER (IPCMNI)
 
@@ -25,24 +28,46 @@ void sem_exit_ns(struct ipc_namespace *ns);
 void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 
-struct ipc_id_ary {
-       int size;
-       struct kern_ipc_perm *p[0];
-};
-
 struct ipc_ids {
        int in_use;
-       int max_id;
        unsigned short seq;
        unsigned short seq_max;
-       struct mutex mutex;
-       struct ipc_id_ary nullentry;
-       struct ipc_id_ary* entries;
+       struct rw_semaphore rw_mutex;
+       struct idr ipcs_idr;
+};
+
+/*
+ * Structure that holds the parameters needed by the ipc operations
+ * (see below)
+ */
+struct ipc_params {
+       key_t key;
+       int flg;
+       union {
+               size_t size;    /* for shared memories */
+               int nsems;      /* for semaphores */
+       } u;                    /* holds the getnew() specific param */
+};
+
+/*
+ * Structure that holds some ipc operations. This structure is used to unify
+ * the calls to sys_msgget(), sys_semget() and sys_shmget().
+ *      . routine to call to create a new ipc object. Can be one of newque,
+ *        newary, newseg
+ *      . routine to call to check permissions for a new ipc object.
+ *        Can be one of security_msg_associate, security_sem_associate,
+ *        security_shm_associate
+ *      . routine to call for an extra check if needed
+ */
+struct ipc_ops {
+       int (*getnew) (struct ipc_namespace *, struct ipc_params *);
+       int (*associate) (struct kern_ipc_perm *, int);
+       int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
 };
 
 struct seq_file;
 
-void ipc_init_ids(struct ipc_ids *ids, int size);
+void ipc_init_ids(struct ipc_ids *);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
                int ids, int (*show)(struct seq_file *, void *));
@@ -54,14 +79,19 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 #define IPC_MSG_IDS    1
 #define IPC_SHM_IDS    2
 
-/* must be called with ids->mutex acquired.*/
-int ipc_findkey(struct ipc_ids* ids, key_t key);
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
+#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
+
+/* must be called with ids->rw_mutex acquired for writing */
+int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
+
+/* must be called with ids->rw_mutex acquired for reading */
+int ipc_get_maxid(struct ipc_ids *);
 
 /* must be called with both locks acquired. */
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
+void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 
-int ipcperms (struct kern_ipc_perm *ipcp, short flg);
+/* must be called with ipcp locked */
+int ipcperms(struct kern_ipc_perm *ipcp, short flg);
 
 /* for rare, potentially huge allocations.
  * both function can sleep
@@ -79,24 +109,12 @@ void* ipc_rcu_alloc(int size);
 void ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr);
 
-static inline void __ipc_fini_ids(struct ipc_ids *ids,
-               struct ipc_id_ary *entries)
-{
-       if (entries != &ids->nullentry)
-               ipc_rcu_putref(entries);
-}
-
-static inline void ipc_fini_ids(struct ipc_ids *ids)
-{
-       __ipc_fini_ids(ids, ids->entries);
-}
-
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
-void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
-void ipc_unlock(struct kern_ipc_perm* perm);
-int ipc_buildid(struct ipc_ids* ids, int id, int seq);
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid);
+/*
+ * ipc_lock_down: called with rw_mutex held
+ * ipc_lock: called without that lock held
+ */
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
@@ -111,5 +129,89 @@ int ipc_parse_version (int *cmd);
 extern void free_msg(struct msg_msg *msg);
 extern struct msg_msg *load_msg(const void __user *src, int len);
 extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
+extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *,
+                       struct ipc_ops *, struct ipc_params *);
+extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *,
+                       struct ipc_ops *, struct ipc_params *);
+
+static inline int ipc_buildid(int id, int seq)
+{
+       return SEQ_MULTIPLIER * seq + id;
+}
+
+/*
+ * Must be called with ipcp locked
+ */
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+{
+       if (uid / SEQ_MULTIPLIER != ipcp->seq)
+               return 1;
+       return 0;
+}
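The id arithmetic is easiest to see with concrete numbers. With the usual SEQ_MULTIPLIER of IPCMNI == 32768, an object sitting in idr slot 5 with sequence number 3 gives:

    int id  = ipc_buildid(5, 3);    /* 3 * 32768 + 5 == 98309 */
    int lid = ipcid_to_idx(id);     /* 98309 % 32768 ==     5 */

    /* ipc_checkid() then compares 98309 / 32768 == 3 against ipcp->seq */

A stale id whose slot has since been reused fails the division check, which is what lets the *_check() wrappers below return -EIDRM instead of operating on the wrong object.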
+
+static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+       rcu_read_lock();
+       spin_lock(&perm->lock);
+}
+
+static inline void ipc_unlock(struct kern_ipc_perm *perm)
+{
+       spin_unlock(&perm->lock);
+       rcu_read_unlock();
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids,
+                                               int id)
+{
+       struct kern_ipc_perm *out;
+
+       out = ipc_lock_down(ids, id);
+       if (IS_ERR(out))
+               return out;
+
+       if (ipc_checkid(out, id)) {
+               ipc_unlock(out);
+               return ERR_PTR(-EIDRM);
+       }
+
+       return out;
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids,
+                                               int id)
+{
+       struct kern_ipc_perm *out;
+
+       out = ipc_lock(ids, id);
+       if (IS_ERR(out))
+               return out;
+
+       if (ipc_checkid(out, id)) {
+               ipc_unlock(out);
+               return ERR_PTR(-EIDRM);
+       }
+
+       return out;
+}
+
+/**
+ * ipcget - Common sys_*get() code
+ * @ns : namespace
+ * @ids : IPC identifier set
+ * @ops : operations to be called on ipc object creation, permission checks
+ *        and further checks
+ * @params : the parameters needed by the previous operations.
+ *
+ * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
+ */
+static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
+                       struct ipc_ops *ops, struct ipc_params *params)
+{
+       if (params->key == IPC_PRIVATE)
+               return ipcget_new(ns, ids, ops, params);
+       else
+               return ipcget_public(ns, ids, ops, params);
+}
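As a concrete but hedged illustration of the ipc_ops/ipc_params plumbing, a sys_msgget()-style caller could fill in the two structures as below; newque(), msg_security() and the msg_ids() accessor are assumptions about the message-queue side, which is not shown in this hunk:

    asmlinkage long example_msgget(key_t key, int msgflg)
    {
            struct ipc_namespace *ns = current->nsproxy->ipc_ns;
            struct ipc_ops msg_ops;
            struct ipc_params msg_params;

            msg_ops.getnew = newque;                /* creation routine (assumed) */
            msg_ops.associate = msg_security;       /* permission hook (assumed) */
            msg_ops.more_checks = NULL;             /* msg queues need no extra check */

            msg_params.key = key;
            msg_params.flg = msgflg;

            return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
    }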
 
 #endif
diff --git a/kernel/Kconfig.instrumentation b/kernel/Kconfig.instrumentation
new file mode 100644 (file)
index 0000000..f5f2c76
--- /dev/null
@@ -0,0 +1,49 @@
+menuconfig INSTRUMENTATION
+       bool "Instrumentation Support"
+       default y
+       ---help---
+         Say Y here to get to see options related to performance measurement,
+         system-wide debugging, and testing. This option alone does not add any
+         kernel code.
+
+         If you say N, all options in this submenu will be skipped and
+         disabled. If you're trying to debug the kernel itself, go see the
+         Kernel Hacking menu.
+
+if INSTRUMENTATION
+
+config PROFILING
+       bool "Profiling support (EXPERIMENTAL)"
+       help
+         Say Y here to enable the extended profiling support mechanisms used
+         by profilers such as OProfile.
+
+config OPROFILE
+       tristate "OProfile system profiling (EXPERIMENTAL)"
+       depends on PROFILING
+       depends on ALPHA || ARM || BLACKFIN || X86_32 || IA64 || M32R || MIPS || PARISC || PPC || S390 || SUPERH || SPARC || X86_64
+       help
+         OProfile is a profiling system capable of profiling the
+         whole system, including the kernel, kernel modules, libraries,
+         and applications.
+
+         If unsure, say N.
+
+config KPROBES
+       bool "Kprobes"
+       depends on KALLSYMS && MODULES
+       depends on X86_32 || IA64 || PPC || S390 || SPARC64 || X86_64 || AVR32
+       help
+         Kprobes allows you to trap at almost any kernel address and
+         execute a callback function.  register_kprobe() establishes
+         a probepoint and specifies the callback.  Kprobes is useful
+         for kernel debugging, non-intrusive instrumentation and testing.
+         If in doubt, say "N".
+
+config MARKERS
+       bool "Activate markers"
+       help
+         Place an empty function call at each marker site. It can be
+         dynamically replaced with a probe function.
+
+endif # INSTRUMENTATION
index d63fbb18798a420eab9d7fd5ea2d50ab501fdd2a..05c3e6df85973e44bba5127c0805a7fc64a7240e 100644 (file)
@@ -8,8 +8,8 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
            signal.o sys.o kmod.o workqueue.o pid.o \
            rcupdate.o extable.o params.o posix-timers.o \
            kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-           hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
-           utsname.o sysctl_check.o
+           hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
+           utsname.o sysctl_check.o notifier.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
@@ -36,7 +36,11 @@ obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_CGROUPS) += cgroup.o
+obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
+obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
+obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
@@ -51,6 +55,7 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_MARKERS) += marker.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
index cbc5fd60c0f318dce6c1cdcd33b4cf5b5d37f6a1..efbd9cdce1322d8a6e0c93ada872d68aadacf38e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 
 /*
@@ -61,8 +62,8 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
        spin_lock(&task_capability_lock);
        read_lock(&tasklist_lock);
 
-       if (pid && pid != current->pid) {
-               target = find_task_by_pid(pid);
+       if (pid && pid != task_pid_vnr(current)) {
+               target = find_task_by_vpid(pid);
                if (!target) {
                        ret = -ESRCH;
                        goto out;
@@ -95,7 +96,7 @@ static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
        int found = 0;
        struct pid *pgrp;
 
-       pgrp = find_pid(pgrp_nr);
+       pgrp = find_vpid(pgrp_nr);
        do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
                target = g;
                while_each_thread(g, target) {
@@ -129,7 +130,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
      int found = 0;
 
      do_each_thread(g, target) {
-             if (target == current || is_init(target))
+             if (target == current || is_container_init(target->group_leader))
                      continue;
              found = 1;
             if (security_capset_check(target, effective, inheritable,
@@ -184,7 +185,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
        if (get_user(pid, &header->pid))
                return -EFAULT;
 
-       if (pid && pid != current->pid && !capable(CAP_SETPCAP))
+       if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
                return -EPERM;
 
        if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -195,8 +196,8 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
        spin_lock(&task_capability_lock);
        read_lock(&tasklist_lock);
 
-       if (pid > 0 && pid != current->pid) {
-               target = find_task_by_pid(pid);
+       if (pid > 0 && pid != task_pid_vnr(current)) {
+               target = find_task_by_vpid(pid);
                if (!target) {
                        ret = -ESRCH;
                        goto out;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
new file mode 100644 (file)
index 0000000..5987dcc
--- /dev/null
@@ -0,0 +1,2805 @@
+/*
+ *  kernel/cgroup.c
+ *
+ *  Generic process-grouping system.
+ *
+ *  Based originally on the cpuset system, extracted by Paul Menage
+ *  Copyright (C) 2006 Google, Inc
+ *
+ *  Copyright notices from the original cpuset code:
+ *  --------------------------------------------------
+ *  Copyright (C) 2003 BULL SA.
+ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *
+ *  Portions derived from Patrick Mochel's sysfs code.
+ *  sysfs is Copyright (c) 2001-3 Patrick Mochel
+ *
+ *  2003-10-10 Written by Simon Derr.
+ *  2003-10-22 Updates by Stephen Hemminger.
+ *  2004 May-July Rework by Paul Jackson.
+ *  ---------------------------------------------------
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/cgroup.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/backing-dev.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/magic.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sort.h>
+#include <linux/kmod.h>
+#include <linux/delayacct.h>
+#include <linux/cgroupstats.h>
+
+#include <asm/atomic.h>
+
+static DEFINE_MUTEX(cgroup_mutex);
+
+/* Generate an array of cgroup subsystem pointers */
+#define SUBSYS(_x) &_x ## _subsys,
+
+static struct cgroup_subsys *subsys[] = {
+#include <linux/cgroup_subsys.h>
+};
+
+/*
+ * A cgroupfs_root represents the root of a cgroup hierarchy,
+ * and may be associated with a superblock to form an active
+ * hierarchy
+ */
+struct cgroupfs_root {
+       struct super_block *sb;
+
+       /*
+        * The bitmask of subsystems intended to be attached to this
+        * hierarchy
+        */
+       unsigned long subsys_bits;
+
+       /* The bitmask of subsystems currently attached to this hierarchy */
+       unsigned long actual_subsys_bits;
+
+       /* A list running through the attached subsystems */
+       struct list_head subsys_list;
+
+       /* The root cgroup for this hierarchy */
+       struct cgroup top_cgroup;
+
+       /* Tracks how many cgroups are currently defined in hierarchy.*/
+       int number_of_cgroups;
+
+       /* A list running through the mounted hierarchies */
+       struct list_head root_list;
+
+       /* Hierarchy-specific flags */
+       unsigned long flags;
+
+       /* The path to use for release notifications. No locking
+        * between setting and use - so if userspace updates this
+        * while child cgroups exist, you could miss a
+        * notification. We ensure that it's always a valid
+        * NUL-terminated string */
+       char release_agent_path[PATH_MAX];
+};
+
+
+/*
+ * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
+ * subsystems that are otherwise unattached - it never has more than a
+ * single cgroup, and all tasks are part of that cgroup.
+ */
+static struct cgroupfs_root rootnode;
+
+/* The list of hierarchy roots */
+
+static LIST_HEAD(roots);
+static int root_count;
+
+/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
+#define dummytop (&rootnode.top_cgroup)
+
+/* This flag indicates whether tasks in the fork and exit paths should
+ * take callback_mutex and check for fork/exit handlers to call. This
+ * avoids us having to do extra work in the fork/exit path if none of the
+ * subsystems need to be called.
+ */
+static int need_forkexit_callback;
+
+/* bits in struct cgroup flags field */
+enum {
+       /* Control Group is dead */
+       CGRP_REMOVED,
+       /* Control Group has previously had a child cgroup or a task,
+        * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
+       CGRP_RELEASABLE,
+       /* Control Group requires release notifications to userspace */
+       CGRP_NOTIFY_ON_RELEASE,
+};
+
+/* convenient tests for these bits */
+inline int cgroup_is_removed(const struct cgroup *cgrp)
+{
+       return test_bit(CGRP_REMOVED, &cgrp->flags);
+}
+
+/* bits in struct cgroupfs_root flags field */
+enum {
+       ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+};
+
+inline int cgroup_is_releasable(const struct cgroup *cgrp)
+{
+       const int bits =
+               (1 << CGRP_RELEASABLE) |
+               (1 << CGRP_NOTIFY_ON_RELEASE);
+       return (cgrp->flags & bits) == bits;
+}
+
+inline int notify_on_release(const struct cgroup *cgrp)
+{
+       return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+}
+
+/*
+ * for_each_subsys() allows you to iterate on each subsystem attached to
+ * an active hierarchy
+ */
+#define for_each_subsys(_root, _ss) \
+list_for_each_entry(_ss, &_root->subsys_list, sibling)
+
+/* for_each_root() allows you to iterate across the active hierarchies */
+#define for_each_root(_root) \
+list_for_each_entry(_root, &roots, root_list)
+
+/* the list of cgroups eligible for automatic release. Protected by
+ * release_list_lock */
+static LIST_HEAD(release_list);
+static DEFINE_SPINLOCK(release_list_lock);
+static void cgroup_release_agent(struct work_struct *work);
+static DECLARE_WORK(release_agent_work, cgroup_release_agent);
+static void check_for_release(struct cgroup *cgrp);
+
+/* Link structure for associating css_set objects with cgroups */
+struct cg_cgroup_link {
+       /*
+        * List running through cg_cgroup_links associated with a
+        * cgroup, anchored on cgroup->css_sets
+        */
+       struct list_head cgrp_link_list;
+       /*
+        * List running through cg_cgroup_links pointing at a
+        * single css_set object, anchored on css_set->cg_links
+        */
+       struct list_head cg_link_list;
+       struct css_set *cg;
+};
+
+/* The default css_set - used by init and its children prior to any
+ * hierarchies being mounted. It contains a pointer to the root state
+ * for each subsystem. Also used to anchor the list of css_sets. Not
+ * reference-counted, to improve performance when child cgroups
+ * haven't been created.
+ */
+
+static struct css_set init_css_set;
+static struct cg_cgroup_link init_css_set_link;
+
+/* css_set_lock protects the list of css_set objects, and the
+ * chain of tasks off each css_set.  Nests outside task->alloc_lock
+ * due to cgroup_iter_start() */
+static DEFINE_RWLOCK(css_set_lock);
+static int css_set_count;
+
+/* We don't maintain the lists running through each css_set to its
+ * task until after the first call to cgroup_iter_start(). This
+ * reduces the fork()/exit() overhead for people who have cgroups
+ * compiled into their kernel but not actually in use */
+static int use_task_css_set_links;
+
+/* When we create or destroy a css_set, the operation simply
+ * takes/releases a reference count on all the cgroups referenced
+ * by subsystems in this css_set. This can end up multiple-counting
+ * some cgroups, but that's OK - the ref-count is just a
+ * busy/not-busy indicator; ensuring that we only count each cgroup
+ * once would require taking a global lock to ensure that no
+ * subsystems moved between hierarchies while we were doing so.
+ *
+ * Possible TODO: decide at boot time based on the number of
+ * registered subsystems and the number of CPUs or NUMA nodes whether
+ * it's better for performance to ref-count every subsystem, or to
+ * take a global lock and only add one ref count to each hierarchy.
+ */
+
+/*
+ * unlink a css_set from the list and free it
+ */
+static void unlink_css_set(struct css_set *cg)
+{
+       write_lock(&css_set_lock);
+       list_del(&cg->list);
+       css_set_count--;
+       while (!list_empty(&cg->cg_links)) {
+               struct cg_cgroup_link *link;
+               link = list_entry(cg->cg_links.next,
+                                 struct cg_cgroup_link, cg_link_list);
+               list_del(&link->cg_link_list);
+               list_del(&link->cgrp_link_list);
+               kfree(link);
+       }
+       write_unlock(&css_set_lock);
+}
+
+static void __release_css_set(struct kref *k, int taskexit)
+{
+       int i;
+       struct css_set *cg = container_of(k, struct css_set, ref);
+
+       unlink_css_set(cg);
+
+       rcu_read_lock();
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup *cgrp = cg->subsys[i]->cgroup;
+               if (atomic_dec_and_test(&cgrp->count) &&
+                   notify_on_release(cgrp)) {
+                       if (taskexit)
+                               set_bit(CGRP_RELEASABLE, &cgrp->flags);
+                       check_for_release(cgrp);
+               }
+       }
+       rcu_read_unlock();
+       kfree(cg);
+}
+
+static void release_css_set(struct kref *k)
+{
+       __release_css_set(k, 0);
+}
+
+static void release_css_set_taskexit(struct kref *k)
+{
+       __release_css_set(k, 1);
+}
+
+/*
+ * refcounted get/put for css_set objects
+ */
+static inline void get_css_set(struct css_set *cg)
+{
+       kref_get(&cg->ref);
+}
+
+static inline void put_css_set(struct css_set *cg)
+{
+       kref_put(&cg->ref, release_css_set);
+}
+
+static inline void put_css_set_taskexit(struct css_set *cg)
+{
+       kref_put(&cg->ref, release_css_set_taskexit);
+}
+
+/*
+ * find_existing_css_set() is a helper for
+ * find_css_set(), and checks to see whether an existing
+ * css_set is suitable. This currently walks a linked-list for
+ * simplicity; a later patch will use a hash table for better
+ * performance
+ *
+ * oldcg: the cgroup group that we're using before the cgroup
+ * transition
+ *
+ * cgrp: the cgroup that we're moving into
+ *
+ * template: location in which to build the desired set of subsystem
+ * state objects for the new cgroup group
+ */
+
+static struct css_set *find_existing_css_set(
+       struct css_set *oldcg,
+       struct cgroup *cgrp,
+       struct cgroup_subsys_state *template[])
+{
+       int i;
+       struct cgroupfs_root *root = cgrp->root;
+       struct list_head *l = &init_css_set.list;
+
+       /* Build the set of subsystem state objects that we want to
+        * see in the new css_set */
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               if (root->subsys_bits & (1ull << i)) {
+                       /* Subsystem is in this hierarchy. So we want
+                        * the subsystem state from the new
+                        * cgroup */
+                       template[i] = cgrp->subsys[i];
+               } else {
+                       /* Subsystem is not in this hierarchy, so we
+                        * don't want to change the subsystem state */
+                       template[i] = oldcg->subsys[i];
+               }
+       }
+
+       /* Look through existing cgroup groups to find one to reuse */
+       do {
+               struct css_set *cg =
+                       list_entry(l, struct css_set, list);
+
+               if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+                       /* All subsystems matched */
+                       return cg;
+               }
+               /* Try the next cgroup group */
+               l = l->next;
+       } while (l != &init_css_set.list);
+
+       /* No existing cgroup group matched */
+       return NULL;
+}
+
+/*
+ * allocate_cg_links() allocates "count" cg_cgroup_link structures
+ * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
+ * success or a negative error
+ */
+
+static int allocate_cg_links(int count, struct list_head *tmp)
+{
+       struct cg_cgroup_link *link;
+       int i;
+       INIT_LIST_HEAD(tmp);
+       for (i = 0; i < count; i++) {
+               link = kmalloc(sizeof(*link), GFP_KERNEL);
+               if (!link) {
+                       while (!list_empty(tmp)) {
+                               link = list_entry(tmp->next,
+                                                 struct cg_cgroup_link,
+                                                 cgrp_link_list);
+                               list_del(&link->cgrp_link_list);
+                               kfree(link);
+                       }
+                       return -ENOMEM;
+               }
+               list_add(&link->cgrp_link_list, tmp);
+       }
+       return 0;
+}
+
+static void free_cg_links(struct list_head *tmp)
+{
+       while (!list_empty(tmp)) {
+               struct cg_cgroup_link *link;
+               link = list_entry(tmp->next,
+                                 struct cg_cgroup_link,
+                                 cgrp_link_list);
+               list_del(&link->cgrp_link_list);
+               kfree(link);
+       }
+}
+
+/*
+ * find_css_set() takes an existing cgroup group and a
+ * cgroup object, and returns a css_set object that's
+ * equivalent to the old group, but with the given cgroup
+ * substituted into the appropriate hierarchy. Must be called with
+ * cgroup_mutex held
+ */
+
+static struct css_set *find_css_set(
+       struct css_set *oldcg, struct cgroup *cgrp)
+{
+       struct css_set *res;
+       struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+       int i;
+
+       struct list_head tmp_cg_links;
+       struct cg_cgroup_link *link;
+
+       /* First see if we already have a cgroup group that matches
+        * the desired set */
+       write_lock(&css_set_lock);
+       res = find_existing_css_set(oldcg, cgrp, template);
+       if (res)
+               get_css_set(res);
+       write_unlock(&css_set_lock);
+
+       if (res)
+               return res;
+
+       res = kmalloc(sizeof(*res), GFP_KERNEL);
+       if (!res)
+               return NULL;
+
+       /* Allocate all the cg_cgroup_link objects that we'll need */
+       if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
+               kfree(res);
+               return NULL;
+       }
+
+       kref_init(&res->ref);
+       INIT_LIST_HEAD(&res->cg_links);
+       INIT_LIST_HEAD(&res->tasks);
+
+       /* Copy the set of subsystem state objects generated in
+        * find_existing_css_set() */
+       memcpy(res->subsys, template, sizeof(res->subsys));
+
+       write_lock(&css_set_lock);
+       /* Add reference counts and links from the new css_set. */
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup *cgrp = res->subsys[i]->cgroup;
+               struct cgroup_subsys *ss = subsys[i];
+               atomic_inc(&cgrp->count);
+               /*
+                * We want to add a link once per cgroup, so we
+                * only do it for the first subsystem in each
+                * hierarchy
+                */
+               if (ss->root->subsys_list.next == &ss->sibling) {
+                       BUG_ON(list_empty(&tmp_cg_links));
+                       link = list_entry(tmp_cg_links.next,
+                                         struct cg_cgroup_link,
+                                         cgrp_link_list);
+                       list_del(&link->cgrp_link_list);
+                       list_add(&link->cgrp_link_list, &cgrp->css_sets);
+                       link->cg = res;
+                       list_add(&link->cg_link_list, &res->cg_links);
+               }
+       }
+       if (list_empty(&rootnode.subsys_list)) {
+               link = list_entry(tmp_cg_links.next,
+                                 struct cg_cgroup_link,
+                                 cgrp_link_list);
+               list_del(&link->cgrp_link_list);
+               list_add(&link->cgrp_link_list, &dummytop->css_sets);
+               link->cg = res;
+               list_add(&link->cg_link_list, &res->cg_links);
+       }
+
+       BUG_ON(!list_empty(&tmp_cg_links));
+
+       /* Link this cgroup group into the list */
+       list_add(&res->list, &init_css_set.list);
+       css_set_count++;
+       write_unlock(&css_set_lock);
+
+       return res;
+}
+
+/*
+ * There is one global cgroup mutex. We also require taking
+ * task_lock() when dereferencing a task's cgroup subsys pointers.
+ * See "The task_lock() exception", at the end of this comment.
+ *
+ * A task must hold cgroup_mutex to modify cgroups.
+ *
+ * Any task can increment and decrement the count field without lock.
+ * So in general, code holding cgroup_mutex can't rely on the count
+ * field not changing.  However, if the count goes to zero, then only
+ * attach_task() can increment it again.  Because a count of zero
+ * means that no tasks are currently attached, therefore there is no
+ * way a task attached to that cgroup can fork (the other way to
+ * increment the count).  So code holding cgroup_mutex can safely
+ * assume that if the count is zero, it will stay zero. Similarly, if
+ * a task holds cgroup_mutex on a cgroup with zero count, it
+ * knows that the cgroup won't be removed, as cgroup_rmdir()
+ * needs that mutex.
+ *
+ * The cgroup_common_file_write handler for operations that modify
+ * the cgroup hierarchy holds cgroup_mutex across the entire operation,
+ * single threading all such cgroup modifications across the system.
+ *
+ * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
+ * (usually) take cgroup_mutex.  These are the two most performance
+ * critical pieces of code here.  The exception occurs on cgroup_exit(),
+ * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex
+ * is taken, and if the cgroup count is zero, a usermode call is made
+ * to /sbin/cgroup_release_agent with the name of the cgroup (path
+ * relative to the root of cgroup file system) as the argument.
+ *
+ * A cgroup can only be deleted if both its 'count' of using tasks
+ * is zero, and its list of 'children' cgroups is empty.  Since all
+ * tasks in the system use _some_ cgroup, and since there is always at
+ * least one task in the system (init, pid == 1), therefore, top_cgroup
+ * always has either child cgroups and/or using tasks.  So we don't
+ * need a special hack to ensure that top_cgroup cannot be deleted.
+ *
+ *     The task_lock() exception
+ *
+ * The need for this exception arises from the action of
+ * attach_task(), which overwrites one task's cgroup pointer with
+ * another.  It does so using cgroup_mutex; however, there are
+ * several performance critical places that need to reference
+ * task->cgroup without the expense of grabbing a system global
+ * mutex.  Therefore except as noted below, when dereferencing or, as
+ * in attach_task(), modifying a task's cgroup pointer we use
+ * task_lock(), which acts on a spinlock (task->alloc_lock) already in
+ * the task_struct routinely used for such matters.
+ *
+ * P.S.  One more locking exception.  RCU is used to guard the
+ * update of a task's cgroup pointer by attach_task().
+ */
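A hedged sketch of the task_lock() rule just described, for code that needs a stable reference to another task's css_set without taking cgroup_mutex:

    struct css_set *cg;

    task_lock(tsk);                 /* blocks attach_task() on this task */
    cg = tsk->cgroups;
    get_css_set(cg);                /* reference survives task_unlock() */
    task_unlock(tsk);

    /* ... inspect cg ... */

    put_css_set(cg);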
+
+/**
+ * cgroup_lock - lock out any changes to cgroup structures
+ *
+ */
+
+void cgroup_lock(void)
+{
+       mutex_lock(&cgroup_mutex);
+}
+
+/**
+ * cgroup_unlock - release lock on cgroup changes
+ *
+ * Undo the lock taken in a previous cgroup_lock() call.
+ */
+
+void cgroup_unlock(void)
+{
+       mutex_unlock(&cgroup_mutex);
+}
+
+/*
+ * A couple of forward declarations required, due to cyclic reference loop:
+ * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
+ * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
+ * -> cgroup_mkdir.
+ */
+
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
+static int cgroup_populate_dir(struct cgroup *cgrp);
+static struct inode_operations cgroup_dir_inode_operations;
+static struct file_operations proc_cgroupstats_operations;
+
+static struct backing_dev_info cgroup_backing_dev_info = {
+       .capabilities   = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
+{
+       struct inode *inode = new_inode(sb);
+
+       if (inode) {
+               inode->i_mode = mode;
+               inode->i_uid = current->fsuid;
+               inode->i_gid = current->fsgid;
+               inode->i_blocks = 0;
+               inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+               inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
+       }
+       return inode;
+}
+
+static void cgroup_diput(struct dentry *dentry, struct inode *inode)
+{
+       /* is dentry a directory ? if so, kfree() associated cgroup */
+       if (S_ISDIR(inode->i_mode)) {
+               struct cgroup *cgrp = dentry->d_fsdata;
+               BUG_ON(!(cgroup_is_removed(cgrp)));
+               /* It's possible for external users to be holding css
+                * reference counts on a cgroup; css_put() needs to
+                * be able to access the cgroup after decrementing
+                * the reference count in order to know if it needs to
+                * queue the cgroup to be handled by the release
+                * agent */
+               synchronize_rcu();
+               kfree(cgrp);
+       }
+       iput(inode);
+}
+
+static void remove_dir(struct dentry *d)
+{
+       struct dentry *parent = dget(d->d_parent);
+
+       d_delete(d);
+       simple_rmdir(parent->d_inode, d);
+       dput(parent);
+}
+
+static void cgroup_clear_directory(struct dentry *dentry)
+{
+       struct list_head *node;
+
+       BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+       spin_lock(&dcache_lock);
+       node = dentry->d_subdirs.next;
+       while (node != &dentry->d_subdirs) {
+               struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
+               list_del_init(node);
+               if (d->d_inode) {
+                       /* This should never be called on a cgroup
+                        * directory with child cgroups */
+                       BUG_ON(d->d_inode->i_mode & S_IFDIR);
+                       d = dget_locked(d);
+                       spin_unlock(&dcache_lock);
+                       d_delete(d);
+                       simple_unlink(dentry->d_inode, d);
+                       dput(d);
+                       spin_lock(&dcache_lock);
+               }
+               node = dentry->d_subdirs.next;
+       }
+       spin_unlock(&dcache_lock);
+}
+
+/*
+ * NOTE : the dentry must have been dget()'ed
+ */
+static void cgroup_d_remove_dir(struct dentry *dentry)
+{
+       cgroup_clear_directory(dentry);
+
+       spin_lock(&dcache_lock);
+       list_del_init(&dentry->d_u.d_child);
+       spin_unlock(&dcache_lock);
+       remove_dir(dentry);
+}
+
+static int rebind_subsystems(struct cgroupfs_root *root,
+                             unsigned long final_bits)
+{
+       unsigned long added_bits, removed_bits;
+       struct cgroup *cgrp = &root->top_cgroup;
+       int i;
+
+       removed_bits = root->actual_subsys_bits & ~final_bits;
+       added_bits = final_bits & ~root->actual_subsys_bits;
+       /* Check that any added subsystems are currently free */
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               unsigned long long bit = 1ull << i;
+               struct cgroup_subsys *ss = subsys[i];
+               if (!(bit & added_bits))
+                       continue;
+               if (ss->root != &rootnode) {
+                       /* Subsystem isn't free */
+                       return -EBUSY;
+               }
+       }
+
+       /* Currently we don't handle adding/removing subsystems when
+        * any child cgroups exist. This is theoretically supportable
+        * but involves complex error handling, so it's being left until
+        * later */
+       if (!list_empty(&cgrp->children))
+               return -EBUSY;
+
+       /* Process each subsystem */
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup_subsys *ss = subsys[i];
+               unsigned long bit = 1UL << i;
+               if (bit & added_bits) {
+                       /* We're binding this subsystem to this hierarchy */
+                       BUG_ON(cgrp->subsys[i]);
+                       BUG_ON(!dummytop->subsys[i]);
+                       BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+                       cgrp->subsys[i] = dummytop->subsys[i];
+                       cgrp->subsys[i]->cgroup = cgrp;
+                       list_add(&ss->sibling, &root->subsys_list);
+                       rcu_assign_pointer(ss->root, root);
+                       if (ss->bind)
+                               ss->bind(ss, cgrp);
+
+               } else if (bit & removed_bits) {
+                       /* We're removing this subsystem */
+                       BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
+                       BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+                       if (ss->bind)
+                               ss->bind(ss, dummytop);
+                       dummytop->subsys[i]->cgroup = dummytop;
+                       cgrp->subsys[i] = NULL;
+                       rcu_assign_pointer(subsys[i]->root, &rootnode);
+                       list_del(&ss->sibling);
+               } else if (bit & final_bits) {
+                       /* Subsystem state should already exist */
+                       BUG_ON(!cgrp->subsys[i]);
+               } else {
+                       /* Subsystem state shouldn't exist */
+                       BUG_ON(cgrp->subsys[i]);
+               }
+       }
+       root->subsys_bits = root->actual_subsys_bits = final_bits;
+       synchronize_rcu();
+
+       return 0;
+}
+
+static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+       struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
+       struct cgroup_subsys *ss;
+
+       mutex_lock(&cgroup_mutex);
+       for_each_subsys(root, ss)
+               seq_printf(seq, ",%s", ss->name);
+       if (test_bit(ROOT_NOPREFIX, &root->flags))
+               seq_puts(seq, ",noprefix");
+       if (strlen(root->release_agent_path))
+               seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+       mutex_unlock(&cgroup_mutex);
+       return 0;
+}
+
+struct cgroup_sb_opts {
+       unsigned long subsys_bits;
+       unsigned long flags;
+       char *release_agent;
+};
+
+/* Convert a hierarchy specifier into a bitmask of subsystems and
+ * flags. */
+static int parse_cgroupfs_options(char *data,
+                                    struct cgroup_sb_opts *opts)
+{
+       char *token, *o = data ?: "all";
+
+       opts->subsys_bits = 0;
+       opts->flags = 0;
+       opts->release_agent = NULL;
+
+       while ((token = strsep(&o, ",")) != NULL) {
+               if (!*token)
+                       return -EINVAL;
+               if (!strcmp(token, "all")) {
+                       opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1;
+               } else if (!strcmp(token, "noprefix")) {
+                       set_bit(ROOT_NOPREFIX, &opts->flags);
+               } else if (!strncmp(token, "release_agent=", 14)) {
+                       /* Specifying two release agents is forbidden */
+                       if (opts->release_agent)
+                               return -EINVAL;
+                       opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
+                       if (!opts->release_agent)
+                               return -ENOMEM;
+                       strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
+                       opts->release_agent[PATH_MAX - 1] = 0;
+               } else {
+                       struct cgroup_subsys *ss;
+                       int i;
+                       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                               ss = subsys[i];
+                               if (!strcmp(token, ss->name)) {
+                                       set_bit(i, &opts->subsys_bits);
+                                       break;
+                               }
+                       }
+                       if (i == CGROUP_SUBSYS_COUNT)
+                               return -ENOENT;
+               }
+       }
+
+       /* We can't have an empty hierarchy */
+       if (!opts->subsys_bits)
+               return -EINVAL;
+
+       return 0;
+}
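To make the option grammar concrete, a hedged sketch of a call site follows; note that data must be writable because strsep() modifies it in place, and "cpuset" stands in for any registered subsystem name:

    char options[] = "cpuset,noprefix,release_agent=/sbin/my_agent";
    struct cgroup_sb_opts opts;
    int ret;

    ret = parse_cgroupfs_options(options, &opts);
    /*
     * On success: the cpuset bit is set in opts.subsys_bits,
     * ROOT_NOPREFIX is set in opts.flags, and opts.release_agent
     * holds a kzalloc()ed copy of "/sbin/my_agent" that the
     * caller must kfree().
     */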
+
+static int cgroup_remount(struct super_block *sb, int *flags, char *data)
+{
+       int ret = 0;
+       struct cgroupfs_root *root = sb->s_fs_info;
+       struct cgroup *cgrp = &root->top_cgroup;
+       struct cgroup_sb_opts opts;
+
+       mutex_lock(&cgrp->dentry->d_inode->i_mutex);
+       mutex_lock(&cgroup_mutex);
+
+       /* See what subsystems are wanted */
+       ret = parse_cgroupfs_options(data, &opts);
+       if (ret)
+               goto out_unlock;
+
+       /* Don't allow flags to change at remount */
+       if (opts.flags != root->flags) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       ret = rebind_subsystems(root, opts.subsys_bits);
+
+       /* (re)populate subsystem files */
+       if (!ret)
+               cgroup_populate_dir(cgrp);
+
+       if (opts.release_agent)
+               strcpy(root->release_agent_path, opts.release_agent);
+ out_unlock:
+       if (opts.release_agent)
+               kfree(opts.release_agent);
+       mutex_unlock(&cgroup_mutex);
+       mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+       return ret;
+}
+
+static struct super_operations cgroup_ops = {
+       .statfs = simple_statfs,
+       .drop_inode = generic_delete_inode,
+       .show_options = cgroup_show_options,
+       .remount_fs = cgroup_remount,
+};
+
+static void init_cgroup_root(struct cgroupfs_root *root)
+{
+       struct cgroup *cgrp = &root->top_cgroup;
+       INIT_LIST_HEAD(&root->subsys_list);
+       INIT_LIST_HEAD(&root->root_list);
+       root->number_of_cgroups = 1;
+       cgrp->root = root;
+       cgrp->top_cgroup = cgrp;
+       INIT_LIST_HEAD(&cgrp->sibling);
+       INIT_LIST_HEAD(&cgrp->children);
+       INIT_LIST_HEAD(&cgrp->css_sets);
+       INIT_LIST_HEAD(&cgrp->release_list);
+}
+
+static int cgroup_test_super(struct super_block *sb, void *data)
+{
+       struct cgroupfs_root *new = data;
+       struct cgroupfs_root *root = sb->s_fs_info;
+
+       /* First check subsystems */
+       if (new->subsys_bits != root->subsys_bits)
+           return 0;
+
+       /* Next check flags */
+       if (new->flags != root->flags)
+               return 0;
+
+       return 1;
+}
+
+static int cgroup_set_super(struct super_block *sb, void *data)
+{
+       int ret;
+       struct cgroupfs_root *root = data;
+
+       ret = set_anon_super(sb, NULL);
+       if (ret)
+               return ret;
+
+       sb->s_fs_info = root;
+       root->sb = sb;
+
+       sb->s_blocksize = PAGE_CACHE_SIZE;
+       sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+       sb->s_magic = CGROUP_SUPER_MAGIC;
+       sb->s_op = &cgroup_ops;
+
+       return 0;
+}
+
+static int cgroup_get_rootdir(struct super_block *sb)
+{
+       struct inode *inode =
+               cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
+       struct dentry *dentry;
+
+       if (!inode)
+               return -ENOMEM;
+
+       inode->i_fop = &simple_dir_operations;
+       inode->i_op = &cgroup_dir_inode_operations;
+       /* directories start off with i_nlink == 2 (for "." entry) */
+       inc_nlink(inode);
+       dentry = d_alloc_root(inode);
+       if (!dentry) {
+               iput(inode);
+               return -ENOMEM;
+       }
+       sb->s_root = dentry;
+       return 0;
+}
+
+static int cgroup_get_sb(struct file_system_type *fs_type,
+                        int flags, const char *unused_dev_name,
+                        void *data, struct vfsmount *mnt)
+{
+       struct cgroup_sb_opts opts;
+       int ret = 0;
+       struct super_block *sb;
+       struct cgroupfs_root *root;
+       struct list_head tmp_cg_links, *l;
+       INIT_LIST_HEAD(&tmp_cg_links);
+
+       /* First find the desired set of subsystems */
+       ret = parse_cgroupfs_options(data, &opts);
+       if (ret) {
+               if (opts.release_agent)
+                       kfree(opts.release_agent);
+               return ret;
+       }
+
+       root = kzalloc(sizeof(*root), GFP_KERNEL);
+       if (!root)
+               return -ENOMEM;
+
+       init_cgroup_root(root);
+       root->subsys_bits = opts.subsys_bits;
+       root->flags = opts.flags;
+       if (opts.release_agent) {
+               strcpy(root->release_agent_path, opts.release_agent);
+               kfree(opts.release_agent);
+       }
+
+       sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
+
+       if (IS_ERR(sb)) {
+               kfree(root);
+               return PTR_ERR(sb);
+       }
+
+       if (sb->s_fs_info != root) {
+               /* Reusing an existing superblock */
+               BUG_ON(sb->s_root == NULL);
+               kfree(root);
+               root = NULL;
+       } else {
+               /* New superblock */
+               struct cgroup *cgrp = &root->top_cgroup;
+               struct inode *inode;
+
+               BUG_ON(sb->s_root != NULL);
+
+               ret = cgroup_get_rootdir(sb);
+               if (ret)
+                       goto drop_new_super;
+               inode = sb->s_root->d_inode;
+
+               mutex_lock(&inode->i_mutex);
+               mutex_lock(&cgroup_mutex);
+
+               /*
+                * We're accessing css_set_count without locking
+                * css_set_lock here, but that's OK - it can only be
+                * increased by someone holding cgroup_lock, and
+                * that's us. The worst that can happen is that we
+                * have some link structures left over
+                */
+               ret = allocate_cg_links(css_set_count, &tmp_cg_links);
+               if (ret) {
+                       mutex_unlock(&cgroup_mutex);
+                       mutex_unlock(&inode->i_mutex);
+                       goto drop_new_super;
+               }
+
+               ret = rebind_subsystems(root, root->subsys_bits);
+               if (ret == -EBUSY) {
+                       mutex_unlock(&cgroup_mutex);
+                       mutex_unlock(&inode->i_mutex);
+                       goto drop_new_super;
+               }
+
+               /* EBUSY should be the only error here */
+               BUG_ON(ret);
+
+               list_add(&root->root_list, &roots);
+               root_count++;
+
+               sb->s_root->d_fsdata = &root->top_cgroup;
+               root->top_cgroup.dentry = sb->s_root;
+
+               /* Link the top cgroup in this hierarchy into all
+                * the css_set objects */
+               write_lock(&css_set_lock);
+               l = &init_css_set.list;
+               do {
+                       struct css_set *cg;
+                       struct cg_cgroup_link *link;
+                       cg = list_entry(l, struct css_set, list);
+                       BUG_ON(list_empty(&tmp_cg_links));
+                       link = list_entry(tmp_cg_links.next,
+                                         struct cg_cgroup_link,
+                                         cgrp_link_list);
+                       list_del(&link->cgrp_link_list);
+                       link->cg = cg;
+                       list_add(&link->cgrp_link_list,
+                                &root->top_cgroup.css_sets);
+                       list_add(&link->cg_link_list, &cg->cg_links);
+                       l = l->next;
+               } while (l != &init_css_set.list);
+               write_unlock(&css_set_lock);
+
+               free_cg_links(&tmp_cg_links);
+
+               BUG_ON(!list_empty(&cgrp->sibling));
+               BUG_ON(!list_empty(&cgrp->children));
+               BUG_ON(root->number_of_cgroups != 1);
+
+               cgroup_populate_dir(cgrp);
+               mutex_unlock(&inode->i_mutex);
+               mutex_unlock(&cgroup_mutex);
+       }
+
+       return simple_set_mnt(mnt, sb);
+
+ drop_new_super:
+       up_write(&sb->s_umount);
+       deactivate_super(sb);
+       free_cg_links(&tmp_cg_links);
+       return ret;
+}
+
+static void cgroup_kill_sb(struct super_block *sb)
+{
+       struct cgroupfs_root *root = sb->s_fs_info;
+       struct cgroup *cgrp = &root->top_cgroup;
+       int ret;
+
+       BUG_ON(!root);
+
+       BUG_ON(root->number_of_cgroups != 1);
+       BUG_ON(!list_empty(&cgrp->children));
+       BUG_ON(!list_empty(&cgrp->sibling));
+
+       mutex_lock(&cgroup_mutex);
+
+       /* Rebind all subsystems back to the default hierarchy */
+       ret = rebind_subsystems(root, 0);
+       /* Shouldn't be able to fail ... */
+       BUG_ON(ret);
+
+       /*
+        * Release all the links from css_sets to this hierarchy's
+        * root cgroup
+        */
+       write_lock(&css_set_lock);
+       while (!list_empty(&cgrp->css_sets)) {
+               struct cg_cgroup_link *link;
+               link = list_entry(cgrp->css_sets.next,
+                                 struct cg_cgroup_link, cgrp_link_list);
+               list_del(&link->cg_link_list);
+               list_del(&link->cgrp_link_list);
+               kfree(link);
+       }
+       write_unlock(&css_set_lock);
+
+       if (!list_empty(&root->root_list)) {
+               list_del(&root->root_list);
+               root_count--;
+       }
+       mutex_unlock(&cgroup_mutex);
+
+       kfree(root);
+       kill_litter_super(sb);
+}
+
+static struct file_system_type cgroup_fs_type = {
+       .name = "cgroup",
+       .get_sb = cgroup_get_sb,
+       .kill_sb = cgroup_kill_sb,
+};
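
For context, a hierarchy registered through this file_system_type is mounted
from user space via mount(2). A minimal sketch, where the /dev/cgroup mount
point and the cpuset subsystem option are illustrative assumptions, not
mandated by this patch:

    /* Illustrative user-space mount of a cgroup hierarchy binding
     * only the cpuset subsystem; paths/options are assumptions. */
    #include <sys/mount.h>

    int mount_cpuset_hierarchy(void)
    {
            return mount("cgroup", "/dev/cgroup", "cgroup", 0, "cpuset");
    }
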
+
+static inline struct cgroup *__d_cgrp(struct dentry *dentry)
+{
+       return dentry->d_fsdata;
+}
+
+static inline struct cftype *__d_cft(struct dentry *dentry)
+{
+       return dentry->d_fsdata;
+}
+
+/*
+ * Called with cgroup_mutex held.  Writes path of cgroup into buf.
+ * Returns 0 on success, -errno on error.
+ */
+int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+{
+       char *start;
+
+       if (cgrp == dummytop) {
+               /*
+                * Inactive subsystems have no dentry for their root
+                * cgroup
+                */
+               strcpy(buf, "/");
+               return 0;
+       }
+
+       start = buf + buflen;
+
+       *--start = '\0';
+       for (;;) {
+               int len = cgrp->dentry->d_name.len;
+               if ((start -= len) < buf)
+                       return -ENAMETOOLONG;
+               memcpy(start, cgrp->dentry->d_name.name, len);
+               cgrp = cgrp->parent;
+               if (!cgrp)
+                       break;
+               if (!cgrp->parent)
+                       continue;
+               if (--start < buf)
+                       return -ENAMETOOLONG;
+               *start = '/';
+       }
+       memmove(buf, start, buf + buflen - start);
+       return 0;
+}
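
As a usage sketch, a caller supplies its own buffer and holds cgroup_mutex,
much as proc_cgroup_show() does further below; the helper name here is
hypothetical:

    /* Hypothetical caller of cgroup_path(), assumed to live inside
     * kernel/cgroup.c where cgroup_mutex is visible. */
    static void foo_log_cgroup_path(struct cgroup *cgrp)
    {
            char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);

            if (!buf)
                    return;
            mutex_lock(&cgroup_mutex);
            if (!cgroup_path(cgrp, buf, PAGE_SIZE))
                    printk(KERN_DEBUG "cgroup path: %s\n", buf);
            mutex_unlock(&cgroup_mutex);
            kfree(buf);
    }
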
+
+/*
+ * Return the first subsystem attached to a cgroup's hierarchy, and
+ * its subsystem id.
+ */
+
+static void get_first_subsys(const struct cgroup *cgrp,
+                       struct cgroup_subsys_state **css, int *subsys_id)
+{
+       const struct cgroupfs_root *root = cgrp->root;
+       const struct cgroup_subsys *test_ss;
+       BUG_ON(list_empty(&root->subsys_list));
+       test_ss = list_entry(root->subsys_list.next,
+                            struct cgroup_subsys, sibling);
+       if (css) {
+               *css = cgrp->subsys[test_ss->subsys_id];
+               BUG_ON(!*css);
+       }
+       if (subsys_id)
+               *subsys_id = test_ss->subsys_id;
+}
+
+/*
+ * Attach task 'tsk' to cgroup 'cgrp'
+ *
+ * Call holding cgroup_mutex.  May take task_lock of
+ * 'tsk' during call.
+ */
+static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
+       int retval = 0;
+       struct cgroup_subsys *ss;
+       struct cgroup *oldcgrp;
+       struct css_set *cg = tsk->cgroups;
+       struct css_set *newcg;
+       struct cgroupfs_root *root = cgrp->root;
+       int subsys_id;
+
+       get_first_subsys(cgrp, NULL, &subsys_id);
+
+       /* Nothing to do if the task is already in that cgroup */
+       oldcgrp = task_cgroup(tsk, subsys_id);
+       if (cgrp == oldcgrp)
+               return 0;
+
+       for_each_subsys(root, ss) {
+               if (ss->can_attach) {
+                       retval = ss->can_attach(ss, cgrp, tsk);
+                       if (retval) {
+                               return retval;
+                       }
+               }
+       }
+
+       /*
+        * Locate or allocate a new css_set for this task,
+        * based on its final set of cgroups
+        */
+       newcg = find_css_set(cg, cgrp);
+       if (!newcg) {
+               return -ENOMEM;
+       }
+
+       task_lock(tsk);
+       if (tsk->flags & PF_EXITING) {
+               task_unlock(tsk);
+               put_css_set(newcg);
+               return -ESRCH;
+       }
+       rcu_assign_pointer(tsk->cgroups, newcg);
+       task_unlock(tsk);
+
+       /* Update the css_set linked lists if we're using them */
+       write_lock(&css_set_lock);
+       if (!list_empty(&tsk->cg_list)) {
+               list_del(&tsk->cg_list);
+               list_add(&tsk->cg_list, &newcg->tasks);
+       }
+       write_unlock(&css_set_lock);
+
+       for_each_subsys(root, ss) {
+               if (ss->attach) {
+                       ss->attach(ss, cgrp, oldcgrp, tsk);
+               }
+       }
+       set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+       synchronize_rcu();
+       put_css_set(cg);
+       return 0;
+}
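
For illustration, a subsystem hooks into this two-phase protocol roughly as
follows; the foo_* callbacks are hypothetical, not part of this patch:

    /* Sketch of subsystem callbacks driven by attach_task():
     * can_attach() may veto the move before any state changes;
     * attach() runs after tsk->cgroups has been switched. */
    static int foo_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                              struct task_struct *tsk)
    {
            if (!tsk->mm)           /* e.g. refuse kernel threads */
                    return -EINVAL;
            return 0;
    }

    static void foo_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                           struct cgroup *oldcgrp, struct task_struct *tsk)
    {
            /* migrate per-task accounting from oldcgrp to cgrp */
    }
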
+
+/*
+ * Attach the task with pid 'pid' to cgroup 'cgrp'. Call with
+ * cgroup_mutex held; may take task_lock of the task.
+ */
+static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+{
+       pid_t pid;
+       struct task_struct *tsk;
+       int ret;
+
+       if (sscanf(pidbuf, "%d", &pid) != 1)
+               return -EIO;
+
+       if (pid) {
+               rcu_read_lock();
+               tsk = find_task_by_pid(pid);
+               if (!tsk || tsk->flags & PF_EXITING) {
+                       rcu_read_unlock();
+                       return -ESRCH;
+               }
+               get_task_struct(tsk);
+               rcu_read_unlock();
+
+               if ((current->euid) && (current->euid != tsk->uid)
+                   && (current->euid != tsk->suid)) {
+                       put_task_struct(tsk);
+                       return -EACCES;
+               }
+       } else {
+               tsk = current;
+               get_task_struct(tsk);
+       }
+
+       ret = attach_task(cgrp, tsk);
+       put_task_struct(tsk);
+       return ret;
+}
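
From user space this handler is reached by writing a pid to a cgroup's tasks
file; writing "0" moves the caller itself, since pid 0 selects current above.
An illustrative sketch (the mount point and cgroup name are assumptions):

    /* Illustrative user-space code: move the calling task into the
     * cgroup at /dev/cgroup/foo by writing "0" to its tasks file. */
    #include <fcntl.h>
    #include <unistd.h>

    int join_cgroup_foo(void)
    {
            int fd = open("/dev/cgroup/foo/tasks", O_WRONLY);

            if (fd < 0)
                    return -1;
            if (write(fd, "0", 1) != 1) {
                    close(fd);
                    return -1;
            }
            return close(fd);
    }
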
+
+/* The various types of files and directories in a cgroup file system */
+
+enum cgroup_filetype {
+       FILE_ROOT,
+       FILE_DIR,
+       FILE_TASKLIST,
+       FILE_NOTIFY_ON_RELEASE,
+       FILE_RELEASABLE,
+       FILE_RELEASE_AGENT,
+};
+
+static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft,
+                                struct file *file,
+                                const char __user *userbuf,
+                                size_t nbytes, loff_t *unused_ppos)
+{
+       char buffer[64];
+       int retval = 0;
+       u64 val;
+       char *end;
+
+       if (!nbytes)
+               return -EINVAL;
+       if (nbytes >= sizeof(buffer))
+               return -E2BIG;
+       if (copy_from_user(buffer, userbuf, nbytes))
+               return -EFAULT;
+
+       buffer[nbytes] = 0;     /* nul-terminate */
+
+       /* strip newline if necessary */
+       if (nbytes && (buffer[nbytes-1] == '\n'))
+               buffer[nbytes-1] = 0;
+       val = simple_strtoull(buffer, &end, 0);
+       if (*end)
+               return -EINVAL;
+
+       /* Pass to subsystem */
+       retval = cft->write_uint(cgrp, cft, val);
+       if (!retval)
+               retval = nbytes;
+       return retval;
+}
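
A controller plugs into this path by setting .write_uint in its cftype; a
minimal sketch in which foo_state, foo_subsys_id and the file name are
hypothetical assumptions:

    /* Hypothetical controller state exposed through write_uint. */
    struct foo_state {
            struct cgroup_subsys_state css;
            u64 limit;
    };

    static int foo_limit_write(struct cgroup *cgrp, struct cftype *cft,
                               u64 val)
    {
            struct foo_state *fs = container_of(cgrp->subsys[foo_subsys_id],
                                                struct foo_state, css);

            fs->limit = val;   /* cgroup_write_uint() already parsed userbuf */
            return 0;          /* 0 makes the write report nbytes consumed */
    }

    static struct cftype cft_foo_limit = {
            .name = "limit",   /* appears as "foo.limit" unless noprefix */
            .write_uint = foo_limit_write,
    };
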
+
+static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
+                                          struct cftype *cft,
+                                          struct file *file,
+                                          const char __user *userbuf,
+                                          size_t nbytes, loff_t *unused_ppos)
+{
+       enum cgroup_filetype type = cft->private;
+       char *buffer;
+       int retval = 0;
+
+       if (nbytes >= PATH_MAX)
+               return -E2BIG;
+
+       /* +1 for nul-terminator */
+       buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+       if (buffer == NULL)
+               return -ENOMEM;
+
+       if (copy_from_user(buffer, userbuf, nbytes)) {
+               retval = -EFAULT;
+               goto out1;
+       }
+       buffer[nbytes] = 0;     /* nul-terminate */
+
+       mutex_lock(&cgroup_mutex);
+
+       if (cgroup_is_removed(cgrp)) {
+               retval = -ENODEV;
+               goto out2;
+       }
+
+       switch (type) {
+       case FILE_TASKLIST:
+               retval = attach_task_by_pid(cgrp, buffer);
+               break;
+       case FILE_NOTIFY_ON_RELEASE:
+               clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+               if (simple_strtoul(buffer, NULL, 10) != 0)
+                       set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+               else
+                       clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+               break;
+       case FILE_RELEASE_AGENT:
+       {
+               struct cgroupfs_root *root = cgrp->root;
+               /* Strip trailing newline */
+               if (nbytes && (buffer[nbytes-1] == '\n')) {
+                       buffer[nbytes-1] = 0;
+               }
+               if (nbytes < sizeof(root->release_agent_path)) {
+                       /* We never write anything other than '\0'
+                        * into the last char of release_agent_path,
+                        * so it always remains a NUL-terminated
+                        * string */
+                       strncpy(root->release_agent_path, buffer, nbytes);
+                       root->release_agent_path[nbytes] = 0;
+               } else {
+                       retval = -ENOSPC;
+               }
+               break;
+       }
+       default:
+               retval = -EINVAL;
+               goto out2;
+       }
+
+       if (retval == 0)
+               retval = nbytes;
+out2:
+       mutex_unlock(&cgroup_mutex);
+out1:
+       kfree(buffer);
+       return retval;
+}
+
+static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
+                                               size_t nbytes, loff_t *ppos)
+{
+       struct cftype *cft = __d_cft(file->f_dentry);
+       struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+       if (!cft)
+               return -ENODEV;
+       if (cft->write)
+               return cft->write(cgrp, cft, file, buf, nbytes, ppos);
+       if (cft->write_uint)
+               return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos);
+       return -EINVAL;
+}
+
+static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft,
+                                  struct file *file,
+                                  char __user *buf, size_t nbytes,
+                                  loff_t *ppos)
+{
+       char tmp[64];
+       u64 val = cft->read_uint(cgrp, cft);
+       int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
+
+       return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+}
+
+static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
+                                         struct cftype *cft,
+                                         struct file *file,
+                                         char __user *buf,
+                                         size_t nbytes, loff_t *ppos)
+{
+       enum cgroup_filetype type = cft->private;
+       char *page;
+       ssize_t retval = 0;
+       char *s;
+
+       if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+               return -ENOMEM;
+
+       s = page;
+
+       switch (type) {
+       case FILE_RELEASE_AGENT:
+       {
+               struct cgroupfs_root *root;
+               size_t n;
+               mutex_lock(&cgroup_mutex);
+               root = cgrp->root;
+               n = strnlen(root->release_agent_path,
+                           sizeof(root->release_agent_path));
+               n = min(n, (size_t) PAGE_SIZE);
+               strncpy(s, root->release_agent_path, n);
+               mutex_unlock(&cgroup_mutex);
+               s += n;
+               break;
+       }
+       default:
+               retval = -EINVAL;
+               goto out;
+       }
+       *s++ = '\n';
+
+       retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
+out:
+       free_page((unsigned long)page);
+       return retval;
+}
+
+static ssize_t cgroup_file_read(struct file *file, char __user *buf,
+                                  size_t nbytes, loff_t *ppos)
+{
+       struct cftype *cft = __d_cft(file->f_dentry);
+       struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+       if (!cft)
+               return -ENODEV;
+
+       if (cft->read)
+               return cft->read(cgrp, cft, file, buf, nbytes, ppos);
+       if (cft->read_uint)
+               return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos);
+       return -EINVAL;
+}
+
+static int cgroup_file_open(struct inode *inode, struct file *file)
+{
+       int err;
+       struct cftype *cft;
+
+       err = generic_file_open(inode, file);
+       if (err)
+               return err;
+
+       cft = __d_cft(file->f_dentry);
+       if (!cft)
+               return -ENODEV;
+       if (cft->open)
+               err = cft->open(inode, file);
+       else
+               err = 0;
+
+       return err;
+}
+
+static int cgroup_file_release(struct inode *inode, struct file *file)
+{
+       struct cftype *cft = __d_cft(file->f_dentry);
+       if (cft->release)
+               return cft->release(inode, file);
+       return 0;
+}
+
+/*
+ * cgroup_rename - Only allow simple rename of directories in place.
+ */
+static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
+                           struct inode *new_dir, struct dentry *new_dentry)
+{
+       if (!S_ISDIR(old_dentry->d_inode->i_mode))
+               return -ENOTDIR;
+       if (new_dentry->d_inode)
+               return -EEXIST;
+       if (old_dir != new_dir)
+               return -EIO;
+       return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+static struct file_operations cgroup_file_operations = {
+       .read = cgroup_file_read,
+       .write = cgroup_file_write,
+       .llseek = generic_file_llseek,
+       .open = cgroup_file_open,
+       .release = cgroup_file_release,
+};
+
+static struct inode_operations cgroup_dir_inode_operations = {
+       .lookup = simple_lookup,
+       .mkdir = cgroup_mkdir,
+       .rmdir = cgroup_rmdir,
+       .rename = cgroup_rename,
+};
+
+static int cgroup_create_file(struct dentry *dentry, int mode,
+                               struct super_block *sb)
+{
+       static struct dentry_operations cgroup_dops = {
+               .d_iput = cgroup_diput,
+       };
+
+       struct inode *inode;
+
+       if (!dentry)
+               return -ENOENT;
+       if (dentry->d_inode)
+               return -EEXIST;
+
+       inode = cgroup_new_inode(mode, sb);
+       if (!inode)
+               return -ENOMEM;
+
+       if (S_ISDIR(mode)) {
+               inode->i_op = &cgroup_dir_inode_operations;
+               inode->i_fop = &simple_dir_operations;
+
+               /* start off with i_nlink == 2 (for "." entry) */
+               inc_nlink(inode);
+
+               /* start with the directory inode held, so that we can
+                * populate it without racing with another mkdir */
+               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+       } else if (S_ISREG(mode)) {
+               inode->i_size = 0;
+               inode->i_fop = &cgroup_file_operations;
+       }
+       dentry->d_op = &cgroup_dops;
+       d_instantiate(dentry, inode);
+       dget(dentry);   /* Extra count - pin the dentry in core */
+       return 0;
+}
+
+/*
+ *     cgroup_create_dir - create a directory for an object.
+ *     cgrp:   the cgroup we create the directory for.
+ *             It must have a valid ->parent field,
+ *             and we are going to fill its ->dentry field.
+ *     dentry: dentry of the new cgroup
+ *     mode:   mode to set on new directory.
+ */
+static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
+                               int mode)
+{
+       struct dentry *parent;
+       int error = 0;
+
+       parent = cgrp->parent->dentry;
+       error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
+       if (!error) {
+               dentry->d_fsdata = cgrp;
+               inc_nlink(parent->d_inode);
+               cgrp->dentry = dentry;
+               dget(dentry);
+       }
+       dput(dentry);
+
+       return error;
+}
+
+int cgroup_add_file(struct cgroup *cgrp,
+                      struct cgroup_subsys *subsys,
+                      const struct cftype *cft)
+{
+       struct dentry *dir = cgrp->dentry;
+       struct dentry *dentry;
+       int error;
+
+       char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
+       if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
+               strcpy(name, subsys->name);
+               strcat(name, ".");
+       }
+       strcat(name, cft->name);
+       BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
+       dentry = lookup_one_len(name, dir, strlen(name));
+       if (!IS_ERR(dentry)) {
+               error = cgroup_create_file(dentry, 0644 | S_IFREG,
+                                               cgrp->root->sb);
+               if (!error)
+                       dentry->d_fsdata = (void *)cft;
+               dput(dentry);
+       } else
+               error = PTR_ERR(dentry);
+       return error;
+}
+
+int cgroup_add_files(struct cgroup *cgrp,
+                       struct cgroup_subsys *subsys,
+                       const struct cftype cft[],
+                       int count)
+{
+       int i, err;
+       for (i = 0; i < count; i++) {
+               err = cgroup_add_file(cgrp, subsys, &cft[i]);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
+/* Count the number of tasks in a cgroup. */
+
+int cgroup_task_count(const struct cgroup *cgrp)
+{
+       int count = 0;
+       struct list_head *l;
+
+       read_lock(&css_set_lock);
+       l = cgrp->css_sets.next;
+       while (l != &cgrp->css_sets) {
+               struct cg_cgroup_link *link =
+                       list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+               count += atomic_read(&link->cg->ref.refcount);
+               l = l->next;
+       }
+       read_unlock(&css_set_lock);
+       return count;
+}
+
+/*
+ * Advance a cgroup iterator to the next css_set in this cgroup that
+ * has tasks, or clear it->cg_link when none remain.
+ */
+static void cgroup_advance_iter(struct cgroup *cgrp,
+                                         struct cgroup_iter *it)
+{
+       struct list_head *l = it->cg_link;
+       struct cg_cgroup_link *link;
+       struct css_set *cg;
+
+       /* Advance to the next non-empty css_set */
+       do {
+               l = l->next;
+               if (l == &cgrp->css_sets) {
+                       it->cg_link = NULL;
+                       return;
+               }
+               link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+               cg = link->cg;
+       } while (list_empty(&cg->tasks));
+       it->cg_link = l;
+       it->task = cg->tasks.next;
+}
+
+void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
+{
+       /*
+        * The first time anyone tries to iterate across a cgroup,
+        * we need to enable the list linking each css_set to its
+        * tasks, and fix up all existing tasks.
+        */
+       if (!use_task_css_set_links) {
+               struct task_struct *p, *g;
+               write_lock(&css_set_lock);
+               use_task_css_set_links = 1;
+               do_each_thread(g, p) {
+                       task_lock(p);
+                       if (list_empty(&p->cg_list))
+                               list_add(&p->cg_list, &p->cgroups->tasks);
+                       task_unlock(p);
+               } while_each_thread(g, p);
+               write_unlock(&css_set_lock);
+       }
+       read_lock(&css_set_lock);
+       it->cg_link = &cgrp->css_sets;
+       cgroup_advance_iter(cgrp, it);
+}
+
+struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
+                                       struct cgroup_iter *it)
+{
+       struct task_struct *res;
+       struct list_head *l = it->task;
+
+       /* If the iterator's cg_link is NULL, there are no tasks left */
+       if (!it->cg_link)
+               return NULL;
+       res = list_entry(l, struct task_struct, cg_list);
+       /* Advance iterator to find next entry */
+       l = l->next;
+       if (l == &res->cgroups->tasks) {
+               /* We reached the end of this task list - move on to
+                * the next cg_cgroup_link */
+               cgroup_advance_iter(cgrp, it);
+       } else {
+               it->task = l;
+       }
+       return res;
+}
+
+void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
+{
+       read_unlock(&css_set_lock);
+}
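
A minimal usage sketch of this iterator API (hypothetical helper; note the
walk must not sleep, since css_set_lock stays read-held from
cgroup_iter_start() to cgroup_iter_end()):

    /* Count the runnable tasks in a cgroup, in the same pattern that
     * pid_array_load() and cgroupstats_build() use below. */
    static int foo_count_running(struct cgroup *cgrp)
    {
            struct cgroup_iter it;
            struct task_struct *tsk;
            int n = 0;

            cgroup_iter_start(cgrp, &it);
            while ((tsk = cgroup_iter_next(cgrp, &it))) {
                    if (tsk->state == TASK_RUNNING)
                            n++;
            }
            cgroup_iter_end(cgrp, &it);
            return n;
    }
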
+
+/*
+ * Stuff for reading the 'tasks' file.
+ *
+ * Reading this file can return large amounts of data if a cgroup has
+ * *lots* of attached tasks. So it may need several calls to read(),
+ * but we cannot guarantee that the information we produce is correct
+ * unless we produce it entirely atomically.
+ *
+ * Upon tasks file open(), a struct ctr_struct is allocated, which
+ * holds a pointer to an array (also allocated here).  The struct
+ * ctr_struct * is stored in file->private_data.  Its resources will
+ * be freed by release() when the file is closed.  The array is used
+ * to sprintf the PIDs and then used by read().
+ */
+struct ctr_struct {
+       char *buf;
+       int bufsz;
+};
+
+/*
+ * Load into 'pidarray' up to 'npids' of the tasks using cgroup
+ * 'cgrp'.  Return actual number of pids loaded.  No need to
+ * task_lock(p) when reading out p->cgroup, since we're in an RCU
+ * read section, so the css_set can't go away, and is
+ * immutable after creation.
+ */
+static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+{
+       int n = 0;
+       struct cgroup_iter it;
+       struct task_struct *tsk;
+       cgroup_iter_start(cgrp, &it);
+       while ((tsk = cgroup_iter_next(cgrp, &it))) {
+               if (unlikely(n == npids))
+                       break;
+               pidarray[n++] = task_pid_nr(tsk);
+       }
+       cgroup_iter_end(cgrp, &it);
+       return n;
+}
+
+/**
+ * cgroupstats_build - build and fill cgroupstats so that taskstats can
+ * export it to user space.
+ *
+ * @stats: cgroupstats to fill information into
+ * @dentry: A dentry entry belonging to the cgroup for which stats have
+ * been requested.
+ */
+int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
+{
+       int ret = -EINVAL;
+       struct cgroup *cgrp;
+       struct cgroup_iter it;
+       struct task_struct *tsk;
+       /*
+        * Validate dentry by checking the superblock operations
+        */
+       if (dentry->d_sb->s_op != &cgroup_ops)
+               goto err;
+
+       ret = 0;
+       cgrp = dentry->d_fsdata;
+       rcu_read_lock();
+
+       cgroup_iter_start(cgrp, &it);
+       while ((tsk = cgroup_iter_next(cgrp, &it))) {
+               switch (tsk->state) {
+               case TASK_RUNNING:
+                       stats->nr_running++;
+                       break;
+               case TASK_INTERRUPTIBLE:
+                       stats->nr_sleeping++;
+                       break;
+               case TASK_UNINTERRUPTIBLE:
+                       stats->nr_uninterruptible++;
+                       break;
+               case TASK_STOPPED:
+                       stats->nr_stopped++;
+                       break;
+               default:
+                       if (delayacct_is_task_waiting_on_io(tsk))
+                               stats->nr_io_wait++;
+                       break;
+               }
+       }
+       cgroup_iter_end(cgrp, &it);
+
+       rcu_read_unlock();
+err:
+       return ret;
+}
+
+static int cmppid(const void *a, const void *b)
+{
+       return *(pid_t *)a - *(pid_t *)b;
+}
+
+/*
+ * Convert array 'a' of 'npids' pid_t's to a string of newline separated
+ * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
+ * count 'cnt' of how many chars would be written if buf were large enough.
+ */
+static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+{
+       int cnt = 0;
+       int i;
+
+       for (i = 0; i < npids; i++)
+               cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
+       return cnt;
+}
+
+/*
+ * Handle an open on 'tasks' file.  Prepare a buffer listing the
+ * process id's of tasks currently attached to the cgroup being opened.
+ *
+ * Does not require any specific cgroup mutexes, and does not take any.
+ */
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+       struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+       struct ctr_struct *ctr;
+       pid_t *pidarray;
+       int npids;
+       char c;
+
+       if (!(file->f_mode & FMODE_READ))
+               return 0;
+
+       ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
+       if (!ctr)
+               goto err0;
+
+       /*
+        * If cgroup gets more users after we read count, we won't have
+        * enough space - tough.  This race is indistinguishable to the
+        * caller from the case that the additional cgroup users didn't
+        * show up until sometime later on.
+        */
+       npids = cgroup_task_count(cgrp);
+       if (npids) {
+               pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+               if (!pidarray)
+                       goto err1;
+
+               npids = pid_array_load(pidarray, npids, cgrp);
+               sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
+
+               /* Call pid_array_to_buf() twice, first just to get bufsz */
+               ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
+               ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
+               if (!ctr->buf)
+                       goto err2;
+               ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
+
+               kfree(pidarray);
+       } else {
+               ctr->buf = NULL;
+               ctr->bufsz = 0;
+       }
+       file->private_data = ctr;
+       return 0;
+
+err2:
+       kfree(pidarray);
+err1:
+       kfree(ctr);
+err0:
+       return -ENOMEM;
+}
+
+static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
+                                   struct cftype *cft,
+                                   struct file *file, char __user *buf,
+                                   size_t nbytes, loff_t *ppos)
+{
+       struct ctr_struct *ctr = file->private_data;
+
+       return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
+}
+
+static int cgroup_tasks_release(struct inode *unused_inode,
+                                       struct file *file)
+{
+       struct ctr_struct *ctr;
+
+       if (file->f_mode & FMODE_READ) {
+               ctr = file->private_data;
+               kfree(ctr->buf);
+               kfree(ctr);
+       }
+       return 0;
+}
+
+static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
+                                           struct cftype *cft)
+{
+       return notify_on_release(cgrp);
+}
+
+static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft)
+{
+       return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+}
+
+/*
+ * for the common functions, 'private' gives the type of file
+ */
+static struct cftype files[] = {
+       {
+               .name = "tasks",
+               .open = cgroup_tasks_open,
+               .read = cgroup_tasks_read,
+               .write = cgroup_common_file_write,
+               .release = cgroup_tasks_release,
+               .private = FILE_TASKLIST,
+       },
+
+       {
+               .name = "notify_on_release",
+               .read_uint = cgroup_read_notify_on_release,
+               .write = cgroup_common_file_write,
+               .private = FILE_NOTIFY_ON_RELEASE,
+       },
+
+       {
+               .name = "releasable",
+               .read_uint = cgroup_read_releasable,
+               .private = FILE_RELEASABLE,
+       }
+};
+
+static struct cftype cft_release_agent = {
+       .name = "release_agent",
+       .read = cgroup_common_file_read,
+       .write = cgroup_common_file_write,
+       .private = FILE_RELEASE_AGENT,
+};
+
+static int cgroup_populate_dir(struct cgroup *cgrp)
+{
+       int err;
+       struct cgroup_subsys *ss;
+
+       /* First clear out any existing files */
+       cgroup_clear_directory(cgrp->dentry);
+
+       err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
+       if (err < 0)
+               return err;
+
+       if (cgrp == cgrp->top_cgroup) {
+               if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
+                       return err;
+       }
+
+       for_each_subsys(cgrp->root, ss) {
+               if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
+                       return err;
+       }
+
+       return 0;
+}
+
+static void init_cgroup_css(struct cgroup_subsys_state *css,
+                              struct cgroup_subsys *ss,
+                              struct cgroup *cgrp)
+{
+       css->cgroup = cgrp;
+       atomic_set(&css->refcnt, 0);
+       css->flags = 0;
+       if (cgrp == dummytop)
+               set_bit(CSS_ROOT, &css->flags);
+       BUG_ON(cgrp->subsys[ss->subsys_id]);
+       cgrp->subsys[ss->subsys_id] = css;
+}
+
+/*
+ *     cgroup_create - create a cgroup
+ *     parent: cgroup that will be parent of the new cgroup.
+ *     dentry:         dentry of the new cgroup.
+ *     mode:           mode to set on new inode
+ *
+ *     Must be called with the mutex on the parent inode held
+ */
+
+static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
+                            int mode)
+{
+       struct cgroup *cgrp;
+       struct cgroupfs_root *root = parent->root;
+       int err = 0;
+       struct cgroup_subsys *ss;
+       struct super_block *sb = root->sb;
+
+       cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
+       if (!cgrp)
+               return -ENOMEM;
+
+       /* Grab a reference on the superblock so the hierarchy doesn't
+        * get deleted on unmount if there are child cgroups.  This
+        * can be done outside cgroup_mutex, since the sb can't
+        * disappear while someone has an open control file on the
+        * fs */
+       atomic_inc(&sb->s_active);
+
+       mutex_lock(&cgroup_mutex);
+
+       cgrp->flags = 0;
+       INIT_LIST_HEAD(&cgrp->sibling);
+       INIT_LIST_HEAD(&cgrp->children);
+       INIT_LIST_HEAD(&cgrp->css_sets);
+       INIT_LIST_HEAD(&cgrp->release_list);
+
+       cgrp->parent = parent;
+       cgrp->root = parent->root;
+       cgrp->top_cgroup = parent->top_cgroup;
+
+       for_each_subsys(root, ss) {
+               struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+               if (IS_ERR(css)) {
+                       err = PTR_ERR(css);
+                       goto err_destroy;
+               }
+               init_cgroup_css(css, ss, cgrp);
+       }
+
+       list_add(&cgrp->sibling, &cgrp->parent->children);
+       root->number_of_cgroups++;
+
+       err = cgroup_create_dir(cgrp, dentry, mode);
+       if (err < 0)
+               goto err_remove;
+
+       /* The cgroup directory was pre-locked for us */
+       BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
+
+       err = cgroup_populate_dir(cgrp);
+       /* If err < 0, we have a half-filled directory - oh well ;) */
+
+       mutex_unlock(&cgroup_mutex);
+       mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+
+       return 0;
+
+ err_remove:
+
+       list_del(&cgrp->sibling);
+       root->number_of_cgroups--;
+
+ err_destroy:
+
+       for_each_subsys(root, ss) {
+               if (cgrp->subsys[ss->subsys_id])
+                       ss->destroy(ss, cgrp);
+       }
+
+       mutex_unlock(&cgroup_mutex);
+
+       /* Release the reference count that we took on the superblock */
+       deactivate_super(sb);
+
+       kfree(cgrp);
+       return err;
+}
+
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+       struct cgroup *c_parent = dentry->d_parent->d_fsdata;
+
+       /* the vfs holds inode->i_mutex already */
+       return cgroup_create(c_parent, dentry, mode | S_IFDIR);
+}
+
+static inline int cgroup_has_css_refs(struct cgroup *cgrp)
+{
+       /* Check the reference count on each subsystem. Since we
+        * already established that there are no tasks in the
+        * cgroup, if the css refcount is also 0, then there should
+        * be no outstanding references, so the subsystem is safe to
+        * destroy. We scan across all subsystems rather than using
+        * the per-hierarchy linked list of mounted subsystems since
+        * we can be called via check_for_release() with no
+        * synchronization other than RCU, and the subsystem linked
+        * list isn't RCU-safe */
+       int i;
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup_subsys *ss = subsys[i];
+               struct cgroup_subsys_state *css;
+               /* Skip subsystems not in this hierarchy */
+               if (ss->root != cgrp->root)
+                       continue;
+               css = cgrp->subsys[ss->subsys_id];
+               /* When called from check_for_release() it's possible
+                * that by this point the cgroup has been removed
+                * and the css deleted. But a false-positive doesn't
+                * matter, since it can only happen if the cgroup
+                * has been deleted and hence no longer needs the
+                * release agent to be called anyway. */
+               if (css && atomic_read(&css->refcnt)) {
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
+{
+       struct cgroup *cgrp = dentry->d_fsdata;
+       struct dentry *d;
+       struct cgroup *parent;
+       struct cgroup_subsys *ss;
+       struct super_block *sb;
+       struct cgroupfs_root *root;
+
+       /* the vfs holds both inode->i_mutex already */
+
+       mutex_lock(&cgroup_mutex);
+       if (atomic_read(&cgrp->count) != 0) {
+               mutex_unlock(&cgroup_mutex);
+               return -EBUSY;
+       }
+       if (!list_empty(&cgrp->children)) {
+               mutex_unlock(&cgroup_mutex);
+               return -EBUSY;
+       }
+
+       parent = cgrp->parent;
+       root = cgrp->root;
+       sb = root->sb;
+
+       if (cgroup_has_css_refs(cgrp)) {
+               mutex_unlock(&cgroup_mutex);
+               return -EBUSY;
+       }
+
+       for_each_subsys(root, ss) {
+               if (cgrp->subsys[ss->subsys_id])
+                       ss->destroy(ss, cgrp);
+       }
+
+       spin_lock(&release_list_lock);
+       set_bit(CGRP_REMOVED, &cgrp->flags);
+       if (!list_empty(&cgrp->release_list))
+               list_del(&cgrp->release_list);
+       spin_unlock(&release_list_lock);
+       /* delete my sibling from parent->children */
+       list_del(&cgrp->sibling);
+       spin_lock(&cgrp->dentry->d_lock);
+       d = dget(cgrp->dentry);
+       cgrp->dentry = NULL;
+       spin_unlock(&d->d_lock);
+
+       cgroup_d_remove_dir(d);
+       dput(d);
+       root->number_of_cgroups--;
+
+       set_bit(CGRP_RELEASABLE, &parent->flags);
+       check_for_release(parent);
+
+       mutex_unlock(&cgroup_mutex);
+       /* Drop the active superblock reference that we took when we
+        * created the cgroup */
+       deactivate_super(sb);
+       return 0;
+}
+
+static void cgroup_init_subsys(struct cgroup_subsys *ss)
+{
+       struct cgroup_subsys_state *css;
+       struct list_head *l;
+       printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name);
+
+       /* Create the top cgroup state for this subsystem */
+       ss->root = &rootnode;
+       css = ss->create(ss, dummytop);
+       /* We don't handle early failures gracefully */
+       BUG_ON(IS_ERR(css));
+       init_cgroup_css(css, ss, dummytop);
+
+       /* Update all css_set objects to contain a subsys
+        * pointer to this state - since the subsystem is
+        * newly registered, all tasks and hence all css_set
+        * objects are in the subsystem's top cgroup. */
+       write_lock(&css_set_lock);
+       l = &init_css_set.list;
+       do {
+               struct css_set *cg =
+                       list_entry(l, struct css_set, list);
+               cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
+               l = l->next;
+       } while (l != &init_css_set.list);
+       write_unlock(&css_set_lock);
+
+       /* If this subsystem requested that it be notified with fork
+        * events, we should send it one now for every process in the
+        * system */
+       if (ss->fork) {
+               struct task_struct *g, *p;
+
+               read_lock(&tasklist_lock);
+               do_each_thread(g, p) {
+                       ss->fork(ss, p);
+               } while_each_thread(g, p);
+               read_unlock(&tasklist_lock);
+       }
+
+       need_forkexit_callback |= ss->fork || ss->exit;
+
+       ss->active = 1;
+}
+
+/**
+ * cgroup_init_early - initialize cgroups at system boot, and
+ * initialize any subsystems that request early init.
+ */
+int __init cgroup_init_early(void)
+{
+       int i;
+       kref_init(&init_css_set.ref);
+       kref_get(&init_css_set.ref);
+       INIT_LIST_HEAD(&init_css_set.list);
+       INIT_LIST_HEAD(&init_css_set.cg_links);
+       INIT_LIST_HEAD(&init_css_set.tasks);
+       css_set_count = 1;
+       init_cgroup_root(&rootnode);
+       list_add(&rootnode.root_list, &roots);
+       root_count = 1;
+       init_task.cgroups = &init_css_set;
+
+       init_css_set_link.cg = &init_css_set;
+       list_add(&init_css_set_link.cgrp_link_list,
+                &rootnode.top_cgroup.css_sets);
+       list_add(&init_css_set_link.cg_link_list,
+                &init_css_set.cg_links);
+
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup_subsys *ss = subsys[i];
+
+               BUG_ON(!ss->name);
+               BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
+               BUG_ON(!ss->create);
+               BUG_ON(!ss->destroy);
+               if (ss->subsys_id != i) {
+                       printk(KERN_ERR "Subsys %s id == %d\n",
+                              ss->name, ss->subsys_id);
+                       BUG();
+               }
+
+               if (ss->early_init)
+                       cgroup_init_subsys(ss);
+       }
+       return 0;
+}
+
+/**
+ * cgroup_init - register cgroup filesystem and /proc file, and
+ * initialize any subsystems that didn't request early init.
+ */
+int __init cgroup_init(void)
+{
+       int err;
+       int i;
+       struct proc_dir_entry *entry;
+
+       err = bdi_init(&cgroup_backing_dev_info);
+       if (err)
+               return err;
+
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup_subsys *ss = subsys[i];
+               if (!ss->early_init)
+                       cgroup_init_subsys(ss);
+       }
+
+       err = register_filesystem(&cgroup_fs_type);
+       if (err < 0)
+               goto out;
+
+       entry = create_proc_entry("cgroups", 0, NULL);
+       if (entry)
+               entry->proc_fops = &proc_cgroupstats_operations;
+
+out:
+       if (err)
+               bdi_destroy(&cgroup_backing_dev_info);
+
+       return err;
+}
+
+/*
+ * proc_cgroup_show()
+ *  - Print task's cgroup paths into seq_file, one line for each hierarchy
+ *  - Used for /proc/<pid>/cgroup.
+ *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
+ *    doesn't really matter if tsk->cgroup changes after we read it,
+ *    and we take cgroup_mutex, keeping attach_task() from changing it
+ *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
+ *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting task's
+ *    cgroup to top_cgroup.
+ */
+
+/* TODO: Use a proper seq_file iterator */
+static int proc_cgroup_show(struct seq_file *m, void *v)
+{
+       struct pid *pid;
+       struct task_struct *tsk;
+       char *buf;
+       int retval;
+       struct cgroupfs_root *root;
+
+       retval = -ENOMEM;
+       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!buf)
+               goto out;
+
+       retval = -ESRCH;
+       pid = m->private;
+       tsk = get_pid_task(pid, PIDTYPE_PID);
+       if (!tsk)
+               goto out_free;
+
+       retval = 0;
+
+       mutex_lock(&cgroup_mutex);
+
+       for_each_root(root) {
+               struct cgroup_subsys *ss;
+               struct cgroup *cgrp;
+               int subsys_id;
+               int count = 0;
+
+               /* Skip this hierarchy if it has no active subsystems */
+               if (!root->actual_subsys_bits)
+                       continue;
+               for_each_subsys(root, ss)
+                       seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+               seq_putc(m, ':');
+               get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
+               cgrp = task_cgroup(tsk, subsys_id);
+               retval = cgroup_path(cgrp, buf, PAGE_SIZE);
+               if (retval < 0)
+                       goto out_unlock;
+               seq_puts(m, buf);
+               seq_putc(m, '\n');
+       }
+
+out_unlock:
+       mutex_unlock(&cgroup_mutex);
+       put_task_struct(tsk);
+out_free:
+       kfree(buf);
+out:
+       return retval;
+}
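
For a task attached to two mounted hierarchies, this renders one line per
hierarchy: the subsystem names, a colon, then the cgroup path. Illustrative
/proc/<pid>/cgroup contents (the subsystems and paths are made up):

    cpuset:/batch/subjob1
    debug:/
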
+
+static int cgroup_open(struct inode *inode, struct file *file)
+{
+       struct pid *pid = PROC_I(inode)->pid;
+       return single_open(file, proc_cgroup_show, pid);
+}
+
+struct file_operations proc_cgroup_operations = {
+       .open           = cgroup_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+/* Display information about each subsystem and each hierarchy */
+static int proc_cgroupstats_show(struct seq_file *m, void *v)
+{
+       int i;
+       struct cgroupfs_root *root;
+
+       seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n");
+       mutex_lock(&cgroup_mutex);
+       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+               struct cgroup_subsys *ss = subsys[i];
+               seq_printf(m, "%s\t%lu\t%d\n",
+                          ss->name, ss->root->subsys_bits,
+                          ss->root->number_of_cgroups);
+       }
+       mutex_unlock(&cgroup_mutex);
+       return 0;
+}
+
+static int cgroupstats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, proc_cgroupstats_show, 0);
+}
+
+static struct file_operations proc_cgroupstats_operations = {
+       .open = cgroupstats_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
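
Illustrative /proc/cgroups output from proc_cgroupstats_show() above; note
that the "hierarchy" column is the root's subsys_bits bitmask, and all
values here are made up:

    #subsys_name    hierarchy       num_cgroups
    cpuset          1               4
    debug           0               1
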
+
+/**
+ * cgroup_fork - attach a newly forked task to its parent's cgroup.
+ * @child: pointer to task_struct of the newly forked child process.
+ *
+ * Description: A task inherits its parent's cgroup at fork().
+ *
+ * A pointer to the shared css_set was automatically copied in
+ * fork.c by dup_task_struct().  However, we ignore that copy, since
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * might no longer be a valid cgroup pointer.  attach_task() might
+ * have already changed current->cgroups, allowing the previously
+ * referenced css_set to be removed and freed.
+ *
+ * At the point that cgroup_fork() is called, 'current' is the parent
+ * task, and the passed argument 'child' points to the child task.
+ */
+void cgroup_fork(struct task_struct *child)
+{
+       task_lock(current);
+       child->cgroups = current->cgroups;
+       get_css_set(child->cgroups);
+       task_unlock(current);
+       INIT_LIST_HEAD(&child->cg_list);
+}
+
+/**
+ * cgroup_fork_callbacks - called on a new task very soon before
+ * adding it to the tasklist. No need to take any locks since no-one
+ * can be operating on this task
+ */
+void cgroup_fork_callbacks(struct task_struct *child)
+{
+       if (need_forkexit_callback) {
+               int i;
+               for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                       struct cgroup_subsys *ss = subsys[i];
+                       if (ss->fork)
+                               ss->fork(ss, child);
+               }
+       }
+}
+
+/**
+ * cgroup_post_fork - called on a new task after adding it to the
+ * task list. Adds the task to the list running through its css_set
+ * if necessary. Has to be after the task is visible on the task list
+ * in case we race with the first call to cgroup_iter_start() - to
+ * guarantee that the new task ends up on its list. */
+void cgroup_post_fork(struct task_struct *child)
+{
+       if (use_task_css_set_links) {
+               write_lock(&css_set_lock);
+               if (list_empty(&child->cg_list))
+                       list_add(&child->cg_list, &child->cgroups->tasks);
+               write_unlock(&css_set_lock);
+       }
+}
+
+/**
+ * cgroup_exit - detach cgroup from exiting task
+ * @tsk: pointer to task_struct of exiting process
+ *
+ * Description: Detach cgroup from @tsk and release it.
+ *
+ * Note that cgroups marked notify_on_release force every task in
+ * them to take the global cgroup_mutex when exiting.
+ * This could impact scaling on very large systems.  Be reluctant to
+ * use notify_on_release cgroups where very high task exit scaling
+ * is required on large systems.
+ *
+ * the_top_cgroup_hack:
+ *
+ *    Set the exiting task's cgroup to the root cgroup (top_cgroup).
+ *
+ *    We call cgroup_exit() while the task is still competent to
+ *    handle notify_on_release(), then leave the task attached to the
+ *    root cgroup in each hierarchy for the remainder of its exit.
+ *
+ *    To do this properly, we would increment the reference count on
+ *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
+ *    code we would add a second cgroup function call, to drop that
+ *    reference.  This would just create an unnecessary hot spot on
+ *    the top_cgroup reference count, to no avail.
+ *
+ *    Normally, holding a reference to a cgroup without bumping its
+ *    count is unsafe.   The cgroup could go away, or someone could
+ *    attach us to a different cgroup, decrementing the count on
+ *    the first cgroup that we never incremented.  But in this case,
+ *    top_cgroup isn't going away, and either task has PF_EXITING set,
+ *    which wards off any attach_task() attempts, or task is a failed
+ *    fork, never visible to attach_task.
+ *
+ */
+void cgroup_exit(struct task_struct *tsk, int run_callbacks)
+{
+       int i;
+       struct css_set *cg;
+
+       if (run_callbacks && need_forkexit_callback) {
+               for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+                       struct cgroup_subsys *ss = subsys[i];
+                       if (ss->exit)
+                               ss->exit(ss, tsk);
+               }
+       }
+
+       /*
+        * Unlink from the css_set task list if necessary.
+        * Optimistically check cg_list before taking
+        * css_set_lock
+        */
+       if (!list_empty(&tsk->cg_list)) {
+               write_lock(&css_set_lock);
+               if (!list_empty(&tsk->cg_list))
+                       list_del(&tsk->cg_list);
+               write_unlock(&css_set_lock);
+       }
+
+       /* Reassign the task to the init_css_set. */
+       task_lock(tsk);
+       cg = tsk->cgroups;
+       tsk->cgroups = &init_css_set;
+       task_unlock(tsk);
+       if (cg)
+               put_css_set_taskexit(cg);
+}
+
+/**
+ * cgroup_clone - duplicate the current cgroup in the hierarchy
+ * that the given subsystem is attached to, and move this task into
+ * the new child
+ */
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+{
+       struct dentry *dentry;
+       int ret = 0;
+       char nodename[MAX_CGROUP_TYPE_NAMELEN];
+       struct cgroup *parent, *child;
+       struct inode *inode;
+       struct css_set *cg;
+       struct cgroupfs_root *root;
+       struct cgroup_subsys *ss;
+
+       /* We shouldn't be called by an unregistered subsystem */
+       BUG_ON(!subsys->active);
+
+       /* First figure out what hierarchy and cgroup we're dealing
+        * with, and pin them so we can drop cgroup_mutex */
+       mutex_lock(&cgroup_mutex);
+ again:
+       root = subsys->root;
+       if (root == &rootnode) {
+               printk(KERN_INFO
+                      "Not cloning cgroup for unused subsystem %s\n",
+                      subsys->name);
+               mutex_unlock(&cgroup_mutex);
+               return 0;
+       }
+       cg = tsk->cgroups;
+       parent = task_cgroup(tsk, subsys->subsys_id);
+
+       snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);
+
+       /* Pin the hierarchy */
+       atomic_inc(&parent->root->sb->s_active);
+
+       /* Keep the cgroup alive */
+       get_css_set(cg);
+       mutex_unlock(&cgroup_mutex);
+
+       /* Now do the VFS work to create a cgroup */
+       inode = parent->dentry->d_inode;
+
+       /* Hold the parent directory mutex across this operation to
+        * stop anyone else deleting the new cgroup */
+       mutex_lock(&inode->i_mutex);
+       dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
+       if (IS_ERR(dentry)) {
+               printk(KERN_INFO
+                      "Couldn't allocate dentry for %s: %ld\n", nodename,
+                      PTR_ERR(dentry));
+               ret = PTR_ERR(dentry);
+               goto out_release;
+       }
+
+       /* Create the cgroup directory, which also creates the cgroup */
+       ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+       child = __d_cgrp(dentry);
+       dput(dentry);
+       if (ret) {
+               printk(KERN_INFO
+                      "Failed to create cgroup %s: %d\n", nodename,
+                      ret);
+               goto out_release;
+       }
+
+       if (!child) {
+               printk(KERN_INFO
+                      "Couldn't find new cgroup %s\n", nodename);
+               ret = -ENOMEM;
+               goto out_release;
+       }
+
+       /* The cgroup now exists. Retake cgroup_mutex and check
+        * that we're still in the same state that we thought we
+        * were. */
+       mutex_lock(&cgroup_mutex);
+       if ((root != subsys->root) ||
+           (parent != task_cgroup(tsk, subsys->subsys_id))) {
+               /* Aargh, we raced ... */
+               mutex_unlock(&inode->i_mutex);
+               put_css_set(cg);
+
+               deactivate_super(parent->root->sb);
+               /* The cgroup is still accessible in the VFS, but
+                * we're not going to try to rmdir() it at this
+                * point. */
+               printk(KERN_INFO
+                      "Race in cgroup_clone() - leaking cgroup %s\n",
+                      nodename);
+               goto again;
+       }
+
+       /* do any required auto-setup */
+       for_each_subsys(root, ss) {
+               if (ss->post_clone)
+                       ss->post_clone(ss, child);
+       }
+
+       /* All seems fine. Finish by moving the task into the new cgroup */
+       ret = attach_task(child, tsk);
+       mutex_unlock(&cgroup_mutex);
+
+ out_release:
+       mutex_unlock(&inode->i_mutex);
+
+       mutex_lock(&cgroup_mutex);
+       put_css_set(cg);
+       mutex_unlock(&cgroup_mutex);
+       deactivate_super(parent->root->sb);
+       return ret;
+}
+
+/*
+ * See if "cgrp" is a descendant of the current task's cgroup in
+ * the appropriate hierarchy
+ *
+ * If we are sending in dummytop, then presumably we are creating
+ * the top cgroup in the subsystem.
+ *
+ * Called only by the ns (nsproxy) cgroup.
+ */
+int cgroup_is_descendant(const struct cgroup *cgrp)
+{
+       int ret;
+       struct cgroup *target;
+       int subsys_id;
+
+       if (cgrp == dummytop)
+               return 1;
+
+       get_first_subsys(cgrp, NULL, &subsys_id);
+       target = task_cgroup(current, subsys_id);
+       while (cgrp != target && cgrp != cgrp->top_cgroup)
+               cgrp = cgrp->parent;
+       ret = (cgrp == target);
+       return ret;
+}
+
+static void check_for_release(struct cgroup *cgrp)
+{
+       /* All of these checks rely on RCU to keep the cgroup
+        * structure alive */
+       if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
+           && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
+               /* Control Group is currently removable. If it's not
+                * already queued for a userspace notification, queue
+                * it now */
+               int need_schedule_work = 0;
+               spin_lock(&release_list_lock);
+               if (!cgroup_is_removed(cgrp) &&
+                   list_empty(&cgrp->release_list)) {
+                       list_add(&cgrp->release_list, &release_list);
+                       need_schedule_work = 1;
+               }
+               spin_unlock(&release_list_lock);
+               if (need_schedule_work)
+                       schedule_work(&release_agent_work);
+       }
+}
+
+void __css_put(struct cgroup_subsys_state *css)
+{
+       struct cgroup *cgrp = css->cgroup;
+       rcu_read_lock();
+       if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
+               set_bit(CGRP_RELEASABLE, &cgrp->flags);
+               check_for_release(cgrp);
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * Notify userspace when a cgroup is released, by running the
+ * configured release agent with the name of the cgroup (path
+ * relative to the root of cgroup file system) as the argument.
+ *
+ * Most likely, this user command will try to rmdir this cgroup.
+ *
+ * This races with the possibility that some other task will be
+ * attached to this cgroup before it is removed, or that some other
+ * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
+ * The presumed 'rmdir' will fail quietly if this cgroup is no longer
+ * unused, and this cgroup will be reprieved from its death sentence,
+ * to continue to serve a useful existence.  Next time it's released,
+ * we will get notified again, if it still has 'notify_on_release' set.
+ *
+ * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
+ * means only wait until the task is successfully execve()'d.  The
+ * separate release agent task is forked by call_usermodehelper(),
+ * then control in this thread returns here, without waiting for the
+ * release agent task.  We don't bother to wait because the caller of
+ * this routine has no use for the exit status of the release agent
+ * task, so no sense holding our caller up for that.
+ *
+ */
+
+static void cgroup_release_agent(struct work_struct *work)
+{
+       BUG_ON(work != &release_agent_work);
+       mutex_lock(&cgroup_mutex);
+       spin_lock(&release_list_lock);
+       while (!list_empty(&release_list)) {
+               char *argv[3], *envp[3];
+               int i;
+               char *pathbuf;
+               struct cgroup *cgrp = list_entry(release_list.next,
+                                                   struct cgroup,
+                                                   release_list);
+               list_del_init(&cgrp->release_list);
+               spin_unlock(&release_list_lock);
+               pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               if (!pathbuf) {
+                       spin_lock(&release_list_lock);
+                       continue;
+               }
+
+               if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
+                       kfree(pathbuf);
+                       spin_lock(&release_list_lock);
+                       continue;
+               }
+
+               i = 0;
+               argv[i++] = cgrp->root->release_agent_path;
+               argv[i++] = (char *)pathbuf;
+               argv[i] = NULL;
+
+               i = 0;
+               /* minimal command environment */
+               envp[i++] = "HOME=/";
+               envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+               envp[i] = NULL;
+
+               /* Drop the lock while we invoke the usermode helper,
+                * since the exec could involve hitting disk and hence
+                * be a slow process */
+               mutex_unlock(&cgroup_mutex);
+               call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+               kfree(pathbuf);
+               mutex_lock(&cgroup_mutex);
+               spin_lock(&release_list_lock);
+       }
+       spin_unlock(&release_list_lock);
+       mutex_unlock(&cgroup_mutex);
+}
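
For illustration, a minimal userspace release agent might look like the
sketch below. It is hypothetical (not part of this patch): it assumes the
hierarchy is mounted at /dev/cgroup and simply attempts the rmdir that the
comment above anticipates.

/* Hypothetical release agent: argv[1] is the cgroup path relative to the
 * hierarchy root, as passed by cgroup_release_agent().  The /dev/cgroup
 * mount point is an assumption of this sketch. */
#include <stdio.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	char path[4096];

	if (argc < 2)
		return 1;
	snprintf(path, sizeof(path), "/dev/cgroup/%s", argv[1]);
	/* Fails quietly if tasks or children reappeared in the cgroup. */
	if (rmdir(path) < 0)
		perror("rmdir");
	return 0;
}
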
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
new file mode 100644 (file)
index 0000000..37301e8
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * kernel/cgroup_debug.c - Example cgroup subsystem that
+ * exposes debug info
+ *
+ * Copyright (C) Google Inc, 2007
+ *
+ * Developed by Paul Menage (menage@google.com)
+ *
+ */
+
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+
+#include <asm/atomic.h>
+
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+                                                  struct cgroup *cont)
+{
+       struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+       if (!css)
+               return ERR_PTR(-ENOMEM);
+
+       return css;
+}
+
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       kfree(cont->subsys[debug_subsys_id]);
+}
+
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+       return atomic_read(&cont->count);
+}
+
+static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+       u64 count;
+
+       cgroup_lock();
+       count = cgroup_task_count(cont);
+       cgroup_unlock();
+       return count;
+}
+
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+       return (u64)(long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+                                          struct cftype *cft)
+{
+       u64 count;
+
+       rcu_read_lock();
+       count = atomic_read(&current->cgroups->ref.refcount);
+       rcu_read_unlock();
+       return count;
+}
+
+static struct cftype files[] = {
+       {
+               .name = "cgroup_refcount",
+               .read_uint = cgroup_refcount_read,
+       },
+       {
+               .name = "taskcount",
+               .read_uint = taskcount_read,
+       },
+       {
+               .name = "current_css_set",
+               .read_uint = current_css_set_read,
+       },
+       {
+               .name = "current_css_set_refcount",
+               .read_uint = current_css_set_refcount_read,
+       },
+};
+
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys debug_subsys = {
+       .name = "debug",
+       .create = debug_create,
+       .destroy = debug_destroy,
+       .populate = debug_populate,
+       .subsys_id = debug_subsys_id,
+};
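
The debug subsystem is consumed entirely through the cgroup filesystem.
A hedged sketch of exercising it from userspace; the /dev/cgroup mount
point and the subsystem-prefixed file name ("debug.taskcount") are
assumptions, not something this patch guarantees:

/* Sketch: mount a hierarchy with only the debug subsystem bound and
 * read back one of its files. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	char buf[64];
	FILE *f;

	if (mount("cgroup", "/dev/cgroup", "cgroup", 0, "debug") < 0)
		perror("mount");	/* may already be mounted */

	f = fopen("/dev/cgroup/debug.taskcount", "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("tasks in root cgroup: %s", buf);
	if (f)
		fclose(f);
	return 0;
}
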
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a21f71af9d81fba3c0012afd8538d25a1f7c0c62..6b3a0c15144f3404294dde422348e17242990e2b 100644 (file)
@@ -98,7 +98,8 @@ static inline void check_for_tasks(int cpu)
                     !cputime_eq(p->stime, cputime_zero)))
                        printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
                                (state = %ld, flags = %x) \n",
-                                p->comm, p->pid, cpu, p->state, p->flags);
+                                p->comm, task_pid_nr(p), cpu,
+                                p->state, p->flags);
        }
        write_unlock_irq(&tasklist_lock);
 }
@@ -264,6 +265,15 @@ out_notify:
 int __cpuinit cpu_up(unsigned int cpu)
 {
        int err = 0;
+       if (!cpu_isset(cpu, cpu_possible_map)) {
+               printk(KERN_ERR "can't online cpu %d because it is not "
+                       "configured as may-hotadd at boot time\n", cpu);
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
+               printk(KERN_ERR "please check additional_cpus= boot "
+                               "parameter\n");
+#endif
+               return -EINVAL;
+       }
 
        mutex_lock(&cpu_add_remove_lock);
        if (cpu_hotplug_disabled)
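
The new guard surfaces to userspace as a failed write to the sysfs online
file; the printk above explains the -EINVAL. A sketch of the trigger path
(cpu2 is an arbitrary example, and the sysfs layout is the standard one,
not something this patch adds):

/* Sketch: online a CPU from userspace; the kernel side lands in cpu_up().
 * If the CPU was not marked possible at boot, the write fails. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/cpu2/online", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fputs("1", f) == EOF || fclose(f) != 0)
		perror("online");	/* -EINVAL if cpu2 is not possible */
	return 0;
}
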
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c
new file mode 100644 (file)
index 0000000..731e47e
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+ * kernel/cpu_acct.c - CPU accounting cgroup subsystem
+ *
+ * Copyright (C) Google Inc, 2006
+ *
+ * Developed by Paul Menage (menage@google.com) and Balbir Singh
+ * (balbir@in.ibm.com)
+ *
+ */
+
+/*
+ * Example cgroup subsystem for reporting total CPU usage of tasks in a
+ * cgroup, along with percentage load over a time interval
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/rcupdate.h>
+
+#include <asm/div64.h>
+
+struct cpuacct {
+       struct cgroup_subsys_state css;
+       spinlock_t lock;
+       /* total time used by this class */
+       cputime64_t time;
+
+       /* time when next load calculation occurs */
+       u64 next_interval_check;
+
+       /* time used in current period */
+       cputime64_t current_interval_time;
+
+       /* time used in last period */
+       cputime64_t last_interval_time;
+};
+
+struct cgroup_subsys cpuacct_subsys;
+
+static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
+{
+       return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
+                           struct cpuacct, css);
+}
+
+static inline struct cpuacct *task_ca(struct task_struct *task)
+{
+       return container_of(task_subsys_state(task, cpuacct_subsys_id),
+                           struct cpuacct, css);
+}
+
+#define INTERVAL (HZ * 10)
+
+static inline u64 next_interval_boundary(u64 now)
+{
+       /* calculate the next interval boundary beyond the
+        * current time */
+       do_div(now, INTERVAL);
+       return (now + 1) * INTERVAL;
+}
+
+static struct cgroup_subsys_state *cpuacct_create(
+       struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
+       if (!ca)
+               return ERR_PTR(-ENOMEM);
+       spin_lock_init(&ca->lock);
+       ca->next_interval_check = next_interval_boundary(get_jiffies_64());
+       return &ca->css;
+}
+
+static void cpuacct_destroy(struct cgroup_subsys *ss,
+                           struct cgroup *cont)
+{
+       kfree(cgroup_ca(cont));
+}
+
+/* Lazily update the load calculation if necessary. Called with ca locked */
+static void cpuusage_update(struct cpuacct *ca)
+{
+       u64 now = get_jiffies_64();
+
+       /* If we're not due for an update, return */
+       if (ca->next_interval_check > now)
+               return;
+
+       if (ca->next_interval_check <= (now - INTERVAL)) {
+               /* If it's been more than an interval since the last
+                * check, then catch up - the last interval must have
+                * been zero load */
+               ca->last_interval_time = 0;
+               ca->next_interval_check = next_interval_boundary(now);
+       } else {
+               /* If a steal takes the last interval time negative,
+                * then we just ignore it */
+               if ((s64)ca->current_interval_time > 0)
+                       ca->last_interval_time = ca->current_interval_time;
+               else
+                       ca->last_interval_time = 0;
+               ca->next_interval_check += INTERVAL;
+       }
+       ca->current_interval_time = 0;
+}
+
+static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
+{
+       struct cpuacct *ca = cgroup_ca(cont);
+       u64 time;
+
+       spin_lock_irq(&ca->lock);
+       cpuusage_update(ca);
+       time = cputime64_to_jiffies64(ca->time);
+       spin_unlock_irq(&ca->lock);
+
+       /* Convert 64-bit jiffies to milliseconds */
+       time *= 1000;
+       do_div(time, HZ);
+       return time;
+}
+
+static u64 load_read(struct cgroup *cont, struct cftype *cft)
+{
+       struct cpuacct *ca = cgroup_ca(cont);
+       u64 time;
+
+       /* Find the time used in the previous interval */
+       spin_lock_irq(&ca->lock);
+       cpuusage_update(ca);
+       time = cputime64_to_jiffies64(ca->last_interval_time);
+       spin_unlock_irq(&ca->lock);
+
+       /* Convert time to a percentage, to give the load in the
+        * previous period */
+       time *= 100;
+       do_div(time, INTERVAL);
+
+       return time;
+}
+
+static struct cftype files[] = {
+       {
+               .name = "usage",
+               .read_uint = cpuusage_read,
+       },
+       {
+               .name = "load",
+               .read_uint = load_read,
+       }
+};
+
+static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+void cpuacct_charge(struct task_struct *task, cputime_t cputime)
+{
+       struct cpuacct *ca;
+       unsigned long flags;
+
+       if (!cpuacct_subsys.active)
+               return;
+       rcu_read_lock();
+       ca = task_ca(task);
+       if (ca) {
+               spin_lock_irqsave(&ca->lock, flags);
+               cpuusage_update(ca);
+               ca->time = cputime64_add(ca->time, cputime);
+               ca->current_interval_time =
+                       cputime64_add(ca->current_interval_time, cputime);
+               spin_unlock_irqrestore(&ca->lock, flags);
+       }
+       rcu_read_unlock();
+}
+
+struct cgroup_subsys cpuacct_subsys = {
+       .name = "cpuacct",
+       .create = cpuacct_create,
+       .destroy = cpuacct_destroy,
+       .populate = cpuacct_populate,
+       .subsys_id = cpuacct_subsys_id,
+};
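
Read back through the cgroup filesystem, "usage" reports accumulated CPU
time in milliseconds and "load" the previous 10-second interval as a
percentage. A hedged userspace sketch; the /dev/cgroup mount point and
the prefixed file names are assumptions:

/* Sketch: sample the cpuacct files for the root group. */
#include <stdio.h>

static long read_val(const char *path)
{
	FILE *f = fopen(path, "r");
	long v = -1;

	if (f) {
		if (fscanf(f, "%ld", &v) != 1)
			v = -1;
		fclose(f);
	}
	return v;
}

int main(void)
{
	printf("usage: %ld ms\n", read_val("/dev/cgroup/cpuacct.usage"));
	printf("load:  %ld%%\n", read_val("/dev/cgroup/cpuacct.load"));
	return 0;
}
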
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 64950fa5d3211c5f2843e9637b38452a70eef9f7..50f5dc46368841de3497fe96534df12b871e530b 100644 (file)
@@ -4,7 +4,8 @@
  *  Processor and Memory placement constraints for sets of tasks.
  *
  *  Copyright (C) 2003 BULL SA.
- *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
+ *  Copyright (C) 2006 Google, Inc
  *
  *  Portions derived from Patrick Mochel's sysfs code.
  *  sysfs is Copyright (c) 2001-3 Patrick Mochel
@@ -12,6 +13,7 @@
  *  2003-10-10 Written by Simon Derr.
  *  2003-10-22 Updates by Stephen Hemminger.
  *  2004 May-July Rework by Paul Jackson.
+ *  2006 Rework by Paul Menage to use generic cgroups
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of the Linux
@@ -36,6 +38,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
+#include <linux/prio_heap.h>
 #include <linux/proc_fs.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
@@ -52,8 +55,7 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 #include <linux/mutex.h>
-
-#define CPUSET_SUPER_MAGIC             0x27e0eb
+#include <linux/kfifo.h>
 
 /*
  * Tracks how many cpusets are currently defined in system.
  */
 int number_of_cpusets __read_mostly;
 
+/* Forward declarations */
+struct cgroup_subsys cpuset_subsys;
+struct cpuset;
+
 /* See "Frequency meter" comments, below. */
 
 struct fmeter {
@@ -72,24 +78,13 @@ struct fmeter {
 };
 
 struct cpuset {
+       struct cgroup_subsys_state css;
+
        unsigned long flags;            /* "unsigned long" so bitops work */
        cpumask_t cpus_allowed;         /* CPUs allowed to tasks in cpuset */
        nodemask_t mems_allowed;        /* Memory Nodes allowed to tasks */
 
-       /*
-        * Count is atomic so can incr (fork) or decr (exit) without a lock.
-        */
-       atomic_t count;                 /* count tasks using this cpuset */
-
-       /*
-        * We link our 'sibling' struct into our parents 'children'.
-        * Our children link their 'sibling' into our 'children'.
-        */
-       struct list_head sibling;       /* my parents children */
-       struct list_head children;      /* my children */
-
        struct cpuset *parent;          /* my parent */
-       struct dentry *dentry;          /* cpuset fs entry */
 
        /*
         * Copy of global cpuset_mems_generation as of the most
@@ -98,15 +93,32 @@ struct cpuset {
        int mems_generation;
 
        struct fmeter fmeter;           /* memory_pressure filter */
+
+       /* partition number for rebuild_sched_domains() */
+       int pn;
 };
 
+/* Retrieve the cpuset for a cgroup */
+static inline struct cpuset *cgroup_cs(struct cgroup *cont)
+{
+       return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
+                           struct cpuset, css);
+}
+
+/* Retrieve the cpuset for a task */
+static inline struct cpuset *task_cs(struct task_struct *task)
+{
+       return container_of(task_subsys_state(task, cpuset_subsys_id),
+                           struct cpuset, css);
+}
+
 /* bits in struct cpuset flags field */
 typedef enum {
        CS_CPU_EXCLUSIVE,
        CS_MEM_EXCLUSIVE,
        CS_MEMORY_MIGRATE,
-       CS_REMOVED,
-       CS_NOTIFY_ON_RELEASE,
+       CS_SCHED_LOAD_BALANCE,
        CS_SPREAD_PAGE,
        CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
@@ -122,14 +134,9 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
        return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
-static inline int is_removed(const struct cpuset *cs)
+static inline int is_sched_load_balance(const struct cpuset *cs)
 {
-       return test_bit(CS_REMOVED, &cs->flags);
-}
-
-static inline int notify_on_release(const struct cpuset *cs)
-{
-       return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+       return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 }
 
 static inline int is_memory_migrate(const struct cpuset *cs)
@@ -172,14 +179,8 @@ static struct cpuset top_cpuset = {
        .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
        .cpus_allowed = CPU_MASK_ALL,
        .mems_allowed = NODE_MASK_ALL,
-       .count = ATOMIC_INIT(0),
-       .sibling = LIST_HEAD_INIT(top_cpuset.sibling),
-       .children = LIST_HEAD_INIT(top_cpuset.children),
 };
 
-static struct vfsmount *cpuset_mount;
-static struct super_block *cpuset_sb;
-
 /*
  * We have two global cpuset mutexes below.  They can nest.
  * It is ok to first take manage_mutex, then nest callback_mutex.  We also
@@ -263,297 +264,33 @@ static struct super_block *cpuset_sb;
  * the routine cpuset_update_task_memory_state().
  */
 
-static DEFINE_MUTEX(manage_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
-/*
- * A couple of forward declarations required, due to cyclic reference loop:
- *  cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
- *  -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
- */
-
-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode);
-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry);
-
-static struct backing_dev_info cpuset_backing_dev_info = {
-       .ra_pages = 0,          /* No readahead */
-       .capabilities   = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-};
-
-static struct inode *cpuset_new_inode(mode_t mode)
-{
-       struct inode *inode = new_inode(cpuset_sb);
-
-       if (inode) {
-               inode->i_mode = mode;
-               inode->i_uid = current->fsuid;
-               inode->i_gid = current->fsgid;
-               inode->i_blocks = 0;
-               inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-               inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
-       }
-       return inode;
-}
-
-static void cpuset_diput(struct dentry *dentry, struct inode *inode)
-{
-       /* is dentry a directory ? if so, kfree() associated cpuset */
-       if (S_ISDIR(inode->i_mode)) {
-               struct cpuset *cs = dentry->d_fsdata;
-               BUG_ON(!(is_removed(cs)));
-               kfree(cs);
-       }
-       iput(inode);
-}
-
-static struct dentry_operations cpuset_dops = {
-       .d_iput = cpuset_diput,
-};
-
-static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
-{
-       struct dentry *d = lookup_one_len(name, parent, strlen(name));
-       if (!IS_ERR(d))
-               d->d_op = &cpuset_dops;
-       return d;
-}
-
-static void remove_dir(struct dentry *d)
-{
-       struct dentry *parent = dget(d->d_parent);
-
-       d_delete(d);
-       simple_rmdir(parent->d_inode, d);
-       dput(parent);
-}
-
-/*
- * NOTE : the dentry must have been dget()'ed
- */
-static void cpuset_d_remove_dir(struct dentry *dentry)
-{
-       struct list_head *node;
-
-       spin_lock(&dcache_lock);
-       node = dentry->d_subdirs.next;
-       while (node != &dentry->d_subdirs) {
-               struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
-               list_del_init(node);
-               if (d->d_inode) {
-                       d = dget_locked(d);
-                       spin_unlock(&dcache_lock);
-                       d_delete(d);
-                       simple_unlink(dentry->d_inode, d);
-                       dput(d);
-                       spin_lock(&dcache_lock);
-               }
-               node = dentry->d_subdirs.next;
-       }
-       list_del_init(&dentry->d_u.d_child);
-       spin_unlock(&dcache_lock);
-       remove_dir(dentry);
-}
-
-static struct super_operations cpuset_ops = {
-       .statfs = simple_statfs,
-       .drop_inode = generic_delete_inode,
-};
-
-static int cpuset_fill_super(struct super_block *sb, void *unused_data,
-                                                       int unused_silent)
-{
-       struct inode *inode;
-       struct dentry *root;
-
-       sb->s_blocksize = PAGE_CACHE_SIZE;
-       sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-       sb->s_magic = CPUSET_SUPER_MAGIC;
-       sb->s_op = &cpuset_ops;
-       cpuset_sb = sb;
-
-       inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR);
-       if (inode) {
-               inode->i_op = &simple_dir_inode_operations;
-               inode->i_fop = &simple_dir_operations;
-               /* directories start off with i_nlink == 2 (for "." entry) */
-               inc_nlink(inode);
-       } else {
-               return -ENOMEM;
-       }
-
-       root = d_alloc_root(inode);
-       if (!root) {
-               iput(inode);
-               return -ENOMEM;
-       }
-       sb->s_root = root;
-       return 0;
-}
-
+/* This is ugly, but preserves the userspace API for existing cpuset
+ * users. If someone tries to mount the "cpuset" filesystem, we
+ * silently switch it to mount "cgroup" instead */
 static int cpuset_get_sb(struct file_system_type *fs_type,
                         int flags, const char *unused_dev_name,
                         void *data, struct vfsmount *mnt)
 {
-       return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
+       struct file_system_type *cgroup_fs = get_fs_type("cgroup");
+       int ret = -ENODEV;
+       if (cgroup_fs) {
+               char mountopts[] =
+                       "cpuset,noprefix,"
+                       "release_agent=/sbin/cpuset_release_agent";
+               ret = cgroup_fs->get_sb(cgroup_fs, flags,
+                                          unused_dev_name, mountopts, mnt);
+               put_filesystem(cgroup_fs);
+       }
+       return ret;
 }
 
 static struct file_system_type cpuset_fs_type = {
        .name = "cpuset",
        .get_sb = cpuset_get_sb,
-       .kill_sb = kill_litter_super,
 };
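
The shim keeps the historical mount invocation working verbatim. A sketch
of the userspace side, equivalent to "mount -t cpuset none /dev/cpuset";
the pre-created /dev/cpuset mount point is an assumption:

/* Sketch: mounting "cpuset" now transparently becomes a "cgroup" mount
 * with the cpuset subsystem, noprefix, and the legacy release agent. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("none", "/dev/cpuset", "cpuset", 0, NULL) < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}
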
 
-/* struct cftype:
- *
- * The files in the cpuset filesystem mostly have a very simple read/write
- * handling, some common function will take care of it. Nevertheless some cases
- * (read tasks) are special and therefore I define this structure for every
- * kind of file.
- *
- *
- * When reading/writing to a file:
- *     - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
- *     - the 'cftype' of the file is file->f_path.dentry->d_fsdata
- */
-
-struct cftype {
-       char *name;
-       int private;
-       int (*open) (struct inode *inode, struct file *file);
-       ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes,
-                                                       loff_t *ppos);
-       int (*write) (struct file *file, const char __user *buf, size_t nbytes,
-                                                       loff_t *ppos);
-       int (*release) (struct inode *inode, struct file *file);
-};
-
-static inline struct cpuset *__d_cs(struct dentry *dentry)
-{
-       return dentry->d_fsdata;
-}
-
-static inline struct cftype *__d_cft(struct dentry *dentry)
-{
-       return dentry->d_fsdata;
-}
-
-/*
- * Call with manage_mutex held.  Writes path of cpuset into buf.
- * Returns 0 on success, -errno on error.
- */
-
-static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
-{
-       char *start;
-
-       start = buf + buflen;
-
-       *--start = '\0';
-       for (;;) {
-               int len = cs->dentry->d_name.len;
-               if ((start -= len) < buf)
-                       return -ENAMETOOLONG;
-               memcpy(start, cs->dentry->d_name.name, len);
-               cs = cs->parent;
-               if (!cs)
-                       break;
-               if (!cs->parent)
-                       continue;
-               if (--start < buf)
-                       return -ENAMETOOLONG;
-               *start = '/';
-       }
-       memmove(buf, start, buf + buflen - start);
-       return 0;
-}
-
-/*
- * Notify userspace when a cpuset is released, by running
- * /sbin/cpuset_release_agent with the name of the cpuset (path
- * relative to the root of cpuset file system) as the argument.
- *
- * Most likely, this user command will try to rmdir this cpuset.
- *
- * This races with the possibility that some other task will be
- * attached to this cpuset before it is removed, or that some other
- * user task will 'mkdir' a child cpuset of this cpuset.  That's ok.
- * The presumed 'rmdir' will fail quietly if this cpuset is no longer
- * unused, and this cpuset will be reprieved from its death sentence,
- * to continue to serve a useful existence.  Next time it's released,
- * we will get notified again, if it still has 'notify_on_release' set.
- *
- * The final arg to call_usermodehelper() is 0, which means don't
- * wait.  The separate /sbin/cpuset_release_agent task is forked by
- * call_usermodehelper(), then control in this thread returns here,
- * without waiting for the release agent task.  We don't bother to
- * wait because the caller of this routine has no use for the exit
- * status of the /sbin/cpuset_release_agent task, so no sense holding
- * our caller up for that.
- *
- * When we had only one cpuset mutex, we had to call this
- * without holding it, to avoid deadlock when call_usermodehelper()
- * allocated memory.  With two locks, we could now call this while
- * holding manage_mutex, but we still don't, so as to minimize
- * the time manage_mutex is held.
- */
-
-static void cpuset_release_agent(const char *pathbuf)
-{
-       char *argv[3], *envp[3];
-       int i;
-
-       if (!pathbuf)
-               return;
-
-       i = 0;
-       argv[i++] = "/sbin/cpuset_release_agent";
-       argv[i++] = (char *)pathbuf;
-       argv[i] = NULL;
-
-       i = 0;
-       /* minimal command environment */
-       envp[i++] = "HOME=/";
-       envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-       envp[i] = NULL;
-
-       call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-       kfree(pathbuf);
-}
-
-/*
- * Either cs->count of using tasks transitioned to zero, or the
- * cs->children list of child cpusets just became empty.  If this
- * cs is notify_on_release() and now both the user count is zero and
- * the list of children is empty, prepare cpuset path in a kmalloc'd
- * buffer, to be returned via ppathbuf, so that the caller can invoke
- * cpuset_release_agent() with it later on, once manage_mutex is dropped.
- * Call here with manage_mutex held.
- *
- * This check_for_release() routine is responsible for kmalloc'ing
- * pathbuf.  The above cpuset_release_agent() is responsible for
- * kfree'ing pathbuf.  The caller of these routines is responsible
- * for providing a pathbuf pointer, initialized to NULL, then
- * calling check_for_release() with manage_mutex held and the address
- * of the pathbuf pointer, then dropping manage_mutex, then calling
- * cpuset_release_agent() with pathbuf, as set by check_for_release().
- */
-
-static void check_for_release(struct cpuset *cs, char **ppathbuf)
-{
-       if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
-           list_empty(&cs->children)) {
-               char *buf;
-
-               buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-               if (!buf)
-                       return;
-               if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
-                       kfree(buf);
-               else
-                       *ppathbuf = buf;
-       }
-}
-
 /*
  * Return in *pmask the portion of a cpusets's cpus_allowed that
  * are online.  If none are online, walk up the cpuset hierarchy
@@ -653,20 +390,19 @@ void cpuset_update_task_memory_state(void)
        struct task_struct *tsk = current;
        struct cpuset *cs;
 
-       if (tsk->cpuset == &top_cpuset) {
+       if (task_cs(tsk) == &top_cpuset) {
                /* Don't need rcu for top_cpuset.  It's never freed. */
                my_cpusets_mem_gen = top_cpuset.mems_generation;
        } else {
                rcu_read_lock();
-               cs = rcu_dereference(tsk->cpuset);
-               my_cpusets_mem_gen = cs->mems_generation;
+               my_cpusets_mem_gen = task_cs(current)->mems_generation;
                rcu_read_unlock();
        }
 
        if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
                mutex_lock(&callback_mutex);
                task_lock(tsk);
-               cs = tsk->cpuset;       /* Maybe changed when task not locked */
+               cs = task_cs(tsk); /* Maybe changed when task not locked */
                guarantee_online_mems(cs, &tsk->mems_allowed);
                tsk->cpuset_mems_generation = cs->mems_generation;
                if (is_spread_page(cs))
@@ -721,11 +457,12 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 
 static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 {
+       struct cgroup *cont;
        struct cpuset *c, *par;
 
        /* Each of our child cpusets must be a subset of us */
-       list_for_each_entry(c, &cur->children, sibling) {
-               if (!is_cpuset_subset(c, trial))
+       list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
+               if (!is_cpuset_subset(cgroup_cs(cont), trial))
                        return -EBUSY;
        }
 
@@ -740,7 +477,8 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
                return -EACCES;
 
        /* If either I or some sibling (!= me) is exclusive, we can't overlap */
-       list_for_each_entry(c, &par->children, sibling) {
+       list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
+               c = cgroup_cs(cont);
                if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                    c != cur &&
                    cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
@@ -751,9 +489,249 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
                        return -EINVAL;
        }
 
+       /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
+       if (cgroup_task_count(cur->css.cgroup)) {
+               if (cpus_empty(trial->cpus_allowed) ||
+                   nodes_empty(trial->mems_allowed)) {
+                       return -ENOSPC;
+               }
+       }
+
        return 0;
 }
 
+/*
+ * Helper routine for rebuild_sched_domains().
+ * Do cpusets a, b have overlapping cpus_allowed masks?
+ */
+
+static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
+{
+       return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
+}
+
+/*
+ * rebuild_sched_domains()
+ *
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
+ *
+ * This routine builds a partial partition of the system's CPUs
+ * (the set of non-overlapping cpumask_t's in the array 'doms'
+ * below), and passes that partial partition to the kernel/sched.c
+ * partition_sched_domains() routine, which will rebuild the
+ * scheduler's load balancing domains (sched domains) as specified
+ * by that partial partition.  A 'partial partition' is a set of
+ * non-overlapping subsets whose union is a subset of that set.
+ *
+ * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * for a background explanation of this.
+ *
+ * Does not return errors, on the theory that the callers of this
+ * routine would rather not worry about failures to rebuild sched
+ * domains when operating in the severe memory shortage situations
+ * that could cause allocation failures below.
+ *
+ * Call with cgroup_mutex held.  May take callback_mutex during
+ * call due to the kfifo_alloc() and kmalloc() calls.  May nest
+ * a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ * Must not be called holding callback_mutex, because we must not
+ * call lock_cpu_hotplug() while holding callback_mutex.  Elsewhere
+ * the kernel nests callback_mutex inside lock_cpu_hotplug() calls.
+ * So the reverse nesting would risk an ABBA deadlock.
+ *
+ * The three key local variables below are:
+ *    q  - a kfifo queue of cpuset pointers, used to implement a
+ *        top-down scan of all cpusets.  This scan loads a pointer
+ *        to each cpuset marked is_sched_load_balance into the
+ *        array 'csa'.  For our purposes, rebuilding the scheduler's
+ *        sched domains, we can ignore !is_sched_load_balance cpusets.
+ *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
+ *        that need to be load balanced, for convenient iterative
+ *        access by the subsequent code that finds the best partition,
+ *        i.e the set of domains (subsets) of CPUs such that the
+ *        cpus_allowed of every cpuset marked is_sched_load_balance
+ *        is a subset of one of these domains, while there are as
+ *        many such domains as possible, each as small as possible.
+ * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
+ *        the kernel/sched.c routine partition_sched_domains() in a
+ *        convenient format, that can be easily compared to the prior
+ *        value to determine what partition elements (sched domains)
+ *        were changed (added or removed.)
+ *
+ * Finding the best partition (set of domains):
+ *     The triple nested loops below over i, j, k scan over the
+ *     load balanced cpusets (using the array of cpuset pointers in
+ *     csa[]) looking for pairs of cpusets that have overlapping
+ *     cpus_allowed, but which don't have the same 'pn' partition
+ *     number, and merges them into the same partition number.  It keeps
+ *     looping on the 'restart' label until it can no longer find
+ *     any such pairs.
+ *
+ *     The union of the cpus_allowed masks from the set of
+ *     all cpusets having the same 'pn' value then form the one
+ *     element of the partition (one sched domain) to be passed to
+ *     partition_sched_domains().
+ */
+
+static void rebuild_sched_domains(void)
+{
+       struct kfifo *q;        /* queue of cpusets to be scanned */
+       struct cpuset *cp;      /* scans q */
+       struct cpuset **csa;    /* array of all cpuset ptrs */
+       int csn;                /* how many cpuset ptrs in csa so far */
+       int i, j, k;            /* indices for partition finding loops */
+       cpumask_t *doms;        /* resulting partition; i.e. sched domains */
+       int ndoms;              /* number of sched domains in result */
+       int nslot;              /* next empty doms[] cpumask_t slot */
+
+       q = NULL;
+       csa = NULL;
+       doms = NULL;
+
+       /* Special case for the 99% of systems with one, full, sched domain */
+       if (is_sched_load_balance(&top_cpuset)) {
+               ndoms = 1;
+               doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+               if (!doms)
+                       goto rebuild;
+               *doms = top_cpuset.cpus_allowed;
+               goto rebuild;
+       }
+
+       q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
+       if (IS_ERR(q))
+               goto done;
+       csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
+       if (!csa)
+               goto done;
+       csn = 0;
+
+       cp = &top_cpuset;
+       __kfifo_put(q, (void *)&cp, sizeof(cp));
+       while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
+               struct cgroup *cont;
+               struct cpuset *child;   /* scans child cpusets of cp */
+               if (is_sched_load_balance(cp))
+                       csa[csn++] = cp;
+               list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+                       child = cgroup_cs(cont);
+                       __kfifo_put(q, (void *)&child, sizeof(cp));
+               }
+       }
+
+       for (i = 0; i < csn; i++)
+               csa[i]->pn = i;
+       ndoms = csn;
+
+restart:
+       /* Find the best partition (set of sched domains) */
+       for (i = 0; i < csn; i++) {
+               struct cpuset *a = csa[i];
+               int apn = a->pn;
+
+               for (j = 0; j < csn; j++) {
+                       struct cpuset *b = csa[j];
+                       int bpn = b->pn;
+
+                       if (apn != bpn && cpusets_overlap(a, b)) {
+                               for (k = 0; k < csn; k++) {
+                                       struct cpuset *c = csa[k];
+
+                                       if (c->pn == bpn)
+                                               c->pn = apn;
+                               }
+                               ndoms--;        /* one less element */
+                               goto restart;
+                       }
+               }
+       }
+
+       /* Convert <csn, csa> to <ndoms, doms> */
+       doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
+       if (!doms)
+               goto rebuild;
+
+       for (nslot = 0, i = 0; i < csn; i++) {
+               struct cpuset *a = csa[i];
+               int apn = a->pn;
+
+               if (apn >= 0) {
+                       cpumask_t *dp = doms + nslot;
+
+                       if (nslot == ndoms) {
+                               static int warnings = 10;
+                               if (warnings) {
+                                       printk(KERN_WARNING
+                                        "rebuild_sched_domains confused:"
+                                         " nslot %d, ndoms %d, csn %d, i %d,"
+                                         " apn %d\n",
+                                         nslot, ndoms, csn, i, apn);
+                                       warnings--;
+                               }
+                               continue;
+                       }
+
+                       cpus_clear(*dp);
+                       for (j = i; j < csn; j++) {
+                               struct cpuset *b = csa[j];
+
+                               if (apn == b->pn) {
+                                       cpus_or(*dp, *dp, b->cpus_allowed);
+                                       b->pn = -1;
+                               }
+                       }
+                       nslot++;
+               }
+       }
+       BUG_ON(nslot != ndoms);
+
+rebuild:
+       /* Have scheduler rebuild sched domains */
+       lock_cpu_hotplug();
+       partition_sched_domains(ndoms, doms);
+       unlock_cpu_hotplug();
+
+done:
+       if (q && !IS_ERR(q))
+               kfifo_free(q);
+       kfree(csa);
+       /* Don't kfree(doms) -- partition_sched_domains() does that. */
+}
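
The merging loop is easiest to see on a toy input. The sketch below
re-implements the restart loop in plain userspace C, with unsigned long
bitmasks standing in for cpumask_t; the masks and sizes are invented for
illustration:

/* Toy model of the partition merge above: csa[] holds "cpusets" as bit
 * masks of CPUs, pn[] their partition numbers.  Overlapping masks are
 * folded into one partition, mirroring the goto-restart loop. */
#include <stdio.h>

#define CSN 4

int main(void)
{
	/* cpus 0-1, 1-2, 3-4, 5: the first two overlap on cpu 1 */
	unsigned long csa[CSN] = { 0x03, 0x06, 0x18, 0x20 };
	int pn[CSN], i, j, k, ndoms = CSN;

	for (i = 0; i < CSN; i++)
		pn[i] = i;
restart:
	for (i = 0; i < CSN; i++) {
		for (j = 0; j < CSN; j++) {
			if (pn[i] != pn[j] && (csa[i] & csa[j])) {
				for (k = 0; k < CSN; k++)
					if (pn[k] == pn[j])
						pn[k] = pn[i];
				ndoms--;	/* one less sched domain */
				goto restart;
			}
		}
	}
	printf("%d sched domains\n", ndoms);	/* prints 3: {0-2} {3-4} {5} */
	return 0;
}
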
+
+static inline int started_after_time(struct task_struct *t1,
+                                    struct timespec *time,
+                                    struct task_struct *t2)
+{
+       int start_diff = timespec_compare(&t1->start_time, time);
+       if (start_diff > 0) {
+               return 1;
+       } else if (start_diff < 0) {
+               return 0;
+       } else {
+               /*
+                * Arbitrarily, if two processes started at the same
+                * time, we'll say that the lower pointer value
+                * started first. Note that t2 may have exited by now
+                * so this may not be a valid pointer any longer, but
+                * that's fine - it still serves to distinguish
+                * between two tasks started (effectively)
+                * simultaneously.
+                */
+               return t1 > t2;
+       }
+}
+
+static inline int started_after(void *p1, void *p2)
+{
+       struct task_struct *t1 = p1;
+       struct task_struct *t2 = p2;
+       return started_after_time(t1, &t2->start_time, t2);
+}
+
 /*
  * Call with manage_mutex held.  May take callback_mutex during call.
  */
@@ -761,7 +739,15 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
        struct cpuset trialcs;
-       int retval;
+       int retval, i;
+       int is_load_balanced;
+       struct cgroup_iter it;
+       struct cgroup *cgrp = cs->css.cgroup;
+       struct task_struct *p, *dropped;
+       /* Never dereference latest_task, since it's not refcounted */
+       struct task_struct *latest_task = NULL;
+       struct ptr_heap heap;
+       struct timespec latest_time = { 0, 0 };
 
        /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
        if (cs == &top_cpuset)
@@ -770,11 +756,13 @@ static int update_cpumask(struct cpuset *cs, char *buf)
        trialcs = *cs;
 
        /*
-        * We allow a cpuset's cpus_allowed to be empty; if it has attached
-        * tasks, we'll catch it later when we validate the change and return
-        * -ENOSPC.
+        * An empty cpus_allowed is ok iff there are no tasks in the cpuset.
+        * Since cpulist_parse() fails on an empty mask, we special case
+        * that parsing.  The validate_change() call ensures that cpusets
+        * with tasks have cpus.
         */
-       if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+       buf = strstrip(buf);
+       if (!*buf) {
                cpus_clear(trialcs.cpus_allowed);
        } else {
                retval = cpulist_parse(buf, trialcs.cpus_allowed);
@@ -782,15 +770,79 @@ static int update_cpumask(struct cpuset *cs, char *buf)
                        return retval;
        }
        cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
-       /* cpus_allowed cannot be empty for a cpuset with attached tasks. */
-       if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
-               return -ENOSPC;
        retval = validate_change(cs, &trialcs);
        if (retval < 0)
                return retval;
+
+       /* Nothing to do if the cpus didn't change */
+       if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
+               return 0;
+       retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
+       if (retval)
+               return retval;
+
+       is_load_balanced = is_sched_load_balance(&trialcs);
+
        mutex_lock(&callback_mutex);
        cs->cpus_allowed = trialcs.cpus_allowed;
        mutex_unlock(&callback_mutex);
+
+ again:
+       /*
+        * Scan tasks in the cpuset, and update the cpumasks of any
+        * that need an update. Since we can't call set_cpus_allowed()
+        * while holding tasklist_lock, gather tasks to be processed
+        * in a heap structure. If the statically-sized heap fills up,
+        * overflow tasks that started later, and in future iterations
+        * only consider tasks that started after the latest task in
+        * the previous pass. This guarantees forward progress and
+        * that we don't miss any tasks.
+        */
+       heap.size = 0;
+       cgroup_iter_start(cgrp, &it);
+       while ((p = cgroup_iter_next(cgrp, &it))) {
+               /* Only affect tasks that don't have the right cpus_allowed */
+               if (cpus_equal(p->cpus_allowed, cs->cpus_allowed))
+                       continue;
+               /*
+                * Only process tasks that started after the last task
+                * we processed
+                */
+               if (!started_after_time(p, &latest_time, latest_task))
+                       continue;
+               dropped = heap_insert(&heap, p);
+               if (dropped == NULL) {
+                       get_task_struct(p);
+               } else if (dropped != p) {
+                       get_task_struct(p);
+                       put_task_struct(dropped);
+               }
+       }
+       cgroup_iter_end(cgrp, &it);
+       if (heap.size) {
+               for (i = 0; i < heap.size; i++) {
+                       struct task_struct *p = heap.ptrs[i];
+                       if (i == 0) {
+                               latest_time = p->start_time;
+                               latest_task = p;
+                       }
+                       set_cpus_allowed(p, cs->cpus_allowed);
+                       put_task_struct(p);
+               }
+               /*
+                * If we had to process any tasks at all, scan again
+                * in case some of them were in the middle of forking
+                * children that didn't notice the new cpumask
+                * restriction.  Not the most efficient way to do it,
+                * but it avoids having to take callback_mutex in the
+                * fork path.
+                */
+               goto again;
+       }
+       heap_free(&heap);
+       if (is_load_balanced)
+               rebuild_sched_domains();
+
        return 0;
 }
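
With strstrip() in place, any all-whitespace write now means "clear the
mask", which validate_change() permits only for a cpuset with no attached
tasks. A sketch of driving the file from userspace; the legacy /dev/cpuset
mount and a child cpuset named "demo" are assumptions:

/* Sketch: update a cpuset's cpus file.  The error from the kernel (e.g.
 * -ENOSPC for an illegal empty mask) is reported at fclose(). */
#include <stdio.h>

static int write_cpus(const char *list)
{
	FILE *f = fopen("/dev/cpuset/demo/cpus", "w");

	if (!f)
		return -1;
	fputs(list, f);
	return fclose(f);
}

int main(void)
{
	if (write_cpus("0-1") != 0)
		perror("set 0-1");
	if (write_cpus("\n") != 0)	/* now equivalent to an empty mask */
		perror("clear");
	return 0;
}
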
 
@@ -839,7 +891,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
        do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
 
        mutex_lock(&callback_mutex);
-       guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
+       guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
        mutex_unlock(&callback_mutex);
 }
 
@@ -857,16 +909,19 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
  * their mempolicies to the cpusets new mems_allowed.
  */
 
+static void *cpuset_being_rebound;
+
 static int update_nodemask(struct cpuset *cs, char *buf)
 {
        struct cpuset trialcs;
        nodemask_t oldmem;
-       struct task_struct *g, *p;
+       struct task_struct *p;
        struct mm_struct **mmarray;
        int i, n, ntasks;
        int migrate;
        int fudge;
        int retval;
+       struct cgroup_iter it;
 
        /*
         * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
@@ -878,29 +933,19 @@ static int update_nodemask(struct cpuset *cs, char *buf)
        trialcs = *cs;
 
        /*
-        * We allow a cpuset's mems_allowed to be empty; if it has attached
-        * tasks, we'll catch it later when we validate the change and return
-        * -ENOSPC.
+        * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+        * Since nodelist_parse() fails on an empty mask, we special case
+        * that parsing.  The validate_change() call ensures that cpusets
+        * with tasks have memory.
         */
-       if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+       buf = strstrip(buf);
+       if (!*buf) {
                nodes_clear(trialcs.mems_allowed);
        } else {
                retval = nodelist_parse(buf, trialcs.mems_allowed);
                if (retval < 0)
                        goto done;
-               if (!nodes_intersects(trialcs.mems_allowed,
-                                               node_states[N_HIGH_MEMORY])) {
-                       /*
-                        * error if only memoryless nodes specified.
-                        */
-                       retval = -ENOSPC;
-                       goto done;
-               }
        }
-       /*
-        * Exclude memoryless nodes.  We know that trialcs.mems_allowed
-        * contains at least one node with memory.
-        */
        nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
                                                node_states[N_HIGH_MEMORY]);
        oldmem = cs->mems_allowed;
@@ -908,11 +953,6 @@ static int update_nodemask(struct cpuset *cs, char *buf)
                retval = 0;             /* Too easy - nothing to do */
                goto done;
        }
-       /* mems_allowed cannot be empty for a cpuset with attached tasks. */
-       if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) {
-               retval = -ENOSPC;
-               goto done;
-       }
        retval = validate_change(cs, &trialcs);
        if (retval < 0)
                goto done;
@@ -922,7 +962,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
        cs->mems_generation = cpuset_mems_generation++;
        mutex_unlock(&callback_mutex);
 
-       set_cpuset_being_rebound(cs);           /* causes mpol_copy() rebind */
+       cpuset_being_rebound = cs;              /* causes mpol_copy() rebind */
 
        fudge = 10;                             /* spare mmarray[] slots */
        fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
@@ -936,13 +976,13 @@ static int update_nodemask(struct cpuset *cs, char *buf)
         * enough mmarray[] w/o using GFP_ATOMIC.
         */
        while (1) {
-               ntasks = atomic_read(&cs->count);       /* guess */
+               ntasks = cgroup_task_count(cs->css.cgroup);  /* guess */
                ntasks += fudge;
                mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
                if (!mmarray)
                        goto done;
                read_lock(&tasklist_lock);              /* block fork */
-               if (atomic_read(&cs->count) <= ntasks)
+               if (cgroup_task_count(cs->css.cgroup) <= ntasks)
                        break;                          /* got enough */
                read_unlock(&tasklist_lock);            /* try again */
                kfree(mmarray);
@@ -951,21 +991,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
        n = 0;
 
        /* Load up mmarray[] with mm reference for each task in cpuset. */
-       do_each_thread(g, p) {
+       cgroup_iter_start(cs->css.cgroup, &it);
+       while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
                struct mm_struct *mm;
 
                if (n >= ntasks) {
                        printk(KERN_WARNING
                                "Cpuset mempolicy rebind incomplete.\n");
-                       continue;
+                       break;
                }
-               if (p->cpuset != cs)
-                       continue;
                mm = get_task_mm(p);
                if (!mm)
                        continue;
                mmarray[n++] = mm;
-       } while_each_thread(g, p);
+       }
+       cgroup_iter_end(cs->css.cgroup, &it);
        read_unlock(&tasklist_lock);
 
        /*
@@ -993,12 +1033,17 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 
        /* We're done rebinding vma's to this cpusets new mems_allowed. */
        kfree(mmarray);
-       set_cpuset_being_rebound(NULL);
+       cpuset_being_rebound = NULL;
        retval = 0;
 done:
        return retval;
 }
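
The mmarray sizing above is a guess-and-retry pattern: estimate the task
count, allocate outside the lock, then re-check under tasklist_lock and
retry with a bigger guess if the cpuset grew meanwhile. A self-contained
sketch of the same idea; the item counter and no-op locks are stand-ins
for cgroup_task_count() and tasklist_lock:

/* Sketch of the allocate-then-verify loop used for mmarray[]. */
#include <stdio.h>
#include <stdlib.h>

static int nitems = 25;			/* stand-in for cgroup_task_count() */
static void lock_items(void) { }	/* stand-in for read_lock() */
static void unlock_items(void) { }

static void **alloc_snapshot(int *n_out)
{
	void **arr;
	int guess;

	for (;;) {
		guess = nitems + 10;	/* current count plus fudge */
		arr = malloc(guess * sizeof(*arr));
		if (!arr)
			return NULL;
		lock_items();		/* count is stable from here on */
		if (nitems <= guess)
			break;		/* big enough; return still locked */
		unlock_items();		/* raced with growth: try again */
		free(arr);
	}
	*n_out = guess;
	return arr;
}

int main(void)
{
	int n;
	void **arr = alloc_snapshot(&n);

	if (arr) {
		printf("allocated %d slots\n", n);
		unlock_items();
		free(arr);
	}
	return 0;
}
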
 
+int current_cpuset_is_being_rebound(void)
+{
+       return task_cs(current) == cpuset_being_rebound;
+}
+
 /*
  * Call with manage_mutex held.
  */
@@ -1015,6 +1060,7 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
  * bit:        the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
+ *                             CS_SCHED_LOAD_BALANCE,
  *                             CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
  *                             CS_SPREAD_PAGE, CS_SPREAD_SLAB)
  * cs: the cpuset to update
@@ -1028,6 +1074,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
        int turning_on;
        struct cpuset trialcs;
        int err;
+       int cpus_nonempty, balance_flag_changed;
 
        turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -1040,10 +1087,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
        err = validate_change(cs, &trialcs);
        if (err < 0)
                return err;
+
+       cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
+       balance_flag_changed = (is_sched_load_balance(cs) !=
+                                       is_sched_load_balance(&trialcs));
+
        mutex_lock(&callback_mutex);
        cs->flags = trialcs.flags;
        mutex_unlock(&callback_mutex);
 
+       if (cpus_nonempty && balance_flag_changed)
+               rebuild_sched_domains();
+
        return 0;
 }
 
@@ -1145,85 +1200,34 @@ static int fmeter_getrate(struct fmeter *fmp)
        return val;
 }
 
-/*
- * Attach task specified by pid in 'pidbuf' to cpuset 'cs', possibly
- * writing the path of the old cpuset in 'ppathbuf' if it needs to be
- * notified on release.
- *
- * Call holding manage_mutex.  May take callback_mutex and task_lock of
- * the task 'pid' during call.
- */
-
-static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
+static int cpuset_can_attach(struct cgroup_subsys *ss,
+                            struct cgroup *cont, struct task_struct *tsk)
 {
-       pid_t pid;
-       struct task_struct *tsk;
-       struct cpuset *oldcs;
-       cpumask_t cpus;
-       nodemask_t from, to;
-       struct mm_struct *mm;
-       int retval;
+       struct cpuset *cs = cgroup_cs(cont);
 
-       if (sscanf(pidbuf, "%d", &pid) != 1)
-               return -EIO;
        if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
                return -ENOSPC;
 
-       if (pid) {
-               read_lock(&tasklist_lock);
-
-               tsk = find_task_by_pid(pid);
-               if (!tsk || tsk->flags & PF_EXITING) {
-                       read_unlock(&tasklist_lock);
-                       return -ESRCH;
-               }
-
-               get_task_struct(tsk);
-               read_unlock(&tasklist_lock);
-
-               if ((current->euid) && (current->euid != tsk->uid)
-                   && (current->euid != tsk->suid)) {
-                       put_task_struct(tsk);
-                       return -EACCES;
-               }
-       } else {
-               tsk = current;
-               get_task_struct(tsk);
-       }
+       return security_task_setscheduler(tsk, 0, NULL);
+}
 
-       retval = security_task_setscheduler(tsk, 0, NULL);
-       if (retval) {
-               put_task_struct(tsk);
-               return retval;
-       }
+static void cpuset_attach(struct cgroup_subsys *ss,
+                         struct cgroup *cont, struct cgroup *oldcont,
+                         struct task_struct *tsk)
+{
+       cpumask_t cpus;
+       nodemask_t from, to;
+       struct mm_struct *mm;
+       struct cpuset *cs = cgroup_cs(cont);
+       struct cpuset *oldcs = cgroup_cs(oldcont);
 
        mutex_lock(&callback_mutex);
-
-       task_lock(tsk);
-       oldcs = tsk->cpuset;
-       /*
-        * After getting 'oldcs' cpuset ptr, be sure still not exiting.
-        * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
-        * then fail this attach_task(), to avoid breaking top_cpuset.count.
-        */
-       if (tsk->flags & PF_EXITING) {
-               task_unlock(tsk);
-               mutex_unlock(&callback_mutex);
-               put_task_struct(tsk);
-               return -ESRCH;
-       }
-       atomic_inc(&cs->count);
-       rcu_assign_pointer(tsk->cpuset, cs);
-       task_unlock(tsk);
-
        guarantee_online_cpus(cs, &cpus);
        set_cpus_allowed(tsk, cpus);
+       mutex_unlock(&callback_mutex);
 
        from = oldcs->mems_allowed;
        to = cs->mems_allowed;
-
-       mutex_unlock(&callback_mutex);
-
        mm = get_task_mm(tsk);
        if (mm) {
                mpol_rebind_mm(mm, &to);
@@ -1232,44 +1236,36 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
                mmput(mm);
        }
 
-       put_task_struct(tsk);
-       synchronize_rcu();
-       if (atomic_dec_and_test(&oldcs->count))
-               check_for_release(oldcs, ppathbuf);
-       return 0;
 }
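
Task migration is now driven by the generic cgroup core, which calls
cpuset_can_attach() and then cpuset_attach() when a pid is written to the
hierarchy's tasks file. A sketch of that write for the current process;
the /dev/cpuset/demo path is an assumption:

/* Sketch: attach self to a cpuset via the cgroup-provided tasks file.
 * cpuset_can_attach() rejects the move (-ENOSPC) if the cpuset has no
 * cpus or mems. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	FILE *f = fopen("/dev/cpuset/demo/tasks", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%d\n", getpid());
	if (fclose(f) != 0)
		perror("attach");
	return 0;
}
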
 
 /* The various types of files and directories in a cpuset file system */
 
 typedef enum {
-       FILE_ROOT,
-       FILE_DIR,
        FILE_MEMORY_MIGRATE,
        FILE_CPULIST,
        FILE_MEMLIST,
        FILE_CPU_EXCLUSIVE,
        FILE_MEM_EXCLUSIVE,
-       FILE_NOTIFY_ON_RELEASE,
+       FILE_SCHED_LOAD_BALANCE,
        FILE_MEMORY_PRESSURE_ENABLED,
        FILE_MEMORY_PRESSURE,
        FILE_SPREAD_PAGE,
        FILE_SPREAD_SLAB,
-       FILE_TASKLIST,
 } cpuset_filetype_t;
 
-static ssize_t cpuset_common_file_write(struct file *file,
+static ssize_t cpuset_common_file_write(struct cgroup *cont,
+                                       struct cftype *cft,
+                                       struct file *file,
                                        const char __user *userbuf,
                                        size_t nbytes, loff_t *unused_ppos)
 {
-       struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
-       struct cftype *cft = __d_cft(file->f_path.dentry);
+       struct cpuset *cs = cgroup_cs(cont);
        cpuset_filetype_t type = cft->private;
        char *buffer;
-       char *pathbuf = NULL;
        int retval = 0;
 
        /* Crude upper limit on largest legitimate cpulist user might write. */
-       if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES))
+       if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
                return -E2BIG;
 
        /* +1 for nul-terminator */
@@ -1282,9 +1278,9 @@ static ssize_t cpuset_common_file_write(struct file *file,
        }
        buffer[nbytes] = 0;     /* nul-terminate */
 
-       mutex_lock(&manage_mutex);
+       cgroup_lock();
 
-       if (is_removed(cs)) {
+       if (cgroup_is_removed(cont)) {
                retval = -ENODEV;
                goto out2;
        }
@@ -1302,8 +1298,8 @@ static ssize_t cpuset_common_file_write(struct file *file,
        case FILE_MEM_EXCLUSIVE:
                retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
                break;
-       case FILE_NOTIFY_ON_RELEASE:
-               retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
+       case FILE_SCHED_LOAD_BALANCE:
+               retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
                break;
        case FILE_MEMORY_MIGRATE:
                retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
@@ -1322,9 +1318,6 @@ static ssize_t cpuset_common_file_write(struct file *file,
                retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
                cs->mems_generation = cpuset_mems_generation++;
                break;
-       case FILE_TASKLIST:
-               retval = attach_task(cs, buffer, &pathbuf);
-               break;
        default:
                retval = -EINVAL;
                goto out2;
@@ -1333,30 +1326,12 @@ static ssize_t cpuset_common_file_write(struct file *file,
        if (retval == 0)
                retval = nbytes;
 out2:
-       mutex_unlock(&manage_mutex);
-       cpuset_release_agent(pathbuf);
+       cgroup_unlock();
 out1:
        kfree(buffer);
        return retval;
 }
 
-static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
-                                               size_t nbytes, loff_t *ppos)
-{
-       ssize_t retval = 0;
-       struct cftype *cft = __d_cft(file->f_path.dentry);
-       if (!cft)
-               return -ENODEV;
-
-       /* special function ? */
-       if (cft->write)
-               retval = cft->write(file, buf, nbytes, ppos);
-       else
-               retval = cpuset_common_file_write(file, buf, nbytes, ppos);
-
-       return retval;
-}
-
 /*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map.  If read in smaller
@@ -1391,11 +1366,13 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
        return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
 
-static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
-                               size_t nbytes, loff_t *ppos)
+static ssize_t cpuset_common_file_read(struct cgroup *cont,
+                                      struct cftype *cft,
+                                      struct file *file,
+                                      char __user *buf,
+                                      size_t nbytes, loff_t *ppos)
 {
-       struct cftype *cft = __d_cft(file->f_path.dentry);
-       struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+       struct cpuset *cs = cgroup_cs(cont);
        cpuset_filetype_t type = cft->private;
        char *page;
        ssize_t retval = 0;
@@ -1419,8 +1396,8 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
        case FILE_MEM_EXCLUSIVE:
                *s++ = is_mem_exclusive(cs) ? '1' : '0';
                break;
-       case FILE_NOTIFY_ON_RELEASE:
-               *s++ = notify_on_release(cs) ? '1' : '0';
+       case FILE_SCHED_LOAD_BALANCE:
+               *s++ = is_sched_load_balance(cs) ? '1' : '0';
                break;
        case FILE_MEMORY_MIGRATE:
                *s++ = is_memory_migrate(cs) ? '1' : '0';
@@ -1449,389 +1426,149 @@ out:
        return retval;
 }
 
-static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes,
-                                                               loff_t *ppos)
-{
-       ssize_t retval = 0;
-       struct cftype *cft = __d_cft(file->f_path.dentry);
-       if (!cft)
-               return -ENODEV;
-
-       /* special function ? */
-       if (cft->read)
-               retval = cft->read(file, buf, nbytes, ppos);
-       else
-               retval = cpuset_common_file_read(file, buf, nbytes, ppos);
-
-       return retval;
-}
-
-static int cpuset_file_open(struct inode *inode, struct file *file)
-{
-       int err;
-       struct cftype *cft;
-
-       err = generic_file_open(inode, file);
-       if (err)
-               return err;
-
-       cft = __d_cft(file->f_path.dentry);
-       if (!cft)
-               return -ENODEV;
-       if (cft->open)
-               err = cft->open(inode, file);
-       else
-               err = 0;
-
-       return err;
-}
-
-static int cpuset_file_release(struct inode *inode, struct file *file)
-{
-       struct cftype *cft = __d_cft(file->f_path.dentry);
-       if (cft->release)
-               return cft->release(inode, file);
-       return 0;
-}
-
-/*
- * cpuset_rename - Only allow simple rename of directories in place.
- */
-static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
-                  struct inode *new_dir, struct dentry *new_dentry)
-{
-       if (!S_ISDIR(old_dentry->d_inode->i_mode))
-               return -ENOTDIR;
-       if (new_dentry->d_inode)
-               return -EEXIST;
-       if (old_dir != new_dir)
-               return -EIO;
-       return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
-}
-
-static const struct file_operations cpuset_file_operations = {
-       .read = cpuset_file_read,
-       .write = cpuset_file_write,
-       .llseek = generic_file_llseek,
-       .open = cpuset_file_open,
-       .release = cpuset_file_release,
-};
-
-static const struct inode_operations cpuset_dir_inode_operations = {
-       .lookup = simple_lookup,
-       .mkdir = cpuset_mkdir,
-       .rmdir = cpuset_rmdir,
-       .rename = cpuset_rename,
-};
-
-static int cpuset_create_file(struct dentry *dentry, int mode)
-{
-       struct inode *inode;
-
-       if (!dentry)
-               return -ENOENT;
-       if (dentry->d_inode)
-               return -EEXIST;
-
-       inode = cpuset_new_inode(mode);
-       if (!inode)
-               return -ENOMEM;
-
-       if (S_ISDIR(mode)) {
-               inode->i_op = &cpuset_dir_inode_operations;
-               inode->i_fop = &simple_dir_operations;
-
-               /* start off with i_nlink == 2 (for "." entry) */
-               inc_nlink(inode);
-       } else if (S_ISREG(mode)) {
-               inode->i_size = 0;
-               inode->i_fop = &cpuset_file_operations;
-       }
-
-       d_instantiate(dentry, inode);
-       dget(dentry);   /* Extra count - pin the dentry in core */
-       return 0;
-}
-
-/*
- *     cpuset_create_dir - create a directory for an object.
- *     cs:     the cpuset we create the directory for.
- *             It must have a valid ->parent field
- *             And we are going to fill its ->dentry field.
- *     name:   The name to give to the cpuset directory. Will be copied.
- *     mode:   mode to set on new directory.
- */
-
-static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
-{
-       struct dentry *dentry = NULL;
-       struct dentry *parent;
-       int error = 0;
-
-       parent = cs->parent->dentry;
-       dentry = cpuset_get_dentry(parent, name);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-       error = cpuset_create_file(dentry, S_IFDIR | mode);
-       if (!error) {
-               dentry->d_fsdata = cs;
-               inc_nlink(parent->d_inode);
-               cs->dentry = dentry;
-       }
-       dput(dentry);
-
-       return error;
-}
-
-static int cpuset_add_file(struct dentry *dir, const struct cftype *cft)
-{
-       struct dentry *dentry;
-       int error;
-
-       mutex_lock(&dir->d_inode->i_mutex);
-       dentry = cpuset_get_dentry(dir, cft->name);
-       if (!IS_ERR(dentry)) {
-               error = cpuset_create_file(dentry, 0644 | S_IFREG);
-               if (!error)
-                       dentry->d_fsdata = (void *)cft;
-               dput(dentry);
-       } else
-               error = PTR_ERR(dentry);
-       mutex_unlock(&dir->d_inode->i_mutex);
-       return error;
-}
-
-/*
- * Stuff for reading the 'tasks' file.
- *
- * Reading this file can return large amounts of data if a cpuset has
- * *lots* of attached tasks. So it may need several calls to read(),
- * but we cannot guarantee that the information we produce is correct
- * unless we produce it entirely atomically.
- *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here).  The struct
- * ctr_struct * is stored in file->private_data.  Its resources will
- * be freed by release() when the file is closed.  The array is used
- * to sprintf the PIDs and then used by read().
- */
-
-/* cpusets_tasks_read array */
-
-struct ctr_struct {
-       char *buf;
-       int bufsz;
-};
-
-/*
- * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'.
- * Return actual number of pids loaded.  No need to task_lock(p)
- * when reading out p->cpuset, as we don't really care if it changes
- * on the next cycle, and we are not going to try to dereference it.
- */
-static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
-{
-       int n = 0;
-       struct task_struct *g, *p;
-
-       read_lock(&tasklist_lock);
-
-       do_each_thread(g, p) {
-               if (p->cpuset == cs) {
-                       if (unlikely(n == npids))
-                               goto array_full;
-                       pidarray[n++] = p->pid;
-               }
-       } while_each_thread(g, p);
-
-array_full:
-       read_unlock(&tasklist_lock);
-       return n;
-}
-
-static int cmppid(const void *a, const void *b)
-{
-       return *(pid_t *)a - *(pid_t *)b;
-}
-
-/*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
- */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
-{
-       int cnt = 0;
-       int i;
-
-       for (i = 0; i < npids; i++)
-               cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-       return cnt;
-}
-
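pid_array_to_buf() above uses the classic two-pass snprintf() sizing idiom: cpuset_tasks_open() calls it once just to learn the required size, then again to fill a buffer of exactly that size. A minimal standalone sketch of the same idiom (hypothetical names, plain C, not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Count-then-fill: snprintf() returns the number of characters it
 * would have written, even when given size 0. */
static int pids_to_buf(char *buf, int sz, const int *pids, int n)
{
        int cnt = 0, i;

        for (i = 0; i < n; i++)
                cnt += snprintf(cnt < sz ? buf + cnt : NULL,
                                cnt < sz ? sz - cnt : 0, "%d\n", pids[i]);
        return cnt;
}

int main(void)
{
        int pids[] = { 1, 42, 31337 };
        int need = pids_to_buf(NULL, 0, pids, 3) + 1;   /* pass 1: size */
        char *buf = malloc(need);

        if (!buf)
                return 1;
        pids_to_buf(buf, need, pids, 3);                /* pass 2: fill */
        fputs(buf, stdout);
        free(buf);
        return 0;
}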
-/*
- * Handle an open on 'tasks' file.  Prepare a buffer listing the
- * process id's of tasks currently attached to the cpuset being opened.
- *
- * Does not require any specific cpuset mutexes, and does not take any.
- */
-static int cpuset_tasks_open(struct inode *unused, struct file *file)
-{
-       struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
-       struct ctr_struct *ctr;
-       pid_t *pidarray;
-       int npids;
-       char c;
-
-       if (!(file->f_mode & FMODE_READ))
-               return 0;
-
-       ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-       if (!ctr)
-               goto err0;
-
-       /*
-        * If cpuset gets more users after we read count, we won't have
-        * enough space - tough.  This race is indistinguishable to the
-        * caller from the case that the additional cpuset users didn't
-        * show up until sometime later on.
-        */
-       npids = atomic_read(&cs->count);
-       pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-       if (!pidarray)
-               goto err1;
-
-       npids = pid_array_load(pidarray, npids, cs);
-       sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-       /* Call pid_array_to_buf() twice, first just to get bufsz */
-       ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-       ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-       if (!ctr->buf)
-               goto err2;
-       ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-       kfree(pidarray);
-       file->private_data = ctr;
-       return 0;
-
-err2:
-       kfree(pidarray);
-err1:
-       kfree(ctr);
-err0:
-       return -ENOMEM;
-}
-
-static ssize_t cpuset_tasks_read(struct file *file, char __user *buf,
-                                               size_t nbytes, loff_t *ppos)
-{
-       struct ctr_struct *ctr = file->private_data;
 
-       return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
 
-static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
-{
-       struct ctr_struct *ctr;
 
-       if (file->f_mode & FMODE_READ) {
-               ctr = file->private_data;
-               kfree(ctr->buf);
-               kfree(ctr);
-       }
-       return 0;
-}
 
 /*
  * for the common functions, 'private' gives the type of file
  */
 
-static struct cftype cft_tasks = {
-       .name = "tasks",
-       .open = cpuset_tasks_open,
-       .read = cpuset_tasks_read,
-       .release = cpuset_tasks_release,
-       .private = FILE_TASKLIST,
-};
-
 static struct cftype cft_cpus = {
        .name = "cpus",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_CPULIST,
 };
 
 static struct cftype cft_mems = {
        .name = "mems",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_MEMLIST,
 };
 
 static struct cftype cft_cpu_exclusive = {
        .name = "cpu_exclusive",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_CPU_EXCLUSIVE,
 };
 
 static struct cftype cft_mem_exclusive = {
        .name = "mem_exclusive",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_MEM_EXCLUSIVE,
 };
 
-static struct cftype cft_notify_on_release = {
-       .name = "notify_on_release",
-       .private = FILE_NOTIFY_ON_RELEASE,
+static struct cftype cft_sched_load_balance = {
+       .name = "sched_load_balance",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
+       .private = FILE_SCHED_LOAD_BALANCE,
 };
 
 static struct cftype cft_memory_migrate = {
        .name = "memory_migrate",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_MEMORY_MIGRATE,
 };
 
 static struct cftype cft_memory_pressure_enabled = {
        .name = "memory_pressure_enabled",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_MEMORY_PRESSURE_ENABLED,
 };
 
 static struct cftype cft_memory_pressure = {
        .name = "memory_pressure",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_MEMORY_PRESSURE,
 };
 
 static struct cftype cft_spread_page = {
        .name = "memory_spread_page",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_SPREAD_PAGE,
 };
 
 static struct cftype cft_spread_slab = {
        .name = "memory_spread_slab",
+       .read = cpuset_common_file_read,
+       .write = cpuset_common_file_write,
        .private = FILE_SPREAD_SLAB,
 };
 
-static int cpuset_populate_dir(struct dentry *cs_dentry)
+static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 {
        int err;
 
-       if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
                return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
-               return err;
-       if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
+       if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
                return err;
+       /* memory_pressure_enabled is in root cpuset only */
+       if (err == 0 && !cont->parent)
+               err = cgroup_add_file(cont, ss,
+                                        &cft_memory_pressure_enabled);
        return 0;
 }
 
+/*
+ * post_clone() is called at the end of cgroup_clone().
+ * 'cgroup' was just created automatically as a result of
+ * a cgroup_clone(), and the current task is about to
+ * be moved into 'cgroup'.
+ *
+ * Currently we refuse to set up the cgroup - thereby
+ * refusing to let the task be entered, and as a result refusing
+ * the sys_unshare() or clone() which initiated it - if any
+ * sibling cpusets have exclusive cpus or mem.
+ *
+ * If this becomes a problem for some users who wish to
+ * allow that scenario, then cpuset_post_clone() could be
+ * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
+ * (and likewise for mems) to the new cgroup.
+ */
+static void cpuset_post_clone(struct cgroup_subsys *ss,
+                             struct cgroup *cgroup)
+{
+       struct cgroup *parent, *child;
+       struct cpuset *cs, *parent_cs;
+
+       parent = cgroup->parent;
+       list_for_each_entry(child, &parent->children, sibling) {
+               cs = cgroup_cs(child);
+               if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
+                       return;
+       }
+       cs = cgroup_cs(cgroup);
+       parent_cs = cgroup_cs(parent);
+
+       cs->mems_allowed = parent_cs->mems_allowed;
+       cs->cpus_allowed = parent_cs->cpus_allowed;
+       return;
+}
+
 /*
  *     cpuset_create - create a cpuset
  *     parent: cpuset that will be parent of the new cpuset.
@@ -1841,106 +1578,77 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
  *     Must be called with the mutex on the parent inode held
  */
 
-static long cpuset_create(struct cpuset *parent, const char *name, int mode)
+static struct cgroup_subsys_state *cpuset_create(
+       struct cgroup_subsys *ss,
+       struct cgroup *cont)
 {
        struct cpuset *cs;
-       int err;
+       struct cpuset *parent;
 
+       if (!cont->parent) {
+               /* This is early initialization for the top cgroup */
+               top_cpuset.mems_generation = cpuset_mems_generation++;
+               return &top_cpuset.css;
+       }
+       parent = cgroup_cs(cont->parent);
        cs = kmalloc(sizeof(*cs), GFP_KERNEL);
        if (!cs)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
-       mutex_lock(&manage_mutex);
        cpuset_update_task_memory_state();
        cs->flags = 0;
-       if (notify_on_release(parent))
-               set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
        if (is_spread_page(parent))
                set_bit(CS_SPREAD_PAGE, &cs->flags);
        if (is_spread_slab(parent))
                set_bit(CS_SPREAD_SLAB, &cs->flags);
+       set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
        cs->cpus_allowed = CPU_MASK_NONE;
        cs->mems_allowed = NODE_MASK_NONE;
-       atomic_set(&cs->count, 0);
-       INIT_LIST_HEAD(&cs->sibling);
-       INIT_LIST_HEAD(&cs->children);
        cs->mems_generation = cpuset_mems_generation++;
        fmeter_init(&cs->fmeter);
 
        cs->parent = parent;
-
-       mutex_lock(&callback_mutex);
-       list_add(&cs->sibling, &cs->parent->children);
        number_of_cpusets++;
-       mutex_unlock(&callback_mutex);
-
-       err = cpuset_create_dir(cs, name, mode);
-       if (err < 0)
-               goto err;
-
-       /*
-        * Release manage_mutex before cpuset_populate_dir() because it
-        * will down() this new directory's i_mutex and if we race with
-        * another mkdir, we might deadlock.
-        */
-       mutex_unlock(&manage_mutex);
-
-       err = cpuset_populate_dir(cs->dentry);
-       /* If err < 0, we have a half-filled directory - oh well ;) */
-       return 0;
-err:
-       list_del(&cs->sibling);
-       mutex_unlock(&manage_mutex);
-       kfree(cs);
-       return err;
+       return &cs->css;
 }
 
-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-       struct cpuset *c_parent = dentry->d_parent->d_fsdata;
-
-       /* the vfs holds inode->i_mutex already */
-       return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
-}
+/*
+ * Locking note on the strange update_flag() call below:
+ *
+ * If the cpuset being removed has its flag 'sched_load_balance'
+ * enabled, then simulate turning sched_load_balance off, which
+ * will call rebuild_sched_domains().  The lock_cpu_hotplug()
+ * call in rebuild_sched_domains() must not be made while holding
+ * callback_mutex.  Elsewhere the kernel nests callback_mutex inside
+ * lock_cpu_hotplug() calls.  So the reverse nesting would risk an
+ * ABBA deadlock.
+ */
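The inversion described above is the classic ABBA deadlock: one path takes lock A then B while another takes B then A. A minimal userspace sketch with pthreads (lock_a/lock_b are hypothetical stand-ins for callback_mutex and the hotplug lock):

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

static void *nests_a_then_b(void *unused)
{
        pthread_mutex_lock(&lock_a);            /* A ... */
        pthread_mutex_lock(&lock_b);            /* ... then B */
        pthread_mutex_unlock(&lock_b);
        pthread_mutex_unlock(&lock_a);
        return NULL;
}

static void *nests_b_then_a(void *unused)
{
        pthread_mutex_lock(&lock_b);            /* B ... */
        pthread_mutex_lock(&lock_a);            /* ... then A: may deadlock */
        pthread_mutex_unlock(&lock_a);
        pthread_mutex_unlock(&lock_b);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, nests_a_then_b, NULL);
        pthread_create(&t2, NULL, nests_b_then_a, NULL);
        pthread_join(t1, NULL);         /* may hang forever if each thread
                                         * holds its first lock and waits */
        pthread_join(t2, NULL);
        return 0;
}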
 
-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
+static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
 {
-       struct cpuset *cs = dentry->d_fsdata;
-       struct dentry *d;
-       struct cpuset *parent;
-       char *pathbuf = NULL;
+       struct cpuset *cs = cgroup_cs(cont);
 
-       /* the vfs holds both inode->i_mutex already */
-
-       mutex_lock(&manage_mutex);
        cpuset_update_task_memory_state();
-       if (atomic_read(&cs->count) > 0) {
-               mutex_unlock(&manage_mutex);
-               return -EBUSY;
-       }
-       if (!list_empty(&cs->children)) {
-               mutex_unlock(&manage_mutex);
-               return -EBUSY;
-       }
-       parent = cs->parent;
-       mutex_lock(&callback_mutex);
-       set_bit(CS_REMOVED, &cs->flags);
-       list_del(&cs->sibling); /* delete my sibling from parent->children */
-       spin_lock(&cs->dentry->d_lock);
-       d = dget(cs->dentry);
-       cs->dentry = NULL;
-       spin_unlock(&d->d_lock);
-       cpuset_d_remove_dir(d);
-       dput(d);
+
+       if (is_sched_load_balance(cs))
+               update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
+
        number_of_cpusets--;
-       mutex_unlock(&callback_mutex);
-       if (list_empty(&parent->children))
-               check_for_release(parent, &pathbuf);
-       mutex_unlock(&manage_mutex);
-       cpuset_release_agent(pathbuf);
-       return 0;
+       kfree(cs);
 }
 
+struct cgroup_subsys cpuset_subsys = {
+       .name = "cpuset",
+       .create = cpuset_create,
+       .destroy = cpuset_destroy,
+       .can_attach = cpuset_can_attach,
+       .attach = cpuset_attach,
+       .populate = cpuset_populate,
+       .post_clone = cpuset_post_clone,
+       .subsys_id = cpuset_subsys_id,
+       .early_init = 1,
+};
+
 /*
  * cpuset_init_early - just enough so that the calls to
  * cpuset_update_task_memory_state() in early init code
@@ -1949,13 +1657,11 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
 int __init cpuset_init_early(void)
 {
-       struct task_struct *tsk = current;
-
-       tsk->cpuset = &top_cpuset;
-       tsk->cpuset->mems_generation = cpuset_mems_generation++;
+       top_cpuset.mems_generation = cpuset_mems_generation++;
        return 0;
 }
 
+
 /**
  * cpuset_init - initialize cpusets at system boot
  *
@@ -1964,39 +1670,21 @@ int __init cpuset_init_early(void)
 
 int __init cpuset_init(void)
 {
-       struct dentry *root;
-       int err;
+       int err = 0;
 
        top_cpuset.cpus_allowed = CPU_MASK_ALL;
        top_cpuset.mems_allowed = NODE_MASK_ALL;
 
        fmeter_init(&top_cpuset.fmeter);
        top_cpuset.mems_generation = cpuset_mems_generation++;
-
-       init_task.cpuset = &top_cpuset;
+       set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
 
        err = register_filesystem(&cpuset_fs_type);
        if (err < 0)
-               goto out;
-       cpuset_mount = kern_mount(&cpuset_fs_type);
-       if (IS_ERR(cpuset_mount)) {
-               printk(KERN_ERR "cpuset: could not mount!\n");
-               err = PTR_ERR(cpuset_mount);
-               cpuset_mount = NULL;
-               goto out;
-       }
-       root = cpuset_mount->mnt_sb->s_root;
-       root->d_fsdata = &top_cpuset;
-       inc_nlink(root->d_inode);
-       top_cpuset.dentry = root;
-       root->d_inode->i_op = &cpuset_dir_inode_operations;
+               return err;
+
        number_of_cpusets = 1;
-       err = cpuset_populate_dir(root);
-       /* memory_pressure_enabled is in root cpuset only */
-       if (err == 0)
-               err = cpuset_add_file(root, &cft_memory_pressure_enabled);
-out:
-       return err;
+       return 0;
 }
 
 /*
@@ -2022,10 +1710,12 @@ out:
 
 static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 {
+       struct cgroup *cont;
        struct cpuset *c;
 
        /* Each of our child cpusets mems must be online */
-       list_for_each_entry(c, &cur->children, sibling) {
+       list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
+               c = cgroup_cs(cont);
                guarantee_online_cpus_mems_in_subtree(c);
                if (!cpus_empty(c->cpus_allowed))
                        guarantee_online_cpus(c, &c->cpus_allowed);
@@ -2053,7 +1743,7 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 
 static void common_cpu_mem_hotplug_unplug(void)
 {
-       mutex_lock(&manage_mutex);
+       cgroup_lock();
        mutex_lock(&callback_mutex);
 
        guarantee_online_cpus_mems_in_subtree(&top_cpuset);
@@ -2061,7 +1751,7 @@ static void common_cpu_mem_hotplug_unplug(void)
        top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
        mutex_unlock(&callback_mutex);
-       mutex_unlock(&manage_mutex);
+       cgroup_unlock();
 }
 
 /*
@@ -2074,8 +1764,8 @@ static void common_cpu_mem_hotplug_unplug(void)
  * cpu_online_map on each CPU hotplug (cpuhp) event.
  */
 
-static int cpuset_handle_cpuhp(struct notifier_block *nb,
-                               unsigned long phase, void *cpu)
+static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
+                               unsigned long phase, void *unused_cpu)
 {
        if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
                return NOTIFY_DONE;
@@ -2113,109 +1803,7 @@ void __init cpuset_init_smp(void)
 }
 
 /**
- * cpuset_fork - attach newly forked task to its parents cpuset.
- * @tsk: pointer to task_struct of forking parent process.
- *
- * Description: A task inherits its parent's cpuset at fork().
- *
- * A pointer to the shared cpuset was automatically copied in fork.c
- * by dup_task_struct().  However, we ignore that copy, since it was
- * not made under the protection of task_lock(), so might no longer be
- * a valid cpuset pointer.  attach_task() might have already changed
- * current->cpuset, allowing the previously referenced cpuset to
- * be removed and freed.  Instead, we task_lock(current) and copy
- * its present value of current->cpuset for our freshly forked child.
- *
- * At the point that cpuset_fork() is called, 'current' is the parent
- * task, and the passed argument 'child' points to the child task.
- **/
-
-void cpuset_fork(struct task_struct *child)
-{
-       task_lock(current);
-       child->cpuset = current->cpuset;
-       atomic_inc(&child->cpuset->count);
-       task_unlock(current);
-}
-
-/**
- * cpuset_exit - detach cpuset from exiting task
- * @tsk: pointer to task_struct of exiting process
- *
- * Description: Detach cpuset from @tsk and release it.
- *
- * Note that cpusets marked notify_on_release force every task in
- * them to take the global manage_mutex mutex when exiting.
- * This could impact scaling on very large systems.  Be reluctant to
- * use notify_on_release cpusets where very high task exit scaling
- * is required on large systems.
- *
- * Don't even think about derefencing 'cs' after the cpuset use count
- * goes to zero, except inside a critical section guarded by manage_mutex
- * or callback_mutex.   Otherwise a zero cpuset use count is a license to
- * any other task to nuke the cpuset immediately, via cpuset_rmdir().
- *
- * This routine has to take manage_mutex, not callback_mutex, because
- * it is holding that mutex while calling check_for_release(),
- * which calls kmalloc(), so can't be called holding callback_mutex().
- *
- * the_top_cpuset_hack:
- *
- *    Set the exiting tasks cpuset to the root cpuset (top_cpuset).
- *
- *    Don't leave a task unable to allocate memory, as that is an
- *    accident waiting to happen should someone add a callout in
- *    do_exit() after the cpuset_exit() call that might allocate.
- *    If a task tries to allocate memory with an invalid cpuset,
- *    it will oops in cpuset_update_task_memory_state().
- *
- *    We call cpuset_exit() while the task is still competent to
- *    handle notify_on_release(), then leave the task attached to
- *    the root cpuset (top_cpuset) for the remainder of its exit.
- *
- *    To do this properly, we would increment the reference count on
- *    top_cpuset, and near the very end of the kernel/exit.c do_exit()
- *    code we would add a second cpuset function call, to drop that
- *    reference.  This would just create an unnecessary hot spot on
- *    the top_cpuset reference count, to no avail.
- *
- *    Normally, holding a reference to a cpuset without bumping its
- *    count is unsafe.   The cpuset could go away, or someone could
- *    attach us to a different cpuset, decrementing the count on
- *    the first cpuset that we never incremented.  But in this case,
- *    top_cpuset isn't going away, and either task has PF_EXITING set,
- *    which wards off any attach_task() attempts, or task is a failed
- *    fork, never visible to attach_task.
- *
- *    Another way to do this would be to set the cpuset pointer
- *    to NULL here, and check in cpuset_update_task_memory_state()
- *    for a NULL pointer.  This hack avoids that NULL check, for no
- *    cost (other than this way too long comment ;).
- **/
 
-void cpuset_exit(struct task_struct *tsk)
-{
-       struct cpuset *cs;
-
-       task_lock(current);
-       cs = tsk->cpuset;
-       tsk->cpuset = &top_cpuset;      /* the_top_cpuset_hack - see above */
-       task_unlock(current);
-
-       if (notify_on_release(cs)) {
-               char *pathbuf = NULL;
-
-               mutex_lock(&manage_mutex);
-               if (atomic_dec_and_test(&cs->count))
-                       check_for_release(cs, &pathbuf);
-               mutex_unlock(&manage_mutex);
-               cpuset_release_agent(pathbuf);
-       } else {
-               atomic_dec(&cs->count);
-       }
-}
-
-/**
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
  *
@@ -2230,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
        cpumask_t mask;
 
        mutex_lock(&callback_mutex);
+       mask = cpuset_cpus_allowed_locked(tsk);
+       mutex_unlock(&callback_mutex);
+
+       return mask;
+}
+
+/**
+ * cpuset_cpus_allowed_locked - return cpus_allowed mask from a task's cpuset.
+ * Must be called with callback_mutex held.
+ **/
+cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
+{
+       cpumask_t mask;
+
        task_lock(tsk);
-       guarantee_online_cpus(tsk->cpuset, &mask);
+       guarantee_online_cpus(task_cs(tsk), &mask);
        task_unlock(tsk);
-       mutex_unlock(&callback_mutex);
 
        return mask;
 }
@@ -2259,7 +1860,7 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 
        mutex_lock(&callback_mutex);
        task_lock(tsk);
-       guarantee_online_mems(tsk->cpuset, &mask);
+       guarantee_online_mems(task_cs(tsk), &mask);
        task_unlock(tsk);
        mutex_unlock(&callback_mutex);
 
@@ -2390,7 +1991,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
        mutex_lock(&callback_mutex);
 
        task_lock(current);
-       cs = nearest_exclusive_ancestor(current->cpuset);
+       cs = nearest_exclusive_ancestor(task_cs(current));
        task_unlock(current);
 
        allowed = node_isset(node, cs->mems_allowed);
@@ -2550,14 +2151,12 @@ int cpuset_memory_pressure_enabled __read_mostly;
 
 void __cpuset_memory_pressure_bump(void)
 {
-       struct cpuset *cs;
-
        task_lock(current);
-       cs = current->cpuset;
-       fmeter_markevent(&cs->fmeter);
+       fmeter_markevent(&task_cs(current)->fmeter);
        task_unlock(current);
 }
 
+#ifdef CONFIG_PROC_PID_CPUSET
 /*
  * proc_cpuset_show()
  *  - Print tasks cpuset path into seq_file.
@@ -2569,11 +2168,12 @@ void __cpuset_memory_pressure_bump(void)
  *    the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks
  *    cpuset to top_cpuset.
  */
-static int proc_cpuset_show(struct seq_file *m, void *v)
+static int proc_cpuset_show(struct seq_file *m, void *unused_v)
 {
        struct pid *pid;
        struct task_struct *tsk;
        char *buf;
+       struct cgroup_subsys_state *css;
        int retval;
 
        retval = -ENOMEM;
@@ -2588,15 +2188,15 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
                goto out_free;
 
        retval = -EINVAL;
-       mutex_lock(&manage_mutex);
-
-       retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
+       cgroup_lock();
+       css = task_subsys_state(tsk, cpuset_subsys_id);
+       retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
        if (retval < 0)
                goto out_unlock;
        seq_puts(m, buf);
        seq_putc(m, '\n');
 out_unlock:
-       mutex_unlock(&manage_mutex);
+       cgroup_unlock();
        put_task_struct(tsk);
 out_free:
        kfree(buf);
@@ -2616,6 +2216,7 @@ const struct file_operations proc_cpuset_operations = {
        .llseek         = seq_lseek,
        .release        = single_release,
 };
+#endif /* CONFIG_PROC_PID_CPUSET */
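proc_cpuset_show() above backs /proc/<pid>/cpuset, printing the task's cgroup path. A quick userspace sketch to read it for the current task (assumes CONFIG_PROC_PID_CPUSET=y):

#include <stdio.h>

int main(void)
{
        char line[256];
        FILE *f = fopen("/proc/self/cpuset", "r");

        if (!f)
                return 1;
        if (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* "/" for a task in the top cpuset */
        fclose(f);
        return 0;
}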
 
 /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
 char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
diff --git a/kernel/die_notifier.c b/kernel/die_notifier.c
deleted file mode 100644 (file)
index 0d98827..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/vmalloc.h>
-#include <linux/kdebug.h>
-
-
-static ATOMIC_NOTIFIER_HEAD(die_chain);
-
-int notify_die(enum die_val val, const char *str,
-              struct pt_regs *regs, long err, int trap, int sig)
-{
-       struct die_args args = {
-               .regs           = regs,
-               .str            = str,
-               .err            = err,
-               .trapnr         = trap,
-               .signr          = sig,
-
-       };
-
-       return atomic_notifier_call_chain(&die_chain, val, &args);
-}
-
-int register_die_notifier(struct notifier_block *nb)
-{
-       vmalloc_sync_all();
-       return atomic_notifier_chain_register(&die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_die_notifier);
-
-int unregister_die_notifier(struct notifier_block *nb)
-{
-       return atomic_notifier_chain_unregister(&die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_die_notifier);
-
-
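The deleted file is a thin wrapper around an atomic notifier chain: callers register a callback, and notify_die() walks the chain passing a struct die_args. A minimal single-threaded userspace sketch of the same chain pattern (hypothetical names; no locking and no NOTIFY_STOP handling, both of which the kernel version provides):

#include <stdio.h>

struct notifier {
        int (*call)(int event, void *data);
        struct notifier *next;
};

static struct notifier *chain;

static void chain_register(struct notifier *nb)
{
        nb->next = chain;               /* the kernel does this atomically */
        chain = nb;
}

static int chain_call(int event, void *data)
{
        struct notifier *nb;
        int ret = 0;

        for (nb = chain; nb; nb = nb->next)
                ret = nb->call(event, data);
        return ret;
}

static int print_die(int event, void *data)
{
        printf("die event %d: %s\n", event, (const char *)data);
        return 0;
}

int main(void)
{
        struct notifier nb = { .call = print_die };

        chain_register(&nb);
        return chain_call(1, "oops");
}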
diff --git a/kernel/exit.c b/kernel/exit.c
index 2c704c86edb3200864d4ec950c4e2bd89cbf3443..f1aec27f1df00eaae4cbb9c3f99dde1b4e1c8520 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/freezer.h>
-#include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #include <linux/posix-timers.h>
@@ -148,6 +148,7 @@ void release_task(struct task_struct * p)
        int zap_leader;
 repeat:
        atomic_dec(&p->user->processes);
+       proc_flush_task(p);
        write_lock_irq(&tasklist_lock);
        ptrace_unlink(p);
        BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -175,7 +176,6 @@ repeat:
        }
 
        write_unlock_irq(&tasklist_lock);
-       proc_flush_task(p);
        release_thread(p);
        call_rcu(&p->rcu, delayed_put_task_struct);
 
@@ -221,7 +221,7 @@ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignor
        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                if (p == ignored_task
                                || p->exit_state
-                               || is_init(p->real_parent))
+                               || is_global_init(p->real_parent))
                        continue;
                if (task_pgrp(p->real_parent) != pgrp &&
                    task_session(p->real_parent) == task_session(p)) {
@@ -299,14 +299,14 @@ void __set_special_pids(pid_t session, pid_t pgrp)
 {
        struct task_struct *curr = current->group_leader;
 
-       if (process_session(curr) != session) {
+       if (task_session_nr(curr) != session) {
                detach_pid(curr, PIDTYPE_SID);
-               set_signal_session(curr->signal, session);
+               set_task_session(curr, session);
                attach_pid(curr, PIDTYPE_SID, find_pid(session));
        }
-       if (process_group(curr) != pgrp) {
+       if (task_pgrp_nr(curr) != pgrp) {
                detach_pid(curr, PIDTYPE_PGID);
-               curr->signal->pgrp = pgrp;
+               set_task_pgrp(curr, pgrp);
                attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp));
        }
 }
@@ -400,11 +400,12 @@ void daemonize(const char *name, ...)
        current->fs = fs;
        atomic_inc(&fs->count);
 
-       exit_task_namespaces(current);
-       current->nsproxy = init_task.nsproxy;
-       get_task_namespaces(current);
+       if (current->nsproxy != init_task.nsproxy) {
+               get_nsproxy(init_task.nsproxy);
+               switch_task_namespaces(current, init_task.nsproxy);
+       }
 
-       exit_files(current);
+       exit_files(current);
        current->files = init_task.files;
        atomic_inc(&current->files->count);
 
@@ -492,7 +493,7 @@ void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
 }
 EXPORT_SYMBOL(reset_files_struct);
 
-static inline void __exit_files(struct task_struct *tsk)
+static void __exit_files(struct task_struct *tsk)
 {
        struct files_struct * files = tsk->files;
 
@@ -509,7 +510,7 @@ void exit_files(struct task_struct *tsk)
        __exit_files(tsk);
 }
 
-static inline void __put_fs_struct(struct fs_struct *fs)
+static void __put_fs_struct(struct fs_struct *fs)
 {
        /* No need to hold fs->lock if we are killing it */
        if (atomic_dec_and_test(&fs->count)) {
@@ -530,7 +531,7 @@ void put_fs_struct(struct fs_struct *fs)
        __put_fs_struct(fs);
 }
 
-static inline void __exit_fs(struct task_struct *tsk)
+static void __exit_fs(struct task_struct *tsk)
 {
        struct fs_struct * fs = tsk->fs;
 
@@ -665,19 +666,22 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
  * the child reaper process (ie "init") in our pid
  * space.
  */
-static void
-forget_original_parent(struct task_struct *father, struct list_head *to_release)
+static void forget_original_parent(struct task_struct *father)
 {
-       struct task_struct *p, *reaper = father;
-       struct list_head *_p, *_n;
+       struct task_struct *p, *n, *reaper = father;
+       struct list_head ptrace_dead;
+
+       INIT_LIST_HEAD(&ptrace_dead);
+
+       write_lock_irq(&tasklist_lock);
 
        do {
                reaper = next_thread(reaper);
                if (reaper == father) {
-                       reaper = child_reaper(father);
+                       reaper = task_child_reaper(father);
                        break;
                }
-       } while (reaper->exit_state);
+       } while (reaper->flags & PF_EXITING);
 
        /*
         * There are only two places where our children can be:
@@ -687,9 +691,8 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
         *
         * Search them and reparent children.
         */
-       list_for_each_safe(_p, _n, &father->children) {
+       list_for_each_entry_safe(p, n, &father->children, sibling) {
                int ptrace;
-               p = list_entry(_p, struct task_struct, sibling);
 
                ptrace = p->ptrace;
 
@@ -715,13 +718,23 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
                 * while it was being traced by us, to be able to see it in wait4.
                 */
                if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
-                       list_add(&p->ptrace_list, to_release);
+                       list_add(&p->ptrace_list, &ptrace_dead);
        }
-       list_for_each_safe(_p, _n, &father->ptrace_children) {
-               p = list_entry(_p, struct task_struct, ptrace_list);
+
+       list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
                p->real_parent = reaper;
                reparent_thread(p, father, 1);
        }
+
+       write_unlock_irq(&tasklist_lock);
+       BUG_ON(!list_empty(&father->children));
+       BUG_ON(!list_empty(&father->ptrace_children));
+
+       list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
+               list_del_init(&p->ptrace_list);
+               release_task(p);
+       }
+
 }
 
 /*
@@ -732,7 +745,6 @@ static void exit_notify(struct task_struct *tsk)
 {
        int state;
        struct task_struct *t;
-       struct list_head ptrace_dead, *_p, *_n;
        struct pid *pgrp;
 
        if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
@@ -753,8 +765,6 @@ static void exit_notify(struct task_struct *tsk)
                spin_unlock_irq(&tsk->sighand->siglock);
        }
 
-       write_lock_irq(&tasklist_lock);
-
        /*
         * This does two things:
         *
@@ -763,12 +773,10 @@ static void exit_notify(struct task_struct *tsk)
         *      as a result of our exiting, and if they have any stopped
         *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
         */
+       forget_original_parent(tsk);
+       exit_task_namespaces(tsk);
 
-       INIT_LIST_HEAD(&ptrace_dead);
-       forget_original_parent(tsk, &ptrace_dead);
-       BUG_ON(!list_empty(&tsk->children));
-       BUG_ON(!list_empty(&tsk->ptrace_children));
-
+       write_lock_irq(&tasklist_lock);
        /*
         * Check to see if any process groups have become orphaned
         * as a result of our exiting, and if they have any stopped
@@ -792,7 +800,7 @@ static void exit_notify(struct task_struct *tsk)
        /* Let father know we died
         *
         * Thread signals are configurable, but you aren't going to use
-        * that to send signals to arbitary processes. 
+        * that to send signals to arbitrary processes.
         * That stops right now.
         *
         * If the parent exec id doesn't match the exec id we saved
@@ -833,12 +841,6 @@ static void exit_notify(struct task_struct *tsk)
 
        write_unlock_irq(&tasklist_lock);
 
-       list_for_each_safe(_p, _n, &ptrace_dead) {
-               list_del_init(_p);
-               t = list_entry(_p, struct task_struct, ptrace_list);
-               release_task(t);
-       }
-
        /* If the process is dead, release it - nobody will wait for it */
        if (state == EXIT_DEAD)
                release_task(tsk);
@@ -874,10 +876,35 @@ static inline void check_stack_usage(void) {}
 
 static inline void exit_child_reaper(struct task_struct *tsk)
 {
-       if (likely(tsk->group_leader != child_reaper(tsk)))
+       if (likely(tsk->group_leader != task_child_reaper(tsk)))
                return;
 
-       panic("Attempted to kill init!");
+       if (tsk->nsproxy->pid_ns == &init_pid_ns)
+               panic("Attempted to kill init!");
+
+       /*
+        * @tsk is the last thread in the 'cgroup-init' and is exiting.
+        * Terminate all remaining processes in the namespace and reap them
+        * before exiting @tsk.
+        *
+        * Note that @tsk (last thread of cgroup-init) may not necessarily
+        * be the child-reaper (i.e. main thread of cgroup-init) of the
+        * namespace, i.e. the child_reaper may have already exited.
+        *
+        * Even after a child_reaper exits, we let it inherit orphaned children,
+        * because pid_ns->child_reaper remains valid as long as there is
+        * at least one living sub-thread in the cgroup init.
+        *
+        * This living sub-thread of the cgroup-init will be notified when
+        * a child inherited by the 'child-reaper' exits (do_notify_parent()
+        * uses __group_send_sig_info()). Further, when reaping child processes,
+        * do_wait() iterates over children of all living sub-threads.
+        *
+        * I.e. even though the 'child_reaper' thread is listed as the parent
+        * of the orphaned children, any living sub-thread in the cgroup-init
+        * can perform the role of the child_reaper.
+        */
+       zap_pid_ns_processes(tsk->nsproxy->pid_ns);
 }
 
 fastcall NORET_TYPE void do_exit(long code)
@@ -932,7 +959,7 @@ fastcall NORET_TYPE void do_exit(long code)
 
        if (unlikely(in_atomic()))
                printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
-                               current->comm, current->pid,
+                               current->comm, task_pid_nr(current),
                                preempt_count());
 
        acct_update_integrals(tsk);
@@ -972,7 +999,7 @@ fastcall NORET_TYPE void do_exit(long code)
        __exit_fs(tsk);
        check_stack_usage();
        exit_thread();
-       cpuset_exit(tsk);
+       cgroup_exit(tsk, 1);
        exit_keys(tsk);
 
        if (group_dead && tsk->signal->leader)
@@ -983,7 +1010,6 @@ fastcall NORET_TYPE void do_exit(long code)
                module_put(tsk->binfmt->module);
 
        proc_exit_connector(tsk);
-       exit_task_namespaces(tsk);
        exit_notify(tsk);
 #ifdef CONFIG_NUMA
        mpol_free(tsk->mempolicy);
@@ -1086,15 +1112,17 @@ asmlinkage void sys_exit_group(int error_code)
 static int eligible_child(pid_t pid, int options, struct task_struct *p)
 {
        int err;
+       struct pid_namespace *ns;
 
+       ns = current->nsproxy->pid_ns;
        if (pid > 0) {
-               if (p->pid != pid)
+               if (task_pid_nr_ns(p, ns) != pid)
                        return 0;
        } else if (!pid) {
-               if (process_group(p) != process_group(current))
+               if (task_pgrp_nr_ns(p, ns) != task_pgrp_vnr(current))
                        return 0;
        } else if (pid != -1) {
-               if (process_group(p) != -pid)
+               if (task_pgrp_nr_ns(p, ns) != -pid)
                        return 0;
        }
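eligible_child() implements the waitpid(2) pid-argument conventions, now evaluated against the caller's pid namespace: pid > 0 matches that child only, 0 matches the caller's process group, -1 matches any child, and pid < -1 matches process group -pid. The userspace view, for reference:

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int status;
        pid_t child = fork();

        if (child == 0)
                _exit(7);                       /* child exits immediately */

        waitpid(child, &status, 0);             /* pid > 0: that child only */
        /* waitpid(0, ...)  waits for any child in our process group,
         * waitpid(-1, ...) for any child at all, and
         * waitpid(-pgid, ...) for any child in process group pgid. */
        if (WIFEXITED(status))
                printf("child %d exited with %d\n",
                       (int)child, WEXITSTATUS(status));
        return 0;
}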
 
@@ -1164,9 +1192,12 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
 {
        unsigned long state;
        int retval, status, traced;
+       struct pid_namespace *ns;
+
+       ns = current->nsproxy->pid_ns;
 
        if (unlikely(noreap)) {
-               pid_t pid = p->pid;
+               pid_t pid = task_pid_nr_ns(p, ns);
                uid_t uid = p->uid;
                int exit_code = p->exit_code;
                int why, status;
@@ -1285,11 +1316,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
                        retval = put_user(status, &infop->si_status);
        }
        if (!retval && infop)
-               retval = put_user(p->pid, &infop->si_pid);
+               retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
        if (!retval && infop)
                retval = put_user(p->uid, &infop->si_uid);
        if (!retval)
-               retval = p->pid;
+               retval = task_pid_nr_ns(p, ns);
 
        if (traced) {
                write_lock_irq(&tasklist_lock);
@@ -1326,6 +1357,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
                             int __user *stat_addr, struct rusage __user *ru)
 {
        int retval, exit_code;
+       struct pid_namespace *ns;
 
        if (!p->exit_code)
                return 0;
@@ -1344,11 +1376,12 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
         * keep holding onto the tasklist_lock while we call getrusage and
         * possibly take page faults for user memory.
         */
+       ns = current->nsproxy->pid_ns;
        get_task_struct(p);
        read_unlock(&tasklist_lock);
 
        if (unlikely(noreap)) {
-               pid_t pid = p->pid;
+               pid_t pid = task_pid_nr_ns(p, ns);
                uid_t uid = p->uid;
                int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
 
@@ -1419,11 +1452,11 @@ bail_ref:
        if (!retval && infop)
                retval = put_user(exit_code, &infop->si_status);
        if (!retval && infop)
-               retval = put_user(p->pid, &infop->si_pid);
+               retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
        if (!retval && infop)
                retval = put_user(p->uid, &infop->si_uid);
        if (!retval)
-               retval = p->pid;
+               retval = task_pid_nr_ns(p, ns);
        put_task_struct(p);
 
        BUG_ON(!retval);
@@ -1443,6 +1476,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
        int retval;
        pid_t pid;
        uid_t uid;
+       struct pid_namespace *ns;
 
        if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
                return 0;
@@ -1457,7 +1491,8 @@ static int wait_task_continued(struct task_struct *p, int noreap,
                p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
        spin_unlock_irq(&p->sighand->siglock);
 
-       pid = p->pid;
+       ns = current->nsproxy->pid_ns;
+       pid = task_pid_nr_ns(p, ns);
        uid = p->uid;
        get_task_struct(p);
        read_unlock(&tasklist_lock);
@@ -1468,7 +1503,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
                if (!retval && stat_addr)
                        retval = put_user(0xffff, stat_addr);
                if (!retval)
-                       retval = p->pid;
+                       retval = task_pid_nr_ns(p, ns);
        } else {
                retval = wait_noreap_copyout(p, pid, uid,
                                             CLD_CONTINUED, SIGCONT,
@@ -1517,12 +1552,9 @@ repeat:
        tsk = current;
        do {
                struct task_struct *p;
-               struct list_head *_p;
                int ret;
 
-               list_for_each(_p,&tsk->children) {
-                       p = list_entry(_p, struct task_struct, sibling);
-
+               list_for_each_entry(p, &tsk->children, sibling) {
                        ret = eligible_child(pid, options, p);
                        if (!ret)
                                continue;
@@ -1604,9 +1636,8 @@ check_continued:
                        }
                }
                if (!flag) {
-                       list_for_each(_p, &tsk->ptrace_children) {
-                               p = list_entry(_p, struct task_struct,
-                                               ptrace_list);
+                       list_for_each_entry(p, &tsk->ptrace_children,
+                                           ptrace_list) {
                                if (!eligible_child(pid, options, p))
                                        continue;
                                flag = 1;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2ce28f165e31a1e671e014b10a852e53cd1c535b..ddafdfac9456e151b2d8731b52475a5af4ca68e1 100644 (file)
@@ -29,7 +29,7 @@
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
-#include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
@@ -50,6 +50,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
 #include <linux/tty.h>
+#include <linux/proc_fs.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -116,7 +117,7 @@ EXPORT_SYMBOL(free_task);
 
 void __put_task_struct(struct task_struct *tsk)
 {
-       WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+       WARN_ON(!tsk->exit_state);
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);
 
@@ -205,7 +206,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 }
 
 #ifdef CONFIG_MMU
-static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
        struct vm_area_struct *mpnt, *tmp, **pprev;
        struct rb_node **rb_link, *rb_parent;
@@ -583,7 +584,7 @@ fail_nomem:
        return retval;
 }
 
-static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 {
        struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
        /* We don't need to lock fs - think why ;-) */
@@ -615,7 +616,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 
 EXPORT_SYMBOL_GPL(copy_fs_struct);
 
-static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
        if (clone_flags & CLONE_FS) {
                atomic_inc(&current->fs->count);
@@ -818,7 +819,7 @@ int unshare_files(void)
 
 EXPORT_SYMBOL(unshare_files);
 
-static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct sighand_struct *sig;
 
@@ -841,7 +842,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
                kmem_cache_free(sighand_cachep, sighand);
 }
 
-static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
        struct signal_struct *sig;
        int ret;
@@ -923,7 +924,7 @@ void __cleanup_signal(struct signal_struct *sig)
        kmem_cache_free(signal_cachep, sig);
 }
 
-static inline void cleanup_signal(struct task_struct *tsk)
+static void cleanup_signal(struct task_struct *tsk)
 {
        struct signal_struct *sig = tsk->signal;
 
@@ -933,7 +934,7 @@ static inline void cleanup_signal(struct task_struct *tsk)
                __cleanup_signal(sig);
 }
 
-static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
        unsigned long new_flags = p->flags;
 
@@ -949,10 +950,10 @@ asmlinkage long sys_set_tid_address(int __user *tidptr)
 {
        current->clear_child_tid = tidptr;
 
-       return current->pid;
+       return task_pid_vnr(current);
 }
 
-static inline void rt_mutex_init_task(struct task_struct *p)
+static void rt_mutex_init_task(struct task_struct *p)
 {
        spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
@@ -973,12 +974,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                                        unsigned long stack_start,
                                        struct pt_regs *regs,
                                        unsigned long stack_size,
-                                       int __user *parent_tidptr,
                                        int __user *child_tidptr,
                                        struct pid *pid)
 {
        int retval;
-       struct task_struct *p = NULL;
+       struct task_struct *p;
+       int cgroup_callbacks_done = 0;
 
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
@@ -1042,12 +1043,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->did_exec = 0;
        delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
        copy_flags(clone_flags, p);
-       p->pid = pid_nr(pid);
-       retval = -EFAULT;
-       if (clone_flags & CLONE_PARENT_SETTID)
-               if (put_user(p->pid, parent_tidptr))
-                       goto bad_fork_cleanup_delays_binfmt;
-
        INIT_LIST_HEAD(&p->children);
        INIT_LIST_HEAD(&p->sibling);
        p->vfork_done = NULL;
@@ -1087,13 +1082,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
        p->io_context = NULL;
        p->audit_context = NULL;
-       cpuset_fork(p);
+       cgroup_fork(p);
 #ifdef CONFIG_NUMA
        p->mempolicy = mpol_copy(p->mempolicy);
        if (IS_ERR(p->mempolicy)) {
                retval = PTR_ERR(p->mempolicy);
                p->mempolicy = NULL;
-               goto bad_fork_cleanup_cpuset;
+               goto bad_fork_cleanup_cgroup;
        }
        mpol_fix_fork_child_flag(p);
 #endif
@@ -1126,10 +1121,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->blocked_on = NULL; /* not blocked yet */
 #endif
 
-       p->tgid = p->pid;
-       if (clone_flags & CLONE_THREAD)
-               p->tgid = current->tgid;
-
        if ((retval = security_task_alloc(p)))
                goto bad_fork_cleanup_policy;
        if ((retval = audit_alloc(p)))
@@ -1155,6 +1146,24 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (retval)
                goto bad_fork_cleanup_namespaces;
 
+       if (pid != &init_struct_pid) {
+               retval = -ENOMEM;
+               pid = alloc_pid(task_active_pid_ns(p));
+               if (!pid)
+                       goto bad_fork_cleanup_namespaces;
+
+               if (clone_flags & CLONE_NEWPID) {
+                       retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+                       if (retval < 0)
+                               goto bad_fork_free_pid;
+               }
+       }
+
+       p->pid = pid_nr(pid);
+       p->tgid = p->pid;
+       if (clone_flags & CLONE_THREAD)
+               p->tgid = current->tgid;
+
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
        /*
         * Clear TID on mm_release()?
@@ -1204,6 +1213,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p, clone_flags);
 
+       /* Now that the task is set up, run cgroup callbacks if
+        * necessary. We need to run them before the task is visible
+        * on the tasklist. */
+       cgroup_fork_callbacks(p);
+       cgroup_callbacks_done = 1;
+
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
 
@@ -1246,7 +1261,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                spin_unlock(&current->sighand->siglock);
                write_unlock_irq(&tasklist_lock);
                retval = -ERESTARTNOINTR;
-               goto bad_fork_cleanup_namespaces;
+               goto bad_fork_free_pid;
        }
 
        if (clone_flags & CLONE_THREAD) {
@@ -1275,11 +1290,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                        __ptrace_link(p, current->parent);
 
                if (thread_group_leader(p)) {
-                       p->signal->tty = current->signal->tty;
-                       p->signal->pgrp = process_group(current);
-                       set_signal_session(p->signal, process_session(current));
-                       attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
-                       attach_pid(p, PIDTYPE_SID, task_session(current));
+                       if (clone_flags & CLONE_NEWPID) {
+                               p->nsproxy->pid_ns->child_reaper = p;
+                               p->signal->tty = NULL;
+                               set_task_pgrp(p, p->pid);
+                               set_task_session(p, p->pid);
+                               attach_pid(p, PIDTYPE_PGID, pid);
+                               attach_pid(p, PIDTYPE_SID, pid);
+                       } else {
+                               p->signal->tty = current->signal->tty;
+                               set_task_pgrp(p, task_pgrp_nr(current));
+                               set_task_session(p, task_session_nr(current));
+                               attach_pid(p, PIDTYPE_PGID,
+                                               task_pgrp(current));
+                               attach_pid(p, PIDTYPE_SID,
+                                               task_session(current));
+                       }
 
                        list_add_tail_rcu(&p->tasks, &init_task.tasks);
                        __get_cpu_var(process_counts)++;
@@ -1292,8 +1318,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        spin_unlock(&current->sighand->siglock);
        write_unlock_irq(&tasklist_lock);
        proc_fork_connector(p);
+       cgroup_post_fork(p);
        return p;
 
+bad_fork_free_pid:
+       if (pid != &init_struct_pid)
+               free_pid(pid);
 bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
 bad_fork_cleanup_keys:
@@ -1318,10 +1348,9 @@ bad_fork_cleanup_security:
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
        mpol_free(p->mempolicy);
-bad_fork_cleanup_cpuset:
+bad_fork_cleanup_cgroup:
 #endif
-       cpuset_exit(p);
-bad_fork_cleanup_delays_binfmt:
+       cgroup_exit(p, cgroup_callbacks_done);
        delayacct_tsk_free(p);
        if (p->binfmt)
                module_put(p->binfmt->module);
@@ -1348,7 +1377,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
        struct task_struct *task;
        struct pt_regs regs;
 
-       task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL,
+       task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
                                &init_struct_pid);
        if (!IS_ERR(task))
                init_idle(task, cpu);
@@ -1356,7 +1385,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
        return task;
 }
 
-static inline int fork_traceflag (unsigned clone_flags)
+static int fork_traceflag(unsigned clone_flags)
 {
        if (clone_flags & CLONE_UNTRACED)
                return 0;
@@ -1387,19 +1416,16 @@ long do_fork(unsigned long clone_flags,
 {
        struct task_struct *p;
        int trace = 0;
-       struct pid *pid = alloc_pid();
        long nr;
 
-       if (!pid)
-               return -EAGAIN;
-       nr = pid->nr;
        if (unlikely(current->ptrace)) {
                trace = fork_traceflag (clone_flags);
                if (trace)
                        clone_flags |= CLONE_PTRACE;
        }
 
-       p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+       p = copy_process(clone_flags, stack_start, regs, stack_size,
+                       child_tidptr, NULL);
        /*
         * Do this prior to waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
@@ -1407,6 +1433,17 @@ long do_fork(unsigned long clone_flags,
        if (!IS_ERR(p)) {
                struct completion vfork;
 
+               /*
+                * task_pid_nr_ns() alone would be enough here, but this
+                * conditional keeps the regular fork() path cheaper
+                */
+               nr = (clone_flags & CLONE_NEWPID) ?
+                       task_pid_nr_ns(p, current->nsproxy->pid_ns) :
+                               task_pid_vnr(p);
+
+               if (clone_flags & CLONE_PARENT_SETTID)
+                       put_user(nr, parent_tidptr);
+
                if (clone_flags & CLONE_VFORK) {
                        p->vfork_done = &vfork;
                        init_completion(&vfork);
@@ -1440,7 +1477,6 @@ long do_fork(unsigned long clone_flags,
                        }
                }
        } else {
-               free_pid(pid);
                nr = PTR_ERR(p);
        }
        return nr;
@@ -1485,7 +1521,7 @@ void __init proc_caches_init(void)
  * Check constraints on flags passed to the unshare system call and
  * force unsharing of additional process context as appropriate.
  */
-static inline void check_unshare_flags(unsigned long *flags_ptr)
+static void check_unshare_flags(unsigned long *flags_ptr)
 {
        /*
         * If unsharing a thread from a thread group, must also
@@ -1617,7 +1653,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
        struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
        struct files_struct *fd, *new_fd = NULL;
        struct sem_undo_list *new_ulist = NULL;
-       struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+       struct nsproxy *new_nsproxy = NULL;
 
        check_unshare_flags(&unshare_flags);
 
@@ -1647,14 +1683,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 
        if (new_fs ||  new_mm || new_fd || new_ulist || new_nsproxy) {
 
-               task_lock(current);
-
                if (new_nsproxy) {
-                       old_nsproxy = current->nsproxy;
-                       current->nsproxy = new_nsproxy;
-                       new_nsproxy = old_nsproxy;
+                       switch_task_namespaces(current, new_nsproxy);
+                       new_nsproxy = NULL;
                }
 
+               task_lock(current);
+
                if (new_fs) {
                        fs = current->fs;
                        current->fs = new_fs;
index e45a65e41686c19e599140e0c4e04de5203f5bee..32710451dc20ea7763cc22d02f0363c9155a772f 100644 (file)
@@ -53,6 +53,9 @@
 #include <linux/signal.h>
 #include <linux/module.h>
 #include <linux/magic.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
+
 #include <asm/futex.h>
 
 #include "rtmutex_common.h"
@@ -443,8 +446,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
        struct task_struct *p;
 
        rcu_read_lock();
-       p = find_task_by_pid(pid);
-
+       p = find_task_by_vpid(pid);
        if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
                p = ERR_PTR(-ESRCH);
        else
@@ -653,7 +655,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
        if (!(uval & FUTEX_OWNER_DIED)) {
                int ret = 0;
 
-               newval = FUTEX_WAITERS | new_owner->pid;
+               newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
                curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
@@ -1106,7 +1108,7 @@ static void unqueue_me_pi(struct futex_q *q)
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
                                struct task_struct *curr)
 {
-       u32 newtid = curr->pid | FUTEX_WAITERS;
+       u32 newtid = task_pid_vnr(curr) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
        u32 uval, curval, newval;
        int ret;
@@ -1368,7 +1370,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         * (by doing a 0 -> TID atomic cmpxchg), while holding all
         * the locks. It will most likely not succeed.
         */
-       newval = current->pid;
+       newval = task_pid_vnr(current);
 
        curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
 
@@ -1379,7 +1381,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         * Detect deadlocks. In case of REQUEUE_PI this is a valid
         * situation and we return success to user space.
         */
-       if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
+       if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
                ret = -EDEADLK;
                goto out_unlock_release_sem;
        }
@@ -1408,7 +1410,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
         */
        if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
                /* Keep the OWNER_DIED bit */
-               newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+               newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
                ownerdied = 0;
                lock_taken = 1;
        }
@@ -1587,7 +1589,7 @@ retry:
        /*
         * We release only a lock we actually own:
         */
-       if ((uval & FUTEX_TID_MASK) != current->pid)
+       if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
        /*
         * First take all the futex related locks:
@@ -1608,7 +1610,7 @@ retry_unlocked:
         * anyone else up:
         */
        if (!(uval & FUTEX_OWNER_DIED))
-               uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
+               uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
 
 
        if (unlikely(uval == -EFAULT))
@@ -1617,7 +1619,7 @@ retry_unlocked:
         * Rare case: we managed to release the lock atomically,
         * no need to wake anyone else up:
         */
-       if (unlikely(uval == current->pid))
+       if (unlikely(uval == task_pid_vnr(current)))
                goto out_unlock;
 
        /*
@@ -1854,7 +1856,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
 
                ret = -ESRCH;
                rcu_read_lock();
-               p = find_task_by_pid(pid);
+               p = find_task_by_vpid(pid);
                if (!p)
                        goto err_unlock;
                ret = -EPERM;
@@ -1887,7 +1889,7 @@ retry:
        if (get_user(uval, uaddr))
                return -1;
 
-       if ((uval & FUTEX_TID_MASK) == curr->pid) {
+       if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
                /*
                 * Ok, this dying thread is truly holding a futex
                 * of interest. Set the OWNER_DIED bit atomically
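
The futex hunks above all make the same substitution: the TID compared against the futex word is now task_pid_vnr(), the id the task has inside its own pid namespace, because that is the value userspace writes there. A hedged userspace sketch of the 0 -> TID convention these checks enforce; the futex op names and FUTEX_TID_MASK come from the futex(2) uapi, and the single-threaded fast path shown never actually enters the kernel:

/* pi_futex_sketch.c - hedged sketch of the 0 -> TID PI-futex convention. */
#define _GNU_SOURCE
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static uint32_t futex_word;            /* 0 means unlocked */

static long sys_futex(uint32_t *uaddr, int op, uint32_t val)
{
        return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

int main(void)
{
        uint32_t tid = syscall(SYS_gettid);   /* namespace-local tid */

        /* Uncontended lock: move the word from 0 to our TID - the same
         * cmpxchg futex_lock_pi() retries in the kernel under contention. */
        if (!__sync_bool_compare_and_swap(&futex_word, 0, tid))
                sys_futex(&futex_word, FUTEX_LOCK_PI, 0);

        printf("owner tid in word: %u\n", futex_word & FUTEX_TID_MASK);

        /* Unlock: only the owner may release, cf. the -EPERM check above. */
        if (!__sync_bool_compare_and_swap(&futex_word, tid, 0))
                sys_futex(&futex_word, FUTEX_UNLOCK_PI, 0);
        return 0;
}
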
index 2c2e2954b713b2681a5bf120c0097cb2c2affc8c..00b572666cc76178d81979f512dc9e3078b08fda 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/linkage.h>
 #include <linux/compat.h>
+#include <linux/nsproxy.h>
 #include <linux/futex.h>
 
 #include <asm/uaccess.h>
@@ -124,7 +125,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 
                ret = -ESRCH;
                read_lock(&tasklist_lock);
-               p = find_task_by_pid(pid);
+               p = find_task_by_vpid(pid);
                if (!p)
                        goto err_unlock;
                ret = -EPERM;
index e9f1b4ea504d920bd514deb8868536fd10de77bb..aa74a1ef2da8003768519e4283d2b392be0a4171 100644 (file)
@@ -51,7 +51,7 @@ struct resource crashk_res = {
 
 int kexec_should_crash(struct task_struct *p)
 {
-       if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
+       if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
                return 1;
        return 0;
 }
@@ -1146,6 +1146,172 @@ static int __init crash_notes_memory_init(void)
 }
 module_init(crash_notes_memory_init)
 
+
+/*
+ * parsing the "crashkernel" commandline
+ *
+ * this code is intended to be called from architecture specific code
+ */
+
+
+/*
+ * This function parses command lines in the format
+ *
+ *   crashkernel=ramsize-range:size[,...][@offset]
+ *
+ * The function returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_mem(char                   *cmdline,
+                                       unsigned long long      system_ram,
+                                       unsigned long long      *crash_size,
+                                       unsigned long long      *crash_base)
+{
+       char *cur = cmdline, *tmp;
+
+       /* for each entry of the comma-separated list */
+       do {
+               unsigned long long start, end = ULLONG_MAX, size;
+
+               /* get the start of the range */
+               start = memparse(cur, &tmp);
+               if (cur == tmp) {
+                       pr_warning("crashkernel: Memory value expected\n");
+                       return -EINVAL;
+               }
+               cur = tmp;
+               if (*cur != '-') {
+                       pr_warning("crashkernel: '-' expected\n");
+                       return -EINVAL;
+               }
+               cur++;
+
+               /* if no ':' is here, then we read the end of the range */
+               if (*cur != ':') {
+                       end = memparse(cur, &tmp);
+                       if (cur == tmp) {
+                               pr_warning("crashkernel: Memory "
+                                               "value expected\n");
+                               return -EINVAL;
+                       }
+                       cur = tmp;
+                       if (end <= start) {
+                               pr_warning("crashkernel: end <= start\n");
+                               return -EINVAL;
+                       }
+               }
+
+               if (*cur != ':') {
+                       pr_warning("crashkernel: ':' expected\n");
+                       return -EINVAL;
+               }
+               cur++;
+
+               size = memparse(cur, &tmp);
+               if (cur == tmp) {
+                       pr_warning("crashkernel: Memory value expected\n");
+                       return -EINVAL;
+               }
+               cur = tmp;
+               if (size >= system_ram) {
+                       pr_warning("crashkernel: invalid size\n");
+                       return -EINVAL;
+               }
+
+               /* does the range match system_ram? */
+               if (system_ram >= start && system_ram <= end) {
+                       *crash_size = size;
+                       break;
+               }
+       } while (*cur++ == ',');
+
+       if (*crash_size > 0) {
+               while (*cur != ' ' && *cur != '@')
+                       cur++;
+               if (*cur == '@') {
+                       cur++;
+                       *crash_base = memparse(cur, &tmp);
+                       if (cur == tmp) {
+                               pr_warning("crashkernel: Memory value "
+                                               "expected after '@'\n");
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * This function parses "simple" (old) crashkernel command lines like
+ *
+ *     crashkernel=size[@offset]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_simple(char                *cmdline,
+                                          unsigned long long   *crash_size,
+                                          unsigned long long   *crash_base)
+{
+       char *cur = cmdline;
+
+       *crash_size = memparse(cmdline, &cur);
+       if (cmdline == cur) {
+               pr_warning("crashkernel: memory value expected\n");
+               return -EINVAL;
+       }
+
+       if (*cur == '@')
+               *crash_base = memparse(cur+1, &cur);
+
+       return 0;
+}
+
+/*
+ * This function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
+int __init parse_crashkernel(char               *cmdline,
+                            unsigned long long system_ram,
+                            unsigned long long *crash_size,
+                            unsigned long long *crash_base)
+{
+       char    *p = cmdline, *ck_cmdline = NULL;
+       char    *first_colon, *first_space;
+
+       BUG_ON(!crash_size || !crash_base);
+       *crash_size = 0;
+       *crash_base = 0;
+
+       /* find crashkernel and use the last one if there are more */
+       p = strstr(p, "crashkernel=");
+       while (p) {
+               ck_cmdline = p;
+               p = strstr(p+1, "crashkernel=");
+       }
+
+       if (!ck_cmdline)
+               return -EINVAL;
+
+       ck_cmdline += 12; /* strlen("crashkernel=") */
+
+       /*
+        * if the commandline contains a ':', then that's the extended
+        * syntax -- if not, it must be the classic syntax
+        */
+       first_colon = strchr(ck_cmdline, ':');
+       first_space = strchr(ck_cmdline, ' ');
+       if (first_colon && (!first_space || first_colon < first_space))
+               return parse_crashkernel_mem(ck_cmdline, system_ram,
+                               crash_size, crash_base);
+       else
+               return parse_crashkernel_simple(ck_cmdline, crash_size,
+                               crash_base);
+}
+
+
+
 void crash_save_vmcoreinfo(void)
 {
        u32 *buf;
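
The two parsers above cover both accepted spellings: parse_crashkernel_simple() handles crashkernel=64M@16M, while parse_crashkernel_mem() handles crashkernel=512M-2G:64M,2G-:128M@16M, picking the entry whose range brackets the actual RAM size (sizes here are illustrative). A hedged sketch of the arch-side call the entry point expects; arch_reserve_crashkernel() and the reservation step are illustrative, and parse_crashkernel()'s declaration is assumed to land in <linux/kexec.h>:

#include <linux/init.h>
#include <linux/kexec.h>        /* assumed to declare parse_crashkernel() */

static void __init arch_reserve_crashkernel(char *cmdline,
                                            unsigned long long system_ram)
{
        unsigned long long crash_size = 0, crash_base = 0;

        if (parse_crashkernel(cmdline, system_ram,
                              &crash_size, &crash_base) != 0)
                return;                 /* no valid crashkernel= option */
        if (!crash_size)
                return;                 /* no range matched system_ram */

        /* The architecture would now reserve
         * [crash_base, crash_base + crash_size) and fill in crashk_res;
         * crash_base == 0 leaves the base for the arch code to choose. */
}
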
index a6f1ee9c92d9fb7900ae02e44debe938a293aedb..55fe0c7cd95fc6776dda4f93d85fa1541200ebb1 100644 (file)
@@ -511,11 +511,11 @@ static void lockdep_print_held_locks(struct task_struct *curr)
        int i, depth = curr->lockdep_depth;
 
        if (!depth) {
-               printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
+               printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
                return;
        }
        printk("%d lock%s held by %s/%d:\n",
-               depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
+               depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
 
        for (i = 0; i < depth; i++) {
                printk(" #%d: ", i);
@@ -904,7 +904,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth)
        print_kernel_version();
        printk(  "-------------------------------------------------------\n");
        printk("%s/%d is trying to acquire lock:\n",
-               curr->comm, curr->pid);
+               curr->comm, task_pid_nr(curr));
        print_lock(check_source);
        printk("\nbut task is already holding lock:\n");
        print_lock(check_target);
@@ -1085,7 +1085,7 @@ print_bad_irq_dependency(struct task_struct *curr,
        print_kernel_version();
        printk(  "------------------------------------------------------\n");
        printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
-               curr->comm, curr->pid,
+               curr->comm, task_pid_nr(curr),
                curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
                curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
                curr->hardirqs_enabled,
@@ -1237,7 +1237,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
        print_kernel_version();
        printk(  "---------------------------------------------\n");
        printk("%s/%d is trying to acquire lock:\n",
-               curr->comm, curr->pid);
+               curr->comm, task_pid_nr(curr));
        print_lock(next);
        printk("\nbut task is already holding lock:\n");
        print_lock(prev);
@@ -1521,7 +1521,7 @@ cache_hit:
 }
 
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
-               struct held_lock *hlock, int chain_head, u64 chain_key)
+               struct held_lock *hlock, int chain_head, u64 chain_key)
 {
        /*
         * Trylock needs to maintain the stack of held locks, but it
@@ -1641,7 +1641,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
                usage_str[prev_bit], usage_str[new_bit]);
 
        printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
-               curr->comm, curr->pid,
+               curr->comm, task_pid_nr(curr),
                trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
                trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
                trace_hardirqs_enabled(curr),
@@ -1694,7 +1694,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
        print_kernel_version();
        printk(  "---------------------------------------------------------\n");
        printk("%s/%d just changed the state of lock:\n",
-               curr->comm, curr->pid);
+               curr->comm, task_pid_nr(curr));
        print_lock(this);
        if (forwards)
                printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
@@ -2487,7 +2487,7 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
        printk(  "[ BUG: bad unlock balance detected! ]\n");
        printk(  "-------------------------------------\n");
        printk("%s/%d is trying to release lock (",
-               curr->comm, curr->pid);
+               curr->comm, task_pid_nr(curr));
        print_lockdep_cache(lock);
        printk(") at:\n");
        print_ip_sym(ip);
@@ -2737,7 +2737,7 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
        printk(  "[ BUG: bad contention detected! ]\n");
        printk(  "---------------------------------\n");
        printk("%s/%d is trying to contend lock (",
-               curr->comm, curr->pid);
+               curr->comm, task_pid_nr(curr));
        print_lockdep_cache(lock);
        printk(") at:\n");
        print_ip_sym(ip);
@@ -3072,7 +3072,7 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
        printk(  "[ BUG: held lock freed! ]\n");
        printk(  "-------------------------\n");
        printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
-               curr->comm, curr->pid, mem_from, mem_to-1);
+               curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
        print_lock(hlock);
        lockdep_print_held_locks(curr);
 
@@ -3125,7 +3125,7 @@ static void print_held_locks_bug(struct task_struct *curr)
        printk(  "[ BUG: lock held at task exit time! ]\n");
        printk(  "-------------------------------------\n");
        printk("%s/%d is exiting with locks still held!\n",
-               curr->comm, curr->pid);
+               curr->comm, task_pid_nr(curr));
        lockdep_print_held_locks(curr);
 
        printk("\nstack backtrace:\n");
diff --git a/kernel/marker.c b/kernel/marker.c
new file mode 100644 (file)
index 0000000..ccb48d9
--- /dev/null
@@ -0,0 +1,525 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+
+extern struct marker __start___markers[];
+extern struct marker __stop___markers[];
+
+/*
+ * module_mutex nests inside markers_mutex. markers_mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+static DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * given module do we call synchronize_sched()). However, we need to make sure
+ * every critical region has ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by markers_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+       struct hlist_node hlist;
+       char *format;
+       marker_probe_func *probe;
+       void *private;
+       int refcount;   /* Number of times armed. 0 if disarmed. */
+       char name[0];   /* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we make sure the execution flow is always valid even
+ * though the function-pointer change and the marker enabling are two distinct
+ * operations that modify the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct marker *mdata, void *private,
+       const char *fmt, ...)
+{
+}
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *get_marker(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct marker_entry *e;
+       u32 hash = jhash(name, strlen(name), 0);
+
+       head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name))
+                       return e;
+       }
+       return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int add_marker(const char *name, const char *format,
+       marker_probe_func *probe, void *private)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct marker_entry *e;
+       size_t name_len = strlen(name) + 1;
+       size_t format_len = 0;
+       u32 hash = jhash(name, name_len-1, 0);
+
+       if (format)
+               format_len = strlen(format) + 1;
+       head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name)) {
+                       printk(KERN_NOTICE
+                               "Marker %s busy, probe %p already installed\n",
+                               name, e->probe);
+                       return -EBUSY;  /* Already there */
+               }
+       }
+       /*
+        * Using kmalloc here to allocate a variable length element. Could
+        * cause some memory fragmentation if overused.
+        */
+       e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+                       GFP_KERNEL);
+       if (!e)
+               return -ENOMEM;
+       memcpy(&e->name[0], name, name_len);
+       if (format) {
+               e->format = &e->name[name_len];
+               memcpy(e->format, format, format_len);
+               trace_mark(core_marker_format, "name %s format %s",
+                               e->name, e->format);
+       } else
+               e->format = NULL;
+       e->probe = probe;
+       e->private = private;
+       e->refcount = 0;
+       hlist_add_head(&e->hlist, head);
+       return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with
+ * markers_mutex held.
+ */
+static void *remove_marker(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct marker_entry *e;
+       int found = 0;
+       size_t len = strlen(name) + 1;
+       void *private = NULL;
+       u32 hash = jhash(name, len-1, 0);
+
+       head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name)) {
+                       found = 1;
+                       break;
+               }
+       }
+       if (found) {
+               private = e->private;
+               hlist_del(&e->hlist);
+               kfree(e);
+       }
+       return private;
+}
+
+/*
+ * Set the marker_entry format to the format found in the element.
+ */
+static int marker_set_format(struct marker_entry **entry, const char *format)
+{
+       struct marker_entry *e;
+       size_t name_len = strlen((*entry)->name) + 1;
+       size_t format_len = strlen(format) + 1;
+
+       e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+                       GFP_KERNEL);
+       if (!e)
+               return -ENOMEM;
+       memcpy(&e->name[0], (*entry)->name, name_len);
+       e->format = &e->name[name_len];
+       memcpy(e->format, format, format_len);
+       e->probe = (*entry)->probe;
+       e->private = (*entry)->private;
+       e->refcount = (*entry)->refcount;
+       hlist_add_before(&e->hlist, &(*entry)->hlist);
+       hlist_del(&(*entry)->hlist);
+       kfree(*entry);
+       *entry = e;
+       trace_mark(core_marker_format, "name %s format %s",
+                       e->name, e->format);
+       return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int set_marker(struct marker_entry **entry, struct marker *elem)
+{
+       int ret;
+       WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+       if ((*entry)->format) {
+               if (strcmp((*entry)->format, elem->format) != 0) {
+                       printk(KERN_NOTICE
+                               "Format mismatch for probe %s "
+                               "(%s), marker (%s)\n",
+                               (*entry)->name,
+                               (*entry)->format,
+                               elem->format);
+                       return -EPERM;
+               }
+       } else {
+               ret = marker_set_format(entry, elem->format);
+               if (ret)
+                       return ret;
+       }
+       elem->call = (*entry)->probe;
+       elem->private = (*entry)->private;
+       elem->state = 1;
+       return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only a synchronize_sched() issued after setting elem->call to the
+ * empty function guarantees that the original callback is not used anymore;
+ * this works because preemption is disabled around the call site.
+ */
+static void disable_marker(struct marker *elem)
+{
+       elem->state = 0;
+       elem->call = __mark_empty_function;
+       /*
+        * Leave the private data and id there, because removal is racy and
+        * should be done only after a synchronize_sched(). These are never used
+        * until the next initialization anyway.
+        */
+}
+
+/**
+ * marker_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ * @probe_module: module address of the probe being updated
+ * @refcount: number of references left to the given probe_module (out)
+ *
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+void marker_update_probe_range(struct marker *begin,
+       struct marker *end, struct module *probe_module,
+       int *refcount)
+{
+       struct marker *iter;
+       struct marker_entry *mark_entry;
+
+       for (iter = begin; iter < end; iter++) {
+               mark_entry = get_marker(iter->name);
+               if (mark_entry && mark_entry->refcount) {
+                       set_marker(&mark_entry, iter);
+                       /* ignore set_marker() errors, continue */
+                       if (probe_module)
+                               if (probe_module ==
+                       __module_text_address((unsigned long)mark_entry->probe))
+                                       (*refcount)++;
+               } else {
+                       disable_marker(iter);
+               }
+       }
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static void marker_update_probes(struct module *probe_module)
+{
+       int refcount = 0;
+
+       mutex_lock(&markers_mutex);
+       /* Core kernel markers */
+       marker_update_probe_range(__start___markers,
+                       __stop___markers, probe_module, &refcount);
+       /* Markers in modules. */
+       module_update_markers(probe_module, &refcount);
+       if (probe_module && refcount == 0) {
+               synchronize_sched();
+               deferred_sync = 0;
+       }
+       mutex_unlock(&markers_mutex);
+}
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @private: probe private data
+ *
+ * private data must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+                       marker_probe_func *probe, void *private)
+{
+       struct marker_entry *entry;
+       int ret = 0, need_update = 0;
+
+       mutex_lock(&markers_mutex);
+       entry = get_marker(name);
+       if (entry && entry->refcount) {
+               ret = -EBUSY;
+               goto end;
+       }
+       if (deferred_sync) {
+               synchronize_sched();
+               deferred_sync = 0;
+       }
+       ret = add_marker(name, format, probe, private);
+       if (ret)
+               goto end;
+       need_update = 1;
+end:
+       mutex_unlock(&markers_mutex);
+       if (need_update)
+               marker_update_probes(NULL);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+       struct module *probe_module;
+       struct marker_entry *entry;
+       void *private;
+       int need_update = 0;
+
+       mutex_lock(&markers_mutex);
+       entry = get_marker(name);
+       if (!entry) {
+               private = ERR_PTR(-ENOENT);
+               goto end;
+       }
+       entry->refcount = 0;
+       /* In which module is the probe handler? */
+       probe_module = __module_text_address((unsigned long)entry->probe);
+       private = remove_marker(name);
+       deferred_sync = 1;
+       need_update = 1;
+end:
+       mutex_unlock(&markers_mutex);
+       if (need_update)
+               marker_update_probes(probe_module);
+       return private;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister_private_data -  Disconnect a probe from a marker
+ * @private: probe private data
+ *
+ * Unregister a marker by providing the registered private data.
+ * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_private_data(void *private)
+{
+       struct module *probe_module;
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct marker_entry *entry;
+       int found = 0;
+       unsigned int i;
+       int need_update = 0;
+
+       mutex_lock(&markers_mutex);
+       for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+               head = &marker_table[i];
+               hlist_for_each_entry(entry, node, head, hlist) {
+                       if (entry->private == private) {
+                               found = 1;
+                               goto iter_end;
+                       }
+               }
+       }
+iter_end:
+       if (!found) {
+               private = ERR_PTR(-ENOENT);
+               goto end;
+       }
+       entry->refcount = 0;
+       /* In which module is the probe handler? */
+       probe_module = __module_text_address((unsigned long)entry->probe);
+       private = remove_marker(entry->name);
+       deferred_sync = 1;
+       need_update = 1;
+end:
+       mutex_unlock(&markers_mutex);
+       if (need_update)
+               marker_update_probes(probe_module);
+       return private;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+       struct marker_entry *entry;
+       int ret = 0, need_update = 0;
+
+       mutex_lock(&markers_mutex);
+       entry = get_marker(name);
+       if (!entry) {
+               ret = -ENOENT;
+               goto end;
+       }
+       /*
+        * Only need to update probes when refcount passes from 0 to 1.
+        */
+       if (entry->refcount++)
+               goto end;
+       need_update = 1;
+end:
+       mutex_unlock(&markers_mutex);
+       if (need_update)
+               marker_update_probes(NULL);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+       struct marker_entry *entry;
+       int ret = 0, need_update = 0;
+
+       mutex_lock(&markers_mutex);
+       entry = get_marker(name);
+       if (!entry) {
+               ret = -ENOENT;
+               goto end;
+       }
+       /*
+        * Only permit decrement refcount if higher than 0.
+        * Do probe update only on 1 -> 0 transition.
+        */
+       if (entry->refcount) {
+               if (--entry->refcount)
+                       goto end;
+       } else {
+               ret = -EPERM;
+               goto end;
+       }
+       need_update = 1;
+end:
+       mutex_unlock(&markers_mutex);
+       if (need_update)
+               marker_update_probes(NULL);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_private_data - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the private data pointer, or an ERR_PTR.
+ * The private data pointer should _only_ be dereferenced if the caller is the
+ * owner of the data, or its content could vanish. This is mostly used to
+ * confirm that a caller is the owner of a registered probe.
+ */
+void *marker_get_private_data(const char *name)
+{
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct marker_entry *e;
+       size_t name_len = strlen(name) + 1;
+       u32 hash = jhash(name, name_len-1, 0);
+       int found = 0;
+
+       head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+       hlist_for_each_entry(e, node, head, hlist) {
+               if (!strcmp(name, e->name)) {
+                       found = 1;
+                       return e->private;
+               }
+       }
+       return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_private_data);
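
Taken together, the API above is used as follows: a probe matching the marker_probe_func signature (cf. __mark_empty_function()) is registered under a marker name and then armed, after which every trace_mark() site using that name and format calls it. A hedged sketch of a probe module; the marker name "subsys_event", its "value %d" format and the printk text are illustrative:

/* marker_probe_demo.c - hedged sketch of the marker API introduced above. */
#include <linux/kernel.h>
#include <linux/marker.h>
#include <linux/module.h>

/* Matches marker_probe_func; compare __mark_empty_function() above. */
static void demo_probe(const struct marker *mdata, void *private,
                       const char *fmt, ...)
{
        printk(KERN_INFO "marker %s hit, format \"%s\"\n",
               mdata->name, fmt);
}

static int __init demo_init(void)
{
        int ret;

        /* The format must match the trace_mark() site exactly, or
         * set_marker() refuses with -EPERM when the probe is armed. */
        ret = marker_probe_register("subsys_event", "value %d",
                                    demo_probe, NULL);
        if (ret)
                return ret;
        return marker_arm("subsys_event");
}

static void __exit demo_exit(void)
{
        marker_disarm("subsys_event");
        marker_probe_unregister("subsys_event");
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
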
index 7734595bd3296923947432521095020096f69fdf..3202c9950073ebc05fb232a625da27d2ec35eee8 100644 (file)
@@ -1673,6 +1673,8 @@ static struct module *load_module(void __user *umod,
        unsigned int unusedcrcindex;
        unsigned int unusedgplindex;
        unsigned int unusedgplcrcindex;
+       unsigned int markersindex;
+       unsigned int markersstringsindex;
        struct module *mod;
        long err = 0;
        void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1939,6 +1941,9 @@ static struct module *load_module(void __user *umod,
                add_taint_module(mod, TAINT_FORCED_MODULE);
        }
 #endif
+       markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+       markersstringsindex = find_sec(hdr, sechdrs, secstrings,
+                                       "__markers_strings");
 
        /* Now do relocations. */
        for (i = 1; i < hdr->e_shnum; i++) {
@@ -1961,6 +1966,11 @@ static struct module *load_module(void __user *umod,
                if (err < 0)
                        goto cleanup;
        }
+#ifdef CONFIG_MARKERS
+       mod->markers = (void *)sechdrs[markersindex].sh_addr;
+       mod->num_markers =
+               sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+#endif
 
         /* Find duplicate symbols */
        err = verify_export_symbols(mod);
@@ -1979,6 +1989,11 @@ static struct module *load_module(void __user *umod,
 
        add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+#ifdef CONFIG_MARKERS
+       if (!mod->taints)
+               marker_update_probe_range(mod->markers,
+                       mod->markers + mod->num_markers, NULL, NULL);
+#endif
        err = module_finalize(hdr, sechdrs, mod);
        if (err < 0)
                goto cleanup;
@@ -2570,3 +2585,18 @@ EXPORT_SYMBOL(module_remove_driver);
 void struct_module(struct module *mod) { return; }
 EXPORT_SYMBOL(struct_module);
 #endif
+
+#ifdef CONFIG_MARKERS
+void module_update_markers(struct module *probe_module, int *refcount)
+{
+       struct module *mod;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry(mod, &modules, list)
+               if (!mod->taints)
+                       marker_update_probe_range(mod->markers,
+                               mod->markers + mod->num_markers,
+                               probe_module, refcount);
+       mutex_unlock(&module_mutex);
+}
+#endif
diff --git a/kernel/notifier.c b/kernel/notifier.c
new file mode 100644 (file)
index 0000000..4253f47
--- /dev/null
@@ -0,0 +1,539 @@
+#include <linux/kdebug.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/vmalloc.h>
+
+/*
+ *     Notifier list for kernel code which wants to be called
+ *     at shutdown. This is used to stop any idling DMA operations
+ *     and the like.
+ */
+BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
+
+/*
+ *     Notifier chain core routines.  The exported routines below
+ *     are layered on top of these, with appropriate locking added.
+ */
+
+static int notifier_chain_register(struct notifier_block **nl,
+               struct notifier_block *n)
+{
+       while ((*nl) != NULL) {
+               if (n->priority > (*nl)->priority)
+                       break;
+               nl = &((*nl)->next);
+       }
+       n->next = *nl;
+       rcu_assign_pointer(*nl, n);
+       return 0;
+}
+
+static int notifier_chain_unregister(struct notifier_block **nl,
+               struct notifier_block *n)
+{
+       while ((*nl) != NULL) {
+               if ((*nl) == n) {
+                       rcu_assign_pointer(*nl, n->next);
+                       return 0;
+               }
+               nl = &((*nl)->next);
+       }
+       return -ENOENT;
+}
+
+/**
+ * notifier_call_chain - Informs the registered notifiers about an event.
+ *     @nl:            Pointer to head of the blocking notifier chain
+ *     @val:           Value passed unmodified to notifier function
+ *     @v:             Pointer passed unmodified to notifier function
+ *     @nr_to_call:    Number of notifier functions to be called; pass -1
+ *                     to call all of them.
+ *     @nr_calls:      Records the number of notifications sent; pass NULL
+ *                     if the count is not needed.
+ *     @returns:       notifier_call_chain returns the value returned by the
+ *                     last notifier function called.
+ */
+static int __kprobes notifier_call_chain(struct notifier_block **nl,
+                                       unsigned long val, void *v,
+                                       int nr_to_call, int *nr_calls)
+{
+       int ret = NOTIFY_DONE;
+       struct notifier_block *nb, *next_nb;
+
+       nb = rcu_dereference(*nl);
+
+       while (nb && nr_to_call) {
+               next_nb = rcu_dereference(nb->next);
+               ret = nb->notifier_call(nb, val, v);
+
+               if (nr_calls)
+                       (*nr_calls)++;
+
+               if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
+                       break;
+               nb = next_nb;
+               nr_to_call--;
+       }
+       return ret;
+}
+
+/*
+ *     Atomic notifier chain routines.  Registration and unregistration
+ *     use a spinlock, and call_chain is synchronized by RCU (no locks).
+ */
+
+/**
+ *     atomic_notifier_chain_register - Add notifier to an atomic notifier chain
+ *     @nh: Pointer to head of the atomic notifier chain
+ *     @n: New entry in notifier chain
+ *
+ *     Adds a notifier to an atomic notifier chain.
+ *
+ *     Currently always returns zero.
+ */
+int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+               struct notifier_block *n)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&nh->lock, flags);
+       ret = notifier_chain_register(&nh->head, n);
+       spin_unlock_irqrestore(&nh->lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
+
+/**
+ *     atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
+ *     @nh: Pointer to head of the atomic notifier chain
+ *     @n: Entry to remove from notifier chain
+ *
+ *     Removes a notifier from an atomic notifier chain.
+ *
+ *     Returns zero on success or %-ENOENT on failure.
+ */
+int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
+               struct notifier_block *n)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&nh->lock, flags);
+       ret = notifier_chain_unregister(&nh->head, n);
+       spin_unlock_irqrestore(&nh->lock, flags);
+       synchronize_rcu();
+       return ret;
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
+
+/**
+ *     __atomic_notifier_call_chain - Call functions in an atomic notifier chain
+ *     @nh: Pointer to head of the atomic notifier chain
+ *     @val: Value passed unmodified to notifier function
+ *     @v: Pointer passed unmodified to notifier function
+ *     @nr_to_call: See the comment for notifier_call_chain.
+ *     @nr_calls: See the comment for notifier_call_chain.
+ *
+ *     Calls each function in a notifier chain in turn.  The functions
+ *     run in an atomic context, so they must not block.
+ *     This routine uses RCU to synchronize with changes to the chain.
+ *
+ *     If the return value of the notifier can be and'ed
+ *     with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
+ *     will return immediately, with the return value of
+ *     the notifier function which halted execution.
+ *     Otherwise the return value is the return value
+ *     of the last notifier function called.
+ */
+int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+                                       unsigned long val, void *v,
+                                       int nr_to_call, int *nr_calls)
+{
+       int ret;
+
+       rcu_read_lock();
+       ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+       rcu_read_unlock();
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
+
+int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+               unsigned long val, void *v)
+{
+       return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+
+/*
+ *     Blocking notifier chain routines.  All access to the chain is
+ *     synchronized by an rwsem.
+ */
+
+/**
+ *     blocking_notifier_chain_register - Add notifier to a blocking notifier chain
+ *     @nh: Pointer to head of the blocking notifier chain
+ *     @n: New entry in notifier chain
+ *
+ *     Adds a notifier to a blocking notifier chain.
+ *     Must be called in process context.
+ *
+ *     Currently always returns zero.
+ */
+int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+               struct notifier_block *n)
+{
+       int ret;
+
+       /*
+        * This code gets used during boot-up, when task switching is
+        * not yet working and interrupts must remain disabled.  At
+        * such times we must not call down_write().
+        */
+       if (unlikely(system_state == SYSTEM_BOOTING))
+               return notifier_chain_register(&nh->head, n);
+
+       down_write(&nh->rwsem);
+       ret = notifier_chain_register(&nh->head, n);
+       up_write(&nh->rwsem);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
+
+/**
+ *     blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
+ *     @nh: Pointer to head of the blocking notifier chain
+ *     @n: Entry to remove from notifier chain
+ *
+ *     Removes a notifier from a blocking notifier chain.
+ *     Must be called from process context.
+ *
+ *     Returns zero on success or %-ENOENT on failure.
+ */
+int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
+               struct notifier_block *n)
+{
+       int ret;
+
+       /*
+        * This code gets used during boot-up, when task switching is
+        * not yet working and interrupts must remain disabled.  At
+        * such times we must not call down_write().
+        */
+       if (unlikely(system_state == SYSTEM_BOOTING))
+               return notifier_chain_unregister(&nh->head, n);
+
+       down_write(&nh->rwsem);
+       ret = notifier_chain_unregister(&nh->head, n);
+       up_write(&nh->rwsem);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
+
+/**
+ *     __blocking_notifier_call_chain - Call functions in a blocking notifier chain
+ *     @nh: Pointer to head of the blocking notifier chain
+ *     @val: Value passed unmodified to notifier function
+ *     @v: Pointer passed unmodified to notifier function
+ *     @nr_to_call: See comment for notifier_call_chain.
+ *     @nr_calls: See comment for notifier_call_chain.
+ *
+ *     Calls each function in a notifier chain in turn.  The functions
+ *     run in a process context, so they are allowed to block.
+ *
+ *     If the return value of the notifier can be and'ed
+ *     with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
+ *     will return immediately, with the return value of
+ *     the notifier function which halted execution.
+ *     Otherwise the return value is the return value
+ *     of the last notifier function called.
+ */
+int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+                                  unsigned long val, void *v,
+                                  int nr_to_call, int *nr_calls)
+{
+       int ret = NOTIFY_DONE;
+
+       /*
+        * We check the head outside the lock, but if this access is
+        * racy then it does not matter what the result of the test
+        * is, we re-check the list after having taken the lock anyway:
+        */
+       if (rcu_dereference(nh->head)) {
+               down_read(&nh->rwsem);
+               ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
+                                       nr_calls);
+               up_read(&nh->rwsem);
+       }
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
+
+int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+               unsigned long val, void *v)
+{
+       return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
+
+/*
+ *     Raw notifier chain routines.  There is no protection;
+ *     the caller must provide it.  Use at your own risk!
+ */
+
+/**
+ *     raw_notifier_chain_register - Add notifier to a raw notifier chain
+ *     @nh: Pointer to head of the raw notifier chain
+ *     @n: New entry in notifier chain
+ *
+ *     Adds a notifier to a raw notifier chain.
+ *     All locking must be provided by the caller.
+ *
+ *     Currently always returns zero.
+ */
+int raw_notifier_chain_register(struct raw_notifier_head *nh,
+               struct notifier_block *n)
+{
+       return notifier_chain_register(&nh->head, n);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
+
+/**
+ *     raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
+ *     @nh: Pointer to head of the raw notifier chain
+ *     @n: Entry to remove from notifier chain
+ *
+ *     Removes a notifier from a raw notifier chain.
+ *     All locking must be provided by the caller.
+ *
+ *     Returns zero on success or %-ENOENT on failure.
+ */
+int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
+               struct notifier_block *n)
+{
+       return notifier_chain_unregister(&nh->head, n);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
+
+/**
+ *     __raw_notifier_call_chain - Call functions in a raw notifier chain
+ *     @nh: Pointer to head of the raw notifier chain
+ *     @val: Value passed unmodified to notifier function
+ *     @v: Pointer passed unmodified to notifier function
+ *     @nr_to_call: See comment for notifier_call_chain.
+ *     @nr_calls: See comment for notifier_call_chain
+ *
+ *     Calls each function in a notifier chain in turn.  The functions
+ *     run in an undefined context.
+ *     All locking must be provided by the caller.
+ *
+ *     If the return value of the notifier can be and'ed
+ *     with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
+ *     will return immediately, with the return value of
+ *     the notifier function which halted execution.
+ *     Otherwise the return value is the return value
+ *     of the last notifier function called.
+ */
+int __raw_notifier_call_chain(struct raw_notifier_head *nh,
+                             unsigned long val, void *v,
+                             int nr_to_call, int *nr_calls)
+{
+       return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+}
+EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
+
+int raw_notifier_call_chain(struct raw_notifier_head *nh,
+               unsigned long val, void *v)
+{
+       return __raw_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
+
+/*
+ *     SRCU notifier chain routines.    Registration and unregistration
+ *     use a mutex, and call_chain is synchronized by SRCU (no locks).
+ */
+
+/**
+ *     srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
+ *     @nh: Pointer to head of the SRCU notifier chain
+ *     @n: New entry in notifier chain
+ *
+ *     Adds a notifier to an SRCU notifier chain.
+ *     Must be called in process context.
+ *
+ *     Currently always returns zero.
+ */
+int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
+               struct notifier_block *n)
+{
+       int ret;
+
+       /*
+        * This code gets used during boot-up, when task switching is
+        * not yet working and interrupts must remain disabled.  At
+        * such times we must not call mutex_lock().
+        */
+       if (unlikely(system_state == SYSTEM_BOOTING))
+               return notifier_chain_register(&nh->head, n);
+
+       mutex_lock(&nh->mutex);
+       ret = notifier_chain_register(&nh->head, n);
+       mutex_unlock(&nh->mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
+
+/**
+ *     srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
+ *     @nh: Pointer to head of the SRCU notifier chain
+ *     @n: Entry to remove from notifier chain
+ *
+ *     Removes a notifier from an SRCU notifier chain.
+ *     Must be called from process context.
+ *
+ *     Returns zero on success or %-ENOENT on failure.
+ */
+int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
+               struct notifier_block *n)
+{
+       int ret;
+
+       /*
+        * This code gets used during boot-up, when task switching is
+        * not yet working and interrupts must remain disabled.  At
+        * such times we must not call mutex_lock().
+        */
+       if (unlikely(system_state == SYSTEM_BOOTING))
+               return notifier_chain_unregister(&nh->head, n);
+
+       mutex_lock(&nh->mutex);
+       ret = notifier_chain_unregister(&nh->head, n);
+       mutex_unlock(&nh->mutex);
+       synchronize_srcu(&nh->srcu);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
+
+/**
+ *     __srcu_notifier_call_chain - Call functions in an SRCU notifier chain
+ *     @nh: Pointer to head of the SRCU notifier chain
+ *     @val: Value passed unmodified to notifier function
+ *     @v: Pointer passed unmodified to notifier function
+ *     @nr_to_call: See comment for notifier_call_chain.
+ *     @nr_calls: See comment for notifier_call_chain.
+ *
+ *     Calls each function in a notifier chain in turn.  The functions
+ *     run in a process context, so they are allowed to block.
+ *
+ *     If the return value of the notifier can be and'ed
+ *     with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
+ *     will return immediately, with the return value of
+ *     the notifier function which halted execution.
+ *     Otherwise the return value is the return value
+ *     of the last notifier function called.
+ */
+int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+                              unsigned long val, void *v,
+                              int nr_to_call, int *nr_calls)
+{
+       int ret;
+       int idx;
+
+       idx = srcu_read_lock(&nh->srcu);
+       ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+       srcu_read_unlock(&nh->srcu, idx);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
+
+int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+               unsigned long val, void *v)
+{
+       return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
+
+/**
+ *     srcu_init_notifier_head - Initialize an SRCU notifier head
+ *     @nh: Pointer to head of the srcu notifier chain
+ *
+ *     Unlike other sorts of notifier heads, SRCU notifier heads require
+ *     dynamic initialization.  Be sure to call this routine before
+ *     calling any of the other SRCU notifier routines for this head.
+ *
+ *     If an SRCU notifier head is deallocated, it must first be cleaned
+ *     up by calling srcu_cleanup_notifier_head().  Otherwise the head's
+ *     per-cpu data (used by the SRCU mechanism) will leak.
+ */
+void srcu_init_notifier_head(struct srcu_notifier_head *nh)
+{
+       mutex_init(&nh->mutex);
+       if (init_srcu_struct(&nh->srcu) < 0)
+               BUG();
+       nh->head = NULL;
+}
+EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
+
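/*
 * A sketch of the expected SRCU-chain lifecycle; the example_* names are
 * hypothetical, and srcu_cleanup_notifier_head() is the cleanup helper
 * the comment above refers to.  Unlike the other chain types, the head
 * needs explicit dynamic init and cleanup.
 */
static struct srcu_notifier_head example_nh;

static int example_cb(struct notifier_block *nb, unsigned long val, void *v)
{
	return NOTIFY_DONE;	/* may block: callbacks run in process context */
}

static struct notifier_block example_nb = { .notifier_call = example_cb };

static void example_lifecycle(void)
{
	srcu_init_notifier_head(&example_nh);	/* mandatory dynamic init */
	srcu_notifier_chain_register(&example_nh, &example_nb);
	srcu_notifier_call_chain(&example_nh, 1, NULL);
	srcu_notifier_chain_unregister(&example_nh, &example_nb);
	srcu_cleanup_notifier_head(&example_nh);	/* frees SRCU per-cpu data */
}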
+/**
+ *     register_reboot_notifier - Register function to be called at reboot time
+ *     @nb: Info about notifier function to be called
+ *
+ *     Registers a function with the list of functions
+ *     to be called at reboot time.
+ *
+ *     Currently always returns zero, as blocking_notifier_chain_register()
+ *     always returns zero.
+ */
+int register_reboot_notifier(struct notifier_block *nb)
+{
+       return blocking_notifier_chain_register(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(register_reboot_notifier);
+
+/**
+ *     unregister_reboot_notifier - Unregister previously registered reboot notifier
+ *     @nb: Hook to be unregistered
+ *
+ *     Unregisters a previously registered reboot
+ *     notifier function.
+ *
+ *     Returns zero on success, or %-ENOENT on failure.
+ */
+int unregister_reboot_notifier(struct notifier_block *nb)
+{
+       return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(unregister_reboot_notifier);
+
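/*
 * A hypothetical driver hooking shutdown via the wrappers above.  The
 * event codes passed to the callback (SYS_RESTART, SYS_HALT,
 * SYS_POWER_OFF) come from <linux/notifier.h>.
 */
static int example_reboot(struct notifier_block *nb,
		unsigned long code, void *unused)
{
	/* quiesce DMA, park firmware, etc., depending on code */
	return NOTIFY_DONE;
}

static struct notifier_block example_reboot_nb = {
	.notifier_call = example_reboot,
};

static int __init example_init(void)
{
	return register_reboot_notifier(&example_reboot_nb);
}

static void __exit example_exit(void)
{
	unregister_reboot_notifier(&example_reboot_nb);
}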
+static ATOMIC_NOTIFIER_HEAD(die_chain);
+
+int notify_die(enum die_val val, const char *str,
+              struct pt_regs *regs, long err, int trap, int sig)
+{
+       struct die_args args = {
+               .regs   = regs,
+               .str    = str,
+               .err    = err,
+               .trapnr = trap,
+               .signr  = sig,
+       };
+       return atomic_notifier_call_chain(&die_chain, val, &args);
+}
+
+int register_die_notifier(struct notifier_block *nb)
+{
+       vmalloc_sync_all();
+       return atomic_notifier_chain_register(&die_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_unregister(&die_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_die_notifier);
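/*
 * A sketch of a die-chain client such as a debugger backend; names are
 * hypothetical.  The enum die_val values (e.g. DIE_OOPS on x86) are
 * arch-specific and live in the arch's kdebug.h, so this is
 * illustrative only.
 */
#include <linux/kdebug.h>

static int example_die(struct notifier_block *nb, unsigned long val, void *data)
{
	struct die_args *args = data;

	/* args->regs, args->trapnr and args->signr describe the fault */
	return NOTIFY_DONE;
}

static struct notifier_block example_die_nb = { .notifier_call = example_die };

static int __init example_die_init(void)
{
	return register_die_notifier(&example_die_nb);
}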
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
new file mode 100644 (file)
index 0000000..aead4d6
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * ns_cgroup.c - namespace cgroup subsystem
+ *
+ * Copyright 2006, 2007 IBM Corp
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+
+struct ns_cgroup {
+       struct cgroup_subsys_state css;
+       spinlock_t lock;
+};
+
+struct cgroup_subsys ns_subsys;
+
+static inline struct ns_cgroup *cgroup_to_ns(
+               struct cgroup *cgroup)
+{
+       return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
+                           struct ns_cgroup, css);
+}
+
+int ns_cgroup_clone(struct task_struct *task)
+{
+       return cgroup_clone(task, &ns_subsys);
+}
+
+/*
+ * Rules:
+ *   1. you can only enter a cgroup which is a child of your current
+ *     cgroup
+ *   2. you can only place another process into a cgroup if
+ *     a. you have CAP_SYS_ADMIN
+ *     b. your cgroup is an ancestor of task's destination cgroup
+ *       (hence either you are in the same cgroup as task, or in an
+ *        ancestor cgroup thereof)
+ */
+static int ns_can_attach(struct cgroup_subsys *ss,
+               struct cgroup *new_cgroup, struct task_struct *task)
+{
+       struct cgroup *orig;
+
+       if (current != task) {
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (!cgroup_is_descendant(new_cgroup))
+                       return -EPERM;
+       }
+
+       if (atomic_read(&new_cgroup->count) != 0)
+               return -EPERM;
+
+       orig = task_cgroup(task, ns_subsys_id);
+       if (orig && orig != new_cgroup->parent)
+               return -EPERM;
+
+       return 0;
+}
+
+/*
+ * Rules: you can only create a cgroup if
+ *     1. you are capable(CAP_SYS_ADMIN)
+ *     2. the target cgroup is a descendant of your own cgroup
+ */
+static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
+                                               struct cgroup *cgroup)
+{
+       struct ns_cgroup *ns_cgroup;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return ERR_PTR(-EPERM);
+       if (!cgroup_is_descendant(cgroup))
+               return ERR_PTR(-EPERM);
+
+       ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
+       if (!ns_cgroup)
+               return ERR_PTR(-ENOMEM);
+       spin_lock_init(&ns_cgroup->lock);
+       return &ns_cgroup->css;
+}
+
+static void ns_destroy(struct cgroup_subsys *ss,
+                       struct cgroup *cgroup)
+{
+       struct ns_cgroup *ns_cgroup;
+
+       ns_cgroup = cgroup_to_ns(cgroup);
+       kfree(ns_cgroup);
+}
+
+struct cgroup_subsys ns_subsys = {
+       .name = "ns",
+       .can_attach = ns_can_attach,
+       .create = ns_create,
+       .destroy  = ns_destroy,
+       .subsys_id = ns_subsys_id,
+};
index 049e7c0ac5668964cb08554aee935ee687642852..79f871bc0ef40026e111df575622cbc6b11031c3 100644 (file)
@@ -26,19 +26,6 @@ static struct kmem_cache *nsproxy_cachep;
 
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
-static inline void get_nsproxy(struct nsproxy *ns)
-{
-       atomic_inc(&ns->count);
-}
-
-void get_task_namespaces(struct task_struct *tsk)
-{
-       struct nsproxy *ns = tsk->nsproxy;
-       if (ns) {
-               get_nsproxy(ns);
-       }
-}
-
 /*
  * creates a copy of "orig" with refcount 1.
  */
@@ -87,7 +74,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
                goto out_ipc;
        }
 
-       new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns);
+       new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
        if (IS_ERR(new_nsp->pid_ns)) {
                err = PTR_ERR(new_nsp->pid_ns);
                goto out_pid;
@@ -142,7 +129,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 
        get_nsproxy(old_ns);
 
-       if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET)))
+       if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+                               CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
                return 0;
 
        if (!capable(CAP_SYS_ADMIN)) {
@@ -156,7 +144,14 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
                goto out;
        }
 
+       err = ns_cgroup_clone(tsk);
+       if (err) {
+               put_nsproxy(new_ns);
+               goto out;
+       }
+
        tsk->nsproxy = new_ns;
+
 out:
        put_nsproxy(old_ns);
        return err;
@@ -196,11 +191,46 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 
        *new_nsp = create_new_namespaces(unshare_flags, current,
                                new_fs ? new_fs : current->fs);
-       if (IS_ERR(*new_nsp))
+       if (IS_ERR(*new_nsp)) {
                err = PTR_ERR(*new_nsp);
+               goto out;
+       }
+
+       err = ns_cgroup_clone(current);
+       if (err)
+               put_nsproxy(*new_nsp);
+
+out:
        return err;
 }
 
+void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
+{
+       struct nsproxy *ns;
+
+       might_sleep();
+
+       ns = p->nsproxy;
+
+       rcu_assign_pointer(p->nsproxy, new);
+
+       if (ns && atomic_dec_and_test(&ns->count)) {
+               /*
+                * Wait for others to get what they want from this nsproxy.
+                *
+                * We cannot release this nsproxy via call_rcu() since
+                * put_mnt_ns() will want to sleep.
+                */
+               synchronize_rcu();
+               free_nsproxy(ns);
+       }
+}
+
+void exit_task_namespaces(struct task_struct *p)
+{
+       switch_task_namespaces(p, NULL);
+}
+
 static int __init nsproxy_cache_init(void)
 {
        nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
index c6e3f9ffff87e62afa5aa93233833b8642144dd9..d1db36b94674dbdc318845233b1804c659515740 100644 (file)
  * allocation scenario when all but one out of 1 million PIDs possible are
  * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
+ *
+ * Pid namespaces:
+ *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
+ *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
+ *     Many thanks to Oleg Nesterov for comments and help
+ *
  */
 
 #include <linux/mm.h>
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
+#include <linux/syscalls.h>
 
-#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
+#define pid_hashfn(nr, ns)     \
+       hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
-static struct kmem_cache *pid_cachep;
 struct pid init_struct_pid = INIT_STRUCT_PID;
+static struct kmem_cache *pid_ns_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 
@@ -68,8 +76,25 @@ struct pid_namespace init_pid_ns = {
                [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
        },
        .last_pid = 0,
-       .child_reaper = &init_task
+       .level = 0,
+       .child_reaper = &init_task,
 };
+EXPORT_SYMBOL_GPL(init_pid_ns);
+
+int is_container_init(struct task_struct *tsk)
+{
+       int ret = 0;
+       struct pid *pid;
+
+       rcu_read_lock();
+       pid = task_pid(tsk);
+       if (pid != NULL && pid->numbers[pid->level].nr == 1)
+               ret = 1;
+       rcu_read_unlock();
+
+       return ret;
+}
+EXPORT_SYMBOL(is_container_init);
 
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
@@ -176,11 +201,17 @@ static int next_pidmap(struct pid_namespace *pid_ns, int last)
 
 fastcall void put_pid(struct pid *pid)
 {
+       struct pid_namespace *ns;
+
        if (!pid)
                return;
+
+       ns = pid->numbers[pid->level].ns;
        if ((atomic_read(&pid->count) == 1) ||
-            atomic_dec_and_test(&pid->count))
-               kmem_cache_free(pid_cachep, pid);
+            atomic_dec_and_test(&pid->count)) {
+               kmem_cache_free(ns->pid_cachep, pid);
+               put_pid_ns(ns);
+       }
 }
 EXPORT_SYMBOL_GPL(put_pid);
 
@@ -193,60 +224,94 @@ static void delayed_put_pid(struct rcu_head *rhp)
 fastcall void free_pid(struct pid *pid)
 {
        /* We can be called with write_lock_irq(&tasklist_lock) held */
+       int i;
        unsigned long flags;
 
        spin_lock_irqsave(&pidmap_lock, flags);
-       hlist_del_rcu(&pid->pid_chain);
+       for (i = 0; i <= pid->level; i++)
+               hlist_del_rcu(&pid->numbers[i].pid_chain);
        spin_unlock_irqrestore(&pidmap_lock, flags);
 
-       free_pidmap(&init_pid_ns, pid->nr);
+       for (i = 0; i <= pid->level; i++)
+               free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
+
        call_rcu(&pid->rcu, delayed_put_pid);
 }
 
-struct pid *alloc_pid(void)
+struct pid *alloc_pid(struct pid_namespace *ns)
 {
        struct pid *pid;
        enum pid_type type;
-       int nr = -1;
+       int i, nr;
+       struct pid_namespace *tmp;
+       struct upid *upid;
 
-       pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
+       pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
        if (!pid)
                goto out;
 
-       nr = alloc_pidmap(current->nsproxy->pid_ns);
-       if (nr < 0)
-               goto out_free;
+       tmp = ns;
+       for (i = ns->level; i >= 0; i--) {
+               nr = alloc_pidmap(tmp);
+               if (nr < 0)
+                       goto out_free;
+
+               pid->numbers[i].nr = nr;
+               pid->numbers[i].ns = tmp;
+               tmp = tmp->parent;
+       }
 
+       get_pid_ns(ns);
+       pid->level = ns->level;
        atomic_set(&pid->count, 1);
-       pid->nr = nr;
        for (type = 0; type < PIDTYPE_MAX; ++type)
                INIT_HLIST_HEAD(&pid->tasks[type]);
 
        spin_lock_irq(&pidmap_lock);
-       hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
+       for (i = ns->level; i >= 0; i--) {
+               upid = &pid->numbers[i];
+               hlist_add_head_rcu(&upid->pid_chain,
+                               &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+       }
        spin_unlock_irq(&pidmap_lock);
 
 out:
        return pid;
 
 out_free:
-       kmem_cache_free(pid_cachep, pid);
+       for (i++; i <= ns->level; i++)
+               free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
+
+       kmem_cache_free(ns->pid_cachep, pid);
        pid = NULL;
        goto out;
 }
 
-struct pid * fastcall find_pid(int nr)
+struct pid * fastcall find_pid_ns(int nr, struct pid_namespace *ns)
 {
        struct hlist_node *elem;
-       struct pid *pid;
+       struct upid *pnr;
+
+       hlist_for_each_entry_rcu(pnr, elem,
+                       &pid_hash[pid_hashfn(nr, ns)], pid_chain)
+               if (pnr->nr == nr && pnr->ns == ns)
+                       return container_of(pnr, struct pid,
+                                       numbers[ns->level]);
 
-       hlist_for_each_entry_rcu(pid, elem,
-                       &pid_hash[pid_hashfn(nr)], pid_chain) {
-               if (pid->nr == nr)
-                       return pid;
-       }
        return NULL;
 }
+EXPORT_SYMBOL_GPL(find_pid_ns);
+
+struct pid *find_vpid(int nr)
+{
+       return find_pid_ns(nr, current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL_GPL(find_vpid);
+
+struct pid *find_pid(int nr)
+{
+       return find_pid_ns(nr, &init_pid_ns);
+}
 EXPORT_SYMBOL_GPL(find_pid);
 
 /*
@@ -307,12 +372,32 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
 /*
  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
-struct task_struct *find_task_by_pid_type(int type, int nr)
+struct task_struct *find_task_by_pid_type_ns(int type, int nr,
+               struct pid_namespace *ns)
 {
-       return pid_task(find_pid(nr), type);
+       return pid_task(find_pid_ns(nr, ns), type);
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
+EXPORT_SYMBOL(find_task_by_pid_type_ns);
+
+struct task_struct *find_task_by_pid(pid_t nr)
+{
+       return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_pid);
+
+struct task_struct *find_task_by_vpid(pid_t vnr)
+{
+       return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
+                       current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_vpid);
+
+struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
+{
+       return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
+}
+EXPORT_SYMBOL(find_task_by_pid_ns);
 
 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
@@ -339,45 +424,239 @@ struct pid *find_get_pid(pid_t nr)
        struct pid *pid;
 
        rcu_read_lock();
-       pid = get_pid(find_pid(nr));
+       pid = get_pid(find_vpid(nr));
        rcu_read_unlock();
 
        return pid;
 }
 
+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+{
+       struct upid *upid;
+       pid_t nr = 0;
+
+       if (pid && ns->level <= pid->level) {
+               upid = &pid->numbers[ns->level];
+               if (upid->ns == ns)
+                       nr = upid->nr;
+       }
+       return nr;
+}
+
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_pid(tsk), ns);
+}
+EXPORT_SYMBOL(task_pid_nr_ns);
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_tgid(tsk), ns);
+}
+EXPORT_SYMBOL(task_tgid_nr_ns);
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_pgrp(tsk), ns);
+}
+EXPORT_SYMBOL(task_pgrp_nr_ns);
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_session(tsk), ns);
+}
+EXPORT_SYMBOL(task_session_nr_ns);
+
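/*
 * A sketch of how the two views of a task's id relate, using the
 * task_pid_nr() and task_pid_vnr() helpers this series adds alongside
 * the _ns variants above (both appear elsewhere in this diff); the
 * function name is hypothetical.
 */
static void report_ids(struct task_struct *tsk)
{
	rcu_read_lock();
	/* id in the initial namespace: what the whole system sees */
	printk(KERN_DEBUG "global pid: %d\n", task_pid_nr(tsk));
	/* id as seen from the caller's namespace; 0 if tsk is not visible */
	printk(KERN_DEBUG "virtual pid: %d\n", task_pid_vnr(tsk));
	rcu_read_unlock();
}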
 /*
  * Used by proc to find the first pid that is greater than or equal to nr.
  *
  * If there is a pid at nr this function is exactly the same as find_pid_ns.
  */
-struct pid *find_ge_pid(int nr)
+struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 {
        struct pid *pid;
 
        do {
-               pid = find_pid(nr);
+               pid = find_pid_ns(nr, ns);
                if (pid)
                        break;
-               nr = next_pidmap(current->nsproxy->pid_ns, nr);
+               nr = next_pidmap(ns, nr);
        } while (nr > 0);
 
        return pid;
 }
 EXPORT_SYMBOL_GPL(find_get_pid);
 
+struct pid_cache {
+       int nr_ids;
+       char name[16];
+       struct kmem_cache *cachep;
+       struct list_head list;
+};
+
+static LIST_HEAD(pid_caches_lh);
+static DEFINE_MUTEX(pid_caches_mutex);
+
+/*
+ * creates the kmem cache to allocate pids from.
+ * @nr_ids: the number of numerical ids this pid will have to carry
+ */
+
+static struct kmem_cache *create_pid_cachep(int nr_ids)
+{
+       struct pid_cache *pcache;
+       struct kmem_cache *cachep;
+
+       mutex_lock(&pid_caches_mutex);
+       list_for_each_entry (pcache, &pid_caches_lh, list)
+               if (pcache->nr_ids == nr_ids)
+                       goto out;
+
+       pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
+       if (pcache == NULL)
+               goto err_alloc;
+
+       snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
+       cachep = kmem_cache_create(pcache->name,
+                       sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
+                       0, SLAB_HWCACHE_ALIGN, NULL);
+       if (cachep == NULL)
+               goto err_cachep;
+
+       pcache->nr_ids = nr_ids;
+       pcache->cachep = cachep;
+       list_add(&pcache->list, &pid_caches_lh);
+out:
+       mutex_unlock(&pid_caches_mutex);
+       return pcache->cachep;
+
+err_cachep:
+       kfree(pcache);
+err_alloc:
+       mutex_unlock(&pid_caches_mutex);
+       return NULL;
+}
+
+static struct pid_namespace *create_pid_namespace(int level)
+{
+       struct pid_namespace *ns;
+       int i;
+
+       ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+       if (ns == NULL)
+               goto out;
+
+       ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!ns->pidmap[0].page)
+               goto out_free;
+
+       ns->pid_cachep = create_pid_cachep(level + 1);
+       if (ns->pid_cachep == NULL)
+               goto out_free_map;
+
+       kref_init(&ns->kref);
+       ns->last_pid = 0;
+       ns->child_reaper = NULL;
+       ns->level = level;
+
+       set_bit(0, ns->pidmap[0].page);
+       atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
+
+       for (i = 1; i < PIDMAP_ENTRIES; i++) {
+               ns->pidmap[i].page = 0;
+               atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
+       }
+
+       return ns;
+
+out_free_map:
+       kfree(ns->pidmap[0].page);
+out_free:
+       kmem_cache_free(pid_ns_cachep, ns);
+out:
+       return ERR_PTR(-ENOMEM);
+}
+
+static void destroy_pid_namespace(struct pid_namespace *ns)
+{
+       int i;
+
+       for (i = 0; i < PIDMAP_ENTRIES; i++)
+               kfree(ns->pidmap[i].page);
+       kmem_cache_free(pid_ns_cachep, ns);
+}
+
 struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
 {
+       struct pid_namespace *new_ns;
+
        BUG_ON(!old_ns);
-       get_pid_ns(old_ns);
-       return old_ns;
+       new_ns = get_pid_ns(old_ns);
+       if (!(flags & CLONE_NEWPID))
+               goto out;
+
+       new_ns = ERR_PTR(-EINVAL);
+       if (flags & CLONE_THREAD)
+               goto out_put;
+
+       new_ns = create_pid_namespace(old_ns->level + 1);
+       if (!IS_ERR(new_ns))
+               new_ns->parent = get_pid_ns(old_ns);
+
+out_put:
+       put_pid_ns(old_ns);
+out:
+       return new_ns;
 }
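/*
 * Userspace view of the CLONE_NEWPID branch above, as a hedged sketch:
 * on a kernel with this series applied, the first task cloned into the
 * new level sees itself as pid 1.  Requires CAP_SYS_ADMIN; the fallback
 * flag value below matches the one this series adds to the uapi headers.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef CLONE_NEWPID
#define CLONE_NEWPID 0x20000000
#endif

static char child_stack[16 * 1024];

static int child(void *arg)
{
	printf("in new ns: pid %d\n", getpid());	/* prints 1 */
	return 0;
}

int main(void)
{
	pid_t pid = clone(child, child_stack + sizeof(child_stack),
			  CLONE_NEWPID | SIGCHLD, NULL);
	if (pid < 0) {
		perror("clone");
		return 1;
	}
	printf("in parent ns: child is pid %d\n", pid);	/* some larger nr */
	return waitpid(pid, NULL, 0) < 0;
}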
 
 void free_pid_ns(struct kref *kref)
 {
-       struct pid_namespace *ns;
+       struct pid_namespace *ns, *parent;
 
        ns = container_of(kref, struct pid_namespace, kref);
-       kfree(ns);
+
+       parent = ns->parent;
+       destroy_pid_namespace(ns);
+
+       if (parent != NULL)
+               put_pid_ns(parent);
+}
+
+void zap_pid_ns_processes(struct pid_namespace *pid_ns)
+{
+       int nr;
+       int rc;
+
+       /*
+        * The last thread in the cgroup-init thread group is terminating.
+        * Find the remaining pids in the namespace, signal them and wait
+        * for them to exit.
+        *
+        * Note:  This signals each thread in the namespace - even those
+        *        that belong to the same thread group.  To avoid this, we
+        *        would have to walk the entire tasklist looking for
+        *        processes in this namespace, but that could be
+        *        unnecessarily expensive if the pid namespace has just a
+        *        few processes.  Alternatively we would need to maintain
+        *        a tasklist for each pid namespace.
+        */
+       read_lock(&tasklist_lock);
+       nr = next_pidmap(pid_ns, 1);
+       while (nr > 0) {
+               kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
+               nr = next_pidmap(pid_ns, nr);
+       }
+       read_unlock(&tasklist_lock);
+
+       do {
+               clear_thread_flag(TIF_SIGPENDING);
+               rc = sys_wait4(-1, NULL, __WALL, NULL);
+       } while (rc != -ECHILD);
+
+       /* Child reaper for the pid namespace is going away */
+       pid_ns->child_reaper = NULL;
+       return;
 }
 
 /*
@@ -412,5 +691,9 @@ void __init pidmap_init(void)
        set_bit(0, init_pid_ns.pidmap[0].page);
        atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
-       pid_cachep = KMEM_CACHE(pid, SLAB_PANIC);
+       init_pid_ns.pid_cachep = create_pid_cachep(1);
+       if (init_pid_ns.pid_cachep == NULL)
+               panic("Can't create pid_1 cachep\n");
+
+       pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
 }
index b53c8fcd9d82d25044867c9529a17f9b1fdb00f8..68c96376e84a6078da9c07d6ebfc2b1c64a3e2a7 100644 (file)
@@ -21,8 +21,8 @@ static int check_clock(const clockid_t which_clock)
 
        read_lock(&tasklist_lock);
        p = find_task_by_pid(pid);
-       if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
-                  p->tgid != current->tgid : p->tgid != pid)) {
+       if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
+                  same_thread_group(p, current) : thread_group_leader(p))) {
                error = -EINVAL;
        }
        read_unlock(&tasklist_lock);
@@ -308,13 +308,13 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
                p = find_task_by_pid(pid);
                if (p) {
                        if (CPUCLOCK_PERTHREAD(which_clock)) {
-                               if (p->tgid == current->tgid) {
+                               if (same_thread_group(p, current)) {
                                        error = cpu_clock_sample(which_clock,
                                                                 p, &rtn);
                                }
                        } else {
                                read_lock(&tasklist_lock);
-                               if (p->tgid == pid && p->signal) {
+                               if (thread_group_leader(p) && p->signal) {
                                        error =
                                            cpu_clock_sample_group(which_clock,
                                                                   p, &rtn);
@@ -355,7 +355,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
                        p = current;
                } else {
                        p = find_task_by_pid(pid);
-                       if (p && p->tgid != current->tgid)
+                       if (p && !same_thread_group(p, current))
                                p = NULL;
                }
        } else {
@@ -363,7 +363,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
                        p = current->group_leader;
                } else {
                        p = find_task_by_pid(pid);
-                       if (p && p->tgid != pid)
+                       if (p && !thread_group_leader(p))
                                p = NULL;
                }
        }
index d11f579d189a1b75f753addcdfba961f7561b74b..35b4bbfc78ff6ee7d3f389baf0430231ed09af7b 100644 (file)
@@ -404,7 +404,7 @@ static struct task_struct * good_sigevent(sigevent_t * event)
 
        if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
                (!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
-                rtn->tgid != current->tgid ||
+                !same_thread_group(rtn, current) ||
                 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
                return NULL;
 
@@ -608,7 +608,7 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
                spin_lock(&timr->it_lock);
 
                if ((timr->it_id != timer_id) || !(timr->it_process) ||
-                               timr->it_process->tgid != current->tgid) {
+                               !same_thread_group(timr->it_process, current)) {
                        spin_unlock(&timr->it_lock);
                        spin_unlock_irqrestore(&idr_lock, *flags);
                        timr = NULL;
index a73ebd3b9d4ca5019522fcf80c47eac8d5d7fa0c..7c76f2ffaeaad78060cfb3223857a89c7a4ccb8f 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/signal.h>
 #include <linux/audit.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -168,7 +169,7 @@ int ptrace_attach(struct task_struct *task)
        retval = -EPERM;
        if (task->pid <= 1)
                goto out;
-       if (task->tgid == current->tgid)
+       if (same_thread_group(task, current))
                goto out;
 
 repeat:
@@ -443,7 +444,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
                return ERR_PTR(-EPERM);
 
        read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
+       child = find_task_by_vpid(pid);
        if (child)
                get_task_struct(child);
 
index 6b0703db152d2a61411595380ce4b608e98f6890..56d73cb8826d86ff75ce0dea818ce658be11cf49 100644 (file)
@@ -87,7 +87,7 @@ static int rt_trace_on = 1;
 static void printk_task(struct task_struct *p)
 {
        if (p)
-               printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
+               printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
        else
                printk("<none>");
 }
@@ -152,22 +152,25 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
        printk(  "[ BUG: circular locking deadlock detected! ]\n");
        printk(  "--------------------------------------------\n");
        printk("%s/%d is deadlocking current task %s/%d\n\n",
-              task->comm, task->pid, current->comm, current->pid);
+              task->comm, task_pid_nr(task),
+              current->comm, task_pid_nr(current));
 
        printk("\n1) %s/%d is trying to acquire this lock:\n",
-              current->comm, current->pid);
+              current->comm, task_pid_nr(current));
        printk_lock(waiter->lock, 1);
 
-       printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
+       printk("\n2) %s/%d is blocked on this lock:\n",
+               task->comm, task_pid_nr(task));
        printk_lock(waiter->deadlock_lock, 1);
 
        debug_show_held_locks(current);
        debug_show_held_locks(task);
 
-       printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
+       printk("\n%s/%d's [blocked] stackdump:\n\n",
+               task->comm, task_pid_nr(task));
        show_stack(task, NULL);
        printk("\n%s/%d's [current] stackdump:\n\n",
-              current->comm, current->pid);
+               current->comm, task_pid_nr(current));
        dump_stack();
        debug_show_all_locks();
 
index 8cd9bd2cdb34243a89c45102708abdcc2c7cbdbd..0deef71ff8d2a7444f74af1904b11b93e2a86766 100644 (file)
@@ -185,7 +185,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
                        prev_max = max_lock_depth;
                        printk(KERN_WARNING "Maximum lock depth %d reached "
                               "task: %s (%d)\n", max_lock_depth,
-                              top_task->comm, top_task->pid);
+                              top_task->comm, task_pid_nr(top_task));
                }
                put_task_struct(task);
 
index ed90be46fb31a3a3a98fb0ef92e4fff8a1dedcfc..afe76ec2e7feffb1f7a070d3f433b2838dca378a 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
+#include <linux/pid_namespace.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/timer.h>
@@ -51,6 +52,7 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
+#include <linux/cpu_acct.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
 #include <linux/sysctl.h>
@@ -153,10 +155,15 @@ struct rt_prio_array {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+#include <linux/cgroup.h>
+
 struct cfs_rq;
 
 /* task group related information */
 struct task_group {
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+       struct cgroup_subsys_state css;
+#endif
        /* schedulable entities of this group on each cpu */
        struct sched_entity **se;
        /* runqueue "owned" by this group on each cpu */
@@ -197,6 +204,9 @@ static inline struct task_group *task_group(struct task_struct *p)
 
 #ifdef CONFIG_FAIR_USER_SCHED
        tg = p->user->tg;
+#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+       tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
+                               struct task_group, css);
 #else
        tg  = &init_task_group;
 #endif
@@ -1875,7 +1885,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
        preempt_enable();
 #endif
        if (current->set_child_tid)
-               put_user(current->pid, current->set_child_tid);
+               put_user(task_pid_vnr(current), current->set_child_tid);
 }
 
 /*
@@ -3307,9 +3317,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 {
        struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
        cputime64_t tmp;
+       struct rq *rq = this_rq();
 
        p->utime = cputime_add(p->utime, cputime);
 
+       if (p != rq->idle)
+               cpuacct_charge(p, cputime);
+
        /* Add user time to cpustat. */
        tmp = cputime_to_cputime64(cputime);
        if (TASK_NICE(p) > 0)
@@ -3374,9 +3388,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
                cpustat->irq = cputime64_add(cpustat->irq, tmp);
        else if (softirq_count())
                cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-       else if (p != rq->idle)
+       else if (p != rq->idle) {
                cpustat->system = cputime64_add(cpustat->system, tmp);
-       else if (atomic_read(&rq->nr_iowait) > 0)
+               cpuacct_charge(p, cputime);
+       } else if (atomic_read(&rq->nr_iowait) > 0)
                cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
        else
                cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3412,8 +3427,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
                        cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
                else
                        cpustat->idle = cputime64_add(cpustat->idle, tmp);
-       } else
+       } else {
                cpustat->steal = cputime64_add(cpustat->steal, tmp);
+               cpuacct_charge(p, -tmp);
+       }
 }
 
 /*
@@ -3493,7 +3510,7 @@ EXPORT_SYMBOL(sub_preempt_count);
 static noinline void __schedule_bug(struct task_struct *prev)
 {
        printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n",
-               prev->comm, preempt_count(), prev->pid);
+               prev->comm, preempt_count(), task_pid_nr(prev));
        debug_show_held_locks(prev);
        if (irqs_disabled())
                print_irqtrace_events(prev);
@@ -4159,7 +4176,7 @@ struct task_struct *idle_task(int cpu)
  */
 static struct task_struct *find_process_by_pid(pid_t pid)
 {
-       return pid ? find_task_by_pid(pid) : current;
+       return pid ? find_task_by_vpid(pid) : current;
 }
 
 /* Actually do priority change: must hold rq lock. */
@@ -4462,8 +4479,21 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
 
        cpus_allowed = cpuset_cpus_allowed(p);
        cpus_and(new_mask, new_mask, cpus_allowed);
+ again:
        retval = set_cpus_allowed(p, new_mask);
 
+       if (!retval) {
+               cpus_allowed = cpuset_cpus_allowed(p);
+               if (!cpus_subset(new_mask, cpus_allowed)) {
+                       /*
+                        * We must have raced with a concurrent cpuset
+                        * update. Just reset the cpus_allowed to the
+                        * cpuset's cpus_allowed
+                        */
+                       new_mask = cpus_allowed;
+                       goto again;
+               }
+       }
 out_unlock:
        put_task_struct(p);
        mutex_unlock(&sched_hotcpu_mutex);
@@ -4843,7 +4873,8 @@ static void show_task(struct task_struct *p)
                free = (unsigned long)n - (unsigned long)end_of_stack(p);
        }
 #endif
-       printk(KERN_CONT "%5lu %5d %6d\n", free, p->pid, p->parent->pid);
+       printk(KERN_CONT "%5lu %5d %6d\n", free,
+               task_pid_nr(p), task_pid_nr(p->parent));
 
        if (state != TASK_RUNNING)
                show_stack(p, NULL);
@@ -5137,8 +5168,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 
                /* No more Mr. Nice Guy. */
                if (dest_cpu == NR_CPUS) {
+                       cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
+                       /*
+                        * Try to stay on the same cpuset, where the
+                        * current cpuset may be a subset of all cpus.
+                        * The cpuset_cpus_allowed_locked() variant of
+                        * cpuset_cpus_allowed() will not block.  It must be
+                        * called within calls to cpuset_lock/cpuset_unlock.
+                        */
                        rq = task_rq_lock(p, &flags);
-                       cpus_setall(p->cpus_allowed);
+                       p->cpus_allowed = cpus_allowed;
                        dest_cpu = any_online_cpu(p->cpus_allowed);
                        task_rq_unlock(rq, &flags);
 
@@ -5150,7 +5189,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
                        if (p->mm && printk_ratelimit())
                                printk(KERN_INFO "process %d (%s) no "
                                       "longer affine to cpu%d\n",
-                                      p->pid, p->comm, dead_cpu);
+                              task_pid_nr(p), p->comm, dead_cpu);
                }
        } while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
 }
@@ -5257,7 +5296,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
        struct rq *rq = cpu_rq(dead_cpu);
 
        /* Must be exiting, otherwise would be on tasklist. */
-       BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
+       BUG_ON(!p->exit_state);
 
        /* Cannot have done final schedule yet: would have vanished. */
        BUG_ON(p->state == TASK_DEAD);
@@ -5504,6 +5543,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
+               cpuset_lock(); /* around calls to cpuset_cpus_allowed_locked() */
                migrate_live_tasks(cpu);
                rq = cpu_rq(cpu);
                kthread_stop(rq->migration_thread);
@@ -5517,6 +5557,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                rq->idle->sched_class = &idle_sched_class;
                migrate_dead_tasks(cpu);
                spin_unlock_irq(&rq->lock);
+               cpuset_unlock();
                migrate_nr_uninterruptible(rq);
                BUG_ON(rq->nr_running != 0);
 
@@ -6367,26 +6408,31 @@ error:
        return -ENOMEM;
 #endif
 }
+
+static cpumask_t *doms_cur;    /* current sched domains */
+static int ndoms_cur;          /* number of sched domains in 'doms_cur' */
+
+/*
+ * Special case: If a kmalloc of a doms_cur partition (array of
+ * cpumask_t) fails, then fallback to a single sched domain,
+ * as determined by the single cpumask_t fallback_doms.
+ */
+static cpumask_t fallback_doms;
+
 /*
  * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
+ * For now this just excludes isolated cpus, but could be used to
+ * exclude other special cases in the future.
  */
 static int arch_init_sched_domains(const cpumask_t *cpu_map)
 {
-       cpumask_t cpu_default_map;
-       int err;
-
-       /*
-        * Setup mask for cpus without special case scheduling requirements.
-        * For now this just excludes isolated cpus, but could be used to
-        * exclude other special cases in the future.
-        */
-       cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
-
-       err = build_sched_domains(&cpu_default_map);
-
+       ndoms_cur = 1;
+       doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+       if (!doms_cur)
+               doms_cur = &fallback_doms;
+       cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
        register_sched_domain_sysctl();
-
-       return err;
+       return build_sched_domains(doms_cur);
 }
 
 static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6410,6 +6456,68 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
        arch_destroy_sched_domains(cpu_map);
 }
 
+/*
+ * Partition sched domains as specified by the 'ndoms_new'
+ * cpumasks in the array doms_new[] of cpumasks.  This compares
+ * doms_new[] to the current sched domain partitioning, doms_cur[].
+ * It destroys each deleted domain and builds each new domain.
+ *
+ * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+ * The masks don't intersect (don't overlap).  We should set up one
+ * sched domain for each mask.  CPUs not in any of the cpumasks will
+ * not be load balanced.  If the same cpumask appears both in the
+ * current 'doms_cur' domains and in the new 'doms_new', we can leave
+ * it as it is.
+ *
+ * The passed-in 'doms_new' should be kmalloc'd.  This routine takes
+ * ownership of it and will kfree it when done with it.  If the caller
+ * failed the kmalloc call, then it can pass in doms_new == NULL,
+ * and partition_sched_domains() will fall back to the single partition
+ * 'fallback_doms'.
+ *
+ * Call with hotplug lock held
+ */
+void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
+{
+       int i, j;
+
+       if (doms_new == NULL) {
+               ndoms_new = 1;
+               doms_new = &fallback_doms;
+               cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+       }
+
+       /* Destroy deleted domains */
+       for (i = 0; i < ndoms_cur; i++) {
+               for (j = 0; j < ndoms_new; j++) {
+                       if (cpus_equal(doms_cur[i], doms_new[j]))
+                               goto match1;
+               }
+               /* no match - a current sched domain not in new doms_new[] */
+               detach_destroy_domains(doms_cur + i);
+match1:
+               ;
+       }
+
+       /* Build new domains */
+       for (i = 0; i < ndoms_new; i++) {
+               for (j = 0; j < ndoms_cur; j++) {
+                       if (cpus_equal(doms_new[i], doms_cur[j]))
+                               goto match2;
+               }
+               /* no match - add a new doms_new */
+               build_sched_domains(doms_new + i);
+match2:
+               ;
+       }
+
+       /* Remember the new sched domains */
+       if (doms_cur != &fallback_doms)
+               kfree(doms_cur);
+       doms_cur = doms_new;
+       ndoms_cur = ndoms_new;
+}
+
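/*
 * A hedged sketch of a caller (cpusets is the intended one): build a
 * kmalloc'd array of disjoint cpumasks and hand ownership over.  The
 * function name and CPU numbers are arbitrary; the hotplug lock must be
 * held, as the comment above requires.
 */
static void example_repartition(void)
{
	cpumask_t *doms = kmalloc(2 * sizeof(cpumask_t), GFP_KERNEL);

	if (doms) {
		cpus_clear(doms[0]);
		cpus_clear(doms[1]);
		cpu_set(0, doms[0]);	/* first domain: cpus 0-1 */
		cpu_set(1, doms[0]);
		cpu_set(2, doms[1]);	/* second domain: cpus 2-3 */
		cpu_set(3, doms[1]);
	}
	/* a NULL doms_new falls back to the single fallback_doms partition */
	partition_sched_domains(doms ? 2 : 1, doms);
}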
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 static int arch_reinit_sched_domains(void)
 {
@@ -6991,3 +7099,116 @@ unsigned long sched_group_shares(struct task_group *tg)
 }
 
 #endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+
+/* return corresponding task_group object of a cgroup */
+static inline struct task_group *cgroup_tg(struct cgroup *cont)
+{
+       return container_of(cgroup_subsys_state(cont, cpu_cgroup_subsys_id),
+                                        struct task_group, css);
+}
+
+static struct cgroup_subsys_state *
+cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       struct task_group *tg;
+
+       if (!cont->parent) {
+               /* This is early initialization for the top cgroup */
+               init_task_group.css.cgroup = cont;
+               return &init_task_group.css;
+       }
+
+       /* we support only 1-level deep hierarchical scheduler atm */
+       if (cont->parent->parent)
+               return ERR_PTR(-EINVAL);
+
+       tg = sched_create_group();
+       if (IS_ERR(tg))
+               return ERR_PTR(-ENOMEM);
+
+       /* Bind the cgroup to task_group object we just created */
+       tg->css.cgroup = cont;
+
+       return &tg->css;
+}
+
+static void cpu_cgroup_destroy(struct cgroup_subsys *ss,
+                                       struct cgroup *cont)
+{
+       struct task_group *tg = cgroup_tg(cont);
+
+       sched_destroy_group(tg);
+}
+
+static int cpu_cgroup_can_attach(struct cgroup_subsys *ss,
+                            struct cgroup *cont, struct task_struct *tsk)
+{
+       /* We don't support RT-tasks being in separate groups */
+       if (tsk->sched_class != &fair_sched_class)
+               return -EINVAL;
+
+       return 0;
+}
+
+static void
+cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+                       struct cgroup *old_cont, struct task_struct *tsk)
+{
+       sched_move_task(tsk);
+}
+
+static ssize_t cpu_shares_write(struct cgroup *cont, struct cftype *cftype,
+                               struct file *file, const char __user *userbuf,
+                               size_t nbytes, loff_t *ppos)
+{
+       unsigned long shareval;
+       struct task_group *tg = cgroup_tg(cont);
+       char buffer[2*sizeof(unsigned long) + 1];
+       int rc;
+
+       if (nbytes > 2*sizeof(unsigned long))   /* safety check */
+               return -E2BIG;
+
+       if (copy_from_user(buffer, userbuf, nbytes))
+               return -EFAULT;
+
+       buffer[nbytes] = 0;     /* nul-terminate */
+       shareval = simple_strtoul(buffer, NULL, 10);
+
+       rc = sched_group_set_shares(tg, shareval);
+
+       return (rc < 0 ? rc : nbytes);
+}
+
+static u64 cpu_shares_read_uint(struct cgroup *cont, struct cftype *cft)
+{
+       struct task_group *tg = cgroup_tg(cont);
+
+       return (u64) tg->shares;
+}
+
+static struct cftype cpu_shares = {
+       .name = "shares",
+       .read_uint = cpu_shares_read_uint,
+       .write = cpu_shares_write,
+};
+
+static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       return cgroup_add_file(cont, ss, &cpu_shares);
+}
+
+struct cgroup_subsys cpu_cgroup_subsys = {
+       .name           = "cpu",
+       .create         = cpu_cgroup_create,
+       .destroy        = cpu_cgroup_destroy,
+       .can_attach     = cpu_cgroup_can_attach,
+       .attach         = cpu_cgroup_attach,
+       .populate       = cpu_cgroup_populate,
+       .subsys_id      = cpu_cgroup_subsys_id,
+       .early_init     = 1,
+};
+
+#endif /* CONFIG_FAIR_CGROUP_SCHED */
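/*
 * Userspace sketch: with the cgroup filesystem mounted with the "cpu"
 * subsystem (the mount point and group name below are assumptions), the
 * "shares" cftype above shows up as a cpu.shares file whose writes are
 * parsed as a decimal by cpu_shares_write().
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/cgroup/example/cpu.shares", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "2048", 4) != 4)	/* handled by cpu_shares_write() */
		perror("write");
	close(fd);
	return 0;
}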
index e4f059cd9867df74d44949cf7d16be947ee7c9e8..12006308c7eba5f920ba417bcff27a42312399af 100644 (file)
@@ -256,7 +256,7 @@ flush_signal_handlers(struct task_struct *t, int force_default)
 
 int unhandled_signal(struct task_struct *tsk, int sig)
 {
-       if (is_init(tsk))
+       if (is_global_init(tsk))
                return 1;
        if (tsk->ptrace & PT_PTRACED)
                return 0;
@@ -536,7 +536,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
                        return error;
                error = -EPERM;
                if (((sig != SIGCONT) ||
-                       (process_session(current) != process_session(t)))
+                       (task_session_nr(current) != task_session_nr(t)))
                    && (current->euid ^ t->suid) && (current->euid ^ t->uid)
                    && (current->uid ^ t->suid) && (current->uid ^ t->uid)
                    && !capable(CAP_KILL))
@@ -694,7 +694,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
                        q->info.si_signo = sig;
                        q->info.si_errno = 0;
                        q->info.si_code = SI_USER;
-                       q->info.si_pid = current->pid;
+                       q->info.si_pid = task_pid_vnr(current);
                        q->info.si_uid = current->uid;
                        break;
                case (unsigned long) SEND_SIG_PRIV:
@@ -730,7 +730,7 @@ int print_fatal_signals;
 static void print_fatal_signal(struct pt_regs *regs, int signr)
 {
        printk("%s/%d: potentially unexpected fatal signal %d.\n",
-               current->comm, current->pid, signr);
+               current->comm, task_pid_nr(current), signr);
 
 #ifdef __i386__
        printk("code at %08lx: ", regs->eip);
@@ -1089,7 +1089,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
        int error;
        rcu_read_lock();
-       error = kill_pid_info(sig, info, find_pid(pid));
+       error = kill_pid_info(sig, info, find_vpid(pid));
        rcu_read_unlock();
        return error;
 }
@@ -1150,7 +1150,7 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
 
                read_lock(&tasklist_lock);
                for_each_process(p) {
-                       if (p->pid > 1 && p->tgid != current->tgid) {
+                       if (p->pid > 1 && !same_thread_group(p, current)) {
                                int err = group_send_sig_info(sig, info, p);
                                ++count;
                                if (err != -EPERM)
@@ -1160,9 +1160,9 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
                read_unlock(&tasklist_lock);
                ret = count ? retval : -ESRCH;
        } else if (pid < 0) {
-               ret = kill_pgrp_info(sig, info, find_pid(-pid));
+               ret = kill_pgrp_info(sig, info, find_vpid(-pid));
        } else {
-               ret = kill_pid_info(sig, info, find_pid(pid));
+               ret = kill_pid_info(sig, info, find_vpid(pid));
        }
        rcu_read_unlock();
        return ret;
@@ -1266,7 +1266,12 @@ EXPORT_SYMBOL(kill_pid);
 int
 kill_proc(pid_t pid, int sig, int priv)
 {
-       return kill_proc_info(sig, __si_special(priv), pid);
+       int ret;
+
+       rcu_read_lock();
+       ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
+       rcu_read_unlock();
+       return ret;
 }
 
 /*
@@ -1443,7 +1448,22 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 
        info.si_signo = sig;
        info.si_errno = 0;
-       info.si_pid = tsk->pid;
+       /*
+        * We are under tasklist_lock here, so our parent is tied to
+        * us and cannot exit and release its namespace.
+        *
+        * The only thing it can do is switch its nsproxy with sys_unshare,
+        * but unsharing pid namespaces is not allowed, so we will always
+        * see the relevant namespace.
+        *
+        * write_lock() currently calls preempt_disable(), which is the
+        * same as rcu_read_lock(), but according to Oleg it is not
+        * correct to rely on this.
+        */
+       rcu_read_lock();
+       info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+       rcu_read_unlock();
+
        info.si_uid = tsk->uid;
 
        /* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1508,7 +1528,13 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
 
        info.si_signo = SIGCHLD;
        info.si_errno = 0;
-       info.si_pid = tsk->pid;
+       /*
+        * see the comment in do_notify_parent() about the following 3 lines
+        */
+       rcu_read_lock();
+       info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+       rcu_read_unlock();
+
        info.si_uid = tsk->uid;
 
        /* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1634,7 +1660,7 @@ void ptrace_notify(int exit_code)
        memset(&info, 0, sizeof info);
        info.si_signo = SIGTRAP;
        info.si_code = exit_code;
-       info.si_pid = current->pid;
+       info.si_pid = task_pid_vnr(current);
        info.si_uid = current->uid;
 
        /* Let the debugger run.  */
@@ -1804,7 +1830,7 @@ relock:
                                info->si_signo = signr;
                                info->si_errno = 0;
                                info->si_code = SI_USER;
-                               info->si_pid = current->parent->pid;
+                               info->si_pid = task_pid_vnr(current->parent);
                                info->si_uid = current->parent->uid;
                        }
 
@@ -1835,11 +1861,9 @@ relock:
                        continue;
 
                /*
-                * Init of a pid space gets no signals it doesn't want from
-                * within that pid space. It can of course get signals from
-                * its parent pid space.
+                * Global init gets no signals it doesn't want.
                 */
-               if (current == child_reaper(current))
+               if (is_global_init(current))
                        continue;
 
                if (sig_kernel_stop(signr)) {
@@ -2193,7 +2217,7 @@ sys_kill(int pid, int sig)
        info.si_signo = sig;
        info.si_errno = 0;
        info.si_code = SI_USER;
-       info.si_pid = current->tgid;
+       info.si_pid = task_tgid_vnr(current);
        info.si_uid = current->uid;
 
        return kill_something_info(sig, &info, pid);
@@ -2209,12 +2233,12 @@ static int do_tkill(int tgid, int pid, int sig)
        info.si_signo = sig;
        info.si_errno = 0;
        info.si_code = SI_TKILL;
-       info.si_pid = current->tgid;
+       info.si_pid = task_tgid_vnr(current);
        info.si_uid = current->uid;
 
        read_lock(&tasklist_lock);
-       p = find_task_by_pid(pid);
-       if (p && (tgid <= 0 || p->tgid == tgid)) {
+       p = find_task_by_vpid(pid);
+       if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
                error = check_kill_permission(sig, &info, p);
                /*
                 * The null signal is a permissions and process existence
index edeeef3a6a322917b5b2befb95238df5557a9af5..11df812263c8fd3eed16f4f1a57d36a68f6eaedc 100644 (file)
@@ -113,7 +113,7 @@ void softlockup_tick(void)
        spin_lock(&print_lock);
        printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
                        this_cpu, now - touch_timestamp,
-                               current->comm, current->pid);
+                       current->comm, task_pid_nr(current));
        if (regs)
                show_regs(regs);
        else
index bc8879c822a54444657df6b9c51a7dcf227e055e..304b5410d746ec724afcb4e3ce50bfe65f1bf2ff 100644 (file)
@@ -106,537 +106,6 @@ EXPORT_SYMBOL(cad_pid);
 
 void (*pm_power_off_prepare)(void);
 
-/*
- *     Notifier list for kernel code which wants to be called
- *     at shutdown. This is used to stop any idling DMA operations
- *     and the like. 
- */
-
-static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
-
-/*
- *     Notifier chain core routines.  The exported routines below
- *     are layered on top of these, with appropriate locking added.
- */
-
-static int notifier_chain_register(struct notifier_block **nl,
-               struct notifier_block *n)
-{
-       while ((*nl) != NULL) {
-               if (n->priority > (*nl)->priority)
-                       break;
-               nl = &((*nl)->next);
-       }
-       n->next = *nl;
-       rcu_assign_pointer(*nl, n);
-       return 0;
-}
-
-static int notifier_chain_unregister(struct notifier_block **nl,
-               struct notifier_block *n)
-{
-       while ((*nl) != NULL) {
-               if ((*nl) == n) {
-                       rcu_assign_pointer(*nl, n->next);
-                       return 0;
-               }
-               nl = &((*nl)->next);
-       }
-       return -ENOENT;
-}
-
-/**
- * notifier_call_chain - Informs the registered notifiers about an event.
- *     @nl:            Pointer to head of the blocking notifier chain
- *     @val:           Value passed unmodified to notifier function
- *     @v:             Pointer passed unmodified to notifier function
- *     @nr_to_call:    Number of notifier functions to be called. Don't care
- *                     value of this parameter is -1.
- *     @nr_calls:      Records the number of notifications sent. Don't care
- *                     value of this field is NULL.
- *     @returns:       notifier_call_chain returns the value returned by the
- *                     last notifier function called.
- */
-
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
-                                       unsigned long val, void *v,
-                                       int nr_to_call, int *nr_calls)
-{
-       int ret = NOTIFY_DONE;
-       struct notifier_block *nb, *next_nb;
-
-       nb = rcu_dereference(*nl);
-
-       while (nb && nr_to_call) {
-               next_nb = rcu_dereference(nb->next);
-               ret = nb->notifier_call(nb, val, v);
-
-               if (nr_calls)
-                       (*nr_calls)++;
-
-               if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
-                       break;
-               nb = next_nb;
-               nr_to_call--;
-       }
-       return ret;
-}
-
-/*
- *     Atomic notifier chain routines.  Registration and unregistration
- *     use a spinlock, and call_chain is synchronized by RCU (no locks).
- */
-
-/**
- *     atomic_notifier_chain_register - Add notifier to an atomic notifier chain
- *     @nh: Pointer to head of the atomic notifier chain
- *     @n: New entry in notifier chain
- *
- *     Adds a notifier to an atomic notifier chain.
- *
- *     Currently always returns zero.
- */
-
-int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
-               struct notifier_block *n)
-{
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&nh->lock, flags);
-       ret = notifier_chain_register(&nh->head, n);
-       spin_unlock_irqrestore(&nh->lock, flags);
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
-
-/**
- *     atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
- *     @nh: Pointer to head of the atomic notifier chain
- *     @n: Entry to remove from notifier chain
- *
- *     Removes a notifier from an atomic notifier chain.
- *
- *     Returns zero on success or %-ENOENT on failure.
- */
-int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
-               struct notifier_block *n)
-{
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&nh->lock, flags);
-       ret = notifier_chain_unregister(&nh->head, n);
-       spin_unlock_irqrestore(&nh->lock, flags);
-       synchronize_rcu();
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
-
-/**
- *     __atomic_notifier_call_chain - Call functions in an atomic notifier chain
- *     @nh: Pointer to head of the atomic notifier chain
- *     @val: Value passed unmodified to notifier function
- *     @v: Pointer passed unmodified to notifier function
- *     @nr_to_call: See the comment for notifier_call_chain.
- *     @nr_calls: See the comment for notifier_call_chain.
- *
- *     Calls each function in a notifier chain in turn.  The functions
- *     run in an atomic context, so they must not block.
- *     This routine uses RCU to synchronize with changes to the chain.
- *
- *     If the return value of the notifier can be and'ed
- *     with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
- *     will return immediately, with the return value of
- *     the notifier function which halted execution.
- *     Otherwise the return value is the return value
- *     of the last notifier function called.
- */
-int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-                                       unsigned long val, void *v,
-                                       int nr_to_call, int *nr_calls)
-{
-       int ret;
-
-       rcu_read_lock();
-       ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-       rcu_read_unlock();
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
-
-int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-               unsigned long val, void *v)
-{
-       return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
-/*
- *     Blocking notifier chain routines.  All access to the chain is
- *     synchronized by an rwsem.
- */
-
-/**
- *     blocking_notifier_chain_register - Add notifier to a blocking notifier chain
- *     @nh: Pointer to head of the blocking notifier chain
- *     @n: New entry in notifier chain
- *
- *     Adds a notifier to a blocking notifier chain.
- *     Must be called in process context.
- *
- *     Currently always returns zero.
- */
-int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
-               struct notifier_block *n)
-{
-       int ret;
-
-       /*
-        * This code gets used during boot-up, when task switching is
-        * not yet working and interrupts must remain disabled.  At
-        * such times we must not call down_write().
-        */
-       if (unlikely(system_state == SYSTEM_BOOTING))
-               return notifier_chain_register(&nh->head, n);
-
-       down_write(&nh->rwsem);
-       ret = notifier_chain_register(&nh->head, n);
-       up_write(&nh->rwsem);
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
-
-/**
- *     blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
- *     @nh: Pointer to head of the blocking notifier chain
- *     @n: Entry to remove from notifier chain
- *
- *     Removes a notifier from a blocking notifier chain.
- *     Must be called from process context.
- *
- *     Returns zero on success or %-ENOENT on failure.
- */
-int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
-               struct notifier_block *n)
-{
-       int ret;
-
-       /*
-        * This code gets used during boot-up, when task switching is
-        * not yet working and interrupts must remain disabled.  At
-        * such times we must not call down_write().
-        */
-       if (unlikely(system_state == SYSTEM_BOOTING))
-               return notifier_chain_unregister(&nh->head, n);
-
-       down_write(&nh->rwsem);
-       ret = notifier_chain_unregister(&nh->head, n);
-       up_write(&nh->rwsem);
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
-
-/**
- *     __blocking_notifier_call_chain - Call functions in a blocking notifier chain
- *     @nh: Pointer to head of the blocking notifier chain
- *     @val: Value passed unmodified to notifier function
- *     @v: Pointer passed unmodified to notifier function
- *     @nr_to_call: See comment for notifier_call_chain.
- *     @nr_calls: See comment for notifier_call_chain.
- *
- *     Calls each function in a notifier chain in turn.  The functions
- *     run in a process context, so they are allowed to block.
- *
- *     If the return value of the notifier can be and'ed
- *     with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
- *     will return immediately, with the return value of
- *     the notifier function which halted execution.
- *     Otherwise the return value is the return value
- *     of the last notifier function called.
- */
-int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
-                                  unsigned long val, void *v,
-                                  int nr_to_call, int *nr_calls)
-{
-       int ret = NOTIFY_DONE;
-
-       /*
-        * We check the head outside the lock, but if this access is
-        * racy then it does not matter what the result of the test
-        * is, we re-check the list after having taken the lock anyway:
-        */
-       if (rcu_dereference(nh->head)) {
-               down_read(&nh->rwsem);
-               ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
-                                       nr_calls);
-               up_read(&nh->rwsem);
-       }
-       return ret;
-}
-EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
-
-int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
-               unsigned long val, void *v)
-{
-       return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
-}
-EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
-
-/*
- *     Raw notifier chain routines.  There is no protection;
- *     the caller must provide it.  Use at your own risk!
- */
-
-/**
- *     raw_notifier_chain_register - Add notifier to a raw notifier chain
- *     @nh: Pointer to head of the raw notifier chain
- *     @n: New entry in notifier chain
- *
- *     Adds a notifier to a raw notifier chain.
- *     All locking must be provided by the caller.
- *
- *     Currently always returns zero.
- */
-
-int raw_notifier_chain_register(struct raw_notifier_head *nh,
-               struct notifier_block *n)
-{
-       return notifier_chain_register(&nh->head, n);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
-
-/**
- *     raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
- *     @nh: Pointer to head of the raw notifier chain
- *     @n: Entry to remove from notifier chain
- *
- *     Removes a notifier from a raw notifier chain.
- *     All locking must be provided by the caller.
- *
- *     Returns zero on success or %-ENOENT on failure.
- */
-int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
-               struct notifier_block *n)
-{
-       return notifier_chain_unregister(&nh->head, n);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
-
-/**
- *     __raw_notifier_call_chain - Call functions in a raw notifier chain
- *     @nh: Pointer to head of the raw notifier chain
- *     @val: Value passed unmodified to notifier function
- *     @v: Pointer passed unmodified to notifier function
- *     @nr_to_call: See comment for notifier_call_chain.
- *     @nr_calls: See comment for notifier_call_chain
- *
- *     Calls each function in a notifier chain in turn.  The functions
- *     run in an undefined context.
- *     All locking must be provided by the caller.
- *
- *     If the return value of the notifier can be and'ed
- *     with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
- *     will return immediately, with the return value of
- *     the notifier function which halted execution.
- *     Otherwise the return value is the return value
- *     of the last notifier function called.
- */
-
-int __raw_notifier_call_chain(struct raw_notifier_head *nh,
-                             unsigned long val, void *v,
-                             int nr_to_call, int *nr_calls)
-{
-       return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-}
-
-EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
-
-int raw_notifier_call_chain(struct raw_notifier_head *nh,
-               unsigned long val, void *v)
-{
-       return __raw_notifier_call_chain(nh, val, v, -1, NULL);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
-
-/*
- *     SRCU notifier chain routines.    Registration and unregistration
- *     use a mutex, and call_chain is synchronized by SRCU (no locks).
- */
-
-/**
- *     srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
- *     @nh: Pointer to head of the SRCU notifier chain
- *     @n: New entry in notifier chain
- *
- *     Adds a notifier to an SRCU notifier chain.
- *     Must be called in process context.
- *
- *     Currently always returns zero.
- */
-
-int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
-               struct notifier_block *n)
-{
-       int ret;
-
-       /*
-        * This code gets used during boot-up, when task switching is
-        * not yet working and interrupts must remain disabled.  At
-        * such times we must not call mutex_lock().
-        */
-       if (unlikely(system_state == SYSTEM_BOOTING))
-               return notifier_chain_register(&nh->head, n);
-
-       mutex_lock(&nh->mutex);
-       ret = notifier_chain_register(&nh->head, n);
-       mutex_unlock(&nh->mutex);
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
-
-/**
- *     srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
- *     @nh: Pointer to head of the SRCU notifier chain
- *     @n: Entry to remove from notifier chain
- *
- *     Removes a notifier from an SRCU notifier chain.
- *     Must be called from process context.
- *
- *     Returns zero on success or %-ENOENT on failure.
- */
-int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
-               struct notifier_block *n)
-{
-       int ret;
-
-       /*
-        * This code gets used during boot-up, when task switching is
-        * not yet working and interrupts must remain disabled.  At
-        * such times we must not call mutex_lock().
-        */
-       if (unlikely(system_state == SYSTEM_BOOTING))
-               return notifier_chain_unregister(&nh->head, n);
-
-       mutex_lock(&nh->mutex);
-       ret = notifier_chain_unregister(&nh->head, n);
-       mutex_unlock(&nh->mutex);
-       synchronize_srcu(&nh->srcu);
-       return ret;
-}
-
-EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
-
-/**
- *     __srcu_notifier_call_chain - Call functions in an SRCU notifier chain
- *     @nh: Pointer to head of the SRCU notifier chain
- *     @val: Value passed unmodified to notifier function
- *     @v: Pointer passed unmodified to notifier function
- *     @nr_to_call: See comment for notifier_call_chain.
- *     @nr_calls: See comment for notifier_call_chain
- *
- *     Calls each function in a notifier chain in turn.  The functions
- *     run in a process context, so they are allowed to block.
- *
- *     If the return value of the notifier can be and'ed
- *     with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
- *     will return immediately, with the return value of
- *     the notifier function which halted execution.
- *     Otherwise the return value is the return value
- *     of the last notifier function called.
- */
-
-int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
-                              unsigned long val, void *v,
-                              int nr_to_call, int *nr_calls)
-{
-       int ret;
-       int idx;
-
-       idx = srcu_read_lock(&nh->srcu);
-       ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-       srcu_read_unlock(&nh->srcu, idx);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
-
-int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
-               unsigned long val, void *v)
-{
-       return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
-}
-EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
-
-/**
- *     srcu_init_notifier_head - Initialize an SRCU notifier head
- *     @nh: Pointer to head of the srcu notifier chain
- *
- *     Unlike other sorts of notifier heads, SRCU notifier heads require
- *     dynamic initialization.  Be sure to call this routine before
- *     calling any of the other SRCU notifier routines for this head.
- *
- *     If an SRCU notifier head is deallocated, it must first be cleaned
- *     up by calling srcu_cleanup_notifier_head().  Otherwise the head's
- *     per-cpu data (used by the SRCU mechanism) will leak.
- */
-
-void srcu_init_notifier_head(struct srcu_notifier_head *nh)
-{
-       mutex_init(&nh->mutex);
-       if (init_srcu_struct(&nh->srcu) < 0)
-               BUG();
-       nh->head = NULL;
-}
-
-EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
-
-/**
- *     register_reboot_notifier - Register function to be called at reboot time
- *     @nb: Info about notifier function to be called
- *
- *     Registers a function with the list of functions
- *     to be called at reboot time.
- *
- *     Currently always returns zero, as blocking_notifier_chain_register()
- *     always returns zero.
- */
-int register_reboot_notifier(struct notifier_block * nb)
-{
-       return blocking_notifier_chain_register(&reboot_notifier_list, nb);
-}
-
-EXPORT_SYMBOL(register_reboot_notifier);
-
-/**
- *     unregister_reboot_notifier - Unregister previously registered reboot notifier
- *     @nb: Hook to be unregistered
- *
- *     Unregisters a previously registered reboot
- *     notifier function.
- *
- *     Returns zero on success, or %-ENOENT on failure.
- */
-int unregister_reboot_notifier(struct notifier_block * nb)
-{
-       return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
-}
-
-EXPORT_SYMBOL(unregister_reboot_notifier);
-
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
        int no_nice;
@@ -683,7 +152,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
        switch (which) {
                case PRIO_PROCESS:
                        if (who)
-                               p = find_task_by_pid(who);
+                               p = find_task_by_vpid(who);
                        else
                                p = current;
                        if (p)
@@ -691,7 +160,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
                        break;
                case PRIO_PGRP:
                        if (who)
-                               pgrp = find_pid(who);
+                               pgrp = find_vpid(who);
                        else
                                pgrp = task_pgrp(current);
                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -740,7 +209,7 @@ asmlinkage long sys_getpriority(int which, int who)
        switch (which) {
                case PRIO_PROCESS:
                        if (who)
-                               p = find_task_by_pid(who);
+                               p = find_task_by_vpid(who);
                        else
                                p = current;
                        if (p) {
@@ -751,7 +220,7 @@ asmlinkage long sys_getpriority(int which, int who)
                        break;
                case PRIO_PGRP:
                        if (who)
-                               pgrp = find_pid(who);
+                               pgrp = find_vpid(who);
                        else
                                pgrp = task_pgrp(current);
                        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -1448,9 +917,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
        struct task_struct *p;
        struct task_struct *group_leader = current->group_leader;
        int err = -EINVAL;
+       struct pid_namespace *ns;
 
        if (!pid)
-               pid = group_leader->pid;
+               pid = task_pid_vnr(group_leader);
        if (!pgid)
                pgid = pid;
        if (pgid < 0)
@@ -1459,10 +929,12 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
        /* From this point forward we keep holding onto the tasklist lock
         * so that our parent does not change from under us. -DaveM
         */
+       ns = current->nsproxy->pid_ns;
+
        write_lock_irq(&tasklist_lock);
 
        err = -ESRCH;
-       p = find_task_by_pid(pid);
+       p = find_task_by_pid_ns(pid, ns);
        if (!p)
                goto out;
 
@@ -1488,9 +960,9 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
                goto out;
 
        if (pgid != pid) {
-               struct task_struct *g =
-                       find_task_by_pid_type(PIDTYPE_PGID, pgid);
+               struct task_struct *g;
 
+               g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns);
                if (!g || task_session(g) != task_session(group_leader))
                        goto out;
        }
@@ -1499,10 +971,13 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
        if (err)
                goto out;
 
-       if (process_group(p) != pgid) {
+       if (task_pgrp_nr_ns(p, ns) != pgid) {
+               struct pid *pid;
+
                detach_pid(p, PIDTYPE_PGID);
-               p->signal->pgrp = pgid;
-               attach_pid(p, PIDTYPE_PGID, find_pid(pgid));
+               pid = find_vpid(pgid);
+               attach_pid(p, PIDTYPE_PGID, pid);
+               set_task_pgrp(p, pid_nr(pid));
        }
 
        err = 0;
@@ -1515,19 +990,21 @@ out:
 asmlinkage long sys_getpgid(pid_t pid)
 {
        if (!pid)
-               return process_group(current);
+               return task_pgrp_vnr(current);
        else {
                int retval;
                struct task_struct *p;
+               struct pid_namespace *ns;
 
-               read_lock(&tasklist_lock);
-               p = find_task_by_pid(pid);
+               ns = current->nsproxy->pid_ns;
 
+               read_lock(&tasklist_lock);
+               p = find_task_by_pid_ns(pid, ns);
                retval = -ESRCH;
                if (p) {
                        retval = security_task_getpgid(p);
                        if (!retval)
-                               retval = process_group(p);
+                               retval = task_pgrp_nr_ns(p, ns);
                }
                read_unlock(&tasklist_lock);
                return retval;
@@ -1539,7 +1016,7 @@ asmlinkage long sys_getpgid(pid_t pid)
 asmlinkage long sys_getpgrp(void)
 {
        /* SMP - assuming writes are word atomic this is fine */
-       return process_group(current);
+       return task_pgrp_vnr(current);
 }
 
 #endif
@@ -1547,19 +1024,21 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
        if (!pid)
-               return process_session(current);
+               return task_session_vnr(current);
        else {
                int retval;
                struct task_struct *p;
+               struct pid_namespace *ns;
 
-               read_lock(&tasklist_lock);
-               p = find_task_by_pid(pid);
+               ns = current->nsproxy->pid_ns;
 
+               read_lock(&tasklist_lock);
+               p = find_task_by_pid_ns(pid, ns);
                retval = -ESRCH;
                if (p) {
                        retval = security_task_getsid(p);
                        if (!retval)
-                               retval = process_session(p);
+                               retval = task_session_nr_ns(p, ns);
                }
                read_unlock(&tasklist_lock);
                return retval;
@@ -1586,7 +1065,8 @@ asmlinkage long sys_setsid(void)
         * session id and so the check will always fail and make it so
         * init cannot successfully call setsid.
         */
-       if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
+       if (session > 1 && find_task_by_pid_type_ns(PIDTYPE_PGID,
+                               session, &init_pid_ns))
                goto out;
 
        group_leader->signal->leader = 1;
@@ -1596,7 +1076,7 @@ asmlinkage long sys_setsid(void)
        group_leader->signal->tty = NULL;
        spin_unlock(&group_leader->sighand->siglock);
 
-       err = process_group(group_leader);
+       err = task_pgrp_vnr(group_leader);
 out:
        write_unlock_irq(&tasklist_lock);
        return err;
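
The roughly 530 deleted lines above are a code move, not a removal: the notifier-chain core leaves kernel/sys.c in this merge window (it reappears in mainline as kernel/notifier.c), and the exported API is unchanged. A minimal client sketch, using the reboot-notifier wrappers whose implementations are deleted here (the callback body is illustrative):

    #include <linux/notifier.h>
    #include <linux/reboot.h>

    static int quiesce_on_reboot(struct notifier_block *nb,
                                 unsigned long action, void *data)
    {
            /* Stop idling DMA operations and the like before the
             * machine goes down. */
            return NOTIFY_DONE;
    }

    static struct notifier_block quiesce_nb = {
            .notifier_call = quiesce_on_reboot,
            .priority      = 0,     /* chains stay sorted by priority */
    };

    /* module init: */  register_reboot_notifier(&quiesce_nb);
    /* module exit: */  unregister_reboot_notifier(&quiesce_nb);

The remaining sys.c hunks are the pid-namespace conversion: find_task_by_pid() becomes find_task_by_vpid() or find_task_by_pid_ns(), and process_group()/process_session() become the task_pgrp_*/task_session_* helpers.
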
index 067554bda8b79ba3954f2dc653dfa4f25f98f09c..3b4efbe2644572a32576b1a591f003e9869699f0 100644 (file)
@@ -1888,7 +1888,7 @@ int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
                return -EPERM;
        }
 
-       op = is_init(current) ? OP_SET : OP_AND;
+       op = is_global_init(current) ? OP_SET : OP_AND;
        return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
                                do_proc_dointvec_bset_conv,&op);
 }
@@ -2278,7 +2278,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
        pid_t tmp;
        int r;
 
-       tmp = pid_nr(cad_pid);
+       tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns);
 
        r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
                               lenp, ppos, NULL, NULL);
index 7d4d7f9c1bb2a0575cf15f9119c403e696657bb8..9f360f68aad6b45df2125c558e4d9dc763ae6b9a 100644 (file)
 #include <linux/delayacct.h>
 #include <linux/cpumask.h>
 #include <linux/percpu.h>
+#include <linux/cgroupstats.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/file.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
@@ -49,6 +53,11 @@ __read_mostly = {
        [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
        [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
 
+static struct nla_policy
+cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
+       [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
+};
+
 struct listener {
        struct list_head list;
        pid_t pid;
@@ -372,6 +381,51 @@ err:
        return NULL;
 }
 
+static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+       int rc = 0;
+       struct sk_buff *rep_skb;
+       struct cgroupstats *stats;
+       struct nlattr *na;
+       size_t size;
+       u32 fd;
+       struct file *file;
+       int fput_needed;
+
+       na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
+       if (!na)
+               return -EINVAL;
+
+       fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
+       file = fget_light(fd, &fput_needed);
+       if (file) {
+               size = nla_total_size(sizeof(struct cgroupstats));
+
+               rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
+                                       size);
+               if (rc < 0)
+                       goto err;
+
+               na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
+                                       sizeof(struct cgroupstats));
+               stats = nla_data(na);
+               memset(stats, 0, sizeof(*stats));
+
+               rc = cgroupstats_build(stats, file->f_dentry);
+               if (rc < 0)
+                       goto err;
+
+               fput_light(file, fput_needed);
+               return send_reply(rep_skb, info->snd_pid);
+       }
+
+err:
+       if (file)
+               fput_light(file, fput_needed);
+       nlmsg_free(rep_skb);
+       return rc;
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
        int rc = 0;
@@ -522,6 +576,12 @@ static struct genl_ops taskstats_ops = {
        .policy         = taskstats_cmd_get_policy,
 };
 
+static struct genl_ops cgroupstats_ops = {
+       .cmd            = CGROUPSTATS_CMD_GET,
+       .doit           = cgroupstats_user_cmd,
+       .policy         = cgroupstats_cmd_get_policy,
+};
+
 /* Needed early in initialization */
 void __init taskstats_init_early(void)
 {
@@ -546,8 +606,15 @@ static int __init taskstats_init(void)
        if (rc < 0)
                goto err;
 
+       rc = genl_register_ops(&family, &cgroupstats_ops);
+       if (rc < 0)
+               goto err_cgroup_ops;
+
        family_registered = 1;
+       printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
        return 0;
+err_cgroup_ops:
+       genl_unregister_ops(&family, &taskstats_ops);
 err:
        genl_unregister_family(&family);
        return rc;
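
With two genetlink ops to register, a failure in the second must unwind the first; the new err_cgroup_ops label follows the standard kernel goto-unwind idiom. Condensed (the family and taskstats_ops registrations sit in context not shown in this hunk):

    rc = genl_register_family(&family);
    if (rc < 0)
            goto err;
    rc = genl_register_ops(&family, &taskstats_ops);
    if (rc < 0)
            goto err;               /* only the family to undo */
    rc = genl_register_ops(&family, &cgroupstats_ops);
    if (rc < 0)
            goto err_cgroup_ops;    /* undo the taskstats ops too */
    return 0;

    err_cgroup_ops:
            genl_unregister_ops(&family, &taskstats_ops);
    err:
            genl_unregister_family(&family);
            return rc;
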
index 51b6a6a6158cab016aa11b09f0a65c1919c750ec..c8a9d13874dfdcf85c6b2ab9d7d03f92a8f39401 100644 (file)
@@ -207,15 +207,12 @@ static inline void clocksource_resume_watchdog(void) { }
  */
 void clocksource_resume(void)
 {
-       struct list_head *tmp;
+       struct clocksource *cs;
        unsigned long flags;
 
        spin_lock_irqsave(&clocksource_lock, flags);
 
-       list_for_each(tmp, &clocksource_list) {
-               struct clocksource *cs;
-
-               cs = list_entry(tmp, struct clocksource, list);
+       list_for_each_entry(cs, &clocksource_list, list) {
                if (cs->resume)
                        cs->resume();
        }
@@ -369,7 +366,6 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
                                          const char *buf, size_t count)
 {
        struct clocksource *ovr = NULL;
-       struct list_head *tmp;
        size_t ret = count;
        int len;
 
@@ -389,12 +385,11 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 
        len = strlen(override_name);
        if (len) {
+               struct clocksource *cs;
+
                ovr = clocksource_override;
                /* try to select it: */
-               list_for_each(tmp, &clocksource_list) {
-                       struct clocksource *cs;
-
-                       cs = list_entry(tmp, struct clocksource, list);
+               list_for_each_entry(cs, &clocksource_list, list) {
                        if (strlen(cs->name) == len &&
                            !strcmp(cs->name, override_name))
                                ovr = cs;
@@ -422,14 +417,11 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 static ssize_t
 sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
 {
-       struct list_head *tmp;
+       struct clocksource *src;
        char *curr = buf;
 
        spin_lock_irq(&clocksource_lock);
-       list_for_each(tmp, &clocksource_list) {
-               struct clocksource *src;
-
-               src = list_entry(tmp, struct clocksource, list);
+       list_for_each_entry(src, &clocksource_list, list) {
                curr += sprintf(curr, "%s ", src->name);
        }
        spin_unlock_irq(&clocksource_lock);
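
The three clocksource hunks are pure refactoring: each open-coded list_for_each() plus list_entry() pair collapses into list_for_each_entry(), which performs the container_of() step itself. The two equivalent forms side by side (process_cs() is a hypothetical stand-in for the loop body):

    struct clocksource *cs;
    struct list_head *tmp;

    /* before: walk raw list nodes, convert each one by hand */
    list_for_each(tmp, &clocksource_list) {
            cs = list_entry(tmp, struct clocksource, list);
            process_cs(cs);
    }

    /* after: the iterator hands back the containing structure */
    list_for_each_entry(cs, &clocksource_list, list)
            process_cs(cs);
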
index 8521d10fbb27543602bcd71ae503ce44d21d73f3..fb4e67d5dd6032eab811202526a194aa004523ad 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
+#include <linux/pid_namespace.h>
 #include <linux/notifier.h>
 #include <linux/thread_info.h>
 #include <linux/time.h>
@@ -956,7 +957,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds)
  */
 asmlinkage long sys_getpid(void)
 {
-       return current->tgid;
+       return task_tgid_vnr(current);
 }
 
 /*
@@ -970,7 +971,7 @@ asmlinkage long sys_getppid(void)
        int pid;
 
        rcu_read_lock();
-       pid = rcu_dereference(current->real_parent)->tgid;
+       pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns);
        rcu_read_unlock();
 
        return pid;
@@ -1102,7 +1103,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {
-       return current->pid;
+       return task_pid_vnr(current);
 }
 
 /**
index e080d1d744cce242b587664445888fa23eeb0b9a..52d5e7c9a8e6e82e68a0cb88ba0cdbad508eded5 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/freezer.h>
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
+#include <linux/lockdep.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -61,6 +62,9 @@ struct workqueue_struct {
        const char *name;
        int singlethread;
        int freezeable;         /* Freeze threads during suspend */
+#ifdef CONFIG_LOCKDEP
+       struct lockdep_map lockdep_map;
+#endif
 };
 
 /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
@@ -250,6 +254,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
                struct work_struct *work = list_entry(cwq->worklist.next,
                                                struct work_struct, entry);
                work_func_t f = work->func;
+#ifdef CONFIG_LOCKDEP
+               /*
+                * It is permissible to free the struct work_struct
+                * from inside the function that is called from it,
+                * this we need to take into account for lockdep too.
+                * To avoid bogus "held lock freed" warnings as well
+                * as problems when looking into work->lockdep_map,
+                * make a copy and use that here.
+                */
+               struct lockdep_map lockdep_map = work->lockdep_map;
+#endif
 
                cwq->current_work = work;
                list_del_init(cwq->worklist.next);
@@ -257,13 +272,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 
                BUG_ON(get_wq_data(work) != cwq);
                work_clear_pending(work);
+               lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+               lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_);
                f(work);
+               lock_release(&lockdep_map, 1, _THIS_IP_);
+               lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
 
                if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
                        printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
                                        "%s/0x%08x/%d\n",
                                        current->comm, preempt_count(),
-                                       current->pid);
+                                       task_pid_nr(current));
                        printk(KERN_ERR "    last function: ");
                        print_symbol("%s\n", (unsigned long)f);
                        debug_show_held_locks(current);
@@ -376,6 +395,8 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
        int cpu;
 
        might_sleep();
+       lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+       lock_release(&wq->lockdep_map, 1, _THIS_IP_);
        for_each_cpu_mask(cpu, *cpu_map)
                flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 }
@@ -446,6 +467,9 @@ static void wait_on_work(struct work_struct *work)
 
        might_sleep();
 
+       lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+       lock_release(&work->lockdep_map, 1, _THIS_IP_);
+
        cwq = get_wq_data(work);
        if (!cwq)
                return;
@@ -695,8 +719,10 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
        }
 }
 
-struct workqueue_struct *__create_workqueue(const char *name,
-                                           int singlethread, int freezeable)
+struct workqueue_struct *__create_workqueue_key(const char *name,
+                                               int singlethread,
+                                               int freezeable,
+                                               struct lock_class_key *key)
 {
        struct workqueue_struct *wq;
        struct cpu_workqueue_struct *cwq;
@@ -713,6 +739,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
        }
 
        wq->name = name;
+       lockdep_init_map(&wq->lockdep_map, name, key, 0);
        wq->singlethread = singlethread;
        wq->freezeable = freezeable;
        INIT_LIST_HEAD(&wq->list);
@@ -741,7 +768,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
        }
        return wq;
 }
-EXPORT_SYMBOL_GPL(__create_workqueue);
+EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
@@ -752,6 +779,9 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
        if (cwq->thread == NULL)
                return;
 
+       lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+       lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
        flush_cpu_workqueue(cwq);
        /*
         * If the caller is CPU_DEAD and cwq->worklist was not empty,
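
The workqueue changes teach lockdep to treat every workqueue, and every work item, as a pseudo-lock: run_workqueue() "acquires" both around the callback, while flush_workqueue() and wait_on_work() acquire and release the same maps. The payoff is that a classic silent deadlock now produces a lockdep report. A sketch of the bug it catches (wq and the work function are illustrative):

    #include <linux/workqueue.h>

    static struct workqueue_struct *wq;

    static void my_work_fn(struct work_struct *work)
    {
            /*
             * We run with wq's lockdep_map held by run_workqueue().
             * flush_workqueue() pseudo-acquires the same map, so
             * lockdep flags the recursion -- instead of the queue
             * thread silently waiting for its own work to finish.
             */
            flush_workqueue(wq);
    }

Note the copy of work->lockdep_map taken before calling f(work): per the comment added in the hunk, a callback may free its own work_struct, so the map must not be read from the possibly-freed item afterwards.
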
index 7d16e64333025db69440fe96b46beb229749e746..c567f219191d590116bcdd4498349d72e62ce612 100644 (file)
@@ -498,3 +498,5 @@ config FAULT_INJECTION_STACKTRACE_FILTER
        select FRAME_POINTER
        help
          Provide stacktrace filter for fault-injection capabilities
+
+source "samples/Kconfig"
index c5f215d509d3e36429f8f9a8eb1fd52fb9411dcc..3a0983b77412cdc6b0336bb0e3e1f539ddc2d0cd 100644 (file)
@@ -6,7 +6,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
         rbtree.o radix-tree.o dump_stack.o \
         idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
         sha1.o irq_regs.o reciprocal_div.o argv_split.o \
-        proportions.o
+        proportions.o prio_heap.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
index 360556a7803d4dae582b39edec3e91bb78ac7749..389424ecb129f13760de1677bd297b392264c953 100644 (file)
@@ -1,6 +1,6 @@
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <asm/types.h>
-#include <asm/bitops.h>
 
 /**
  * hweightN - returns the hamming weight of a N-bit word
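
lib/hweight.c is one of several files in this batch (see also the net/ hunks below) that switch from <asm/bitops.h> to <linux/bitops.h>, the wrapper header users are meant to include. For reference, the hamming weight the file implements can be written as the usual parallel bit-count; a userspace sketch in the same spirit, not the kernel's exact code:

    /* Count set bits in a 32-bit word: pairs, nibbles, then bytes. */
    static unsigned int hweight32(unsigned int w)
    {
            w = w - ((w >> 1) & 0x55555555);
            w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
            w = (w + (w >> 4)) & 0x0f0f0f0f;
            return (w * 0x01010101) >> 24;
    }
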
index 9659eabffc319c5235669a87687949d270536b4e..393a0e915c23b252b747c62645beb463e25843ed 100644 (file)
@@ -124,12 +124,13 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
        mutex_lock(&percpu_counters_lock);
        list_for_each_entry(fbc, &percpu_counters, list) {
                s32 *pcount;
+               unsigned long flags;
 
-               spin_lock(&fbc->lock);
+               spin_lock_irqsave(&fbc->lock, flags);
                pcount = per_cpu_ptr(fbc->counters, cpu);
                fbc->count += *pcount;
                *pcount = 0;
-               spin_unlock(&fbc->lock);
+               spin_unlock_irqrestore(&fbc->lock, flags);
        }
        mutex_unlock(&percpu_counters_lock);
        return NOTIFY_OK;
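
The percpu_counter fix switches the hotplug callback to the irq-saving lock variants. The defensive pattern being applied: if fbc->lock can also be taken from an interrupt (directly or via a softirq) on the same CPU, acquiring it here with interrupts enabled invites a self-deadlock; whether a current caller actually does so is not visible in this hunk. The shape of the bug, in comments (a generic sketch, not a trace from this code):

    spin_lock(&fbc->lock);
    /* ... an interrupt arrives on this CPU, and its handler ends up
     * taking fbc->lock again: the CPU now spins on a lock it already
     * holds.  spin_lock_irqsave() closes the window by masking local
     * interrupts before acquiring. */
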
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
new file mode 100644 (file)
index 0000000..471944a
--- /dev/null
+++ b/lib/prio_heap.c
@@ -0,0 +1,70 @@
+/*
+ * Simple insertion-only static-sized priority heap containing
+ * pointers, based on CLR, chapter 7
+ */
+
+#include <linux/slab.h>
+#include <linux/prio_heap.h>
+
+int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
+             int (*gt)(void *, void *))
+{
+       heap->ptrs = kmalloc(size, gfp_mask);
+       if (!heap->ptrs)
+               return -ENOMEM;
+       heap->size = 0;
+       heap->max = size / sizeof(void *);
+       heap->gt = gt;
+       return 0;
+}
+
+void heap_free(struct ptr_heap *heap)
+{
+       kfree(heap->ptrs);
+}
+
+void *heap_insert(struct ptr_heap *heap, void *p)
+{
+       void *res;
+       void **ptrs = heap->ptrs;
+       int pos;
+
+       if (heap->size < heap->max) {
+               /* Heap insertion */
+               int pos = heap->size++;
+               while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
+                       ptrs[pos] = ptrs[(pos-1)/2];
+                       pos = (pos-1)/2;
+               }
+               ptrs[pos] = p;
+               return NULL;
+       }
+
+       /* The heap is full, so something will have to be dropped */
+
+       /* If the new pointer is greater than the current max, drop it */
+       if (heap->gt(p, ptrs[0]))
+               return p;
+
+       /* Replace the current max and heapify */
+       res = ptrs[0];
+       ptrs[0] = p;
+       pos = 0;
+
+       while (1) {
+               int left = 2 * pos + 1;
+               int right = 2 * pos + 2;
+               int largest = pos;
+               if (left < heap->size && heap->gt(ptrs[left], p))
+                       largest = left;
+               if (right < heap->size && heap->gt(ptrs[right], ptrs[largest]))
+                       largest = right;
+               if (largest == pos)
+                       break;
+               /* Push p down the heap one level and bump one up */
+               ptrs[pos] = ptrs[largest];
+               ptrs[largest] = p;
+               pos = largest;
+       }
+       return res;
+}
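
lib/prio_heap.c is new infrastructure for the cgroup work elsewhere in this merge. Note heap_insert()'s contract: with a greater-than comparator, ptrs[0] is the largest retained element, a newcomer bigger than it is returned (rejected), and otherwise the old maximum is evicted and returned, so a full heap always holds the smallest max elements seen. A hedged usage sketch (the comparator and data are illustrative; heap_init()'s size is in bytes, not elements):

    static int gt_int(void *a, void *b)
    {
            return *(int *)a > *(int *)b;
    }

    /* In process context: track the 16 smallest of 1000 ints. */
    struct ptr_heap heap;
    int i, vals[1000];          /* assume vals[] has been filled in */
    void *dropped;

    if (heap_init(&heap, 16 * sizeof(void *), GFP_KERNEL, gt_int))
            return -ENOMEM;

    for (i = 0; i < 1000; i++) {
            /* NULL: kept (heap not full yet); &vals[i] back: rejected,
             * larger than everything kept; otherwise: the evicted old
             * maximum. */
            dropped = heap_insert(&heap, &vals[i]);
    }
    /* heap.ptrs[0 .. heap.size-1] are the 16 smallest values seen. */
    heap_free(&heap);
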
index 479fd462eaa9c7630df676bdddd91d4dd1212a2c..9c4b0256490bd240b37f8811915e873463c88537 100644 (file)
@@ -60,12 +60,12 @@ static void spin_bug(spinlock_t *lock, const char *msg)
                owner = lock->owner;
        printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
                msg, raw_smp_processor_id(),
-               current->comm, current->pid);
+               current->comm, task_pid_nr(current));
        printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
                        ".owner_cpu: %d\n",
                lock, lock->magic,
                owner ? owner->comm : "<none>",
-               owner ? owner->pid : -1,
+               owner ? task_pid_nr(owner) : -1,
                lock->owner_cpu);
        dump_stack();
 }
@@ -116,7 +116,7 @@ static void __spin_lock_debug(spinlock_t *lock)
                        printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, "
                                        "%s/%d, %p\n",
                                raw_smp_processor_id(), current->comm,
-                               current->pid, lock);
+                               task_pid_nr(current), lock);
                        dump_stack();
 #ifdef CONFIG_SMP
                        trigger_all_cpu_backtrace();
@@ -161,7 +161,7 @@ static void rwlock_bug(rwlock_t *lock, const char *msg)
 
        printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
                msg, raw_smp_processor_id(), current->comm,
-               current->pid, lock);
+               task_pid_nr(current), lock);
        dump_stack();
 }
 
index 920366399eed6a3adb409d58622c74db46abcd7a..5209e47b7fe39dba855e84ee9ca4d89283fd3d80 100644 (file)
@@ -841,7 +841,7 @@ static void shrink_readahead_size_eio(struct file *filp,
 /**
  * do_generic_mapping_read - generic file read routine
  * @mapping:   address_space to be read
- * @_ra:       file's readahead state
+ * @ra:                file's readahead state
  * @filp:      the file to read
  * @ppos:      current file position
  * @desc:      read_descriptor
index bd16dcaeefb8ce4e3b504916b81b67c536d0c4dc..142683df8755988a4cdf2fde72f8f4323d2f3121 100644 (file)
@@ -259,9 +259,6 @@ void free_pgd_range(struct mmu_gather **tlb,
                        continue;
                free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
        } while (pgd++, addr = next, addr != end);
-
-       if (!(*tlb)->fullmm)
-               flush_tlb_pgtables((*tlb)->mm, start, end);
 }
 
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
index 568152ae6cafe9c6f6052cf704c0e6dae0d198f7..c1592a94582f8c2950a6a4cf34711f02a35066c6 100644 (file)
@@ -78,6 +78,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/module.h>
+#include <linux/nsproxy.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/compat.h>
@@ -940,7 +941,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 
        /* Find the mm_struct */
        read_lock(&tasklist_lock);
-       task = pid ? find_task_by_pid(pid) : current;
+       task = pid ? find_task_by_vpid(pid) : current;
        if (!task) {
                read_unlock(&tasklist_lock);
                return -ESRCH;
@@ -1388,7 +1389,6 @@ EXPORT_SYMBOL(alloc_pages_current);
  * keeps mempolicies cpuset relative after its cpuset moves.  See
  * further kernel/cpuset.c update_nodemask().
  */
-void *cpuset_being_rebound;
 
 /* Slow path of a mempolicy copy */
 struct mempolicy *__mpol_copy(struct mempolicy *old)
@@ -2019,4 +2019,3 @@ out:
                m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
        return 0;
 }
-
index 06d0877a66efa74a3d599b4ea50f0c5100dd7a1f..4d6ee03db94608e3bc49e5bffb0d9fdd697ec449 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
+#include <linux/nsproxy.h>
 #include <linux/pagevec.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -924,7 +925,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 
        /* Find the mm_struct */
        read_lock(&tasklist_lock);
-       task = pid ? find_task_by_pid(pid) : current;
+       task = pid ? find_task_by_vpid(pid) : current;
        if (!task) {
                read_unlock(&tasklist_lock);
                return -ESRCH;
index 4275e81e25ba69cf49b777ca2819fb6295d7b084..7a30c4988231377fbec31faf6a1dcfb7e6af44e6 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1048,8 +1048,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 
        /* The open routine did something to the protections already? */
        if (pgprot_val(vma->vm_page_prot) !=
-           pgprot_val(protection_map[vm_flags &
-                   (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+           pgprot_val(vm_get_page_prot(vm_flags)))
                return 0;
 
        /* Specialty mapping? */
@@ -1130,8 +1129,7 @@ munmap_back:
        vma->vm_start = addr;
        vma->vm_end = addr + len;
        vma->vm_flags = vm_flags;
-       vma->vm_page_prot = protection_map[vm_flags &
-                               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+       vma->vm_page_prot = vm_get_page_prot(vm_flags);
        vma->vm_pgoff = pgoff;
 
        if (file) {
@@ -2002,8 +2000,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
        vma->vm_end = addr + len;
        vma->vm_pgoff = pgoff;
        vma->vm_flags = flags;
-       vma->vm_page_prot = protection_map[flags &
-                               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+       vma->vm_page_prot = vm_get_page_prot(flags);
        vma_link(mm, vma, prev, rb_link, rb_parent);
 out:
        mm->total_vm += len >> PAGE_SHIFT;
@@ -2209,7 +2206,7 @@ int install_special_mapping(struct mm_struct *mm,
        vma->vm_end = addr + len;
 
        vma->vm_flags = vm_flags | mm->def_flags;
-       vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+       vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
        vma->vm_ops = &special_mapping_vmops;
        vma->vm_private_data = pages;
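
The mm/mmap.c hunks (and the mm/mprotect.c hunk that follows) replace open-coded protection_map[] indexing with vm_get_page_prot(). Judging from the expressions being deleted, the helper is essentially this wrapper (a paraphrase, not necessarily mainline's exact definition):

    static pgprot_t vm_get_page_prot(unsigned long vm_flags)
    {
            /* Only the read/write/exec/shared bits select a protection. */
            return protection_map[vm_flags &
                    (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
    }

Besides deduplicating, the install_special_mapping() case now also picks up the VM_SHARED bit (0x8) that its old `& 7` mask silently dropped from the lookup.
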
index 1d4d69790e59920e8876971dc0ed8adeb48175b8..55227845abbe869f0aad75724f4d10c93b5a189f 100644 (file)
@@ -192,11 +192,9 @@ success:
         * held in write mode.
         */
        vma->vm_flags = newflags;
-       vma->vm_page_prot = protection_map[newflags &
-               (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+       vma->vm_page_prot = vm_get_page_prot(newflags);
        if (vma_wants_writenotify(vma)) {
-               vma->vm_page_prot = protection_map[newflags &
-                       (VM_READ|VM_WRITE|VM_EXEC)];
+               vma->vm_page_prot = vm_get_page_prot(newflags);
                dirty_accountable = 1;
        }
 
index a64decb5b13fb3922e05a70559855dde685b57b4..824cade078277fda90f06799001a563a45680394 100644 (file)
@@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
                if (!p->mm)
                        continue;
                /* skip the init task */
-               if (is_init(p))
+               if (is_global_init(p))
                        continue;
 
                /*
@@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
  */
 static void __oom_kill_task(struct task_struct *p, int verbose)
 {
-       if (is_init(p)) {
+       if (is_global_init(p)) {
                WARN_ON(1);
                printk(KERN_WARNING "tried to kill init!\n");
                return;
@@ -278,7 +278,8 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
        }
 
        if (verbose)
-               printk(KERN_ERR "Killed process %d (%s)\n", p->pid, p->comm);
+               printk(KERN_ERR "Killed process %d (%s)\n",
+                               task_pid_nr(p), p->comm);
 
        /*
         * We give our sacrificial lamb high priority and access to
@@ -326,7 +327,7 @@ static int oom_kill_task(struct task_struct *p)
         * to memory reserves though, otherwise we might deplete all memory.
         */
        do_each_thread(g, q) {
-               if (q->mm == mm && q->tgid != p->tgid)
+               if (q->mm == mm && !same_thread_group(q, p))
                        force_sig(SIGKILL, q);
        } while_each_thread(g, q);
 
@@ -337,7 +338,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
                            unsigned long points, const char *message)
 {
        struct task_struct *c;
-       struct list_head *tsk;
 
        if (printk_ratelimit()) {
                printk(KERN_WARNING "%s invoked oom-killer: "
@@ -357,11 +357,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
        }
 
        printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
-                                       message, p->pid, p->comm, points);
+                                       message, task_pid_nr(p), p->comm, points);
 
        /* Try to kill a child first */
-       list_for_each(tsk, &p->children) {
-               c = list_entry(tsk, struct task_struct, sibling);
+       list_for_each_entry(c, &p->children, sibling) {
                if (c->mm == p->mm)
                        continue;
                if (!oom_kill_task(c))
index ff5784b440d792b87bf2cd088801b1b6b5b5c0af..66c736953cfe7f01a27a495681c9c779b0a240a1 100644 (file)
@@ -656,11 +656,13 @@ static inline int hidp_setup_input(struct hidp_session *session, struct hidp_con
        }
 
        if (req->subclass & 0x80) {
-               input->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-               input->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-               input->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-               input->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-               input->relbit[0] |= BIT(REL_WHEEL);
+               input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+               input->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+                       BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+               input->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+               input->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
+                       BIT_MASK(BTN_EXTRA);
+               input->relbit[0] |= BIT_MASK(REL_WHEEL);
        }
 
        input->dev.parent = hidp_get_device(session);
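
The hidp hunk is part of a tree-wide input-bitmap cleanup: evbit/keybit/relbit are arrays of unsigned long, so a plain BIT(x) == 1 << x is only correct for x < BITS_PER_LONG, and button codes like BTN_LEFT (0x110) are well past that. <linux/bitops.h> provides the pair that splits a bit number into a word index plus an in-word mask, roughly:

    #define BIT_MASK(nr)   (1UL << ((nr) % BITS_PER_LONG))
    #define BIT_WORD(nr)   ((nr) / BITS_PER_LONG)

    /* e.g. marking BTN_LEFT as a supported key: */
    input->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT);

The generic BIT() now living in <linux/bitops.h> is also why the mac80211 hunk further down deletes its private #define BIT(x).
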
index 1f0068eae5018948ec187aa31af5a7ce5ed44fde..e0a06942c025d24d3590658e997f420129858bc1 100644 (file)
@@ -447,7 +447,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
        rcu_assign_pointer(sk->sk_filter, fp);
        rcu_read_unlock_bh();
 
-       sk_filter_delayed_uncharge(sk, old_fp);
+       if (old_fp)
+               sk_filter_delayed_uncharge(sk, old_fp);
        return 0;
 }
 
index 590a767b029c15e82886bfcb49574dcfdbc7ba1e..daadbcc4e8dd7e45d92205f6ef4810a315ce3a62 100644 (file)
@@ -15,7 +15,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
index 8cae60c5338304f17cc6f59197c6bb447229b3eb..7ac703171ff3014cd2f204501250344ef5373cf1 100644 (file)
 #endif
 #include <asm/byteorder.h>
 #include <linux/rcupdate.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/uaccess.h>
@@ -3514,7 +3514,7 @@ static int pktgen_thread_worker(void *arg)
 
        init_waitqueue_head(&t->queue);
 
-       pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid);
+       pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
 
        set_current_state(TASK_INTERRUPTIBLE);
 
index 1072d16696c3b651993ef7130bd09a475c072b81..4a2640d3826166d98b066d626f9427e5daacc256 100644 (file)
@@ -744,10 +744,10 @@ static struct net *get_net_ns_by_pid(pid_t pid)
        rcu_read_lock();
        tsk = find_task_by_pid(pid);
        if (tsk) {
-               task_lock(tsk);
-               if (tsk->nsproxy)
-                       net = get_net(tsk->nsproxy->net_ns);
-               task_unlock(tsk);
+               struct nsproxy *nsproxy;
+               nsproxy = task_nsproxy(tsk);
+               if (nsproxy)
+                       net = get_net(nsproxy->net_ns);
        }
        rcu_read_unlock();
        return net;
index 530bee8d9ed90448ee2b406b897ada19ec11d28c..100ba6d9d478d7df1082a3790ddfac0e6c012d73 100644 (file)
@@ -24,6 +24,8 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/security.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -42,7 +44,7 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-       if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+       if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
            ((creds->uid == current->uid || creds->uid == current->euid ||
              creds->uid == current->suid) || capable(CAP_SETUID)) &&
            ((creds->gid == current->gid || creds->gid == current->egid ||
index d292b4113d6e240829c48cbd84e24ab5da660764..febbcbcf8022ac82f25d66de5dcd54177aeca250 100644 (file)
@@ -232,7 +232,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
                        warned++;
                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
                               "tries to set negative timeout\n",
-                               current->comm, current->pid);
+                               current->comm, task_pid_nr(current));
                return 0;
        }
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
index 81a8285d6d6a4251c7cd166635f7058046078bf6..8d8c2915e064f98d16d1409c2524f2277107185b 100644 (file)
@@ -54,7 +54,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
index 3cef12835c4b2bfa86b50a74a788488cd5ac9ea1..8fb6ca23700aa8de4c446879b19186e626dfa396 100644 (file)
@@ -93,7 +93,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
                int remaining, rover, low, high;
 
                inet_get_local_port_range(&low, &high);
-               remaining = high - low;
+               remaining = (high - low) + 1;
                rover = net_random() % remaining + low;
 
                do {
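
This one-line fix recurs in the inet_hashtables, udp, and inet6 hunks below: the local port range is inclusive at both ends, so the candidate count is (high - low) + 1. With the old arithmetic the top port was unreachable, and a single-port range (which the sysctl hunks further down start permitting by relaxing `<=` to `<`) would make `net_random() % remaining` divide by zero. A runnable userspace check of the arithmetic (standard C; random() stands in for net_random()):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            int low = 32768, high = 61000;
            int remaining = (high - low) + 1;       /* inclusive */
            int i, saw_top = 0;

            for (i = 0; i < 1000000; i++)
                    if (random() % remaining + low == high)
                            saw_top = 1;

            /* With remaining = high - low, this always prints "no". */
            printf("top port reachable: %s\n", saw_top ? "yes" : "no");
            return 0;
    }
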
index fac6398e436709c714b95992c9045e740454d004..16eecc7046a346d6a2a22e1c33f71055a333f4c2 100644 (file)
@@ -286,7 +286,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
                struct inet_timewait_sock *tw = NULL;
 
                inet_get_local_port_range(&low, &high);
-               remaining = high - low;
+               remaining = (high - low) + 1;
 
                local_bh_disable();
                for (i = 1; i <= remaining; i++) {
index 1960747f354c8c89966b920426d7487640476617..c99f2a33fb9e9165a3f2b7144ea609b6b151e34b 100644 (file)
@@ -794,7 +794,7 @@ static int sync_thread(void *startup)
 
        add_wait_queue(&sync_wait, &wait);
 
-       set_sync_pid(state, current->pid);
+       set_sync_pid(state, task_pid_nr(current));
        complete(tinfo->startup);
 
        /*
@@ -877,7 +877,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
        if (!tinfo)
                return -ENOMEM;
 
-       IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+       IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
        IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
                  sizeof(struct ip_vs_sync_conn));
 
@@ -917,7 +917,7 @@ int stop_sync_thread(int state)
            (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
                return -ESRCH;
 
-       IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+       IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
        IP_VS_INFO("stopping sync thread %d ...\n",
                   (state == IP_VS_STATE_MASTER) ?
                   sync_master_pid : sync_backup_pid);
index c78acc1a7f11395b154460774dbbd6ee52444e0f..ffddd2b453523c137dae115fde91aa1e91d5471a 100644 (file)
@@ -122,7 +122,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
        ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
 
        if (write && ret == 0) {
-               if (range[1] <= range[0])
+               if (range[1] < range[0])
                        ret = -EINVAL;
                else
                        set_local_port_range(range);
@@ -150,7 +150,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
 
        ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
        if (ret == 0 && newval && newlen) {
-               if (range[1] <= range[0])
+               if (range[1] < range[0])
                        ret = -EINVAL;
                else
                        set_local_port_range(range);
index 4f322003835dc1886b4202259a91bd641d9139c3..2e6ad6dbba6c54d904d1df28d6e38dfcd7640b00 100644 (file)
@@ -1334,7 +1334,7 @@ do_prequeue:
                if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
                        if (net_ratelimit())
                                printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
-                                      current->comm, current->pid);
+                                      current->comm, task_pid_nr(current));
                        peek_seq = tp->copied_seq;
                }
                continue;
index cb9fc58efb2f1da00ad4789370be1d1ac07f1fcd..35d2b0e9e10bec0a1ac90c385dde9cbcf7c5f6ff 100644 (file)
@@ -147,13 +147,14 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
        write_lock_bh(&udp_hash_lock);
 
        if (!snum) {
-               int i, low, high;
+               int i, low, high, remaining;
                unsigned rover, best, best_size_so_far;
 
                inet_get_local_port_range(&low, &high);
+               remaining = (high - low) + 1;
 
                best_size_so_far = UINT_MAX;
-               best = rover = net_random() % (high - low) + low;
+               best = rover = net_random() % remaining + low;
 
                /* 1st pass: look for empty (or shortest) hash chain */
                for (i = 0; i < UDP_HTABLE_SIZE; i++) {
index 1c2c2765543505d7e61dd021fa2fcf2b9482757f..d6f1026f19438114bfef264b1f669cf03f0fd131 100644 (file)
@@ -261,7 +261,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
                struct inet_timewait_sock *tw = NULL;
 
                inet_get_local_port_range(&low, &high);
-               remaining = high - low;
+               remaining = (high - low) + 1;
 
                local_bh_disable();
                for (i = 1; i <= remaining; i++) {
index 49eacba824df5bfdf1905c1df6e5503eaf15326d..46cf962f7f8889f049a3b0406f9f5f3b6c31034c 100644 (file)
@@ -762,7 +762,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
                        if (net_ratelimit())
                                printk(KERN_DEBUG "LLC(%s:%d): Application "
                                                  "bug, race in MSG_PEEK.\n",
-                                      current->comm, current->pid);
+                                      current->comm, task_pid_nr(current));
                        peek_seq = llc->copied_seq;
                }
                continue;
index d34a9deca67ad25bc36c44c3422dae6128548e42..4b4ed2a5803c46f8e86ae550f9431fb4044e2a34 100644 (file)
@@ -37,8 +37,6 @@
 
 struct ieee80211_local;
 
-#define BIT(x) (1 << (x))
-
 #define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3)
 
 /* Maximum number of broadcast/multicast frames to buffer when some of the
index db81aef6177a948471d7d1f19ae9143ec88ad1c5..f7ffeec3913f3691d662663977ebe45e04ac2e89 100644 (file)
@@ -108,14 +108,11 @@ struct ieee802_11_elems {
        u8 wmm_param_len;
 };
 
-enum ParseRes { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 };
-
-static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
-                                           struct ieee802_11_elems *elems)
+static void ieee802_11_parse_elems(u8 *start, size_t len,
+                                  struct ieee802_11_elems *elems)
 {
        size_t left = len;
        u8 *pos = start;
-       int unknown = 0;
 
        memset(elems, 0, sizeof(*elems));
 
@@ -126,15 +123,8 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
                elen = *pos++;
                left -= 2;
 
-               if (elen > left) {
-#if 0
-                       if (net_ratelimit())
-                               printk(KERN_DEBUG "IEEE 802.11 element parse "
-                                      "failed (id=%d elen=%d left=%d)\n",
-                                      id, elen, left);
-#endif
-                       return ParseFailed;
-               }
+               if (elen > left)
+                       return;
 
                switch (id) {
                case WLAN_EID_SSID:
@@ -201,28 +191,15 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
                        elems->ext_supp_rates_len = elen;
                        break;
                default:
-#if 0
-                       printk(KERN_DEBUG "IEEE 802.11 element parse ignored "
-                                     "unknown element (id=%d elen=%d)\n",
-                                     id, elen);
-#endif
-                       unknown++;
                        break;
                }
 
                left -= elen;
                pos += elen;
        }
-
-       /* Do not trigger error if left == 1 as Apple Airport base stations
-        * send AssocResps that are one spurious byte too long. */
-
-       return unknown ? ParseUnknown : ParseOK;
 }
 
 
-
-
 static int ecw2cw(int ecw)
 {
        int cw = 1;
@@ -931,12 +908,7 @@ static void ieee80211_auth_challenge(struct net_device *dev,
 
        printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name);
        pos = mgmt->u.auth.variable;
-       if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
-           == ParseFailed) {
-               printk(KERN_DEBUG "%s: failed to parse Auth(challenge)\n",
-                      dev->name);
-               return;
-       }
+       ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
        if (!elems.challenge) {
                printk(KERN_DEBUG "%s: no challenge IE in shared key auth "
                       "frame\n", dev->name);
@@ -1230,12 +1202,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
        aid &= ~(BIT(15) | BIT(14));
 
        pos = mgmt->u.assoc_resp.variable;
-       if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
-           == ParseFailed) {
-               printk(KERN_DEBUG "%s: failed to parse AssocResp\n",
-                      dev->name);
-               return;
-       }
+       ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
 
        if (!elems.supp_rates) {
                printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
@@ -1459,7 +1426,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
        struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
        struct ieee802_11_elems elems;
        size_t baselen;
-       int channel, invalid = 0, clen;
+       int channel, clen;
        struct ieee80211_sta_bss *bss;
        struct sta_info *sta;
        struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1505,9 +1472,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
        }
 
-       if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen,
-                                  &elems) == ParseFailed)
-               invalid = 1;
+       ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
        if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
            memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
@@ -1724,9 +1689,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
        if (baselen > len)
                return;
 
-       if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen,
-                                  &elems) == ParseFailed)
-               return;
+       ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
        if (elems.erp_info && elems.erp_info_len >= 1)
                ieee80211_handle_erp_ie(dev, elems.erp_info[0]);
index af79423bc8e81a2d41884e2564a4651b65563bfc..9ec50139b9a177d559236790b3f0351c7ba9ef71 100644 (file)
@@ -2,13 +2,13 @@
  * GPL (C) 2002 Martin Devera (devik@cdi.cz).
  */
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connbytes.h>
 #include <net/netfilter/nf_conntrack.h>
 
 #include <asm/div64.h>
-#include <asm/bitops.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
index e11000a8e950950cd5a52ece4597c72761dc8d38..d0936506b731abf62d5fabffe289a44789c52768 100644 (file)
@@ -1623,11 +1623,6 @@ static struct vm_operations_struct packet_mmap_ops = {
        .close =packet_mm_close,
 };
 
-static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
-{
-       return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
-}
-
 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
 {
        int i;
index eaabf087c59b53e1dcf05af11647fdd2288d136a..d1e9d68f8ba0a003f0820f95800620d3030184cd 100644 (file)
@@ -146,18 +146,18 @@ static void rfkill_disconnect(struct input_handle *handle)
 static const struct input_device_id rfkill_ids[] = {
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-               .evbit = { BIT(EV_KEY) },
-               .keybit = { [LONG(KEY_WLAN)] = BIT(KEY_WLAN) },
+               .evbit = { BIT_MASK(EV_KEY) },
+               .keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) },
        },
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-               .evbit = { BIT(EV_KEY) },
-               .keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) },
+               .evbit = { BIT_MASK(EV_KEY) },
+               .keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) },
        },
        {
                .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-               .evbit = { BIT(EV_KEY) },
-               .keybit = { [LONG(KEY_UWB)] = BIT(KEY_UWB) },
+               .evbit = { BIT_MASK(EV_KEY) },
+               .keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) },
        },
        { }
 };
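
The BIT()/LONG() to BIT_MASK()/BIT_WORD() conversions in this hunk (and in the sound hunks further down) move the drivers onto the generic helpers from linux/bitops.h, which split a bit number into an array index plus an in-word mask so event codes larger than BITS_PER_LONG land in the right word. The definitions below mirror the bitops.h ones, for illustration:

/* Mirrors the linux/bitops.h helpers being adopted here;
 * BITS_PER_LONG comes from the same header. */
#define SKETCH_BIT_MASK(nr)	(1UL << ((nr) % BITS_PER_LONG))
#define SKETCH_BIT_WORD(nr)	((nr) / BITS_PER_LONG)

/*
 * So a key bitmap is filled as:
 *	keybit[SKETCH_BIT_WORD(KEY_WLAN)] |= SKETCH_BIT_MASK(KEY_WLAN);
 * which stays correct when the key code exceeds BITS_PER_LONG,
 * unlike a bare 1 << KEY_WLAN.
 */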
index 92435a882fac6c61232a4a4064a33906932f0b9b..9c15c4888d1250481cc42dc58d59922915b25ca6 100644 (file)
@@ -2,9 +2,7 @@
 # Traffic control configuration.
 # 
 
-menu "QoS and/or fair queueing"
-
-config NET_SCHED
+menuconfig NET_SCHED
        bool "QoS and/or fair queueing"
        select NET_SCH_FIFO
        ---help---
@@ -41,9 +39,6 @@ config NET_SCHED
          The available schedulers are listed in the following questions; you
          can say Y to as many as you like. If unsure, say N now.
 
-config NET_SCH_FIFO
-       bool
-
 if NET_SCHED
 
 comment "Queueing/Scheduling"
@@ -500,4 +495,5 @@ config NET_CLS_IND
 
 endif # NET_SCHED
 
-endmenu
+config NET_SCH_FIFO
+       bool
index e01d57692c9a76f530b89cbc0b9e9443ae3c8ced..fa1a6f45dc416173f8b6d853f9ce0c5fa3565bc7 100644 (file)
@@ -556,6 +556,7 @@ void dev_deactivate(struct net_device *dev)
 {
        struct Qdisc *qdisc;
        struct sk_buff *skb;
+       int running;
 
        spin_lock_bh(&dev->queue_lock);
        qdisc = dev->qdisc;
@@ -571,12 +572,31 @@ void dev_deactivate(struct net_device *dev)
 
        dev_watchdog_down(dev);
 
-       /* Wait for outstanding dev_queue_xmit calls. */
+       /* Wait for outstanding qdisc-less dev_queue_xmit calls. */
        synchronize_rcu();
 
        /* Wait for outstanding qdisc_run calls. */
-       while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-               yield();
+       do {
+               while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+                       yield();
+
+               /*
+                * Double-check inside queue lock to ensure that all effects
+                * of the queue run are visible when we return.
+                */
+               spin_lock_bh(&dev->queue_lock);
+               running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+               spin_unlock_bh(&dev->queue_lock);
+
+               /*
+                * The running flag should never be set at this point because
+                * we've already set dev->qdisc to noop_qdisc *inside* the same
+                * pair of spin locks.  That is, if any qdisc_run starts after
+                * our initial test it should see the noop_qdisc and then
+                * clear the RUNNING bit before dropping the queue lock.  So
+                * if it is set here then we've found a bug.
+                */
+       } while (WARN_ON_ONCE(running));
 }
 
 void dev_init_scheduler(struct net_device *dev)
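
The new loop above is an instance of a double-checked wait: spin on the flag outside the lock for speed, then re-test inside the lock so the lock's acquire/release ordering guarantees the value read reflects the other CPU's work. Stripped to the bare pattern, purely as an illustration (names and state are hypothetical, not the net/sched code):

#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

static unsigned long state;
static DEFINE_SPINLOCK(lock);

static void wait_until_idle(int flag_bit)
{
	int busy;

	do {
		while (test_bit(flag_bit, &state))	/* cheap unlocked spin */
			yield();

		/* Re-check under the lock: the lock/unlock pair orders
		 * this read against the other CPU's clear_bit(). */
		spin_lock_bh(&lock);
		busy = test_bit(flag_bit, &state);
		spin_unlock_bh(&lock);
	} while (busy);
}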
index 3c773c53e12e96e08207e50446a033132043e349..c98873f39aec49bc4d4af57f60f34d23a20123c4 100644 (file)
@@ -847,7 +847,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
        task->tk_start = jiffies;
 
        dprintk("RPC:       new task initialized, procpid %u\n",
-                               current->pid);
+                               task_pid_nr(current));
 }
 
 static struct rpc_task *
index 6996cba5aa9664ac99f706b626137d98fb99d286..9163ec526c2a0e66ed2fdf20f232173f6aed8d90 100644 (file)
@@ -483,7 +483,7 @@ static int unix_listen(struct socket *sock, int backlog)
        sk->sk_max_ack_backlog  = backlog;
        sk->sk_state            = TCP_LISTEN;
        /* set credentials so connect can copy them */
-       sk->sk_peercred.pid     = current->tgid;
+       sk->sk_peercred.pid     = task_tgid_vnr(current);
        sk->sk_peercred.uid     = current->euid;
        sk->sk_peercred.gid     = current->egid;
        err = 0;
@@ -1133,7 +1133,7 @@ restart:
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
-       newsk->sk_peercred.pid  = current->tgid;
+       newsk->sk_peercred.pid  = task_tgid_vnr(current);
        newsk->sk_peercred.uid  = current->euid;
        newsk->sk_peercred.gid  = current->egid;
        newu = unix_sk(newsk);
@@ -1194,7 +1194,7 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
        sock_hold(skb);
        unix_peer(ska)=skb;
        unix_peer(skb)=ska;
-       ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+       ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
        ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
        ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
 
diff --git a/samples/Kconfig b/samples/Kconfig
new file mode 100644 (file)
index 0000000..57bb223
--- /dev/null
@@ -0,0 +1,16 @@
+# samples/Kconfig
+
+menuconfig SAMPLES
+       bool "Sample kernel code"
+       help
+         You can build and test sample kernel code here.
+
+if SAMPLES
+
+config SAMPLE_MARKERS
+       tristate "Build markers examples -- loadable modules only"
+       depends on MARKERS && m
+       help
+         This builds the markers example modules.
+
+endif # SAMPLES

diff --git a/samples/Makefile b/samples/Makefile
new file mode 100644 (file)
index 0000000..5a4f0b6
--- /dev/null
@@ -0,0 +1,3 @@
+# Makefile for Linux samples code
+
+obj-$(CONFIG_SAMPLES)  += markers/
diff --git a/samples/markers/Makefile b/samples/markers/Makefile
new file mode 100644 (file)
index 0000000..6d72312
--- /dev/null
@@ -0,0 +1,4 @@
+# builds the markers example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
new file mode 100644 (file)
index 0000000..e787c6d
--- /dev/null
@@ -0,0 +1,54 @@
+/* marker-example.c
+ *
+ * Executes a marker when /proc/marker-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+       int i;
+
+       trace_mark(subsystem_event, "%d %s", 123, "example string");
+       for (i = 0; i < 10; i++)
+               trace_mark(subsystem_eventb, MARK_NOARGS);
+       return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+       .open = my_open,
+};
+
+static int example_init(void)
+{
+       printk(KERN_ALERT "example init\n");
+       pentry_example = create_proc_entry("marker-example", 0444, NULL);
+       if (pentry_example)
+               pentry_example->proc_fops = &mark_ops;
+       else
+               return -EPERM;
+       return 0;
+}
+
+static void example_exit(void)
+{
+       printk(KERN_ALERT "example exit\n");
+       remove_proc_entry("marker-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Marker example");
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
new file mode 100644 (file)
index 0000000..238b2e3
--- /dev/null
@@ -0,0 +1,98 @@
+/* probe-example.c
+ *
+ * Connects two functions to marker call sites.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <asm/atomic.h>
+
+struct probe_data {
+       const char *name;
+       const char *format;
+       marker_probe_func *probe_func;
+};
+
+void probe_subsystem_event(const struct marker *mdata, void *private,
+       const char *format, ...)
+{
+       va_list ap;
+       /* Declare args */
+       unsigned int value;
+       const char *mystr;
+
+       /* Assign args */
+       va_start(ap, format);
+       value = va_arg(ap, typeof(value));
+       mystr = va_arg(ap, typeof(mystr));
+
+       /* Call printk */
+       printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
+
+       /* or count, check rights, serialize data in a buffer */
+
+       va_end(ap);
+}
+
+atomic_t eventb_count = ATOMIC_INIT(0);
+
+void probe_subsystem_eventb(const struct marker *mdata, void *private,
+       const char *format, ...)
+{
+       /* Increment counter */
+       atomic_inc(&eventb_count);
+}
+
+static struct probe_data probe_array[] =
+{
+       {       .name = "subsystem_event",
+               .format = "%d %s",
+               .probe_func = probe_subsystem_event },
+       {       .name = "subsystem_eventb",
+               .format = MARK_NOARGS,
+               .probe_func = probe_subsystem_eventb },
+};
+
+static int __init probe_init(void)
+{
+       int result;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+               result = marker_probe_register(probe_array[i].name,
+                               probe_array[i].format,
+                               probe_array[i].probe_func, &probe_array[i]);
+               if (result)
+                       printk(KERN_INFO "Unable to register probe %s\n",
+                               probe_array[i].name);
+               result = marker_arm(probe_array[i].name);
+               if (result)
+                       printk(KERN_INFO "Unable to arm probe %s\n",
+                               probe_array[i].name);
+       }
+       return 0;
+}
+
+static void __exit probe_fini(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(probe_array); i++)
+               marker_probe_unregister(probe_array[i].name);
+       printk(KERN_INFO "Number of event b : %u\n",
+                       atomic_read(&eventb_count));
+}
+
+module_init(probe_init);
+module_exit(probe_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("SUBSYSTEM Probe");
index b458e2acb4acf3e035d7e3bd66a4b4687e8d5965..28e480c8100f80ebd70e0af9b2597a4041dbb4cd 100755 (executable)
@@ -15,7 +15,7 @@
 #      AVR32 port by Haavard Skinnemoen <hskinnemoen@atmel.com>
 #
 #      Usage:
-#      objdump -d vmlinux | stackcheck.pl [arch]
+#      objdump -d vmlinux | scripts/checkstack.pl [arch]
 #
 #      TODO :  Port to all architectures (one regex per arch)
 
index 48ca5b092768fdde28e1df98b6420e47af0e82b1..43f902750a1b6c802d650b6e9bbafde70ec8ae70 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/xattr.h>
 #include <linux/hugetlb.h>
 #include <linux/mount.h>
+#include <linux/sched.h>
 
 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
@@ -334,7 +335,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
        /* For init, we want to retain the capabilities set
         * in the init_task struct. Thus we skip the usual
         * capability rules */
-       if (!is_init(current)) {
+       if (!is_global_init(current)) {
                current->cap_permitted = new_permitted;
                current->cap_effective = bprm->cap_effective ?
                                new_permitted : 0;
index a1aa89f2faf3bc9e6f25ccb3784871d0631a49dd..566b5ab9d4e892008c95c5658746b7f4fb3aff28 100644 (file)
@@ -236,8 +236,8 @@ int __init snd_pmac_attach_beep(struct snd_pmac *chip)
        input_dev->id.product = 0x0001;
        input_dev->id.version = 0x0100;
 
-       input_dev->evbit[0] = BIT(EV_SND);
-       input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+       input_dev->evbit[0] = BIT_MASK(EV_SND);
+       input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
        input_dev->event = snd_pmac_beep_event;
        input_dev->dev.parent = &chip->pdev->dev;
        input_set_drvdata(input_dev, chip);
index a1de0c6089573c7f7bc1a700f6a16b9307309523..cd536ca20e5632d479a70e2d498da16555cfe79d 100644 (file)
@@ -200,8 +200,9 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev)
 
         switch (dev->chip.usb_id) {
        case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2):
-               input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-               input->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_Z);
+               input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+               input->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+                       BIT_MASK(ABS_Z);
                input->keycode = keycode_rk2;
                input->keycodesize = sizeof(char);
                input->keycodemax = ARRAY_SIZE(keycode_rk2);
@@ -228,8 +229,8 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev)
                snd_usb_caiaq_set_auto_msg(dev, 1, 10, 0);
                break;
        case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AK1):
-               input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-               input->absbit[0] = BIT(ABS_X);
+               input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+               input->absbit[0] = BIT_MASK(ABS_X);
                input->keycode = keycode_ak1;
                input->keycodesize = sizeof(char);
                input->keycodemax = ARRAY_SIZE(keycode_ak1);