git.karo-electronics.de Git - linux-beck.git/commitdiff
Merge branches 'x86-alternatives-for-linus', 'x86-fpu-for-linus', 'x86-hwmon-for-linus', 'x86-paravirt-for-linus', 'core-locking-for-linus' and 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 6 Jan 2011 19:11:50 +0000 (11:11 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 6 Jan 2011 19:11:50 +0000 (11:11 -0800)
* 'x86-alternatives-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, suspend: Avoid unnecessary smp alternatives switch during suspend/resume

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64, asm: Use fxsaveq/fxrestorq in more places

* 'x86-hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, hwmon: Add core threshold notification to therm_throt.c

* 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, paravirt: Use native_halt on a halt, not native_safe_halt

* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  locking, lockdep: Convert sprintf_symbol to %pS

* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  irq: Better struct irqaction layout

583 files changed:
CREDITS
Documentation/RCU/trace.txt
Documentation/accounting/getdelays.c
Documentation/dontdiff
Documentation/filesystems/Locking
Documentation/kernel-docs.txt
Documentation/kernel-parameters.txt
Documentation/scsi/scsi_mid_low_api.txt
Documentation/trace/events-power.txt [new file with mode: 0644]
Documentation/trace/postprocess/trace-vmscan-postprocess.pl
Documentation/x86/boot.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/perf_event.h
arch/alpha/kernel/irq_alpha.c
arch/alpha/kernel/perf_event.c
arch/arm/common/it8152.c
arch/arm/include/asm/hardware/it8152.h
arch/arm/include/asm/highmem.h
arch/arm/include/asm/sizes.h
arch/arm/include/asm/system.h
arch/arm/kernel/entry-common.S
arch/arm/kernel/perf_event.c
arch/arm/kernel/smp.c
arch/arm/mach-at91/include/mach/at91_mci.h
arch/arm/mach-ixp4xx/common-pci.c
arch/arm/mach-pxa/Kconfig
arch/arm/mach-pxa/sleep.S
arch/arm/mm/cache-feroceon-l2.c
arch/arm/mm/cache-xsc3l2.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/flush.c
arch/arm/mm/highmem.c
arch/mips/kernel/perf_event_mipsxx.c
arch/mn10300/kernel/irq.c
arch/powerpc/kernel/e500-pmu.c
arch/powerpc/kernel/mpc7450-pmu.c
arch/powerpc/kernel/perf_event.c
arch/powerpc/kernel/perf_event_fsl_emb.c
arch/powerpc/kernel/power4-pmu.c
arch/powerpc/kernel/power5+-pmu.c
arch/powerpc/kernel/power5-pmu.c
arch/powerpc/kernel/power6-pmu.c
arch/powerpc/kernel/power7-pmu.c
arch/powerpc/kernel/ppc970-pmu.c
arch/powerpc/platforms/52xx/mpc52xx_gpt.c
arch/s390/Kconfig
arch/s390/include/asm/mutex.h
arch/sh/boards/mach-se/7206/irq.c
arch/sh/kernel/cpu/sh2a/clock-sh7201.c
arch/sh/kernel/cpu/sh4/clock-sh4-202.c
arch/sh/kernel/cpu/sh4/perf_event.c
arch/sh/kernel/cpu/sh4a/perf_event.c
arch/sh/kernel/perf_event.c
arch/sparc/include/asm/perf_event.h
arch/sparc/kernel/nmi.c
arch/sparc/kernel/perf_event.c
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/boot/compressed/head_64.S
arch/x86/include/asm/alternative.h
arch/x86/include/asm/amd_nb.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/apicdef.h
arch/x86/include/asm/bootparam.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/i387.h
arch/x86/include/asm/io_apic.h
arch/x86/include/asm/irq.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/mce.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/mpspec.h
arch/x86/include/asm/mpspec_def.h
arch/x86/include/asm/mrst-vrtc.h [new file with mode: 0644]
arch/x86/include/asm/mrst.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nmi.h
arch/x86/include/asm/pci.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/perf_event_p4.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/smpboot_hooks.h
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/timer.h
arch/x86/include/asm/uv/uv_bau.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/alternative.c
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apb_timer.c
arch/x86/kernel/aperture_64.c
arch/x86/kernel/apic/Makefile
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/nmi.c [deleted file]
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_amd.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perfctr-watchdog.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/early_printk.c
arch/x86/kernel/ftrace.c
arch/x86/kernel/head32.c
arch/x86/kernel/head_32.S
arch/x86/kernel/kprobes.c
arch/x86/kernel/microcode_amd.c
arch/x86/kernel/microcode_intel.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/reboot_fixups_32.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/stacktrace.c
arch/x86/kernel/time.c
arch/x86/kernel/trampoline_64.S
arch/x86/kernel/traps.c
arch/x86/kernel/tsc.c
arch/x86/kernel/verify_cpu.S [moved from arch/x86/kernel/verify_cpu_64.S with 65% similarity]
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/i8259.c
arch/x86/kvm/mmu.c
arch/x86/mm/Makefile
arch/x86/mm/amdtopology_64.c [moved from arch/x86/mm/k8topology_64.c with 94% similarity]
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/kmemcheck/error.c
arch/x86/mm/numa_64.c
arch/x86/mm/pageattr.c
arch/x86/mm/setup_nx.c
arch/x86/mm/srat_32.c
arch/x86/mm/srat_64.c
arch/x86/oprofile/backtrace.c
arch/x86/oprofile/nmi_int.c
arch/x86/oprofile/nmi_timer_int.c
arch/x86/oprofile/op_model_amd.c
arch/x86/oprofile/op_model_p4.c
arch/x86/pci/Makefile
arch/x86/pci/ce4100.c [new file with mode: 0644]
arch/x86/pci/pcbios.c
arch/x86/platform/Makefile
arch/x86/platform/ce4100/Makefile [new file with mode: 0644]
arch/x86/platform/ce4100/ce4100.c [new file with mode: 0644]
arch/x86/platform/iris/Makefile [new file with mode: 0644]
arch/x86/platform/iris/iris.c [new file with mode: 0644]
arch/x86/platform/mrst/Makefile
arch/x86/platform/mrst/early_printk_mrst.c [moved from arch/x86/kernel/early_printk_mrst.c with 100% similarity]
arch/x86/platform/mrst/mrst.c
arch/x86/platform/mrst/vrtc.c [new file with mode: 0644]
arch/x86/platform/sfi/sfi.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/platform/visws/visws_quirks.c
drivers/acpi/acpica/evgpeinit.c
drivers/acpi/acpica/nsinit.c
drivers/acpi/battery.c
drivers/acpi/numa.c
drivers/acpi/scan.c
drivers/ata/Kconfig
drivers/ata/Makefile
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-sff.c
drivers/ata/pata_cs5536.c
drivers/atm/atmtcp.c
drivers/bluetooth/hci_ldisc.c
drivers/char/agp/amd64-agp.c
drivers/char/agp/intel-gtt.c
drivers/char/ramoops.c
drivers/cpufreq/cpufreq.c
drivers/cpuidle/cpuidle.c
drivers/dma/mv_xor.c
drivers/edac/amd64_edac.c
drivers/gpio/cs5535-gpio.c
drivers/gpio/gpiolib.c
drivers/gpio/rdc321x-gpio.c
drivers/gpu/drm/drm_crtc_helper.c
drivers/gpu/drm/i915/dvo_ch7017.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/intel_sdvo.c
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreend.h
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_cs.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_fb.c
drivers/hwmon/s3c-hwmon.c
drivers/idle/intel_idle.c
drivers/isdn/gigaset/capi.c
drivers/leds/led-class.c
drivers/media/IR/keymaps/rc-rc6-mce.c
drivers/media/IR/lirc_dev.c
drivers/media/IR/mceusb.c
drivers/media/IR/nuvoton-cir.c
drivers/media/IR/streamzap.c
drivers/media/video/cx25840/cx25840-core.c
drivers/media/video/cx88/cx88-alsa.c
drivers/media/video/cx88/cx88-cards.c
drivers/media/video/cx88/cx88-video.c
drivers/media/video/cx88/cx88.h
drivers/media/video/em28xx/em28xx-video.c
drivers/media/video/mx2_camera.c
drivers/media/video/s5p-fimc/fimc-capture.c
drivers/media/video/s5p-fimc/fimc-core.c
drivers/media/video/s5p-fimc/fimc-core.h
drivers/media/video/s5p-fimc/regs-fimc.h
drivers/media/video/sh_mobile_ceu_camera.c
drivers/media/video/soc_camera.c
drivers/media/video/wm8775.c
drivers/mfd/ab8500-core.c
drivers/mfd/wm831x-core.c
drivers/mmc/core/core.c
drivers/mmc/host/at91_mci.c
drivers/mmc/host/atmel-mci.c
drivers/net/atl1c/atl1c_main.c
drivers/net/atlx/atl1.c
drivers/net/benet/be.h
drivers/net/benet/be_cmds.c
drivers/net/benet/be_main.c
drivers/net/bonding/bond_ipv6.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bonding.h
drivers/net/cnic.c
drivers/net/ehea/ehea_ethtool.c
drivers/net/epic100.c
drivers/net/hamachi.c
drivers/net/pcmcia/axnet_cs.c
drivers/net/pcmcia/pcnet_cs.c
drivers/net/ppp_generic.c
drivers/net/skfp/skfddi.c
drivers/net/starfire.c
drivers/net/sundance.c
drivers/net/tehuti.c
drivers/net/tg3.c
drivers/net/typhoon.c
drivers/net/usb/asix.c
drivers/net/usb/mcs7830.c
drivers/net/veth.c
drivers/net/wireless/hostap/hostap_main.c
drivers/net/wireless/iwlwifi/iwl-1000.c
drivers/net/wireless/iwlwifi/iwl-6000.c
drivers/net/wireless/iwlwifi/iwl-agn-eeprom.c
drivers/net/wireless/iwlwifi/iwl-agn-lib.c
drivers/net/wireless/iwlwifi/iwl-core.h
drivers/net/wireless/iwlwifi/iwl-eeprom.h
drivers/net/wireless/libertas/cfg.c
drivers/net/wireless/p54/p54usb.c
drivers/net/wireless/rt2x00/rt2800pci.c
drivers/net/wireless/rt2x00/rt2x00.h
drivers/net/wireless/rt2x00/rt2x00dev.c
drivers/net/yellowfin.c
drivers/of/of_i2c.c
drivers/pci/hotplug/pciehp_acpi.c
drivers/platform/x86/intel_ips.c
drivers/platform/x86/intel_ips.h [new file with mode: 0644]
drivers/platform/x86/intel_scu_ipc.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-mrst.c [new file with mode: 0644]
drivers/rtc/rtc-rs5c372.c
drivers/scsi/bfa/bfa_fcs.c
drivers/scsi/bfa/bfa_fcs_fcpim.c
drivers/scsi/bfa/bfa_fcs_lport.c
drivers/scsi/bfa/bfa_fcs_rport.c
drivers/scsi/bfa/bfa_ioc.c
drivers/scsi/bfa/bfa_svc.c
drivers/scsi/bfa/bfad.c
drivers/scsi/bfa/bfad_drv.h
drivers/scsi/bfa/bfad_im.c
drivers/sh/intc/core.c
drivers/spi/coldfire_qspi.c
drivers/spi/mpc52xx_spi.c
drivers/spi/omap2_mcspi.c
drivers/spi/spi.c
drivers/spi/spi_fsl_espi.c
drivers/staging/zram/zram_drv.c
drivers/usb/atm/ueagle-atm.c
drivers/video/backlight/cr_bllcd.c
drivers/video/fbmem.c
drivers/video/imxfb.c
drivers/video/sh_mobile_hdmi.c
drivers/video/sh_mobile_lcdcfb.c
drivers/watchdog/hpwdt.c
drivers/watchdog/rdc321x_wdt.c
fs/ext4/resize.c
fs/gfs2/bmap.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/lock_dlm.c
fs/gfs2/ops_inode.c
fs/gfs2/quota.c
fs/gfs2/rgrp.c
fs/gfs2/rgrp.h
fs/gfs2/xattr.c
fs/logfs/journal.c
fs/logfs/readwrite.c
fs/ocfs2/aops.c
fs/ocfs2/aops.h
fs/ocfs2/cluster/masklog.c
fs/ocfs2/cluster/masklog.h
fs/ocfs2/dir.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/file.c
fs/ocfs2/ocfs2_fs.h
fs/proc/base.c
include/linux/completion.h
include/linux/dmaengine.h
include/linux/ftrace_event.h
include/linux/hrtimer.h
include/linux/init_task.h
include/linux/interrupt.h
include/linux/kprobes.h
include/linux/kthread.h
include/linux/module.h
include/linux/mutex.h
include/linux/netlink.h
include/linux/nmi.h
include/linux/perf_event.h
include/linux/rculist.h
include/linux/rcupdate.h
include/linux/rcutiny.h
include/linux/rcutree.h
include/linux/sched.h
include/linux/sfi.h
include/linux/stacktrace.h
include/linux/syscalls.h
include/linux/taskstats.h
include/linux/timer.h
include/linux/timerqueue.h [new file with mode: 0644]
include/linux/tracepoint.h
include/linux/unaligned/packed_struct.h
include/linux/workqueue.h
include/media/wm8775.h
include/net/flow.h
include/net/ip6_route.h
include/net/mac80211.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/net/sock.h
include/trace/define_trace.h
include/trace/events/power.h
include/trace/events/syscalls.h
include/trace/ftrace.h
init/Kconfig
init/do_mounts.c
init/main.c
kernel/cpu.c
kernel/fork.c
kernel/futex.c
kernel/hrtimer.c
kernel/hw_breakpoint.c
kernel/irq/manage.c
kernel/kprobes.c
kernel/kthread.c
kernel/lockdep_proc.c
kernel/module.c
kernel/mutex.c
kernel/perf_event.c
kernel/posix-timers.c
kernel/power/suspend.c
kernel/printk.c
kernel/rcutiny.c
kernel/rcutiny_plugin.h
kernel/rcutorture.c
kernel/rcutree.c
kernel/rcutree.h
kernel/rcutree_plugin.h
kernel/rcutree_trace.c
kernel/sched.c
kernel/sched_autogroup.c [new file with mode: 0644]
kernel/sched_autogroup.h [new file with mode: 0644]
kernel/sched_clock.c
kernel/sched_debug.c
kernel/sched_fair.c
kernel/sched_features.h
kernel/sched_rt.c
kernel/softirq.c
kernel/srcu.c
kernel/sys.c
kernel/sysctl.c
kernel/sysctl_binary.c
kernel/taskstats.c
kernel/time/timecompare.c
kernel/time/timekeeping.c
kernel/time/timer_list.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/power-traces.c
kernel/trace/ring_buffer.c
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_export.c
kernel/trace/trace_selftest.c
kernel/user.c
kernel/watchdog.c
lib/Kconfig.debug
lib/Makefile
lib/timerqueue.c [new file with mode: 0644]
mm/compaction.c
mm/memcontrol.c
mm/migrate.c
mm/nommu.c
mm/page-writeback.c
mm/percpu.c
net/bluetooth/rfcomm/core.c
net/bridge/br_multicast.c
net/bridge/br_stp_bpdu.c
net/can/bcm.c
net/core/fib_rules.c
net/core/sock.c
net/ipv4/fib_frontend.c
net/ipv4/route.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv4/udplite.c
net/ipv6/addrconf.c
net/ipv6/ip6_output.c
net/ipv6/route.c
net/ipv6/udp.c
net/ipv6/udplite.c
net/ipv6/xfrm6_output.c
net/irda/af_irda.c
net/mac80211/ibss.c
net/mac80211/rx.c
net/mac80211/work.c
net/sched/sch_sfq.c
net/sctp/socket.c
scripts/Makefile.build
scripts/kconfig/menu.c
scripts/kernel-doc
security/integrity/ima/ima_policy.c
security/keys/request_key.c
sound/core/pcm_lib.c
sound/oss/soundcard.c
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_sigmatel.c
sound/soc/codecs/max98088.c
sound/soc/codecs/wm8523.c
sound/soc/codecs/wm8741.c
sound/soc/codecs/wm8753.c
sound/soc/codecs/wm8904.c
sound/soc/codecs/wm8940.c
sound/soc/codecs/wm8955.c
sound/soc/codecs/wm8960.c
sound/soc/codecs/wm8962.c
sound/soc/codecs/wm8971.c
sound/soc/codecs/wm9081.c
sound/soc/codecs/wm9090.c
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-buildid-list.txt
tools/perf/Documentation/perf-diff.txt
tools/perf/Documentation/perf-kvm.txt
tools/perf/Documentation/perf-lock.txt
tools/perf/Documentation/perf-probe.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-sched.txt
tools/perf/Documentation/perf-script-perl.txt [moved from tools/perf/Documentation/perf-trace-perl.txt with 90% similarity]
tools/perf/Documentation/perf-script-python.txt [moved from tools/perf/Documentation/perf-trace-python.txt with 89% similarity]
tools/perf/Documentation/perf-script.txt [moved from tools/perf/Documentation/perf-trace.txt with 62% similarity]
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-test.txt
tools/perf/Documentation/perf-timechart.txt
tools/perf/Documentation/perf-top.txt
tools/perf/MANIFEST
tools/perf/Makefile
tools/perf/bench/mem-memcpy-arch.h [new file with mode: 0644]
tools/perf/bench/mem-memcpy-x86-64-asm-def.h [new file with mode: 0644]
tools/perf/bench/mem-memcpy-x86-64-asm.S [new file with mode: 0644]
tools/perf/bench/mem-memcpy.c
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-diff.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-lock.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c [moved from tools/perf/builtin-trace.c with 83% similarity]
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/builtin.h
tools/perf/command-list.txt
tools/perf/feature-tests.mak
tools/perf/perf.c
tools/perf/scripts/perl/Perf-Trace-Util/Context.c
tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
tools/perf/scripts/perl/Perf-Trace-Util/README
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
tools/perf/scripts/perl/bin/failed-syscalls-report
tools/perf/scripts/perl/bin/rw-by-file-report
tools/perf/scripts/perl/bin/rw-by-pid-report
tools/perf/scripts/perl/bin/rwtop-report
tools/perf/scripts/perl/bin/wakeup-latency-report
tools/perf/scripts/perl/bin/workqueue-stats-report
tools/perf/scripts/perl/check-perf-trace.pl
tools/perf/scripts/perl/rw-by-file.pl
tools/perf/scripts/perl/workqueue-stats.pl
tools/perf/scripts/python/Perf-Trace-Util/Context.c
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
tools/perf/scripts/python/bin/futex-contention-report
tools/perf/scripts/python/bin/netdev-times-report
tools/perf/scripts/python/bin/sched-migration-report
tools/perf/scripts/python/bin/sctop-report
tools/perf/scripts/python/bin/syscall-counts-by-pid-report
tools/perf/scripts/python/bin/syscall-counts-report
tools/perf/scripts/python/check-perf-trace.py
tools/perf/scripts/python/failed-syscalls-by-pid.py
tools/perf/scripts/python/sched-migration.py
tools/perf/scripts/python/sctop.py
tools/perf/scripts/python/syscall-counts-by-pid.py
tools/perf/scripts/python/syscall-counts.py
tools/perf/util/build-id.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evsel.c [new file with mode: 0644]
tools/perf/util/evsel.h [new file with mode: 0644]
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/include/asm/cpufeature.h [new file with mode: 0644]
tools/perf/util/include/asm/dwarf2.h [new file with mode: 0644]
tools/perf/util/include/linux/bitops.h
tools/perf/util/include/linux/linkage.h [new file with mode: 0644]
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-options.h
tools/perf/util/probe-event.c
tools/perf/util/probe-finder.c
tools/perf/util/probe-finder.h
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/sort.c
tools/perf/util/string.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/trace-event-info.c
tools/perf/util/trace-event.h
tools/perf/util/ui/util.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/perf/util/xyarray.c [new file with mode: 0644]
tools/perf/util/xyarray.h [new file with mode: 0644]

diff --git a/CREDITS b/CREDITS
index 41d8e63d5165b5b786db6ab7d8c14fbc49fc0107..494b6e4746d7b9d08f0334bab61651341d285068 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2365,8 +2365,6 @@ E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD  841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: R. Brasílio Itiberê, 4270/1010 - Água Verde
-S: 80240-060 - Curitiba - Paraná
 S: Brazil
 
 N: Karsten Merker
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index a851118775d84c7a1d2356ba6a6c8e6208292887..6a8c73f55b80ca38601ba96f179565fe8b0b7ea0 100644 (file)
@@ -1,18 +1,22 @@
 CONFIG_RCU_TRACE debugfs Files and Formats
 
 
-The rcutree implementation of RCU provides debugfs trace output that
-summarizes counters and state.  This information is useful for debugging
-RCU itself, and can sometimes also help to debug abuses of RCU.
-The following sections describe the debugfs files and formats.
+The rcutree and rcutiny implementations of RCU provide debugfs trace
+output that summarizes counters and state.  This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+The following sections describe the debugfs files and formats, first
+for rcutree and next for rcutiny.
 
 
-Hierarchical RCU debugfs Files and Formats
+CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats
 
-This implementation of RCU provides three debugfs files under the
+These implementations of RCU provide five debugfs files under the
 top-level directory RCU: rcu/rcudata (which displays fields in struct
-rcu_data), rcu/rcugp (which displays grace-period counters), and
-rcu/rcuhier (which displays the struct rcu_node hierarchy).
+rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of
+rcu/rcudata), rcu/rcugp (which displays grace-period counters),
+rcu/rcuhier (which displays the struct rcu_node hierarchy), and
+rcu/rcu_pending (which displays counts of the reasons that the
+rcu_pending() function decided that there was core RCU work to do).
 
 The output of "cat rcu/rcudata" looks as follows:
 
@@ -130,7 +134,8 @@ o   "ci" is the number of RCU callbacks that have been invoked for
        been registered in absence of CPU-hotplug activity.
 
 o      "co" is the number of RCU callbacks that have been orphaned due to
-       this CPU going offline.
+       this CPU going offline.  These orphaned callbacks have been moved
+       to an arbitrarily chosen online CPU.
 
 o      "ca" is the number of RCU callbacks that have been adopted due to
        other CPUs going offline.  Note that ci+co-ca+ql is the number of
@@ -168,12 +173,12 @@ o "gpnum" is the number of grace periods that have started.  It is
 
 The output of "cat rcu/rcuhier" looks as follows, with very long lines:
 
-c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0
+c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
 1/1 .>. 0:127 ^0    
 3/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
 3/3f .>. 0:5 ^0    2/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3    
 rcu_bh:
-c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0
+c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
 0/1 .>. 0:127 ^0    
 0/3 .>. 0:35 ^0    0/0 .>. 36:71 ^1    0/0 .>. 72:107 ^2    0/0 .>. 108:127 ^3    
 0/3f .>. 0:5 ^0    0/3 .>. 6:11 ^1    0/0 .>. 12:17 ^2    0/0 .>. 18:23 ^3    0/0 .>. 24:29 ^4    0/0 .>. 30:35 ^5    0/0 .>. 36:41 ^0    0/0 .>. 42:47 ^1    0/0 .>. 48:53 ^2    0/0 .>. 54:59 ^3    0/0 .>. 60:65 ^4    0/0 .>. 66:71 ^5    0/0 .>. 72:77 ^0    0/0 .>. 78:83 ^1    0/0 .>. 84:89 ^2    0/0 .>. 90:95 ^3    0/0 .>. 96:101 ^4    0/0 .>. 102:107 ^5    0/0 .>. 108:113 ^0    0/0 .>. 114:119 ^1    0/0 .>. 120:125 ^2    0/0 .>. 126:127 ^3
@@ -212,11 +217,6 @@ o  "fqlh" is the number of calls to force_quiescent_state() that
        exited immediately (without even being counted in nfqs above)
        due to contention on ->fqslock.
 
-o      "oqlen" is the number of callbacks on the "orphan" callback
-       list.  RCU callbacks are placed on this list by CPUs going
-       offline, and are "adopted" either by the CPU helping the outgoing
-       CPU or by the next rcu_barrier*() call, whichever comes first.
-
 o      Each element of the form "1/1 0:127 ^0" represents one struct
        rcu_node.  Each line represents one level of the hierarchy, from
        root to leaves.  It is best to think of the rcu_data structures
@@ -326,3 +326,115 @@ o "nn" is the number of times that this CPU needed nothing.  Alert
        readers will note that the rcu "nn" number for a given CPU very
        closely matches the rcu_bh "np" number for that same CPU.  This
        is due to short-circuit evaluation in rcu_pending().
+
+
+CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats
+
+These implementations of RCU provide a single debugfs file under the
+top-level directory RCU, namely rcu/rcudata, which displays fields in
+rcu_bh_ctrlblk, rcu_sched_ctrlblk and, for CONFIG_TINY_PREEMPT_RCU,
+rcu_preempt_ctrlblk.
+
+The output of "cat rcu/rcudata" is as follows:
+
+rcu_preempt: qlen=24 gp=1097669 g197/p197/c197 tasks=...
+             ttb=. btg=no ntb=184 neb=0 nnb=183 j=01f7 bt=0274
+             normal balk: nt=1097669 gt=0 bt=371 b=0 ny=25073378 nos=0
+             exp balk: bt=0 nos=0
+rcu_sched: qlen: 0
+rcu_bh: qlen: 0
+
+This is split into rcu_preempt, rcu_sched, and rcu_bh sections, with the
+rcu_preempt section appearing only in CONFIG_TINY_PREEMPT_RCU builds.
+The last three lines of the rcu_preempt section appear only in
+CONFIG_RCU_BOOST kernel builds.  The fields are as follows:
+
+o      "qlen" is the number of RCU callbacks currently waiting either
+       for an RCU grace period or waiting to be invoked.  This is the
+       only field present for rcu_sched and rcu_bh, due to the
+       short-circuiting of grace periods in those two cases.
+
+o      "gp" is the number of grace periods that have completed.
+
+o      "g197/p197/c197" displays the grace-period state, with the
+       "g" number being the number of grace periods that have started
+       (mod 256), the "p" number being the number of grace periods
+       that the CPU has responded to (also mod 256), and the "c"
+       number being the number of grace periods that have completed
+       (once again mod 256).
+
+       Why have both "gp" and "g"?  Because the data flowing into
+       "gp" is only present in a CONFIG_RCU_TRACE kernel.
+
+o      "tasks" is a set of bits.  The first bit is "T" if there are
+       currently tasks that have recently blocked within an RCU
+       read-side critical section, the second bit is "N" if any of the
+       aforementioned tasks are blocking the current RCU grace period,
+       and the third bit is "E" if any of the aforementioned tasks are
+       blocking the current expedited grace period.  Each bit is "."
+       if the corresponding condition does not hold.
+
+o      "ttb" is a single bit.  It is "B" if any of the blocked tasks
+       need to be priority boosted and "." otherwise.
+
+o      "btg" indicates whether boosting has been carried out during
+       the current grace period, with "exp" indicating that boosting
+       is in progress for an expedited grace period, "no" indicating
+       that boosting has not yet started for a normal grace period,
+       "begun" indicating that boosting has bebug for a normal grace
+       period, and "done" indicating that boosting has completed for
+       a normal grace period.
+
+o      "ntb" is the total number of tasks subjected to RCU priority boosting
+       periods since boot.
+
+o      "neb" is the number of expedited grace periods that have had
+       to resort to RCU priority boosting since boot.
+
+o      "nnb" is the number of normal grace periods that have had
+       to resort to RCU priority boosting since boot.
+
+o      "j" is the low-order 12 bits of the jiffies counter in hexadecimal.
+
+o      "bt" is the low-order 12 bits of the value that the jiffies counter
+       will have at the next time that boosting is scheduled to begin.
+
+o      In the line beginning with "normal balk", the fields are as follows:
+
+       o       "nt" is the number of times that the system balked from
+               boosting because there were no blocked tasks to boost.
+               Note that the system will balk from boosting even if the
+               grace period is overdue when the currently running task
+               is looping within an RCU read-side critical section.
+               There is no point in boosting in this case, because
+               boosting a running task won't make it run any faster.
+
+       o       "gt" is the number of times that the system balked
+               from boosting because, although there were blocked tasks,
+               none of them were preventing the current grace period
+               from completing.
+
+       o       "bt" is the number of times that the system balked
+               from boosting because boosting was already in progress.
+
+       o       "b" is the number of times that the system balked from
+               boosting because boosting had already completed for
+               the grace period in question.
+
+       o       "ny" is the number of times that the system balked from
+               boosting because it was not yet time to start boosting
+               the grace period in question.
+
+       o       "nos" is the number of times that the system balked from
+               boosting for inexplicable ("not otherwise specified")
+               reasons.  This can actually happen due to races involving
+               increments of the jiffies counter.
+
+o      In the line beginning with "exp balk", the fields are as follows:
+
+       o       "bt" is the number of times that the system balked from
+               boosting because there were no blocked tasks to boost.
+
+       o       "nos" is the number of times that the system balked from
+                boosting for inexplicable ("not otherwise specified")
+                reasons.
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index a2976a6de033df2b4247f69b48f56f44abed3378..e9c77788a39d8f2c5c807b59295be6c3f1b3fea6 100644 (file)
@@ -516,6 +516,7 @@ int main(int argc, char *argv[])
                        default:
                                fprintf(stderr, "Unknown nla_type %d\n",
                                        na->nla_type);
+                       case TASKSTATS_TYPE_NULL:
                                break;
                        }
                        na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index d9bcffd594331d7b52a5608269327a7c6642af31..470d3dba1a69aa48c55d98bea3c70d094f458de6 100644 (file)
@@ -62,6 +62,10 @@ aic7*reg_print.c*
 aic7*seq.h*
 aicasm
 aicdb.h*
+altivec1.c
+altivec2.c
+altivec4.c
+altivec8.c
 asm-offsets.h
 asm_offsets.h
 autoconf.h*
@@ -76,6 +80,7 @@ btfixupprep
 build
 bvmlinux
 bzImage*
+capflags.c
 classlist.h*
 comp*.log
 compile.h*
@@ -94,6 +99,7 @@ devlist.h*
 docproc
 elf2ecoff
 elfconfig.h*
+evergreen_reg_safe.h
 fixdep
 flask.h
 fore200e_mkfirm
@@ -108,9 +114,16 @@ genksyms
 *_gray256.c
 ihex2fw
 ikconfig.h*
+inat-tables.c
 initramfs_data.cpio
 initramfs_data.cpio.gz
 initramfs_list
+int16.c
+int1.c
+int2.c
+int32.c
+int4.c
+int8.c
 kallsyms
 kconfig
 keywords.c
@@ -140,6 +153,7 @@ mkprep
 mktables
 mktree
 modpost
+modules.builtin
 modules.order
 modversions.h*
 ncscope.*
@@ -153,14 +167,23 @@ pca200e.bin
 pca200e_ecd.bin2
 piggy.gz
 piggyback
+piggy.S
 pnmtologo
 ppc_defs.h*
 pss_boot.h
 qconf
+r100_reg_safe.h
+r200_reg_safe.h
+r300_reg_safe.h
+r420_reg_safe.h
+r600_reg_safe.h
 raid6altivec*.c
 raid6int*.c
 raid6tables.c
 relocs
+rn50_reg_safe.h
+rs600_reg_safe.h
+rv515_reg_safe.h
 series
 setup
 setup.bin
@@ -169,6 +192,7 @@ sImage
 sm_tbl*
 split-include
 syscalltab.h
+tables.c
 tags
 tftpboot.img
 timeconst.h
@@ -190,6 +214,7 @@ vmlinux
 vmlinux-*
 vmlinux.aout
 vmlinux.lds
+voffset.h
 vsyscall.lds
 vsyscall_32.lds
 wanxlfw.inc
@@ -200,3 +225,4 @@ wakeup.elf
 wakeup.lds
 zImage*
 zconf.hash.c
+zoffset.h
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index b6426f15b4ae85f5469b962806004237b9b5607e..33fa3e5d38fd7480d2ddd136b68f77fef7734c1d 100644 (file)
@@ -18,7 +18,6 @@ prototypes:
        char *(*d_dname)(struct dentry *dentry, char *buffer, int buflen);
 
 locking rules:
-       none have BKL
                dcache_lock     rename_lock     ->d_lock        may block
 d_revalidate:  no              no              no              yes
 d_hash         no              no              no              yes
@@ -42,18 +41,23 @@ ata *);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
        int (*readlink) (struct dentry *, char __user *,int);
-       int (*follow_link) (struct dentry *, struct nameidata *);
+       void * (*follow_link) (struct dentry *, struct nameidata *);
+       void (*put_link) (struct dentry *, struct nameidata *, void *);
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int, struct nameidata *);
+       int (*check_acl)(struct inode *, int);
        int (*setattr) (struct dentry *, struct iattr *);
        int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
+       void (*truncate_range)(struct inode *, loff_t, loff_t);
+       long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
+       int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 
 locking rules:
-       all may block, none have BKL
+       all may block
                i_mutex(inode)
 lookup:                yes
 create:                yes
@@ -66,19 +70,24 @@ rmdir:              yes (both)      (see below)
 rename:                yes (all)       (see below)
 readlink:      no
 follow_link:   no
+put_link:      no
 truncate:      yes             (see below)
 setattr:       yes
 permission:    no
+check_acl:     no
 getattr:       no
 setxattr:      yes
 getxattr:      no
 listxattr:     no
 removexattr:   yes
+truncate_range:        yes
+fallocate:     no
+fiemap:                no
        Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
        cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
        ->truncate() is never called directly - it's a callback, not a
-method. It's called by vmtruncate() - library function normally used by
+method. It's called by vmtruncate() - deprecated library function used by
 ->setattr(). Locking information above applies to that call (i.e. is
 inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been
 passed).
@@ -91,7 +100,7 @@ prototypes:
        struct inode *(*alloc_inode)(struct super_block *sb);
        void (*destroy_inode)(struct inode *);
        void (*dirty_inode) (struct inode *);
-       int (*write_inode) (struct inode *, int);
+       int (*write_inode) (struct inode *, struct writeback_control *wbc);
        int (*drop_inode) (struct inode *);
        void (*evict_inode) (struct inode *);
        void (*put_super) (struct super_block *);
@@ -105,10 +114,10 @@ prototypes:
        int (*show_options)(struct seq_file *, struct vfsmount *);
        ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+       int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
 
 locking rules:
        All may block [not true, see below]
-       None have BKL
                        s_umount
 alloc_inode:
 destroy_inode:
@@ -127,6 +136,7 @@ umount_begin:               no
 show_options:          no              (namespace_sem)
 quota_read:            no              (see below)
 quota_write:           no              (see below)
+bdev_try_to_free_page: no              (see below)
 
 ->statfs() has s_umount (shared) when called by ustat(2) (native or
 compat), but that's an accident of bad API; s_umount is used to pin
@@ -139,19 +149,25 @@ be the only ones operating on the quota file by the quota code (via
 dqio_sem) (unless an admin really wants to screw up something and
 writes to quota files with quotas on). For other details about locking
 see also dquot_operations section.
+->bdev_try_to_free_page is called from the ->releasepage handler of
+the block device inode.  See there for more details.
 
 --------------------------- file_system_type ---------------------------
 prototypes:
        int (*get_sb) (struct file_system_type *, int,
                       const char *, void *, struct vfsmount *);
+       struct dentry *(*mount) (struct file_system_type *, int,
+                      const char *, void *);
        void (*kill_sb) (struct super_block *);
 locking rules:
-               may block       BKL
-get_sb         yes             no
-kill_sb                yes             no
+               may block
+get_sb         yes
+mount          yes
+kill_sb                yes
 
 ->get_sb() returns error or 0 with locked superblock attached to the vfsmount
 (exclusive on ->s_umount).
+->mount() returns ERR_PTR or the root dentry.
 ->kill_sb() takes a write-locked superblock, does all shutdown work on it,
 unlocks and drops the reference.
 
@@ -176,27 +192,35 @@ prototypes:
        void (*freepage)(struct page *);
        int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
                        loff_t offset, unsigned long nr_segs);
-       int (*launder_page) (struct page *);
+       int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
+                               unsigned long *);
+       int (*migratepage)(struct address_space *, struct page *, struct page *);
+       int (*launder_page)(struct page *);
+       int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
+       int (*error_remove_page)(struct address_space *, struct page *);
 
 locking rules:
        All except set_page_dirty and freepage may block
 
-                       BKL     PageLocked(page)        i_mutex
-writepage:             no      yes, unlocks (see below)
-readpage:              no      yes, unlocks
-sync_page:             no      maybe
-writepages:            no
-set_page_dirty         no      no
-readpages:             no
-write_begin:           no      locks the page          yes
-write_end:             no      yes, unlocks            yes
-perform_write:         no      n/a                     yes
-bmap:                  no
-invalidatepage:                no      yes
-releasepage:           no      yes
-freepage:              no      yes
-direct_IO:             no
-launder_page:          no      yes
+                       PageLocked(page)        i_mutex
+writepage:             yes, unlocks (see below)
+readpage:              yes, unlocks
+sync_page:             maybe
+writepages:
+set_page_dirty         no
+readpages:
+write_begin:           locks the page          yes
+write_end:             yes, unlocks            yes
+bmap:
+invalidatepage:                yes
+releasepage:           yes
+freepage:              yes
+direct_IO:
+get_xip_mem:                                   maybe
+migratepage:           yes (both)
+launder_page:          yes
+is_partially_uptodate: yes
+error_remove_page:     yes
 
        ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
 may be called from the request handler (/dev/loop).
@@ -276,9 +300,8 @@ under spinlock (it cannot block) and is sometimes called with the page
 not locked.
 
        ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
-filesystems and by the swapper. The latter will eventually go away. All
-instances do not actually need the BKL. Please, keep it that way and don't
-breed new callers.
+filesystems and by the swapper. The latter will eventually go away.  Please,
+keep it that way and don't breed new callers.
 
        ->invalidatepage() is called when the filesystem must attempt to drop
 some or all of the buffers from the page when it is being truncated.  It
@@ -299,47 +322,37 @@ cleaned, or an error value if not. Note that in order to prevent the page
 getting mapped back in and redirtied, it needs to be kept locked
 across the entire operation.
 
-       Note: currently almost all instances of address_space methods are
-using BKL for internal serialization and that's one of the worst sources
-of contention. Normally they are calling library functions (in fs/buffer.c)
-and pass foo_get_block() as a callback (on local block-based filesystems,
-indeed). BKL is not needed for library stuff and is usually taken by
-foo_get_block(). It's an overkill, since block bitmaps can be protected by
-internal fs locking and real critical areas are much smaller than the areas
-filesystems protect now.
-
 ----------------------- file_lock_operations ------------------------------
 prototypes:
-       void (*fl_insert)(struct file_lock *);  /* lock insertion callback */
-       void (*fl_remove)(struct file_lock *);  /* lock removal callback */
        void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
        void (*fl_release_private)(struct file_lock *);
 
 
 locking rules:
-                       BKL     may block
-fl_insert:             yes     no
-fl_remove:             yes     no
-fl_copy_lock:          yes     no
-fl_release_private:    yes     yes
+                       file_lock_lock  may block
+fl_copy_lock:          yes             no
+fl_release_private:    maybe           no
 
 ----------------------- lock_manager_operations ---------------------------
 prototypes:
        int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
        void (*fl_notify)(struct file_lock *);  /* unblock callback */
+       int (*fl_grant)(struct file_lock *, struct file_lock *, int);
        void (*fl_release_private)(struct file_lock *);
        void (*fl_break)(struct file_lock *); /* break_lease callback */
+       int (*fl_mylease)(struct file_lock *, struct file_lock *);
+       int (*fl_change)(struct file_lock **, int);
 
 locking rules:
-                       BKL     may block
-fl_compare_owner:      yes     no
-fl_notify:             yes     no
-fl_release_private:    yes     yes
-fl_break:              yes     no
-
-       Currently only NFSD and NLM provide instances of this class. None of the
-them block. If you have out-of-tree instances - please, show up. Locking
-in that area will change.
+                       file_lock_lock  may block
+fl_compare_owner:      yes             no
+fl_notify:             yes             no
+fl_grant:              no              no
+fl_release_private:    maybe           no
+fl_break:              yes             no
+fl_mylease:            yes             no
+fl_change              yes             no
+
 --------------------------- buffer_head -----------------------------------
 prototypes:
        void (*b_end_io)(struct buffer_head *bh, int uptodate);
@@ -364,17 +377,17 @@ prototypes:
        void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 
 locking rules:
-                       BKL     bd_mutex
-open:                  no      yes
-release:               no      yes
-ioctl:                 no      no
-compat_ioctl:          no      no
-direct_access:         no      no
-media_changed:         no      no
-unlock_native_capacity:        no      no
-revalidate_disk:       no      no
-getgeo:                        no      no
-swap_slot_free_notify: no      no      (see below)
+                       bd_mutex
+open:                  yes
+release:               yes
+ioctl:                 no
+compat_ioctl:          no
+direct_access:         no
+media_changed:         no
+unlock_native_capacity:        no
+revalidate_disk:       no
+getgeo:                        no
+swap_slot_free_notify: no      (see below)
 
 media_changed, unlock_native_capacity and revalidate_disk are called only from
 check_disk_change().
@@ -413,34 +426,21 @@ prototypes:
        unsigned long (*get_unmapped_area)(struct file *, unsigned long,
                        unsigned long, unsigned long, unsigned long);
        int (*check_flags)(int);
+       int (*flock) (struct file *, int, struct file_lock *);
+       ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
+                       size_t, unsigned int);
+       ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
+                       size_t, unsigned int);
+       int (*setlease)(struct file *, long, struct file_lock **);
 };
 
 locking rules:
-       All may block.
-                       BKL
-llseek:                        no      (see below)
-read:                  no
-aio_read:              no
-write:                 no
-aio_write:             no
-readdir:               no
-poll:                  no
-unlocked_ioctl:                no
-compat_ioctl:          no
-mmap:                  no
-open:                  no
-flush:                 no
-release:               no
-fsync:                 no      (see below)
-aio_fsync:             no
-fasync:                        no
-lock:                  yes
-readv:                 no
-writev:                        no
-sendfile:              no
-sendpage:              no
-get_unmapped_area:     no
-check_flags:           no
+       All may block except for ->setlease.
+       No VFS locks held on entry except for ->fsync and ->setlease.
+
+->fsync() has i_mutex on inode.
+
+->setlease has the file_list_lock held and must not sleep.
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
@@ -450,17 +450,10 @@ mutex or just to use i_size_read() instead.
 Note: this does not protect the file->f_pos against concurrent modifications
 since this is something the userspace has to take care about.
 
-Note: ext2_release() was *the* source of contention on fs-intensive
-loads and dropping BKL on ->release() helps to get rid of that (we still
-grab BKL for cases when we close a file that had been opened r/w, but that
-can and should be done using the internal locking with smaller critical areas).
-Current worst offender is ext2_get_block()...
-
-->fasync() is called without BKL protection, and is responsible for
-maintaining the FASYNC bit in filp->f_flags.  Most instances call
-fasync_helper(), which does that maintenance, so it's not normally
-something one needs to worry about.  Return values > 0 will be mapped to
-zero in the VFS layer.
+->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
+Most instances call fasync_helper(), which does that maintenance, so it's
+not normally something one needs to worry about.  Return values > 0 will be
+mapped to zero in the VFS layer.
 
 ->readdir() and ->ioctl() on directories must be changed. Ideally we would
 move ->readdir() to inode_operations and use a separate method for directory
@@ -471,8 +464,6 @@ components. And there are other reasons why the current interface is a mess...
 ->read on directories probably must go away - we should just enforce -EISDIR
 in sys_read() and friends.
 
-->fsync() has i_mutex on inode.
-
 --------------------------- dquot_operations -------------------------------
 prototypes:
        int (*write_dquot) (struct dquot *);
@@ -507,12 +498,12 @@ prototypes:
        int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
 
 locking rules:
-               BKL     mmap_sem        PageLocked(page)
-open:          no      yes
-close:         no      yes
-fault:         no      yes             can return with page locked
-page_mkwrite:  no      yes             can return with page locked
-access:                no      yes
+               mmap_sem        PageLocked(page)
+open:          yes
+close:         yes
+fault:         yes             can return with page locked
+page_mkwrite:  yes             can return with page locked
+access:                yes
 
        ->fault() is called when a previously not present pte is about
 to be faulted in. The filesystem must find and return the page associated
@@ -539,6 +530,3 @@ VM_IO | VM_PFNMAP VMAs.
 
 (if you break something or notice that it is broken and do not fix it yourself
 - at least put it here)
-
-ipc/shm.c::shm_delete() - may need BKL.
-->read() and ->write() in many drivers are (probably) missing BKL.
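
The Locking changes above introduce the paired ->follow_link()/->put_link()
prototypes: follow_link returns an opaque cookie (or an ERR_PTR) that the VFS
later hands back to put_link for cleanup. A minimal sketch of the pairing,
assuming a hypothetical filesystem "demo" and a hypothetical demo_read_link()
helper that returns a kmalloc'd path or an ERR_PTR:

    static void *demo_follow_link(struct dentry *dentry, struct nameidata *nd)
    {
            char *link = demo_read_link(dentry);   /* hypothetical helper */

            if (IS_ERR(link))
                    return link;                   /* VFS propagates the error */
            nd_set_link(nd, link);                 /* body used for the path walk */
            return link;                           /* cookie handed to ->put_link() */
    }

    static void demo_put_link(struct dentry *dentry, struct nameidata *nd,
                              void *cookie)
    {
            kfree(cookie);                         /* free what follow_link allocated */
    }

Per the table above, neither method is called with i_mutex held; the cookie
lets follow_link allocate without leaking once the path walk finishes.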
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt
index 715eaaf1519dd25fa0d4011684cd263bc12b2d2f..9a8674629a07598eb553970e1207b31b42ec0316 100644 (file)
        Notes: Further information in
        http://www.oreilly.com/catalog/linuxdrive2/
 
-     * Title: "Linux Device Drivers, 3nd Edition"
+     * Title: "Linux Device Drivers, 3rd Edition"
        Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
        Publisher: O'Reilly & Associates.
        Date: 2005.
        Pages: 600.
        ISBN: 0-13-101908-2
 
-     * Title:  "The  Design  and Implementation of the 4.4 BSD UNIX
-       Operating System"
-       Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
-       John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1996.
-       ISBN: 0-201-54979-4
-
      * Title: "Programming for the real world - POSIX.4"
        Author: Bill O. Gallmeister.
        Publisher: O'Reilly & Associates, Inc..
        POSIX. Good reference.
 
      * Title:  "UNIX  Systems  for  Modern Architectures: Symmetric
-       Multiprocesssing and Caching for Kernel Programmers"
+       Multiprocessing and Caching for Kernel Programmers"
        Author: Curt Schimmel.
        Publisher: Addison Wesley.
        Date: June, 1994.
        Pages: 432.
        ISBN: 0-201-63338-8
 
-     * Title:  "The  Design  and Implementation of the 4.3 BSD UNIX
-       Operating System"
-       Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J.
-       Karels, John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1989 (reprinted with corrections on October, 1990).
-       ISBN: 0-201-06196-1
-
-     * Title: "The Design of the UNIX Operating System"
-       Author: Maurice J. Bach.
-       Publisher: Prentice Hall.
-       Date: 1986.
-       Pages: 471.
-       ISBN: 0-13-201757-1
-
      MISCELLANEOUS:
 
      * Name: linux/Documentation
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 8b61c93609994dd91e36c25e1b29647ad084eaff..f3dc951e949f04255d90f35b5da4b78c7d015a67 100644 (file)
@@ -1579,20 +1579,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nmi_watchdog=   [KNL,BUGS=X86] Debugging features for SMP kernels
                        Format: [panic,][num]
-                       Valid num: 0,1,2
+                       Valid num: 0
                        0 - turn nmi_watchdog off
-                       1 - use the IO-APIC timer for the NMI watchdog
-                       2 - use the local APIC for the NMI watchdog using
-                       a performance counter. Note: This will use one
-                       performance counter and the local APIC's performance
-                       vector.
                        When panic is specified, panic when an NMI watchdog
                        timeout occurs.
                        This is useful when you use a panic=... timeout and
                        need the box quickly up again.
-                       Instead of 1 and 2 it is possible to use the following
-                       symbolic names: lapic and ioapic
-                       Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
 
        netpoll.carrier_timeout=
                        [NET] Specifies amount of time (in seconds) that
@@ -1622,6 +1614,8 @@ and is between 256 and 4096 characters. It is defined in the file
        noapic          [SMP,APIC] Tells the kernel to not make use of any
                        IOAPICs that may be present in the system.
 
+       noautogroup     Disable scheduler automatic task group creation.
+
        nobats          [PPC] Do not use BATs for mapping kernel lowmem
                        on "Classic" PPC cores.
 
@@ -1759,7 +1753,7 @@ and is between 256 and 4096 characters. It is defined in the file
 
        nousb           [USB] Disable the USB subsystem
 
-       nowatchdog      [KNL] Disable the lockup detector.
+       nowatchdog      [KNL] Disable the lockup detector (NMI watchdog).
 
        nowb            [ARM]
 
@@ -2467,12 +2461,13 @@ and is between 256 and 4096 characters. It is defined in the file
                        to facilitate early boot debugging.
                        See also Documentation/trace/events.txt
 
-       tsc=            Disable clocksource-must-verify flag for TSC.
+       tsc=            Disable clocksource stability checks for TSC.
                        Format: <string>
                        [x86] reliable: mark tsc clocksource as reliable, this
-                       disables clocksource verification at runtime.
-                       Used to enable high-resolution timer mode on older
-                       hardware, and in virtualized environment.
+                       disables clocksource verification at runtime, as well
+                       as the stability checks done at bootup. Used to enable
+                       high-resolution timer mode on older hardware, and in
+                       virtualized environment.
                        [x86] noirqtime: Do not use TSC to do irq accounting.
                        Used to run time disable IRQ_TIME_ACCOUNTING on any
                        platforms where RDTSC is slow and this accounting
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
index 570ef2b3d79b16c2f57c01f88d5de3a1a954f6a4..df322c1034667eebbced0792f2c8a7c733e5cf78 100644 (file)
@@ -1044,9 +1044,9 @@ Details:
 
 
 /**
- *      queuecommand - queue scsi command, invoke 'done' on completion
+ *      queuecommand - queue scsi command, invoke scp->scsi_done on completion
+ *      @shost: pointer to the scsi host object
  *      @scp: pointer to scsi command object
- *      @done: function pointer to be invoked on completion
  *
  *      Returns 0 on success.
  *
@@ -1074,42 +1074,45 @@ Details:
  *
  *      Other types of errors that are detected immediately may be
  *      flagged by setting scp->result to an appropriate value,
- *      invoking the 'done' callback, and then returning 0 from this
- *      function. If the command is not performed immediately (and the
- *      LLD is starting (or will start) the given command) then this
- *      function should place 0 in scp->result and return 0.
+ *      invoking the scp->scsi_done callback, and then returning 0
+ *      from this function. If the command is not performed
+ *      immediately (and the LLD is starting (or will start) the given
+ *      command) then this function should place 0 in scp->result and
+ *      return 0.
  *
  *      Command ownership.  If the driver returns zero, it owns the
- *      command and must take responsibility for ensuring the 'done'
- *      callback is executed.  Note: the driver may call done before
- *      returning zero, but after it has called done, it may not
- *      return any value other than zero.  If the driver makes a
- *      non-zero return, it must not execute the command's done
- *      callback at any time.
- *
- *      Locks: struct Scsi_Host::host_lock held on entry (with "irqsave")
- *             and is expected to be held on return.
+ *      command and must take responsibility for ensuring the
+ *      scp->scsi_done callback is executed.  Note: the driver may
+ *      call scp->scsi_done before returning zero, but after it has
+ *      called scp->scsi_done, it may not return any value other than
+ *      zero.  If the driver makes a non-zero return, it must not
+ *      execute the command's scsi_done callback at any time.
+ *
+ *      Locks: up to and including 2.6.36, struct Scsi_Host::host_lock
+ *             held on entry (with "irqsave") and is expected to be
+ *             held on return. From 2.6.37 onwards, queuecommand is
+ *             called without any locks held.
  *
  *      Calling context: in interrupt (soft irq) or process context
  *
- *      Notes: This function should be relatively fast. Normally it will
- *      not wait for IO to complete. Hence the 'done' callback is invoked 
- *      (often directly from an interrupt service routine) some time after
- *      this function has returned. In some cases (e.g. pseudo adapter 
- *      drivers that manufacture the response to a SCSI INQUIRY)
- *      the 'done' callback may be invoked before this function returns.
- *      If the 'done' callback is not invoked within a certain period
- *      the SCSI mid level will commence error processing.
- *      If a status of CHECK CONDITION is placed in "result" when the
- *      'done' callback is invoked, then the LLD driver should 
- *      perform autosense and fill in the struct scsi_cmnd::sense_buffer
+ *      Notes: This function should be relatively fast. Normally it
+ *      will not wait for IO to complete. Hence the scp->scsi_done
+ *      callback is invoked (often directly from an interrupt service
+ *      routine) some time after this function has returned. In some
+ *      cases (e.g. pseudo adapter drivers that manufacture the
+ *      response to a SCSI INQUIRY) the scp->scsi_done callback may be
+ *      invoked before this function returns.  If the scp->scsi_done
+ *      callback is not invoked within a certain period the SCSI mid
+ *      level will commence error processing.  If a status of CHECK
+ *      CONDITION is placed in "result" when the scp->scsi_done
+ *      callback is invoked, then the LLD driver should perform
+ *      autosense and fill in the struct scsi_cmnd::sense_buffer
  *      array. The scsi_cmnd::sense_buffer array is zeroed prior to
  *      the mid level queuing a command to an LLD.
  *
  *      Defined in: LLD
  **/
-    int queuecommand(struct scsi_cmnd * scp, 
-                     void (*done)(struct scsi_cmnd *))
+    int queuecommand(struct Scsi_Host *shost, struct scsi_cmnd * scp)
 
 
 /**
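[Illustration, not part of this patch] To make the ownership and completion
rules above concrete, here is a minimal sketch of an LLD implementation of
the new two-argument hook; the mydrv_* helpers are hypothetical placeholders
for hardware-specific code:

    static int mydrv_queuecommand(struct Scsi_Host *shost,
                                  struct scsi_cmnd *scp)
    {
        struct mydrv_host *hw = shost_priv(shost); /* LLD private data */

        /* transient shortage: refuse ownership, mid level will retry */
        if (mydrv_hw_busy(hw))
            return SCSI_MLQUEUE_HOST_BUSY;

        /* error detected immediately: set result, complete, return 0 */
        if (!mydrv_cmd_supported(scp)) {
            scp->result = DID_NO_CONNECT << 16;
            scp->scsi_done(scp);
            return 0;
        }

        scp->result = 0;
        mydrv_hw_queue(hw, scp); /* scsi_done invoked later, from the ISR */
        return 0;                /* zero return: we now own the command */
    }
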
diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt
new file mode 100644 (file)
index 0000000..96d87b6
--- /dev/null
@@ -0,0 +1,90 @@
+
+                       Subsystem Trace Points: power
+
+The power tracing system captures events related to power transitions
+within the kernel. Broadly speaking, there are three major categories:
+
+  o Power state switches, which report events related to suspend (S-states),
+    cpuidle (C-states) and cpufreq (P-states)
+  o System clock related changes
+  o Power domain related changes and transitions
+
+This document describes what each tracepoint is and why it might be
+useful.
+
+See include/trace/events/power.h for the event definitions.
+
+1. Power state switch events
+============================
+
+1.1 New trace API
+-----------------
+
+A 'cpu' event class gathers the CPU-related events: cpuidle and
+cpufreq.
+
+cpu_idle               "state=%lu cpu_id=%lu"
+cpu_frequency          "state=%lu cpu_id=%lu"
+
+A suspend event is used to indicate the system going into and out of
+suspend mode:
+
+machine_suspend                "state=%lu"
+
+
+Note: a 'state' value of '-1' (i.e. '4294967295' unsigned) means an
+exit from the current state. For example,
+trace_cpu_idle(4, smp_processor_id()) means that the CPU enters idle
+state 4, while trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id())
+means that it exits the previous idle state.
+
+The event with 'state=4294967295' in the trace is very important to
+user space tools, which rely on it to detect the end of the current
+state and so to draw the state diagrams and calculate statistics
+correctly.
+
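[Illustration, not part of the file above] A cpuidle driver would typically
bracket an idle period with this event pair; do_low_power_wait() is a
hypothetical platform hook:

    #include <trace/events/power.h>

    static void my_enter_idle(unsigned long state)
    {
        trace_cpu_idle(state, smp_processor_id());  /* entering 'state' */
        do_low_power_wait();                        /* platform specific */
        /* exit is signalled with the special PWR_EVENT_EXIT (-1) value */
        trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
    }
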
+1.2 DEPRECATED trace API
+------------------------
+
+A new Kconfig option, CONFIG_EVENT_POWER_TRACING_DEPRECATED, with a
+default value of 'y', has been created. This allows the legacy trace
+power API to be used alongside the new trace API.
+The Kconfig option, the old trace API (in include/trace/events/power.h)
+and the old trace points will disappear in a future release (namely
+2.6.41).
+
+power_start            "type=%lu state=%lu cpu_id=%lu"
+power_frequency                "type=%lu state=%lu cpu_id=%lu"
+power_end              "cpu_id=%lu"
+
+The 'type' parameter takes one of these macros:
+ . POWER_NONE  = 0,
+ . POWER_CSTATE        = 1,    /* C-State */
+ . POWER_PSTATE        = 2,    /* Frequency change or DVFS */
+
+The 'state' parameter is set depending on the type:
+ . Target C-state for type=POWER_CSTATE,
+ . Target frequency for type=POWER_PSTATE,
+
+power_end is used to indicate the exit of a state, corresponding to the latest
+power_start event.
+
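[Illustration, not part of the file above] For comparison, the same idle
period instrumented with the deprecated API would look roughly like this
(valid only while the Kconfig option above is 'y'; do_low_power_wait() is
again a hypothetical hook):

    trace_power_start(POWER_CSTATE, 4, smp_processor_id()); /* enter C4 */
    do_low_power_wait();
    trace_power_end(smp_processor_id());                    /* leave C4 */
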
+2. Clocks events
+================
+The clock events are used for clock enable/disable and for
+clock rate change.
+
+clock_enable           "%s state=%lu cpu_id=%lu"
+clock_disable          "%s state=%lu cpu_id=%lu"
+clock_set_rate         "%s state=%lu cpu_id=%lu"
+
+The first parameter gives the clock name (e.g. "gpio1_iclk").
+The second parameter is '1' for enable, '0' for disable, or the target
+clock rate for set_rate.
+
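[Illustration, not part of the file above] A sketch of how a clock
framework implementation might emit these events; the clock name and rate
are illustrative:

    trace_clock_enable("gpio1_iclk", 1, smp_processor_id());
    trace_clock_set_rate("gpio1_iclk", 12000000, smp_processor_id());
    trace_clock_disable("gpio1_iclk", 0, smp_processor_id());
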
+3. Power domains events
+=======================
+The power domain events are used for power domain transitions.
+
+power_domain_target    "%s state=%lu cpu_id=%lu"
+
+The first parameter gives the power domain name (e.g. "mpu_pwrdm").
+The second parameter is the power domain target state.
+
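[Illustration, not part of the file above] The corresponding call from a
power domain implementation; the domain name and target state are
illustrative:

    trace_power_domain_target("mpu_pwrdm", 1, smp_processor_id());
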
index b3e73ddb1567902fdeb9d65e1a661db9bca7780d..12cecc83cd91c658c71524bba59762b83f810829 100644 (file)
@@ -373,9 +373,18 @@ EVENT_PROCESS:
                                print "         $regex_lru_isolate/o\n";
                                next;
                        }
+                       my $isolate_mode = $1;
                        my $nr_scanned = $4;
                        my $nr_contig_dirty = $7;
-                       $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+
+                       # To more closely match vmstat scanning statistics, only count
+                       # isolate_both and isolate_inactive as scanning. isolate_active
+                       # is rotation, not scanning.
+                       # isolate_inactive == 0
+                       # isolate_active   == 1
+                       # isolate_both     == 2
+                       if ($isolate_mode != 1) {
+                               $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+                       }
                        $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty;
                } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
                        $details = $5;
index 30b43e1b26979cee024aa7636e250520b8ba235f..bdeb81ccb5f61b973280f9704f28a9f9cd8cc2e1 100644 (file)
@@ -600,6 +600,7 @@ Protocol:   2.07+
   0x00000001   lguest
   0x00000002   Xen
   0x00000003   Moorestown MID
+  0x00000004   CE4100 TV Platform
 
 Field name:    hardware_subarch_data
 Type:          write (subarch-dependent)
index 6a588873cf8d2da8b00f803a1dcd8d96841292bd..c5c7292daba076e439843e3b8c5c09a5d96d2285 100644 (file)
@@ -405,7 +405,7 @@ S:  Supported
 F:     drivers/usb/gadget/amd5536udc.*
 
 AMD GEODE PROCESSOR/CHIPSET SUPPORT
-P:     Jordan Crouse
+P:     Andres Salomon <dilinger@queued.net>
 L:     linux-geode@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:     Supported
@@ -792,11 +792,14 @@ S:        Maintained
 
 ARM/NOMADIK ARCHITECTURE
 M:     Alessandro Rubini <rubini@unipv.it>
+M:     Linus Walleij <linus.walleij@stericsson.com>
 M:     STEricsson <STEricsson_nomadik_linux@list.st.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-nomadik/
 F:     arch/arm/plat-nomadik/
+F:     drivers/i2c/busses/i2c-nomadik.c
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 
 ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT
 M:     Nelson Castillo <arhuaco@freaks-unidos.net>
@@ -998,12 +1001,24 @@ F:       drivers/i2c/busses/i2c-stu300.c
 F:     drivers/rtc/rtc-coh901331.c
 F:     drivers/watchdog/coh901327_wdt.c
 F:     drivers/dma/coh901318*
+F:     drivers/mfd/ab3100*
+F:     drivers/rtc/rtc-ab3100.c
+F:     drivers/rtc/rtc-coh901331.c
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 
-ARM/U8500 ARM ARCHITECTURE
+ARM/Ux500 ARM ARCHITECTURE
 M:     Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
+M:     Linus Walleij <linus.walleij@stericsson.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-ux500/
+F:     drivers/dma/ste_dma40*
+F:     drivers/mfd/ab3550*
+F:     drivers/mfd/abx500*
+F:     drivers/mfd/ab8500*
+F:     drivers/mfd/stmpe*
+F:     drivers/rtc/rtc-ab8500.c
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 
 ARM/VFP SUPPORT
 M:     Russell King <linux@arm.linux.org.uk>
@@ -2797,6 +2812,10 @@ M:       Thomas Gleixner <tglx@linutronix.de>
 S:     Maintained
 F:     Documentation/timers/
 F:     kernel/hrtimer.c
+F:     kernel/time/clockevents.c
+F:     kernel/time/tick*.*
+F:     kernel/time/timer_*.c
+F:     include/linux/clockevents.h
 F:     include/linux/hrtimer.h
 
 HIGH-SPEED SCC DRIVER FOR AX.25
@@ -4590,7 +4609,7 @@ F:        drivers/pcmcia/
 F:     include/pcmcia/
 
 PCNET32 NETWORK DRIVER
-M:     Don Fry <pcnet32@verizon.net>
+M:     Don Fry <pcnet32@frontier.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/pcnet32.c
@@ -4612,7 +4631,7 @@ PERFORMANCE EVENTS SUBSYSTEM
 M:     Peter Zijlstra <a.p.zijlstra@chello.nl>
 M:     Paul Mackerras <paulus@samba.org>
 M:     Ingo Molnar <mingo@elte.hu>
-M:     Arnaldo Carvalho de Melo <acme@redhat.com>
+M:     Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 S:     Supported
 F:     kernel/perf_event*.c
 F:     include/linux/perf_event.h
@@ -5127,6 +5146,18 @@ L:       alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     sound/soc/s3c24xx
 
+TIMEKEEPING, NTP
+M:     John Stultz <johnstul@us.ibm.com>
+M:     Thomas Gleixner <tglx@linutronix.de>
+S:     Supported
+F:     include/linux/clocksource.h
+F:     include/linux/time.h
+F:     include/linux/timex.h
+F:     include/linux/timekeeping.h
+F:     kernel/time/clocksource.c
+F:     kernel/time/time*.c
+F:     kernel/time/ntp.c
+
 TLG2300 VIDEO4LINUX-2 DRIVER
 M:     Huang Shijie <shijie8@gmail.com>
 M:     Kang Yong <kangyong@telegent.com>
index 77044b7918a5cbe8439d9000175ca0b7f0ea4717..74b25559f831c5c48150789ed3230921635e0ae0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 37
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Flesh-Eating Bats with Fangs
 
 # *DOCUMENTATION*
index 8bf0fa652eb63c57dec1ebfec1a93a407be4ed32..f78c2be4242b437ced3308795952102bf1359763 100644 (file)
@@ -175,4 +175,7 @@ config HAVE_PERF_EVENTS_NMI
 config HAVE_ARCH_JUMP_LABEL
        bool
 
+config HAVE_ARCH_MUTEX_CPU_RELAX
+       bool
+
 source "kernel/gcov/Kconfig"
index fe792ca818f64c4d9954e8b62e5f8064a5d5777e..5996e7a6757e4e058d4d7e7f9d10f76fbf6b6d20 100644 (file)
@@ -1,10 +1,4 @@
 #ifndef __ASM_ALPHA_PERF_EVENT_H
 #define __ASM_ALPHA_PERF_EVENT_H
 
-#ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
-#else
-static inline void init_hw_perf_events(void)    { }
-#endif
-
 #endif /* __ASM_ALPHA_PERF_EVENT_H */
index 5f77afb88e898b849f57e5c115f4688282e71c62..4c8bb374eb0a288d03d2cf5ad124e914638527f2 100644 (file)
@@ -112,8 +112,6 @@ init_IRQ(void)
        wrent(entInt, 0);
 
        alpha_mv.init_irq();
-
-       init_hw_perf_events();
 }
 
 /*
index 1cc49683fb69b2a5f96639e71a2f1af821479e77..90561c45e7d8928e8e137e33164d2d2d661a28b8 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/kdebug.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/hwrpb.h>
 #include <asm/atomic.h>
@@ -863,13 +864,13 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 /*
  * Init call to initialise performance events at kernel startup.
  */
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        pr_info("Performance events: ");
 
        if (!supported_cpu()) {
                pr_cont("No support for your CPU.\n");
-               return;
+               return 0;
        }
 
        pr_cont("Supported CPU type!\n");
@@ -881,6 +882,8 @@ void __init init_hw_perf_events(void)
        /* And set up PMU specification */
        alpha_pmu = &ev67_pmu;
 
-       perf_pmu_register(&pmu);
-}
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
+       return 0;
+}
+early_initcall(init_hw_perf_events);
index 1bec96e851967101df7a796745b84d24bd320ab8..42ff90b46dfb1f1e0e91fdfe370f493e0b81551f 100644 (file)
@@ -352,3 +352,4 @@ struct pci_bus * __init it8152_pci_scan_bus(int nr, struct pci_sys_data *sys)
        return pci_scan_bus(nr, &it8152_ops, sys);
 }
 
+EXPORT_SYMBOL(dma_set_coherent_mask);
index 21fa272301f804b9bad3218b74147f6e450fa026..b2f95c72287c861dc03c09e110242982bb51a5c5 100644 (file)
@@ -76,6 +76,7 @@ extern unsigned long it8152_base_address;
   IT8152_PD_IRQ(0)  Audio controller (ACR)
  */
 #define IT8152_IRQ(x)   (IRQ_BOARD_START + (x))
+#define IT8152_LAST_IRQ        (IRQ_BOARD_START + 40)
 
 /* IRQ-sources in 3 groups - local devices, LPC (serial), and external PCI */
 #define IT8152_LD_IRQ_COUNT     9
index 1fc684e70ab6a6c9915ad5fbd302066ccb050a29..7080e2c8fa6209e0445b59a7b33ba8bd825797af 100644 (file)
@@ -25,9 +25,6 @@ extern void *kmap_high(struct page *page);
 extern void *kmap_high_get(struct page *page);
 extern void kunmap_high(struct page *page);
 
-extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte);
-extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte);
-
 /*
  * The following functions are already defined by <linux/highmem.h>
  * when CONFIG_HIGHMEM is not set.
index 4fc1565e4f930860722bca331330d0606e494d1c..316bb2b2be3dd7ca64638ea2d13d502e66ee5fde 100644 (file)
@@ -13,9 +13,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-/* DO NOT EDIT!! - this file automatically generated
- *                 from .s file by awk -f s2h.awk
- */
 /*  Size definitions
  *  Copyright (C) ARM Limited 1998. All rights reserved.
  */
@@ -25,6 +22,9 @@
 
 /* handy sizes */
 #define SZ_16                          0x00000010
+#define SZ_32                          0x00000020
+#define SZ_64                          0x00000040
+#define SZ_128                         0x00000080
 #define SZ_256                         0x00000100
 #define SZ_512                         0x00000200
 
index 1120f18a6b17695e48c37a6b4d1d7e50d306e478..80025948b8ad378e29d162030e962bb1dd462255 100644 (file)
@@ -150,6 +150,7 @@ extern unsigned int user_debug;
 #define rmb()          dmb()
 #define wmb()          mb()
 #else
+#include <asm/memory.h>
 #define mb()   do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
 #define rmb()  do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
 #define wmb()  do { if (arch_is_coherent()) dmb(); else barrier(); } while (0)
index 8bfa98757cd2f3fc9ef128011944a55fa7838ffd..80bf8cd88d7c522c319354c75b13d5484b9ede96 100644 (file)
@@ -29,6 +29,9 @@ ret_fast_syscall:
        ldr     r1, [tsk, #TI_FLAGS]
        tst     r1, #_TIF_WORK_MASK
        bne     fast_work_pending
+#if defined(CONFIG_IRQSOFF_TRACER)
+       asm_trace_hardirqs_on
+#endif
 
        /* perform architecture specific actions before user return */
        arch_ret_to_user r1, lr
@@ -65,6 +68,9 @@ ret_slow_syscall:
        tst     r1, #_TIF_WORK_MASK
        bne     work_pending
 no_work_pending:
+#if defined(CONFIG_IRQSOFF_TRACER)
+       asm_trace_hardirqs_on
+#endif
        /* perform architecture specific actions before user return */
        arch_ret_to_user r1, lr
 
index 07a50357492ac6858bc21d0b7913aefc93cbb1cf..fdfa4976b0bfeca637178609bb5e8d51e63ac672 100644 (file)
@@ -3034,11 +3034,11 @@ init_hw_perf_events(void)
                pr_info("no hardware support available\n");
        }
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 /*
  * Callchain handling code.
index 8c1959590252e7161f1da38497eddba9b0538afb..9066473c0ebc3991a2f793e8c78616e26b31decb 100644 (file)
@@ -310,7 +310,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
         * All kernel threads share the same mm context; grab a
         * reference and switch to it.
         */
-       atomic_inc(&mm->mm_users);
        atomic_inc(&mm->mm_count);
        current->active_mm = mm;
        cpumask_set_cpu(cpu, mm_cpumask(mm));
index 57f8ee154943d91b50882a7b01ab2d602d5ba631..27ac6f550fe36eb686e8c180a727c8bf29d2c286 100644 (file)
@@ -74,6 +74,8 @@
 #define                        AT91_MCI_TRTYP_BLOCK    (0 << 19)
 #define                        AT91_MCI_TRTYP_MULTIPLE (1 << 19)
 #define                        AT91_MCI_TRTYP_STREAM   (2 << 19)
+#define                        AT91_MCI_TRTYP_SDIO_BYTE        (4 << 19)
+#define                        AT91_MCI_TRTYP_SDIO_BLOCK       (5 << 19)
 
 #define AT91_MCI_BLKR          0x18            /* Block Register */
 #define                AT91_MCI_BLKR_BCNT(n)   ((0xffff & (n)) << 0)   /* Block count */
index 24498a932ba65b8054de5ba8229ef0e0c838dd71..a54b3db8036678c8c385705b7c22b763769bb515 100644 (file)
@@ -513,4 +513,4 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
 
 EXPORT_SYMBOL(ixp4xx_pci_read);
 EXPORT_SYMBOL(ixp4xx_pci_write);
-
+EXPORT_SYMBOL(dma_set_coherent_mask);
index dd235ecc9d6c5946d6610bb7adef42cb57476210..c93e73d54dd1e7c5cebf41b13bc31b37a600391d 100644 (file)
@@ -540,6 +540,7 @@ config MACH_ICONTROL
 config ARCH_PXA_ESERIES
        bool "PXA based Toshiba e-series PDAs"
        select PXA25x
+       select FB_W100
 
 config MACH_E330
        bool "Toshiba e330"
index 52c30b01a67139e88fdb71af7dd7371525167bfd..ae008110db4edd934716e7b0a07d26aab1d84813 100644 (file)
@@ -353,8 +353,8 @@ resume_turn_on_mmu:
 
        @ Let us ensure we jump to resume_after_mmu only when the mcr above
        @ actually took effect.  They call it the "cpwait" operation.
-       mrc     p15, 0, r1, c2, c0, 0           @ queue a dependency on CP15
-       sub     pc, r2, r1, lsr #32             @ jump to virtual addr
+       mrc     p15, 0, r0, c2, c0, 0           @ queue a dependency on CP15
+       sub     pc, r2, r0, lsr #32             @ jump to virtual addr
        nop
        nop
        nop
index 6e77c042d8e9417ad5b9141c6eab37767693e3ed..e0b0e7a4ec68a3c577959e9116b9c7fa5d0be09d 100644 (file)
  */
 
 #include <linux/init.h>
+#include <linux/highmem.h>
 #include <asm/cacheflush.h>
-#include <asm/kmap_types.h>
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
 #include <plat/cache-feroceon-l2.h>
-#include "mm.h"
 
 /*
  * Low-level cache maintenance operations.
  * between which we don't want to be preempted.
  */
 
-static inline unsigned long l2_start_va(unsigned long paddr)
+static inline unsigned long l2_get_va(unsigned long paddr)
 {
 #ifdef CONFIG_HIGHMEM
        /*
-        * Let's do our own fixmap stuff in a minimal way here.
         * Because range ops can't be done on physical addresses,
         * we simply install a virtual mapping for it only for the
         * TLB lookup to occur, hence no need to flush the untouched
-        * memory mapping.  This is protected with the disabling of
-        * interrupts by the caller.
+        * memory mapping afterwards (note: a cache flush may happen
+        * in some circumstances depending on the path taken in kunmap_atomic).
         */
-       unsigned long idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
-       unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-       set_pte_ext(TOP_PTE(vaddr), pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL), 0);
-       local_flush_tlb_kernel_page(vaddr);
-       return vaddr + (paddr & ~PAGE_MASK);
+       void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
+       return (unsigned long)vaddr + (paddr & ~PAGE_MASK);
 #else
        return __phys_to_virt(paddr);
 #endif
 }
 
+static inline void l2_put_va(unsigned long vaddr)
+{
+#ifdef CONFIG_HIGHMEM
+       kunmap_atomic((void *)vaddr);
+#endif
+}
+
 static inline void l2_clean_pa(unsigned long addr)
 {
        __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
@@ -76,13 +75,14 @@ static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
         */
        BUG_ON((start ^ end) >> PAGE_SHIFT);
 
-       raw_local_irq_save(flags);
-       va_start = l2_start_va(start);
+       va_start = l2_get_va(start);
        va_end = va_start + (end - start);
+       raw_local_irq_save(flags);
        __asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
                "mcr p15, 1, %1, c15, c9, 5"
                : : "r" (va_start), "r" (va_end));
        raw_local_irq_restore(flags);
+       l2_put_va(va_start);
 }
 
 static inline void l2_clean_inv_pa(unsigned long addr)
@@ -106,13 +106,14 @@ static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
         */
        BUG_ON((start ^ end) >> PAGE_SHIFT);
 
-       raw_local_irq_save(flags);
-       va_start = l2_start_va(start);
+       va_start = l2_get_va(start);
        va_end = va_start + (end - start);
+       raw_local_irq_save(flags);
        __asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
                "mcr p15, 1, %1, c15, c11, 5"
                : : "r" (va_start), "r" (va_end));
        raw_local_irq_restore(flags);
+       l2_put_va(va_start);
 }
 
 static inline void l2_inv_all(void)
index c3154928bccdf9750bef6da4e4fc29c2aad732f5..5a32020471e3bab2fc1e966a6e968c17a87f1d37 100644 (file)
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 #include <linux/init.h>
+#include <linux/highmem.h>
 #include <asm/system.h>
 #include <asm/cputype.h>
 #include <asm/cacheflush.h>
-#include <asm/kmap_types.h>
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include "mm.h"
 
 #define CR_L2  (1 << 26)
 
@@ -71,16 +67,15 @@ static inline void xsc3_l2_inv_all(void)
        dsb();
 }
 
+static inline void l2_unmap_va(unsigned long va)
+{
 #ifdef CONFIG_HIGHMEM
-#define l2_map_save_flags(x)           raw_local_save_flags(x)
-#define l2_map_restore_flags(x)                raw_local_irq_restore(x)
-#else
-#define l2_map_save_flags(x)           ((x) = 0)
-#define l2_map_restore_flags(x)                ((void)(x))
+       if (va != -1)
+               kunmap_atomic((void *)va);
 #endif
+}
 
-static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
-                                     unsigned long flags)
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
 {
 #ifdef CONFIG_HIGHMEM
        unsigned long va = prev_va & PAGE_MASK;
@@ -89,17 +84,10 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
                /*
                 * Switching to a new page.  Because cache ops are
                 * using virtual addresses only, we must put a mapping
-                * in place for it.  We also enable interrupts for a
-                * short while and disable them again to protect this
-                * mapping.
+                * in place for it.
                 */
-               unsigned long idx;
-               raw_local_irq_restore(flags);
-               idx = KM_L2_CACHE + KM_TYPE_NR * smp_processor_id();
-               va = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-               raw_local_irq_restore(flags | PSR_I_BIT);
-               set_pte_ext(TOP_PTE(va), pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL), 0);
-               local_flush_tlb_kernel_page(va);
+               l2_unmap_va(prev_va);
+               va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT);
        }
        return va + (pa_offset >> (32 - PAGE_SHIFT));
 #else
@@ -109,7 +97,7 @@ static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va,
 
 static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
 {
-       unsigned long vaddr, flags;
+       unsigned long vaddr;
 
        if (start == 0 && end == -1ul) {
                xsc3_l2_inv_all();
@@ -117,13 +105,12 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
        }
 
        vaddr = -1;  /* to force the first mapping */
-       l2_map_save_flags(flags);
 
        /*
         * Clean and invalidate partial first cache line.
         */
        if (start & (CACHE_LINE_SIZE - 1)) {
-               vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr, flags);
+               vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr);
                xsc3_l2_clean_mva(vaddr);
                xsc3_l2_inv_mva(vaddr);
                start = (start | (CACHE_LINE_SIZE - 1)) + 1;
@@ -133,7 +120,7 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
         * Invalidate all full cache lines between 'start' and 'end'.
         */
        while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_inv_mva(vaddr);
                start += CACHE_LINE_SIZE;
        }
@@ -142,31 +129,30 @@ static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
         * Clean and invalidate partial last cache line.
         */
        if (start < end) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_clean_mva(vaddr);
                xsc3_l2_inv_mva(vaddr);
        }
 
-       l2_map_restore_flags(flags);
+       l2_unmap_va(vaddr);
 
        dsb();
 }
 
 static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
 {
-       unsigned long vaddr, flags;
+       unsigned long vaddr;
 
        vaddr = -1;  /* to force the first mapping */
-       l2_map_save_flags(flags);
 
        start &= ~(CACHE_LINE_SIZE - 1);
        while (start < end) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_clean_mva(vaddr);
                start += CACHE_LINE_SIZE;
        }
 
-       l2_map_restore_flags(flags);
+       l2_unmap_va(vaddr);
 
        dsb();
 }
@@ -193,7 +179,7 @@ static inline void xsc3_l2_flush_all(void)
 
 static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
 {
-       unsigned long vaddr, flags;
+       unsigned long vaddr;
 
        if (start == 0 && end == -1ul) {
                xsc3_l2_flush_all();
@@ -201,17 +187,16 @@ static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
        }
 
        vaddr = -1;  /* to force the first mapping */
-       l2_map_save_flags(flags);
 
        start &= ~(CACHE_LINE_SIZE - 1);
        while (start < end) {
-               vaddr = l2_map_va(start, vaddr, flags);
+               vaddr = l2_map_va(start, vaddr);
                xsc3_l2_clean_mva(vaddr);
                xsc3_l2_inv_mva(vaddr);
                start += CACHE_LINE_SIZE;
        }
 
-       l2_map_restore_flags(flags);
+       l2_unmap_va(vaddr);
 
        dsb();
 }
index ac6a36142fcd5a28084b3c669fac9e800fd65497..809f1bf9fa29ef0fda15cf7563cffeed7a5bb98d 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/highmem.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
@@ -480,10 +481,10 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset,
                                op(vaddr, len, dir);
                                kunmap_high(page);
                        } else if (cache_is_vipt()) {
-                               pte_t saved_pte;
-                               vaddr = kmap_high_l1_vipt(page, &saved_pte);
+                               /* unmapped pages might still be cached */
+                               vaddr = kmap_atomic(page);
                                op(vaddr + offset, len, dir);
-                               kunmap_high_l1_vipt(page, saved_pte);
+                               kunmap_atomic(vaddr);
                        }
                } else {
                        vaddr = page_address(page) + offset;
index 391ffae750986404df8658d53e93fdc69b970ba4..c29f2839f1d2b72c8aa99e17db44e120a42bc7ee 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/highmem.h>
 
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
@@ -180,10 +181,10 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
                        __cpuc_flush_dcache_area(addr, PAGE_SIZE);
                        kunmap_high(page);
                } else if (cache_is_vipt()) {
-                       pte_t saved_pte;
-                       addr = kmap_high_l1_vipt(page, &saved_pte);
+                       /* unmapped pages might still be cached */
+                       addr = kmap_atomic(page);
                        __cpuc_flush_dcache_area(addr, PAGE_SIZE);
-                       kunmap_high_l1_vipt(page, saved_pte);
+                       kunmap_atomic(addr);
                }
        }
 
index c435fd9e1da95c9fdc9d7fab83b3a42caef1b905..807c0573abbe82533a884f87ea7ea243051a228d 100644 (file)
@@ -140,90 +140,3 @@ struct page *kmap_atomic_to_page(const void *ptr)
        pte = TOP_PTE(vaddr);
        return pte_page(*pte);
 }
-
-#ifdef CONFIG_CPU_CACHE_VIPT
-
-#include <linux/percpu.h>
-
-/*
- * The VIVT cache of a highmem page is always flushed before the page
- * is unmapped. Hence unmapped highmem pages need no cache maintenance
- * in that case.
- *
- * However unmapped pages may still be cached with a VIPT cache, and
- * it is not possible to perform cache maintenance on them using physical
- * addresses unfortunately.  So we have no choice but to set up a temporary
- * virtual mapping for that purpose.
- *
- * Yet this VIPT cache maintenance may be triggered from DMA support
- * functions which are possibly called from interrupt context. As we don't
- * want to keep interrupt disabled all the time when such maintenance is
- * taking place, we therefore allow for some reentrancy by preserving and
- * restoring the previous fixmap entry before the interrupted context is
- * resumed.  If the reentrancy depth is 0 then there is no need to restore
- * the previous fixmap, and leaving the current one in place allow it to
- * be reused the next time without a TLB flush (common with DMA).
- */
-
-static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth);
-
-void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte)
-{
-       unsigned int idx, cpu;
-       int *depth;
-       unsigned long vaddr, flags;
-       pte_t pte, *ptep;
-
-       if (!in_interrupt())
-               preempt_disable();
-
-       cpu = smp_processor_id();
-       depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
-
-       idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
-       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-       ptep = TOP_PTE(vaddr);
-       pte = mk_pte(page, kmap_prot);
-
-       raw_local_irq_save(flags);
-       (*depth)++;
-       if (pte_val(*ptep) == pte_val(pte)) {
-               *saved_pte = pte;
-       } else {
-               *saved_pte = *ptep;
-               set_pte_ext(ptep, pte, 0);
-               local_flush_tlb_kernel_page(vaddr);
-       }
-       raw_local_irq_restore(flags);
-
-       return (void *)vaddr;
-}
-
-void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte)
-{
-       unsigned int idx, cpu = smp_processor_id();
-       int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu);
-       unsigned long vaddr, flags;
-       pte_t pte, *ptep;
-
-       idx = KM_L1_CACHE + KM_TYPE_NR * cpu;
-       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-       ptep = TOP_PTE(vaddr);
-       pte = mk_pte(page, kmap_prot);
-
-       BUG_ON(pte_val(*ptep) != pte_val(pte));
-       BUG_ON(*depth <= 0);
-
-       raw_local_irq_save(flags);
-       (*depth)--;
-       if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) {
-               set_pte_ext(ptep, saved_pte, 0);
-               local_flush_tlb_kernel_page(vaddr);
-       }
-       raw_local_irq_restore(flags);
-
-       if (!in_interrupt())
-               preempt_enable();
-}
-
-#endif  /* CONFIG_CPU_CACHE_VIPT */
index 5c7c6fc07565bd468a9f78b327073f0212795a42..183e0d226669193700c72f405f1e4e43303612a4 100644 (file)
@@ -1047,6 +1047,6 @@ init_hw_perf_events(void)
 
        return 0;
 }
-arch_initcall(init_hw_perf_events);
+early_initcall(init_hw_perf_events);
 
 #endif /* defined(CONFIG_CPU_MIPS32)... */
index c2e44597c22b1fc5b69ebf13cca307d43c2d1a52..ac11754ecec544c965c196f9dc857b2b398b22e0 100644 (file)
@@ -459,7 +459,7 @@ void migrate_irqs(void)
                        tmp = CROSS_GxICR(irq, new);
 
                        x &= GxICR_LEVEL | GxICR_ENABLE;
-                       if (GxICR(irq) & GxICR_REQUEST) {
+                       if (GxICR(irq) & GxICR_REQUEST)
                                x |= GxICR_REQUEST | GxICR_DETECT;
                        CROSS_GxICR(irq, new) = x;
                        tmp = CROSS_GxICR(irq, new);
index 7c07de0d89436ea4bac7350de09ab73d5f89b2e4..b150b510510f167d2782f645999303dfa297e2ad 100644 (file)
@@ -126,4 +126,4 @@ static int init_e500_pmu(void)
        return register_fsl_emb_pmu(&e500_pmu);
 }
 
-arch_initcall(init_e500_pmu);
+early_initcall(init_e500_pmu);
index 09d72028f317755428865f5105ee6b76b829ad7a..2cc5e0301d0b532a2291e400cb7bdc0a87aa89cd 100644 (file)
@@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void)
        return register_power_pmu(&mpc7450_pmu);
 }
 
-arch_initcall(init_mpc7450_pmu);
+early_initcall(init_mpc7450_pmu);
index 3129c855933c2a3857b0c4b3321b259b851279b8..5674807057899cd09dc366d128b1769afc6cd3e9 100644 (file)
@@ -1379,7 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu)
                freeze_events_kernel = MMCR0_FCHV;
 #endif /* CONFIG_PPC64 */
 
-       perf_pmu_register(&power_pmu);
+       perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(power_pmu_notifier);
 
        return 0;
index 7ecca59ddf77fe20bd46b470d9392cdd16fd5ba9..4dcf5f831e9d01f8694443ac7d56146b6f7a9777 100644 (file)
@@ -681,7 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
        pr_info("%s performance monitor hardware support registered\n",
                pmu->name);
 
-       perf_pmu_register(&fsl_emb_pmu);
+       perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
 
        return 0;
 }
index 2a361cdda635881ba4e96bf33f0a3dd6b8c273ea..ead8b3c2649ebba98c00423727e7dae54c6f5a7f 100644 (file)
@@ -613,4 +613,4 @@ static int init_power4_pmu(void)
        return register_power_pmu(&power4_pmu);
 }
 
-arch_initcall(init_power4_pmu);
+early_initcall(init_power4_pmu);
index 199de527d411446918651bd374bc5e9586ace46f..eca0ac595cb6c5b790ea4fd37d77ebb3a8ee474b 100644 (file)
@@ -682,4 +682,4 @@ static int init_power5p_pmu(void)
        return register_power_pmu(&power5p_pmu);
 }
 
-arch_initcall(init_power5p_pmu);
+early_initcall(init_power5p_pmu);
index 98b6a729a9dd127cc2c88e799b58bdbea2fa313c..d5ff0f64a5e645e01ddc6f9b56c91c60e14b204a 100644 (file)
@@ -621,4 +621,4 @@ static int init_power5_pmu(void)
        return register_power_pmu(&power5_pmu);
 }
 
-arch_initcall(init_power5_pmu);
+early_initcall(init_power5_pmu);
index 84a607bda8fbc129562943d7d2ce7fe0ee0f11bd..31603927e376e7e8854bf6a0faf86bc2bc9e56f7 100644 (file)
@@ -544,4 +544,4 @@ static int init_power6_pmu(void)
        return register_power_pmu(&power6_pmu);
 }
 
-arch_initcall(init_power6_pmu);
+early_initcall(init_power6_pmu);
index 852f7b7f6b4045801df807b997c7b110426ce7a6..593740fcb799d6fc9c29faca49425ad97b15b19a 100644 (file)
@@ -369,4 +369,4 @@ static int init_power7_pmu(void)
        return register_power_pmu(&power7_pmu);
 }
 
-arch_initcall(init_power7_pmu);
+early_initcall(init_power7_pmu);
index 3fee685de4df49e01a3a85ff069f3d409354c924..9a6e093858fe13fd30d2a79adb82e38e8c424b84 100644 (file)
@@ -494,4 +494,4 @@ static int init_ppc970_pmu(void)
        return register_power_pmu(&ppc970_pmu);
 }
 
-arch_initcall(init_ppc970_pmu);
+early_initcall(init_ppc970_pmu);
index fea833e18ad58ab92c78da66e35e2f23dcacd8ff..e0d703c7fdf7cc3e7d1491a963db90ad4735b9e4 100644 (file)
@@ -63,6 +63,7 @@
 #include <linux/of_gpio.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/fs.h>
 #include <linux/watchdog.h>
 #include <linux/miscdevice.h>
 #include <linux/uaccess.h>
index e0b98e71ff4797e5807f9e5d99f674aeb90456b8..6c6d7b339aae4f49fac84b7b3edc069f4ccf3adf 100644 (file)
@@ -99,6 +99,7 @@ config S390
        select HAVE_KERNEL_LZMA
        select HAVE_KERNEL_LZO
        select HAVE_GET_USER_PAGES_FAST
+       select HAVE_ARCH_MUTEX_CPU_RELAX
        select ARCH_INLINE_SPIN_TRYLOCK
        select ARCH_INLINE_SPIN_TRYLOCK_BH
        select ARCH_INLINE_SPIN_LOCK
index 458c1f7fbc1808d48982aa0c5fe89bfe3df2098c..688271f5f2e452b9951599550f33ed0ddcfe0a7c 100644 (file)
@@ -7,3 +7,5 @@
  */
 
 #include <asm-generic/mutex-dec.h>
+
+#define arch_mutex_cpu_relax() barrier()
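[Editorial sketch, an assumption not shown in this diff] The generic mutex
code is presumably expected to fall back to cpu_relax() when an
architecture does not select HAVE_ARCH_MUTEX_CPU_RELAX, roughly:

    /* assumed generic fallback when the arch provides no override */
    #ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
    # define arch_mutex_cpu_relax() cpu_relax()
    #endif
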
index d961949600fd462199f3d9706e86371e815e7b1b..9070d7e607047fccec2d51b865dfe89307cd5d41 100644 (file)
@@ -140,7 +140,7 @@ void __init init_se7206_IRQ(void)
        make_se7206_irq(IRQ1_IRQ); /* ATA */
        make_se7206_irq(IRQ3_IRQ); /* SLOT / PCM */
 
-       __raw_writew(__raw_readw(INTC_ICR1) | 0x000b, INTC_ICR); /* ICR1 */
+       __raw_writew(__raw_readw(INTC_ICR1) | 0x000b, INTC_ICR1); /* ICR1 */
 
        /* FPGA System register setup*/
        __raw_writew(0x0000,INTSTS0); /* Clear INTSTS0 */
index b26264dc2aefef75e0292cb6c9a4862e8b4f9785..c509c40cba4bfefe3917f5eec9b2ddea3c63763d 100644 (file)
@@ -34,7 +34,7 @@ static const int pfc_divisors[]={1,2,3,4,6,8,12};
 
 static void master_clk_init(struct clk *clk)
 {
-       return 10000000 * PLL2 * pll1rate[(__raw_readw(FREQCR) >> 8) & 0x0007];
+       clk->rate = 10000000 * PLL2 * pll1rate[(__raw_readw(FREQCR) >> 8) & 0x0007];
 }
 
 static struct clk_ops sh7201_master_clk_ops = {
index b601fa3978d1995f53ea466e6aed5145e6ef1f75..6282a839e08e7831c40403c8a8c576ab97242df4 100644 (file)
@@ -81,8 +81,7 @@ static void shoc_clk_init(struct clk *clk)
        for (i = 0; i < ARRAY_SIZE(frqcr3_divisors); i++) {
                int divisor = frqcr3_divisors[i];
 
-               if (clk->ops->set_rate(clk, clk->parent->rate /
-                                               divisor, 0) == 0)
+               if (clk->ops->set_rate(clk, clk->parent->rate / divisor) == 0)
                        break;
        }
 
index dbf3b4bb71febb0ba38e9ec2f6c27731ea0970be..748955df018d801db05137f1831cf18f01938b55 100644 (file)
@@ -250,4 +250,4 @@ static int __init sh7750_pmu_init(void)
 
        return register_sh_pmu(&sh7750_pmu);
 }
-arch_initcall(sh7750_pmu_init);
+early_initcall(sh7750_pmu_init);
index 580276525731531643c9165d5c45ce28f5eade20..17e6bebfede067c26379efde6b8e0a69fc6565e9 100644 (file)
@@ -284,4 +284,4 @@ static int __init sh4a_pmu_init(void)
 
        return register_sh_pmu(&sh4a_pmu);
 }
-arch_initcall(sh4a_pmu_init);
+early_initcall(sh4a_pmu_init);
index 5a4b33435650c8ea108668d8e7e30786a20bd335..2ee21a47b5af6e1aac2889712c69848b7194f317 100644 (file)
@@ -389,7 +389,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *_pmu)
 
        WARN_ON(_pmu->num_events > MAX_HWEVENTS);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(sh_pmu_notifier);
        return 0;
 }
index 6e8bfa1786dab1f45d3dff5a1dcacc31a08d4844..4d3dbe3703e9001f53f65f64d38629bda6dedb63 100644 (file)
@@ -4,8 +4,6 @@
 #ifdef CONFIG_PERF_EVENTS
 #include <asm/ptrace.h>
 
-extern void init_hw_perf_events(void);
-
 #define perf_arch_fetch_caller_regs(regs, ip)          \
 do {                                                   \
        unsigned long _pstate, _asi, _pil, _i7, _fp;    \
@@ -26,8 +24,6 @@ do {                                                  \
        (regs)->u_regs[UREG_I6] = _fp;                  \
        (regs)->u_regs[UREG_I7] = _i7;                  \
 } while (0)
-#else
-static inline void init_hw_perf_events(void)   { }
 #endif
 
 #endif
index a4bd7ba74c89d9f25221f29e616f49f26517aad5..300f810142f57e82cce5984a56d9e5e920aebb7d 100644 (file)
@@ -270,8 +270,6 @@ int __init nmi_init(void)
                        atomic_set(&nmi_active, -1);
                }
        }
-       if (!err)
-               init_hw_perf_events();
 
        return err;
 }
index 0d6deb55a2ae7e4189b5ab60aec81cd8df28adb6..760578687e7ca86cb0bb63cc7dc68721d51bee90 100644 (file)
@@ -1307,20 +1307,23 @@ static bool __init supported_pmu(void)
        return false;
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        pr_info("Performance events: ");
 
        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
-               return;
+               return 0;
        }
 
        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        register_die_notifier(&perf_event_nmi_notifier);
+
+       return 0;
 }
+early_initcall(init_hw_perf_events);
 
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
                           struct pt_regs *regs)
index e330da21b84f0636751b7e18e921ecff9cc31f55..b6fccb07123e206e23a4b73534d4891f66753b90 100644 (file)
@@ -377,6 +377,18 @@ config X86_ELAN
 
          If unsure, choose "PC-compatible" instead.
 
+config X86_INTEL_CE
+       bool "CE4100 TV platform"
+       depends on PCI
+       depends on PCI_GODIRECT
+       depends on X86_32
+       depends on X86_EXTENDED_PLATFORM
+       select X86_REBOOTFIXUPS
+       ---help---
+         Select for the Intel CE media processor (CE4100) SOC.
+         This option compiles in support for the CE4100 SOC for set-top
+         boxes and media devices.
+
 config X86_MRST
        bool "Moorestown MID platform"
        depends on PCI
@@ -385,6 +397,10 @@ config X86_MRST
        depends on X86_EXTENDED_PLATFORM
        depends on X86_IO_APIC
        select APB_TIMER
+       select I2C
+       select SPI
+       select INTEL_SCU_IPC
+       select X86_PLATFORM_DEVICES
        ---help---
          Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
          Internet Device (MID) platform. Moorestown consists of two chips:
@@ -466,6 +482,19 @@ config X86_ES7000
          Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
          supposed to run on an IA32-based Unisys ES7000 system.
 
+config X86_32_IRIS
+       tristate "Eurobraille/Iris poweroff module"
+       depends on X86_32
+       ---help---
+         The Iris machines from EuroBraille do not have APM or ACPI support
+         to shut themselves down properly.  A special I/O sequence is
+         needed to do so, which is what this module does at
+         kernel shutdown.
+
+         This is only for Iris machines from EuroBraille.
+
+         If unused, say N.
+
 config SCHED_OMIT_FRAME_POINTER
        def_bool y
        prompt "Single-depth WCHAN output"
@@ -1141,16 +1170,16 @@ config NUMA
 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
        depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
 
-config K8_NUMA
+config AMD_NUMA
        def_bool y
        prompt "Old style AMD Opteron NUMA detection"
        depends on X86_64 && NUMA && PCI
        ---help---
-         Enable K8 NUMA node topology detection.  You should say Y here if
-         you have a multi processor AMD K8 system. This uses an old
-         method to read the NUMA configuration directly from the builtin
-         Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
-         instead, which also takes priority if both are compiled in.
+         Enable AMD NUMA node topology detection.  You should say Y here if
+         you have a multi processor AMD system. This uses an old method to
+         read the NUMA configuration directly from the builtin Northbridge
+         of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
+         which also takes priority if both are compiled in.
 
 config X86_64_ACPI_NUMA
        def_bool y
index b59ee765414ea3891d6d4914485ba366fcee5663..45143bbcfe5e487d53e33bddaa7193ab68a5275e 100644 (file)
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
          feature as well as for the change_page_attr() infrastructure.
          If in doubt, say "N"
 
+config DEBUG_SET_MODULE_RONX
+       bool "Set loadable kernel module data as NX and text as RO"
+       depends on MODULES
+       ---help---
+         This option helps catch unintended modifications to a loadable
+         kernel module's text and read-only data. It also prevents execution
+         of module data. Such protection may interfere with run-time code
+         patching and dynamic kernel tracing, but it can also protect
+         against certain classes of kernel exploits.
+         If in doubt, say "N".
+
 config DEBUG_NX_TEST
        tristate "Testcase for the NX non-executable stack feature"
        depends on DEBUG_KERNEL && m
index 52f85a196fa033df961d20349ce6b7437409e843..35af09d13dc13b5d41ec7e19e066c7b5b676f30a 100644 (file)
@@ -182,7 +182,7 @@ no_longmode:
        hlt
        jmp     1b
 
-#include "../../kernel/verify_cpu_64.S"
+#include "../../kernel/verify_cpu.S"
 
        /*
         * Be careful here startup_64 needs to be at a predictable
index 76561d20ea2f27f0edfd0eee6d043b98c6aa6e90..13009d1af99a33e2bbee39fbe80a194fac85ece4 100644 (file)
@@ -66,6 +66,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
 extern int alternatives_text_reserved(void *start, void *end);
+extern bool skip_smp_alternatives;
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
                                               void *locks, void *locks_end,
@@ -180,8 +181,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
  * On the local CPU you need to be protected again NMI or MCE handlers seeing an
  * inconsistent instruction while you patch.
  */
+struct text_poke_param {
+       void *addr;
+       const void *opcode;
+       size_t len;
+};
+
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
+extern void text_poke_smp_batch(struct text_poke_param *params, int n);
 
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 #define IDEAL_NOP_SIZE_5 5
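[Illustration, not part of this patch] A sketch of the new batch interface,
patching two sites in one call so the cross-CPU synchronization cost is
paid only once; addr1, addr2 and newnop are placeholders:

    struct text_poke_param params[] = {
        { .addr = addr1, .opcode = newnop, .len = 5 },
        { .addr = addr2, .opcode = newnop, .len = 5 },
    };
    text_poke_smp_batch(params, ARRAY_SIZE(params));
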
index c8517f81b21e73f9f2c428a26f2fb8995f73011f..6aee50d655d12f6792e495c5d5d004aaef9377e2 100644 (file)
@@ -3,36 +3,53 @@
 
 #include <linux/pci.h>
 
-extern struct pci_device_id k8_nb_ids[];
+extern struct pci_device_id amd_nb_misc_ids[];
 struct bootnode;
 
-extern int early_is_k8_nb(u32 value);
-extern int cache_k8_northbridges(void);
-extern void k8_flush_garts(void);
-extern int k8_get_nodes(struct bootnode *nodes);
-extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int k8_scan_nodes(void);
+extern int early_is_amd_nb(u32 value);
+extern int amd_cache_northbridges(void);
+extern void amd_flush_garts(void);
+extern int amd_get_nodes(struct bootnode *nodes);
+extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
+extern int amd_scan_nodes(void);
 
-struct k8_northbridge_info {
+struct amd_northbridge {
+       struct pci_dev *misc;
+};
+
+struct amd_northbridge_info {
        u16 num;
-       u8 gart_supported;
-       struct pci_dev **nb_misc;
+       u64 flags;
+       struct amd_northbridge *nb;
 };
-extern struct k8_northbridge_info k8_northbridges;
+extern struct amd_northbridge_info amd_northbridges;
+
+#define AMD_NB_GART                    0x1
+#define AMD_NB_L3_INDEX_DISABLE                0x2
 
 #ifdef CONFIG_AMD_NB
 
-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline int amd_nb_num(void)
 {
-       return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
+       return amd_northbridges.num;
 }
 
-#else
+static inline int amd_nb_has_feature(int feature)
+{
+       return ((amd_northbridges.flags & feature) == feature);
+}
 
-static inline struct pci_dev *node_to_k8_nb_misc(int node)
+static inline struct amd_northbridge *node_to_amd_nb(int node)
 {
-       return NULL;
+       return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
 }
+
+#else
+
+#define amd_nb_num(x)          0
+#define amd_nb_has_feature(x)  false
+#define node_to_amd_nb(x)      NULL
+
 #endif
 
 
index f6ce0bda3b98a74906cb1c8297f4ba150430699d..cf12007796db95f1a48635a55016ff01a0f811c2 100644 (file)
@@ -238,6 +238,7 @@ extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
 extern void enable_NMI_through_LVT0(void);
+extern int apic_force_enable(void);
 
 /*
  * On 32bit this is mach-xxx local
index a859ca461fb0432585f952e08337610b1165a204..47a30ff8e51782a31f146c78b458ea6a89bcacc3 100644 (file)
 
 #ifdef CONFIG_X86_32
 # define MAX_IO_APICS 64
+# define MAX_LOCAL_APIC 256
 #else
 # define MAX_IO_APICS 128
 # define MAX_LOCAL_APIC 32768
index 8e6218550e774b56fd30f3171bc163bfb511b947..c8bfe63a06de289057321af73c114e3521d9088a 100644 (file)
@@ -124,6 +124,7 @@ enum {
        X86_SUBARCH_LGUEST,
        X86_SUBARCH_XEN,
        X86_SUBARCH_MRST,
+       X86_SUBARCH_CE4100,
        X86_NR_SUBARCHS,
 };
 
index 9479a037419fe1358a96cece0d877a269c71e365..0141b234406fb01f8418320ea49c997fdbd14cbb 100644 (file)
@@ -117,6 +117,10 @@ enum fixed_addresses {
        FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
        FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
        __end_of_permanent_fixed_addresses,
+
+#ifdef CONFIG_X86_MRST
+       FIX_LNW_VRTC,
+#endif
        /*
         * 256 temporary boot-time mappings, used by early_ioremap(),
         * before ioremap() is functional.
index 4aa2bb3b242ab76733e0f7e5ba95454471297c1a..ef328901c80240f4a1471d3e4bdd795daffc6621 100644 (file)
@@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
        int err;
 
        /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+       asm volatile("1:  fxrstorq %[fx]\n\t"
+                    "2:\n"
+                    ".section .fixup,\"ax\"\n"
+                    "3:  movl $-1,%[err]\n"
+                    "    jmp  2b\n"
+                    ".previous\n"
+                    _ASM_EXTABLE(1b, 3b)
+                    : [err] "=r" (err)
+                    : [fx] "m" (*fx), "0" (0));
+#else
        asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
                     "2:\n"
                     ".section .fixup,\"ax\"\n"
@@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
                     _ASM_EXTABLE(1b, 3b)
                     : [err] "=r" (err)
                     : [fx] "R" (fx), "m" (*fx), "0" (0));
+#endif
        return err;
 }
 
@@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
                return -EFAULT;
 
        /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+       asm volatile("1:  fxsaveq %[fx]\n\t"
+                    "2:\n"
+                    ".section .fixup,\"ax\"\n"
+                    "3:  movl $-1,%[err]\n"
+                    "    jmp  2b\n"
+                    ".previous\n"
+                    _ASM_EXTABLE(1b, 3b)
+                    : [err] "=r" (err), [fx] "=m" (*fx)
+                    : "0" (0));
+#else
        asm volatile("1:  rex64/fxsave (%[fx])\n\t"
                     "2:\n"
                     ".section .fixup,\"ax\"\n"
@@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
                     _ASM_EXTABLE(1b, 3b)
                     : [err] "=r" (err), "=m" (*fx)
                     : [fx] "R" (fx), "0" (0));
+#endif
        if (unlikely(err) &&
            __clear_user(fx, sizeof(struct i387_fxsave_struct)))
                err = -EFAULT;
index a6b28d017c2fb9aae9e5f5d2bae03db4dcadbd43..0c5ca4e30d7bda949a3470ad6c89f391d19ac623 100644 (file)
@@ -159,7 +159,7 @@ struct io_apic_irq_attr;
 extern int io_apic_set_pci_routing(struct device *dev, int irq,
                 struct io_apic_irq_attr *irq_attr);
 void setup_IO_APIC_irq_extra(u32 gsi);
-extern void ioapic_init_mappings(void);
+extern void ioapic_and_gsi_init(void);
 extern void ioapic_insert_resources(void);
 
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
@@ -168,10 +168,9 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 
-extern void probe_nr_irqs_gsi(void);
 extern int get_nr_irqs_gsi(void);
-
 extern void setup_ioapic_ids_from_mpc(void);
+extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
 struct mp_ioapic_gsi{
        u32 gsi_base;
@@ -189,9 +188,8 @@ extern void __init pre_init_apic_IRQ0(void);
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static const int timer_through_8259 = 0;
-static inline void ioapic_init_mappings(void)  { }
+static inline void ioapic_and_gsi_init(void) { }
 static inline void ioapic_insert_resources(void) { }
-static inline void probe_nr_irqs_gsi(void)     { }
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
 
index 13b0ebaa512f77764e06956632b32755f8ca2bfa..ba870bb6dd8ef30ab81a317a8eb43dcb83066630 100644 (file)
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
        return ((irq == 2) ? 9 : irq);
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-# define ARCH_HAS_NMI_WATCHDOG
-#endif
-
 #ifdef CONFIG_X86_32
 extern void irq_ctx_init(int cpu);
 #else
index 5bdfca86581beb3b45c60fd1f8d900a5daa68bf9..f23eb2528464f4a51ad6d14d70db0f7c33d92e0a 100644 (file)
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_registers(struct pt_regs *regs);
 extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-                      unsigned long *sp, unsigned long bp);
+                      unsigned long *sp);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern void show_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
index c62c13cb9788f0a1ea664fed073ebbca1ed02f15..eb16e94ae04f79927eb2c5afbf7b6ecd849b1621 100644 (file)
@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
 
 void mce_log_therm_throt_event(__u64 status);
 
+/* Interrupt Handler for core thermal thresholds */
+extern int (*platform_thermal_notify)(__u64 msr_val);
+
 #ifdef CONFIG_X86_THERMAL_VECTOR
 extern void mcheck_intel_therm_init(void);
 #else
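[Illustration, not part of this patch] A sketch of how a platform driver
might hook the new core-threshold notifier above; my_thermal_notify is a
placeholder:

    static int my_thermal_notify(__u64 msr_val)
    {
        pr_info("core thermal threshold crossed, msr=0x%llx\n",
                (unsigned long long)msr_val);
        return 0;
    }

    /* in the driver's init path: */
    platform_thermal_notify = my_thermal_notify;
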
index ef51b501e22a6e53bf4ae7e2d9e2566760f72ee1..24215072d0e1e5894d4643634bfe1ef9786eef55 100644 (file)
@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
 
 #ifdef CONFIG_MICROCODE_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
+
+static inline void get_ucode_data(void *to, const u8 *from, size_t n)
+{
+       memcpy(to, from, n);
+}
+
 #else
 static inline struct microcode_ops * __init init_amd_microcode(void)
 {
index c82868e9f905f04779778542298ce5560ae2e865..0c90dd9f05053c83591df6e04ec5d7979fee779f 100644 (file)
@@ -5,8 +5,9 @@
 
 #include <asm/mpspec_def.h>
 #include <asm/x86_init.h>
+#include <asm/apicdef.h>
 
-extern int apic_version[MAX_APICS];
+extern int apic_version[];
 extern int pic_mode;
 
 #ifdef CONFIG_X86_32
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
                                 int active_high_low);
 #endif /* CONFIG_ACPI */
 
-#define PHYSID_ARRAY_SIZE      BITS_TO_LONGS(MAX_APICS)
+#define PHYSID_ARRAY_SIZE      BITS_TO_LONGS(MAX_LOCAL_APIC)
 
 struct physid_mask {
        unsigned long mask[PHYSID_ARRAY_SIZE];
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t;
        test_and_set_bit(physid, (map).mask)
 
 #define physids_and(dst, src1, src2)                                   \
-       bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+       bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
 
 #define physids_or(dst, src1, src2)                                    \
-       bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS)
+       bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
 
 #define physids_clear(map)                                     \
-       bitmap_zero((map).mask, MAX_APICS)
+       bitmap_zero((map).mask, MAX_LOCAL_APIC)
 
 #define physids_complement(dst, src)                           \
-       bitmap_complement((dst).mask, (src).mask, MAX_APICS)
+       bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)
 
 #define physids_empty(map)                                     \
-       bitmap_empty((map).mask, MAX_APICS)
+       bitmap_empty((map).mask, MAX_LOCAL_APIC)
 
 #define physids_equal(map1, map2)                              \
-       bitmap_equal((map1).mask, (map2).mask, MAX_APICS)
+       bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)
 
 #define physids_weight(map)                                    \
-       bitmap_weight((map).mask, MAX_APICS)
+       bitmap_weight((map).mask, MAX_LOCAL_APIC)
 
 #define physids_shift_right(d, s, n)                           \
-       bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS)
+       bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)
 
 #define physids_shift_left(d, s, n)                            \
-       bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS)
+       bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)
 
 static inline unsigned long physids_coerce(physid_mask_t *map)
 {
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map)
        map->mask[0] = physids;
 }
 
-/* Note: will create very large stack frames if physid_mask_t is big */
-#define physid_mask_of_physid(physid)                                  \
-       ({                                                              \
-               physid_mask_t __physid_mask = PHYSID_MASK_NONE;         \
-               physid_set(physid, __physid_mask);                      \
-               __physid_mask;                                          \
-       })
-
 static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
 {
        physids_clear(*map);
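With physid_mask_of_physid() removed, callers fill a mask in place instead of returning one by value, exactly as the io_apic.c hunk near the end of this diff does. A minimal migration sketch ('apicid' is a placeholder):

static void example_fill_physid(int apicid)
{
	physid_mask_t map;

	/* old (removed): map = physid_mask_of_physid(apicid); */
	physid_set_mask_of_physid(apicid, &map);
}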
index 4a7f96d7c188edd92387cdec4a3be36e23028f35..c0a955a9a08784f662a071d976ef57bc0637c8c6 100644 (file)
 
 #ifdef CONFIG_X86_32
 # define MAX_MPC_ENTRY 1024
-# define MAX_APICS      256
-#else
-# if NR_CPUS <= 255
-#  define MAX_APICS     255
-# else
-#  define MAX_APICS   32768
-# endif
 #endif
 
 /* Intel MP Floating Pointer Structure */
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
new file mode 100644 (file)
index 0000000..73668ab
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _MRST_VRTC_H
+#define _MRST_VRTC_H
+
+extern unsigned char vrtc_cmos_read(unsigned char reg);
+extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
+extern unsigned long vrtc_get_time(void);
+extern int vrtc_set_mmss(unsigned long nowtime);
+
+#endif
index 4a711a684b174435bd5aae838515a836101eb389..719f00b28ff5358caf87d736ed5b4100dafdce9e 100644 (file)
@@ -14,7 +14,9 @@
 #include <linux/sfi.h>
 
 extern int pci_mrst_init(void);
-int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
+extern int sfi_mrtc_num;
+extern struct sfi_rtc_table_entry sfi_mrtc_array[];
 
 /*
  * Medfield is the follow-up of Moorestown, it combines two chip solution into
@@ -50,4 +52,14 @@ extern void mrst_early_console_init(void);
 
 extern struct console early_hsu_console;
 extern void hsu_early_console_init(void);
+
+extern void intel_scu_devices_create(void);
+extern void intel_scu_devices_destroy(void);
+
+/* VRTC timer */
+#define MRST_VRTC_MAP_SZ       (1024)
+/*#define MRST_VRTC_PGOFFSET   (0xc00) */
+
+extern void mrst_rtc_init(void);
+
 #endif /* _ASM_X86_MRST_H */
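A hedged usage sketch for the vrtc accessors declared in the new mrst-vrtc.h header above; RTC_MINUTES comes from <linux/mc146818rtc.h> and the surrounding driver context is assumed:

#include <linux/mc146818rtc.h>

static void example_vrtc_touch(void)
{
	/* read-modify-write of one CMOS register via the virtual RTC */
	unsigned char min = vrtc_cmos_read(RTC_MINUTES);

	vrtc_cmos_write(min, RTC_MINUTES);
}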
index 6b89f5e860214266d7270160f739e9a9be290802..4d0dfa0d998e9f80ce244d86e1fd583513aaaaca 100644 (file)
 #define MSR_AMD64_IBSCTL               0xc001103a
 #define MSR_AMD64_IBSBRTARGET          0xc001103b
 
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL              0xc0010200
+#define MSR_F15H_PERF_CTR              0xc0010201
+
 /* Fam 10h MSRs */
 #define MSR_FAM10H_MMIO_CONF_BASE      0xc0010058
 #define FAM10H_MMIO_CONF_ENABLE                (1<<0)
 #define PACKAGE_THERM_INT_LOW_ENABLE           (1 << 1)
 #define PACKAGE_THERM_INT_PLN_ENABLE           (1 << 24)
 
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
+#define THERM_SHIFT_THRESHOLD0        8
+#define THERM_MASK_THRESHOLD0          (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE    (1 << 23)
+#define THERM_SHIFT_THRESHOLD1        16
+#define THERM_MASK_THRESHOLD1          (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0        (1 << 6)
+#define THERM_LOG_THRESHOLD0           (1 << 7)
+#define THERM_STATUS_THRESHOLD1        (1 << 8)
+#define THERM_LOG_THRESHOLD1           (1 << 9)
+
 /* MISC_ENABLE bits: architectural */
 #define MSR_IA32_MISC_ENABLE_FAST_STRING       (1ULL << 0)
 #define MSR_IA32_MISC_ENABLE_TCC               (1ULL << 1)
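The THRESHOLD0/1 fields above are 7-bit trip points inside IA32_THERM_INTERRUPT. A minimal sketch of arming threshold 0, assuming a kernel context where rdmsr()/wrmsr() are available ('delta' is interpreted by the hardware relative to TjMax; see the SDM):

static void example_arm_threshold0(unsigned int delta)
{
	u32 l, h;

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	l &= ~THERM_MASK_THRESHOLD0;
	l |= (delta << THERM_SHIFT_THRESHOLD0) & THERM_MASK_THRESHOLD0;
	l |= THERM_INT_THRESHOLD0_ENABLE;
	wrmsr(MSR_IA32_THERM_INTERRUPT, l, h);
}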
index 932f0f86b4b76252e6e6434ab9c15d81c3b17004..c4021b9535102547712c92203e8afe5809e567fc 100644 (file)
@@ -5,41 +5,15 @@
 #include <asm/irq.h>
 #include <asm/io.h>
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
-/**
- * do_nmi_callback
- *
- * Check to see if a callback exists and execute it.  Return 1
- * if the handler exists and was handled successfully.
- */
-int do_nmi_callback(struct pt_regs *regs, int cpu);
+#ifdef CONFIG_X86_LOCAL_APIC
 
 extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
-extern int check_nmi_watchdog(void);
-#if !defined(CONFIG_LOCKUP_DETECTOR)
-extern int nmi_watchdog_enabled;
-#endif
 extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
 extern int reserve_perfctr_nmi(unsigned int);
 extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog(void *);
-extern void stop_apic_nmi_watchdog(void *);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
-extern void cpu_nmi_set_wd_enabled(void);
-
-extern atomic_t nmi_active;
-extern unsigned int nmi_watchdog;
-#define NMI_NONE       0
-#define NMI_IO_APIC    1
-#define NMI_LOCAL_APIC 2
-#define NMI_INVALID    3
-
 struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
                        void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-
-static inline void localise_nmi_watchdog(void)
-{
-       if (nmi_watchdog == NMI_IO_APIC)
-               nmi_watchdog = NMI_LOCAL_APIC;
-}
-
-/* check if nmi_watchdog is active (ie was specified at boot) */
-static inline int nmi_watchdog_active(void)
-{
-       /*
-        * actually it should be:
-        *      return (nmi_watchdog == NMI_LOCAL_APIC ||
-        *              nmi_watchdog == NMI_IO_APIC)
-        * but since they are power of two we could use a
-        * cheaper way --cvg
-        */
-       return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
-}
 #endif
 
-void lapic_watchdog_stop(void);
-int lapic_watchdog_init(unsigned nmi_hz);
-int lapic_wd_event(unsigned nmi_hz);
-unsigned lapic_adjust_nmi_hz(unsigned hz);
-void disable_lapic_nmi_watchdog(void);
-void enable_lapic_nmi_watchdog(void);
 void stop_nmi(void);
 void restart_nmi(void);
 
index ca0437c714b2aa3c94195a67dfe49e5eeb349b19..6761292296307163a0f5cbb4af1e27642d33da10 100644 (file)
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start;
 
 #define PCIBIOS_MIN_CARDBUS_IO 0x4000
 
+extern int pcibios_enabled;
 void pcibios_config_init(void);
 struct pci_bus *pcibios_scan_root(int bus);
 
index 550e26b1dbb3593f324910f0197402966ae91299..d9d4dae305f6991efa446ec0ef4e7fad8054bdc3 100644 (file)
@@ -125,7 +125,6 @@ union cpuid10_edx {
 #define IBS_OP_MAX_CNT_EXT     0x007FFFFFULL   /* not a register bit mask */
 
 #ifdef CONFIG_PERF_EVENTS
-extern void init_hw_perf_events(void);
 extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET                        0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 }
 
 #else
-static inline void init_hw_perf_events(void)           { }
 static inline void perf_events_lapic_init(void)        { }
 #endif
 
index a70cd216be5d729db1f364340f911d632819f18d..295e2ff18a6a80be6ec3425d91e73239c76f1198 100644 (file)
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
 };
 
 /*
- * P4 PEBS specifics (Replay Event only)
- *
- * Format (bits):
- *   0-6: metric from P4_PEBS_METRIC enum
- *    7 : reserved
- *    8 : reserved
- * 9-11 : reserved
- *
 * Note we have UOP and PEBS bits reserved for now,
 * just in case we need them later.
  */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
        P4_PEBS_METRIC__max
 };
 
+/*
+ * Notes on internal configuration of ESCR+CCCR tuples
+ *
+ * Since the P4 has quite a different performance-register
+ * architecture compared with the "architectural" one,
+ * and we have only 64 bits to keep the configuration
+ * of a performance event, the following trick is used.
+ *
+ * 1) Since only the low 32 bits of both the ESCR and CCCR
+ *    registers are meaningful, we pack them into a single
+ *    64 bit configuration. The low 32 bits of such a config
+ *    correspond to the low 32 bits of the CCCR register and
+ *    the high 32 bits to the low 32 bits of the ESCR register.
+ *
+ * 2) The meaning of every bit of such a config field can
+ *    be found in the Intel SDM, but note that we "borrow"
+ *    some reserved bits for our own use and clear them or
+ *    set them to a proper value when we do a real write
+ *    to the hardware registers.
+ *
+ * 3) The bits of the config have the following format and
+ *    must be either 0 or set to certain predefined
+ *    values:
+ *
+ *    Low 32 bits
+ *    -----------
+ *      0-6: P4_PEBS_METRIC enum
+ *     7-11:                    reserved
+ *       12:                    reserved (Enable)
+ *    13-15:                    reserved (ESCR select)
+ *    16-17: Active Thread
+ *       18: Compare
+ *       19: Complement
+ *    20-23: Threshold
+ *       24: Edge
+ *       25:                    reserved (FORCE_OVF)
+ *       26:                    reserved (OVF_PMI_T0)
+ *       27:                    reserved (OVF_PMI_T1)
+ *    28-29:                    reserved
+ *       30:                    reserved (Cascade)
+ *       31:                    reserved (OVF)
+ *
+ *    High 32 bits
+ *    ------------
+ *        0:                    reserved (T1_USR)
+ *        1:                    reserved (T1_OS)
+ *        2:                    reserved (T0_USR)
+ *        3:                    reserved (T0_OS)
+ *        4: Tag Enable
+ *      5-8: Tag Value
+ *     9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
+ *    25-30: enum P4_EVENTS
+ *       31:                    reserved (HT thread)
+ */
+
 #endif /* PERF_EVENT_P4_H */
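A one-line illustration of the packing scheme the comment above describes (an illustrative helper, not part of this header):

static inline u64 example_p4_pack(u32 escr, u32 cccr)
{
	/* high half <- ESCR low 32 bits, low half <- CCCR low 32 bits */
	return ((u64)escr << 32) | (u64)cccr;
}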
 
index d6763b139a844243b9fbb8dc620e633fe7b5825a..db8aa19a08a22d35e608625b251bcee7fa5fe1ee 100644 (file)
@@ -53,6 +53,12 @@ extern void x86_mrst_early_setup(void);
 static inline void x86_mrst_early_setup(void) { }
 #endif
 
+#ifdef CONFIG_X86_INTEL_CE
+extern void x86_ce4100_early_setup(void);
+#else
+static inline void x86_ce4100_early_setup(void) { }
+#endif
+
 #ifndef _SETUP
 
 /*
index 1def60114906bf1b1dfa98ef3bd8ac177f65010f..6c22bf353f26495b1fa71dc5a92cdaa05e5b1d8e 100644 (file)
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
                setup_IO_APIC();
        else {
                nr_ioapics = 0;
-               localise_nmi_watchdog();
        }
 #endif
 }
index 2b16a2ad23dc6b9647028c0808f8f45b094e74ac..52b5c7ed3608d9fc439c5ca69bf58ca1b8ebef88 100644 (file)
@@ -7,6 +7,7 @@
 #define _ASM_X86_STACKTRACE_H
 
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 extern int kstack_depth_to_print;
 
@@ -46,7 +47,7 @@ struct stacktrace_ops {
 };
 
 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+               unsigned long *stack,
                const struct stacktrace_ops *ops, void *data);
 
 #ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#ifdef CONFIG_FRAME_POINTER
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+       unsigned long bp;
+
+       if (regs)
+               return regs->bp;
+
+       if (task == current) {
+               /* Grab bp right from our regs */
+               get_bp(bp);
+               return bp;
+       }
+
+       /* bp is the last reg pushed by switch_to */
+       return *(unsigned long *)task->thread.sp;
+}
+#else
+static inline unsigned long
+stack_frame(struct task_struct *task, struct pt_regs *regs)
+{
+       return 0;
+}
+#endif
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl);
+                  unsigned long *stack, char *log_lvl);
 
 extern void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *sp, unsigned long bp, char *log_lvl);
+                  unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
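With 'bp' dropped from these signatures, walkers pass only task/regs/stack and dump_trace() derives the frame pointer itself via stack_frame(). A hedged caller sketch ('example_ops' is a hypothetical stacktrace_ops, callbacks elided):

static const struct stacktrace_ops example_ops = {
	/* .stack / .address / .walk_stack callbacks go here */
};

static void example_walk_current(void)
{
	dump_trace(current, NULL, NULL, &example_ops, NULL);
}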
 
index 5469630b27f56d732b10036ba381df39f28ca52b..fa7b9176b76cb33820034403fd8f4a50dc49709c 100644 (file)
 unsigned long long native_sched_clock(void);
 extern int recalibrate_cpu_khz(void);
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-extern int timer_ack;
-#else
-# define timer_ack (0)
-#endif
-
 extern int no_timer_check;
 
 /* Accelerators for sched_clock()
index 42d412fd8b02cdd369b5cc8db5a67aa5cc7a0770..ce1d54c8a433a6b866977beeb9c6681e0db64746 100644 (file)
  * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
  * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
  *
- * We will use 31 sets, one for sending BAU messages from each of the 32
+ * We will use one set for sending BAU messages from each of the
 * cpus on the uvhub.
  *
  * TLB shootdown will use the first of the 8 descriptors of each set.
  * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
  */
 
+#define MAX_CPUS_PER_UVHUB             64
+#define MAX_CPUS_PER_SOCKET            32
+#define UV_ADP_SIZE                    64 /* hardware-provided max. */
+#define UV_CPUS_PER_ACT_STATUS         32 /* hardware-provided max. */
 #define UV_ITEMS_PER_DESCRIPTOR                8
 /* the 'throttle' to prevent the hardware stay-busy bug */
 #define MAX_BAU_CONCURRENT             3
-#define UV_CPUS_PER_ACT_STATUS         32
 #define UV_ACT_STATUS_MASK             0x3
 #define UV_ACT_STATUS_SIZE             2
-#define UV_ADP_SIZE                    32
 #define UV_DISTRIBUTION_SIZE           256
 #define UV_SW_ACK_NPENDING             8
 #define UV_NET_ENDPOINT_INTD           0x38
  * number of destination side software ack resources
  */
 #define DEST_NUM_RESOURCES             8
-#define MAX_CPUS_PER_NODE              32
 /*
  * completion statuses for sending a TLB flush message
  */
index 1e994754d323f400b85c5d1d7350c6e52a702ff9..34244b2cd880cff373e744ec34193adb8287ab2a 100644 (file)
@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT)     += doublefault_32.o
 obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_VM86)             += vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
-obj-$(CONFIG_EARLY_PRINTK_MRST)        += early_printk_mrst.o
 
 obj-$(CONFIG_HPET_TIMER)       += hpet.o
 obj-$(CONFIG_APB_TIMER)                += apb_timer.o
index 71232b941b6c9c6409fd14e9479f3d1625eeaeb0..17c8090fabd4703d324240328e2a15a55299bb70 100644 (file)
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
 {
        unsigned int ver = 0;
 
+       if (id >= (MAX_LOCAL_APIC-1)) {
+               printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+               return;
+       }
+
        if (!enabled) {
                ++disabled_cpus;
                return;
@@ -910,13 +915,13 @@ static int __init acpi_parse_madt_lapic_entries(void)
        acpi_register_lapic_address(acpi_lapic_addr);
 
        count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
-                                     acpi_parse_sapic, MAX_APICS);
+                                     acpi_parse_sapic, MAX_LOCAL_APIC);
 
        if (!count) {
                x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
-                                               acpi_parse_x2apic, MAX_APICS);
+                                       acpi_parse_x2apic, MAX_LOCAL_APIC);
                count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
-                                             acpi_parse_lapic, MAX_APICS);
+                                       acpi_parse_lapic, MAX_LOCAL_APIC);
        }
        if (!count && !x2count) {
                printk(KERN_ERR PREFIX "No LAPIC entries present\n");
index 5079f24c955a2d3b9b66532cd2c8cd45e5116d14..123608531c8f933b819a3fc7c135748137c8eb5b 100644 (file)
@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
        mutex_unlock(&smp_alt);
 }
 
+bool skip_smp_alternatives;
 void alternatives_smp_switch(int smp)
 {
        struct smp_alt_module *mod;
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp)
        printk("lockdep: fixing up alternatives.\n");
 #endif
 
-       if (noreplace_smp || smp_alt_once)
+       if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
                return;
        BUG_ON(!smp && (num_online_cpus() > 1));
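skip_smp_alternatives gives the suspend/resume path a way to bypass the SMP-to-UP patching while non-boot CPUs are only transiently offline. A hedged sketch of the intended pattern (the hook names here are assumptions, not from this commit):

static void example_suspend_cpus_begin(void)
{
	/* CPUs come straight back; don't rewrite SMP alternatives */
	skip_smp_alternatives = true;
}

static void example_suspend_cpus_done(void)
{
	skip_smp_alternatives = false;
}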
 
@@ -591,17 +592,21 @@ static atomic_t stop_machine_first;
 static int wrote_text;
 
 struct text_poke_params {
-       void *addr;
-       const void *opcode;
-       size_t len;
+       struct text_poke_param *params;
+       int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
        struct text_poke_params *tpp = data;
+       struct text_poke_param *p;
+       int i;
 
        if (atomic_dec_and_test(&stop_machine_first)) {
-               text_poke(tpp->addr, tpp->opcode, tpp->len);
+               for (i = 0; i < tpp->nparams; i++) {
+                       p = &tpp->params[i];
+                       text_poke(p->addr, p->opcode, p->len);
+               }
                smp_wmb();      /* Make sure other cpus see that this has run */
                wrote_text = 1;
        } else {
@@ -610,8 +615,12 @@ static int __kprobes stop_machine_text_poke(void *data)
                smp_mb();       /* Load wrote_text before following execution */
        }
 
-       flush_icache_range((unsigned long)tpp->addr,
-                          (unsigned long)tpp->addr + tpp->len);
+       for (i = 0; i < tpp->nparams; i++) {
+               p = &tpp->params[i];
+               flush_icache_range((unsigned long)p->addr,
+                                  (unsigned long)p->addr + p->len);
+       }
+
        return 0;
 }
 
@@ -631,10 +640,13 @@ static int __kprobes stop_machine_text_poke(void *data)
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
        struct text_poke_params tpp;
+       struct text_poke_param p;
 
-       tpp.addr = addr;
-       tpp.opcode = opcode;
-       tpp.len = len;
+       p.addr = addr;
+       p.opcode = opcode;
+       p.len = len;
+       tpp.params = &p;
+       tpp.nparams = 1;
        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
        /* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +654,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
        return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instructions by using stop_machine() on SMP. Since
+ * stop_machine() is a heavy task, it is better to aggregate text_poke
+ * requests and perform them in one stop_machine() pass when possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+       struct text_poke_params tpp = {.params = params, .nparams = n};
+
+       atomic_set(&stop_machine_first, 1);
+       wrote_text = 0;
+       stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
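A caller sketch for text_poke_smp_batch(), honoring the kernel-doc requirement above that text_mutex and get_online_cpus() be held; the addresses, opcodes, and lengths are placeholders:

static void example_batch_poke(void *a1, const void *o1, size_t l1,
			       void *a2, const void *o2, size_t l2)
{
	struct text_poke_param reqs[] = {
		{ .addr = a1, .opcode = o1, .len = l1 },
		{ .addr = a2, .opcode = o2, .len = l2 },
	};

	get_online_cpus();
	mutex_lock(&text_mutex);
	text_poke_smp_batch(reqs, ARRAY_SIZE(reqs));	/* one stop_machine() */
	mutex_unlock(&text_mutex);
	put_online_cpus();
}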
index 8f6463d8ed0de1ebfece6cb1138a15697f657197..affacb5e0065a1392713da260ecf8abfbab2c405 100644 (file)
 
 static u32 *flush_words;
 
-struct pci_device_id k8_nb_ids[] = {
+struct pci_device_id amd_nb_misc_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
        {}
 };
-EXPORT_SYMBOL(k8_nb_ids);
+EXPORT_SYMBOL(amd_nb_misc_ids);
 
-struct k8_northbridge_info k8_northbridges;
-EXPORT_SYMBOL(k8_northbridges);
+struct amd_northbridge_info amd_northbridges;
+EXPORT_SYMBOL(amd_northbridges);
 
-static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+static struct pci_dev *next_northbridge(struct pci_dev *dev,
+                                       struct pci_device_id *ids)
 {
        do {
                dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
                if (!dev)
                        break;
-       } while (!pci_match_id(&k8_nb_ids[0], dev));
+       } while (!pci_match_id(ids, dev));
        return dev;
 }
 
-int cache_k8_northbridges(void)
+int amd_cache_northbridges(void)
 {
-       int i;
-       struct pci_dev *dev;
+       int i = 0;
+       struct amd_northbridge *nb;
+       struct pci_dev *misc;
 
-       if (k8_northbridges.num)
+       if (amd_nb_num())
                return 0;
 
-       dev = NULL;
-       while ((dev = next_k8_northbridge(dev)) != NULL)
-               k8_northbridges.num++;
+       misc = NULL;
+       while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
+               i++;
 
-       /* some CPU families (e.g. family 0x11) do not support GART */
-       if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
-           boot_cpu_data.x86 == 0x15)
-               k8_northbridges.gart_supported = 1;
+       if (i == 0)
+               return 0;
 
-       k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
-                                         sizeof(void *), GFP_KERNEL);
-       if (!k8_northbridges.nb_misc)
+       nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
+       if (!nb)
                return -ENOMEM;
 
-       if (!k8_northbridges.num) {
-               k8_northbridges.nb_misc[0] = NULL;
-               return 0;
-       }
+       amd_northbridges.nb = nb;
+       amd_northbridges.num = i;
 
-       if (k8_northbridges.gart_supported) {
-               flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
-                                     GFP_KERNEL);
-               if (!flush_words) {
-                       kfree(k8_northbridges.nb_misc);
-                       return -ENOMEM;
-               }
-       }
+       misc = NULL;
+       for (i = 0; i != amd_nb_num(); i++) {
+               node_to_amd_nb(i)->misc = misc =
+                       next_northbridge(misc, amd_nb_misc_ids);
+        }
+
+       /* some CPU families (e.g. family 0x11) do not support GART */
+       if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+           boot_cpu_data.x86 == 0x15)
+               amd_northbridges.flags |= AMD_NB_GART;
+
+       /*
+        * Some CPU families support L3 Cache Index Disable. There are some
+        * limitations because of E382 and E388 on family 0x10.
+        */
+       if (boot_cpu_data.x86 == 0x10 &&
+           boot_cpu_data.x86_model >= 0x8 &&
+           (boot_cpu_data.x86_model > 0x9 ||
+            boot_cpu_data.x86_mask >= 0x1))
+               amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
 
-       dev = NULL;
-       i = 0;
-       while ((dev = next_k8_northbridge(dev)) != NULL) {
-               k8_northbridges.nb_misc[i] = dev;
-               if (k8_northbridges.gart_supported)
-                       pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
-       }
-       k8_northbridges.nb_misc[i] = NULL;
        return 0;
 }
-EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+EXPORT_SYMBOL_GPL(amd_cache_northbridges);
 
 /* Ignores subdevice/subvendor but as far as I can figure out
   they're useless anyway */
-int __init early_is_k8_nb(u32 device)
+int __init early_is_amd_nb(u32 device)
 {
        struct pci_device_id *id;
        u32 vendor = device & 0xffff;
        device >>= 16;
-       for (id = k8_nb_ids; id->vendor; id++)
+       for (id = amd_nb_misc_ids; id->vendor; id++)
                if (vendor == id->vendor && device == id->device)
                        return 1;
        return 0;
 }
 
-void k8_flush_garts(void)
+int amd_cache_gart(void)
+{
+       int i;
+
+       if (!amd_nb_has_feature(AMD_NB_GART))
+               return 0;
+
+       flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
+       if (!flush_words) {
+               amd_northbridges.flags &= ~AMD_NB_GART;
+               return -ENOMEM;
+       }
+
+       for (i = 0; i != amd_nb_num(); i++)
+               pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+                                     &flush_words[i]);
+
+       return 0;
+}
+
+void amd_flush_garts(void)
 {
        int flushed, i;
        unsigned long flags;
        static DEFINE_SPINLOCK(gart_lock);
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
        /* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
           that it doesn't matter to serialize more. -AK */
        spin_lock_irqsave(&gart_lock, flags);
        flushed = 0;
-       for (i = 0; i < k8_northbridges.num; i++) {
-               pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
-                                      flush_words[i]|1);
+       for (i = 0; i < amd_nb_num(); i++) {
+               pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+                                      flush_words[i] | 1);
                flushed++;
        }
-       for (i = 0; i < k8_northbridges.num; i++) {
+       for (i = 0; i < amd_nb_num(); i++) {
                u32 w;
                /* Make sure the hardware actually executed the flush*/
                for (;;) {
-                       pci_read_config_dword(k8_northbridges.nb_misc[i],
+                       pci_read_config_dword(node_to_amd_nb(i)->misc,
                                              0x9c, &w);
                        if (!(w & 1))
                                break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
        if (!flushed)
                printk("nothing to flush?\n");
 }
-EXPORT_SYMBOL_GPL(k8_flush_garts);
+EXPORT_SYMBOL_GPL(amd_flush_garts);
 
-static __init int init_k8_nbs(void)
+static __init int init_amd_nbs(void)
 {
        int err = 0;
 
-       err = cache_k8_northbridges();
+       err = amd_cache_northbridges();
 
        if (err < 0)
-               printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
+               printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+
+       if (amd_cache_gart() < 0)
+               printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
+                      "GART support disabled.\n");
 
        return err;
 }
 
 /* This has to go after the PCI subsystem */
-fs_initcall(init_k8_nbs);
+fs_initcall(init_amd_nbs);
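After the rename, consumers stop walking k8_northbridges.nb_misc[] and use the accessors seen above. A minimal sketch of the new pattern, mirroring what amd_flush_garts() does:

static void example_read_flush_words(void)
{
	int i;

	if (!amd_nb_has_feature(AMD_NB_GART))
		return;

	for (i = 0; i < amd_nb_num(); i++) {
		u32 w;

		pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &w);
		/* ... inspect or act on the per-node flush word ... */
	}
}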
index 92543c73cf8ed8d085dc581fe8171b3bbb6f939e..7c9ab59653e8bc5e229ba9e96734d20d4db50db5 100644 (file)
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
 
        if (system_state == SYSTEM_BOOTING) {
                irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+               irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
                /* APB timer irqs are set up as mp_irqs, timer is edge type */
                __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
                if (request_irq(adev->irq, apbt_interrupt_handler,
index b3a16e8f0703d47f50a354223bfe8c6e9382126e..dcd7c83e1659212ea5bab9d1ba0b1f0d8e4942c5 100644 (file)
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 * Do a PCI bus scan by hand because we're running before the PCI
  * subsystem.
  *
- * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
+ * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
  * generically. It's probably overkill to always scan all slots because
 * the AGP bridges should always be on their own bus in the HT hierarchy,
  * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
                dev_limit = bus_dev_ranges[i].dev_limit;
 
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
                dev_limit = bus_dev_ranges[i].dev_limit;
 
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
                dev_limit = bus_dev_ranges[i].dev_limit;
 
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
                dev_base = bus_dev_ranges[i].dev_base;
                dev_limit = bus_dev_ranges[i].dev_limit;
                for (slot = dev_base; slot < dev_limit; slot++) {
-                       if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+                       if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
                                continue;
 
                        write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
index 910f20b457c464d34f1e9874269d652ea0da325e..3966b564ea478746bc77d66886249a294177b21a 100644 (file)
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)   += nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)      += hw_nmi.o
+obj-y                          += hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
 obj-$(CONFIG_SMP)              += ipi.o
index 78218135b48e6169d155fb4a097e5b6c8e30e53a..879999a5230fc613a0815cd056b4820ebb5cf95a 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
        reserved = reserve_eilvt_offset(offset, new);
 
        if (reserved != new) {
-               pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
-                      "vector 0x%x was already reserved by another core, "
-                      "APIC%lX=0x%x\n",
-                      smp_processor_id(), new, reserved, reg, old);
+               pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+                      "vector 0x%x, but the register is already in use for "
+                      "vector 0x%x on another cpu\n",
+                      smp_processor_id(), reg, offset, new, reserved);
                return -EINVAL;
        }
 
        if (!eilvt_entry_is_changeable(old, new)) {
-               pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
-                      "register already in use, APIC%lX=0x%x\n",
-                      smp_processor_id(), new, reg, old);
+               pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+                      "vector 0x%x, but the register is already in use for "
+                      "vector 0x%x on this cpu\n",
+                      smp_processor_id(), reg, offset, new, old);
                return -EBUSY;
        }
 
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
         * PIT/HPET going.  Otherwise register lapic as a dummy
         * device.
         */
-       if (nmi_watchdog != NMI_IO_APIC)
-               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-       else
-               pr_warning("APIC timer registered as dummy,"
-                       " due to nmi_watchdog=%d!\n", nmi_watchdog);
+       lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
        /* Setup the lapic or request the broadcast */
        setup_APIC_timer();
@@ -1387,7 +1383,6 @@ void __cpuinit end_local_APIC_setup(void)
        }
 #endif
 
-       setup_apic_nmi_watchdog(NULL);
        apic_pm_activate();
 
        /*
@@ -1538,13 +1533,60 @@ static int __init detect_init_APIC(void)
        return 0;
 }
 #else
+
+static int apic_verify(void)
+{
+       u32 features, h, l;
+
+       /*
+        * The APIC feature bit should now be enabled
+        * in `cpuid'
+        */
+       features = cpuid_edx(1);
+       if (!(features & (1 << X86_FEATURE_APIC))) {
+               pr_warning("Could not enable APIC!\n");
+               return -1;
+       }
+       set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /* The BIOS may have set up the APIC at some other address */
+       rdmsr(MSR_IA32_APICBASE, l, h);
+       if (l & MSR_IA32_APICBASE_ENABLE)
+               mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+       pr_info("Found and enabled local APIC!\n");
+       return 0;
+}
+
+int apic_force_enable(void)
+{
+       u32 h, l;
+
+       if (disable_apic)
+               return -1;
+
+       /*
+        * Some BIOSes disable the local APIC in the APIC_BASE
+        * MSR. This can only be done in software for Intel P6 or later
+        * and AMD K7 (Model > 1) or later.
+        */
+       rdmsr(MSR_IA32_APICBASE, l, h);
+       if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+               pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+               l &= ~MSR_IA32_APICBASE_BASE;
+               l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+               wrmsr(MSR_IA32_APICBASE, l, h);
+               enabled_via_apicbase = 1;
+       }
+       return apic_verify();
+}
+
 /*
  * Detect and initialize APIC
  */
 static int __init detect_init_APIC(void)
 {
-       u32 h, l, features;
-
        /* Disabled by kernel option? */
        if (disable_apic)
                return -1;
@@ -1574,38 +1616,12 @@ static int __init detect_init_APIC(void)
                                "you can enable it with \"lapic\"\n");
                        return -1;
                }
-               /*
-                * Some BIOSes disable the local APIC in the APIC_BASE
-                * MSR. This can only be done in software for Intel P6 or later
-                * and AMD K7 (Model > 1) or later.
-                */
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-                       pr_info("Local APIC disabled by BIOS -- reenabling.\n");
-                       l &= ~MSR_IA32_APICBASE_BASE;
-                       l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-                       wrmsr(MSR_IA32_APICBASE, l, h);
-                       enabled_via_apicbase = 1;
-               }
-       }
-       /*
-        * The APIC feature bit should now be enabled
-        * in `cpuid'
-        */
-       features = cpuid_edx(1);
-       if (!(features & (1 << X86_FEATURE_APIC))) {
-               pr_warning("Could not enable APIC!\n");
-               return -1;
+               if (apic_force_enable())
+                       return -1;
+       } else {
+               if (apic_verify())
+                       return -1;
        }
-       set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-       /* The BIOS may have set up the APIC at some other address */
-       rdmsr(MSR_IA32_APICBASE, l, h);
-       if (l & MSR_IA32_APICBASE_ENABLE)
-               mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-       pr_info("Found and enabled local APIC!\n");
 
        apic_pm_activate();
 
@@ -1693,7 +1709,7 @@ void __init init_apic_mappings(void)
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-int apic_version[MAX_APICS];
+int apic_version[MAX_LOCAL_APIC];
 
 int __init APIC_init_uniprocessor(void)
 {
@@ -1758,17 +1774,10 @@ int __init APIC_init_uniprocessor(void)
                setup_IO_APIC();
        else {
                nr_ioapics = 0;
-               localise_nmi_watchdog();
        }
-#else
-       localise_nmi_watchdog();
 #endif
 
        x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-       check_nmi_watchdog();
-#endif
-
        return 0;
 }
 
index 62f6e1e55b90d7f9a2bc460e73ba8895da23c4ff..72ec29e1ae0605990dc7f3b9fc1511bf5cab3cbe 100644 (file)
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
        return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
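As a worked example of the formula above: on a 2 GHz CPU, cpu_khz is 2,000,000, so the returned period is 2,000,000 * 1000 * 60 = 1.2e11 cycles, i.e. the hardlockup detector's perf counter overflows after roughly 60 seconds of CPU time.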
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
+#ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
 void arch_trigger_all_cpu_backtrace(void)
 {
        int i;
 
+       if (test_and_set_bit(0, &backtrace_flag))
+               /*
+                * If there is already a trigger_all_cpu_backtrace() in progress
+                * (backtrace_flag == 1), don't output duplicate cpu dump info.
+                */
+               return;
+
        cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
 
        printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -42,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
                        break;
                mdelay(1);
        }
+
+       clear_bit(0, &backtrace_flag);
+       smp_mb__after_clear_bit();
 }
 
 static int __kprobes
@@ -50,7 +64,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 {
        struct die_args *args = __args;
        struct pt_regs *regs;
-       int cpu = smp_processor_id();
+       int cpu;
 
        switch (cmd) {
        case DIE_NMI:
@@ -62,6 +76,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
        }
 
        regs = args->regs;
+       cpu = smp_processor_id();
 
        if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
                static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -91,18 +106,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
 }
 early_initcall(register_trigger_all_cpu_backtrace);
 #endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
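The backtrace_flag logic added above is a generic "single in-flight operation" idiom. A self-contained sketch of the same pattern (names are illustrative):

static unsigned long example_flag;

static void example_trigger_once(void)
{
	if (test_and_set_bit(0, &example_flag))
		return;	/* a previous invocation is still running */

	/* ... the expensive, non-reentrant work ... */

	clear_bit(0, &example_flag);
	smp_mb__after_clear_bit();	/* order the release, as above */
}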
index fadcd743a74f8bdcd5effbaf7e28b01ea3003532..f6cd5b41077034405045fec84fcde39a4b0b3212 100644 (file)
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -1934,8 +1933,7 @@ void disable_IO_APIC(void)
  *
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
 {
        union IO_APIC_reg_00 reg_00;
        physid_mask_t phys_id_present_map;
@@ -1944,15 +1942,6 @@ void __init setup_ioapic_ids_from_mpc(void)
        unsigned char old_id;
        unsigned long flags;
 
-       if (acpi_ioapic)
-               return;
-       /*
-        * Don't check I/O APIC IDs for xAPIC systems.  They have
-        * no meaning without the serial APIC bus.
-        */
-       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-               return;
        /*
         * This is broken; anything with a real cpu count has to
         * circumvent this idiocy regardless.
@@ -2006,7 +1995,6 @@ void __init setup_ioapic_ids_from_mpc(void)
                        physids_or(phys_id_present_map, phys_id_present_map, tmp);
                }
 
-
                /*
                 * We need to adjust the IRQ routing table
                 * if the ID changed.
@@ -2042,6 +2030,21 @@ void __init setup_ioapic_ids_from_mpc(void)
                        apic_printk(APIC_VERBOSE, " ok.\n");
        }
 }
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+       if (acpi_ioapic)
+               return;
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
+       setup_ioapic_ids_from_mpc_nocheck();
+}
 #endif
 
 int no_timer_check __initdata;
@@ -2642,24 +2645,6 @@ static void lapic_register_intr(int irq)
                                      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-       /*
-        * Dirty trick to enable the NMI watchdog ...
-        * We put the 8259A master into AEOI mode and
-        * unmask on all local APICs LVT0 as NMI.
-        *
-        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-        * is from Maciej W. Rozycki - so we do not have to EOI from
-        * the NMI handler or the timer interrupt.
-        */
-       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-       enable_NMI_through_LVT0();
-
-       apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2765,15 +2750,6 @@ static inline void __init check_timer(void)
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-       {
-               unsigned int ver;
-
-               ver = apic_read(APIC_LVR);
-               ver = GET_APIC_VERSION(ver);
-               timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-       }
-#endif
 
        pin1  = find_isa_irq_pin(0, mp_INT);
        apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2797,6 @@ static inline void __init check_timer(void)
                                unmask_ioapic(cfg);
                }
                if (timer_irq_works()) {
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               setup_nmi();
-                               legacy_pic->unmask(0);
-                       }
                        if (disable_timer_pin_1 > 0)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
@@ -2850,11 +2822,6 @@ static inline void __init check_timer(void)
                if (timer_irq_works()) {
                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
                        timer_through_8259 = 1;
-                       if (nmi_watchdog == NMI_IO_APIC) {
-                               legacy_pic->mask(0);
-                               setup_nmi();
-                               legacy_pic->unmask(0);
-                       }
                        goto out;
                }
                /*
@@ -2866,15 +2833,6 @@ static inline void __init check_timer(void)
                apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
        }
 
-       if (nmi_watchdog == NMI_IO_APIC) {
-               apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-                           "through the IO-APIC - disabling NMI Watchdog!\n");
-               nmi_watchdog = NMI_NONE;
-       }
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
 
@@ -3639,7 +3597,7 @@ int __init io_apic_get_redir_entries (int ioapic)
        return reg_01.bits.entries + 1;
 }
 
-void __init probe_nr_irqs_gsi(void)
+static void __init probe_nr_irqs_gsi(void)
 {
        int nr;
 
@@ -3956,7 +3914,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
        return res;
 }
 
-void __init ioapic_init_mappings(void)
+void __init ioapic_and_gsi_init(void)
 {
        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
        struct resource *ioapic_res;
@@ -3994,6 +3952,8 @@ fake_ioapic_page:
                ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
                ioapic_res++;
        }
+
+       probe_nr_irqs_gsi();
 }
 
 void __init ioapic_insert_resources(void)
@@ -4103,7 +4063,8 @@ void __init pre_init_apic_IRQ0(void)
 
        printk(KERN_INFO "Early APIC setup for system timer0\n");
 #ifndef CONFIG_SMP
-       phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+       physid_set_mask_of_physid(boot_cpu_physical_apicid,
+                                        &phys_cpu_present_map);
 #endif
        /* Make sure the irq descriptor is set up */
        cfg = alloc_irq_and_cfg_at(0, 0);
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644 (file)
index c90041c..0000000
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- *  NMI watchdog support on APIC systems
- *
- *  Started by Ingo Molnar <mingo@redhat.com>
- *
- *  Fixes:
- *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson  : Pentium 4 support for local APIC NMI watchdog.
- *  Pavel Machek and
- *  Mikael Pettersson  : PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- *     be enabled
- *  0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0);          /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
-       return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
-       return atomic_read(&mce_entry) > 0;
-#endif
-       return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
-       return per_cpu(irq_stat, cpu).apic_timer_irqs +
-               per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
-       local_irq_enable_in_hardirq();
-       /*
-        * Intentionally don't use cpu_relax here. This is
-        * to make sure that the performance counter really ticks,
-        * even if there is a simulator or similar that catches the
-        * pause instruction. On a real HT machine this is fine because
-        * all other CPUs are busy with "useless" delay loops and don't
-        * care if they get somewhat less cycles.
-        */
-       while (endflag == 0)
-               mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
-       printk(KERN_CONT "\n");
-
-       printk(KERN_WARNING
-               "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
-                       cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
-       printk(KERN_WARNING
-               "Please report this to bugzilla.kernel.org,\n");
-       printk(KERN_WARNING
-               "and attach the output of the 'dmesg' command.\n");
-
-       per_cpu(wd_enabled, cpu) = 0;
-       atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
-       unsigned int *prev_nmi_count;
-       int cpu;
-
-       if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
-               return 0;
-
-       prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
-       if (!prev_nmi_count)
-               goto error;
-
-       printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
-       for_each_possible_cpu(cpu)
-               prev_nmi_count[cpu] = get_nmi_count(cpu);
-       local_irq_enable();
-       mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
-       for_each_online_cpu(cpu) {
-               if (!per_cpu(wd_enabled, cpu))
-                       continue;
-               if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
-                       report_broken_nmi(cpu, prev_nmi_count);
-       }
-       endflag = 1;
-       if (!atomic_read(&nmi_active)) {
-               kfree(prev_nmi_count);
-               atomic_set(&nmi_active, -1);
-               goto error;
-       }
-       printk("OK.\n");
-
-       /*
-        * now that we know it works we can reduce NMI frequency to
-        * something more reasonable; makes a difference in some configs
-        */
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               nmi_hz = lapic_adjust_nmi_hz(1);
-
-       kfree(prev_nmi_count);
-       return 0;
-error:
-       if (nmi_watchdog == NMI_IO_APIC) {
-               if (!timer_through_8259)
-                       legacy_pic->mask(0);
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-       }
-
-#ifdef CONFIG_X86_32
-       timer_ack = 0;
-#endif
-       return -1;
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
-       unsigned int nmi;
-
-       if (!strncmp(str, "panic", 5)) {
-               panic_on_timeout = 1;
-               str = strchr(str, ',');
-               if (!str)
-                       return 1;
-               ++str;
-       }
-
-       if (!strncmp(str, "lapic", 5))
-               nmi_watchdog = NMI_LOCAL_APIC;
-       else if (!strncmp(str, "ioapic", 6))
-               nmi_watchdog = NMI_IO_APIC;
-       else {
-               get_option(&str, &nmi);
-               if (nmi >= NMI_INVALID)
-                       return 0;
-               nmi_watchdog = nmi;
-       }
-
-       return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       nmi_pm_active = atomic_read(&nmi_active);
-       stop_apic_nmi_watchdog(NULL);
-       BUG_ON(atomic_read(&nmi_active) != 0);
-       return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
-       /* only CPU0 goes here, other CPUs should be offline */
-       if (nmi_pm_active > 0) {
-               setup_apic_nmi_watchdog(NULL);
-               touch_nmi_watchdog();
-       }
-       return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
-       .name           = "lapic_nmi",
-       .resume         = lapic_nmi_resume,
-       .suspend        = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
-       .id     = 0,
-       .cls    = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
-       int error;
-
-       /*
-        * should really be a BUG_ON but b/c this is an
-        * init call, it just doesn't work.  -dcz
-        */
-       if (nmi_watchdog != NMI_LOCAL_APIC)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0)
-               return 0;
-
-       error = sysdev_class_register(&nmi_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic_nmi);
-       return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
-       apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
-       if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-               on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
-       __get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
-       if (__get_cpu_var(wd_enabled))
-               return;
-
-       /* cheap hack to support suspend/resume */
-       /* if cpu0 is not active neither should the other cpus */
-       if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
-               return;
-
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               if (lapic_watchdog_init(nmi_hz) < 0) {
-                       __get_cpu_var(wd_enabled) = 0;
-                       return;
-               }
-               /* FALL THROUGH */
-       case NMI_IO_APIC:
-               __get_cpu_var(wd_enabled) = 1;
-               atomic_inc(&nmi_active);
-       }
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
-       /* only support LOCAL and IO APICs for now */
-       if (!nmi_watchdog_active())
-               return;
-       if (__get_cpu_var(wd_enabled) == 0)
-               return;
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               lapic_watchdog_stop();
-       else
-               __acpi_nmi_disable(NULL);
-       __get_cpu_var(wd_enabled) = 0;
-       atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(long, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
-       if (nmi_watchdog_active()) {
-               unsigned cpu;
-
-               /*
-                * Tell other CPUs to reset their alert counters. We cannot
-                * do it ourselves because the alert count increase is not
-                * atomic.
-                */
-               for_each_present_cpu(cpu) {
-                       if (per_cpu(nmi_touch, cpu) != 1)
-                               per_cpu(nmi_touch, cpu) = 1;
-               }
-       }
-
-       /*
-        * Tickle the softlockup detector too:
-        */
-       touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
-       /*
-        * Since current_thread_info() is always on the stack, and we
-        * always switch the stack NMI-atomically, it's safe to use
-        * smp_processor_id().
-        */
-       unsigned int sum;
-       int touched = 0;
-       int cpu = smp_processor_id();
-       int rc = 0;
-
-       sum = get_timer_irqs(cpu);
-
-       if (__get_cpu_var(nmi_touch)) {
-               __get_cpu_var(nmi_touch) = 0;
-               touched = 1;
-       }
-
-       /* We can be called before check_nmi_watchdog, hence NULL check. */
-       if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-               static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
-
-               raw_spin_lock(&lock);
-               printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
-               show_regs(regs);
-               dump_stack();
-               raw_spin_unlock(&lock);
-               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-
-               rc = 1;
-       }
-
-       /* Could check oops_in_progress here too, but it's safer not to */
-       if (mce_in_progress())
-               touched = 1;
-
-       /* if none of the timers is firing, this cpu isn't doing much */
-       if (!touched && __get_cpu_var(last_irq_sum) == sum) {
-               /*
-                * Ayiee, looks like this CPU is stuck ...
-                * wait a few IRQs (5 seconds) before doing the oops ...
-                */
-               __this_cpu_inc(alert_counter);
-               if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
-                       /*
-                        * die_nmi will return ONLY if NOTIFY_STOP happens.
-                        */
-                       die_nmi("BUG: NMI Watchdog detected LOCKUP",
-                               regs, panic_on_timeout);
-       } else {
-               __get_cpu_var(last_irq_sum) = sum;
-               __this_cpu_write(alert_counter, 0);
-       }
-
-       /* see if the nmi watchdog went off */
-       if (!__get_cpu_var(wd_enabled))
-               return rc;
-       switch (nmi_watchdog) {
-       case NMI_LOCAL_APIC:
-               rc |= lapic_wd_event(nmi_hz);
-               break;
-       case NMI_IO_APIC:
-               /*
-                * don't know how to accurately check for this.
-                * just assume it was a watchdog timer interrupt
-                * This matches the old behaviour.
-                */
-               rc = 1;
-               break;
-       }
-       return rc;
-}
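
The tick above is, at its core, a per-CPU stall detector: sample the local timer-IRQ count on each watchdog NMI and raise the alarm once it stops moving for about five seconds. A minimal sketch of just that heuristic, with get_timer_irqs() and nmi_hz standing in for the kernel plumbing (names as used above, everything else stripped):

static unsigned last_sum;
static long alert_counter;

/* returns 1 once the CPU has gone ~5s without a timer interrupt */
int stall_check(int cpu)
{
        unsigned sum = get_timer_irqs(cpu);     /* per-CPU timer IRQ count */

        if (sum == last_sum) {
                /* no timer IRQs since the last NMI tick: suspicious */
                if (++alert_counter == 5 * nmi_hz)
                        return 1;
        } else {
                last_sum = sum;                 /* progress seen, reset */
                alert_counter = 0;
        }
        return 0;
}
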
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
-       __get_cpu_var(wd_enabled) = 1;
-       atomic_inc(&nmi_active);
-       __acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
-       touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
-       unknown_nmi_panic = 1;
-       return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-       unsigned char reason = get_nmi_reason();
-       char buf[64];
-
-       sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-       die_nmi(buf, regs, 1); /* Always panic here */
-       return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
-                       void __user *buffer, size_t *length, loff_t *ppos)
-{
-       int old_state;
-
-       nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
-       old_state = nmi_watchdog_enabled;
-       proc_dointvec(table, write, buffer, length, ppos);
-       if (!!old_state == !!nmi_watchdog_enabled)
-               return 0;
-
-       if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
-               printk(KERN_WARNING
-                       "NMI watchdog is permanently disabled\n");
-               return -EIO;
-       }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_lapic_nmi_watchdog();
-               else
-                       disable_lapic_nmi_watchdog();
-       } else if (nmi_watchdog == NMI_IO_APIC) {
-               if (nmi_watchdog_enabled)
-                       enable_ioapic_nmi_watchdog();
-               else
-                       disable_ioapic_nmi_watchdog();
-       } else {
-               printk(KERN_WARNING
-                       "NMI watchdog doesn't know what hardware to touch\n");
-               return -EIO;
-       }
-       return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-       if (unknown_nmi_panic)
-               return unknown_nmi_panic_callback(regs, cpu);
-#endif
-       return 0;
-}
-
-void arch_trigger_all_cpu_backtrace(void)
-{
-       int i;
-
-       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
-       printk(KERN_INFO "sending NMI to all CPUs:\n");
-       apic->send_IPI_all(NMI_VECTOR);
-
-       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
-       for (i = 0; i < 10 * 1000; i++) {
-               if (cpumask_empty(to_cpumask(backtrace_mask)))
-                       break;
-               mdelay(1);
-       }
-}
index c1c52c341f40a607be5b252746db31f36b8dc561..2a3f2a7db243f8b846ef5d3032287037d0111a69 100644
@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 static DEFINE_SPINLOCK(uv_nmi_lock);
 
+static unsigned long __init uv_early_read_mmr(unsigned long addr)
+{
+       unsigned long val, *mmr;
+
+       mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
+       val = *mmr;
+       early_iounmap(mmr, sizeof(*mmr));
+       return val;
+}
+
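
The new uv_early_read_mmr() helper factors out the map/read/unmap idiom that the three callers below previously open-coded: early_ioremap() hands out a temporary fixmap-backed mapping that works before the regular ioremap() machinery is up, and must be balanced by early_iounmap(). Each caller then collapses to a one-liner, e.g.:

/* usage after the refactor (see the hunks below) */
node_id.v    = uv_early_read_mmr(UVH_NODE_ID);
uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
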
 static inline bool is_GRU_range(u64 start, u64 end)
 {
        return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
        return is_ISA_range(start, end) || is_GRU_range(start, end);
 }
 
-static int early_get_nodeid(void)
+static int __init early_get_pnodeid(void)
 {
        union uvh_node_id_u node_id;
-       unsigned long *mmr;
-
-       mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
-       node_id.v = *mmr;
-       early_iounmap(mmr, sizeof(*mmr));
+       union uvh_rh_gam_config_mmr_u  m_n_config;
+       int pnode;
 
        /* Currently, all blades have the same revision number */
+       node_id.v = uv_early_read_mmr(UVH_NODE_ID);
+       m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
        uv_min_hub_revision_id = node_id.s.revision;
 
-       return node_id.s.node_id;
+       pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+       return pnode;
 }
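
early_get_pnodeid() derives the physical node number by shifting out the low bit of node_id and masking with the socket-node width read from the config MMR. A worked example with assumed register values:

/* assumed values: n_skt = 6, node_id.s.node_id = 0x32 */
int mask  = (1 << 6) - 1;               /* 0x3f */
int pnode = (0x32 >> 1) & mask;         /* 0x19 & 0x3f = 0x19 */
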
 
 static void __init early_get_apic_pnode_shift(void)
 {
-       unsigned long *mmr;
-
-       mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
-       uvh_apicid.v = *mmr;
-       early_iounmap(mmr, sizeof(*mmr));
+       uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
        if (!uvh_apicid.v)
                /*
                 * Old bios, use default value
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void)
 static void __init uv_set_apicid_hibit(void)
 {
        union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
-       unsigned long *mmr;
 
-       mmr = early_ioremap(UV_LOCAL_MMR_BASE |
-               UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
-       apicid_mask.v = *mmr;
-       early_iounmap(mmr, sizeof(*mmr));
+       apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
        uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
 }
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-       int nodeid;
+       int pnodeid;
 
        if (!strcmp(oem_id, "SGI")) {
-               nodeid = early_get_nodeid();
+               pnodeid = early_get_pnodeid();
                early_get_apic_pnode_shift();
                x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
                x86_platform.nmi_init = uv_nmi_init;
@@ -119,7 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
                        uv_system_type = UV_X2APIC;
                else if (!strcmp(oem_table_id, "UVH")) {
                        __get_cpu_var(x2apic_extra_bits) =
-                               nodeid << (uvh_apicid.s.pnode_shift - 1);
+                               pnodeid << uvh_apicid.s.pnode_shift;
                        uv_system_type = UV_NON_UNIQUE_APIC;
                        uv_set_apicid_hibit();
                        return 1;
@@ -682,27 +684,32 @@ void uv_nmi_init(void)
 void __init uv_system_init(void)
 {
        union uvh_rh_gam_config_mmr_u  m_n_config;
+       union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
        union uvh_node_id_u node_id;
        unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
-       int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+       int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
        int gnode_extra, max_pnode = 0;
        unsigned long mmr_base, present, paddr;
-       unsigned short pnode_mask;
+       unsigned short pnode_mask, pnode_io_mask;
 
        map_low_mmrs();
 
        m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
        m_val = m_n_config.s.m_skt;
        n_val = m_n_config.s.n_skt;
+       mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+       n_io = mmioh.s.n_io;
        mmr_base =
            uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
            ~UV_MMR_ENABLE;
        pnode_mask = (1 << n_val) - 1;
+       pnode_io_mask = (1 << n_io) - 1;
+
        node_id.v = uv_read_local_mmr(UVH_NODE_ID);
        gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
        gnode_upper = ((unsigned long)gnode_extra  << m_val);
-       printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
-                       n_val, m_val, gnode_upper, gnode_extra);
+       printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
+                       n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
 
        printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
 
@@ -735,7 +742,7 @@ void __init uv_system_init(void)
                for (j = 0; j < 64; j++) {
                        if (!test_bit(j, &present))
                                continue;
-                       pnode = (i * 64 + j);
+                       pnode = (i * 64 + j) & pnode_mask;
                        uv_blade_info[blade].pnode = pnode;
                        uv_blade_info[blade].nr_possible_cpus = 0;
                        uv_blade_info[blade].nr_online_cpus = 0;
@@ -756,6 +763,7 @@ void __init uv_system_init(void)
                /*
                 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
                 */
+               uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
                uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
                pnode = uv_apicid_to_pnode(apicid);
                blade = boot_pnode_to_blade(pnode);
@@ -772,7 +780,6 @@ void __init uv_system_init(void)
                uv_cpu_hub_info(cpu)->numa_blade_id = blade;
                uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
                uv_cpu_hub_info(cpu)->pnode = pnode;
-               uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
                uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
                uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
                uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -796,7 +803,7 @@ void __init uv_system_init(void)
 
        map_gru_high(max_pnode);
        map_mmr_high(max_pnode);
-       map_mmioh_high(max_pnode);
+       map_mmioh_high(max_pnode & pnode_io_mask);
 
        uv_cpu_init();
        uv_scir_register_cpu_notifier();
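
The MMIOH sizing change follows the same mask pattern: n_io from the MMIOH overlay config bounds the pnode value handed to map_mmioh_high(). With assumed values:

/* assumed: n_io = 4, max_pnode = 0x19 */
unsigned short pnode_io_mask = (1 << 4) - 1;    /* 0x0f */
map_mmioh_high(0x19 & pnode_io_mask);           /* maps for pnode 0x09 */
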
index 4b68bda30938d0a55ed39eeaeff68157266a9ea0..1d59834396bdc145c630e671d1bccd7769689a88 100644
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
 #else
        vgetcpu_set_mode();
 #endif
-       init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
index 17ad0336621135a2310f3e9a2e5d9bf3d2f668ba..9ecf81f9b90fb0c73416d958b1aa216b17e1ecfa 100644
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
 };
 
 struct amd_l3_cache {
-       struct   pci_dev *dev;
-       bool     can_disable;
+       struct   amd_northbridge *nb;
        unsigned indices;
        u8       subcaches[4];
 };
@@ -311,14 +310,12 @@ struct _cache_attr {
 /*
  * L3 cache descriptors
  */
-static struct amd_l3_cache **__cpuinitdata l3_caches;
-
 static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
 {
        unsigned int sc0, sc1, sc2, sc3;
        u32 val = 0;
 
-       pci_read_config_dword(l3->dev, 0x1C4, &val);
+       pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
 
        /* calculate subcache sizes */
        l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
-{
-       struct amd_l3_cache *l3;
-       struct pci_dev *dev = node_to_k8_nb_misc(node);
-
-       l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
-       if (!l3) {
-               printk(KERN_WARNING "Error allocating L3 struct\n");
-               return NULL;
-       }
-
-       l3->dev = dev;
-
-       amd_calc_l3_indices(l3);
-
-       return l3;
-}
-
-static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
-                                          int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
+                                       int index)
 {
+       static struct amd_l3_cache *__cpuinitdata l3_caches;
        int node;
 
-       if (boot_cpu_data.x86 != 0x10)
-               return;
-
-       if (index < 3)
-               return;
-
-       /* see errata #382 and #388 */
-       if (boot_cpu_data.x86_model < 0x8)
-               return;
-
-       if ((boot_cpu_data.x86_model == 0x8 ||
-            boot_cpu_data.x86_model == 0x9)
-               &&
-            boot_cpu_data.x86_mask < 0x1)
-                       return;
-
-       /* not in virtualized environments */
-       if (k8_northbridges.num == 0)
+       /* only for L3, and not in virtualized environments */
+       if (index < 3 || amd_nb_num() == 0)
                return;
 
        /*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
         * never freed but this is done only on shutdown so it doesn't matter.
         */
        if (!l3_caches) {
-               int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
+               int size = amd_nb_num() * sizeof(struct amd_l3_cache);
 
                l3_caches = kzalloc(size, GFP_ATOMIC);
                if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
 
        node = amd_get_nb_id(smp_processor_id());
 
-       if (!l3_caches[node]) {
-               l3_caches[node] = amd_init_l3_cache(node);
-               l3_caches[node]->can_disable = true;
+       if (!l3_caches[node].nb) {
+               l3_caches[node].nb = node_to_amd_nb(node);
+               amd_calc_l3_indices(&l3_caches[node]);
        }
 
-       WARN_ON(!l3_caches[node]);
-
-       this_leaf->l3 = l3_caches[node];
+       this_leaf->l3 = &l3_caches[node];
 }
 
 /*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
 {
        unsigned int reg = 0;
 
-       pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);
+       pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
 
        /* check whether this slot is activated already */
        if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 {
        int index;
 
-       if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+       if (!this_leaf->l3 ||
+           !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return -EINVAL;
 
        index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
                if (!l3->subcaches[i])
                        continue;
 
-               pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+               pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
 
                /*
                 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
                wbinvd_on_cpu(cpu);
 
                reg |= BIT(31);
-               pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+               pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
        }
 }
 
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+       if (!this_leaf->l3 ||
+           !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return -EINVAL;
 
        cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 #define STORE_CACHE_DISABLE(slot)                                      \
 static ssize_t                                                         \
 store_cache_disable_##slot(struct _cpuid4_info *this_leaf,             \
-                           const char *buf, size_t count)              \
+                          const char *buf, size_t count)               \
 {                                                                      \
        return store_cache_disable(this_leaf, buf, count, slot);        \
 }
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
                show_cache_disable_1, store_cache_disable_1);
 
 #else  /* CONFIG_AMD_NB */
-static void __cpuinit
-amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
-{
-};
+#define amd_init_l3_cache(x, y)
 #endif /* CONFIG_AMD_NB */
 
 static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
 
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
                amd_cpuid4(index, &eax, &ebx, &ecx);
-               amd_check_l3_disable(this_leaf, index);
+               amd_init_l3_cache(this_leaf, index);
        } else {
                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
        }
@@ -983,30 +944,48 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-#define DEFAULT_SYSFS_CACHE_ATTRS      \
-       &type.attr,                     \
-       &level.attr,                    \
-       &coherency_line_size.attr,      \
-       &physical_line_partition.attr,  \
-       &ways_of_associativity.attr,    \
-       &number_of_sets.attr,           \
-       &size.attr,                     \
-       &shared_cpu_map.attr,           \
-       &shared_cpu_list.attr
-
 static struct attribute *default_attrs[] = {
-       DEFAULT_SYSFS_CACHE_ATTRS,
+       &type.attr,
+       &level.attr,
+       &coherency_line_size.attr,
+       &physical_line_partition.attr,
+       &ways_of_associativity.attr,
+       &number_of_sets.attr,
+       &size.attr,
+       &shared_cpu_map.attr,
+       &shared_cpu_list.attr,
        NULL
 };
 
-static struct attribute *default_l3_attrs[] = {
-       DEFAULT_SYSFS_CACHE_ATTRS,
 #ifdef CONFIG_AMD_NB
-       &cache_disable_0.attr,
-       &cache_disable_1.attr,
+static struct attribute ** __cpuinit amd_l3_attrs(void)
+{
+       static struct attribute **attrs;
+       int n;
+
+       if (attrs)
+               return attrs;
+
+       n = sizeof (default_attrs) / sizeof (struct attribute *);
+
+       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+               n += 2;
+
+       attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
+       if (attrs == NULL)
+               return attrs = default_attrs;
+
+       for (n = 0; default_attrs[n]; n++)
+               attrs[n] = default_attrs[n];
+
+       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+               attrs[n++] = &cache_disable_0.attr;
+               attrs[n++] = &cache_disable_1.attr;
+       }
+
+       return attrs;
+}
 #endif
-       NULL
-};
 
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 
                this_leaf = CPUID4_INFO_IDX(cpu, i);
 
-               if (this_leaf->l3 && this_leaf->l3->can_disable)
-                       ktype_cache.default_attrs = default_l3_attrs;
-               else
-                       ktype_cache.default_attrs = default_attrs;
-
+               ktype_cache.default_attrs = default_attrs;
+#ifdef CONFIG_AMD_NB
+               if (this_leaf->l3)
+                       ktype_cache.default_attrs = amd_l3_attrs();
+#endif
                retval = kobject_init_and_add(&(this_object->kobj),
                                              &ktype_cache,
                                              per_cpu(ici_cache_kobject, cpu),
index 80c482382d5c95e06b71ffdf91b6f5d362bf2b45..5bf2fac52aca7771b6b7827117b9d2b2778fd8ad 100644
@@ -31,8 +31,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 
-#define PFX               "mce_threshold: "
-#define VERSION           "version 1.1.1"
 #define NR_BANKS          6
 #define NR_BLOCKS         9
 #define THRESHOLD_MAX     0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
        struct list_head        miscj;
 };
 
-/* defaults used early on boot */
-static struct threshold_block threshold_defaults = {
-       .interrupt_enable       = 0,
-       .threshold_limit        = THRESHOLD_MAX,
-};
-
 struct threshold_bank {
        struct kobject          *kobj;
        struct threshold_block  *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
 struct thresh_restart {
        struct threshold_block  *b;
        int                     reset;
+       int                     set_lvt_off;
+       int                     lvt_off;
        u16                     old_limit;
 };
 
+static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
+{
+       int msr = (hi & MASK_LVTOFF_HI) >> 20;
+
+       if (apic < 0) {
+               pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
+                      "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
+                      b->bank, b->block, b->address, hi, lo);
+               return 0;
+       }
+
+       if (apic != msr) {
+               pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
+                      "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
+                      b->cpu, apic, b->bank, b->block, b->address, hi, lo);
+               return 0;
+       }
+
+       return 1;
+};
+
 /* must be called with correct cpu affinity */
 /* Called via smp_call_function_single() */
 static void threshold_restart_bank(void *_tr)
 {
        struct thresh_restart *tr = _tr;
-       u32 mci_misc_hi, mci_misc_lo;
+       u32 hi, lo;
 
-       rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+       rdmsr(tr->b->address, lo, hi);
 
-       if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+       if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
                tr->reset = 1;  /* limit cannot be lower than err count */
 
        if (tr->reset) {                /* reset err count and overflow bit */
-               mci_misc_hi =
-                   (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+               hi =
+                   (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
                    (THRESHOLD_MAX - tr->b->threshold_limit);
        } else if (tr->old_limit) {     /* change limit w/o reset */
-               int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+               int new_count = (hi & THRESHOLD_MAX) +
                    (tr->old_limit - tr->b->threshold_limit);
 
-               mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+               hi = (hi & ~MASK_ERR_COUNT_HI) |
                    (new_count & THRESHOLD_MAX);
        }
 
+       if (tr->set_lvt_off) {
+               if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
+                       /* set new lvt offset */
+                       hi &= ~MASK_LVTOFF_HI;
+                       hi |= tr->lvt_off << 20;
+               }
+       }
+
        tr->b->interrupt_enable ?
-           (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
-           (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+           (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+           (hi &= ~MASK_INT_TYPE_HI);
 
-       mci_misc_hi |= MASK_COUNT_EN_HI;
-       wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+       hi |= MASK_COUNT_EN_HI;
+       wrmsr(tr->b->address, lo, hi);
+}
+
+static void mce_threshold_block_init(struct threshold_block *b, int offset)
+{
+       struct thresh_restart tr = {
+               .b                      = b,
+               .set_lvt_off            = 1,
+               .lvt_off                = offset,
+       };
+
+       b->threshold_limit              = THRESHOLD_MAX;
+       threshold_restart_bank(&tr);
+};
+
+static int setup_APIC_mce(int reserved, int new)
+{
+       if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
+                                             APIC_EILVT_MSG_FIX, 0))
+               return new;
+
+       return reserved;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
+       struct threshold_block b;
        unsigned int cpu = smp_processor_id();
        u32 low = 0, high = 0, address = 0;
        unsigned int bank, block;
-       struct thresh_restart tr;
-       int lvt_off = -1;
-       u8 offset;
+       int offset = -1;
 
        for (bank = 0; bank < NR_BANKS; ++bank) {
                for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
                        if (shared_bank[bank] && c->cpu_core_id)
                                break;
 #endif
-                       offset = (high & MASK_LVTOFF_HI) >> 20;
-                       if (lvt_off < 0) {
-                               if (setup_APIC_eilvt(offset,
-                                                    THRESHOLD_APIC_VECTOR,
-                                                    APIC_EILVT_MSG_FIX, 0)) {
-                                       pr_err(FW_BUG "cpu %d, failed to "
-                                              "setup threshold interrupt "
-                                              "for bank %d, block %d "
-                                              "(MSR%08X=0x%x%08x)",
-                                              smp_processor_id(), bank, block,
-                                              address, high, low);
-                                       continue;
-                               }
-                               lvt_off = offset;
-                       } else if (lvt_off != offset) {
-                               pr_err(FW_BUG "cpu %d, invalid threshold "
-                                      "interrupt offset %d for bank %d,"
-                                      "block %d (MSR%08X=0x%x%08x)",
-                                      smp_processor_id(), lvt_off, bank,
-                                      block, address, high, low);
-                               continue;
-                       }
-
-                       high &= ~MASK_LVTOFF_HI;
-                       high |= lvt_off << 20;
-                       wrmsr(address, low, high);
+                       offset = setup_APIC_mce(offset,
+                                               (high & MASK_LVTOFF_HI) >> 20);
 
-                       threshold_defaults.address = address;
-                       tr.b = &threshold_defaults;
-                       tr.reset = 0;
-                       tr.old_limit = 0;
-                       threshold_restart_bank(&tr);
+                       memset(&b, 0, sizeof(b));
+                       b.cpu           = cpu;
+                       b.bank          = bank;
+                       b.block         = block;
+                       b.address       = address;
 
+                       mce_threshold_block_init(&b, offset);
                        mce_threshold_vector = amd_threshold_interrupt;
                }
        }
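
The reworked init path is easier to see without the bank/block scan around it: describe the block, reserve the LVT offset once via setup_APIC_mce() (which latches the first offset that setup_APIC_eilvt() accepts and reuses it for every later block), and let mce_threshold_block_init() program the MSR. A condensed sketch of the loop body above (high is the MSR value read earlier in the scan):

struct threshold_block b = {
        .cpu     = cpu,
        .bank    = bank,
        .block   = block,
        .address = address,
};

offset = setup_APIC_mce(offset, (high & MASK_LVTOFF_HI) >> 20);
mce_threshold_block_init(&b, offset);   /* sets limit + LVT off, enables */
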
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
 
        b->interrupt_enable = !!new;
 
+       memset(&tr, 0, sizeof(tr));
        tr.b            = b;
-       tr.reset        = 0;
-       tr.old_limit    = 0;
 
        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
        if (new < 1)
                new = 1;
 
+       memset(&tr, 0, sizeof(tr));
        tr.old_limit = b->threshold_limit;
        b->threshold_limit = new;
        tr.b = b;
-       tr.reset = 0;
 
        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
                        continue;
                err = threshold_create_bank(cpu, bank);
                if (err)
-                       goto out;
+                       return err;
        }
-out:
+
        return err;
 }
 
index 4b683267eca5fb982111375ab9e000d7eda3b437..e12246ff5aa6d2683ae8537a92da76e479a26bf7 100644
@@ -53,8 +53,13 @@ struct thermal_state {
        struct _thermal_state core_power_limit;
        struct _thermal_state package_throttle;
        struct _thermal_state package_power_limit;
+       struct _thermal_state core_thresh0;
+       struct _thermal_state core_thresh1;
 };
 
+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 
 static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
        return 0;
 }
 
+static int thresh_event_valid(int event)
+{
+       struct _thermal_state *state;
+       unsigned int this_cpu = smp_processor_id();
+       struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+       u64 now = get_jiffies_64();
+
+       state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+
+       if (time_before64(now, state->next_check))
+               return 0;
+
+       state->next_check = now + CHECK_INTERVAL;
+       return 1;
+}
+
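
thresh_event_valid() is plain jiffies-based rate limiting, one quiet window per threshold per CPU. The same gate in isolation (per-CPU and per-threshold state dropped for brevity; CHECK_INTERVAL as used in the hunk above):

static u64 next_check;

/* returns 1 at most once per CHECK_INTERVAL */
static int event_due(void)
{
        u64 now = get_jiffies_64();

        if (time_before64(now, next_check))
                return 0;               /* still inside the quiet window */
        next_check = now + CHECK_INTERVAL;
        return 1;
}
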
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
 #define PACKAGE_THROTTLED      ((__u64)2 << 62)
 #define PACKAGE_POWER_LIMIT    ((__u64)3 << 62)
 
+static void notify_thresholds(__u64 msr_val)
+{
+       /* check whether the interrupt handler is defined;
+        * otherwise simply return
+        */
+       if (!platform_thermal_notify)
+               return;
+
+       /* lower threshold reached */
+       if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
+               platform_thermal_notify(msr_val);
+       /* higher threshold reached */
+       if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+               platform_thermal_notify(msr_val);
+}
+
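
platform_thermal_notify is a bare function pointer, so a platform driver hooks into the new threshold events simply by assigning it. A hypothetical consumer, for illustration only:

/* hypothetical consumer of the new core-threshold callback */
static int my_thermal_notify(__u64 msr_val)
{
        pr_info("core thermal threshold crossed, THERM_STATUS=0x%llx\n",
                msr_val);
        return 0;
}

/* in the driver's init path: */
platform_thermal_notify = my_thermal_notify;
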
 /* Thermal transition interrupt handler */
 static void intel_thermal_interrupt(void)
 {
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)
 
        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
+       /* Check for violation of core thermal thresholds */
+       notify_thresholds(msr_val);
+
        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                                THERMAL_THROTTLING_EVENT,
                                CORE_LEVEL) != 0)
index 6d75b9145b13f0e68a106acd76b0d458c827d099..0a360d146596b6d01f8c833e655150faa3ae9605 100644
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
 {
        int i;
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               disable_lapic_nmi_watchdog();
-
        for (i = 0; i < x86_pmu.num_counters; i++) {
                if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
                        goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
        for (i--; i >= 0; i--)
                release_perfctr_nmi(x86_pmu.perfctr + i);
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               enable_lapic_nmi_watchdog();
-
        return false;
 }
 
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
                release_perfctr_nmi(x86_pmu.perfctr + i);
                release_evntsel_nmi(x86_pmu.eventsel + i);
        }
-
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               enable_lapic_nmi_watchdog();
 }
 
 #else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
 static bool check_hw_exists(void)
 {
        u64 val, val_new = 0;
-       int ret = 0;
+       int i, reg, ret = 0;
+
+       /*
+        * Check to see if the BIOS enabled any of the counters; if so,
+        * complain and bail.
+        */
+       for (i = 0; i < x86_pmu.num_counters; i++) {
+               reg = x86_pmu.eventsel + i;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+                       goto bios_fail;
+       }
 
+       if (x86_pmu.num_counters_fixed) {
+               reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+               ret = rdmsrl_safe(reg, &val);
+               if (ret)
+                       goto msr_fail;
+               for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+                       if (val & (0x03 << i*4))
+                               goto bios_fail;
+               }
+       }
+
+       /*
+        * Now write a value and read it back to see if it matches;
+        * this is needed to detect certain hardware emulators (qemu/kvm)
+        * that don't trap on the MSR access and always return 0s.
+        */
        val = 0xabcdUL;
-       ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+       ret = checking_wrmsrl(x86_pmu.perfctr, val);
        ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
        if (ret || val != val_new)
-               return false;
+               goto msr_fail;
 
        return true;
+
+bios_fail:
+       printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+       printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+       return false;
+
+msr_fail:
+       printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+       return false;
 }
 
 static void reserve_ds_buffers(void);
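
The emulator half of check_hw_exists() boils down to "write a known value, read it back": checking_wrmsrl()/rdmsrl_safe() trap the #GP an absent MSR raises, and a mismatch flags emulators that silently swallow the write. The probe in isolation, as a sketch:

/* returns true only if the MSR exists and the write sticks */
static bool msr_is_real(unsigned int msr)
{
        u64 val = 0xabcdUL, val_new = 0;

        if (checking_wrmsrl(msr, val))
                return false;           /* write faulted: MSR absent */
        if (rdmsrl_safe(msr, &val_new))
                return false;
        return val == val_new;          /* emulators often read back 0 */
}
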
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        u64 config;
 
-       if (!hwc->sample_period) {
+       if (!is_sampling_event(event)) {
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void)
        pr_info("no hardware sampling interrupt available.\n");
 }
 
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
 {
        struct event_constraint *c;
        int err;
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void)
                err = amd_pmu_init();
                break;
        default:
-               return;
+               return 0;
        }
        if (err != 0) {
                pr_cont("no PMU driver, software events only.\n");
-               return;
+               return 0;
        }
 
        pmu_check_apic();
 
        /* sanity check that the hardware exists or is emulated */
-       if (!check_hw_exists()) {
-               pr_cont("Broken PMU hardware detected, software events only.\n");
-               return;
-       }
+       if (!check_hw_exists())
+               return 0;
 
        pr_cont("%s PMU driver.\n", x86_pmu.name);
 
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void)
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
-       perf_pmu_register(&pmu);
+       perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
        perf_cpu_notifier(x86_pmu_notifier);
+
+       return 0;
 }
+early_initcall(init_hw_perf_events);
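
Converting init_hw_perf_events() to an early_initcall is what lets the explicit call be dropped from the identify_boot_cpu() hunk further up; the function only has to start returning int, since initcalls must. The general shape of such a conversion, as a sketch:

static int __init my_subsys_init(void)
{
        /* ... detection/registration, 0 on success ... */
        return 0;
}
early_initcall(my_subsys_init); /* runs before normal initcalls */
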
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
        perf_callchain_store(entry, regs->ip);
 
-       dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
+       dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
 }
 
 #ifdef CONFIG_COMPAT
index e421b8cd6944af860c4b28a1a176a14320ac9f79..67e2202a60393cd48a0f2251862c59f65bf7e667 100644
@@ -1,7 +1,5 @@
 #ifdef CONFIG_CPU_SUP_AMD
 
-static DEFINE_RAW_SPINLOCK(amd_nb_lock);
-
 static __initconst const u64 amd_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
        return &emptyconstraint;
 }
 
-static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+static struct amd_nb *amd_alloc_nb(int cpu)
 {
        struct amd_nb *nb;
        int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
        if (!nb)
                return NULL;
 
-       nb->nb_id = nb_id;
+       nb->nb_id = -1;
 
        /*
         * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
        if (boot_cpu_data.x86_max_cores < 2)
                return NOTIFY_OK;
 
-       cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+       cpuc->amd_nb = amd_alloc_nb(cpu);
        if (!cpuc->amd_nb)
                return NOTIFY_BAD;
 
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
        nb_id = amd_get_nb_id(cpu);
        WARN_ON_ONCE(nb_id == BAD_APICID);
 
-       raw_spin_lock(&amd_nb_lock);
-
        for_each_online_cpu(i) {
                nb = per_cpu(cpu_hw_events, i).amd_nb;
                if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
 
        cpuc->amd_nb->nb_id = nb_id;
        cpuc->amd_nb->refcnt++;
-
-       raw_spin_unlock(&amd_nb_lock);
 }
 
 static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
 
        cpuhw = &per_cpu(cpu_hw_events, cpu);
 
-       raw_spin_lock(&amd_nb_lock);
-
        if (cpuhw->amd_nb) {
                struct amd_nb *nb = cpuhw->amd_nb;
 
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
 
                cpuhw->amd_nb = NULL;
        }
-
-       raw_spin_unlock(&amd_nb_lock);
 }
 
 static __initconst const struct x86_pmu amd_pmu = {
index c8f5c088cad11ae3f245e1e7374bb43c915170d6..24e390e40f2e0b484d4b2b09084deb9d120d59b4 100644
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
        if (ret)
                return ret;
 
+       if (event->attr.precise_ip &&
+           (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+               /*
+                * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+                * (0x003c) so that we can use it with PEBS.
+                *
+                * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+                * PEBS capable. However we can use INST_RETIRED.ANY_P
+                * (0x00c0), which is a PEBS capable event, to get the same
+                * count.
+                *
+                * INST_RETIRED.ANY_P counts the number of cycles in which at
+                * least CNTMASK instructions retire. By setting CNTMASK to a
+                * value (16) larger than the maximum number of instructions
+                * that can retire per cycle (4) and then inverting the
+                * condition, we count all cycles that retire 16 or fewer
+                * instructions, which is every cycle.
+                *
+                * Thereby we gain a PEBS capable cycle counter.
+                */
+               u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+
+               alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+               event->hw.config = alt_config;
+       }
+
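
The alternative encoding packs the whole trick into a single raw event: INST_RETIRED.ANY_P with the invert bit set and a counter mask of 16. Decoding the 0x108000c0 constant against the x86 raw-event layout:

/* 0x108000c0 decoded:
 *   event = 0xc0            INST_RETIRED.ANY_P
 *   umask = 0x00
 *   inv   = 1  (bit 23)     invert the cmask comparison
 *   cmask = 16 (bits 24-31) compare insns retired per cycle against 16
 * every cycle retires fewer than 16 instructions, so all cycles count */
u64 alt = (16ULL << 24) | (1ULL << 23) | 0xc0;  /* == 0x108000c0 */
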
        if (event->attr.type != PERF_TYPE_RAW)
                return 0;
 
index d9f4ff8fcd693c509b2d079b381a8e8683d10d9c..d5a236615501fd6a41fb6f6bc76bfd2369a47bc5 100644
 #include <linux/kernel.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
 #include <asm/perf_event.h>
 
-struct nmi_watchdog_ctlblk {
-       unsigned int cccr_msr;
-       unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
-       unsigned int evntsel_msr;  /* the MSR to select the events to handle */
-};
-
-/* Interface defining a CPU specific perfctr watchdog */
-struct wd_ops {
-       int (*reserve)(void);
-       void (*unreserve)(void);
-       int (*setup)(unsigned nmi_hz);
-       void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
-       void (*stop)(void);
-       unsigned perfctr;
-       unsigned evntsel;
-       u64 checkbit;
-};
-
-static const struct wd_ops *wd_ops;
-
 /*
  * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
  * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
 static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
 static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
 
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-
 /* converts an msr to an appropriate reservation bit */
 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 {
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
        clear_bit(counter, evntsel_nmi_owner);
 }
 EXPORT_SYMBOL(release_evntsel_nmi);
-
-void disable_lapic_nmi_watchdog(void)
-{
-       BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-       if (atomic_read(&nmi_active) <= 0)
-               return;
-
-       on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-
-       if (wd_ops)
-               wd_ops->unreserve();
-
-       BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_lapic_nmi_watchdog(void)
-{
-       BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
-       /* are we already enabled */
-       if (atomic_read(&nmi_active) != 0)
-               return;
-
-       /* are we lapic aware */
-       if (!wd_ops)
-               return;
-       if (!wd_ops->reserve()) {
-               printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
-               return;
-       }
-
-       on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
-       touch_nmi_watchdog();
-}
-
-/*
- * Activate the NMI watchdog via the local APIC.
- */
-
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
-       u64 counter_val;
-       unsigned int retval = hz;
-
-       /*
-        * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
-        * are writable, with higher bits sign extending from bit 31.
-        * So, we can only program the counter with 31-bit values; the
-        * 32nd bit must be 1 so that bits 33 and above sign-extend to 1.
-        * Find the appropriate nmi_hz.
-        */
-       counter_val = (u64)cpu_khz * 1000;
-       do_div(counter_val, retval);
-       if (counter_val > 0x7fffffffULL) {
-               u64 count = (u64)cpu_khz * 1000;
-               do_div(count, 0x7fffffffUL);
-               retval = count + 1;
-       }
-       return retval;
-}
-
-static void write_watchdog_counter(unsigned int perfctr_msr,
-                               const char *descr, unsigned nmi_hz)
-{
-       u64 count = (u64)cpu_khz * 1000;
-
-       do_div(count, nmi_hz);
-       if (descr)
-               pr_debug("setting %s to -0x%08Lx\n", descr, count);
-       wrmsrl(perfctr_msr, 0 - count);
-}
-
-static void write_watchdog_counter32(unsigned int perfctr_msr,
-                               const char *descr, unsigned nmi_hz)
-{
-       u64 count = (u64)cpu_khz * 1000;
-
-       do_div(count, nmi_hz);
-       if (descr)
-               pr_debug("setting %s to -0x%08Lx\n", descr, count);
-       wrmsr(perfctr_msr, (u32)(-count), 0);
-}
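
Both helpers program the counter to the negative of the cycles in one watchdog period, so the overflow NMI fires nmi_hz times a second; the 32-bit variant exists because, per the comment above, only 31 bits are safely writable on P6/ARCH_PERFMON. With assumed numbers:

/* assumed: cpu_khz = 2000000 (2 GHz core), nmi_hz = 4 */
u64 count = (u64)2000000 * 1000;        /* 2e9 cycles per second */
do_div(count, 4);                       /* 5e8 cycles per watchdog period */
wrmsrl(perfctr_msr, 0 - count);         /* overflows ~4 times a second */
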
-
-/*
- * AMD K7/K8/Family10h/Family11h support.
- * AMD keeps this interface nicely stable so there is not much variety
- */
-#define K7_EVNTSEL_ENABLE      (1 << 22)
-#define K7_EVNTSEL_INT         (1 << 20)
-#define K7_EVNTSEL_OS          (1 << 17)
-#define K7_EVNTSEL_USR         (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING   0x76
-#define K7_NMI_EVENT           K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       wrmsrl(perfctr_msr, 0UL);
-
-       evntsel = K7_EVNTSEL_INT
-               | K7_EVNTSEL_OS
-               | K7_EVNTSEL_USR
-               | K7_NMI_EVENT;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
-
-       /* initialize the wd struct before enabling */
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= K7_EVNTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-
-       return 1;
-}
-
-static void single_msr_stop_watchdog(void)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int single_msr_reserve(void)
-{
-       if (!reserve_perfctr_nmi(wd_ops->perfctr))
-               return 0;
-
-       if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
-               release_perfctr_nmi(wd_ops->perfctr);
-               return 0;
-       }
-       return 1;
-}
-
-static void single_msr_unreserve(void)
-{
-       release_evntsel_nmi(wd_ops->evntsel);
-       release_perfctr_nmi(wd_ops->perfctr);
-}
-
-static void __kprobes
-single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       /* start the cycle over again */
-       write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops k7_wd_ops = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_k7_watchdog,
-       .rearm          = single_msr_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_K7_PERFCTR0,
-       .evntsel        = MSR_K7_EVNTSEL0,
-       .checkbit       = 1ULL << 47,
-};
-
-/*
- * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
- */
-#define P6_EVNTSEL0_ENABLE     (1 << 22)
-#define P6_EVNTSEL_INT         (1 << 20)
-#define P6_EVNTSEL_OS          (1 << 17)
-#define P6_EVNTSEL_USR         (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT           P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-static int setup_p6_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       /* KVM doesn't implement this MSR */
-       if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
-               return 0;
-
-       evntsel = P6_EVNTSEL_INT
-               | P6_EVNTSEL_OS
-               | P6_EVNTSEL_USR
-               | P6_NMI_EVENT;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-       write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
-
-       /* initialize the wd struct before enabling */
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= P6_EVNTSEL0_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-
-       return 1;
-}
-
-static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       /*
-        * P6-based Pentium M needs to re-unmask
-        * the apic vector, but it doesn't hurt
-        * other P6 variants.
-        * ArchPerfmon/Core Duo also needs this.
-        */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
-       /* P6/ARCH_PERFMON has 32 bit counter write */
-       write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p6_wd_ops = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_p6_watchdog,
-       .rearm          = p6_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_P6_PERFCTR0,
-       .evntsel        = MSR_P6_EVNTSEL0,
-       .checkbit       = 1ULL << 39,
-};
-
-/*
- * Intel P4 performance counters.
- * By far the most complicated of all.
- */
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL  (1 << 7)
-#define P4_ESCR_EVENT_SELECT(N)        ((N) << 25)
-#define P4_ESCR_OS             (1 << 3)
-#define P4_ESCR_USR            (1 << 2)
-#define P4_CCCR_OVF_PMI0       (1 << 26)
-#define P4_CCCR_OVF_PMI1       (1 << 27)
-#define P4_CCCR_THRESHOLD(N)   ((N) << 20)
-#define P4_CCCR_COMPLEMENT     (1 << 19)
-#define P4_CCCR_COMPARE                (1 << 18)
-#define P4_CCCR_REQUIRED       (3 << 16)
-#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
-#define P4_CCCR_ENABLE         (1 << 12)
-#define P4_CCCR_OVF            (1 << 31)
-
-#define P4_CONTROLS 18
-static unsigned int p4_controls[18] = {
-       MSR_P4_BPU_CCCR0,
-       MSR_P4_BPU_CCCR1,
-       MSR_P4_BPU_CCCR2,
-       MSR_P4_BPU_CCCR3,
-       MSR_P4_MS_CCCR0,
-       MSR_P4_MS_CCCR1,
-       MSR_P4_MS_CCCR2,
-       MSR_P4_MS_CCCR3,
-       MSR_P4_FLAME_CCCR0,
-       MSR_P4_FLAME_CCCR1,
-       MSR_P4_FLAME_CCCR2,
-       MSR_P4_FLAME_CCCR3,
-       MSR_P4_IQ_CCCR0,
-       MSR_P4_IQ_CCCR1,
-       MSR_P4_IQ_CCCR2,
-       MSR_P4_IQ_CCCR3,
-       MSR_P4_IQ_CCCR4,
-       MSR_P4_IQ_CCCR5,
-};
-/*
- * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- * CRU_ESCR0 (with any non-null event selector) through a complemented
- * max threshold. [IA32-Vol3, Section 14.9.9]
- */
-static int setup_p4_watchdog(unsigned nmi_hz)
-{
-       unsigned int perfctr_msr, evntsel_msr, cccr_msr;
-       unsigned int evntsel, cccr_val;
-       unsigned int misc_enable, dummy;
-       unsigned int ht_num;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
-       if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
-               return 0;
-
-#ifdef CONFIG_SMP
-       /* detect which hyperthread we are on */
-       if (smp_num_siblings == 2) {
-               unsigned int ebx, apicid;
-
-               ebx = cpuid_ebx(1);
-               apicid = (ebx >> 24) & 0xff;
-               ht_num = apicid & 1;
-       } else
-#endif
-               ht_num = 0;
-
-       /*
-        * performance counters are shared resources
-        * assign each hyperthread its own set
-        * (re-use the ESCR0 register, seems safe
-        * and keeps the cccr_val the same)
-        */
-       if (!ht_num) {
-               /* logical cpu 0 */
-               perfctr_msr = MSR_P4_IQ_PERFCTR0;
-               evntsel_msr = MSR_P4_CRU_ESCR0;
-               cccr_msr = MSR_P4_IQ_CCCR0;
-               cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
-
-               /*
-                * If we're on the kdump kernel or other situation, we may
-                * still have other performance counter registers set to
-                * interrupt and they'll keep interrupting forever because
-                * of the P4_CCCR_OVF quirk. So we need to ACK all the
-                * pending interrupts and disable all the registers here,
-                * before reenabling the NMI delivery. Refer to p4_rearm()
-                * about the P4_CCCR_OVF quirk.
-                */
-               if (reset_devices) {
-                       unsigned int low, high;
-                       int i;
-
-                       for (i = 0; i < P4_CONTROLS; i++) {
-                               rdmsr(p4_controls[i], low, high);
-                               low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
-                               wrmsr(p4_controls[i], low, high);
-                       }
-               }
-       } else {
-               /* logical cpu 1 */
-               perfctr_msr = MSR_P4_IQ_PERFCTR1;
-               evntsel_msr = MSR_P4_CRU_ESCR0;
-               cccr_msr = MSR_P4_IQ_CCCR1;
-
-               /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
-               if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
-                       cccr_val = P4_CCCR_OVF_PMI0;
-               else
-                       cccr_val = P4_CCCR_OVF_PMI1;
-               cccr_val |= P4_CCCR_ESCR_SELECT(4);
-       }
-
-       evntsel = P4_ESCR_EVENT_SELECT(0x3F)
-               | P4_ESCR_OS
-               | P4_ESCR_USR;
-
-       cccr_val |= P4_CCCR_THRESHOLD(15)
-                | P4_CCCR_COMPLEMENT
-                | P4_CCCR_COMPARE
-                | P4_CCCR_REQUIRED;
-
-       wrmsr(evntsel_msr, evntsel, 0);
-       wrmsr(cccr_msr, cccr_val, 0);
-       write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = cccr_msr;
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       cccr_val |= P4_CCCR_ENABLE;
-       wrmsr(cccr_msr, cccr_val, 0);
-       return 1;
-}
-
-static void stop_p4_watchdog(void)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       wrmsr(wd->cccr_msr, 0, 0);
-       wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int p4_reserve(void)
-{
-       if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
-               return 0;
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
-               goto fail1;
-#endif
-       if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
-               goto fail2;
-       /* RED-PEN why is ESCR1 not reserved here? */
-       return 1;
- fail2:
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1)
-               release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
- fail1:
-#endif
-       release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-       return 0;
-}
-
-static void p4_unreserve(void)
-{
-#ifdef CONFIG_SMP
-       if (smp_num_siblings > 1)
-               release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
-#endif
-       release_evntsel_nmi(MSR_P4_CRU_ESCR0);
-       release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-}
-
-static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
-       unsigned dummy;
-       /*
-        * P4 quirks:
-        * - An overflown perfctr will assert its interrupt
-        *   until the OVF flag in its CCCR is cleared.
-        * - LVTPC is masked on interrupt and must be
-        *   unmasked by the LVTPC handler.
-        */
-       rdmsrl(wd->cccr_msr, dummy);
-       dummy &= ~P4_CCCR_OVF;
-       wrmsrl(wd->cccr_msr, dummy);
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       /* start the cycle over again */
-       write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p4_wd_ops = {
-       .reserve        = p4_reserve,
-       .unreserve      = p4_unreserve,
-       .setup          = setup_p4_watchdog,
-       .rearm          = p4_rearm,
-       .stop           = stop_p4_watchdog,
-       /* RED-PEN this is wrong for the other sibling */
-       .perfctr        = MSR_P4_BPU_PERFCTR0,
-       .evntsel        = MSR_P4_BSU_ESCR0,
-       .checkbit       = 1ULL << 39,
-};
-
-/*
- * Watchdog using the Intel architected PerfMon.
- * Used for Core2 and hopefully all future Intel CPUs.
- */
-#define ARCH_PERFMON_NMI_EVENT_SEL     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK   ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static struct wd_ops intel_arch_wd_ops;
-
-static int setup_intel_arch_watchdog(unsigned nmi_hz)
-{
-       unsigned int ebx;
-       union cpuid10_eax eax;
-       unsigned int unused;
-       unsigned int perfctr_msr, evntsel_msr;
-       unsigned int evntsel;
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
-       /*
-        * Check whether the Architectural PerfMon supports
-        * Unhalted Core Cycles Event or not.
-        * NOTE: Corresponding bit = 0 in ebx indicates event present.
-        */
-       cpuid(10, &(eax.full), &ebx, &unused, &unused);
-       if ((eax.split.mask_length <
-                       (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
-           (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-               return 0;
-
-       perfctr_msr = wd_ops->perfctr;
-       evntsel_msr = wd_ops->evntsel;
-
-       wrmsrl(perfctr_msr, 0UL);
-
-       evntsel = ARCH_PERFMON_EVENTSEL_INT
-               | ARCH_PERFMON_EVENTSEL_OS
-               | ARCH_PERFMON_EVENTSEL_USR
-               | ARCH_PERFMON_NMI_EVENT_SEL
-               | ARCH_PERFMON_NMI_EVENT_UMASK;
-
-       /* setup the timer */
-       wrmsr(evntsel_msr, evntsel, 0);
-       nmi_hz = adjust_for_32bit_ctr(nmi_hz);
-       write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-
-       wd->perfctr_msr = perfctr_msr;
-       wd->evntsel_msr = evntsel_msr;
-       wd->cccr_msr = 0;  /* unused */
-
-       /* ok, everything is initialized, announce that we're set */
-       cpu_nmi_set_wd_enabled();
-
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-       evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
-       wrmsr(evntsel_msr, evntsel, 0);
-       intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
-       return 1;
-}
-
-static struct wd_ops intel_arch_wd_ops __read_mostly = {
-       .reserve        = single_msr_reserve,
-       .unreserve      = single_msr_unreserve,
-       .setup          = setup_intel_arch_watchdog,
-       .rearm          = p6_rearm,
-       .stop           = single_msr_stop_watchdog,
-       .perfctr        = MSR_ARCH_PERFMON_PERFCTR1,
-       .evntsel        = MSR_ARCH_PERFMON_EVENTSEL1,
-};
-
-static void probe_nmi_watchdog(void)
-{
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_AMD:
-               if (boot_cpu_data.x86 == 6 ||
-                   (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
-                       wd_ops = &k7_wd_ops;
-               return;
-       case X86_VENDOR_INTEL:
-               /* Work around where perfctr1 doesn't have a working enable
-                * bit as described in the following errata:
-                * AE49 Core Duo and Intel Core Solo 65 nm
-                * AN49 Intel Pentium Dual-Core
-                * AF49 Dual-Core Intel Xeon Processor LV
-                */
-               if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
-                   ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
-                    boot_cpu_data.x86_mask == 4))) {
-                       intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
-                       intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
-               }
-               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-                       wd_ops = &intel_arch_wd_ops;
-                       break;
-               }
-               switch (boot_cpu_data.x86) {
-               case 6:
-                       if (boot_cpu_data.x86_model > 13)
-                               return;
-
-                       wd_ops = &p6_wd_ops;
-                       break;
-               case 15:
-                       wd_ops = &p4_wd_ops;
-                       break;
-               default:
-                       return;
-               }
-               break;
-       }
-}
-
-/* Interface to nmi.c */
-
-int lapic_watchdog_init(unsigned nmi_hz)
-{
-       if (!wd_ops) {
-               probe_nmi_watchdog();
-               if (!wd_ops) {
-                       printk(KERN_INFO "NMI watchdog: CPU not supported\n");
-                       return -1;
-               }
-
-               if (!wd_ops->reserve()) {
-                       printk(KERN_ERR
-                               "NMI watchdog: cannot reserve perfctrs\n");
-                       return -1;
-               }
-       }
-
-       if (!(wd_ops->setup(nmi_hz))) {
-               printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
-                      raw_smp_processor_id());
-               return -1;
-       }
-
-       return 0;
-}
-
-void lapic_watchdog_stop(void)
-{
-       if (wd_ops)
-               wd_ops->stop();
-}
-
-unsigned lapic_adjust_nmi_hz(unsigned hz)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
-           wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
-               hz = adjust_for_32bit_ctr(hz);
-       return hz;
-}
-
-int __kprobes lapic_wd_event(unsigned nmi_hz)
-{
-       struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-       u64 ctr;
-
-       rdmsrl(wd->perfctr_msr, ctr);
-       if (ctr & wd_ops->checkbit) /* perfctr still running? */
-               return 0;
-
-       wd_ops->rearm(wd, nmi_hz);
-       return 1;
-}
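
For orientation, the deleted lapic_wd_event() above works because the perfctr is seeded with a negative count (the seeding itself happens in write_watchdog_counter(), outside this hunk): the counter's high "check" bit (p4_wd_ops.checkbit, 1ULL << 39) stays set until the counter wraps, at which point the NMI fires and the rearm path reseeds it. A minimal userspace model of that overflow test, as an illustrative sketch rather than kernel code; the 40-bit counter width is assumed from the checkbit:

#include <stdio.h>
#include <stdint.h>

#define CHECKBIT (1ULL << 39)          /* matches p4_wd_ops.checkbit */
#define CTR_MASK ((1ULL << 40) - 1)    /* model a 40-bit perfctr */

int main(void)
{
        uint64_t period = 1000;                   /* cycles per NMI tick */
        uint64_t seed = (0 - period) & CTR_MASK;  /* negative seed, as written
                                                     by write_watchdog_counter() */

        for (uint64_t elapsed = 0; elapsed <= period; elapsed += 250) {
                uint64_t ctr = (seed + elapsed) & CTR_MASK;

                printf("elapsed=%4llu ctr=%010llx -> %s\n",
                       (unsigned long long)elapsed, (unsigned long long)ctr,
                       (ctr & CHECKBIT) ? "still running" : "overflowed, rearm");
        }
        return 0;
}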
index 6e8752c1bd5241fc9e7e63ee088f06c84d0526fb..8474c998cbd40d2f3481f87879f5a42d1f105e52 100644 (file)
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
 
 void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp, char *log_lvl)
+               unsigned long *stack, char *log_lvl)
 {
        printk("%sCall Trace:\n", log_lvl);
-       dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+       dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
 }
 
 void show_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp)
+               unsigned long *stack)
 {
-       show_trace_log_lvl(task, regs, stack, bp, "");
+       show_trace_log_lvl(task, regs, stack, "");
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-       show_stack_log_lvl(task, NULL, sp, 0, "");
+       show_stack_log_lvl(task, NULL, sp, "");
 }
 
 /*
@@ -210,7 +210,7 @@ void dump_stack(void)
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
-       show_trace(NULL, NULL, &stack, bp);
+       show_trace(NULL, NULL, &stack);
 }
 EXPORT_SYMBOL(dump_stack);
 
index 1bc7f75a5bdaf823999f7b90271808819b6f3ad3..74cc1eda384b8d26437a10fa55e3f64cfcf4545a 100644 (file)
 #include <asm/stacktrace.h>
 
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+               struct pt_regs *regs, unsigned long *stack,
                const struct stacktrace_ops *ops, void *data)
 {
        int graph = 0;
+       unsigned long bp;
 
        if (!task)
                task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                        stack = (unsigned long *)task->thread.sp;
        }
 
-#ifdef CONFIG_FRAME_POINTER
-       if (!bp) {
-               if (task == current) {
-                       /* Grab bp right from our regs */
-                       get_bp(bp);
-               } else {
-                       /* bp is the last reg pushed by switch_to */
-                       bp = *(unsigned long *) task->thread.sp;
-               }
-       }
-#endif
-
+       bp = stack_frame(task, regs);
        for (;;) {
                struct thread_info *context;
 
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                  unsigned long *sp, unsigned long bp, char *log_lvl)
+                  unsigned long *sp, char *log_lvl)
 {
        unsigned long *stack;
        int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
                touch_nmi_watchdog();
        }
        printk(KERN_CONT "\n");
-       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+       show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
                u8 *ip;
 
                printk(KERN_EMERG "Stack:\n");
-               show_stack_log_lvl(NULL, regs, &regs->sp,
-                               0, KERN_EMERG);
+               show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
 
                printk(KERN_EMERG "Code: ");
 
index 6a340485249a965f29686ac84117458d6cb72be6..64101335de19aad09ec03d75dea8260b28051747 100644 (file)
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-               unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+               struct pt_regs *regs, unsigned long *stack,
                const struct stacktrace_ops *ops, void *data)
 {
        const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
        unsigned used = 0;
        struct thread_info *tinfo;
        int graph = 0;
+       unsigned long bp;
 
        if (!task)
                task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
                        stack = (unsigned long *)task->thread.sp;
        }
 
-#ifdef CONFIG_FRAME_POINTER
-       if (!bp) {
-               if (task == current) {
-                       /* Grab bp right from our regs */
-                       get_bp(bp);
-               } else {
-                       /* bp is the last reg pushed by switch_to */
-                       bp = *(unsigned long *) task->thread.sp;
-               }
-       }
-#endif
-
+       bp = stack_frame(task, regs);
        /*
         * Print function call entries in all stacks, starting at the
         * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
 
 void
 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-                  unsigned long *sp, unsigned long bp, char *log_lvl)
+                  unsigned long *sp, char *log_lvl)
 {
        unsigned long *irq_stack_end;
        unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
        preempt_enable();
 
        printk(KERN_CONT "\n");
-       show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+       show_trace_log_lvl(task, regs, sp, log_lvl);
 }
 
 void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
 
                printk(KERN_EMERG "Stack:\n");
                show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-                               regs->bp, KERN_EMERG);
+                                  KERN_EMERG);
 
                printk(KERN_EMERG "Code: ");
 
index 4572f25f93255f8bb4a5e5158d3c9949f912b657..cd28a350f7f933162a6fa9bd0de08c64e22495f2 100644 (file)
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
                if (!strncmp(buf, "xen", 3))
                        early_console_register(&xenboot_console, keep);
 #endif
-#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+#ifdef CONFIG_EARLY_PRINTK_MRST
                if (!strncmp(buf, "mrst", 4)) {
                        mrst_early_console_init();
                        early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
                        hsu_early_console_init();
                        early_console_register(&early_hsu_console, keep);
                }
-
 #endif
                buf++;
        }
index 3afb33f14d2d2c86a3c961d87aaae531d2631ac8..298448656b6079d074232518cb16e50895b4a5b8 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/module.h>
 
 #include <trace/syscall.h>
 
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
 int ftrace_arch_code_modify_prepare(void)
 {
        set_kernel_text_rw();
+       set_all_modules_text_rw();
        modifying_code = 1;
        return 0;
 }
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
 int ftrace_arch_code_modify_post_process(void)
 {
        modifying_code = 0;
+       set_all_modules_text_ro();
        set_kernel_text_ro();
        return 0;
 }
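
The two hunks above keep the prepare/post_process pair symmetric: module text is made writable alongside kernel text before live patching and flipped back to read-only afterwards. A sketch of the bracketing a caller relies on (illustrative only, not actual ftrace core code):

static int modify_some_code(void)
{
        int ret = ftrace_arch_code_modify_prepare();   /* text rw, modifying_code = 1 */

        if (ret)
                return ret;
        /* ... patch mcount call sites here ... */
        return ftrace_arch_code_modify_post_process(); /* text ro again */
}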
index 763310165fa0d1b0e4bf1891632a729e289980e8..7f138b3c3c52cf2d6790d8308fdc9a6dae8b41b7 100644 (file)
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
        case X86_SUBARCH_MRST:
                x86_mrst_early_setup();
                break;
+       case X86_SUBARCH_CE4100:
+               x86_ce4100_early_setup();
+               break;
        default:
                i386_default_early_setup();
                break;
index c0dbd9ac24f0d5cf7e87f8f0439275656b877f73..5707fc8a7a4bcc8527178cb52ca38afad747ee5b 100644 (file)
@@ -316,6 +316,10 @@ ENTRY(startup_32_smp)
        subl $0x80000001, %eax
        cmpl $(0x8000ffff-0x80000001), %eax
        ja 6f
+
+       /* Clear bogus XD_DISABLE bits */
+       call verify_cpu
+
        mov $0x80000001, %eax
        cpuid
        /* Execute Disable bit supported? */
@@ -611,6 +615,8 @@ ignore_int:
 #endif
        iret
 
+#include "verify_cpu.S"
+
        __REFDATA
 .align 4
 ENTRY(initial_code)
index 1cbd54c0df99189548a3a03f40fbb75a1703475a..5940282bd2f94ed886226bc717c189e593adab50 100644 (file)
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+       /* This is possible if op is under delayed unoptimization */
+       if (kprobe_disabled(&op->kp))
+               return;
+
        preempt_disable();
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
        return 0;
 }
 
-/* Replace a breakpoint (int3) with a relative jump.  */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+       u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+                                           u8 *insn_buf,
+                                           struct optimized_kprobe *op)
 {
-       unsigned char jmp_code[RELATIVEJUMP_SIZE];
        s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));
 
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
        memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
               RELATIVE_ADDR_SIZE);
 
-       jmp_code[0] = RELATIVEJUMP_OPCODE;
-       *(s32 *)(&jmp_code[1]) = rel;
+       insn_buf[0] = RELATIVEJUMP_OPCODE;
+       *(s32 *)(&insn_buf[1]) = rel;
+
+       tprm->addr = op->kp.addr;
+       tprm->opcode = insn_buf;
+       tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * The caller must hold kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+       struct optimized_kprobe *op, *tmp;
+       int c = 0;
+
+       list_for_each_entry_safe(op, tmp, oplist, list) {
+               WARN_ON(kprobe_disabled(&op->kp));
+               /* Setup param */
+               setup_optimize_kprobe(&jump_poke_params[c],
+                                     jump_poke_bufs[c].buf, op);
+               list_del_init(&op->list);
+               if (++c >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
 
        /*
         * text_poke_smp doesn't support NMI/MCE code modifying.
         * However, since kprobes itself also doesn't support NMI/MCE
         * code probing, it's not a problem.
         */
-       text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
-       return 0;
+       text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+                                             u8 *insn_buf,
+                                             struct optimized_kprobe *op)
+{
+       /* Set int3 to first byte for kprobes */
+       insn_buf[0] = BREAKPOINT_INSTRUCTION;
+       memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+       tprm->addr = op->kp.addr;
+       tprm->opcode = insn_buf;
+       tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * The caller must hold kprobe_mutex.
+ */
+void arch_unoptimize_kprobes(struct list_head *oplist,
+                            struct list_head *done_list)
+{
+       struct optimized_kprobe *op, *tmp;
+       int c = 0;
+
+       list_for_each_entry_safe(op, tmp, oplist, list) {
+               /* Setup param */
+               setup_unoptimize_kprobe(&jump_poke_params[c],
+                                       jump_poke_bufs[c].buf, op);
+               list_move(&op->list, done_list);
+               if (++c >= MAX_OPTIMIZE_PROBES)
+                       break;
+       }
+
+       /*
+        * text_poke_smp doesn't support NMI/MCE code modifying.
+        * However, since kprobes itself also doesn't support NMI/MCE
+        * code probing, it's not a problem.
+        */
+       text_poke_smp_batch(jump_poke_params, c);
 }
 
 /* Replace a relative jump with a breakpoint (int3).  */
@@ -1453,11 +1526,35 @@ static int  __kprobes setup_detour_execution(struct kprobe *p,
        }
        return 0;
 }
+
+static int __kprobes init_poke_params(void)
+{
+       /* Allocate code buffer and parameter array */
+       jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+                                MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+       if (!jump_poke_bufs)
+               return -ENOMEM;
+
+       jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+                                  MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+       if (!jump_poke_params) {
+               kfree(jump_poke_bufs);
+               jump_poke_bufs = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+#else  /* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+       return 0;
+}
 #endif
 
 int __init arch_init_kprobes(void)
 {
-       return 0;
+       return init_poke_params();
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
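
The batching above exists so that the expensive cross-CPU synchronization behind text_poke_smp_batch() runs once per group of probes instead of once per probe, as the per-probe text_poke_smp() call did. A userspace model of the staging pattern (a sketch; the names are stand-ins for the kernel symbols above):

#include <stdio.h>

#define MAX_BATCH 256                    /* stands in for MAX_OPTIMIZE_PROBES */

struct poke_param { int addr; };         /* stands in for text_poke_param */

static struct poke_param params[MAX_BATCH];

static void flush_batch(int n)           /* stands in for text_poke_smp_batch() */
{
        printf("one synchronized poke pass covering %d probes\n", n);
}

int main(void)
{
        int nprobes = 600, staged = 0;

        for (int i = 0; i < nprobes; i++) {
                params[staged].addr = i; /* setup_optimize_kprobe()'s role */
                if (++staged == MAX_BATCH) {
                        flush_batch(staged);
                        staged = 0;
                }
        }
        if (staged)
                flush_batch(staged);     /* 600 probes -> 3 passes, not 600 */
        return 0;
}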
index ce0cb4721c9ac9eec8869e64c1fcd1a1ef0fd379..0fe6d1a66c38cf0aaea3383ac000eefbb5d34fca 100644 (file)
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
        return 0;
 }
 
-static int get_ucode_data(void *to, const u8 *from, size_t n)
-{
-       memcpy(to, from, n);
-       return 0;
-}
-
 static void *
 get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
 {
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
        u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
        void *mc;
 
-       if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
-               return NULL;
+       get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
 
        if (section_hdr[0] != UCODE_UCODE_TYPE) {
                pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
                return NULL;
        }
 
-       mc = vmalloc(UCODE_MAX_SIZE);
-       if (mc) {
-               memset(mc, 0, UCODE_MAX_SIZE);
-               if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
-                                  total_size)) {
-                       vfree(mc);
-                       mc = NULL;
-               } else
-                       *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
-       }
+       mc = vzalloc(UCODE_MAX_SIZE);
+       if (!mc)
+               return NULL;
+
+       get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
+       *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+
        return mc;
 }
 
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
        unsigned int *buf_pos = (unsigned int *)container_hdr;
        unsigned long size;
 
-       if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
-               return 0;
+       get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
 
        size = buf_pos[2];
 
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
        }
 
        buf += UCODE_CONTAINER_HEADER_SIZE;
-       if (get_ucode_data(equiv_cpu_table, buf, size)) {
-               vfree(equiv_cpu_table);
-               return 0;
-       }
+       get_ucode_data(equiv_cpu_table, buf, size);
 
        return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
 }
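
The conversion above leans on vzalloc() returning already-zeroed memory, exactly like the vmalloc()+memset() pair it replaces, and on get_ucode_data() now being a plain copy helper that cannot fail. A userspace illustration of the allocate-and-zero contract, modeled with calloc (a sketch only):

#include <assert.h>
#include <stdlib.h>

static void *toy_vzalloc(size_t size)
{
        return calloc(1, size);          /* vmalloc + memset(.., 0, ..) in one step */
}

int main(void)
{
        unsigned char *mc = toy_vzalloc(4096);

        assert(mc && mc[0] == 0 && mc[4095] == 0);
        free(mc);
        return 0;
}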
index dcb65cc0a05368ca096e4d907bffe63f9525fb93..1a1b606d3e92a879183cd720e3072b4b0832e579 100644 (file)
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
                /* For performance reasons, reuse mc area when possible */
                if (!mc || mc_size > curr_mc_size) {
-                       if (mc)
-                               vfree(mc);
+                       vfree(mc);
                        mc = vmalloc(mc_size);
                        if (!mc)
                                break;
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
                if (get_ucode_data(mc, ucode_ptr, mc_size) ||
                    microcode_sanity_check(mc) < 0) {
-                       vfree(mc);
                        break;
                }
 
                if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
-                       if (new_mc)
-                               vfree(new_mc);
+                       vfree(new_mc);
                        new_rev = mc_header.rev;
                        new_mc  = mc;
                        mc = NULL;      /* trigger new vmalloc */
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
                leftover  -= mc_size;
        }
 
-       if (mc)
-               vfree(mc);
+       vfree(mc);
 
        if (leftover) {
-               if (new_mc)
-                       vfree(new_mc);
+               vfree(new_mc);
                state = UCODE_ERROR;
                goto out;
        }
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
                goto out;
        }
 
-       if (uci->mc)
-               vfree(uci->mc);
+       vfree(uci->mc);
        uci->mc = (struct microcode_intel *)new_mc;
 
        pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
index ba0f0ca9f280bb0473470fdb71a3fa04faec00f0..c01ffa5b9b87e509da797e359577bbb84463ce5f 100644 (file)
@@ -143,7 +143,7 @@ static void flush_gart(void)
 
        spin_lock_irqsave(&iommu_bitmap_lock, flags);
        if (need_flush) {
-               k8_flush_garts();
+               amd_flush_garts();
                need_flush = false;
        }
        spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
 {
        int i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
 
                enable_gart_translation(dev, __pa(agp_gatt_table));
        }
 
        /* Flush the GART-TLB to remove stale entries */
-       k8_flush_garts();
+       amd_flush_garts();
 }
 
 /*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
        if (!fix_up_north_bridges)
                return;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
        pr_info("PCI-DMA: Restoring GART aperture settings\n");
 
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
 
                /*
                 * Don't enable translations just yet.  That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
  * Private Northbridge GATT initialization in case we cannot use the
  * AGP driver for some reason.
  */
-static __init int init_k8_gatt(struct agp_kern_info *info)
+static __init int init_amd_gatt(struct agp_kern_info *info)
 {
        unsigned aper_size, gatt_size, new_aper_size;
        unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
        aper_size = aper_base = info->aper_size = 0;
        dev = NULL;
-       for (i = 0; i < k8_northbridges.num; i++) {
-               dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               dev = node_to_amd_nb(i)->misc;
                new_aper_base = read_aperture(dev, &new_aper_size);
                if (!new_aper_base)
                        goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
        if (!no_agp)
                return;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
-       for (i = 0; i < k8_northbridges.num; i++) {
+       for (i = 0; i < amd_nb_num(); i++) {
                u32 ctl;
 
-               dev = k8_northbridges.nb_misc[i];
+               dev = node_to_amd_nb(i)->misc;
                pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
 
                ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
        unsigned long scratch;
        long i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return 0;
 
 #ifndef CONFIG_AGP_AMD64
        no_agp = 1;
 #else
        /* Makefile puts PCI initialization via subsys_initcall first. */
-       /* Add other K8 AGP bridge drivers here */
+       /* Add other AMD AGP bridge drivers here */
        no_agp = no_agp ||
                (agp_amd64_init() < 0) ||
                (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
        if (no_iommu ||
            (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
            !gart_iommu_aperture ||
-           (no_agp && init_k8_gatt(&info) < 0)) {
+           (no_agp && init_amd_gatt(&info) < 0)) {
                if (max_pfn > MAX_DMA32_PFN) {
                        pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
                        pr_warning("falling back to iommu=soft.\n");
index 57d1868a86aadc060bc2260b34139809a98ffab5..c852041bfc3d5b70e792dad0cfd42fdd467c3f38 100644 (file)
@@ -91,8 +91,7 @@ void exit_thread(void)
 void show_regs(struct pt_regs *regs)
 {
        show_registers(regs);
-       show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
-                  regs->bp);
+       show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
 }
 
 void show_regs_common(void)
@@ -374,6 +373,7 @@ void default_idle(void)
 {
        if (hlt_use_halt()) {
                trace_power_start(POWER_CSTATE, 1, smp_processor_id());
+               trace_cpu_idle(1, smp_processor_id());
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we
@@ -444,6 +444,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
        trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
+       trace_cpu_idle((ax>>4)+1, smp_processor_id());
        if (!need_resched()) {
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
@@ -460,6 +461,7 @@ static void mwait_idle(void)
 {
        if (!need_resched()) {
                trace_power_start(POWER_CSTATE, 1, smp_processor_id());
+               trace_cpu_idle(1, smp_processor_id());
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
 
@@ -481,10 +483,12 @@ static void mwait_idle(void)
 static void poll_idle(void)
 {
        trace_power_start(POWER_CSTATE, 0, smp_processor_id());
+       trace_cpu_idle(0, smp_processor_id());
        local_irq_enable();
        while (!need_resched())
                cpu_relax();
-       trace_power_end(0);
+       trace_power_end(smp_processor_id());
+       trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 /*
index 96586c3cbbbf88dd6479ed250b24ea1112154a22..4b9befa0e347f6f402238d28fea4f9969563a6dd 100644 (file)
@@ -113,8 +113,8 @@ void cpu_idle(void)
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
-
                        trace_power_end(smp_processor_id());
+                       trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
                }
                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
index b3d7a3a04f389d9626837a1e776c5106e821f7dc..4c818a73839685c3b1083d2170f02e6758a3bed7 100644 (file)
@@ -142,6 +142,8 @@ void cpu_idle(void)
                        start_critical_timings();
 
                        trace_power_end(smp_processor_id());
+                       trace_cpu_idle(PWR_EVENT_EXIT,
+                                      smp_processor_id());
 
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
index fda313ebbb03dfc98d5d0f953a1b99846e25dc9a..c8e41e90f59ceb9da7be768fcfe197a83602742e 100644 (file)
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
        outb(1, 0x92);
 }
 
+static void ce4100_reset(struct pci_dev *dev)
+{
+       int i;
+
+       for (i = 0; i < 10; i++) {
+               outb(0x2, 0xcf9);
+               udelay(50);
+       }
+}
+
 struct device_fixup {
        unsigned int vendor;
        unsigned int device;
        void (*reboot_fixup)(struct pci_dev *);
 };
 
+/*
+ * PCI IDs used solely by fixups_table go here.
+ */
+#define PCI_DEVICE_ID_INTEL_CE4100     0x0708
+
 static const struct device_fixup fixups_table[] = {
 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
 { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
 { PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
+{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
 };
 
 /*
index 85268f8eadf667c6034c06882ed661cb7d03e4a8..d3cfe26c0252ab24aaae1481c8c297ac096e6c6d 100644 (file)
@@ -501,7 +501,18 @@ static inline unsigned long long get_total_mem(void)
        return total << PAGE_SHIFT;
 }
 
-#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF
+/*
+ * Keep the crash kernel below this limit.  On 32-bit, earlier kernels
+ * limited the crash kernel to the low 512 MiB because of mapping
+ * restrictions.  On 64-bit, kexec-tools currently limits us to 896 MiB;
+ * raise this limit once kexec-tools is fixed.
+ */
+#ifdef CONFIG_X86_32
+# define CRASH_KERNEL_ADDR_MAX (512 << 20)
+#else
+# define CRASH_KERNEL_ADDR_MAX (896 << 20)
+#endif
+
 static void __init reserve_crashkernel(void)
 {
        unsigned long long total_mem;
@@ -520,10 +531,10 @@ static void __init reserve_crashkernel(void)
                const unsigned long long alignment = 16<<20;    /* 16M */
 
                /*
-                *  kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX
+                *  kexec wants the bzImage below CRASH_KERNEL_ADDR_MAX
                 */
                crash_base = memblock_find_in_range(alignment,
-                              DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment);
+                              CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
 
                if (crash_base == MEMBLOCK_ERROR) {
                        pr_info("crashkernel reservation failed - No suitable area found.\n");
@@ -694,7 +705,7 @@ static u64 __init get_max_mapped(void)
 void __init setup_arch(char **cmdline_p)
 {
        int acpi = 0;
-       int k8 = 0;
+       int amd = 0;
        unsigned long flags;
 
 #ifdef CONFIG_X86_32
@@ -980,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
        acpi = acpi_numa_init();
 #endif
 
-#ifdef CONFIG_K8_NUMA
+#ifdef CONFIG_AMD_NUMA
        if (!acpi)
-               k8 = !k8_numa_init(0, max_pfn);
+               amd = !amd_numa_init(0, max_pfn);
 #endif
 
-       initmem_init(0, max_pfn, acpi, k8);
+       initmem_init(0, max_pfn, acpi, amd);
        memblock_find_dma_reserve();
        dma32_reserve_bootmem();
 
@@ -1034,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        init_apic_mappings();
-       ioapic_init_mappings();
-
-       /* need to wait for io_apic is mapped */
-       probe_nr_irqs_gsi();
+       ioapic_and_gsi_init();
 
        kvm_guest_init();
 
index 083e99d1b7df2aba236563467f47ebb21a09943d..ee886fe10ef4eb8515ae1d2431c0ce8ad3e8dd89 100644 (file)
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
         */
        smp_store_cpu_info(cpuid);
 
+       /*
+        * This must be done before setting cpu_online_mask
+        * or calling notify_cpu_starting.
+        */
+       set_cpu_sibling_map(raw_smp_processor_id());
+       wmb();
+
        notify_cpu_starting(cpuid);
 
        /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
         */
        check_tsc_sync_target();
 
-       if (nmi_watchdog == NMI_IO_APIC) {
-               legacy_pic->mask(0);
-               enable_NMI_through_LVT0();
-               legacy_pic->unmask(0);
-       }
-
-       /* This must be done before setting cpu_online_mask */
-       set_cpu_sibling_map(raw_smp_processor_id());
-       wmb();
-
        /*
         * We need to hold call_lock, so there is no inconsistency
         * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
                printk(KERN_INFO "SMP mode deactivated.\n");
                smpboot_clear_io_apic();
 
-               localise_nmi_watchdog();
-
                connect_bsp_APIC();
                setup_local_APIC();
                end_local_APIC_setup();
@@ -1166,6 +1161,20 @@ out:
        preempt_enable();
 }
 
+void arch_disable_nonboot_cpus_begin(void)
+{
+       /*
+        * Avoid the SMP alternatives switch during disable_nonboot_cpus().
+        * On the suspend path we will be back in SMP mode shortly anyway.
+        */
+       skip_smp_alternatives = true;
+}
+
+void arch_disable_nonboot_cpus_end(void)
+{
+       skip_smp_alternatives = false;
+}
+
 void arch_enable_nonboot_cpus_begin(void)
 {
        set_mtrr_aps_delayed_init();
@@ -1196,7 +1205,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 #ifdef CONFIG_X86_IO_APIC
        setup_ioapic_dest();
 #endif
-       check_nmi_watchdog();
        mtrr_aps_init();
 }
 
@@ -1341,8 +1349,6 @@ int native_cpu_disable(void)
        if (cpu == 0)
                return -EBUSY;
 
-       if (nmi_watchdog == NMI_LOCAL_APIC)
-               stop_apic_nmi_watchdog(NULL);
        clear_local_APIC();
 
        cpu_disable_common();
index b53c525368a75cf07489b0327de138bfab5b16d5..938c8e10a19abeae0e2c814e182a0920ff0328f9 100644 (file)
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-       dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
+       dump_trace(current, NULL, NULL, &save_stack_ops, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
-void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
 {
-       dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+       dump_trace(current, regs, NULL, &save_stack_ops, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-       dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
+       dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
        if (trace->nr_entries < trace->max_entries)
                trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
index fb5cc5e14cfafb6ed62c4de2929b3bfa0a2e91e9..25a28a245937989d2de9abfb94d96835a1b01615 100644 (file)
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-int timer_ack;
-#endif
-
 #ifdef CONFIG_X86_64
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
        /* Keep nmi watchdog up to date */
        inc_irq_stat(irq0_irqs);
 
-       /* Optimized out for !IO_APIC and x86_64 */
-       if (timer_ack) {
-               /*
-                * Subtle, when I/O APICs are used we have to ack timer IRQ
-                * manually to deassert NMI lines for the watchdog if run
-                * on an 82489DX-based system.
-                */
-               raw_spin_lock(&i8259A_lock);
-               outb(0x0c, PIC_MASTER_OCW3);
-               /* Ack the IRQ; AEOI will end it automatically. */
-               inb(PIC_MASTER_POLL);
-               raw_spin_unlock(&i8259A_lock);
-       }
-
        global_clock_event->event_handler(global_clock_event);
 
        /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
index 3af2dff58b213262d403a8f9c1c55f84920b1d8d..075d130efcf9019eb3f4745677e281533335fa11 100644 (file)
@@ -127,7 +127,7 @@ startup_64:
 no_longmode:
        hlt
        jmp no_longmode
-#include "verify_cpu_64.S"
+#include "verify_cpu.S"
 
        # Careful these need to be in the same 64K segment as the above;
 tidt:
index cb838ca42c9664c2ecf9530d170d27cdbc679f7a..c76aaca5694dd88c6fee8af964e8c62ccd3ad272 100644 (file)
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+int unknown_nmi_panic;
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
        if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@ gp_in_kernel:
        die("general protection fault", regs, error_code);
 }
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+       unknown_nmi_panic = 1;
+       return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static notrace __kprobes void
 mem_parity_error(unsigned char reason, struct pt_regs *regs)
 {
@@ -342,9 +351,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
        reason = (reason & 0xf) | 8;
        outb(reason, 0x61);
 
-       i = 2000;
-       while (--i)
-               udelay(1000);
+       i = 20000;
+       while (--i) {
+               touch_nmi_watchdog();
+               udelay(100);
+       }
 
        reason &= ~8;
        outb(reason, 0x61);
@@ -371,7 +382,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
                        reason, smp_processor_id());
 
        printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-       if (panic_on_unrecovered_nmi)
+       if (unknown_nmi_panic || panic_on_unrecovered_nmi)
                panic("NMI: Not continuing");
 
        printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +408,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
                if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
                                                        == NOTIFY_STOP)
                        return;
-
-#ifndef CONFIG_LOCKUP_DETECTOR
-               /*
-                * Ok, so this is none of the documented NMI sources,
-                * so it must be the NMI watchdog.
-                */
-               if (nmi_watchdog_tick(regs, reason))
-                       return;
-               if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-                       unknown_nmi_error(reason, regs);
-#else
-               unknown_nmi_error(reason, regs);
 #endif
+               unknown_nmi_error(reason, regs);
 
                return;
        }
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 void stop_nmi(void)
 {
-       acpi_nmi_disable();
        ignore_nmis++;
 }
 
 void restart_nmi(void)
 {
        ignore_nmis--;
-       acpi_nmi_enable();
 }
 
 /* May run on IST stack. */
index 0c40d8b72416ba2ef7e86bfd812b7bf6f1db2f8f..356a0d455cf997cb1bd586d3a13fd8a7c16d4d3c 100644 (file)
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)
 
        if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                return 0;
+
+       if (tsc_clocksource_reliable)
+               return 0;
        /*
         * Intel systems are normally all synchronized.
         * Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
                /* assume multi socket systems are not synchronized: */
                if (num_possible_cpus() > 1)
-                       tsc_unstable = 1;
+                       return 1;
        }
 
-       return tsc_unstable;
+       return 0;
+}
+
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work: ignored.
+ *
+ * This function uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc.),
+ * or the refined calibration is off by more than 1% from the
+ * fast early calibration, we throw out the new calibration and
+ * use the early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
+{
+       static u64 tsc_start = -1, ref_start;
+       static int hpet;
+       u64 tsc_stop, ref_stop, delta;
+       unsigned long freq;
+
+       /* Don't bother refining TSC on unstable systems */
+       if (check_tsc_unstable())
+               goto out;
+
+       /*
+        * Since the work is started early in boot, we may be
+        * delayed the first time we expire. So set the workqueue
+        * again once we know timers are working.
+        */
+       if (tsc_start == -1) {
+               /*
+                * Only set hpet once, to avoid mixing hardware
+                * if the hpet becomes enabled later.
+                */
+               hpet = is_hpet_enabled();
+               schedule_delayed_work(&tsc_irqwork, HZ);
+               tsc_start = tsc_read_refs(&ref_start, hpet);
+               return;
+       }
+
+       tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+       /* hpet or pmtimer available? */
+       if (!hpet && !ref_start && !ref_stop)
+               goto out;
+
+       /* Check whether the sampling was disturbed by an SMI */
+       if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+               goto out;
+
+       delta = tsc_stop - tsc_start;
+       delta *= 1000000LL;
+       if (hpet)
+               freq = calc_hpet_ref(delta, ref_start, ref_stop);
+       else
+               freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+       /* Make sure we're within 1% */
+       if (abs(tsc_khz - freq) > tsc_khz/100)
+               goto out;
+
+       tsc_khz = freq;
+       printk(KERN_INFO "Refined TSC clocksource calibration: "
+               "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+                                       (unsigned long)tsc_khz % 1000);
+
+out:
+       clocksource_register_khz(&clocksource_tsc, tsc_khz);
 }
 
-static void __init init_tsc_clocksource(void)
+
+static int __init init_tsc_clocksource(void)
 {
+       if (!cpu_has_tsc || tsc_disabled > 0)
+               return 0;
+
        if (tsc_clocksource_reliable)
                clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
        /* lower the rating if we already know its unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
                clocksource_tsc.rating = 0;
                clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
        }
-       clocksource_register_khz(&clocksource_tsc, tsc_khz);
+       schedule_delayed_work(&tsc_irqwork, 0);
+       return 0;
 }
+/*
+ * We use device_initcall here to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
                mark_tsc_unstable("TSCs unsynchronized");
 
        check_system_tsc_reliable();
-       init_tsc_clocksource();
 }
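
The refinement above is kept only when it lands within 1% of the fast early calibration: abs(tsc_khz - freq) > tsc_khz/100 throws it out. With a 2,666,000 kHz early value the tolerance is 26,660 kHz, so a refined 2,672,000 kHz reading is accepted. The same check in standalone form (a sketch with sample numbers):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        unsigned long tsc_khz = 2666000;   /* fast early calibration */
        unsigned long freq    = 2672000;   /* refined one-second calibration */
        long diff = (long)tsc_khz - (long)freq;

        if (labs(diff) > (long)(tsc_khz / 100))
                printf("refined value rejected, keeping %lu kHz\n", tsc_khz);
        else
                printf("Refined TSC calibration: %lu.%03lu MHz\n",
                       freq / 1000, freq % 1000);
        return 0;
}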
 
similarity index 65%
rename from arch/x86/kernel/verify_cpu_64.S
rename to arch/x86/kernel/verify_cpu.S
index 56a8c2a867d9af28e0c24ee6a7dff4122f73685b..0edefc19a113290792667afe816bfdecaa0c8fc3 100644 (file)
@@ -7,6 +7,7 @@
  *     Copyright (c) 2007  Andi Kleen (ak@suse.de)
  *     Copyright (c) 2007  Eric Biederman (ebiederm@xmission.com)
  *     Copyright (c) 2007  Vivek Goyal (vgoyal@in.ibm.com)
+ *     Copyright (c) 2010  Kees Cook (kees.cook@canonical.com)
  *
  *     This source code is licensed under the GNU General Public License,
  *     Version 2.  See the file COPYING for more details.
  *     This is a common code for verification whether CPU supports
  *     long mode and SSE or not. It is not called directly instead this
  *     file is included at various places and compiled in that context.
- *     Following are the current usage.
+ *     This file is expected to run in 32bit code.  Currently:
  *
- *     This file is included by both 16bit and 32bit code.
+ *     arch/x86/boot/compressed/head_64.S: Boot cpu verification
+ *     arch/x86/kernel/trampoline_64.S: secondary processor verification
+ *     arch/x86/kernel/head_32.S: processor startup
  *
- *     arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
- *     arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
- *     arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
- *     arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
- *
- *     verify_cpu, returns the status of cpu check in register %eax.
+ *     verify_cpu returns the status of longmode and SSE in register %eax.
  *             0: Success    1: Failure
  *
+ *     On Intel, the XD_DISABLE flag will be cleared as a side-effect.
+ *
  *     The caller needs to check for the error code and take the action
  *     appropriately. Either display a message or halt.
  */
@@ -62,8 +62,41 @@ verify_cpu:
        cmpl    $0x444d4163,%ecx
        jnz     verify_cpu_noamd
        mov     $1,%di                  # cpu is from AMD
+       jmp     verify_cpu_check
 
 verify_cpu_noamd:
+       cmpl    $0x756e6547,%ebx        # GenuineIntel?
+       jnz     verify_cpu_check
+       cmpl    $0x49656e69,%edx
+       jnz     verify_cpu_check
+       cmpl    $0x6c65746e,%ecx
+       jnz     verify_cpu_check
+
+       # only call IA32_MISC_ENABLE when:
+       # family > 6 || (family == 6 && model >= 0xd)
+       movl    $0x1, %eax              # check CPU family and model
+       cpuid
+       movl    %eax, %ecx
+
+       andl    $0x0ff00f00, %eax       # mask family and extended family
+       shrl    $8, %eax
+       cmpl    $6, %eax
+       ja      verify_cpu_clear_xd     # family > 6, ok
+       jb      verify_cpu_check        # family < 6, skip
+
+       andl    $0x000f00f0, %ecx       # mask model and extended model
+       shrl    $4, %ecx
+       cmpl    $0xd, %ecx
+       jb      verify_cpu_check        # family == 6, model < 0xd, skip
+
+verify_cpu_clear_xd:
+       movl    $MSR_IA32_MISC_ENABLE, %ecx
+       rdmsr
+       btrl    $2, %edx                # clear MSR_IA32_MISC_ENABLE_XD_DISABLE
+       jnc     verify_cpu_check        # only write MSR if bit was changed
+       wrmsr
+
+verify_cpu_check:
        movl    $0x1,%eax               # Does the cpu have what it takes
        cpuid
        andl    $REQUIRED_MASK0,%edx
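
The new assembly above touches MSR_IA32_MISC_ENABLE only on GenuineIntel parts with family > 6, or family 6 and model >= 0xd; the masks fold the extended family/model bits in before the compares. An illustrative C rendering of that gate (not kernel source):

#include <stdio.h>

static int should_clear_xd_disable(unsigned int eax)  /* eax = CPUID.1:EAX */
{
        unsigned int fam = (eax & 0x0ff00f00) >> 8;   /* family + extended family */
        unsigned int mod = (eax & 0x000f00f0) >> 4;   /* model + extended model */

        if (fam > 6)
                return 1;          /* family > 6: always attempt the clear */
        if (fam < 6)
                return 0;          /* family < 6: skip */
        return mod >= 0xd;         /* family 6 needs model >= 0xd */
}

int main(void)
{
        /* e.g. family 6, extended model 1, model 7: eax = 0x00010676 */
        printf("%d\n", should_clear_xd_disable(0x00010676));  /* prints 1 */
        return 0;
}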
index e03530aebfd0332635f901afa9f19f00317fa097..bf4700755184e32d4b4e549bd19f4014caa46468 100644 (file)
@@ -69,7 +69,7 @@ jiffies_64 = jiffies;
 
 PHDRS {
        text PT_LOAD FLAGS(5);          /* R_E */
-       data PT_LOAD FLAGS(7);          /* RWE */
+       data PT_LOAD FLAGS(6);          /* RW_ */
 #ifdef CONFIG_X86_64
        user PT_LOAD FLAGS(5);          /* R_E */
 #ifdef CONFIG_SMP
@@ -116,6 +116,10 @@ SECTIONS
 
        EXCEPTION_TABLE(16) :text = 0x9090
 
+#if defined(CONFIG_DEBUG_RODATA)
+       /* .text should occupy whole number of pages */
+       . = ALIGN(PAGE_SIZE);
+#endif
        X64_ALIGN_DEBUG_RODATA_BEGIN
        RO_DATA(PAGE_SIZE)
        X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
                __bss_start = .;
                *(.bss..page_aligned)
                *(.bss)
-               . = ALIGN(4);
+               . = ALIGN(PAGE_SIZE);
                __bss_stop = .;
        }
 
index f628234fbeca06683876543f07476f10111812b7..3cece05e4ac4622853c8eeb20e3bd04a02f851c7 100644 (file)
@@ -575,6 +575,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
        s->pics[1].elcr_mask = 0xde;
        s->pics[0].pics_state = s;
        s->pics[1].pics_state = s;
+       s->pics[0].isr_ack = 0xff;
+       s->pics[1].isr_ack = 0xff;
 
        /*
         * Initialize PIO device
index fb8b376bf28cb3e04a6bb903900f32838ab02a14..fbb04aee8301efab741f80f7c590a45d7e3b298f 100644 (file)
@@ -2394,7 +2394,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        ASSERT(!VALID_PAGE(root));
                        spin_lock(&vcpu->kvm->mmu_lock);
                        kvm_mmu_free_some_pages(vcpu);
-                       sp = kvm_mmu_get_page(vcpu, i << 30, i << 30,
+                       sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
+                                             i << 30,
                                              PT32_ROOT_LEVEL, 1, ACC_ALL,
                                              NULL);
                        root = __pa(sp->spt);
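
The one-line KVM fix above is a unit correction: kvm_mmu_get_page() takes a guest frame number first, but the old code passed i << 30, a guest physical byte address. Shifting by 30 - PAGE_SHIFT produces the frame number of the same 1 GiB slot. A standalone check of the arithmetic (a sketch, assuming 4 KiB pages):

#include <assert.h>

#define PAGE_SHIFT 12

int main(void)
{
        for (unsigned long i = 0; i < 4; i++) {
                unsigned long gpa = i << 30;                 /* byte address */
                unsigned long gfn = i << (30 - PAGE_SHIFT);  /* what is passed now */

                assert(gfn == gpa >> PAGE_SHIFT);
        }
        return 0;
}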
index 55543397a8a795f295ef6e3731e6b9373c953958..09df2f9a3d69ce36a20ec86c82bc9b719d44a1ae 100644 (file)
@@ -23,7 +23,7 @@ mmiotrace-y                   := kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)   += testmmiotrace.o
 
 obj-$(CONFIG_NUMA)             += numa.o numa_$(BITS).o
-obj-$(CONFIG_K8_NUMA)          += k8topology_64.o
+obj-$(CONFIG_AMD_NUMA)         += amdtopology_64.o
 obj-$(CONFIG_ACPI_NUMA)                += srat_$(BITS).o
 
 obj-$(CONFIG_HAVE_MEMBLOCK)            += memblock.o
similarity index 94%
rename from arch/x86/mm/k8topology_64.c
rename to arch/x86/mm/amdtopology_64.c
index 804a3b6c6e14f6aba0cc36616d85ab882a121685..51fae9cfdecb39ba87149dc76f128f6e12e73540 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * AMD K8 NUMA support.
+ * AMD NUMA support.
  * Discover the memory map and associated nodes.
  *
- * This version reads it directly from the K8 northbridge.
+ * This version reads it directly from the AMD northbridge.
  *
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
@@ -57,7 +57,7 @@ static __init void early_get_boot_cpu_id(void)
 {
        /*
         * need to get the APIC ID of the BSP so can use that to
-        * create apicid_to_node in k8_scan_nodes()
+        * create apicid_to_node in amd_scan_nodes()
         */
 #ifdef CONFIG_X86_MPPARSE
        /*
@@ -69,7 +69,7 @@ static __init void early_get_boot_cpu_id(void)
        early_init_lapic_mapping();
 }
 
-int __init k8_get_nodes(struct bootnode *physnodes)
+int __init amd_get_nodes(struct bootnode *physnodes)
 {
        int i;
        int ret = 0;
@@ -82,7 +82,7 @@ int __init k8_get_nodes(struct bootnode *physnodes)
        return ret;
 }
 
-int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
 {
        unsigned long start = PFN_PHYS(start_pfn);
        unsigned long end = PFN_PHYS(end_pfn);
@@ -194,7 +194,7 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
        return 0;
 }
 
-int __init k8_scan_nodes(void)
+int __init amd_scan_nodes(void)
 {
        unsigned int bits;
        unsigned int cores;
index c0e28a13de7df55c1ee1b173b61fd2d18c50e49c..947f42abe820eed9e47388ff3fcfd6fc937bb96a 100644 (file)
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
        /*
         * We just marked the kernel text read only above, now that
         * we are going to free part of that, we need to make that
-        * writeable first.
+        * writeable and non-executable first.
         */
+       set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
        set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
 
        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
index 0e969f9f401b72bddf403325e2a3a862199ee07b..f89b5bb4e93f82926f339054aa37f5bd4e829216 100644 (file)
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
 
 static inline int is_kernel_text(unsigned long addr)
 {
-       if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+       if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
                return 1;
        return 0;
 }
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void)
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 }
 
+static void mark_nxdata_nx(void)
+{
+       /*
+        * When this is called, init has already been executed and released,
+        * so everything past _etext should be NX.
+        */
+       unsigned long start = PFN_ALIGN(_etext);
+       /*
+        * The upper limit matches the one in is_kernel_text(); round up to
+        * a full HPAGE_SIZE, since huge pages may be used here:
+        */
+       unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
+
+       if (__supported_pte_mask & _PAGE_NX)
+               printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
+       set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
 void mark_rodata_ro(void)
 {
        unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +963,7 @@ void mark_rodata_ro(void)
        printk(KERN_INFO "Testing CPA: write protecting again\n");
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
+       mark_nxdata_nx();
 }
 #endif
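The NX range computed above starts at the first page boundary after _etext and ends at __init_end rounded up to a huge-page boundary (the same upper limit is_kernel_text() uses). A stand-alone sketch of that arithmetic, with hypothetical section addresses and an assumed 2 MiB huge-page size:

#include <stdio.h>

#define PAGE_SIZE    (1UL << 12)
#define HPAGE_SIZE   (1UL << 21)             /* assumed 2 MiB huge pages */
#define HPAGE_MASK   (~(HPAGE_SIZE - 1))
#define PFN_ALIGN(x) (((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long etext    = 0xc05a1234UL;  /* hypothetical _etext */
        unsigned long init_end = 0xc08c5678UL;  /* hypothetical __init_end */
        unsigned long start = PFN_ALIGN(etext);
        unsigned long size  = ((init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

        printf("NX-protecting the kernel data: %luk\n", size >> 10);
        return 0;
}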
 
index af3b6c8a436f7b7ec49a2366738a28faa58cdfc5..704a37cedddb59404a3c1fc773e44853b2089939 100644 (file)
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
        e->trace.entries = e->trace_entries;
        e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
        e->trace.skip = 0;
-       save_stack_trace_bp(&e->trace, regs->bp);
+       save_stack_trace_regs(&e->trace, regs);
 
        /* Round address down to nearest 16 bytes */
        shadow_copy = kmemcheck_shadow_lookup(address
index 7ffc9b727efdc95ee6748acd1a1b646c8e164ffb..7762a517d69d9233a7a6e419a5eaa9ae6c94ee53 100644 (file)
@@ -264,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata;
 static char *cmdline __initdata;
 
 static int __init setup_physnodes(unsigned long start, unsigned long end,
-                                       int acpi, int k8)
+                                       int acpi, int amd)
 {
        int nr_nodes = 0;
        int ret = 0;
@@ -274,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
        if (acpi)
                nr_nodes = acpi_get_nodes(physnodes);
 #endif
-#ifdef CONFIG_K8_NUMA
-       if (k8)
-               nr_nodes = k8_get_nodes(physnodes);
+#ifdef CONFIG_AMD_NUMA
+       if (amd)
+               nr_nodes = amd_get_nodes(physnodes);
 #endif
        /*
         * Basic sanity checking on the physical node map: there may be errors
-        * if the SRAT or K8 incorrectly reported the topology or the mem=
+        * if the SRAT or AMD code incorrectly reported the topology or the mem=
         * kernel parameter is used.
         */
        for (i = 0; i < nr_nodes; i++) {
@@ -549,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
  * numa=fake command-line option.
  */
 static int __init numa_emulation(unsigned long start_pfn,
-                       unsigned long last_pfn, int acpi, int k8)
+                       unsigned long last_pfn, int acpi, int amd)
 {
        u64 addr = start_pfn << PAGE_SHIFT;
        u64 max_addr = last_pfn << PAGE_SHIFT;
@@ -557,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn,
        int num_nodes;
        int i;
 
-       num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
+       num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
        /*
         * If the numa=fake command-line contains a 'M' or 'G', it represents
         * the fixed node size.  Otherwise, if it is just a single number N,
@@ -602,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn,
 #endif /* CONFIG_NUMA_EMU */
 
 void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
-                               int acpi, int k8)
+                               int acpi, int amd)
 {
        int i;
 
@@ -610,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
        nodes_clear(node_online_map);
 
 #ifdef CONFIG_NUMA_EMU
-       if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
+       if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
                return;
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
@@ -624,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
        nodes_clear(node_online_map);
 #endif
 
-#ifdef CONFIG_K8_NUMA
-       if (!numa_off && k8 && !k8_scan_nodes())
+#ifdef CONFIG_AMD_NUMA
+       if (!numa_off && amd && !amd_scan_nodes())
                return;
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
index 532e7933d606fdbdde77aacdb2de746f249d2bf2..8b830ca14ac46c08facc1a848ddcb3c42c0d56cf 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/pfn.h>
 #include <linux/percpu.h>
 #include <linux/gfp.h>
+#include <linux/pci.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
                                   unsigned long pfn)
 {
        pgprot_t forbidden = __pgprot(0);
+       pgprot_t required = __pgprot(0);
 
        /*
         * The BIOS area between 640k and 1Mb needs to be executable for
         * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
         */
-       if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
+#ifdef CONFIG_PCI_BIOS
+       if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
                pgprot_val(forbidden) |= _PAGE_NX;
+#endif
 
        /*
         * The kernel text needs to be executable for obvious reasons
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
        if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
                   __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
                pgprot_val(forbidden) |= _PAGE_RW;
+       /*
+        * .data and .bss should always be writable.
+        */
+       if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
+           within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
+               pgprot_val(required) |= _PAGE_RW;
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
        /*
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 #endif
 
        prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+       prot = __pgprot(pgprot_val(prot) | pgprot_val(required));
 
        return prot;
 }
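static_protections() now composes two masks: bits in forbidden are stripped from the requested protection, and bits in required (e.g. _PAGE_RW for .data and .bss) are forced back in afterwards. A reduced sketch of that composition, with placeholder bit values:

#include <stdio.h>

typedef unsigned long pgprotval_t;

#define _PAGE_RW (1UL << 1)          /* placeholder bit value */

static pgprotval_t apply_protections(pgprotval_t prot,
                                     pgprotval_t forbidden,
                                     pgprotval_t required)
{
        prot &= ~forbidden;   /* strip bits the range must not have */
        prot |= required;     /* force bits the range must keep */
        return prot;
}

int main(void)
{
        /* e.g. a rodata page forbids RW; a .data page requires RW */
        printf("rodata: %#lx\n", apply_protections(_PAGE_RW, _PAGE_RW, 0));
        printf(".data:  %#lx\n", apply_protections(0, 0, _PAGE_RW));
        return 0;
}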
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 {
        unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
        pte_t new_pte, old_pte, *tmp;
-       pgprot_t old_prot, new_prot;
+       pgprot_t old_prot, new_prot, req_prot;
        int i, do_split = 1;
        unsigned int level;
 
@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
         * We are safe now. Check whether the new pgprot is the same:
         */
        old_pte = *kpte;
-       old_prot = new_prot = pte_pgprot(old_pte);
+       old_prot = new_prot = req_prot = pte_pgprot(old_pte);
 
-       pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-       pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+       pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+       pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
 
        /*
         * old_pte points to the large page base address. So we need
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
        pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
        cpa->pfn = pfn;
 
-       new_prot = static_protections(new_prot, address, pfn);
+       new_prot = static_protections(req_prot, address, pfn);
 
        /*
         * We need to check the full range, whether
         * static_protection() requires a different pgprot for one of
         * the pages in the range we try to preserve:
         */
-       addr = address + PAGE_SIZE;
-       pfn++;
-       for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
-               pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
+       addr = address & pmask;
+       pfn = pte_pfn(old_pte);
+       for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+               pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
 
                if (pgprot_val(chk_prot) != pgprot_val(new_prot))
                        goto out_unlock;
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
         * that we limited the number of possible pages already to
         * the number of pages in the large page.
         */
-       if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+       if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
                /*
                 * The address is aligned and the number of pages
                 * covers the full page.
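The preservation check above now walks every 4 KiB page covered by the whole large page rather than only the requested range, and keeps the large mapping only when the request starts at the aligned base and spans all of it. A sketch of the new iteration bounds, assuming a 2 MiB large page:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned long psize   = 1UL << 21;           /* assumed 2 MiB page */
        unsigned long pmask   = ~(psize - 1);
        unsigned long address = 0xc0612000UL;        /* hypothetical request */
        unsigned long addr    = address & pmask;     /* large page base */
        unsigned long npages  = psize >> PAGE_SHIFT; /* 512 iterations */

        printf("check %lu pages from %#lx\n", npages, addr);
        /* preserve only if the request starts at the base and spans it all */
        printf("aligned: %d\n", address == (address & pmask));
        return 0;
}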
index a3250aa34086fce7d376e9e1e464fa2e996dbb6d..410531d3c292d20cde9b40a487711930eab51eb2 100644 (file)
@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
 {
        if (!cpu_has_nx) {
                printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
-                      "missing in CPU or disabled in BIOS!\n");
+                      "missing in CPU!\n");
        } else {
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
                if (disable_nx) {
index a17dffd136c143898e91187cbd39d005b05779b6..f16434568a51da26ea8524ae11fa84f0c7405717 100644 (file)
@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
        /* mark this node as "seen" in node bitmap */
        BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
 
+       /* don't need to check apic_id here, because it is always 8 bits */
        apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
 
        printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
index a35cb9d8b0606bc8f7123cd15f0017972a5e8dda..171a0aacb99a0874373619f4fd51ed955e2ddb9e 100644 (file)
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
        }
 
        apic_id = pa->apic_id;
+       if (apic_id >= MAX_LOCAL_APIC) {
+               printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped: apicid too big\n", pxm, apic_id, node);
+               return;
+       }
        apicid_to_node[apic_id] = node;
        node_set(node, cpu_nodes_parsed);
        acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
                apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
        else
                apic_id = pa->apic_id;
+
+       if (apic_id >= MAX_LOCAL_APIC) {
+               printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped: apicid too big\n", pxm, apic_id, node);
+               return;
+       }
+
        apicid_to_node[apic_id] = node;
        node_set(node, cpu_nodes_parsed);
        acpi_numa = 1;
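Both SRAT parsers now validate the firmware-supplied apic_id against MAX_LOCAL_APIC before using it to index apicid_to_node[]; a malformed table could otherwise write past the array. A reduced sketch of the pattern (the array bound is an assumption here):

#include <stdio.h>

#define MAX_LOCAL_APIC 32768                /* assumed array bound */

static int apicid_to_node[MAX_LOCAL_APIC];

static void srat_record(unsigned int apic_id, int node)
{
        if (apic_id >= MAX_LOCAL_APIC) {    /* reject before indexing */
                printf("apicid 0x%x too big, skipped\n", apic_id);
                return;
        }
        apicid_to_node[apic_id] = node;
}

int main(void)
{
        srat_record(0x10, 0);               /* fine */
        srat_record(0xffff0, 1);            /* would overflow; skipped */
        return 0;
}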
index 2d49d4e19a3619c0be2c7d17a892b8aea582048f..72cbec14d783867cb5f5fa8547eb6fcee2fe28b9 100644 (file)
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
        if (!user_mode_vm(regs)) {
                unsigned long stack = kernel_stack_pointer(regs);
                if (depth)
-                       dump_trace(NULL, regs, (unsigned long *)stack, 0,
+                       dump_trace(NULL, regs, (unsigned long *)stack,
                                   &backtrace_ops, &depth);
                return;
        }
index 4e8baad36d37739e32b71da0be2932924bacfe69..358c8b9c96a79c725766e1627544486eb312a0bc 100644 (file)
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
                case 0x14:
                        cpu_type = "x86-64/family14h";
                        break;
+               case 0x15:
+                       cpu_type = "x86-64/family15h";
+                       break;
                default:
                        return -ENODEV;
                }
index e3ecb71b5790228073d5eb089f4a4fc9303ab09a..0636dd93cef8d64a718124ddfa7c5a02edd2174f 100644 (file)
@@ -58,9 +58,6 @@ static void timer_stop(void)
 
 int __init op_nmi_timer_init(struct oprofile_operations *ops)
 {
-       if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
-               return -ENODEV;
-
        ops->start = timer_start;
        ops->stop = timer_stop;
        ops->cpu_type = "timer";
index a011bcc0f94331d82c8abfa7d4afdbbd0c59eff5..c3b8e24f2b16f4f6441c286268a61ac6a320b7c3 100644 (file)
 #include "op_x86_model.h"
 #include "op_counter.h"
 
-#define NUM_COUNTERS 4
+#define NUM_COUNTERS           4
+#define NUM_COUNTERS_F15H      6
 #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
-#define NUM_VIRT_COUNTERS 32
+#define NUM_VIRT_COUNTERS      32
 #else
-#define NUM_VIRT_COUNTERS NUM_COUNTERS
+#define NUM_VIRT_COUNTERS      0
 #endif
 
 #define OP_EVENT_MASK                  0x0FFF
@@ -41,7 +42,8 @@
 
 #define MSR_AMD_EVENTSEL_RESERVED      ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
 
-static unsigned long reset_value[NUM_VIRT_COUNTERS];
+static int num_counters;
+static unsigned long reset_value[OP_MAX_COUNTER];
 
 #define IBS_FETCH_SIZE                 6
 #define IBS_OP_SIZE                    12
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
        int i;
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->counters[i].addr)
                        continue;
                release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 {
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; i++) {
+       for (i = 0; i < num_counters; i++) {
                if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
                        goto fail;
                if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
                        goto fail;
                }
                /* both registers must be reserved */
-               msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
-               msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+               if (num_counters == NUM_COUNTERS_F15H) {
+                       msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
+                       msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
+               } else {
+                       msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+                       msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+               }
                continue;
        fail:
                if (!counter_config[i].enabled)
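Family 15h exposes six counters through interleaved control/counter MSR pairs, hence the (i << 1) stride above, while the K7-style MSRs are consecutive. A small sketch printing both layouts (the MSR base values are the usual definitions, assumed here):

#include <stdio.h>

#define MSR_K7_EVNTSEL0    0xc0010000U   /* assumed values */
#define MSR_K7_PERFCTR0    0xc0010004U
#define MSR_F15H_PERF_CTL  0xc0010200U
#define MSR_F15H_PERF_CTR  0xc0010201U

int main(void)
{
        unsigned int i;

        for (i = 0; i < 4; i++)          /* K7 layout: consecutive */
                printf("k7   %u: ctl=%#x ctr=%#x\n", i,
                       MSR_K7_EVNTSEL0 + i, MSR_K7_PERFCTR0 + i);
        for (i = 0; i < 6; i++)          /* F15h layout: pairs, stride 2 */
                printf("f15h %u: ctl=%#x ctr=%#x\n", i,
                       MSR_F15H_PERF_CTL + (i << 1),
                       MSR_F15H_PERF_CTR + (i << 1));
        return 0;
}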
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        int i;
 
        /* setup reset_value */
-       for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
+       for (i = 0; i < OP_MAX_COUNTER; ++i) {
                if (counter_config[i].enabled
                    && msrs->counters[op_x86_virt_to_phys(i)].addr)
                        reset_value[i] = counter_config[i].count;
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* clear all counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!msrs->controls[i].addr)
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
        }
 
        /* enable active counters */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
        u64 val;
        int i;
 
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
         * Subtle: stop on all counters to avoid race with setting our
         * pm callback
         */
-       for (i = 0; i < NUM_COUNTERS; ++i) {
+       for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
@@ -603,6 +610,7 @@ static int force_ibs_eilvt_setup(void)
                ret = setup_ibs_ctl(i);
                if (ret)
                        return ret;
+               pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
                return 0;
        }
 
@@ -630,21 +638,29 @@ static int __init_ibs_nmi(void)
        return 0;
 }
 
-/* initialize the APIC for the IBS interrupts if available */
+/*
+ * check and reserve APIC extended interrupt LVT offset for IBS if
+ * available
+ *
+ * init_ibs() performs implicitly CPU-local operations, so pin this
+ * thread to its current CPU
+ */
+
 static void init_ibs(void)
 {
-       ibs_caps = get_ibs_caps();
+       preempt_disable();
 
+       ibs_caps = get_ibs_caps();
        if (!ibs_caps)
-               return;
+               goto out;
 
-       if (__init_ibs_nmi()) {
+       if (__init_ibs_nmi() < 0)
                ibs_caps = 0;
-               return;
-       }
+       else
+               printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
 
-       printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
-              (unsigned)ibs_caps);
+out:
+       preempt_enable();
 }
 
 static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -698,18 +714,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
        return 0;
 }
 
+struct op_x86_model_spec op_amd_spec;
+
 static int op_amd_init(struct oprofile_operations *ops)
 {
        init_ibs();
        create_arch_files = ops->create_files;
        ops->create_files = setup_ibs_files;
+
+       if (boot_cpu_data.x86 == 0x15) {
+               num_counters = NUM_COUNTERS_F15H;
+       } else {
+               num_counters = NUM_COUNTERS;
+       }
+
+       op_amd_spec.num_counters = num_counters;
+       op_amd_spec.num_controls = num_counters;
+       op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
+
        return 0;
 }
 
 struct op_x86_model_spec op_amd_spec = {
-       .num_counters           = NUM_COUNTERS,
-       .num_controls           = NUM_COUNTERS,
-       .num_virt_counters      = NUM_VIRT_COUNTERS,
+       /* num_counters/num_controls filled in at runtime */
        .reserved               = MSR_AMD_EVENTSEL_RESERVED,
        .event_mask             = OP_EVENT_MASK,
        .init                   = op_amd_init,
index 182558dd5515add420a27dfa58304d04b6a6f71a..9fadec074142b11afcb39e73627dc4c4fd8e14dd 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/oprofile.h>
 #include <linux/smp.h>
 #include <linux/ptrace.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
index effd96e33f16690c3dd318de9886bee75e5fd699..6b8759f7634e661de3983dbc7e6accb26e939a8c 100644 (file)
@@ -7,6 +7,7 @@ obj-$(CONFIG_PCI_OLPC)          += olpc.o
 obj-$(CONFIG_PCI_XEN)          += xen.o
 
 obj-y                          += fixup.o
+obj-$(CONFIG_X86_INTEL_CE)      += ce4100.o
 obj-$(CONFIG_ACPI)             += acpi.o
 obj-y                          += legacy.o irq.o
 
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
new file mode 100644 (file)
index 0000000..85b68ef
--- /dev/null
@@ -0,0 +1,315 @@
+/*
+ *  GPL LICENSE SUMMARY
+ *
+ *  Copyright(c) 2010 Intel Corporation. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of version 2 of the GNU General Public License as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *  The full GNU General Public License is included in this distribution
+ *  in the file called LICENSE.GPL.
+ *
+ *  Contact Information:
+ *    Intel Corporation
+ *    2200 Mission College Blvd.
+ *    Santa Clara, CA  97052
+ *
+ * This provides access methods for PCI registers that misbehave on
+ * the CE4100. Each register can be assigned a private init, read and
+ * write routine. The exception to this is the bridge device. The
+ * bridge device is the only device on bus zero (0) that requires any
+ * fixup, so it is a special case at the moment.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+
+#include <asm/pci_x86.h>
+
+struct sim_reg {
+       u32 value;
+       u32 mask;
+};
+
+struct sim_dev_reg {
+       int dev_func;
+       int reg;
+       void (*init)(struct sim_dev_reg *reg);
+       void (*read)(struct sim_dev_reg *reg, u32 *value);
+       void (*write)(struct sim_dev_reg *reg, u32 value);
+       struct sim_reg sim_reg;
+};
+
+struct sim_reg_op {
+       void (*init)(struct sim_dev_reg *reg);
+       void (*read)(struct sim_dev_reg *reg, u32 value);
+       void (*write)(struct sim_dev_reg *reg, u32 value);
+};
+
+#define MB (1024 * 1024)
+#define KB (1024)
+#define SIZE_TO_MASK(size) (~(size - 1))
+
+#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\
+{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\
+       {0, SIZE_TO_MASK(size)} },
+
+static void reg_init(struct sim_dev_reg *reg)
+{
+       pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4,
+                             &reg->sim_reg.value);
+}
+
+static void reg_read(struct sim_dev_reg *reg, u32 *value)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pci_config_lock, flags);
+       *value = reg->sim_reg.value;
+       raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static void reg_write(struct sim_dev_reg *reg, u32 value)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&pci_config_lock, flags);
+       reg->sim_reg.value = (value & reg->sim_reg.mask) |
+               (reg->sim_reg.value & ~reg->sim_reg.mask);
+       raw_spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+static void sata_reg_init(struct sim_dev_reg *reg)
+{
+       pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4,
+                             &reg->sim_reg.value);
+       reg->sim_reg.value += 0x400;
+}
+
+static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value)
+{
+       reg_read(reg, value);
+       if (*value != reg->sim_reg.mask)
+               *value |= 0x100;
+}
+
+static void sata_revid_init(struct sim_dev_reg *reg)
+{
+       reg->sim_reg.value = 0x01060100;
+       reg->sim_reg.mask = 0;
+}
+
+static void sata_revid_read(struct sim_dev_reg *reg, u32 *value)
+{
+       reg_read(reg, value);
+}
+
+static struct sim_dev_reg bus1_fixups[] = {
+       DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(9, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write)
+       DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write)
+       DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
+       DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x8,  0, sata_revid_init, sata_revid_read, 0)
+       DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write)
+       DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write)
+       DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write)
+       DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
+       DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write)
+};
+
+static void __init init_sim_regs(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+               if (bus1_fixups[i].init)
+                       bus1_fixups[i].init(&bus1_fixups[i]);
+       }
+}
+
+static inline void extract_bytes(u32 *value, int reg, int len)
+{
+       uint32_t mask;
+
+       *value >>= ((reg & 3) * 8);
+       mask = 0xFFFFFFFF >> ((4 - len) * 8);
+       *value &= mask;
+}
+
+static int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
+{
+       u32 av_bridge_base, av_bridge_limit;
+       int retval = 0;
+
+       switch (reg) {
+       /* Make BARs appear to not request any memory. */
+       case PCI_BASE_ADDRESS_0:
+       case PCI_BASE_ADDRESS_0 + 1:
+       case PCI_BASE_ADDRESS_0 + 2:
+       case PCI_BASE_ADDRESS_0 + 3:
+               *value = 0;
+               break;
+
+               /* The subordinate bus number register is hardwired
+                * to zero and read-only, so simulate it here.
+                */
+       case PCI_PRIMARY_BUS:
+               if (len == 4)
+                       *value = 0x00010100;
+               break;
+
+       case PCI_SUBORDINATE_BUS:
+               *value = 1;
+               break;
+
+       case PCI_MEMORY_BASE:
+       case PCI_MEMORY_LIMIT:
+               /* Get the A/V bridge base address. */
+               pci_direct_conf1.read(0, 0, devfn,
+                               PCI_BASE_ADDRESS_0, 4, &av_bridge_base);
+
+               av_bridge_limit = av_bridge_base + (512*MB - 1);
+               av_bridge_limit >>= 16;
+               av_bridge_limit &= 0xFFF0;
+
+               av_bridge_base >>= 16;
+               av_bridge_base &= 0xFFF0;
+
+               if (reg == PCI_MEMORY_LIMIT)
+                       *value = av_bridge_limit;
+               else if (len == 2)
+                       *value = av_bridge_base;
+               else
+                       *value = (av_bridge_limit << 16) | av_bridge_base;
+               break;
+               /* Make the prefetchable memory limit smaller than the
+                * prefetchable memory base, so that no prefetchable
+                * memory space is claimed.
+                */
+       case PCI_PREF_MEMORY_BASE:
+               *value = 0xFFF0;
+               break;
+       case PCI_PREF_MEMORY_LIMIT:
+               *value = 0x0;
+               break;
+               /* Make the I/O limit smaller than the I/O base, so no I/O space is claimed. */
+       case PCI_IO_BASE:
+               *value = 0xF0;
+               break;
+       case PCI_IO_LIMIT:
+               *value = 0;
+               break;
+       default:
+               retval = 1;
+       }
+       return retval;
+}
+
+static int ce4100_conf_read(unsigned int seg, unsigned int bus,
+                           unsigned int devfn, int reg, int len, u32 *value)
+{
+       int i, retval = 1;
+
+       if (bus == 1) {
+               for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+                       if (bus1_fixups[i].dev_func == devfn &&
+                           bus1_fixups[i].reg == (reg & ~3) &&
+                           bus1_fixups[i].read) {
+                               bus1_fixups[i].read(&(bus1_fixups[i]),
+                                                   value);
+                               extract_bytes(value, reg, len);
+                               return 0;
+                       }
+               }
+       }
+
+       if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) &&
+           !bridge_read(devfn, reg, len, value))
+               return 0;
+
+       return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
+}
+
+static int ce4100_conf_write(unsigned int seg, unsigned int bus,
+                            unsigned int devfn, int reg, int len, u32 value)
+{
+       int i;
+
+       if (bus == 1) {
+               for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
+                       if (bus1_fixups[i].dev_func == devfn &&
+                           bus1_fixups[i].reg == (reg & ~3) &&
+                           bus1_fixups[i].write) {
+                               bus1_fixups[i].write(&(bus1_fixups[i]),
+                                                    value);
+                               return 0;
+                       }
+               }
+       }
+
+       /* Discard writes to A/V bridge BAR. */
+       if (bus == 0 && PCI_DEVFN(1, 0) == devfn &&
+           ((reg & ~3) == PCI_BASE_ADDRESS_0))
+               return 0;
+
+       return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
+}
+
+struct pci_raw_ops ce4100_pci_conf = {
+       .read = ce4100_conf_read,
+       .write = ce4100_conf_write,
+};
+
+static int __init ce4100_pci_init(void)
+{
+       init_sim_regs();
+       raw_pci_ops = &ce4100_pci_conf;
+       return 0;
+}
+subsys_initcall(ce4100_pci_init);
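extract_bytes() narrows a simulated 32-bit register value to the byte or word the caller actually asked for. The same logic, runnable stand-alone:

#include <stdio.h>
#include <stdint.h>

/* identical logic to extract_bytes() above */
static void extract_bytes(uint32_t *value, int reg, int len)
{
        *value >>= ((reg & 3) * 8);
        *value &= 0xFFFFFFFFu >> ((4 - len) * 8);
}

int main(void)
{
        uint32_t v = 0xAABBCCDDu;   /* simulated dword at (reg & ~3) */

        extract_bytes(&v, 0x11, 1); /* 1-byte read at offset 0x11 */
        printf("%#x\n", v);         /* prints 0xcc */
        return 0;
}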
index 2492d165096a2a696cf8332534966cfce2c848a7..a5f7d0d63de0def1481382f785d0fc34d553f0f6 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/uaccess.h>
 #include <asm/pci_x86.h>
 #include <asm/pci-functions.h>
+#include <asm/cacheflush.h>
 
 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE       (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
 #define PCIBIOS_HW_TYPE1_SPEC          0x10
 #define PCIBIOS_HW_TYPE2_SPEC          0x20
 
+int pcibios_enabled;
+
+/* According to the BIOS specification at:
+ * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
+ * restrict the executable zone to a few pages and make the rest
+ * read-only, but this may be broken on some BIOSes and is complex to
+ * handle with static_protections. We could make the 0xe0000-0x100000
+ * range read-only and executable, but this can break some ISA
+ * mappings.
+ *
+ * So we leave an RW and X hole when pcibios is used. This shouldn't
+ * happen on modern systems with MMCONFIG, and if you don't want it
+ * you can disable pcibios...
+ */
+static inline void set_bios_x(void)
+{
+       pcibios_enabled = 1;
+       set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
+       if (__supported_pte_mask & _PAGE_NX)
+               printk(KERN_INFO "PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n");
+}
+
 /*
  * This is the standard structure used to identify the entry point
  * to the BIOS32 Service Directory, as documented in
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void)
                        DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
                                        bios32_entry);
                        bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+                       set_bios_x();
                        if (check_pcibios())
                                return &pci_bios_access;
                }
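set_bios_x() flips the whole legacy BIOS window back to executable. Assuming the usual BIOS_BEGIN/BIOS_END values of 0xa0000 and 0x100000, that amounts to 96 pages of 4 KiB:

#include <stdio.h>

#define BIOS_BEGIN 0x000a0000UL   /* assumed values */
#define BIOS_END   0x00100000UL
#define PAGE_SHIFT 12

int main(void)
{
        /* page count passed to set_memory_x() */
        printf("%lu pages\n", (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT); /* 96 */
        return 0;
}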
index 7bf70b812fa2a436ec8ce1bea270b05af2b2f487..021eee91c0562503dbb68c5bea490674d9db8618 100644 (file)
@@ -1,5 +1,7 @@
 # Platform specific code goes here
+obj-y  += ce4100/
 obj-y  += efi/
+obj-y  += iris/
 obj-y  += mrst/
 obj-y  += olpc/
 obj-y  += scx200/
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
new file mode 100644 (file)
index 0000000..91fc929
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_INTEL_CE)     += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
new file mode 100644 (file)
index 0000000..d2c0d51
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Intel CE4100 platform-specific setup code
+ *
+ * (C) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+
+#include <asm/setup.h>
+#include <asm/io.h>
+
+static int ce4100_i8042_detect(void)
+{
+       return 0;
+}
+
+static void __init sdv_find_smp_config(void)
+{
+}
+
+#ifdef CONFIG_SERIAL_8250
+
+
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+       offset = offset << p->regshift;
+       return readl(p->membase + offset);
+}
+
+/*
+ * The UART Tx interrupt is not raised under some conditions, and serial
+ * transmission then hangs. This is a silicon issue and has not been root
+ * caused. The workaround checks the UART_LSR_THRE and UART_LSR_TEMT bits of
+ * the LSR register in the interrupt handler; if at least one of the two bits
+ * is set, the transmit request is processed. Without this workaround, serial
+ * transmission may hang. This workaround is for erratum number 9 in the
+ * B-step errata.
+ */
+
+static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
+{
+       unsigned int ret, ier, lsr;
+
+       if (offset == UART_IIR) {
+               offset = offset << p->regshift;
+               ret = readl(p->membase + offset);
+               if (ret & UART_IIR_NO_INT) {
+                       /* see if the TX interrupt should really have been set */
+                       ier = mem_serial_in(p, UART_IER);
+                       /* see if the UART's XMIT interrupt is enabled */
+                       if (ier & UART_IER_THRI) {
+                               lsr = mem_serial_in(p, UART_LSR);
+                               /* now check to see if the UART should be
+                                  generating an interrupt (but isn't) */
+                               if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
+                                       ret &= ~UART_IIR_NO_INT;
+                       }
+               }
+       } else
+               ret = mem_serial_in(p, offset);
+       return ret;
+}
+
+static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
+{
+       offset = offset << p->regshift;
+       writel(value, p->membase + offset);
+}
+
+static void ce4100_serial_fixup(int port, struct uart_port *up,
+       unsigned short *capabilities)
+{
+#ifdef CONFIG_EARLY_PRINTK
+       /*
+        * Override the legacy port configuration that comes from
+        * asm/serial.h. Using the ioport driver and then switching to the
+        * PCI memory-mapped driver hangs the IOAPIC.
+        */
+       if (up->iotype !=  UPIO_MEM32) {
+               up->uartclk  = 14745600;
+               up->mapbase = 0xdffe0200;
+               set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
+                               up->mapbase & PAGE_MASK);
+               up->membase =
+                       (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
+               up->membase += up->mapbase & ~PAGE_MASK;
+               up->iotype   = UPIO_MEM32;
+               up->regshift = 2;
+       }
+#endif
+       up->iobase = 0;
+       up->serial_in = ce4100_mem_serial_in;
+       up->serial_out = ce4100_mem_serial_out;
+
+       *capabilities |= (1 << 12);
+}
+
+static __init void sdv_serial_fixup(void)
+{
+       serial8250_set_isa_configurator(ce4100_serial_fixup);
+}
+
+#else
+static inline void sdv_serial_fixup(void) { }
+#endif
+
+static void __init sdv_arch_setup(void)
+{
+       sdv_serial_fixup();
+}
+
+/*
+ * CE4100 specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_ce4100_early_setup(void)
+{
+       x86_init.oem.arch_setup = sdv_arch_setup;
+       x86_platform.i8042_detect = ce4100_i8042_detect;
+       x86_init.resources.probe_roms = x86_init_noop;
+       x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+       x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+}
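The erratum workaround in ce4100_mem_serial_in() reduces to one predicate over three registers: a reported "no interrupt" is overridden when the TX interrupt is enabled and the transmitter is empty. A stand-alone sketch, using the standard linux/serial_reg.h bit values:

#include <stdio.h>

#define UART_IIR_NO_INT 0x01   /* bit values as in linux/serial_reg.h */
#define UART_IER_THRI   0x02
#define UART_LSR_THRE   0x20
#define UART_LSR_TEMT   0x40

static unsigned int fixup_iir(unsigned int iir, unsigned int ier,
                              unsigned int lsr)
{
        if ((iir & UART_IIR_NO_INT) && (ier & UART_IER_THRI) &&
            (lsr & (UART_LSR_THRE | UART_LSR_TEMT)))
                iir &= ~UART_IIR_NO_INT;  /* interrupt was really pending */
        return iir;
}

int main(void)
{
        printf("%#x\n", fixup_iir(0x01, UART_IER_THRI, UART_LSR_THRE)); /* 0 */
        return 0;
}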
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
new file mode 100644 (file)
index 0000000..db92198
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_32_IRIS)              += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
new file mode 100644 (file)
index 0000000..1ba7f5e
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Eurobraille/Iris power off support.
+ *
+ * Eurobraille's Iris machine is a PC with no APM or ACPI support.
+ * It is shutdown by a special I/O sequence which this module provides.
+ *
+ *  Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/io.h>
+
+#define IRIS_GIO_BASE          0x340
+#define IRIS_GIO_INPUT         IRIS_GIO_BASE
+#define IRIS_GIO_OUTPUT                (IRIS_GIO_BASE + 1)
+#define IRIS_GIO_PULSE         0x80 /* First byte to send */
+#define IRIS_GIO_REST          0x00 /* Second byte to send */
+#define IRIS_GIO_NODEV         0xff /* Likely not an Iris */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
+MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
+MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
+
+static int force;
+
+module_param(force, bool, 0);
+MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
+
+static void (*old_pm_power_off)(void);
+
+static void iris_power_off(void)
+{
+       outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT);
+       msleep(850);
+       outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT);
+}
+
+/*
+ * Before installing the power_off handler, try to make sure the OS is
+ * running on an Iris.  Since Iris does not support DMI, this is done
+ * by reading its input port and seeing whether the read value is
+ * meaningful.
+ */
+static int iris_init(void)
+{
+       unsigned char status;
+       if (force != 1) {
+               printk(KERN_ERR "The force parameter has not been set to 1, so the Iris poweroff handler will not be installed.\n");
+               return -ENODEV;
+       }
+       status = inb(IRIS_GIO_INPUT);
+       if (status == IRIS_GIO_NODEV) {
+               printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
+               return -ENODEV;
+       }
+       old_pm_power_off = pm_power_off;
+       pm_power_off = &iris_power_off;
+       printk(KERN_INFO "Iris power_off handler installed.\n");
+
+       return 0;
+}
+
+static void iris_exit(void)
+{
+       pm_power_off = old_pm_power_off;
+       printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+}
+
+module_init(iris_init);
+module_exit(iris_exit);
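Since the Iris has no DMI, detection relies on a floating ISA port reading back 0xff; any other value from the GIO input port is taken as evidence that the hardware is present. A sketch of that heuristic with the port I/O stubbed out:

#include <stdio.h>

#define IRIS_GIO_INPUT 0x340
#define IRIS_GIO_NODEV 0xff

/* stand-in for inb(); a real build would use port I/O */
static unsigned char inb_stub(unsigned short port)
{
        (void)port;
        return 0x42;               /* pretend the GIO hardware answered */
}

int main(void)
{
        unsigned char status = inb_stub(IRIS_GIO_INPUT);

        if (status == IRIS_GIO_NODEV)
                printf("not an Iris, handler not installed\n");
        else
                printf("Iris detected, installing power_off handler\n");
        return 0;
}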
index efbbc552fa953a5bc5e70e8108ec909cb3061252..f61ccdd4934141444f1c8f27d2b3a783f06c37d0 100644 (file)
@@ -1 +1,3 @@
 obj-$(CONFIG_X86_MRST)         += mrst.o
+obj-$(CONFIG_X86_MRST)         += vrtc.o
+obj-$(CONFIG_EARLY_PRINTK_MRST)        += early_printk_mrst.o
index 79ae68154e871fe208ff5fcfd809daf3b03c4ab5..fee0b4914e07ad494f82629ccc10dc0a1980879c 100644 (file)
@@ -9,9 +9,19 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
+
+#define pr_fmt(fmt) "mrst: " fmt
+
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sfi.h>
+#include <linux/intel_pmic_gpio.h>
+#include <linux/spi/spi.h>
+#include <linux/i2c.h>
+#include <linux/i2c/pca953x.h>
+#include <linux/gpio_keys.h>
+#include <linux/input.h>
+#include <linux/platform_device.h>
 #include <linux/irq.h>
 #include <linux/module.h>
 
@@ -23,7 +33,9 @@
 #include <asm/mrst.h>
 #include <asm/io.h>
 #include <asm/i8259.h>
+#include <asm/intel_scu_ipc.h>
 #include <asm/apb_timer.h>
+#include <asm/reboot.h>
 
 /*
  * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
@@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
                memcpy(sfi_mtimer_array, pentry, totallen);
        }
 
-       printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num);
+       pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
        pentry = sfi_mtimer_array;
        for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
-               printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz,"
+               pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
                        " irq = %d\n", totallen, (u32)pentry->phys_addr,
                        pentry->freq_hz, pentry->irq);
                        if (!pentry->irq)
@@ -176,14 +188,14 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
                memcpy(sfi_mrtc_array, pentry, totallen);
        }
 
-       printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num);
+       pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
        pentry = sfi_mrtc_array;
        for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
-               printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n",
+               pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
                        totallen, (u32)pentry->phys_addr, pentry->irq);
                mp_irq.type = MP_IOAPIC;
                mp_irq.irqtype = mp_INT;
-               mp_irq.irqflag = 0;
+               mp_irq.irqflag = 0xf;   /* level trigger and active low */
                mp_irq.srcbus = 0;
                mp_irq.srcbusirq = pentry->irq; /* IRQ */
                mp_irq.dstapic = MP_APIC_ALL;
@@ -209,6 +221,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
 
 void __init mrst_time_init(void)
 {
+       sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
        switch (mrst_timer_options) {
        case MRST_TIMER_APBT_ONLY:
                break;
@@ -224,16 +237,10 @@ void __init mrst_time_init(void)
                return;
        }
        /* we need at least one APB timer */
-       sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
        pre_init_apic_IRQ0();
        apbt_time_init();
 }
 
-void __init mrst_rtc_init(void)
-{
-       sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
-}
-
 void __cpuinit mrst_arch_setup(void)
 {
        if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
@@ -256,6 +263,17 @@ static int mrst_i8042_detect(void)
        return 0;
 }
 
+/* Reboot and power off are handled by the SCU on a MID device */
+static void mrst_power_off(void)
+{
+       intel_scu_ipc_simple_command(0xf1, 1);
+}
+
+static void mrst_reboot(void)
+{
+       intel_scu_ipc_simple_command(0xf1, 0);
+}
+
 /*
  * Moorestown specific x86_init function overrides and early setup
  * calls.
@@ -281,6 +299,10 @@ void __init x86_mrst_early_setup(void)
 
        legacy_pic = &null_legacy_pic;
 
+       /* Moorestown specific power_off/restart method */
+       pm_power_off = mrst_power_off;
+       machine_ops.emergency_restart  = mrst_reboot;
+
        /* Avoid searching for BIOS MP tables */
        x86_init.mpparse.find_smp_config = x86_init_noop;
        x86_init.mpparse.get_smp_config = x86_init_uint_noop;
@@ -309,3 +331,505 @@ static inline int __init setup_x86_mrst_timer(char *arg)
        return 0;
 }
 __setup("x86_mrst_timer=", setup_x86_mrst_timer);
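setup_x86_mrst_timer() above picks the clockevent source at boot. A sketch of the option parsing, assuming the recognized strings are apbt_only and lapic_and_apbt:

#include <stdio.h>
#include <string.h>

enum mrst_timer_options {
        MRST_TIMER_DEFAULT,
        MRST_TIMER_APBT_ONLY,
        MRST_TIMER_LAPIC_APBT,
};

static enum mrst_timer_options parse_mrst_timer(const char *arg)
{
        if (strcmp(arg, "apbt_only") == 0)        /* assumed option string */
                return MRST_TIMER_APBT_ONLY;
        if (strcmp(arg, "lapic_and_apbt") == 0)   /* assumed option string */
                return MRST_TIMER_LAPIC_APBT;
        return MRST_TIMER_DEFAULT;
}

int main(void)
{
        /* boot with x86_mrst_timer=apbt_only to force the APB timer */
        printf("%d\n", parse_mrst_timer("apbt_only"));
        return 0;
}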
+
+/*
+ * Parse the GPIO table first, since the DEVS table needs it to map
+ * pin names to actual pins.
+ */
+static struct sfi_gpio_table_entry *gpio_table;
+static int gpio_num_entry;
+
+static int __init sfi_parse_gpio(struct sfi_table_header *table)
+{
+       struct sfi_table_simple *sb;
+       struct sfi_gpio_table_entry *pentry;
+       int num, i;
+
+       if (gpio_table)
+               return 0;
+       sb = (struct sfi_table_simple *)table;
+       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
+       pentry = (struct sfi_gpio_table_entry *)sb->pentry;
+
+       gpio_table = kmalloc(num * sizeof(*pentry), GFP_KERNEL);
+       if (!gpio_table)
+               return -1;
+       memcpy(gpio_table, pentry, num * sizeof(*pentry));
+       gpio_num_entry = num;
+
+       pr_debug("GPIO pin info:\n");
+       for (i = 0; i < num; i++, pentry++)
+               pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
+               " pin = %d\n", i,
+                       pentry->controller_name,
+                       pentry->pin_name,
+                       pentry->pin_no);
+       return 0;
+}
+
+static int get_gpio_by_name(const char *name)
+{
+       struct sfi_gpio_table_entry *pentry = gpio_table;
+       int i;
+
+       if (!pentry)
+               return -1;
+       for (i = 0; i < gpio_num_entry; i++, pentry++) {
+               if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
+                       return pentry->pin_no;
+       }
+       return -1;
+}
+
+/*
+ * This defines the array of device platform data that the IAFW exports
+ * through the SFI "DEVS" table; we use name and type to match a device
+ * with its platform data.
+ */
+struct devs_id {
+       char name[SFI_NAME_LEN + 1];
+       u8 type;
+       u8 delay;
+       void *(*get_platform_data)(void *info);
+};
+
+/* the offset for the mapping of global gpio pin to irq */
+#define MRST_IRQ_OFFSET 0x100
+
+static void __init *pmic_gpio_platform_data(void *info)
+{
+       static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
+       int gpio_base = get_gpio_by_name("pmic_gpio_base");
+
+       if (gpio_base == -1)
+               gpio_base = 64;
+       pmic_gpio_pdata.gpio_base = gpio_base;
+       pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
+       pmic_gpio_pdata.gpiointr = 0xffffeff8;
+
+       return &pmic_gpio_pdata;
+}
+
+static void __init *max3111_platform_data(void *info)
+{
+       struct spi_board_info *spi_info = info;
+       int intr = get_gpio_by_name("max3111_int");
+
+       if (intr == -1)
+               return NULL;
+       spi_info->irq = intr + MRST_IRQ_OFFSET;
+       return NULL;
+}
+
+/* we have multiple max7315s on the board ... */
+#define MAX7315_NUM 2
+static void __init *max7315_platform_data(void *info)
+{
+       static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
+       static int nr;
+       struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
+       struct i2c_board_info *i2c_info = info;
+       int gpio_base, intr;
+       char base_pin_name[SFI_NAME_LEN + 1];
+       char intr_pin_name[SFI_NAME_LEN + 1];
+
+       if (nr == MAX7315_NUM) {
+               pr_err("too many max7315s, we only support %d\n",
+                               MAX7315_NUM);
+               return NULL;
+       }
+       /* we have several max7315s on the board; we only need to load
+        * several instances of the same pca953x driver to cover them
+        */
+       strcpy(i2c_info->type, "max7315");
+       if (nr++) {
+               sprintf(base_pin_name, "max7315_%d_base", nr);
+               sprintf(intr_pin_name, "max7315_%d_int", nr);
+       } else {
+               strcpy(base_pin_name, "max7315_base");
+               strcpy(intr_pin_name, "max7315_int");
+       }
+
+       gpio_base = get_gpio_by_name(base_pin_name);
+       intr = get_gpio_by_name(intr_pin_name);
+
+       if (gpio_base == -1)
+               return NULL;
+       max7315->gpio_base = gpio_base;
+       if (intr != -1) {
+               i2c_info->irq = intr + MRST_IRQ_OFFSET;
+               max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
+       } else {
+               i2c_info->irq = -1;
+               max7315->irq_base = -1;
+       }
+       return max7315;
+}
+
+static void __init *emc1403_platform_data(void *info)
+{
+       static short intr2nd_pdata;
+       struct i2c_board_info *i2c_info = info;
+       int intr = get_gpio_by_name("thermal_int");
+       int intr2nd = get_gpio_by_name("thermal_alert");
+
+       if (intr == -1 || intr2nd == -1)
+               return NULL;
+
+       i2c_info->irq = intr + MRST_IRQ_OFFSET;
+       intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+       return &intr2nd_pdata;
+}
+
+static void __init *lis331dl_platform_data(void *info)
+{
+       static short intr2nd_pdata;
+       struct i2c_board_info *i2c_info = info;
+       int intr = get_gpio_by_name("accel_int");
+       int intr2nd = get_gpio_by_name("accel_2");
+
+       if (intr == -1 || intr2nd == -1)
+               return NULL;
+
+       i2c_info->irq = intr + MRST_IRQ_OFFSET;
+       intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
+
+       return &intr2nd_pdata;
+}
+
+static void __init *no_platform_data(void *info)
+{
+       return NULL;
+}
+
+static const struct devs_id __initconst device_ids[] = {
+       {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
+       {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
+       {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+       {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
+       {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
+       {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
+       {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+       {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
+       {},
+};
+
+#define MAX_IPCDEVS    24
+static struct platform_device *ipc_devs[MAX_IPCDEVS];
+static int ipc_next_dev;
+
+#define MAX_SCU_SPI    24
+static struct spi_board_info *spi_devs[MAX_SCU_SPI];
+static int spi_next_dev;
+
+#define MAX_SCU_I2C    24
+static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
+static int i2c_bus[MAX_SCU_I2C];
+static int i2c_next_dev;
+
+static void __init intel_scu_device_register(struct platform_device *pdev)
+{
+       if (ipc_next_dev == MAX_IPCDEVS)
+               pr_err("too many SCU IPC devices\n");
+       else
+               ipc_devs[ipc_next_dev++] = pdev;
+}
+
+static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
+{
+       struct spi_board_info *new_dev;
+
+       if (spi_next_dev == MAX_SCU_SPI) {
+               pr_err("too many SCU SPI devices\n");
+               return;
+       }
+
+       new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+       if (!new_dev) {
+               pr_err("failed to alloc mem for delayed spi dev %s\n",
+                       sdev->modalias);
+               return;
+       }
+       memcpy(new_dev, sdev, sizeof(*sdev));
+
+       spi_devs[spi_next_dev++] = new_dev;
+}
+
+static void __init intel_scu_i2c_device_register(int bus,
+                                               struct i2c_board_info *idev)
+{
+       struct i2c_board_info *new_dev;
+
+       if (i2c_next_dev == MAX_SCU_I2C) {
+               pr_err("too many SCU I2C devices\n");
+               return;
+       }
+
+       new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
+       if (!new_dev) {
+               pr_err("failed to alloc mem for delayed i2c dev %s\n",
+                       idev->type);
+               return;
+       }
+       memcpy(new_dev, idev, sizeof(*idev));
+
+       i2c_bus[i2c_next_dev] = bus;
+       i2c_devs[i2c_next_dev++] = new_dev;
+}
+
+/* Called by IPC driver */
+void intel_scu_devices_create(void)
+{
+       int i;
+
+       for (i = 0; i < ipc_next_dev; i++)
+               platform_device_add(ipc_devs[i]);
+
+       for (i = 0; i < spi_next_dev; i++)
+               spi_register_board_info(spi_devs[i], 1);
+
+       for (i = 0; i < i2c_next_dev; i++) {
+               struct i2c_adapter *adapter;
+               struct i2c_client *client;
+
+               adapter = i2c_get_adapter(i2c_bus[i]);
+               if (adapter) {
+                       client = i2c_new_device(adapter, i2c_devs[i]);
+                       if (!client)
+                               pr_err("can't create i2c device %s\n",
+                                       i2c_devs[i]->type);
+               } else
+                       i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
+       }
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_create);
+
+/* Called by IPC driver */
+void intel_scu_devices_destroy(void)
+{
+       int i;
+
+       for (i = 0; i < ipc_next_dev; i++)
+               platform_device_del(ipc_devs[i]);
+}
+EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
+
+static void __init install_irq_resource(struct platform_device *pdev, int irq)
+{
+       /* Single threaded */
+       static struct resource __initdata res = {
+               .name = "IRQ",
+               .flags = IORESOURCE_IRQ,
+       };
+       res.start = irq;
+       platform_device_add_resources(pdev, &res, 1);
+}
+
+static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
+{
+       const struct devs_id *dev = device_ids;
+       void *pdata = NULL;
+
+       while (dev->name[0]) {
+               if (dev->type == SFI_DEV_TYPE_IPC &&
+                       !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
+                       pdata = dev->get_platform_data(pdev);
+                       break;
+               }
+               dev++;
+       }
+       pdev->dev.platform_data = pdata;
+       intel_scu_device_register(pdev);
+}
+
+static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
+{
+       const struct devs_id *dev = device_ids;
+       void *pdata = NULL;
+
+       while (dev->name[0]) {
+               if (dev->type == SFI_DEV_TYPE_SPI &&
+                               !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
+                       pdata = dev->get_platform_data(spi_info);
+                       break;
+               }
+               dev++;
+       }
+       spi_info->platform_data = pdata;
+       if (dev->delay)
+               intel_scu_spi_device_register(spi_info);
+       else
+               spi_register_board_info(spi_info, 1);
+}
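These matching loops depend on device_ids being terminated by an entry with an empty name. When nothing matches, dev is left pointing at that sentinel, so its (zero) delay field routes the device down the immediate-registration path with NULL platform data. A sketch of what such a table might look like; the field types and the example entry are assumptions for illustration:

	struct devs_id {
		char name[SFI_NAME_LEN + 1];
		u8 type;			/* SFI_DEV_TYPE_* */
		u8 delay;			/* defer until the SCU is up? */
		void *(*get_platform_data)(void *info);
	};

	static const struct devs_id device_ids[] __initconst = {
		{"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
		{},	/* sentinel: name[0] == 0 ends the scan */
	};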
+
+static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
+{
+       const struct devs_id *dev = device_ids;
+       void *pdata = NULL;
+
+       while (dev->name[0]) {
+               if (dev->type == SFI_DEV_TYPE_I2C &&
+                       !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
+                       pdata = dev->get_platform_data(i2c_info);
+                       break;
+               }
+               dev++;
+       }
+       i2c_info->platform_data = pdata;
+
+       if (dev->delay)
+               intel_scu_i2c_device_register(bus, i2c_info);
+       else
+               i2c_register_board_info(bus, i2c_info, 1);
+}
+
+static int __init sfi_parse_devs(struct sfi_table_header *table)
+{
+       struct sfi_table_simple *sb;
+       struct sfi_device_table_entry *pentry;
+       struct spi_board_info spi_info;
+       struct i2c_board_info i2c_info;
+       struct platform_device *pdev;
+       int num, i, bus;
+       int ioapic;
+       struct io_apic_irq_attr irq_attr;
+
+       sb = (struct sfi_table_simple *)table;
+       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
+       pentry = (struct sfi_device_table_entry *)sb->pentry;
+
+       for (i = 0; i < num; i++, pentry++) {
+               if (pentry->irq != (u8)0xff) { /* native RTE case */
+                       /* These SPI2 devices are not exposed to the system
+                        * as PCI devices, but they have separate RTE entries
+                        * in the IOAPIC, so we have to enable them one by
+                        * one here.
+                        */
+                       ioapic = mp_find_ioapic(pentry->irq);
+                       irq_attr.ioapic = ioapic;
+                       irq_attr.ioapic_pin = pentry->irq;
+                       irq_attr.trigger = 1;   /* level triggered */
+                       irq_attr.polarity = 1;  /* active low */
+                       io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
+               }
+               switch (pentry->type) {
+               case SFI_DEV_TYPE_IPC:
+                       /* ID as IRQ is a hack that will go away */
+                       pdev = platform_device_alloc(pentry->name, pentry->irq);
+                       if (pdev == NULL) {
+                               pr_err("out of memory for SFI platform device '%s'.\n",
+                                                       pentry->name);
+                               continue;
+                       }
+                       install_irq_resource(pdev, pentry->irq);
+                       pr_debug("info[%2d]: IPC bus, name = %16.16s, "
+                               "irq = 0x%2x\n", i, pentry->name, pentry->irq);
+                       sfi_handle_ipc_dev(pdev);
+                       break;
+               case SFI_DEV_TYPE_SPI:
+                       memset(&spi_info, 0, sizeof(spi_info));
+                       strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
+                       spi_info.irq = pentry->irq;
+                       spi_info.bus_num = pentry->host_num;
+                       spi_info.chip_select = pentry->addr;
+                       spi_info.max_speed_hz = pentry->max_freq;
+                       pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
+                               "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
+                               spi_info.bus_num,
+                               spi_info.modalias,
+                               spi_info.irq,
+                               spi_info.max_speed_hz,
+                               spi_info.chip_select);
+                       sfi_handle_spi_dev(&spi_info);
+                       break;
+               case SFI_DEV_TYPE_I2C:
+                       memset(&i2c_info, 0, sizeof(i2c_info));
+                       bus = pentry->host_num;
+                       strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
+                       i2c_info.irq = pentry->irq;
+                       i2c_info.addr = pentry->addr;
+                       pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
+                               "irq = 0x%2x, addr = 0x%x\n", i, bus,
+                               i2c_info.type,
+                               i2c_info.irq,
+                               i2c_info.addr);
+                       sfi_handle_i2c_dev(bus, &i2c_info);
+                       break;
+               case SFI_DEV_TYPE_UART:
+               case SFI_DEV_TYPE_HSI:
+               default:
+                       break;
+               }
+       }
+       return 0;
+}
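For reference, the SFI entry fields consumed above map roughly as follows. This is a sketch reconstructed from the usage in this function; the authoritative, packed layout lives in the SFI headers:

	struct sfi_device_table_entry {
		u8	type;		/* SFI_DEV_TYPE_{IPC,SPI,I2C,UART,HSI} */
		u8	irq;		/* 0xff means no IOAPIC RTE to program */
		u8	host_num;	/* SPI bus number / I2C adapter number */
		u16	addr;		/* SPI chip select / I2C slave address */
		u32	max_freq;	/* SPI max speed in Hz */
		char	name[SFI_NAME_LEN];
	};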
+
+static int __init mrst_platform_init(void)
+{
+       sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
+       sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
+       return 0;
+}
+arch_initcall(mrst_platform_init);
+
+/*
+ * We will look these buttons up in the SFI GPIO table (by name)
+ * and register them dynamically. Please add all possible buttons
+ * here; the list is shrunk at init time for entries whose GPIO is
+ * not found.
+ */
+static struct gpio_keys_button gpio_button[] = {
+       {KEY_POWER,             -1, 1, "power_btn",     EV_KEY, 0, 3000},
+       {KEY_PROG1,             -1, 1, "prog_btn1",     EV_KEY, 0, 20},
+       {KEY_PROG2,             -1, 1, "prog_btn2",     EV_KEY, 0, 20},
+       {SW_LID,                -1, 1, "lid_switch",    EV_SW,  0, 20},
+       {KEY_VOLUMEUP,          -1, 1, "vol_up",        EV_KEY, 0, 20},
+       {KEY_VOLUMEDOWN,        -1, 1, "vol_down",      EV_KEY, 0, 20},
+       {KEY_CAMERA,            -1, 1, "camera_full",   EV_KEY, 0, 20},
+       {KEY_CAMERA_FOCUS,      -1, 1, "camera_half",   EV_KEY, 0, 20},
+       {SW_KEYPAD_SLIDE,       -1, 1, "MagSw1",        EV_SW,  0, 20},
+       {SW_KEYPAD_SLIDE,       -1, 1, "MagSw2",        EV_SW,  0, 20},
+};
+
+static struct gpio_keys_platform_data mrst_gpio_keys = {
+       .buttons        = gpio_button,
+       .rep            = 1,
+       .nbuttons       = -1, /* filled in after the GPIO search */
+};
+
+static struct platform_device pb_device = {
+       .name           = "gpio-keys",
+       .id             = -1,
+       .dev            = {
+               .platform_data  = &mrst_gpio_keys,
+       },
+};
+
+/*
+ * Shrink out the buttons whose GPIOs do not exist, and register
+ * the gpio-keys device if any remain.
+ */
+static int __init pb_keys_init(void)
+{
+       struct gpio_keys_button *gb = gpio_button;
+       int i, num, good = 0;
+
+       num = ARRAY_SIZE(gpio_button);
+       for (i = 0; i < num; i++) {
+               gb[i].gpio = get_gpio_by_name(gb[i].desc);
+               if (gb[i].gpio == -1)
+                       continue;
+
+               if (i != good)
+                       gb[good] = gb[i];
+               good++;
+       }
+
+       if (good) {
+               mrst_gpio_keys.nbuttons = good;
+               return platform_device_register(&pb_device);
+       }
+       return 0;
+}
+late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
new file mode 100644 (file)
index 0000000..32cd7ed
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * vrtc.c: Driver for virtual RTC device on Intel MID platform
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ * VRTC is emulated by system controller firmware; the real HW
+ * RTC is located in the PMIC device. The SCU FW shadows the PMIC
+ * RTC in a memory-mapped IO space that is visible to the host IA
+ * processor.
+ *
+ * This driver is based on RTC CMOS driver.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/platform_device.h>
+
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+#include <asm/time.h>
+#include <asm/fixmap.h>
+
+static unsigned char __iomem *vrtc_virt_base;
+
+unsigned char vrtc_cmos_read(unsigned char reg)
+{
+       unsigned char retval;
+
+       /* vRTC's registers range from 0x0 to 0xD */
+       if (reg > 0xd || !vrtc_virt_base)
+               return 0xff;
+
+       lock_cmos_prefix(reg);
+       retval = __raw_readb(vrtc_virt_base + (reg << 2));
+       lock_cmos_suffix(reg);
+       return retval;
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_read);
+
+void vrtc_cmos_write(unsigned char val, unsigned char reg)
+{
+       if (reg > 0xd || !vrtc_virt_base)
+               return;
+
+       lock_cmos_prefix(reg);
+       __raw_writeb(val, vrtc_virt_base + (reg << 2));
+       lock_cmos_suffix(reg);
+}
+EXPORT_SYMBOL_GPL(vrtc_cmos_write);
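The reg << 2 scaling reflects the SCU exposing each shadowed CMOS register in its own 32-bit slot, so the fourteen registers (0x0-0xD) occupy a 0x38-byte window. A minimal, illustrative read using the accessor above:

	unsigned char sec = vrtc_cmos_read(RTC_SECONDS);

	if (sec == 0xff)	/* error return: bad register or vRTC unmapped */
		pr_warn("vRTC read failed\n");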
+
+unsigned long vrtc_get_time(void)
+{
+       u8 sec, min, hour, mday, mon;
+       u32 year;
+
+       while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
+               cpu_relax();
+
+       sec = vrtc_cmos_read(RTC_SECONDS);
+       min = vrtc_cmos_read(RTC_MINUTES);
+       hour = vrtc_cmos_read(RTC_HOURS);
+       mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
+       mon = vrtc_cmos_read(RTC_MONTH);
+       year = vrtc_cmos_read(RTC_YEAR);
+
+       /* the vRTC YEAR register contains the offset from 1960 */
+       year += 1960;
+
+       printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
+               "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
+
+       return mktime(year, mon, mday, hour, min, sec);
+}
+
+/* Only care about the minutes and seconds */
+int vrtc_set_mmss(unsigned long nowtime)
+{
+       int real_sec, real_min;
+       int vrtc_min;
+
+       vrtc_min = vrtc_cmos_read(RTC_MINUTES);
+
+       real_sec = nowtime % 60;
+       real_min = nowtime / 60;
+       if (((abs(real_min - vrtc_min) + 15)/30) & 1)
+               real_min += 30;
+       real_min %= 60;
+
+       vrtc_cmos_write(real_sec, RTC_SECONDS);
+       vrtc_cmos_write(real_min, RTC_MINUTES);
+       return 0;
+}
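Since only minutes and seconds are written, the unwritten hour digit must remain valid; the (abs(...) + 15)/30 & 1 test preserves a deliberate half-hour offset, as in the classic CMOS set_rtc_mmss() logic. Note real_min is total minutes here, but only the difference modulo 60 decides the parity. A worked example with assumed values:

	/*
	 * Wall clock at minute 37 while the vRTC reads 5 (an RTC kept
	 * ~30 min off for a half-hour timezone):
	 *   abs(37 - 5) = 32, (32 + 15) / 30 = 1  -> odd, add 30
	 *   (37 + 30) % 60 = 7                    -> offset is preserved
	 * Ordinary drift, minute 37 vs vRTC 35:
	 *   abs(37 - 35) = 2, (2 + 15) / 30 = 0   -> even, write 37 as-is
	 */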
+
+void __init mrst_rtc_init(void)
+{
+       unsigned long rtc_paddr;
+       void __iomem *virt_base;
+
+       sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
+       if (!sfi_mrtc_num)
+               return;
+
+       rtc_paddr = sfi_mrtc_array[0].phys_addr;
+
+       /* vRTC's register address may not be page aligned */
+       set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
+
+       virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
+       virt_base += rtc_paddr & ~PAGE_MASK;
+       vrtc_virt_base = virt_base;
+
+       x86_platform.get_wallclock = vrtc_get_time;
+       x86_platform.set_wallclock = vrtc_set_mmss;
+}
+
+/*
+ * The Moorestown platform has a memory-mapped virtual RTC device that emulates
+ * the programming interface of the classic CMOS RTC.
+ */
+
+static struct resource vrtc_resources[] = {
+       [0] = {
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .flags  = IORESOURCE_IRQ,
+       }
+};
+
+static struct platform_device vrtc_device = {
+       .name           = "rtc_mrst",
+       .id             = -1,
+       .resource       = vrtc_resources,
+       .num_resources  = ARRAY_SIZE(vrtc_resources),
+};
+
+/* Register the RTC device if appropriate */
+static int __init mrst_device_create(void)
+{
+       /* No Moorestown, no device */
+       if (!mrst_identify_cpu())
+               return -ENODEV;
+       /* No vRTC entry, no device */
+       if (!sfi_mrtc_num)
+               return -ENODEV;
+
+       /* iomem resource */
+       vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
+       vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
+                               MRST_VRTC_MAP_SZ - 1;
+       /* irq resource */
+       vrtc_resources[1].start = sfi_mrtc_array[0].irq;
+       vrtc_resources[1].end = sfi_mrtc_array[0].irq;
+
+       return platform_device_register(&vrtc_device);
+}
+
+module_init(mrst_device_create);
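A driver binding to the "rtc_mrst" name would then pick these resources up in the usual platform-bus way; a minimal probe sketch (illustrative only, not the actual rtc-mrst driver):

	static int __devinit rtc_mrst_probe(struct platform_device *pdev)
	{
		struct resource *mem;
		int irq;

		mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		irq = platform_get_irq(pdev, 0);
		if (!mem || irq < 0)
			return -ENODEV;

		/* ioremap(mem->start, resource_size(mem)), request_irq(), ... */
		return 0;
	}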
index dd4c281ffe5720c3ff15f1eceaa09759e17df7d1..ca54875ac795117079b7a9521bfd0bf42bf9f980 100644 (file)
@@ -48,9 +48,9 @@ static void __init mp_sfi_register_lapic_address(unsigned long address)
 /* All CPUs enumerated by SFI must be present and enabled */
 static void __cpuinit mp_sfi_register_lapic(u8 id)
 {
-       if (MAX_APICS - id <= 0) {
+       if (MAX_LOCAL_APIC - id <= 0) {
                pr_warning("Processor #%d invalid (max %d)\n",
-                       id, MAX_APICS);
+                       id, MAX_LOCAL_APIC);
                return;
        }
 
index ba9caa808a9c1b42c6a616968c57e96769039314..df58e9cad96ae9441a4f86f22900a6e0bf05aa64 100644 (file)
@@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode)
 
        /*
         * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
-        * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub
+        * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
         */
        bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
                                * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
@@ -1490,7 +1490,7 @@ calculate_destination_timeout(void)
 /*
  * initialize the bau_control structure for each cpu
  */
-static void __init uv_init_per_cpu(int nuvhubs)
+static int __init uv_init_per_cpu(int nuvhubs)
 {
        int i;
        int cpu;
@@ -1507,7 +1507,7 @@ static void __init uv_init_per_cpu(int nuvhubs)
        struct bau_control *smaster = NULL;
        struct socket_desc {
                short num_cpus;
-               short cpu_number[16];
+               short cpu_number[MAX_CPUS_PER_SOCKET];
        };
        struct uvhub_desc {
                unsigned short socket_mask;
@@ -1540,6 +1540,10 @@ static void __init uv_init_per_cpu(int nuvhubs)
                sdp = &bdp->socket[socket];
                sdp->cpu_number[sdp->num_cpus] = cpu;
                sdp->num_cpus++;
+               if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
+                       printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
+                       return 1;
+               }
        }
        for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
                if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
@@ -1570,6 +1574,12 @@ static void __init uv_init_per_cpu(int nuvhubs)
                                bcp->uvhub_master = hmaster;
                                bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
                                                blade_processor_id;
+                               if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
+                                       printk(KERN_EMERG
+                                               "%d cpus per uvhub invalid\n",
+                                               bcp->uvhub_cpu);
+                                       return 1;
+                               }
                        }
 nextsocket:
                        socket++;
@@ -1595,6 +1605,7 @@ nextsocket:
                bcp->congested_reps = congested_reps;
                bcp->congested_period = congested_period;
        }
+       return 0;
 }
 
 /*
@@ -1625,7 +1636,10 @@ static int __init uv_bau_init(void)
        spin_lock_init(&disable_lock);
        congested_cycles = microsec_2_cycles(congested_response_us);
 
-       uv_init_per_cpu(nuvhubs);
+       if (uv_init_per_cpu(nuvhubs)) {
+               nobau = 1;
+               return 0;
+       }
 
        uv_partition_base_pnode = 0x7fffffff;
        for (uvhub = 0; uvhub < nuvhubs; uvhub++)
index 3371bd053b89f29e14e5f55d88929d2427f123b2..63203767174683b3db1ab410be9263aabe280ab0 100644 (file)
@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
        ver = m->apicver;
        if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
                printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
-                       m->apicid, MAX_APICS);
+                       m->apicid, MAX_LOCAL_APIC);
                return;
        }
 
index 2c7def95f721c6d08596c9ce852a3ac4a1c54e38..4c8dea513b66adeea7881d7f541237c812e5bf14 100644 (file)
@@ -408,6 +408,9 @@ acpi_ev_match_gpe_method(acpi_handle obj_handle,
                return_ACPI_STATUS(AE_OK);
        }
 
+       /* Disable the GPE in case it's been enabled already. */
+       (void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
+
        /*
         * Add the GPE information from above to the gpe_event_info block for
         * use during dispatch of this GPE.
index 660a2728908d6e15f8abbe5b2eae43b4a91df503..0cac7ec0d2ece0764806fae7a11994da0520ab23 100644 (file)
@@ -577,9 +577,7 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
         * as possible (without an NMI being received in the middle of
         * this) - so disable NMIs and initialize the device:
         */
-       acpi_nmi_disable();
        status = acpi_ns_evaluate(info);
-       acpi_nmi_enable();
 
        if (ACPI_SUCCESS(status)) {
                walk_info->num_INI++;
index 9fb9d5ac939db1c071c4cdcaa7fba49364236e25..95649d373071ac93d14bbbb478db1ab4a8cdb801 100644 (file)
@@ -130,8 +130,6 @@ struct acpi_battery {
        unsigned long flags;
 };
 
-static int acpi_battery_update(struct acpi_battery *battery);
-
 #define to_acpi_battery(x) container_of(x, struct acpi_battery, bat)
 
 inline int acpi_battery_present(struct acpi_battery *battery)
@@ -186,9 +184,6 @@ static int acpi_battery_get_property(struct power_supply *psy,
        int ret = 0;
        struct acpi_battery *battery = to_acpi_battery(psy);
 
-       if (acpi_battery_update(battery))
-               return -ENODEV;
-
        if (acpi_battery_present(battery)) {
                /* run battery update only if it is present */
                acpi_battery_get_state(battery);
index 5718566e00f9b27573db228ba74559570ba48f89..d9926afec110997b618b70062d50450847d9d1ff 100644 (file)
@@ -275,13 +275,24 @@ acpi_table_parse_srat(enum acpi_srat_type id,
 int __init acpi_numa_init(void)
 {
        int ret = 0;
+       int nr_cpu_entries = nr_cpu_ids;
+
+#ifdef CONFIG_X86
+       /*
+        * Do not limit the entry count to the cpu count derived from NR_CPUS
+        * or nr_cpus=; SRAT cpu entries may be ordered differently from those
+        * in the MADT, so walk all cpu entries in the SRAT to build the full
+        * apicid-to-node mapping.
+        */
+       nr_cpu_entries = MAX_LOCAL_APIC;
+#endif
 
        /* SRAT: Static Resource Affinity Table */
        if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
                acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-                                    acpi_parse_x2apic_affinity, nr_cpu_ids);
+                                    acpi_parse_x2apic_affinity, nr_cpu_entries);
                acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-                                    acpi_parse_processor_affinity, nr_cpu_ids);
+                                    acpi_parse_processor_affinity, nr_cpu_entries);
                ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
                                            acpi_parse_memory_affinity,
                                            NR_NODE_MEMBLKS);
index 2b6c21d86b9885571b41679586636fff3b6c53bb..29ef505c487b92efe1b12e79c2231aa85c0298ce 100644 (file)
@@ -705,54 +705,85 @@ static int acpi_bus_get_perf_flags(struct acpi_device *device)
 }
 
 static acpi_status
-acpi_bus_extract_wakeup_device_power_package(struct acpi_device *device,
-                                            union acpi_object *package)
+acpi_bus_extract_wakeup_device_power_package(acpi_handle handle,
+                                            struct acpi_device_wakeup *wakeup)
 {
-       int i = 0;
+       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+       union acpi_object *package = NULL;
        union acpi_object *element = NULL;
+       acpi_status status;
+       int i = 0;
 
-       if (!device || !package || (package->package.count < 2))
+       if (!wakeup)
                return AE_BAD_PARAMETER;
 
+       /* _PRW */
+       status = acpi_evaluate_object(handle, "_PRW", NULL, &buffer);
+       if (ACPI_FAILURE(status)) {
+               ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRW"));
+               return status;
+       }
+
+       package = (union acpi_object *)buffer.pointer;
+
+       if (!package || (package->package.count < 2)) {
+               status = AE_BAD_DATA;
+               goto out;
+       }
+
        element = &(package->package.elements[0]);
-       if (!element)
-               return AE_BAD_PARAMETER;
+       if (!element) {
+               status = AE_BAD_DATA;
+               goto out;
+       }
        if (element->type == ACPI_TYPE_PACKAGE) {
                if ((element->package.count < 2) ||
                    (element->package.elements[0].type !=
                     ACPI_TYPE_LOCAL_REFERENCE)
-                   || (element->package.elements[1].type != ACPI_TYPE_INTEGER))
-                       return AE_BAD_DATA;
-               device->wakeup.gpe_device =
+                   || (element->package.elements[1].type != ACPI_TYPE_INTEGER)) {
+                       status = AE_BAD_DATA;
+                       goto out;
+               }
+               wakeup->gpe_device =
                    element->package.elements[0].reference.handle;
-               device->wakeup.gpe_number =
+               wakeup->gpe_number =
                    (u32) element->package.elements[1].integer.value;
        } else if (element->type == ACPI_TYPE_INTEGER) {
-               device->wakeup.gpe_number = element->integer.value;
-       } else
-               return AE_BAD_DATA;
+               wakeup->gpe_device = NULL;
+               wakeup->gpe_number = element->integer.value;
+       } else {
+               status = AE_BAD_DATA;
+               goto out;
+       }
 
        element = &(package->package.elements[1]);
        if (element->type != ACPI_TYPE_INTEGER) {
-               return AE_BAD_DATA;
+               status = AE_BAD_DATA;
+               goto out;
        }
-       device->wakeup.sleep_state = element->integer.value;
+       wakeup->sleep_state = element->integer.value;
 
        if ((package->package.count - 2) > ACPI_MAX_HANDLES) {
-               return AE_NO_MEMORY;
+               status = AE_NO_MEMORY;
+               goto out;
        }
-       device->wakeup.resources.count = package->package.count - 2;
-       for (i = 0; i < device->wakeup.resources.count; i++) {
+       wakeup->resources.count = package->package.count - 2;
+       for (i = 0; i < wakeup->resources.count; i++) {
                element = &(package->package.elements[i + 2]);
-               if (element->type != ACPI_TYPE_LOCAL_REFERENCE)
-                       return AE_BAD_DATA;
+               if (element->type != ACPI_TYPE_LOCAL_REFERENCE) {
+                       status = AE_BAD_DATA;
+                       goto out;
+               }
 
-               device->wakeup.resources.handles[i] = element->reference.handle;
+               wakeup->resources.handles[i] = element->reference.handle;
        }
 
-       acpi_gpe_can_wake(device->wakeup.gpe_device, device->wakeup.gpe_number);
+       acpi_gpe_can_wake(wakeup->gpe_device, wakeup->gpe_number);
 
-       return AE_OK;
+ out:
+       kfree(buffer.pointer);
+
+       return status;
 }
 
 static void acpi_bus_set_run_wake_flags(struct acpi_device *device)
@@ -787,26 +818,15 @@ static void acpi_bus_set_run_wake_flags(struct acpi_device *device)
 static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
 {
        acpi_status status = 0;
-       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-       union acpi_object *package = NULL;
        int psw_error;
 
-       /* _PRW */
-       status = acpi_evaluate_object(device->handle, "_PRW", NULL, &buffer);
-       if (ACPI_FAILURE(status)) {
-               ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRW"));
-               goto end;
-       }
-
-       package = (union acpi_object *)buffer.pointer;
-       status = acpi_bus_extract_wakeup_device_power_package(device, package);
+       status = acpi_bus_extract_wakeup_device_power_package(device->handle,
+                                                             &device->wakeup);
        if (ACPI_FAILURE(status)) {
                ACPI_EXCEPTION((AE_INFO, status, "Extracting _PRW package"));
                goto end;
        }
 
-       kfree(buffer.pointer);
-
        device->wakeup.flags.valid = 1;
        device->wakeup.prepare_count = 0;
        acpi_bus_set_run_wake_flags(device);
@@ -1351,6 +1371,7 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl,
        struct acpi_bus_ops *ops = context;
        int type;
        unsigned long long sta;
+       struct acpi_device_wakeup wakeup;
        struct acpi_device *device;
        acpi_status status;
        int result;
@@ -1360,8 +1381,10 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl,
                return AE_OK;
 
        if (!(sta & ACPI_STA_DEVICE_PRESENT) &&
-           !(sta & ACPI_STA_DEVICE_FUNCTIONING))
+           !(sta & ACPI_STA_DEVICE_FUNCTIONING)) {
+               acpi_bus_extract_wakeup_device_power_package(handle, &wakeup);
                return AE_CTRL_DEPTH;
+       }
 
        /*
         * We may already have an acpi_device from a previous enumeration.  If
index 11ec911016c6ab3e81cefe7e202c4cc4c95762f0..36e2319264bd33d7918979105a4f641da36d3dcd 100644 (file)
@@ -128,16 +128,6 @@ config PDC_ADMA
 
          If unsure, say N.
 
-config PATA_MPC52xx
-       tristate "Freescale MPC52xx SoC internal IDE"
-       depends on PPC_MPC52xx && PPC_BESTCOMM
-       select PPC_BESTCOMM_ATA
-       help
-         This option enables support for integrated IDE controller
-         of the Freescale MPC52xx SoC.
-
-         If unsure, say N.
-
 config PATA_OCTEON_CF
        tristate "OCTEON Boot Bus Compact Flash support"
        depends on CPU_CAVIUM_OCTEON
@@ -366,7 +356,7 @@ config PATA_CS5535
 
 config PATA_CS5536
        tristate "CS5536 PATA support"
-       depends on PCI && X86 && !X86_64
+       depends on PCI
        help
          This option enables support for the AMD CS5536
          companion chip used with the Geode LX processor family.
@@ -491,6 +481,16 @@ config PATA_MARVELL
 
          If unsure, say N.
 
+config PATA_MPC52xx
+       tristate "Freescale MPC52xx SoC internal IDE"
+       depends on PPC_MPC52xx && PPC_BESTCOMM
+       select PPC_BESTCOMM_ATA
+       help
+         This option enables support for integrated IDE controller
+         of the Freescale MPC52xx SoC.
+
+         If unsure, say N.
+
 config PATA_NETCELL
        tristate "NETCELL Revolution RAID support"
        depends on PCI
index c501af5b12b959209bde413d9b8bbe35f7be5068..2b67c900a459865c694e0cf012ebcacbfd8f76bd 100644 (file)
@@ -11,7 +11,6 @@ obj-$(CONFIG_SATA_DWC)                += sata_dwc_460ex.o
 
 # SFF w/ custom DMA
 obj-$(CONFIG_PDC_ADMA)         += pdc_adma.o
-obj-$(CONFIG_PATA_MPC52xx)     += pata_mpc52xx.o
 obj-$(CONFIG_PATA_OCTEON_CF)   += pata_octeon_cf.o
 obj-$(CONFIG_SATA_QSTOR)       += sata_qstor.o
 obj-$(CONFIG_SATA_SX4)         += sata_sx4.o
@@ -52,6 +51,7 @@ obj-$(CONFIG_PATA_IT821X)     += pata_it821x.o
 obj-$(CONFIG_PATA_JMICRON)     += pata_jmicron.o
 obj-$(CONFIG_PATA_MACIO)       += pata_macio.o
 obj-$(CONFIG_PATA_MARVELL)     += pata_marvell.o
+obj-$(CONFIG_PATA_MPC52xx)     += pata_mpc52xx.o
 obj-$(CONFIG_PATA_NETCELL)     += pata_netcell.o
 obj-$(CONFIG_PATA_NINJA32)     += pata_ninja32.o
 obj-$(CONFIG_PATA_NS87415)     += pata_ns87415.o
index 7f77c67d267ca454f63b5501ab1bb82d5fe9f16d..f23d6d46b95b1bf7f4f19fb657f86cadc0ccd6b7 100644 (file)
@@ -4807,9 +4807,6 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc)
 {
        struct ata_device *dev = qc->dev;
 
-       if (ata_tag_internal(qc->tag))
-               return;
-
        if (ata_is_nodata(qc->tf.protocol))
                return;
 
@@ -4858,14 +4855,23 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
                if (unlikely(qc->err_mask))
                        qc->flags |= ATA_QCFLAG_FAILED;
 
-               if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) {
-                       /* always fill result TF for failed qc */
+               /*
+                * Finish internal commands without any further processing
+                * and always with the result TF filled.
+                */
+               if (unlikely(ata_tag_internal(qc->tag))) {
                        fill_result_tf(qc);
+                       __ata_qc_complete(qc);
+                       return;
+               }
 
-                       if (!ata_tag_internal(qc->tag))
-                               ata_qc_schedule_eh(qc);
-                       else
-                               __ata_qc_complete(qc);
+               /*
+                * Non-internal qc has failed.  Fill the result TF and
+                * summon EH.
+                */
+               if (unlikely(qc->flags & ATA_QCFLAG_FAILED)) {
+                       fill_result_tf(qc);
+                       ata_qc_schedule_eh(qc);
                        return;
                }
 
index 5e590504f3aa15c6c3c73450327a0494c264d20f..17a637877d0311abb7c95c7cd6e2359be4a2c70b 100644 (file)
@@ -3275,6 +3275,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
        struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL;
        struct ata_eh_context *ehc = &link->eh_context;
        struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL;
+       enum ata_lpm_policy old_policy = link->lpm_policy;
        unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM;
        unsigned int err_mask;
        int rc;
@@ -3338,6 +3339,14 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                goto fail;
        }
 
+       /*
+        * Low level driver acked the transition.  Issue DIPM command
+        * with the new policy set.
+        */
+       link->lpm_policy = policy;
+       if (ap && ap->slave_link)
+               ap->slave_link->lpm_policy = policy;
+
        /* host config updated, enable DIPM if transitioning to MIN_POWER */
        ata_for_each_dev(dev, link, ENABLED) {
                if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) {
@@ -3353,12 +3362,14 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                }
        }
 
-       link->lpm_policy = policy;
-       if (ap && ap->slave_link)
-               ap->slave_link->lpm_policy = policy;
        return 0;
 
 fail:
+       /* restore the old policy */
+       link->lpm_policy = old_policy;
+       if (ap && ap->slave_link)
+               ap->slave_link->lpm_policy = old_policy;
+
        /* if no device or only one more chance is left, disable LPM */
        if (!dev || ehc->tries[dev->devno] <= 2) {
                ata_link_printk(link, KERN_WARNING,
index d05387d1e14be5aedf2f23dae45e86b7ed989276..484697fef3867dce2c1926f0ad50bb53de3b40e7 100644 (file)
@@ -1532,11 +1532,10 @@ static unsigned int __ata_sff_port_intr(struct ata_port *ap,
                if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR))
                        return ata_sff_idle_irq(ap);
                break;
-       case HSM_ST:
-       case HSM_ST_LAST:
-               break;
-       default:
+       case HSM_ST_IDLE:
                return ata_sff_idle_irq(ap);
+       default:
+               break;
        }
 
        /* check main status, clearing INTRQ if needed */
index 21ee23f89e88b153ee8e35af5d75c61f83f92496..628c8fae5937183f24b9a607c17f0765609a3a5e 100644 (file)
 #include <linux/delay.h>
 #include <linux/libata.h>
 #include <scsi/scsi_host.h>
+
+#ifdef CONFIG_X86_32
 #include <asm/msr.h>
+static int use_msr;
+module_param_named(msr, use_msr, int, 0644);
+MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
+#else
+#undef rdmsr   /* avoid accidental MSR usage on e.g. x86-64 */
+#undef wrmsr
+#define rdmsr(x, y, z) do { } while (0)
+#define wrmsr(x, y, z) do { } while (0)
+#define use_msr 0
+#endif
 
 #define DRV_NAME       "pata_cs5536"
-#define DRV_VERSION    "0.0.7"
+#define DRV_VERSION    "0.0.8"
 
 enum {
        CFG                     = 0,
@@ -75,8 +87,6 @@ enum {
        IDE_ETC_NODMA           = 0x03,
 };
 
-static int use_msr;
-
 static const u32 msr_reg[4] = {
        MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC,
 };
@@ -88,7 +98,7 @@ static const u8 pci_reg[4] = {
 static inline int cs5536_read(struct pci_dev *pdev, int reg, u32 *val)
 {
        if (unlikely(use_msr)) {
-               u32 dummy;
+               u32 dummy __maybe_unused;
 
                rdmsr(msr_reg[reg], *val, dummy);
                return 0;
@@ -294,8 +304,6 @@ MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller");
 MODULE_LICENSE("GPL");
 MODULE_DEVICE_TABLE(pci, cs5536);
 MODULE_VERSION(DRV_VERSION);
-module_param_named(msr, use_msr, int, 0644);
-MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
 
 module_init(cs5536_init);
 module_exit(cs5536_exit);
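On !CONFIG_X86_32 builds, use_msr is the constant 0 and the rdmsr/wrmsr stubs expand to nothing, so the compiler can drop the MSR branch entirely; the accessor effectively reduces to its PCI path (an illustrative reduction, assuming the PCI branch not shown in this hunk):

	static inline int cs5536_read(struct pci_dev *pdev, int reg, u32 *val)
	{
		return pci_read_config_dword(pdev, pci_reg[reg], val);
	}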
index 2b464b631f22697ac5f177d371b94d5646683d8f..0b0625054a87a5697c8e4745bfd49eb2d1bf45c0 100644 (file)
@@ -392,7 +392,10 @@ static int atmtcp_attach(struct atm_vcc *vcc,int itf)
                        atm_dev_put(dev);
                        return -EMEDIUMTYPE;
                }
-               if (PRIV(dev)->vcc) return -EBUSY;
+               if (PRIV(dev)->vcc) {
+                       atm_dev_put(dev);
+                       return -EBUSY;
+               }
        }
        else {
                int error;
index 720148294e648473a1a82cc0c68f50ac1fd923c3..3c6cabcb7d84b0428cdeae91b3f9a98f2bc85715 100644 (file)
@@ -311,8 +311,10 @@ static void hci_uart_tty_close(struct tty_struct *tty)
 
                if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) {
                        hu->proto->close(hu);
-                       hci_unregister_dev(hdev);
-                       hci_free_dev(hdev);
+                       if (hdev) {
+                               hci_unregister_dev(hdev);
+                               hci_free_dev(hdev);
+                       }
                }
        }
 }
index 42396df555567660d597ac524631e9020edfbea3..9252e85706ef2ce54728a0c8bc366c80c793033c 100644 (file)
@@ -38,7 +38,7 @@ static int agp_bridges_found;
 
 static void amd64_tlbflush(struct agp_memory *temp)
 {
-       k8_flush_garts();
+       amd_flush_garts();
 }
 
 static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
@@ -124,7 +124,7 @@ static int amd64_fetch_size(void)
        u32 temp;
        struct aper_size_info_32 *values;
 
-       dev = k8_northbridges.nb_misc[0];
+       dev = node_to_amd_nb(0)->misc;
        if (dev==NULL)
                return 0;
 
@@ -181,16 +181,15 @@ static int amd_8151_configure(void)
        unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
        int i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return 0;
 
        /* Configure AGP regs in each x86-64 host bridge. */
-       for (i = 0; i < k8_northbridges.num; i++) {
+       for (i = 0; i < amd_nb_num(); i++) {
                agp_bridge->gart_bus_addr =
-                               amd64_configure(k8_northbridges.nb_misc[i],
-                                               gatt_bus);
+                       amd64_configure(node_to_amd_nb(i)->misc, gatt_bus);
        }
-       k8_flush_garts();
+       amd_flush_garts();
        return 0;
 }
 
@@ -200,11 +199,11 @@ static void amd64_cleanup(void)
        u32 tmp;
        int i;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return;
 
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
                /* disable gart translation */
                pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp);
                tmp &= ~GARTEN;
@@ -331,15 +330,14 @@ static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr)
 {
        int i;
 
-       if (cache_k8_northbridges() < 0)
+       if (amd_cache_northbridges() < 0)
                return -ENODEV;
 
-       if (!k8_northbridges.gart_supported)
+       if (!amd_nb_has_feature(AMD_NB_GART))
                return -ENODEV;
 
-       for (i = 0; i < k8_northbridges.num; i++) {
-               struct pci_dev *dev = k8_northbridges.nb_misc[i];
+       for (i = 0; i < amd_nb_num(); i++) {
+               struct pci_dev *dev = node_to_amd_nb(i)->misc;
                if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
                        dev_err(&dev->dev, "no usable aperture found\n");
 #ifdef __x86_64__
@@ -416,7 +415,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
        }
 
        /* shadow x86-64 registers into ULi registers */
-       pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+       pci_read_config_dword(node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
                               &httfea);
 
        /* if x86-64 aperture base is beyond 4G, exit here */
@@ -484,7 +483,7 @@ static int nforce3_agp_init(struct pci_dev *pdev)
        pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
 
        /* shadow x86-64 registers into NVIDIA registers */
-       pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE,
+       pci_read_config_dword(node_to_amd_nb(0)->misc, AMD64_GARTAPERTUREBASE,
                               &apbase);
 
        /* if x86-64 aperture base is beyond 4G, exit here */
@@ -778,7 +777,7 @@ int __init agp_amd64_init(void)
                }
 
                /* First check that we have at least one AMD64 NB */
-               if (!pci_dev_present(k8_nb_ids))
+               if (!pci_dev_present(amd_nb_misc_ids))
                        return -ENODEV;
 
                /* Look for any AGP bridge */
index 16a2847b7cdbfeee43c9dcd212915a2f6b500711..29ac6d499fa6a69221b51ec52ca934c5d41e0912 100644 (file)
@@ -1192,12 +1192,19 @@ static void i9xx_chipset_flush(void)
                writel(1, intel_private.i9xx_flush_page);
 }
 
-static void i965_write_entry(dma_addr_t addr, unsigned int entry,
+static void i965_write_entry(dma_addr_t addr,
+                            unsigned int entry,
                             unsigned int flags)
 {
+       u32 pte_flags;
+
+       pte_flags = I810_PTE_VALID;
+       if (flags == AGP_USER_CACHED_MEMORY)
+               pte_flags |= I830_PTE_SYSTEM_CACHED;
+
        /* Shift high bits down */
        addr |= (addr >> 28) & 0xf0;
-       writel(addr | I810_PTE_VALID, intel_private.gtt + entry);
+       writel(addr | pte_flags, intel_private.gtt + entry);
 }
 
 static bool gen6_check_flags(unsigned int flags)
index 73dcb0ee41fdaebcbefee2b0ddf705bd5a26b1e9..d3d63be2cd37efc15c5b261ab10492ba8b01caf6 100644 (file)
@@ -29,7 +29,6 @@
 #include <linux/ramoops.h>
 
 #define RAMOOPS_KERNMSG_HDR "===="
-#define RAMOOPS_HEADER_SIZE   (5 + sizeof(struct timeval))
 
 #define RECORD_SIZE 4096
 
@@ -65,8 +64,8 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
                        struct ramoops_context, dump);
        unsigned long s1_start, s2_start;
        unsigned long l1_cpy, l2_cpy;
-       int res;
-       char *buf;
+       int res, hdr_size;
+       char *buf, *buf_orig;
        struct timeval timestamp;
 
        /* Only dump oopses if dump_oops is set */
@@ -74,6 +73,8 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
                return;
 
        buf = (char *)(cxt->virt_addr + (cxt->count * RECORD_SIZE));
+       buf_orig = buf;
+
        memset(buf, '\0', RECORD_SIZE);
        res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR);
        buf += res;
@@ -81,8 +82,9 @@ static void ramoops_do_dump(struct kmsg_dumper *dumper,
        res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec);
        buf += res;
 
-       l2_cpy = min(l2, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE));
-       l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE) - l2_cpy);
+       hdr_size = buf - buf_orig;
+       l2_cpy = min(l2, (unsigned long)(RECORD_SIZE - hdr_size));
+       l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - hdr_size) - l2_cpy);
 
        s2_start = l2 - l2_cpy;
        s1_start = l1 - l1_cpy;
index c63a438237444a0e35fa05bdef9d0d232839c463..1109f6848a43940b8e8ed738f891f6560f17f7a8 100644 (file)
@@ -355,6 +355,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
                dprintk("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
                        (unsigned long)freqs->cpu);
                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
+               trace_cpu_frequency(freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
index a507108433785f8432b3d15504c3a036b461c1a0..08d5f05378d9efb1df0fe055240e8e8ed9e7a90b 100644 (file)
@@ -107,6 +107,7 @@ static void cpuidle_idle_call(void)
        if (cpuidle_curr_governor->reflect)
                cpuidle_curr_governor->reflect(dev);
        trace_power_end(smp_processor_id());
+       trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 /**
index 411d5bf50fc43cab437dff34d3d9f25dd9928fe0..a25f5f61e0e00becc22b5b5b4d8ce1e4ce6e31b8 100644 (file)
@@ -449,7 +449,7 @@ mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
 static void mv_xor_tasklet(unsigned long data)
 {
        struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
-       __mv_xor_slot_cleanup(chan);
+       mv_xor_slot_cleanup(chan);
 }
 
 static struct mv_xor_desc_slot *
index eca9ba193e94a914aa64740f0b4262b8f7c99b81..df211181fca41627cb0bdea2a089cd9d26f7faf4 100644 (file)
@@ -2917,7 +2917,7 @@ static int __init amd64_edac_init(void)
 
        opstate_init();
 
-       if (cache_k8_northbridges() < 0)
+       if (amd_cache_northbridges() < 0)
                goto err_ret;
 
        msrs = msrs_alloc();
@@ -2934,7 +2934,7 @@ static int __init amd64_edac_init(void)
         * to finish initialization of the MC instances.
         */
        err = -ENODEV;
-       for (nb = 0; nb < k8_northbridges.num; nb++) {
+       for (nb = 0; nb < amd_nb_num(); nb++) {
                if (!pvt_lookup[nb])
                        continue;
 
index 599f6c9e0fbf18661d7607407d11b73cd3cda7ff..d3e55a0ae92be02e5f46ea7d6f7f82c87e75c807 100644 (file)
@@ -56,15 +56,26 @@ static struct cs5535_gpio_chip {
  * registers, see include/linux/cs5535.h.
  */
 
-static void errata_outl(u32 val, unsigned long addr)
+static void errata_outl(struct cs5535_gpio_chip *chip, u32 val,
+               unsigned int reg)
 {
+       unsigned long addr = chip->base + 0x80 + reg;
+
        /*
         * According to the CS5536 errata (#36), after suspend
         * a write to the high bank GPIO register will clear all
         * non-selected bits; the recommended workaround is a
         * read-modify-write operation.
+        *
+        * Don't apply this erratum workaround to the edge status GPIOs,
+        * as writing to their lower bits will clear them.
         */
-       val |= inl(addr);
+       if (reg != GPIO_POSITIVE_EDGE_STS && reg != GPIO_NEGATIVE_EDGE_STS) {
+               if (val & 0xffff)
+                       val |= (inl(addr) & 0xffff); /* ignore the high bits */
+               else
+                       val |= (inl(addr) ^ (val >> 16));
+       }
        outl(val, addr);
 }
 
@@ -76,7 +87,7 @@ static void __cs5535_gpio_set(struct cs5535_gpio_chip *chip, unsigned offset,
                outl(1 << offset, chip->base + reg);
        else
                /* high bank register */
-               errata_outl(1 << (offset - 16), chip->base + 0x80 + reg);
+               errata_outl(chip, 1 << (offset - 16), reg);
 }
 
 void cs5535_gpio_set(unsigned offset, unsigned int reg)
@@ -98,7 +109,7 @@ static void __cs5535_gpio_clear(struct cs5535_gpio_chip *chip, unsigned offset,
                outl(1 << (offset + 16), chip->base + reg);
        else
                /* high bank register */
-               errata_outl(1 << offset, chip->base + 0x80 + reg);
+               errata_outl(chip, 1 << offset, reg);
 }
 
 void cs5535_gpio_clear(unsigned offset, unsigned int reg)
index 21da9c19a0cba82e1a8aba6aa52056953efb4f82..649550e2cae99947027df5f2887e77016f554b54 100644 (file)
@@ -1281,6 +1281,9 @@ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label)
                err = gpio_direction_output(gpio,
                                (flags & GPIOF_INIT_HIGH) ? 1 : 0);
 
+       if (err)
+               gpio_free(gpio);
+
        return err;
 }
 EXPORT_SYMBOL_GPL(gpio_request_one);
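With the fix above, a failed direction setup no longer leaks the GPIO, so callers can simply propagate the error without a compensating gpio_free(). An illustrative caller (the GPIO number and label are made up):

	int err;

	err = gpio_request_one(42, GPIOF_OUT_INIT_HIGH, "status-led");
	if (err)
		return err;	/* the helper already released the GPIO */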
index 2762698e0204adc99699839137680bafc7fcc0c7..897e0577e65e0fd425f18edb00512ee414566a64 100644 (file)
@@ -135,7 +135,7 @@ static int __devinit rdc321x_gpio_probe(struct platform_device *pdev)
        struct rdc321x_gpio *rdc321x_gpio_dev;
        struct rdc321x_gpio_pdata *pdata;
 
-       pdata = pdev->dev.platform_data;
+       pdata = platform_get_drvdata(pdev);
        if (!pdata) {
                dev_err(&pdev->dev, "no platform data supplied\n");
                return -ENODEV;
index bede10a0340700b69717caf1da8e81ee0546ab00..2d4e17a004dbb2f09bae780c56f36138c4b1beca 100644 (file)
@@ -241,7 +241,7 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
        }
 
        list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-               if (encoder->crtc && !drm_helper_encoder_in_use(encoder)) {
+               if (!drm_helper_encoder_in_use(encoder)) {
                        drm_encoder_disable(encoder);
                        /* disconnect the encoder from any connector */
                        encoder->crtc = NULL;
@@ -874,7 +874,10 @@ static void output_poll_execute(struct work_struct *work)
                        continue;
 
                connector->status = connector->funcs->detect(connector, false);
-               DRM_DEBUG_KMS("connector status updated to %d\n", connector->status);
+               DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %d to %d\n",
+                             connector->base.id,
+                             drm_get_connector_name(connector),
+                             old_status, connector->status);
                if (old_status != connector->status)
                        changed = true;
        }
index af70337567ce35a0167ffb193d8b1ffae769fedd..d3e8c540f778d01ef97437c3643acbd690716546 100644 (file)
@@ -242,7 +242,7 @@ fail:
 
 static enum drm_connector_status ch7017_detect(struct intel_dvo_device *dvo)
 {
-       return connector_status_unknown;
+       return connector_status_connected;
 }
 
 static enum drm_mode_status ch7017_mode_valid(struct intel_dvo_device *dvo,
index e6800819bca846f6a3fd102ef37e2632858247f8..cb900dc83d950e29f99144e0d39c38102c55ce5b 100644 (file)
@@ -34,6 +34,7 @@
 #include "i915_drm.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "../../../platform/x86/intel_ips.h"
 #include <linux/pci.h>
 #include <linux/vgaarb.h>
 #include <linux/acpi.h>
@@ -1870,6 +1871,26 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
 
+/**
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+       void (*link)(void);
+
+       link = symbol_get(ips_link_to_i915_driver);
+       if (link) {
+               link();
+               symbol_put(ips_link_to_i915_driver);
+       }
+}
+
 /**
  * i915_driver_load - setup chip and create an initial config
  * @dev: DRM device
@@ -2075,6 +2096,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        dev_priv->mchdev_lock = &mchdev_lock;
        spin_unlock(&mchdev_lock);
 
+       ips_ping_for_i915_load();
+
        return 0;
 
 out_workqueue_free:
index 878fc766a12cc05f6b30c90d53d65a52c6023727..cb8f434292793451225eefdbea071aefe7408e79 100644 (file)
 # define MARIUNIT_CLOCK_GATE_DISABLE           (1 << 18)
 # define SVSMUNIT_CLOCK_GATE_DISABLE           (1 << 1)
 
+#define PCH_3DCGDIS1           0x46024
+# define VFMUNIT_CLOCK_GATE_DISABLE            (1 << 11)
+
 #define FDI_PLL_FREQ_CTL        0x46030
 #define  FDI_PLL_FREQ_CHANGE_REQUEST    (1<<24)
 #define  FDI_PLL_FREQ_LOCK_LIMIT_MASK   0xfff00
 #define ILK_DISPLAY_CHICKEN2   0x42004
 #define  ILK_DPARB_GATE        (1<<22)
 #define  ILK_VSDPFD_FULL       (1<<21)
+#define ILK_DISPLAY_CHICKEN_FUSES      0x42014
+#define  ILK_INTERNAL_GRAPHICS_DISABLE (1<<31)
+#define  ILK_INTERNAL_DISPLAY_DISABLE  (1<<30)
+#define  ILK_DISPLAY_DEBUG_DISABLE     (1<<29)
+#define  ILK_HDCP_DISABLE              (1<<25)
+#define  ILK_eDP_A_DISABLE             (1<<24)
+#define  ILK_DESKTOP                   (1<<23)
 #define ILK_DSPCLK_GATE                0x42020
 #define  ILK_DPARB_CLK_GATE    (1<<5)
 /* According to spec this bit 7/8/9 of 0x42020 should be set to enable FBC */
index d9b7092439ef59ab19acc9546a039ff60f0bb43c..fca523288acad035b9f3d130fe32e9c3c7719989 100644 (file)
@@ -5379,6 +5379,23 @@ static int intel_encoder_clones(struct drm_device *dev, int type_mask)
        return index_mask;
 }
 
+static bool has_edp_a(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (!IS_MOBILE(dev))
+               return false;
+
+       if ((I915_READ(DP_A) & DP_DETECTED) == 0)
+               return false;
+
+       if (IS_GEN5(dev) &&
+           (I915_READ(ILK_DISPLAY_CHICKEN_FUSES) & ILK_eDP_A_DISABLE))
+               return false;
+
+       return true;
+}
+
 static void intel_setup_outputs(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -5396,7 +5413,7 @@ static void intel_setup_outputs(struct drm_device *dev)
        if (HAS_PCH_SPLIT(dev)) {
                dpd_is_edp = intel_dpd_is_edp(dev);
 
-               if (IS_MOBILE(dev) && (I915_READ(DP_A) & DP_DETECTED))
+               if (has_edp_a(dev))
                        intel_dp_init(dev, DP_A);
 
                if (dpd_is_edp && (I915_READ(PCH_DP_D) & DP_DETECTED))
@@ -5825,6 +5842,8 @@ void intel_init_clock_gating(struct drm_device *dev)
                        I915_WRITE(PCH_3DCGDIS0,
                                   MARIUNIT_CLOCK_GATE_DISABLE |
                                   SVSMUNIT_CLOCK_GATE_DISABLE);
+                       I915_WRITE(PCH_3DCGDIS1,
+                                  VFMUNIT_CLOCK_GATE_DISABLE);
                }
 
                I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
index df648cb4c29641cec581307b1c40a46d812fd58b..864417cffe9a7c3a45ea52716aae46da97e1b7c1 100644 (file)
@@ -479,6 +479,7 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
        uint16_t address = algo_data->address;
        uint8_t msg[5];
        uint8_t reply[2];
+       unsigned retry;
        int msg_bytes;
        int reply_bytes;
        int ret;
@@ -513,14 +514,33 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
                break;
        }
 
-       for (;;) {
-         ret = intel_dp_aux_ch(intel_dp,
-                               msg, msg_bytes,
-                               reply, reply_bytes);
+       for (retry = 0; retry < 5; retry++) {
+               ret = intel_dp_aux_ch(intel_dp,
+                                     msg, msg_bytes,
+                                     reply, reply_bytes);
                if (ret < 0) {
                        DRM_DEBUG_KMS("aux_ch failed %d\n", ret);
                        return ret;
                }
+
+               switch (reply[0] & AUX_NATIVE_REPLY_MASK) {
+               case AUX_NATIVE_REPLY_ACK:
+                       /* I2C-over-AUX Reply field is only valid
+                        * when paired with AUX ACK.
+                        */
+                       break;
+               case AUX_NATIVE_REPLY_NACK:
+                       DRM_DEBUG_KMS("aux_ch native nack\n");
+                       return -EREMOTEIO;
+               case AUX_NATIVE_REPLY_DEFER:
+                       udelay(100);
+                       continue;
+               default:
+                       DRM_ERROR("aux_ch invalid native reply 0x%02x\n",
+                                 reply[0]);
+                       return -EREMOTEIO;
+               }
+
                switch (reply[0] & AUX_I2C_REPLY_MASK) {
                case AUX_I2C_REPLY_ACK:
                        if (mode == MODE_I2C_READ) {
@@ -528,17 +548,20 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
                        }
                        return reply_bytes - 1;
                case AUX_I2C_REPLY_NACK:
-                       DRM_DEBUG_KMS("aux_ch nack\n");
+                       DRM_DEBUG_KMS("aux_i2c nack\n");
                        return -EREMOTEIO;
                case AUX_I2C_REPLY_DEFER:
-                       DRM_DEBUG_KMS("aux_ch defer\n");
+                       DRM_DEBUG_KMS("aux_i2c defer\n");
                        udelay(100);
                        break;
                default:
-                       DRM_ERROR("aux_ch invalid reply 0x%02x\n", reply[0]);
+                       DRM_ERROR("aux_i2c invalid reply 0x%02x\n", reply[0]);
                        return -EREMOTEIO;
                }
        }
+
+       DRM_ERROR("too many retries, giving up\n");
+       return -EREMOTEIO;
 }
 
 static int
index 89a65be8a3f364359edaf6add71b6259affa3cbd..31cd7e33e8208b8112ed7cb2b278e8cc8b84fc61 100644 (file)
@@ -696,20 +696,17 @@ int intel_wait_ring_buffer(struct drm_device *dev,
        drm_i915_private_t *dev_priv = dev->dev_private;
        u32 head;
 
-       head = intel_read_status_page(ring, 4);
-       if (head) {
-               ring->head = head & HEAD_ADDR;
-               ring->space = ring->head - (ring->tail + 8);
-               if (ring->space < 0)
-                       ring->space += ring->size;
-               if (ring->space >= n)
-                       return 0;
-       }
-
        trace_i915_ring_wait_begin (dev);
        end = jiffies + 3 * HZ;
        do {
-               ring->head = I915_READ_HEAD(ring) & HEAD_ADDR;
+               /* If the reported head position has wrapped or hasn't advanced,
+                * fallback to the slow and accurate path.
+                */
+               head = intel_read_status_page(ring, 4);
+               if (head < ring->actual_head)
+                       head = I915_READ_HEAD(ring);
+               ring->actual_head = head;
+               ring->head = head & HEAD_ADDR;
                ring->space = ring->head - (ring->tail + 8);
                if (ring->space < 0)
                        ring->space += ring->size;
index 3126c2681983e21ba729d9ca599d104f4f1323de..d2cd0f1efeedfdec3102f0436dc70d0d47a2d52d 100644 (file)
@@ -30,8 +30,9 @@ struct  intel_ring_buffer {
        struct          drm_device *dev;
        struct          drm_gem_object *gem_object;
 
-       unsigned int    head;
-       unsigned int    tail;
+       u32             actual_head;
+       u32             head;
+       u32             tail;
        int             space;
        struct intel_hw_status_page status_page;
 
index d97e6cb52d34a102705d9ab59f030cb551333e16..6bc42fa2a6ecc152d796611a3e66311b98b3de9f 100644 (file)
@@ -1908,9 +1908,13 @@ intel_sdvo_select_i2c_bus(struct drm_i915_private *dev_priv,
                speed = mapping->i2c_speed;
        }
 
-       sdvo->i2c = &dev_priv->gmbus[pin].adapter;
-       intel_gmbus_set_speed(sdvo->i2c, speed);
-       intel_gmbus_force_bit(sdvo->i2c, true);
+       if (pin < GMBUS_NUM_PORTS) {
+               sdvo->i2c = &dev_priv->gmbus[pin].adapter;
+               intel_gmbus_set_speed(sdvo->i2c, speed);
+               intel_gmbus_force_bit(sdvo->i2c, true);
+       } else {
+               sdvo->i2c = &dev_priv->gmbus[GMBUS_PORT_DPB].adapter;
+       }
 }
 
 static bool
@@ -2037,13 +2040,14 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
                                           SDVO_COLORIMETRY_RGB256);
                connector->connector_type = DRM_MODE_CONNECTOR_HDMIA;
 
-               intel_sdvo_add_hdmi_properties(intel_sdvo_connector);
                intel_sdvo->is_hdmi = true;
        }
        intel_sdvo->base.clone_mask = ((1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
                                       (1 << INTEL_ANALOG_CLONE_BIT));
 
        intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo);
+       if (intel_sdvo->is_hdmi)
+               intel_sdvo_add_hdmi_properties(intel_sdvo_connector);
 
        return true;
 }
index df2b6f2b35f893d00b475147f8d995a7b3c15bc2..9fbabaa6ee448bb665766a34dc944c694e69eaa3 100644 (file)
@@ -253,7 +253,8 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
        case DRM_MODE_DPMS_SUSPEND:
        case DRM_MODE_DPMS_OFF:
                drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
-               atombios_blank_crtc(crtc, ATOM_ENABLE);
+               if (radeon_crtc->enabled)
+                       atombios_blank_crtc(crtc, ATOM_ENABLE);
                if (ASIC_IS_DCE3(rdev))
                        atombios_enable_crtc_memreq(crtc, ATOM_DISABLE);
                atombios_enable_crtc(crtc, ATOM_DISABLE);
@@ -530,7 +531,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
                                        dp_clock = dig_connector->dp_clock;
                                }
                        }
-
+#if 0 /* doesn't work properly on some laptops */
                        /* use recommended ref_div for ss */
                        if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
                                if (ss_enabled) {
@@ -540,7 +541,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
                                        }
                                }
                        }
-
+#endif
                        if (ASIC_IS_AVIVO(rdev)) {
                                /* DVO wants 2x pixel clock if the DVO chip is in 12 bit mode */
                                if (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1)
index 4dc5b4714c5a6ae1919246ac55ed32d3bacc44ac..7b337c361a1240f12afc0105bc55d0867172b151 100644 (file)
@@ -748,6 +748,8 @@ void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev)
        unsigned i;
        u32 tmp;
 
+       WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
+
        WREG32(VM_CONTEXT0_REQUEST_RESPONSE, REQUEST_TYPE(1));
        for (i = 0; i < rdev->usec_timeout; i++) {
                /* read MC_STATUS */
@@ -1922,7 +1924,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev)
 static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
 {
        struct evergreen_mc_save save;
-       u32 srbm_reset = 0;
        u32 grbm_reset = 0;
 
        dev_info(rdev->dev, "GPU softreset \n");
@@ -1961,16 +1962,6 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
        udelay(50);
        WREG32(GRBM_SOFT_RESET, 0);
        (void)RREG32(GRBM_SOFT_RESET);
-
-       /* reset all the system blocks */
-       srbm_reset = SRBM_SOFT_RESET_ALL_MASK;
-
-       dev_info(rdev->dev, "  SRBM_SOFT_RESET=0x%08X\n", srbm_reset);
-       WREG32(SRBM_SOFT_RESET, srbm_reset);
-       (void)RREG32(SRBM_SOFT_RESET);
-       udelay(50);
-       WREG32(SRBM_SOFT_RESET, 0);
-       (void)RREG32(SRBM_SOFT_RESET);
        /* Wait a little for things to settle down */
        udelay(50);
        dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
@@ -1981,10 +1972,6 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev)
                RREG32(GRBM_STATUS_SE1));
        dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
                RREG32(SRBM_STATUS));
-       /* After reset we need to reinit the asic as GPU often endup in an
-        * incoherent state.
-        */
-       atom_asic_init(rdev->mode_info.atom_context);
        evergreen_mc_resume(rdev, &save);
        return 0;
 }
@@ -2596,6 +2583,11 @@ int evergreen_resume(struct radeon_device *rdev)
 {
        int r;
 
+       /* Reset the ASIC; the GFX blocks are often in a bad state
+        * after the driver is unloaded or after a resume.
+        */
+       if (radeon_asic_reset(rdev))
+               dev_warn(rdev->dev, "GPU reset failed!\n");
        /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
         * posting will perform necessary task to bring back GPU into good
         * shape.
@@ -2712,6 +2704,11 @@ int evergreen_init(struct radeon_device *rdev)
        r = radeon_atombios_init(rdev);
        if (r)
                return r;
+       /* Reset the ASIC; the GFX blocks are often in a bad state
+        * after the driver is unloaded or after a resume.
+        */
+       if (radeon_asic_reset(rdev))
+               dev_warn(rdev->dev, "GPU reset failed!\n");
        /* Post card if necessary */
        if (!evergreen_card_posted(rdev)) {
                if (!rdev->bios) {
index 113c70cc8b3930eba7170a94f2c75b9f62e07c1d..a73b53c44359c59a617c6c7aed5a8510a40c39d2 100644 (file)
 #define        HDP_NONSURFACE_BASE                             0x2C04
 #define        HDP_NONSURFACE_INFO                             0x2C08
 #define        HDP_NONSURFACE_SIZE                             0x2C0C
+#define HDP_MEM_COHERENCY_FLUSH_CNTL                   0x5480
 #define HDP_REG_COHERENCY_FLUSH_CNTL                   0x54A0
 #define        HDP_TILING_CONFIG                               0x2F3C
 
index 4d7a2e1bdb90e498138eea542e22884c9e36eb0d..9c92db7c896b6719edfaef92d1484ca300d2cb95 100644 (file)
@@ -1342,13 +1342,19 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev)
        u32 srbm_status;
        u32 grbm_status;
        u32 grbm_status2;
+       struct r100_gpu_lockup *lockup;
        int r;
 
+       if (rdev->family >= CHIP_RV770)
+               lockup = &rdev->config.rv770.lockup;
+       else
+               lockup = &rdev->config.r600.lockup;
+
        srbm_status = RREG32(R_000E50_SRBM_STATUS);
        grbm_status = RREG32(R_008010_GRBM_STATUS);
        grbm_status2 = RREG32(R_008014_GRBM_STATUS2);
        if (!G_008010_GUI_ACTIVE(grbm_status)) {
-               r100_gpu_lockup_update(&rdev->config.r300.lockup, &rdev->cp);
+               r100_gpu_lockup_update(lockup, &rdev->cp);
                return false;
        }
        /* force CP activities */
@@ -1360,7 +1366,7 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev)
                radeon_ring_unlock_commit(rdev);
        }
        rdev->cp.rptr = RREG32(R600_CP_RB_RPTR);
-       return r100_gpu_cp_is_lockup(rdev, &rdev->config.r300.lockup, &rdev->cp);
+       return r100_gpu_cp_is_lockup(rdev, lockup, &rdev->cp);
 }
 
 int r600_asic_reset(struct radeon_device *rdev)
index 0f90fc3482ce205df9991bcd1d7c1170abd912ca..7831e0890210c6b4b7a80f09da51d7b76cc80511 100644 (file)
@@ -315,11 +315,10 @@ static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
                if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
                        /* the initial DDX does bad things with the CB size occasionally */
                        /* it rounds up height too far for slice tile max but the BO is smaller */
-                       tmp = (height - 7) * 8 * bpe;
-                       if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
-                               dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]));
-                               return -EINVAL;
-                       }
+                       /* r600c,g also seem to flush at bad times in some apps,
+                        * resulting in bogus values here. So for linear, just
+                        * allow anything to avoid breaking already-broken userspace.
+                        */
                } else {
                        dev_warn(p->dev, "%s offset[%d] %d %d %lu too big\n", __func__, i, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]));
                        return -EINVAL;
index e12e79326cb115a490ed5d94e0eda2a84ee400db..501966a13f48094a65cfeb17460fa16aeb62a308 100644 (file)
@@ -910,11 +910,6 @@ int radeon_resume_kms(struct drm_device *dev)
        radeon_pm_resume(rdev);
        radeon_restore_bios_scratch_regs(rdev);
 
-       /* turn on display hw */
-       list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-       }
-
        radeon_fbdev_set_suspend(rdev, 0);
        release_console_sem();
 
@@ -922,6 +917,10 @@ int radeon_resume_kms(struct drm_device *dev)
        radeon_hpd_init(rdev);
        /* blat the mode back in */
        drm_helper_resume_force_mode(dev);
+       /* turn on display hw */
+       list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+               drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+       }
        return 0;
 }
 
index 88e4ea925900ef3aecf71aa17744d3f9d0d8c445..60e689f2d048b71aa85b2a95d759fab06352250c 100644 (file)
@@ -232,9 +232,28 @@ static struct drm_driver driver_old = {
 
 static struct drm_driver kms_driver;
 
+static void radeon_kick_out_firmware_fb(struct pci_dev *pdev)
+{
+       struct apertures_struct *ap;
+       bool primary = false;
+
+       ap = alloc_apertures(1);
+       ap->ranges[0].base = pci_resource_start(pdev, 0);
+       ap->ranges[0].size = pci_resource_len(pdev, 0);
+
+#ifdef CONFIG_X86
+       primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
+#endif
+       remove_conflicting_framebuffers(ap, "radeondrmfb", primary);
+       kfree(ap);
+}
+
 static int __devinit
 radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
+       /* Get rid of things like offb */
+       radeon_kick_out_firmware_fb(pdev);
+
        return drm_get_pci_dev(pdev, ent, &kms_driver);
 }
 
index efa211898fe60204a424b98cdb9295ea2ee99356..6abea32be5e83b80346f5c79b0a966c66c194a7c 100644 (file)
@@ -245,7 +245,7 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev,
                goto out_unref;
        }
        info->apertures->ranges[0].base = rdev->ddev->mode_config.fb_base;
-       info->apertures->ranges[0].size = rdev->mc.real_vram_size;
+       info->apertures->ranges[0].size = rdev->mc.aper_size;
 
        info->fix.mmio_start = 0;
        info->fix.mmio_len = 0;
index 05248f2d7581cd02dc191185b083bd30977ec5ba..92b42db43bcfd9e5706c41bfb9049a26fd967b93 100644 (file)
@@ -234,7 +234,6 @@ static int s3c_hwmon_create_attr(struct device *dev,
        attr->index = channel;
        attr->dev_attr.attr.name  = attrs->in_name;
        attr->dev_attr.attr.mode  = S_IRUGO;
-       attr->dev_attr.attr.owner = THIS_MODULE;
        attr->dev_attr.show = s3c_hwmon_ch_show;
 
        ret =  device_create_file(dev, &attr->dev_attr);
@@ -252,7 +251,6 @@ static int s3c_hwmon_create_attr(struct device *dev,
                attr->index = channel;
                attr->dev_attr.attr.name  = attrs->label_name;
                attr->dev_attr.attr.mode  = S_IRUGO;
-               attr->dev_attr.attr.owner = THIS_MODULE;
                attr->dev_attr.show = s3c_hwmon_label_show;
 
                ret = device_create_file(dev, &attr->dev_attr);
index c131d58bcb50d818e05ee0464581bceedfdbf20f..56ac09d6c9308157d6acda8c99bdbdeef91aa771 100644 (file)
@@ -220,9 +220,8 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
        kt_before = ktime_get_real();
 
        stop_critical_timings();
-#ifndef MODULE
        trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
-#endif
+       trace_cpu_idle((eax >> 4) + 1, cpu);
        if (!need_resched()) {
 
                __monitor((void *)&current_thread_info()->flags, 0, 0);
index bcc174e4f3b146db6772cea89c79ac9eb14ce80a..658e75f18d052b5a721ff2e6882109c945318d5b 100644 (file)
@@ -1900,6 +1900,7 @@ static void do_disconnect_req(struct gigaset_capi_ctr *iif,
                if (b3skb == NULL) {
                        dev_err(cs->dev, "%s: out of memory\n", __func__);
                        send_conf(iif, ap, skb, CAPI_MSGOSRESOURCEERR);
+                       kfree(b3cmsg);
                        return;
                }
                capi_cmsg2message(b3cmsg,
index 211e21f34bd57d5f6e2b079dcd22194713b3d5f5..d5a4ade88991eb690e053a6a4218de0512ad9387 100644 (file)
@@ -267,7 +267,7 @@ void led_blink_set(struct led_classdev *led_cdev,
                   unsigned long *delay_off)
 {
        if (led_cdev->blink_set &&
-           led_cdev->blink_set(led_cdev, delay_on, delay_off))
+           !led_cdev->blink_set(led_cdev, delay_on, delay_off))
                return;
 
        /* blink with 1 Hz as default if nothing specified */
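
The one-character fix above matters because blink_set() follows the kernel's 0-on-success convention: the function should only return early when the hardware actually accepted the blink request. In isolation (hypothetical types, not the LED core's API):

	struct led {
		/* returns 0 if the hardware accepted the blink request */
		int (*blink_set)(unsigned long *on_ms, unsigned long *off_ms);
	};

	static void led_blink(struct led *led,
			      unsigned long *on_ms, unsigned long *off_ms)
	{
		/* hardware path: stop here only on success (a 0 return) */
		if (led->blink_set && !led->blink_set(on_ms, off_ms))
			return;

		/* otherwise fall back to software blinking at 1 Hz */
		*on_ms = *off_ms = 500;
	}
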
index 1b7adabbcee99d4affc24f48e335f33cb3dd409e..6da955dfef48989a1773792f09bf1715326a86b5 100644 (file)
@@ -26,8 +26,8 @@ static struct ir_scancode rc6_mce[] = {
 
        { 0x800f040a, KEY_DELETE },
        { 0x800f040b, KEY_ENTER },
-       { 0x800f040c, KEY_POWER },
-       { 0x800f040d, KEY_PROG1 },              /* Windows MCE button */
+       { 0x800f040c, KEY_POWER },              /* PC Power */
+       { 0x800f040d, KEY_PROG1 },              /* Windows MCE button */
        { 0x800f040e, KEY_MUTE },
        { 0x800f040f, KEY_INFO },
 
@@ -56,31 +56,32 @@ static struct ir_scancode rc6_mce[] = {
        { 0x800f0422, KEY_OK },
        { 0x800f0423, KEY_EXIT },
        { 0x800f0424, KEY_DVD },
-       { 0x800f0425, KEY_TUNER },              /* LiveTV */
-       { 0x800f0426, KEY_EPG },                /* Guide */
-       { 0x800f0427, KEY_ZOOM },               /* Aspect */
+       { 0x800f0425, KEY_TUNER },              /* LiveTV */
+       { 0x800f0426, KEY_EPG },                /* Guide */
+       { 0x800f0427, KEY_ZOOM },               /* Aspect */
 
        { 0x800f043a, KEY_BRIGHTNESSUP },
 
        { 0x800f0446, KEY_TV },
-       { 0x800f0447, KEY_AUDIO },              /* My Music */
-       { 0x800f0448, KEY_PVR },                /* RecordedTV */
+       { 0x800f0447, KEY_AUDIO },              /* My Music */
+       { 0x800f0448, KEY_PVR },                /* RecordedTV */
        { 0x800f0449, KEY_CAMERA },
        { 0x800f044a, KEY_VIDEO },
        { 0x800f044c, KEY_LANGUAGE },
        { 0x800f044d, KEY_TITLE },
-       { 0x800f044e, KEY_PRINT },      /* Print - HP OEM version of remote */
+       { 0x800f044e, KEY_PRINT },      /* Print - HP OEM version of remote */
 
        { 0x800f0450, KEY_RADIO },
 
-       { 0x800f045a, KEY_SUBTITLE },           /* Caption/Teletext */
+       { 0x800f045a, KEY_SUBTITLE },           /* Caption/Teletext */
        { 0x800f045b, KEY_RED },
        { 0x800f045c, KEY_GREEN },
        { 0x800f045d, KEY_YELLOW },
        { 0x800f045e, KEY_BLUE },
 
+       { 0x800f0465, KEY_POWER2 },     /* TV Power */
        { 0x800f046e, KEY_PLAYPAUSE },
-       { 0x800f046f, KEY_MEDIA },      /* Start media application (NEW) */
+       { 0x800f046f, KEY_MEDIA },      /* Start media application (NEW) */
 
        { 0x800f0480, KEY_BRIGHTNESSDOWN },
        { 0x800f0481, KEY_PLAYPAUSE },
index 8418b14ee4d2244ee3b6528549d0d22f35c85159..756656e17bddb035dbff24529155a86e82d19b3c 100644 (file)
@@ -522,10 +522,8 @@ unsigned int lirc_dev_fop_poll(struct file *file, poll_table *wait)
 
        dev_dbg(ir->d.dev, LOGHEAD "poll called\n", ir->d.name, ir->d.minor);
 
-       if (!ir->attached) {
-               mutex_unlock(&ir->irctl_lock);
+       if (!ir->attached)
                return POLLERR;
-       }
 
        poll_wait(file, &ir->buf->wait_poll, wait);
 
@@ -649,18 +647,18 @@ ssize_t lirc_dev_fop_read(struct file *file,
        if (!buf)
                return -ENOMEM;
 
-       if (mutex_lock_interruptible(&ir->irctl_lock))
-               return -ERESTARTSYS;
+       if (mutex_lock_interruptible(&ir->irctl_lock)) {
+               ret = -ERESTARTSYS;
+               goto out_unlocked;
+       }
        if (!ir->attached) {
-               mutex_unlock(&ir->irctl_lock);
-               return -ENODEV;
+               ret = -ENODEV;
+               goto out_locked;
        }
 
        if (length % ir->chunk_size) {
-               dev_dbg(ir->d.dev, LOGHEAD "read result = -EINVAL\n",
-                       ir->d.name, ir->d.minor);
-               mutex_unlock(&ir->irctl_lock);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out_locked;
        }
 
        /*
@@ -711,18 +709,23 @@ ssize_t lirc_dev_fop_read(struct file *file,
                        lirc_buffer_read(ir->buf, buf);
                        ret = copy_to_user((void *)buffer+written, buf,
                                           ir->buf->chunk_size);
-                       written += ir->buf->chunk_size;
+                       if (!ret)
+                               written += ir->buf->chunk_size;
+                       else
+                               ret = -EFAULT;
                }
        }
 
        remove_wait_queue(&ir->buf->wait_poll, &wait);
        set_current_state(TASK_RUNNING);
+
+out_locked:
        mutex_unlock(&ir->irctl_lock);
 
 out_unlocked:
        kfree(buf);
        dev_dbg(ir->d.dev, LOGHEAD "read result = %s (%d)\n",
-               ir->d.name, ir->d.minor, ret ? "-EFAULT" : "OK", ret);
+               ir->d.name, ir->d.minor, ret ? "<fail>" : "<ok>", ret);
 
        return ret ? ret : written;
 }
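
The reworked error handling above funnels every failure through a single pair of labels, so the mutex is dropped exactly once and the buffer is always freed. The idiom in isolation, with assumed locking helpers (ERESTARTSYS is kernel-internal and defined here only for the sketch):

	#include <errno.h>
	#include <stdlib.h>

	#define ERESTARTSYS 512             /* kernel-internal errno, sketch only */

	int take_lock_interruptible(void);  /* assumed: 0 on success */
	void drop_lock(void);
	int device_attached(void);

	static int read_sketch(size_t length, size_t chunk)
	{
		int ret = 0;
		char *buf = malloc(chunk);

		if (!buf)
			return -ENOMEM;

		if (take_lock_interruptible()) {
			ret = -ERESTARTSYS;
			goto out_unlocked;  /* lock not taken: skip the unlock */
		}
		if (!device_attached()) {
			ret = -ENODEV;
			goto out_locked;
		}
		if (length % chunk) {
			ret = -EINVAL;
			goto out_locked;
		}

		/* ... read loop; copy failures set ret = -EFAULT ... */

	out_locked:
		drop_lock();
	out_unlocked:
		free(buf);
		return ret;
	}
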
index 9dce684fd23113d13a3d445995983089cff56c60..392ca24132dafd5456e905fa61c216d566703976 100644 (file)
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/usb.h>
 #include <linux/input.h>
+#include <linux/usb.h>
+#include <linux/usb/input.h>
 #include <media/ir-core.h>
-#include <media/ir-common.h>
 
 #define DRIVER_VERSION "1.91"
 #define DRIVER_AUTHOR  "Jarod Wilson <jarod@wilsonet.com>"
@@ -49,6 +49,7 @@
 #define USB_BUFLEN             32 /* USB reception buffer length */
 #define USB_CTRL_MSG_SZ                2  /* Size of usb ctrl msg on gen1 hw */
 #define MCE_G1_INIT_MSGS       40 /* Init messages on gen1 hw to throw out */
+#define MS_TO_NS(msec)         ((msec) * 1000)
 
 /* MCE constants */
 #define MCE_CMDBUF_SIZE                384  /* MCE Command buffer length */
@@ -74,6 +75,7 @@
 #define MCE_PACKET_LENGTH_MASK 0x1f /* Packet length mask */
 
 /* Sub-commands, which follow MCE_COMMAND_HEADER or MCE_HW_CMD_HEADER */
+#define MCE_CMD_SIG_END                0x01    /* End of signal */
 #define MCE_CMD_PING           0x03    /* Ping device */
 #define MCE_CMD_UNKNOWN                0x04    /* Unknown */
 #define MCE_CMD_UNKNOWN2       0x05    /* Unknown */
@@ -91,6 +93,7 @@
 #define MCE_CMD_G_TXMASK       0x13    /* Set TX port bitmask */
 #define MCE_CMD_S_RXSENSOR     0x14    /* Set RX sensor (std/learning) */
 #define MCE_CMD_G_RXSENSOR     0x15    /* Get RX sensor (std/learning) */
+#define MCE_RSP_PULSE_COUNT    0x15    /* RX pulse count (only if learning) */
 #define MCE_CMD_TX_PORTS       0x16    /* Get number of TX ports */
 #define MCE_CMD_G_WAKESRC      0x17    /* Get wake source */
 #define MCE_CMD_UNKNOWN7       0x18    /* Unknown */
@@ -146,14 +149,16 @@ enum mceusb_model_type {
        MCE_GEN3,
        MCE_GEN2_TX_INV,
        POLARIS_EVK,
+       CX_HYBRID_TV,
 };
 
 struct mceusb_model {
        u32 mce_gen1:1;
        u32 mce_gen2:1;
        u32 mce_gen3:1;
-       u32 tx_mask_inverted:1;
+       u32 tx_mask_normal:1;
        u32 is_polaris:1;
+       u32 no_tx:1;
 
        const char *rc_map;     /* Allow specify a per-board map */
        const char *name;       /* per-board name */
@@ -162,18 +167,18 @@ struct mceusb_model {
 static const struct mceusb_model mceusb_model[] = {
        [MCE_GEN1] = {
                .mce_gen1 = 1,
-               .tx_mask_inverted = 1,
+               .tx_mask_normal = 1,
        },
        [MCE_GEN2] = {
                .mce_gen2 = 1,
        },
        [MCE_GEN2_TX_INV] = {
                .mce_gen2 = 1,
-               .tx_mask_inverted = 1,
+               .tx_mask_normal = 1,
        },
        [MCE_GEN3] = {
                .mce_gen3 = 1,
-               .tx_mask_inverted = 1,
+               .tx_mask_normal = 1,
        },
        [POLARIS_EVK] = {
                .is_polaris = 1,
@@ -183,7 +188,12 @@ static const struct mceusb_model mceusb_model[] = {
                 * to allow testing it
                 */
                .rc_map = RC_MAP_RC5_HAUPPAUGE_NEW,
-               .name = "cx231xx MCE IR",
+               .name = "Conexant Hybrid TV (cx231xx) MCE IR",
+       },
+       [CX_HYBRID_TV] = {
+               .is_polaris = 1,
+               .no_tx = 1, /* tx isn't wired up at all */
+               .name = "Conexant Hybrid TV (cx231xx) MCE IR",
        },
 };
 
@@ -273,6 +283,8 @@ static struct usb_device_id mceusb_dev_table[] = {
        { USB_DEVICE(VENDOR_FORMOSA, 0xe03c) },
        /* Formosa Industrial Computing */
        { USB_DEVICE(VENDOR_FORMOSA, 0xe03e) },
+       /* Fintek eHome Infrared Transceiver (HP branded) */
+       { USB_DEVICE(VENDOR_FINTEK, 0x5168) },
        /* Fintek eHome Infrared Transceiver */
        { USB_DEVICE(VENDOR_FINTEK, 0x0602) },
        /* Fintek eHome Infrared Transceiver (in the AOpen MP45) */
@@ -292,9 +304,12 @@ static struct usb_device_id mceusb_dev_table[] = {
        { USB_DEVICE(VENDOR_NORTHSTAR, 0xe004) },
        /* TiVo PC IR Receiver */
        { USB_DEVICE(VENDOR_TIVO, 0x2000) },
-       /* Conexant SDK */
+       /* Conexant Hybrid TV "Shelby" Polaris SDK */
        { USB_DEVICE(VENDOR_CONEXANT, 0x58a1),
          .driver_info = POLARIS_EVK },
+       /* Conexant Hybrid TV RDU253S Polaris */
+       { USB_DEVICE(VENDOR_CONEXANT, 0x58a5),
+         .driver_info = CX_HYBRID_TV },
        /* Terminating entry */
        { }
 };
@@ -303,7 +318,10 @@ static struct usb_device_id mceusb_dev_table[] = {
 struct mceusb_dev {
        /* ir-core bits */
        struct ir_dev_props *props;
-       struct ir_raw_event rawir;
+
+       /* optional features we can enable */
+       bool carrier_report_enabled;
+       bool learning_enabled;
 
        /* core device bits */
        struct device *dev;
@@ -318,6 +336,8 @@ struct mceusb_dev {
        /* buffers and dma */
        unsigned char *buf_in;
        unsigned int len_in;
+       dma_addr_t dma_in;
+       dma_addr_t dma_out;
 
        enum {
                CMD_HEADER = 0,
@@ -325,15 +345,14 @@ struct mceusb_dev {
                CMD_DATA,
                PARSE_IRDATA,
        } parser_state;
-       u8 cmd, rem;            /* Remaining IR data bytes in packet */
 
-       dma_addr_t dma_in;
-       dma_addr_t dma_out;
+       u8 cmd, rem;            /* Remaining IR data bytes in packet */
 
        struct {
                u32 connected:1;
-               u32 tx_mask_inverted:1;
+               u32 tx_mask_normal:1;
                u32 microsoft_gen1:1;
+               u32 no_tx:1;
        } flags;
 
        /* transmit support */
@@ -408,9 +427,10 @@ static int mceusb_cmdsize(u8 cmd, u8 subcmd)
                case MCE_CMD_UNKNOWN:
                case MCE_CMD_S_CARRIER:
                case MCE_CMD_S_TIMEOUT:
-               case MCE_CMD_G_RXSENSOR:
+               case MCE_RSP_PULSE_COUNT:
                        datasize = 2;
                        break;
+               case MCE_CMD_SIG_END:
                case MCE_CMD_S_TXMASK:
                case MCE_CMD_S_RXSENSOR:
                        datasize = 1;
@@ -433,7 +453,7 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
                return;
 
        /* skip meaningless 0xb1 0x60 header bytes on orig receiver */
-       if (ir->flags.microsoft_gen1 && !out)
+       if (ir->flags.microsoft_gen1 && !out && !offset)
                skip = 2;
 
        if (len <= skip)
@@ -491,6 +511,9 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
                break;
        case MCE_COMMAND_HEADER:
                switch (subcmd) {
+               case MCE_CMD_SIG_END:
+                       dev_info(dev, "End of signal\n");
+                       break;
                case MCE_CMD_PING:
                        dev_info(dev, "Ping\n");
                        break;
@@ -525,10 +548,11 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, char *buf,
                                 inout, data1 == 0x02 ? "short" : "long");
                        break;
                case MCE_CMD_G_RXSENSOR:
-                       if (len == 2)
+               /* aka MCE_RSP_PULSE_COUNT */
+                       if (out)
                                dev_info(dev, "Get receive sensor\n");
-                       else
-                               dev_info(dev, "Received pulse count is %d\n",
+                       else if (ir->learning_enabled)
+                               dev_info(dev, "RX pulse count: %d\n",
                                         ((data1 << 8) | data2));
                        break;
                case MCE_RSP_CMD_INVALID:
@@ -724,16 +748,16 @@ out:
        return ret ? ret : n;
 }
 
-/* Sets active IR outputs -- mce devices typically (all?) have two */
+/* Sets active IR outputs -- mce devices typically have two */
 static int mceusb_set_tx_mask(void *priv, u32 mask)
 {
        struct mceusb_dev *ir = priv;
 
-       if (ir->flags.tx_mask_inverted)
+       if (ir->flags.tx_mask_normal)
+               ir->tx_mask = mask;
+       else
                ir->tx_mask = (mask != MCE_DEFAULT_TX_MASK ?
                                mask ^ MCE_DEFAULT_TX_MASK : mask) << 1;
-       else
-               ir->tx_mask = mask;
 
        return 0;
 }
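
Renaming tx_mask_inverted to tx_mask_normal flips which branch is the plain store, but the mask transform itself is unchanged: on the non-"normal" hardware the requested bitmask is XORed against the default and shifted up one bit. A worked sketch, assuming MCE_DEFAULT_TX_MASK is 0x03 (the define sits outside this hunk, so treat that value as an assumption):

	#define MCE_DEFAULT_TX_MASK 0x03    /* assumed value, not shown in the diff */

	static unsigned int mce_tx_mask(unsigned int mask, int tx_mask_normal)
	{
		if (tx_mask_normal)
			return mask;        /* stored as-is */

		/* e.g. mask 0x01 -> (0x01 ^ 0x03) << 1 == 0x04;
		 *      mask 0x03 ->  0x03 << 1         == 0x06 */
		return (mask != MCE_DEFAULT_TX_MASK ?
			mask ^ MCE_DEFAULT_TX_MASK : mask) << 1;
	}
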
@@ -752,7 +776,7 @@ static int mceusb_set_tx_carrier(void *priv, u32 carrier)
 
                if (carrier == 0) {
                        ir->carrier = carrier;
-                       cmdbuf[2] = 0x01;
+                       cmdbuf[2] = MCE_CMD_SIG_END;
                        cmdbuf[3] = MCE_IRDATA_TRAILER;
                        dev_dbg(ir->dev, "%s: disabling carrier "
                                "modulation\n", __func__);
@@ -782,6 +806,34 @@ static int mceusb_set_tx_carrier(void *priv, u32 carrier)
        return carrier;
 }
 
+/*
+ * For most of the command bytes we receive from the hardware we only
+ * print debug output, but a few of them carry useful state that we
+ * store here for later use.
+ */
+static void mceusb_handle_command(struct mceusb_dev *ir, int index)
+{
+       u8 hi = ir->buf_in[index + 1] & 0xff;
+       u8 lo = ir->buf_in[index + 2] & 0xff;
+
+       switch (ir->buf_in[index]) {
+       /* 2-byte return value commands */
+       case MCE_CMD_S_TIMEOUT:
+               ir->props->timeout = MS_TO_NS((hi << 8 | lo) / 2);
+               break;
+
+       /* 1-byte return value commands */
+       case MCE_CMD_S_TXMASK:
+               ir->tx_mask = hi;
+               break;
+       case MCE_CMD_S_RXSENSOR:
+               ir->learning_enabled = (hi == 0x02);
+               break;
+       default:
+               break;
+       }
+}
+
 static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
 {
        DEFINE_IR_RAW_EVENT(rawir);
@@ -791,39 +843,30 @@ static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
        if (ir->flags.microsoft_gen1)
                i = 2;
 
+       /* if there's no data, just return now */
+       if (buf_len <= i)
+               return;
+
        for (; i < buf_len; i++) {
                switch (ir->parser_state) {
                case SUBCMD:
                        ir->rem = mceusb_cmdsize(ir->cmd, ir->buf_in[i]);
                        mceusb_dev_printdata(ir, ir->buf_in, i - 1,
                                             ir->rem + 2, false);
+                       mceusb_handle_command(ir, i);
                        ir->parser_state = CMD_DATA;
                        break;
                case PARSE_IRDATA:
                        ir->rem--;
                        rawir.pulse = ((ir->buf_in[i] & MCE_PULSE_BIT) != 0);
                        rawir.duration = (ir->buf_in[i] & MCE_PULSE_MASK)
-                                        * MCE_TIME_UNIT * 1000;
-
-                       if ((ir->buf_in[i] & MCE_PULSE_MASK) == 0x7f) {
-                               if (ir->rawir.pulse == rawir.pulse) {
-                                       ir->rawir.duration += rawir.duration;
-                               } else {
-                                       ir->rawir.duration = rawir.duration;
-                                       ir->rawir.pulse = rawir.pulse;
-                               }
-                               if (ir->rem)
-                                       break;
-                       }
-                       rawir.duration += ir->rawir.duration;
-                       ir->rawir.duration = 0;
-                       ir->rawir.pulse = rawir.pulse;
+                                        * MS_TO_NS(MCE_TIME_UNIT);
 
                        dev_dbg(ir->dev, "Storing %s with duration %d\n",
                                rawir.pulse ? "pulse" : "space",
                                rawir.duration);
 
-                       ir_raw_event_store(ir->idev, &rawir);
+                       ir_raw_event_store_with_filter(ir->idev, &rawir);
                        break;
                case CMD_DATA:
                        ir->rem--;
@@ -839,17 +882,10 @@ static void mceusb_process_ir_data(struct mceusb_dev *ir, int buf_len)
                                continue;
                        }
                        ir->rem = (ir->cmd & MCE_PACKET_LENGTH_MASK);
-                       mceusb_dev_printdata(ir, ir->buf_in, i, ir->rem + 1, false);
-                       if (ir->rem) {
+                       mceusb_dev_printdata(ir, ir->buf_in,
+                                            i, ir->rem + 1, false);
+                       if (ir->rem)
                                ir->parser_state = PARSE_IRDATA;
-                               break;
-                       }
-                       /*
-                        * a package with len=0 (e. g. 0x80) means end of
-                        * data. We could use it to do the call to
-                        * ir_raw_event_handle(). For now, we don't need to
-                        * use it.
-                        */
                        break;
                }
 
@@ -984,9 +1020,11 @@ static void mceusb_get_parameters(struct mceusb_dev *ir)
        mce_async_out(ir, GET_CARRIER_FREQ, sizeof(GET_CARRIER_FREQ));
        mce_sync_in(ir, NULL, maxp);
 
-       /* get the transmitter bitmask */
-       mce_async_out(ir, GET_TX_BITMASK, sizeof(GET_TX_BITMASK));
-       mce_sync_in(ir, NULL, maxp);
+       if (!ir->flags.no_tx) {
+               /* get the transmitter bitmask */
+               mce_async_out(ir, GET_TX_BITMASK, sizeof(GET_TX_BITMASK));
+               mce_sync_in(ir, NULL, maxp);
+       }
 
        /* get receiver timeout value */
        mce_async_out(ir, GET_RX_TIMEOUT, sizeof(GET_RX_TIMEOUT));
@@ -1035,12 +1073,18 @@ static struct input_dev *mceusb_init_input_dev(struct mceusb_dev *ir)
        props->priv = ir;
        props->driver_type = RC_DRIVER_IR_RAW;
        props->allowed_protos = IR_TYPE_ALL;
-       props->s_tx_mask = mceusb_set_tx_mask;
-       props->s_tx_carrier = mceusb_set_tx_carrier;
-       props->tx_ir = mceusb_tx_ir;
+       props->timeout = MS_TO_NS(1000);
+       if (!ir->flags.no_tx) {
+               props->s_tx_mask = mceusb_set_tx_mask;
+               props->s_tx_carrier = mceusb_set_tx_carrier;
+               props->tx_ir = mceusb_tx_ir;
+       }
 
        ir->props = props;
 
+       usb_to_input_id(ir->usbdev, &idev->id);
+       idev->dev.parent = ir->dev;
+
        if (mceusb_model[ir->model].rc_map)
                rc_map = mceusb_model[ir->model].rc_map;
 
@@ -1074,16 +1118,16 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
        enum mceusb_model_type model = id->driver_info;
        bool is_gen3;
        bool is_microsoft_gen1;
-       bool tx_mask_inverted;
+       bool tx_mask_normal;
        bool is_polaris;
 
-       dev_dbg(&intf->dev, "%s called\n", __func__);
+       dev_dbg(&intf->dev, "%s called\n", __func__);
 
        idesc  = intf->cur_altsetting;
 
        is_gen3 = mceusb_model[model].mce_gen3;
        is_microsoft_gen1 = mceusb_model[model].mce_gen1;
-       tx_mask_inverted = mceusb_model[model].tx_mask_inverted;
+       tx_mask_normal = mceusb_model[model].tx_mask_normal;
        is_polaris = mceusb_model[model].is_polaris;
 
        if (is_polaris) {
@@ -1107,7 +1151,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
                        ep_in = ep;
                        ep_in->bmAttributes = USB_ENDPOINT_XFER_INT;
                        ep_in->bInterval = 1;
-                       dev_dbg(&intf->dev, "acceptable inbound endpoint "
+                       dev_dbg(&intf->dev, "acceptable inbound endpoint "
                                "found\n");
                }
 
@@ -1122,12 +1166,12 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
                        ep_out = ep;
                        ep_out->bmAttributes = USB_ENDPOINT_XFER_INT;
                        ep_out->bInterval = 1;
-                       dev_dbg(&intf->dev, "acceptable outbound endpoint "
+                       dev_dbg(&intf->dev, "acceptable outbound endpoint "
                                "found\n");
                }
        }
        if (ep_in == NULL) {
-               dev_dbg(&intf->dev, "inbound and/or endpoint not found\n");
+               dev_dbg(&intf->dev, "inbound and/or endpoint not found\n");
                return -ENODEV;
        }
 
@@ -1150,11 +1194,10 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
        ir->dev = &intf->dev;
        ir->len_in = maxp;
        ir->flags.microsoft_gen1 = is_microsoft_gen1;
-       ir->flags.tx_mask_inverted = tx_mask_inverted;
+       ir->flags.tx_mask_normal = tx_mask_normal;
+       ir->flags.no_tx = mceusb_model[model].no_tx;
        ir->model = model;
 
-       init_ir_raw_event(&ir->rawir);
-
        /* Saving usb interface data for use by the transmitter routine */
        ir->usb_ep_in = ep_in;
        ir->usb_ep_out = ep_out;
@@ -1191,7 +1234,8 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
 
        mceusb_get_parameters(ir);
 
-       mceusb_set_tx_mask(ir, MCE_DEFAULT_TX_MASK);
+       if (!ir->flags.no_tx)
+               mceusb_set_tx_mask(ir, MCE_DEFAULT_TX_MASK);
 
        usb_set_intfdata(intf, ir);
 
index 301be53aee857298c43855df887a90ece26a2d8c..acc729c79ceca10afb96b08b101095c1a3e1dc03 100644 (file)
@@ -603,6 +603,8 @@ static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
        count = nvt->pkts;
        nvt_dbg_verbose("Processing buffer of len %d", count);
 
+       init_ir_raw_event(&rawir);
+
        for (i = 0; i < count; i++) {
                nvt->pkts--;
                sample = nvt->buf[i];
@@ -643,11 +645,15 @@ static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
                 * indicates end of IR signal, but new data incoming. In both
                 * cases, it means we're ready to call ir_raw_event_handle
                 */
-               if (sample == BUF_PULSE_BIT || ((sample != BUF_LEN_MASK) &&
-                   (sample & BUF_REPEAT_MASK) == BUF_REPEAT_BYTE))
+               if ((sample == BUF_PULSE_BIT) && nvt->pkts) {
+                       nvt_dbg("Calling ir_raw_event_handle (signal end)\n");
                        ir_raw_event_handle(nvt->rdev);
+               }
        }
 
+       nvt_dbg("Calling ir_raw_event_handle (buffer empty)\n");
+       ir_raw_event_handle(nvt->rdev);
+
        if (nvt->pkts) {
                nvt_dbg("Odd, pkts should be 0 now... (its %u)", nvt->pkts);
                nvt->pkts = 0;
index 548381c35bfd1573bf5886b648100e0d06a63f11..3a20aef67d08f97f7f0b92b143a85a67bbe24304 100644 (file)
@@ -34,8 +34,9 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/usb.h>
 #include <linux/input.h>
+#include <linux/usb.h>
+#include <linux/usb/input.h>
 #include <media/ir-core.h>
 
 #define DRIVER_VERSION "1.61"
@@ -140,7 +141,9 @@ static struct usb_driver streamzap_driver = {
 
 static void sz_push(struct streamzap_ir *sz, struct ir_raw_event rawir)
 {
-       ir_raw_event_store(sz->idev, &rawir);
+       dev_dbg(sz->dev, "Storing %s with duration %u us\n",
+               (rawir.pulse ? "pulse" : "space"), rawir.duration);
+       ir_raw_event_store_with_filter(sz->idev, &rawir);
 }
 
 static void sz_push_full_pulse(struct streamzap_ir *sz,
@@ -167,7 +170,6 @@ static void sz_push_full_pulse(struct streamzap_ir *sz,
                        rawir.duration *= 1000;
                        rawir.duration &= IR_MAX_DURATION;
                }
-               dev_dbg(sz->dev, "ls %u\n", rawir.duration);
                sz_push(sz, rawir);
 
                sz->idle = false;
@@ -180,7 +182,6 @@ static void sz_push_full_pulse(struct streamzap_ir *sz,
        sz->sum += rawir.duration;
        rawir.duration *= 1000;
        rawir.duration &= IR_MAX_DURATION;
-       dev_dbg(sz->dev, "p %u\n", rawir.duration);
        sz_push(sz, rawir);
 }
 
@@ -200,7 +201,6 @@ static void sz_push_full_space(struct streamzap_ir *sz,
        rawir.duration += SZ_RESOLUTION / 2;
        sz->sum += rawir.duration;
        rawir.duration *= 1000;
-       dev_dbg(sz->dev, "s %u\n", rawir.duration);
        sz_push(sz, rawir);
 }
 
@@ -221,8 +221,6 @@ static void streamzap_callback(struct urb *urb)
        struct streamzap_ir *sz;
        unsigned int i;
        int len;
-       static int timeout = (((SZ_TIMEOUT * SZ_RESOLUTION * 1000) &
-                               IR_MAX_DURATION) | 0x03000000);
 
        if (!urb)
                return;
@@ -246,7 +244,7 @@ static void streamzap_callback(struct urb *urb)
 
        dev_dbg(sz->dev, "%s: received urb, len %d\n", __func__, len);
        for (i = 0; i < len; i++) {
-               dev_dbg(sz->dev, "sz idx %d: %x\n",
+               dev_dbg(sz->dev, "sz->buf_in[%d]: %x\n",
                        i, (unsigned char)sz->buf_in[i]);
                switch (sz->decoder_state) {
                case PulseSpace:
@@ -273,7 +271,7 @@ static void streamzap_callback(struct urb *urb)
                                DEFINE_IR_RAW_EVENT(rawir);
 
                                rawir.pulse = false;
-                               rawir.duration = timeout;
+                               rawir.duration = sz->props->timeout;
                                sz->idle = true;
                                if (sz->timeout_enabled)
                                        sz_push(sz, rawir);
@@ -335,6 +333,9 @@ static struct input_dev *streamzap_init_input_dev(struct streamzap_ir *sz)
 
        sz->props = props;
 
+       usb_to_input_id(sz->usbdev, &idev->id);
+       idev->dev.parent = sz->dev;
+
        ret = ir_input_register(idev, RC_MAP_STREAMZAP, props, DRIVER_NAME);
        if (ret < 0) {
                dev_err(dev, "remote input device register failed\n");
@@ -444,6 +445,8 @@ static int __devinit streamzap_probe(struct usb_interface *intf,
        sz->decoder_state = PulseSpace;
        /* FIXME: don't yet have a way to set this */
        sz->timeout_enabled = true;
+       sz->props->timeout = (((SZ_TIMEOUT * SZ_RESOLUTION * 1000) &
+                               IR_MAX_DURATION) | 0x03000000);
        #if 0
        /* not yet supported, depends on patches from maxim */
        /* see also: LIRC_GET_REC_RESOLUTION and LIRC_SET_REC_TIMEOUT */
index dfb198d0415bc200b0152c1be105d722261653c5..f16461844c5c0063e281da22b8cd3e27fe6d28f4 100644 (file)
@@ -1989,8 +1989,23 @@ static int cx25840_probe(struct i2c_client *client,
        v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops,
                        V4L2_CID_HUE, -128, 127, 1, 0);
        if (!is_cx2583x(state)) {
-               default_volume = 228 - cx25840_read(client, 0x8d4);
-               default_volume = ((default_volume / 2) + 23) << 9;
+               default_volume = cx25840_read(client, 0x8d4);
+               /*
+                * Enforce the legacy PVR-350/MSP3400 to PVR-150/CX25843 volume
+                * scale mapping limits to avoid -ERANGE errors when
+                * initializing the volume control
+                */
+               if (default_volume > 228) {
+                       /* Bottom out at -96 dB, v4l2 vol range 0x2e00-0x2fff */
+                       default_volume = 228;
+                       cx25840_write(client, 0x8d4, 228);
+               } else if (default_volume < 20) {
+                       /* Top out at +8 dB, v4l2 vol range 0xfe00-0xffff */
+                       default_volume = 20;
+                       cx25840_write(client, 0x8d4, 20);
+               }
+               default_volume = (((228 - default_volume) >> 1) + 23) << 9;
 
                state->volume = v4l2_ctrl_new_std(&state->hdl,
                        &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_VOLUME,
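
Clamping the raw 0x8d4 register to the 20..228 window keeps the derived value inside the v4l2 ranges quoted in the comments; plugging the two endpoints into the new expression confirms the bounds (a quick standalone check, not driver code):

	#include <assert.h>

	/* maps the clamped 0x8d4 register reading to a v4l2 volume, as above */
	static int map_volume(int reg)
	{
		return (((228 - reg) >> 1) + 23) << 9;
	}

	int main(void)
	{
		assert(map_volume(228) == 0x2e00);  /* -96 dB floor:  (0 + 23) << 9   */
		assert(map_volume(20)  == 0xfe00);  /* +8 dB ceiling: (104 + 23) << 9 */
		return 0;
	}
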
index 4aaa47c0eabf54ea8e008a616b0dac531b423ce7..54b7fcd469a8a9b2db267e81ac163d0e579ce31b 100644 (file)
@@ -40,7 +40,6 @@
 #include <sound/control.h>
 #include <sound/initval.h>
 #include <sound/tlv.h>
-#include <media/wm8775.h>
 
 #include "cx88.h"
 #include "cx88-reg.h"
@@ -587,47 +586,26 @@ static int snd_cx88_volume_put(struct snd_kcontrol *kcontrol,
        int left, right, v, b;
        int changed = 0;
        u32 old;
-       struct v4l2_control client_ctl;
-
-       /* Pass volume & balance onto any WM8775 */
-       if (value->value.integer.value[0] >= value->value.integer.value[1]) {
-               v = value->value.integer.value[0] << 10;
-               b = value->value.integer.value[0] ?
-                       (0x8000 * value->value.integer.value[1]) / value->value.integer.value[0] :
-                       0x8000;
-       } else {
-               v = value->value.integer.value[1] << 10;
-               b = value->value.integer.value[1] ?
-               0xffff - (0x8000 * value->value.integer.value[0]) / value->value.integer.value[1] :
-               0x8000;
-       }
-       client_ctl.value = v;
-       client_ctl.id = V4L2_CID_AUDIO_VOLUME;
-       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
-       client_ctl.value = b;
-       client_ctl.id = V4L2_CID_AUDIO_BALANCE;
-       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
 
        left = value->value.integer.value[0] & 0x3f;
        right = value->value.integer.value[1] & 0x3f;
        b = right - left;
        if (b < 0) {
-               v = 0x3f - left;
-               b = (-b) | 0x40;
+           v = 0x3f - left;
+           b = (-b) | 0x40;
        } else {
-               v = 0x3f - right;
+           v = 0x3f - right;
        }
        /* Do we really know this will always be called with IRQs on? */
        spin_lock_irq(&chip->reg_lock);
        old = cx_read(AUD_VOL_CTL);
        if (v != (old & 0x3f)) {
-               cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, (old & ~0x3f) | v);
-               changed = 1;
+           cx_write(AUD_VOL_CTL, (old & ~0x3f) | v);
+           changed = 1;
        }
-       if ((cx_read(AUD_BAL_CTL) & 0x7f) != b) {
-               cx_write(AUD_BAL_CTL, b);
-               changed = 1;
+       if (cx_read(AUD_BAL_CTL) != b) {
+           cx_write(AUD_BAL_CTL, b);
+           changed = 1;
        }
        spin_unlock_irq(&chip->reg_lock);
 
@@ -640,7 +618,7 @@ static const struct snd_kcontrol_new snd_cx88_volume = {
        .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
        .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
                  SNDRV_CTL_ELEM_ACCESS_TLV_READ,
-       .name = "Analog-TV Volume",
+       .name = "Playback Volume",
        .info = snd_cx88_volume_info,
        .get = snd_cx88_volume_get,
        .put = snd_cx88_volume_put,
@@ -671,14 +649,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
        vol = cx_read(AUD_VOL_CTL);
        if (value->value.integer.value[0] != !(vol & bit)) {
                vol ^= bit;
-               cx_swrite(SHADOW_AUD_VOL_CTL, AUD_VOL_CTL, vol);
-               /* Pass mute onto any WM8775 */
-               if ((1<<6) == bit) {
-                       struct v4l2_control client_ctl;
-                       client_ctl.value = 0 != (vol & bit);
-                       client_ctl.id = V4L2_CID_AUDIO_MUTE;
-                       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-               }
+               cx_write(AUD_VOL_CTL, vol);
                ret = 1;
        }
        spin_unlock_irq(&chip->reg_lock);
@@ -687,7 +658,7 @@ static int snd_cx88_switch_put(struct snd_kcontrol *kcontrol,
 
 static const struct snd_kcontrol_new snd_cx88_dac_switch = {
        .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-       .name = "Audio-Out Switch",
+       .name = "Playback Switch",
        .info = snd_ctl_boolean_mono_info,
        .get = snd_cx88_switch_get,
        .put = snd_cx88_switch_put,
@@ -696,49 +667,13 @@ static const struct snd_kcontrol_new snd_cx88_dac_switch = {
 
 static const struct snd_kcontrol_new snd_cx88_source_switch = {
        .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-       .name = "Analog-TV Switch",
+       .name = "Capture Switch",
        .info = snd_ctl_boolean_mono_info,
        .get = snd_cx88_switch_get,
        .put = snd_cx88_switch_put,
        .private_value = (1<<6),
 };
 
-static int snd_cx88_alc_get(struct snd_kcontrol *kcontrol,
-                              struct snd_ctl_elem_value *value)
-{
-       snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
-       struct cx88_core *core = chip->core;
-       struct v4l2_control client_ctl;
-
-       client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
-       call_hw(core, WM8775_GID, core, g_ctrl, &client_ctl);
-       value->value.integer.value[0] = client_ctl.value ? 1 : 0;
-
-       return 0;
-}
-
-static int snd_cx88_alc_put(struct snd_kcontrol *kcontrol,
-                                      struct snd_ctl_elem_value *value)
-{
-       snd_cx88_card_t *chip = snd_kcontrol_chip(kcontrol);
-       struct cx88_core *core = chip->core;
-       struct v4l2_control client_ctl;
-
-       client_ctl.value = 0 != value->value.integer.value[0];
-       client_ctl.id = V4L2_CID_AUDIO_LOUDNESS;
-       call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
-       return 0;
-}
-
-static struct snd_kcontrol_new snd_cx88_alc_switch = {
-       .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-       .name = "Line-In ALC Switch",
-       .info = snd_ctl_boolean_mono_info,
-       .get = snd_cx88_alc_get,
-       .put = snd_cx88_alc_put,
-};
-
 /****************************************************************************
                        Basic Flow for Sound Devices
  ****************************************************************************/
@@ -860,7 +795,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
 {
        struct snd_card  *card;
        snd_cx88_card_t  *chip;
-       struct v4l2_subdev *sd;
        int              err;
 
        if (devno >= SNDRV_CARDS)
@@ -896,15 +830,6 @@ static int __devinit cx88_audio_initdev(struct pci_dev *pci,
        if (err < 0)
                goto error;
 
-       /* If there's a wm8775 then add a Line-In ALC switch */
-       list_for_each_entry(sd, &chip->core->v4l2_dev.subdevs, list) {
-               if (WM8775_GID == sd->grp_id) {
-                       snd_ctl_add(card, snd_ctl_new1(&snd_cx88_alc_switch,
-                                                      chip));
-                       break;
-               }
-       }
-
        strcpy (card->driver, "CX88x");
        sprintf(card->shortname, "Conexant CX%x", pci->device);
        sprintf(card->longname, "%s at %#llx",
index 9b9e169cce90862ee92f7d80ab7851cdb3cf81cb..0ccc2afd72668e7d2b8384312b01cde4ebb0e86b 100644 (file)
@@ -1007,15 +1007,22 @@ static const struct cx88_board cx88_boards[] = {
                .radio_type     = UNSET,
                .tuner_addr     = ADDR_UNSET,
                .radio_addr     = ADDR_UNSET,
+               .audio_chip = V4L2_IDENT_WM8775,
                .input          = {{
                        .type   = CX88_VMUX_DVB,
                        .vmux   = 0,
+                       /* 2: Line-In */
+                       .audioroute = 2,
                },{
                        .type   = CX88_VMUX_COMPOSITE1,
                        .vmux   = 1,
+                       /* 2: Line-In */
+                       .audioroute = 2,
                },{
                        .type   = CX88_VMUX_SVIDEO,
                        .vmux   = 2,
+                       /* 2: Line-In */
+                       .audioroute = 2,
                }},
                .mpeg           = CX88_MPEG_DVB,
        },
index 62cea9549404bf29d3d58e004af015dcb132f779..d9249e5a04c9088041a67d9a1b997e26dae64761 100644 (file)
@@ -40,7 +40,6 @@
 #include "cx88.h"
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
-#include <media/wm8775.h>
 
 MODULE_DESCRIPTION("v4l2 driver module for cx2388x based TV cards");
 MODULE_AUTHOR("Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]");
@@ -977,7 +976,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
        const struct cx88_ctrl *c = NULL;
        u32 value,mask;
        int i;
-       struct v4l2_control client_ctl;
 
        for (i = 0; i < CX8800_CTLS; i++) {
                if (cx8800_ctls[i].v.id == ctl->id) {
@@ -991,27 +989,6 @@ int cx88_set_control(struct cx88_core *core, struct v4l2_control *ctl)
                ctl->value = c->v.minimum;
        if (ctl->value > c->v.maximum)
                ctl->value = c->v.maximum;
-
-       /* Pass changes onto any WM8775 */
-       client_ctl.id = ctl->id;
-       switch (ctl->id) {
-       case V4L2_CID_AUDIO_MUTE:
-               client_ctl.value = ctl->value;
-               break;
-       case V4L2_CID_AUDIO_VOLUME:
-               client_ctl.value = (ctl->value) ?
-                       (0x90 + ctl->value) << 8 : 0;
-               break;
-       case V4L2_CID_AUDIO_BALANCE:
-               client_ctl.value = ctl->value << 9;
-               break;
-       default:
-               client_ctl.id = 0;
-               break;
-       }
-       if (client_ctl.id)
-               call_hw(core, WM8775_GID, core, s_ctrl, &client_ctl);
-
        mask=c->mask;
        switch (ctl->id) {
        case V4L2_CID_AUDIO_BALANCE:
@@ -1558,9 +1535,7 @@ static int radio_queryctrl (struct file *file, void *priv,
        if (c->id <  V4L2_CID_BASE ||
                c->id >= V4L2_CID_LASTP1)
                return -EINVAL;
-       if (c->id == V4L2_CID_AUDIO_MUTE ||
-               c->id == V4L2_CID_AUDIO_VOLUME ||
-               c->id == V4L2_CID_AUDIO_BALANCE) {
+       if (c->id == V4L2_CID_AUDIO_MUTE) {
                for (i = 0; i < CX8800_CTLS; i++) {
                        if (cx8800_ctls[i].v.id == c->id)
                                break;
index e8c732e7ae4f612dfc5004d7f56ba5bd9b9a8d53..c9981e77416a6f8292721b53333bd9cbba1da35c 100644 (file)
@@ -398,19 +398,17 @@ static inline struct cx88_core *to_core(struct v4l2_device *v4l2_dev)
        return container_of(v4l2_dev, struct cx88_core, v4l2_dev);
 }
 
-#define call_hw(core, grpid, o, f, args...) \
+#define call_all(core, o, f, args...)                          \
        do {                                                    \
                if (!core->i2c_rc) {                            \
                        if (core->gate_ctrl)                    \
                                core->gate_ctrl(core, 1);       \
-                       v4l2_device_call_all(&core->v4l2_dev, grpid, o, f, ##args); \
+                       v4l2_device_call_all(&core->v4l2_dev, 0, o, f, ##args); \
                        if (core->gate_ctrl)                    \
                                core->gate_ctrl(core, 0);       \
                }                                               \
        } while (0)
 
-#define call_all(core, o, f, args...) call_hw(core, 0, o, f, ##args)
-
 struct cx8800_dev;
 struct cx8802_dev;
 
index 908e3bc88303989c10f6ae8950286a820d2132e9..2c3007280032ea5472e45e9b1c75ee38aaa36b4c 100644 (file)
@@ -2377,7 +2377,7 @@ static const struct v4l2_file_operations radio_fops = {
        .owner         = THIS_MODULE,
        .open          = em28xx_v4l2_open,
        .release       = em28xx_v4l2_close,
-       .ioctl         = video_ioctl2,
+       .unlocked_ioctl = video_ioctl2,
 };
 
 static const struct v4l2_ioctl_ops radio_ioctl_ops = {
index 072bd2d1cfad46f4c5a25ea33f789409c2848a5c..13565cba237d13e0a99922067ceb9edb9edd1e83 100644 (file)
@@ -807,8 +807,6 @@ static int mx2_camera_set_bus_param(struct soc_camera_device *icd,
 
        if (common_flags & SOCAM_PCLK_SAMPLE_RISING)
                csicr1 |= CSICR1_REDGE;
-       if (common_flags & SOCAM_PCLK_SAMPLE_FALLING)
-               csicr1 |= CSICR1_INV_PCLK;
        if (common_flags & SOCAM_VSYNC_ACTIVE_HIGH)
                csicr1 |= CSICR1_SOF_POL;
        if (common_flags & SOCAM_HSYNC_ACTIVE_HIGH)
index 1b93207c89e84efbbf6a4e3e8357a7af7f32f90d..2f500809f53d9b418dd2d3117ce44ef7706d0287 100644 (file)
@@ -522,6 +522,7 @@ static int fimc_cap_streamon(struct file *file, void *priv,
        INIT_LIST_HEAD(&fimc->vid_cap.active_buf_q);
        fimc->vid_cap.active_buf_cnt = 0;
        fimc->vid_cap.frame_count = 0;
+       fimc->vid_cap.buf_index = fimc_hw_get_frame_index(fimc);
 
        set_bit(ST_CAPT_PEND, &fimc->state);
        ret = videobuf_streamon(&fimc->vid_cap.vbq);
@@ -652,6 +653,50 @@ static int fimc_cap_s_ctrl(struct file *file, void *priv,
        return ret;
 }
 
+static int fimc_cap_cropcap(struct file *file, void *fh,
+                           struct v4l2_cropcap *cr)
+{
+       struct fimc_frame *f;
+       struct fimc_ctx *ctx = fh;
+       struct fimc_dev *fimc = ctx->fimc_dev;
+
+       if (cr->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+               return -EINVAL;
+
+       if (mutex_lock_interruptible(&fimc->lock))
+               return -ERESTARTSYS;
+
+       f = &ctx->s_frame;
+       cr->bounds.left         = 0;
+       cr->bounds.top          = 0;
+       cr->bounds.width        = f->o_width;
+       cr->bounds.height       = f->o_height;
+       cr->defrect             = cr->bounds;
+
+       mutex_unlock(&fimc->lock);
+       return 0;
+}
+
+static int fimc_cap_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
+{
+       struct fimc_frame *f;
+       struct fimc_ctx *ctx = file->private_data;
+       struct fimc_dev *fimc = ctx->fimc_dev;
+
+       if (mutex_lock_interruptible(&fimc->lock))
+               return -ERESTARTSYS;
+
+       f = &ctx->s_frame;
+       cr->c.left      = f->offs_h;
+       cr->c.top       = f->offs_v;
+       cr->c.width     = f->width;
+       cr->c.height    = f->height;
+
+       mutex_unlock(&fimc->lock);
+       return 0;
+}
+
 static int fimc_cap_s_crop(struct file *file, void *fh,
                               struct v4l2_crop *cr)
 {
@@ -716,9 +761,9 @@ static const struct v4l2_ioctl_ops fimc_capture_ioctl_ops = {
        .vidioc_g_ctrl                  = fimc_vidioc_g_ctrl,
        .vidioc_s_ctrl                  = fimc_cap_s_ctrl,
 
-       .vidioc_g_crop                  = fimc_vidioc_g_crop,
+       .vidioc_g_crop                  = fimc_cap_g_crop,
        .vidioc_s_crop                  = fimc_cap_s_crop,
-       .vidioc_cropcap                 = fimc_vidioc_cropcap,
+       .vidioc_cropcap                 = fimc_cap_cropcap,
 
        .vidioc_enum_input              = fimc_cap_enum_input,
        .vidioc_s_input                 = fimc_cap_s_input,
@@ -785,7 +830,7 @@ int fimc_register_capture_device(struct fimc_dev *fimc)
        videobuf_queue_dma_contig_init(&vid_cap->vbq, &fimc_qops,
                vid_cap->v4l2_dev.dev, &fimc->irqlock,
                V4L2_BUF_TYPE_VIDEO_CAPTURE, V4L2_FIELD_NONE,
-               sizeof(struct fimc_vid_buffer), (void *)ctx);
+               sizeof(struct fimc_vid_buffer), (void *)ctx, NULL);
 
        ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1);
        if (ret) {
index 2e7c547894b687d9fd37288e365ceae704dbcbfc..bb99f2d805d3347975e0ff32603bd2f7b8f44e20 100644 (file)
@@ -50,8 +50,8 @@ static struct fimc_fmt fimc_formats[] = {
                .planes_cnt = 1,
                .flags = FMT_FLAGS_M2M,
        }, {
-               .name = "XRGB-8-8-8-8, 24 bpp",
-               .fourcc = V4L2_PIX_FMT_RGB24,
+               .name = "XRGB-8-8-8-8, 32 bpp",
+               .fourcc = V4L2_PIX_FMT_RGB32,
                .depth = 32,
                .color  = S5P_FIMC_RGB888,
                .buff_cnt = 1,
@@ -983,6 +983,7 @@ int fimc_vidioc_queryctrl(struct file *file, void *priv,
 {
        struct fimc_ctx *ctx = priv;
        struct v4l2_queryctrl *c;
+       int ret = -EINVAL;
 
        c = get_ctrl(qc->id);
        if (c) {
@@ -990,10 +991,14 @@ int fimc_vidioc_queryctrl(struct file *file, void *priv,
                return 0;
        }
 
-       if (ctx->state & FIMC_CTX_CAP)
-               return v4l2_subdev_call(ctx->fimc_dev->vid_cap.sd,
+       if (ctx->state & FIMC_CTX_CAP) {
+               if (mutex_lock_interruptible(&ctx->fimc_dev->lock))
+                       return -ERESTARTSYS;
+               ret = v4l2_subdev_call(ctx->fimc_dev->vid_cap.sd,
                                        core, queryctrl, qc);
-       return -EINVAL;
+               mutex_unlock(&ctx->fimc_dev->lock);
+       }
+       return ret;
 }
 
 int fimc_vidioc_g_ctrl(struct file *file, void *priv,
@@ -1115,7 +1120,7 @@ static int fimc_m2m_s_ctrl(struct file *file, void *priv,
        return 0;
 }
 
-int fimc_vidioc_cropcap(struct file *file, void *fh,
+static int fimc_m2m_cropcap(struct file *file, void *fh,
                        struct v4l2_cropcap *cr)
 {
        struct fimc_frame *frame;
@@ -1139,7 +1144,7 @@ int fimc_vidioc_cropcap(struct file *file, void *fh,
        return 0;
 }
 
-int fimc_vidioc_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
+static int fimc_m2m_g_crop(struct file *file, void *fh, struct v4l2_crop *cr)
 {
        struct fimc_frame *frame;
        struct fimc_ctx *ctx = file->private_data;
@@ -1167,22 +1172,22 @@ int fimc_try_crop(struct fimc_ctx *ctx, struct v4l2_crop *cr)
        struct fimc_frame *f;
        u32 min_size, halign;
 
-       f = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ?
-               &ctx->s_frame : &ctx->d_frame;
-
        if (cr->c.top < 0 || cr->c.left < 0) {
                v4l2_err(&fimc->m2m.v4l2_dev,
                        "doesn't support negative values for top & left\n");
                return -EINVAL;
        }
 
-       f = ctx_get_frame(ctx, cr->type);
-       if (IS_ERR(f))
-               return PTR_ERR(f);
+       if (cr->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+               f = (ctx->state & FIMC_CTX_CAP) ? &ctx->s_frame : &ctx->d_frame;
+       else if (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
+                ctx->state & FIMC_CTX_M2M)
+               f = &ctx->s_frame;
+       else
+               return -EINVAL;
 
-       min_size = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
-               ? fimc->variant->min_inp_pixsize
-               : fimc->variant->min_out_pixsize;
+       min_size = (f == &ctx->s_frame) ?
+               fimc->variant->min_inp_pixsize : fimc->variant->min_out_pixsize;
 
        if (ctx->state & FIMC_CTX_M2M) {
                if (fimc->id == 1 && fimc->variant->pix_hoff)
@@ -1233,6 +1238,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
        f = (cr->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) ?
                &ctx->s_frame : &ctx->d_frame;
 
+       if (mutex_lock_interruptible(&fimc->lock))
+               return -ERESTARTSYS;
+
        spin_lock_irqsave(&ctx->slock, flags);
        if (~ctx->state & (FIMC_SRC_FMT | FIMC_DST_FMT)) {
                /* Check to see if scaling ratio is within supported range */
@@ -1241,9 +1249,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
                else
                        ret = fimc_check_scaler_ratio(&cr->c, &ctx->s_frame);
                if (ret) {
-                       spin_unlock_irqrestore(&ctx->slock, flags);
                        v4l2_err(&fimc->m2m.v4l2_dev, "Out of scaler range");
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto scr_unlock;
                }
        }
        ctx->state |= FIMC_PARAMS;
@@ -1253,7 +1261,9 @@ static int fimc_m2m_s_crop(struct file *file, void *fh, struct v4l2_crop *cr)
        f->width  = cr->c.width;
        f->height = cr->c.height;
 
+scr_unlock:
        spin_unlock_irqrestore(&ctx->slock, flags);
+       mutex_unlock(&fimc->lock);
        return 0;
 }
 
@@ -1285,9 +1295,9 @@ static const struct v4l2_ioctl_ops fimc_m2m_ioctl_ops = {
        .vidioc_g_ctrl                  = fimc_vidioc_g_ctrl,
        .vidioc_s_ctrl                  = fimc_m2m_s_ctrl,
 
-       .vidioc_g_crop                  = fimc_vidioc_g_crop,
+       .vidioc_g_crop                  = fimc_m2m_g_crop,
        .vidioc_s_crop                  = fimc_m2m_s_crop,
-       .vidioc_cropcap                 = fimc_vidioc_cropcap
+       .vidioc_cropcap                 = fimc_m2m_cropcap
 
 };
 
@@ -1396,7 +1406,7 @@ static const struct v4l2_file_operations fimc_m2m_fops = {
        .open           = fimc_m2m_open,
        .release        = fimc_m2m_release,
        .poll           = fimc_m2m_poll,
-       .ioctl          = video_ioctl2,
+       .unlocked_ioctl = video_ioctl2,
        .mmap           = fimc_m2m_mmap,
 };
 
@@ -1736,6 +1746,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
        .pix_hoff        = 1,
        .has_inp_rot     = 1,
        .has_out_rot     = 1,
+       .has_cistatus2   = 1,
        .min_inp_pixsize = 16,
        .min_out_pixsize = 16,
        .hor_offs_align  = 1,
@@ -1745,6 +1756,7 @@ static struct samsung_fimc_variant fimc0_variant_s5pv310 = {
 
 static struct samsung_fimc_variant fimc2_variant_s5pv310 = {
        .pix_hoff        = 1,
+       .has_cistatus2   = 1,
        .min_inp_pixsize = 16,
        .min_out_pixsize = 16,
        .hor_offs_align  = 1,
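
Switching the file operations from .ioctl to .unlocked_ioctl (below in this file) drops the implicit Big Kernel Lock around video_ioctl2(), which is why the handlers in this file simultaneously grow explicit mutex_lock_interruptible()/mutex_unlock() pairs. The skeleton of such a conversion, sketched with hypothetical names:

    #include <linux/module.h>
    #include <media/v4l2-dev.h>
    #include <media/v4l2-ioctl.h>

    /* After the switch no lock is taken for us, so every handler that
     * video_ioctl2() dispatches to must serialize on a driver mutex
     * itself, as the fimc handlers above now do. */
    static const struct v4l2_file_operations my_fops = {
            .owner          = THIS_MODULE,
            .unlocked_ioctl = video_ioctl2,
            /* .open, .release, .poll, .mmap elided */
    };
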
index 3e107851656017adfc1a2567956a93e28fd43cd5..4f047d35f8ad9333ce3b4768836685b3d5f2f411 100644 (file)
 
 /*#define DEBUG*/
 
+#include <linux/sched.h>
 #include <linux/types.h>
+#include <linux/videodev2.h>
 #include <media/videobuf-core.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-mem2mem.h>
 #include <media/v4l2-mediabus.h>
 #include <media/s3c_fimc.h>
-#include <linux/videodev2.h>
+
 #include "regs-fimc.h"
 
 #define err(fmt, args...) \
@@ -369,6 +371,7 @@ struct fimc_pix_limit {
  * @pix_hoff: indicate whether horizontal offset is in pixels or in bytes
  * @has_inp_rot: set if has input rotator
  * @has_out_rot: set if has output rotator
+ * @has_cistatus2: 1 if CISTATUS2 register is present in this IP revision
  * @pix_limit: pixel size constraints for the scaler
  * @min_inp_pixsize: minimum input pixel size
  * @min_out_pixsize: minimum output pixel size
@@ -379,6 +382,7 @@ struct samsung_fimc_variant {
        unsigned int    pix_hoff:1;
        unsigned int    has_inp_rot:1;
        unsigned int    has_out_rot:1;
+       unsigned int    has_cistatus2:1;
        struct fimc_pix_limit *pix_limit;
        u16             min_inp_pixsize;
        u16             min_out_pixsize;
@@ -554,11 +558,19 @@ static inline struct fimc_frame *ctx_get_frame(struct fimc_ctx *ctx,
        return frame;
 }
 
+/* Return an index to the buffer actually being written. */
 static inline u32 fimc_hw_get_frame_index(struct fimc_dev *dev)
 {
-       u32 reg = readl(dev->regs + S5P_CISTATUS);
-       return (reg & S5P_CISTATUS_FRAMECNT_MASK) >>
-               S5P_CISTATUS_FRAMECNT_SHIFT;
+       u32 reg;
+
+       if (dev->variant->has_cistatus2) {
+               reg = readl(dev->regs + S5P_CISTATUS2) & 0x3F;
+               return reg > 0 ? --reg : reg;
+       } else {
+               reg = readl(dev->regs + S5P_CISTATUS);
+               return (reg & S5P_CISTATUS_FRAMECNT_MASK) >>
+                       S5P_CISTATUS_FRAMECNT_SHIFT;
+       }
 }
 
 /* -----------------------------------------------------*/
@@ -594,10 +606,6 @@ int fimc_vidioc_g_fmt(struct file *file, void *priv,
                      struct v4l2_format *f);
 int fimc_vidioc_try_fmt(struct file *file, void *priv,
                        struct v4l2_format *f);
-int fimc_vidioc_g_crop(struct file *file, void *fh,
-                      struct v4l2_crop *cr);
-int fimc_vidioc_cropcap(struct file *file, void *fh,
-                       struct v4l2_cropcap *cr);
 int fimc_vidioc_queryctrl(struct file *file, void *priv,
                          struct v4l2_queryctrl *qc);
 int fimc_vidioc_g_ctrl(struct file *file, void *priv,
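
The new CISTATUS2 path reads a 1-based index of the buffer being written from the register's low six bits, with zero apparently meaning no capture in progress, hence the decrement guarded by reg > 0. A small userspace check of just that arithmetic (the register semantics beyond what the helper itself shows are an assumption):

    #include <assert.h>

    /* Mirror of the CISTATUS2 decode above: the low 6 bits hold a
     * 1-based index of the frame buffer being written; 0 is passed
     * through unchanged. */
    static unsigned int frame_index_from_cistatus2(unsigned int reg)
    {
            reg &= 0x3F;
            return reg > 0 ? reg - 1 : reg;
    }

    int main(void)
    {
            assert(frame_index_from_cistatus2(0x00) == 0);
            assert(frame_index_from_cistatus2(0x01) == 0);  /* buffer 1 -> index 0 */
            assert(frame_index_from_cistatus2(0x04) == 3);
            assert(frame_index_from_cistatus2(0xFF) == 62); /* only low 6 bits count */
            return 0;
    }
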
index a57daedb5b5cf7099e273afb276759b4228fef42..57e33f84fcfa5fa4a4fd35d7eb9998e75ac1d003 100644 (file)
 #define S5P_CISTATUS_VVALID_A          (1 << 15)
 #define S5P_CISTATUS_VVALID_B          (1 << 14)
 
+/* Indexes to the last and the currently processed buffer. */
+#define S5P_CISTATUS2                  0x68
+
 /* Image capture control */
 #define S5P_CIIMGCPT                   0xc0
 #define S5P_CIIMGCPT_IMGCPTEN          (1 << 31)
index 5c209afb0ac8ddb434d5c6335c2661161e36aa5b..2486520582f2c576639a28bc231ccf847a472e4b 100644 (file)
@@ -1980,7 +1980,7 @@ static int __devinit sh_mobile_ceu_probe(struct platform_device *pdev)
                 * we complete the completion.
                 */
 
-               if (!csi2->driver || !csi2->driver->owner) {
+               if (!csi2->driver) {
                        complete(&wait.completion);
                        /* Either too late, or probing failed */
                        bus_unregister_notifier(&platform_bus_type, &wait.notifier);
index 335120c2021bc2cdbf5de98b59f58cc001144ad5..052bd6dfa5a787033d445401ddb114bb40796732 100644 (file)
@@ -405,13 +405,13 @@ static int soc_camera_open(struct file *file)
                ret = soc_camera_set_fmt(icd, &f);
                if (ret < 0)
                        goto esfmt;
+
+               ici->ops->init_videobuf(&icd->vb_vidq, icd);
        }
 
        file->private_data = icd;
        dev_dbg(&icd->dev, "camera device open\n");
 
-       ici->ops->init_videobuf(&icd->vb_vidq, icd);
-
        mutex_unlock(&icd->video_lock);
 
        return 0;
index 135525649086401779ce84cfbb90d87b4c5a7131..fe8ef6419f831f36d52347ff275d8c96d4dc8c5f 100644 (file)
@@ -35,7 +35,6 @@
 #include <media/v4l2-device.h>
 #include <media/v4l2-chip-ident.h>
 #include <media/v4l2-ctrls.h>
-#include <media/wm8775.h>
 
 MODULE_DESCRIPTION("wm8775 driver");
 MODULE_AUTHOR("Ulf Eklund, Hans Verkuil");
@@ -51,16 +50,10 @@ enum {
        TOT_REGS
 };
 
-#define ALC_HOLD 0x85 /* R17: use zero cross detection, ALC hold time 42.6 ms */
-#define ALC_EN 0x100  /* R17: ALC enable */
-
 struct wm8775_state {
        struct v4l2_subdev sd;
        struct v4l2_ctrl_handler hdl;
        struct v4l2_ctrl *mute;
-       struct v4l2_ctrl *vol;
-       struct v4l2_ctrl *bal;
-       struct v4l2_ctrl *loud;
        u8 input;               /* Last selected input (0-0xf) */
 };
 
@@ -92,30 +85,6 @@ static int wm8775_write(struct v4l2_subdev *sd, int reg, u16 val)
        return -1;
 }
 
-static void wm8775_set_audio(struct v4l2_subdev *sd, int quietly)
-{
-       struct wm8775_state *state = to_state(sd);
-       u8 vol_l, vol_r;
-       int muted = 0 != state->mute->val;
-       u16 volume = (u16)state->vol->val;
-       u16 balance = (u16)state->bal->val;
-
-       /* normalize ( 65535 to 0 -> 255 to 0 (+24dB to -103dB) ) */
-       vol_l = (min(65536 - balance, 32768) * volume) >> 23;
-       vol_r = (min(balance, (u16)32768) * volume) >> 23;
-
-       /* Mute */
-       if (muted || quietly)
-               wm8775_write(sd, R21, 0x0c0 | state->input);
-
-       wm8775_write(sd, R14, vol_l | 0x100); /* 0x100= Left channel ADC zero cross enable */
-       wm8775_write(sd, R15, vol_r | 0x100); /* 0x100= Right channel ADC zero cross enable */
-
-       /* Un-mute */
-       if (!muted)
-               wm8775_write(sd, R21, state->input);
-}
-
 static int wm8775_s_routing(struct v4l2_subdev *sd,
                            u32 input, u32 output, u32 config)
 {
@@ -133,26 +102,25 @@ static int wm8775_s_routing(struct v4l2_subdev *sd,
        state->input = input;
        if (!v4l2_ctrl_g_ctrl(state->mute))
                return 0;
-       if (!v4l2_ctrl_g_ctrl(state->vol))
-               return 0;
-       if (!v4l2_ctrl_g_ctrl(state->bal))
-               return 0;
-       wm8775_set_audio(sd, 1);
+       wm8775_write(sd, R21, 0x0c0);
+       wm8775_write(sd, R14, 0x1d4);
+       wm8775_write(sd, R15, 0x1d4);
+       wm8775_write(sd, R21, 0x100 + state->input);
        return 0;
 }
 
 static int wm8775_s_ctrl(struct v4l2_ctrl *ctrl)
 {
        struct v4l2_subdev *sd = to_sd(ctrl);
+       struct wm8775_state *state = to_state(sd);
 
        switch (ctrl->id) {
        case V4L2_CID_AUDIO_MUTE:
-       case V4L2_CID_AUDIO_VOLUME:
-       case V4L2_CID_AUDIO_BALANCE:
-               wm8775_set_audio(sd, 0);
-               return 0;
-       case V4L2_CID_AUDIO_LOUDNESS:
-               wm8775_write(sd, R17, (ctrl->val ? ALC_EN : 0) | ALC_HOLD);
+               wm8775_write(sd, R21, 0x0c0);
+               wm8775_write(sd, R14, 0x1d4);
+               wm8775_write(sd, R15, 0x1d4);
+               if (!ctrl->val)
+                       wm8775_write(sd, R21, 0x100 + state->input);
                return 0;
        }
        return -EINVAL;
@@ -176,7 +144,16 @@ static int wm8775_log_status(struct v4l2_subdev *sd)
 
 static int wm8775_s_frequency(struct v4l2_subdev *sd, struct v4l2_frequency *freq)
 {
-       wm8775_set_audio(sd, 0);
+       struct wm8775_state *state = to_state(sd);
+
+       /* If I remove this, then it can happen that I have no
+          sound the first time I tune from static to a valid channel.
+          It's difficult to reproduce and is almost certainly related
+          to the zero cross detect circuit. */
+       wm8775_write(sd, R21, 0x0c0);
+       wm8775_write(sd, R14, 0x1d4);
+       wm8775_write(sd, R15, 0x1d4);
+       wm8775_write(sd, R21, 0x100 + state->input);
        return 0;
 }
 
@@ -226,7 +203,6 @@ static int wm8775_probe(struct i2c_client *client,
 {
        struct wm8775_state *state;
        struct v4l2_subdev *sd;
-       int err;
 
        /* Check if the adapter supports the needed features */
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA))
@@ -240,21 +216,15 @@ static int wm8775_probe(struct i2c_client *client,
                return -ENOMEM;
        sd = &state->sd;
        v4l2_i2c_subdev_init(sd, client, &wm8775_ops);
-       sd->grp_id = WM8775_GID; /* subdev group id */
        state->input = 2;
 
-       v4l2_ctrl_handler_init(&state->hdl, 4);
+       v4l2_ctrl_handler_init(&state->hdl, 1);
        state->mute = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
                        V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0);
-       state->vol = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-                       V4L2_CID_AUDIO_VOLUME, 0, 65535, (65535+99)/100, 0xCF00); /* 0dB*/
-       state->bal = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-                       V4L2_CID_AUDIO_BALANCE, 0, 65535, (65535+99)/100, 32768);
-       state->loud = v4l2_ctrl_new_std(&state->hdl, &wm8775_ctrl_ops,
-                       V4L2_CID_AUDIO_LOUDNESS, 0, 1, 1, 1);
        sd->ctrl_handler = &state->hdl;
-       err = state->hdl.error;
-       if (err) {
+       if (state->hdl.error) {
+               int err = state->hdl.error;
+
                v4l2_ctrl_handler_free(&state->hdl);
                kfree(state);
                return err;
@@ -266,25 +236,29 @@ static int wm8775_probe(struct i2c_client *client,
        wm8775_write(sd, R23, 0x000);
        /* Disable zero cross detect timeout */
        wm8775_write(sd, R7, 0x000);
-       /* HPF enable, I2S mode, 24-bit */
-       wm8775_write(sd, R11, 0x022);
+       /* Left justified, 24-bit mode */
+       wm8775_write(sd, R11, 0x021);
        /* Master mode, clock ratio 256fs */
        wm8775_write(sd, R12, 0x102);
        /* Powered up */
        wm8775_write(sd, R13, 0x000);
-       /* ALC stereo, ALC target level -5dB FS, ALC max gain +8dB */
-       wm8775_write(sd, R16, 0x1bb);
-       /* Set ALC mode and hold time */
-       wm8775_write(sd, R17, (state->loud->val ? ALC_EN : 0) | ALC_HOLD);
+       /* ADC gain +2.5dB, enable zero cross */
+       wm8775_write(sd, R14, 0x1d4);
+       /* ADC gain +2.5dB, enable zero cross */
+       wm8775_write(sd, R15, 0x1d4);
+       /* ALC Stereo, ALC target level -1dB FS max gain +8dB */
+       wm8775_write(sd, R16, 0x1bf);
+       /* Enable gain control, use zero cross detection,
+          ALC hold time 42.6 ms */
+       wm8775_write(sd, R17, 0x185);
        /* ALC gain ramp up delay 34 s, ALC gain ramp down delay 33 ms */
        wm8775_write(sd, R18, 0x0a2);
        /* Enable noise gate, threshold -72dBfs */
        wm8775_write(sd, R19, 0x005);
-       /* Transient window 4ms, ALC min gain -5dB  */
-       wm8775_write(sd, R20, 0x0fb);
-
-       wm8775_set_audio(sd, 1);      /* set volume/mute/mux */
-
+       /* Transient window 4ms, lower PGA gain limit -1dB */
+       wm8775_write(sd, R20, 0x07a);
+       /* LRBOTH = 1, use input 2. */
+       wm8775_write(sd, R21, 0x102);
        return 0;
 }
 
index dbe1c93c1af3eeac287cc795f16307aa44f86ccc..d9640a623ff483446822e4370127c376d3c631c5 100644 (file)
@@ -303,7 +303,7 @@ static irqreturn_t ab8500_irq(int irq, void *dev)
                        continue;
 
                do {
-                       int bit = __ffs(status);
+                       int bit = __ffs(value);
                        int line = i * 8 + bit;
 
                        handle_nested_irq(ab8500->irq_base + line);
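
The one-character ab8500 fix matters because the loop clears bits from value as it dispatches them; scanning status, which never changes inside the loop, would redeliver the same interrupt line forever. A runnable userspace model of the dispatch loop, with __builtin_ctzl standing in for the kernel's __ffs():

    #include <stdio.h>

    /* Userspace stand-in for the kernel's __ffs(): index of the
     * lowest set bit (undefined for 0, as in the kernel). */
    static int my_ffs(unsigned long v)
    {
            return __builtin_ctzl(v);
    }

    int main(void)
    {
            unsigned long value = 0x29; /* latched bits 0, 3, 5 of one bank */
            int bank = 2;

            /* Dispatch each set bit exactly once; clearing the bit just
             * handled is what makes the loop terminate -- the original
             * bug scanned a variable that was never cleared. */
            while (value) {
                    int bit = my_ffs(value);

                    printf("nested irq line %d\n", bank * 8 + bit);
                    value &= ~(1UL << bit);
            }
            return 0;
    }
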
index 7d2563fc15c6096ba30a37f15eb4c95c3cfae68a..76cadcf3b1fee2ccf996d318c01137c53ba852a7 100644 (file)
@@ -1455,7 +1455,11 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
                dev_err(wm831x->dev, "Failed to read parent ID: %d\n", ret);
                goto err;
        }
-       if (ret != 0x6204) {
+       switch (ret) {
+       case 0x6204:
+       case 0x6246:
+               break;
+       default:
                dev_err(wm831x->dev, "Device is not a WM831x: ID %x\n", ret);
                ret = -EINVAL;
                goto err;
@@ -1620,7 +1624,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
        case WM8325:
                ret = mfd_add_devices(wm831x->dev, -1,
                                      wm8320_devs, ARRAY_SIZE(wm8320_devs),
-                                     NULL, 0);
+                                     NULL, wm831x->irq_base);
                break;
 
        default:
index 31ae07a36576678fb27dbd294623f6f8ac21cc9d..57dcf8fa774a7302be1fbb6f0104d6320c6287c8 100644 (file)
@@ -1773,6 +1773,7 @@ int mmc_pm_notify(struct notifier_block *notify_block,
 
        case PM_POST_SUSPEND:
        case PM_POST_HIBERNATION:
+       case PM_POST_RESTORE:
 
                spin_lock_irqsave(&host->lock, flags);
                host->rescan_disable = 0;
index 591ab540b407ad9805c3aa384795b2c59a7d8c69..d3e6a962f42343ac4f30fdd7dc7da8f13ff8a95a 100644 (file)
@@ -69,6 +69,7 @@
 #include <linux/highmem.h>
 
 #include <linux/mmc/host.h>
+#include <linux/mmc/sdio.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -493,10 +494,14 @@ static void at91_mci_send_command(struct at91mci_host *host, struct mmc_command
                else if (data->flags & MMC_DATA_WRITE)
                        cmdr |= AT91_MCI_TRCMD_START;
 
-               if (data->flags & MMC_DATA_STREAM)
-                       cmdr |= AT91_MCI_TRTYP_STREAM;
-               if (data->blocks > 1)
-                       cmdr |= AT91_MCI_TRTYP_MULTIPLE;
+               if (cmd->opcode == SD_IO_RW_EXTENDED) {
+                       cmdr |= AT91_MCI_TRTYP_SDIO_BLOCK;
+               } else {
+                       if (data->flags & MMC_DATA_STREAM)
+                               cmdr |= AT91_MCI_TRTYP_STREAM;
+                       if (data->blocks > 1)
+                               cmdr |= AT91_MCI_TRTYP_MULTIPLE;
+               }
        }
        else {
                block_length = 0;
index 301351a5d83853f877953a1ecd10553ecc8f5c22..ad2a7a032cdf02478a80cd87055abce9648e31ca 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/stat.h>
 
 #include <linux/mmc/host.h>
+#include <linux/mmc/sdio.h>
 
 #include <mach/atmel-mci.h>
 #include <linux/atmel-mci.h>
@@ -532,12 +533,17 @@ static u32 atmci_prepare_command(struct mmc_host *mmc,
        data = cmd->data;
        if (data) {
                cmdr |= MCI_CMDR_START_XFER;
-               if (data->flags & MMC_DATA_STREAM)
-                       cmdr |= MCI_CMDR_STREAM;
-               else if (data->blocks > 1)
-                       cmdr |= MCI_CMDR_MULTI_BLOCK;
-               else
-                       cmdr |= MCI_CMDR_BLOCK;
+
+               if (cmd->opcode == SD_IO_RW_EXTENDED) {
+                       cmdr |= MCI_CMDR_SDIO_BLOCK;
+               } else {
+                       if (data->flags & MMC_DATA_STREAM)
+                               cmdr |= MCI_CMDR_STREAM;
+                       else if (data->blocks > 1)
+                               cmdr |= MCI_CMDR_MULTI_BLOCK;
+                       else
+                               cmdr |= MCI_CMDR_BLOCK;
+               }
 
                if (data->flags & MMC_DATA_READ)
                        cmdr |= MCI_CMDR_TRDIR_READ;
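
Both MCI drivers gain the same special case: the data phase of SDIO CMD53 (SD_IO_RW_EXTENDED) must be programmed as an SDIO block transfer, not classified by MMC_DATA_STREAM or the block count as ordinary MMC commands are. A sketch of the shared decision, with placeholder TRTYP_* encodings since each controller defines its own bits:

    #include <linux/mmc/core.h>
    #include <linux/mmc/sdio.h>

    /* Placeholder encodings; each controller has its own TRTYP bits. */
    enum my_trtyp { TRTYP_SINGLE, TRTYP_STREAM, TRTYP_MULTIPLE, TRTYP_SDIO_BLOCK };

    static enum my_trtyp my_pick_trtyp(struct mmc_command *cmd,
                                       struct mmc_data *data)
    {
            /* CMD53 data phases are SDIO block transfers; classifying
             * them by MMC_DATA_STREAM or blocks would mis-frame them. */
            if (cmd->opcode == SD_IO_RW_EXTENDED)
                    return TRTYP_SDIO_BLOCK;
            if (data->flags & MMC_DATA_STREAM)
                    return TRTYP_STREAM;
            return data->blocks > 1 ? TRTYP_MULTIPLE : TRTYP_SINGLE;
    }
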
index 09b099bfab2b1b75c5dc7e3f0b323393e0124f8a..bdf11d89a4997887fc1c1145b393ef91ad5785a4 100644 (file)
@@ -702,6 +702,7 @@ static int __devinit atl1c_sw_init(struct atl1c_adapter *adapter)
 
 
        adapter->wol = 0;
+       device_set_wakeup_enable(&pdev->dev, false);
        adapter->link_speed = SPEED_0;
        adapter->link_duplex = FULL_DUPLEX;
        adapter->num_rx_queues = AT_DEF_RECEIVE_QUEUE;
@@ -2444,8 +2445,9 @@ static int atl1c_close(struct net_device *netdev)
        return 0;
 }
 
-static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
+static int atl1c_suspend(struct device *dev)
 {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct atl1c_adapter *adapter = netdev_priv(netdev);
        struct atl1c_hw *hw = &adapter->hw;
@@ -2454,7 +2456,6 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
        u32 wol_ctrl_data = 0;
        u16 mii_intr_status_data = 0;
        u32 wufc = adapter->wol;
-       int retval = 0;
 
        atl1c_disable_l0s_l1(hw);
        if (netif_running(netdev)) {
@@ -2462,9 +2463,6 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
                atl1c_down(adapter);
        }
        netif_device_detach(netdev);
-       retval = pci_save_state(pdev);
-       if (retval)
-               return retval;
 
        if (wufc)
                if (atl1c_phy_power_saving(hw) != 0)
@@ -2525,12 +2523,8 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
                AT_WRITE_REG(hw, REG_WOL_CTRL, wol_ctrl_data);
                AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
 
-               /* pcie patch */
-               device_set_wakeup_enable(&pdev->dev, 1);
-
                AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_DEFAULT |
                        GPHY_CTRL_EXT_RESET);
-               pci_prepare_to_sleep(pdev);
        } else {
                AT_WRITE_REG(hw, REG_GPHY_CTRL, GPHY_CTRL_POWER_SAVING);
                master_ctrl_data |= MASTER_CTRL_CLK_SEL_DIS;
@@ -2540,25 +2534,17 @@ static int atl1c_suspend(struct pci_dev *pdev, pm_message_t state)
                AT_WRITE_REG(hw, REG_MAC_CTRL, mac_ctrl_data);
                AT_WRITE_REG(hw, REG_WOL_CTRL, 0);
                hw->phy_configured = false; /* re-init PHY when resume */
-               pci_enable_wake(pdev, pci_choose_state(pdev, state), 0);
        }
 
-       pci_disable_device(pdev);
-       pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
        return 0;
 }
 
-static int atl1c_resume(struct pci_dev *pdev)
+static int atl1c_resume(struct device *dev)
 {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct atl1c_adapter *adapter = netdev_priv(netdev);
 
-       pci_set_power_state(pdev, PCI_D0);
-       pci_restore_state(pdev);
-       pci_enable_wake(pdev, PCI_D3hot, 0);
-       pci_enable_wake(pdev, PCI_D3cold, 0);
-
        AT_WRITE_REG(&adapter->hw, REG_WOL_CTRL, 0);
        atl1c_reset_pcie(&adapter->hw, ATL1C_PCIE_L0S_L1_DISABLE |
                        ATL1C_PCIE_PHY_RESET);
@@ -2582,7 +2568,12 @@ static int atl1c_resume(struct pci_dev *pdev)
 
 static void atl1c_shutdown(struct pci_dev *pdev)
 {
-       atl1c_suspend(pdev, PMSG_SUSPEND);
+       struct net_device *netdev = pci_get_drvdata(pdev);
+       struct atl1c_adapter *adapter = netdev_priv(netdev);
+
+       atl1c_suspend(&pdev->dev);
+       pci_wake_from_d3(pdev, adapter->wol);
+       pci_set_power_state(pdev, PCI_D3hot);
 }
 
 static const struct net_device_ops atl1c_netdev_ops = {
@@ -2886,16 +2877,16 @@ static struct pci_error_handlers atl1c_err_handler = {
        .resume = atl1c_io_resume,
 };
 
+static SIMPLE_DEV_PM_OPS(atl1c_pm_ops, atl1c_suspend, atl1c_resume);
+
 static struct pci_driver atl1c_driver = {
        .name     = atl1c_driver_name,
        .id_table = atl1c_pci_tbl,
        .probe    = atl1c_probe,
        .remove   = __devexit_p(atl1c_remove),
-       /* Power Managment Hooks */
-       .suspend  = atl1c_suspend,
-       .resume   = atl1c_resume,
        .shutdown = atl1c_shutdown,
-       .err_handler = &atl1c_err_handler
+       .err_handler = &atl1c_err_handler,
+       .driver.pm = &atl1c_pm_ops,
 };
 
 /*
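
The atl1c changes move power management from the legacy pci_driver .suspend/.resume hooks to dev_pm_ops; under dev_pm_ops the PCI core itself saves and restores config space, arms wakeup and performs the D-state transitions, which is why those calls vanish from the driver above. The skeleton of such a conversion (hypothetical driver, callbacks stubbed):

    #include <linux/pci.h>
    #include <linux/pm.h>

    static int my_suspend(struct device *dev)
    {
            /* Quiesce the NIC only: with dev_pm_ops the PCI core does
             * pci_save_state(), wakeup arming and the D-state change. */
            return 0;
    }

    static int my_resume(struct device *dev)
    {
            /* Config space is already restored when this runs. */
            return 0;
    }

    static SIMPLE_DEV_PM_OPS(my_pm_ops, my_suspend, my_resume);

    static struct pci_driver my_driver = {
            .name           = "my_nic",
            /* .id_table, .probe, .remove, .shutdown elided */
            .driver.pm      = &my_pm_ops,
    };
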
index 53363108994ee93bed670940b0eb91170a389456..3acf5123a6efa8c0e21d76b7deb487a76d21ea3e 100644 (file)
@@ -3504,6 +3504,8 @@ static int atl1_set_ringparam(struct net_device *netdev,
        struct atl1_rfd_ring rfd_old, rfd_new;
        struct atl1_rrd_ring rrd_old, rrd_new;
        struct atl1_ring_header rhdr_old, rhdr_new;
+       struct atl1_smb smb;
+       struct atl1_cmb cmb;
        int err;
 
        tpd_old = adapter->tpd_ring;
@@ -3544,11 +3546,19 @@ static int atl1_set_ringparam(struct net_device *netdev,
                adapter->rrd_ring = rrd_old;
                adapter->tpd_ring = tpd_old;
                adapter->ring_header = rhdr_old;
+               /*
+                * Save SMB and CMB, since atl1_free_ring_resources
+                * will clear them.
+                */
+               smb = adapter->smb;
+               cmb = adapter->cmb;
                atl1_free_ring_resources(adapter);
                adapter->rfd_ring = rfd_new;
                adapter->rrd_ring = rrd_new;
                adapter->tpd_ring = tpd_new;
                adapter->ring_header = rhdr_new;
+               adapter->smb = smb;
+               adapter->cmb = cmb;
 
                err = atl1_up(adapter);
                if (err)
index 4594a28b1f665ef7923aa462739b96e39182ac5e..d64313b7090e2217038130bb314cedde2bb7c152 100644 (file)
@@ -234,7 +234,7 @@ struct be_adapter {
        u8 __iomem *db;         /* Door Bell */
        u8 __iomem *pcicfg;     /* PCI config space */
 
-       spinlock_t mbox_lock;   /* For serializing mbox cmds to BE card */
+       struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
        struct be_dma_mem mbox_mem;
        /* Mbox mem is adjusted to align to 16 bytes. The allocated addr
         * is stored for freeing purpose */
index e4465d222a7d019654c27c5f4d3db89067f89cf8..1c8c79c9d214e45feb59dbf20b439be1d369702a 100644 (file)
@@ -462,7 +462,8 @@ int be_cmd_fw_init(struct be_adapter *adapter)
        u8 *wrb;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = (u8 *)wrb_from_mbox(adapter);
        *wrb++ = 0xFF;
@@ -476,7 +477,7 @@ int be_cmd_fw_init(struct be_adapter *adapter)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -491,7 +492,8 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
        if (adapter->eeh_err)
                return -EIO;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = (u8 *)wrb_from_mbox(adapter);
        *wrb++ = 0xFF;
@@ -505,7 +507,7 @@ int be_cmd_fw_clean(struct be_adapter *adapter)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 int be_cmd_eq_create(struct be_adapter *adapter,
@@ -516,7 +518,8 @@ int be_cmd_eq_create(struct be_adapter *adapter,
        struct be_dma_mem *q_mem = &eq->dma_mem;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -546,7 +549,7 @@ int be_cmd_eq_create(struct be_adapter *adapter,
                eq->created = true;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -558,7 +561,8 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
        struct be_cmd_req_mac_query *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -583,7 +587,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
                memcpy(mac_addr, resp->mac.addr, ETH_ALEN);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -667,7 +671,8 @@ int be_cmd_cq_create(struct be_adapter *adapter,
        void *ctxt;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -701,7 +706,7 @@ int be_cmd_cq_create(struct be_adapter *adapter,
                cq->created = true;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -724,7 +729,8 @@ int be_cmd_mccq_create(struct be_adapter *adapter,
        void *ctxt;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -754,7 +760,7 @@ int be_cmd_mccq_create(struct be_adapter *adapter,
                mccq->id = le16_to_cpu(resp->id);
                mccq->created = true;
        }
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -769,7 +775,8 @@ int be_cmd_txq_create(struct be_adapter *adapter,
        void *ctxt;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -801,7 +808,7 @@ int be_cmd_txq_create(struct be_adapter *adapter,
                txq->created = true;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -816,7 +823,8 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
        struct be_dma_mem *q_mem = &rxq->dma_mem;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -843,7 +851,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter,
                *rss_id = resp->rss_id;
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -862,7 +870,8 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
        if (adapter->eeh_err)
                return -EIO;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -899,7 +908,7 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q,
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -915,7 +924,8 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
        struct be_cmd_req_if_create *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -941,7 +951,7 @@ int be_cmd_if_create(struct be_adapter *adapter, u32 cap_flags, u32 en_flags,
                        *pmac_id = le32_to_cpu(resp->pmac_id);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -955,7 +965,8 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id)
        if (adapter->eeh_err)
                return -EIO;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -970,7 +981,7 @@ int be_cmd_if_destroy(struct be_adapter *adapter, u32 interface_id)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
 
        return status;
 }
@@ -1060,7 +1071,8 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter, char *fw_ver)
        struct be_cmd_req_get_fw_version *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1077,7 +1089,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter, char *fw_ver)
                strncpy(fw_ver, resp->firmware_version_string, FW_VER_LEN);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -1322,7 +1334,8 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
        struct be_cmd_req_query_fw_cfg *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1341,7 +1354,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
                *caps = le32_to_cpu(resp->function_caps);
        }
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -1352,7 +1365,8 @@ int be_cmd_reset_function(struct be_adapter *adapter)
        struct be_cmd_req_hdr *req;
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1365,7 +1379,7 @@ int be_cmd_reset_function(struct be_adapter *adapter)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
@@ -1376,7 +1390,8 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size)
        u32 myhash[10];
        int status;
 
-       spin_lock(&adapter->mbox_lock);
+       if (mutex_lock_interruptible(&adapter->mbox_lock))
+               return -1;
 
        wrb = wrb_from_mbox(adapter);
        req = embedded_payload(wrb);
@@ -1396,7 +1411,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, u16 table_size)
 
        status = be_mbox_notify_wait(adapter);
 
-       spin_unlock(&adapter->mbox_lock);
+       mutex_unlock(&adapter->mbox_lock);
        return status;
 }
 
index 93354eee2cfd1e679929054c9cc28fe7b35dea51..fd251b59b7f96b4d58ca0f7b22e91e22485d7005 100644 (file)
@@ -2677,7 +2677,7 @@ static int be_ctrl_init(struct be_adapter *adapter)
        }
        memset(mc_cmd_mem->va, 0, mc_cmd_mem->size);
 
-       spin_lock_init(&adapter->mbox_lock);
+       mutex_init(&adapter->mbox_lock);
        spin_lock_init(&adapter->mcc_lock);
        spin_lock_init(&adapter->mcc_cq_lock);
 
index 121b073a6c3fae1985c164071f0a8f285dba0c14..84fbd4ebd778d472a9cf115cfc4faaf72e92939e 100644 (file)
@@ -88,7 +88,12 @@ static void bond_na_send(struct net_device *slave_dev,
        }
 
        if (vlan_id) {
-               skb = vlan_put_tag(skb, vlan_id);
+               /* The Ethernet header is not present yet, so it is
+                * too early to insert a VLAN tag.  Force use of an
+                * out-of-line tag here and let dev_hard_start_xmit()
+                * insert it if the slave hardware can't.
+                */
+               skb = __vlan_hwaccel_put_tag(skb, vlan_id);
                if (!skb) {
                        pr_err("failed to insert VLAN tag\n");
                        return;
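
At this point in bond_na_send() the Ethernet header has not been built yet, so the in-band tagging path cannot work; recording the tag out of line defers insertion to dev_hard_start_xmit(), which pushes it into the frame only when the slave lacks hardware tag insertion. A sketch of the distinction (helper name hypothetical):

    #include <linux/if_vlan.h>
    #include <linux/skbuff.h>

    /* Tagging before the Ethernet header exists.  The in-band helper
     * __vlan_put_tag() rewrites the frame (and may free the skb and
     * return NULL); __vlan_hwaccel_put_tag() only records the TCI in
     * skb->vlan_tci, so it is safe this early, and the stack inserts
     * the tag at transmit time if the device cannot. */
    static struct sk_buff *my_early_vlan_tag(struct sk_buff *skb, u16 vlan_id)
    {
            return __vlan_hwaccel_put_tag(skb, vlan_id);
    }
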
index d0ea760ce419f0ae8040bf880cd80013c651a93a..3b16c34ed86e93ddfacd0338ef42f687ab70fce7 100644 (file)
@@ -418,36 +418,11 @@ struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr)
  * @bond: bond device that got this skb for tx.
  * @skb: hw accel VLAN tagged skb to transmit
  * @slave_dev: slave that is supposed to xmit this skbuff
- *
- * When the bond gets an skb to transmit that is
- * already hardware accelerated VLAN tagged, and it
- * needs to relay this skb to a slave that is not
- * hw accel capable, the skb needs to be "unaccelerated",
- * i.e. strip the hwaccel tag and re-insert it as part
- * of the payload.
  */
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
                        struct net_device *slave_dev)
 {
-       unsigned short uninitialized_var(vlan_id);
-
-       /* Test vlan_list not vlgrp to catch and handle 802.1p tags */
-       if (!list_empty(&bond->vlan_list) &&
-           !(slave_dev->features & NETIF_F_HW_VLAN_TX) &&
-           vlan_get_tag(skb, &vlan_id) == 0) {
-               skb->dev = slave_dev;
-               skb = vlan_put_tag(skb, vlan_id);
-               if (!skb) {
-                       /* vlan_put_tag() frees the skb in case of error,
-                        * so return success here so the calling functions
-                        * won't attempt to free is again.
-                        */
-                       return 0;
-               }
-       } else {
-               skb->dev = slave_dev;
-       }
-
+       skb->dev = slave_dev;
        skb->priority = 1;
 #ifdef CONFIG_NET_POLL_CONTROLLER
        if (unlikely(bond->dev->priv_flags & IFF_IN_NETPOLL)) {
@@ -1203,11 +1178,13 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                                bond_do_fail_over_mac(bond, new_active,
                                                      old_active);
 
-                       bond->send_grat_arp = bond->params.num_grat_arp;
-                       bond_send_gratuitous_arp(bond);
+                       if (netif_running(bond->dev)) {
+                               bond->send_grat_arp = bond->params.num_grat_arp;
+                               bond_send_gratuitous_arp(bond);
 
-                       bond->send_unsol_na = bond->params.num_unsol_na;
-                       bond_send_unsolicited_na(bond);
+                               bond->send_unsol_na = bond->params.num_unsol_na;
+                               bond_send_unsolicited_na(bond);
+                       }
 
                        write_unlock_bh(&bond->curr_slave_lock);
                        read_unlock(&bond->lock);
@@ -1221,8 +1198,9 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 
        /* resend IGMP joins since active slave has changed or
         * all were sent on curr_active_slave */
-       if ((USES_PRIMARY(bond->params.mode) && new_active) ||
-           bond->params.mode == BOND_MODE_ROUNDROBIN) {
+       if (((USES_PRIMARY(bond->params.mode) && new_active) ||
+            bond->params.mode == BOND_MODE_ROUNDROBIN) &&
+           netif_running(bond->dev)) {
                bond->igmp_retrans = bond->params.resend_igmp;
                queue_delayed_work(bond->wq, &bond->mcast_work, 0);
        }
index c2f081352a037894d6e8238ccb70c39e796c4a69..4feeb2d650a4f478bc6ab4f3c70db3f3563c622d 100644 (file)
@@ -269,11 +269,11 @@ static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct n
 
        bond_for_each_slave(bond, slave, i) {
                if (slave->dev == slave_dev) {
-                       break;
+                       return slave;
                }
        }
 
-       return slave;
+       return NULL;
 }
 
 static inline struct bonding *bond_get_bond_by_slave(struct slave *slave)
index 92bac19ad60ab0be5383b7566f48cc5dbbf2ab8e..6dff32196c92bcb65d10bda3c54cbb0048dee416 100644 (file)
@@ -940,7 +940,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
                                           &udev->l2_ring_map,
                                           GFP_KERNEL | __GFP_COMP);
        if (!udev->l2_ring)
-               return -ENOMEM;
+               goto err_udev;
 
        udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
        udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size);
@@ -948,7 +948,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
                                          &udev->l2_buf_map,
                                          GFP_KERNEL | __GFP_COMP);
        if (!udev->l2_buf)
-               return -ENOMEM;
+               goto err_dma;
 
        write_lock(&cnic_dev_lock);
        list_add(&udev->list, &cnic_udev_list);
@@ -959,6 +959,12 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages)
        cp->udev = udev;
 
        return 0;
+ err_dma:
+       dma_free_coherent(&udev->pdev->dev, udev->l2_ring_size,
+                         udev->l2_ring, udev->l2_ring_map);
+ err_udev:
+       kfree(udev);
+       return -ENOMEM;
 }
 
 static int cnic_init_uio(struct cnic_dev *dev)
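
The cnic fix replaces bare error returns, which leaked udev and the first DMA area, with the kernel's usual goto-unwind ladder: labels in reverse allocation order, so a failure at step N releases exactly steps N-1 down to 1. The idiom in self-contained form (plain C, hypothetical resources):

    #include <stdlib.h>

    /* Goto-based unwinding, as in the cnic fix above. */
    static int my_setup(void)
    {
            void *ring, *buf;

            ring = malloc(4096);
            if (!ring)
                    goto err_out;

            buf = malloc(8192);
            if (!buf)
                    goto err_ring;  /* ring exists and must be freed */

            /* ... success path would register ring/buf here ... */
            free(buf);
            free(ring);
            return 0;

    err_ring:
            free(ring);
    err_out:
            return -1;
    }

    int main(void)
    {
            return my_setup();
    }
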
index 1f37ee6b2a2626282fd5a772cc21f821321b9379..d6cf502906cfeeafa41b1072a9aadf8a97a5b475 100644 (file)
@@ -263,6 +263,13 @@ static void ehea_get_ethtool_stats(struct net_device *dev,
 
 static int ehea_set_flags(struct net_device *dev, u32 data)
 {
+       /* Avoid changing the VLAN flags */
+       if ((data & (ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN)) !=
+           (ethtool_op_get_flags(dev) & (ETH_FLAG_RXVLAN |
+                                         ETH_FLAG_TXVLAN))){
+               return -EINVAL;
+       }
+
        return ethtool_op_set_flags(dev, data, ETH_FLAG_LRO
                                        | ETH_FLAG_TXVLAN
                                        | ETH_FLAG_RXVLAN);
index aa56963ad55820e2a763d64c102a36ae1b35dabf..c353bf3113cc335088e4322703a91cd5add4d60d 100644 (file)
@@ -935,7 +935,7 @@ static void epic_init_ring(struct net_device *dev)
 
        /* Fill in the Rx buffers.  Handle allocation failure gracefully. */
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(ep->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(ep->rx_buf_sz + 2);
                ep->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1233,7 +1233,7 @@ static int epic_rx(struct net_device *dev, int budget)
                entry = ep->dirty_rx % RX_RING_SIZE;
                if (ep->rx_skbuff[entry] == NULL) {
                        struct sk_buff *skb;
-                       skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz);
+                       skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz + 2);
                        if (skb == NULL)
                                break;
                        skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
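
Several receive-ring paths in this merge get the same fix: allocate two bytes beyond rx_buf_sz so that the skb_reserve(skb, 2) which follows leaves the full DMA buffer intact. Offsetting by 2 makes the 14-byte Ethernet header end on a four-byte boundary, which aligns the IP header behind it. The pattern as a sketch:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    /* Ask for headroom *in addition to* the DMA buffer size, then
     * shift by 2 so the IP header lands on a 4-byte boundary while
     * the full rx_buf_sz remains available for the hardware. */
    static struct sk_buff *my_rx_alloc(unsigned int rx_buf_sz)
    {
            struct sk_buff *skb = dev_alloc_skb(rx_buf_sz + 2);

            if (!skb)
                    return NULL;
            skb_reserve(skb, 2);
            return skb;
    }
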
index 9a6485892b3d48dfbcf783a27b670be83704e803..80d25ed533447735d55804e152bb8d581fc911ef 100644 (file)
@@ -1202,7 +1202,7 @@ static void hamachi_init_ring(struct net_device *dev)
        }
        /* Fill in the Rx buffers.  Handle allocation failure gracefully. */
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz + 2);
                hmp->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1669,7 +1669,7 @@ static int hamachi_rx(struct net_device *dev)
                entry = hmp->dirty_rx % RX_RING_SIZE;
                desc = &(hmp->rx_ring[entry]);
                if (hmp->rx_skbuff[entry] == NULL) {
-                       struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz);
+                       struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz + 2);
 
                        hmp->rx_skbuff[entry] = skb;
                        if (skb == NULL)
index 8a4d19e5de064bd3fbbb11a2d2e916a28cf593b0..f1047dd8a526715857929e8ca86a47d846e5a15e 100644 (file)
@@ -690,6 +690,7 @@ static void block_output(struct net_device *dev, int count,
 static struct pcmcia_device_id axnet_ids[] = {
        PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x016c, 0x0081),
        PCMCIA_DEVICE_MANF_CARD(0x018a, 0x0301),
+       PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x2328),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0301),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0303),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0309),
index d05c44692f08306f4dc04dbbb30098c46fd8e362..2c158910f7ea8f1784f629df23bfee2a21cc41c9 100644 (file)
@@ -1493,7 +1493,6 @@ static struct pcmcia_device_id pcnet_ids[] = {
        PCMCIA_DEVICE_MANF_CARD(0x0149, 0x4530),
        PCMCIA_DEVICE_MANF_CARD(0x0149, 0xc1ab),
        PCMCIA_DEVICE_MANF_CARD(0x0186, 0x0110),
-       PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x2328),
        PCMCIA_DEVICE_MANF_CARD(0x01bf, 0x8041),
        PCMCIA_DEVICE_MANF_CARD(0x0213, 0x2452),
        PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0300),
index 39659976a1acfb1589834368aab20080c894d6eb..89294b43c4a901f8ce927e1981ad317b95538f44 100644 (file)
@@ -1285,6 +1285,11 @@ ppp_push(struct ppp *ppp)
 }
 
 #ifdef CONFIG_PPP_MULTILINK
+static bool mp_protocol_compress __read_mostly = true;
+module_param(mp_protocol_compress, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(mp_protocol_compress,
+                "compress protocol id in multilink fragments");
+
 /*
  * Divide a packet to be transmitted into fragments and
  * send them out the individual links.
@@ -1347,10 +1352,10 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
        if (nfree == 0 || nfree < navail / 2)
                return 0; /* can't take now, leave it in xmit_pending */
 
-       /* Do protocol field compression (XXX this should be optional) */
+       /* Do protocol field compression */
        p = skb->data;
        len = skb->len;
-       if (*p == 0) {
+       if (*p == 0 && mp_protocol_compress) {
                ++p;
                --len;
        }
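
The new ppp knob is a textbook module parameter: a __read_mostly bool, readable by everyone and writable by root through /sys/module/<module>/parameters/, described for modinfo via MODULE_PARM_DESC. The same pattern for a hypothetical module:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    /* Boolean tunable: settable at load time (my_mod.enable_foo=0)
     * and at runtime via /sys/module/my_mod/parameters/enable_foo,
     * because of the S_IWUSR bit. */
    static bool enable_foo __read_mostly = true;
    module_param(enable_foo, bool, S_IRUGO | S_IWUSR);
    MODULE_PARM_DESC(enable_foo, "enable the foo fast path");
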
index 0a66fed52e8ed48e9a82bfdcfb831957acc491cf..16c62659cdd96040e7831d0e7d7e18bc58832752 100644 (file)
@@ -412,7 +412,7 @@ static  int skfp_driver_init(struct net_device *dev)
                bp->SharedMemAddr = pci_alloc_consistent(&bp->pdev,
                                                         bp->SharedMemSize,
                                                         &bp->SharedMemDMA);
-               if (!bp->SharedMemSize) {
+               if (!bp->SharedMemAddr) {
                        printk("could not allocate mem for ");
                        printk("hardware module: %ld byte\n",
                               bp->SharedMemSize);
index 4adf124227877e704fe66a7bb74919738bbd46c5..a4f2bd52e546f522995df2fb7fae5721f94e0244 100644 (file)
@@ -148,7 +148,7 @@ static int full_duplex[MAX_UNITS] = {0, };
  * This SUCKS.
  * We need a much better method to determine if dma_addr_t is 64-bit.
  */
-#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || defined(__mips64__) || (defined(__mips__) && defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT))
+#if (defined(__i386__) && defined(CONFIG_HIGHMEM64G)) || defined(__x86_64__) || defined (__ia64__) || defined(__alpha__) || (defined(CONFIG_MIPS) && ((defined(CONFIG_HIGHMEM) && defined(CONFIG_64BIT_PHYS_ADDR)) || defined(CONFIG_64BIT))) || (defined(__powerpc64__) || defined(CONFIG_PHYS_64BIT))
 /* 64-bit dma_addr_t */
 #define ADDR_64BITS    /* This chip uses 64 bit addresses. */
 #define netdrv_addr_t __le64
index 3ed2a67bd6d36c5da4608570bc8d494faa614f59..b409d7ec4ac15ff5e5eb82c2aff17111d48954a3 100644 (file)
@@ -1016,7 +1016,7 @@ static void init_ring(struct net_device *dev)
 
        /* Fill in the Rx buffers.  Handle allocation failure gracefully. */
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + 2);
                np->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1407,7 +1407,7 @@ static void refill_rx (struct net_device *dev)
                struct sk_buff *skb;
                entry = np->dirty_rx % RX_RING_SIZE;
                if (np->rx_skbuff[entry] == NULL) {
-                       skb = dev_alloc_skb(np->rx_buf_sz);
+                       skb = dev_alloc_skb(np->rx_buf_sz + 2);
                        np->rx_skbuff[entry] = skb;
                        if (skb == NULL)
                                break;          /* Better luck next round. */
index 8b3dc1eb401541e8e1c3de31ce5308d84f477845..296000bf5a25d9186826da3c3c2f401802409e07 100644 (file)
@@ -324,7 +324,7 @@ static int bdx_fw_load(struct bdx_priv *priv)
        ENTER;
        master = READ_REG(priv, regINIT_SEMAPHORE);
        if (!READ_REG(priv, regINIT_STATUS) && master) {
-               rc = request_firmware(&fw, "tehuti/firmware.bin", &priv->pdev->dev);
+               rc = request_firmware(&fw, "tehuti/bdx.bin", &priv->pdev->dev);
                if (rc)
                        goto out;
                bdx_tx_push_desc_safe(priv, (char *)fw->data, fw->size);
@@ -2510,4 +2510,4 @@ module_exit(bdx_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(BDX_DRV_DESC);
-MODULE_FIRMWARE("tehuti/firmware.bin");
+MODULE_FIRMWARE("tehuti/bdx.bin");
index 30ccbb6d097af220dced34f430723a3b3c5a9dca..6f97b7bbcbf13a29b9f2c567eb317a12ec51609c 100644 (file)
@@ -12658,7 +12658,7 @@ static void __devinit tg3_read_vpd(struct tg3 *tp)
                        cnt = pci_read_vpd(tp->pdev, pos,
                                           TG3_NVM_VPD_LEN - pos,
                                           &vpd_data[pos]);
-                       if (cnt == -ETIMEDOUT || -EINTR)
+                       if (cnt == -ETIMEDOUT || cnt == -EINTR)
                                cnt = 0;
                        else if (cnt < 0)
                                goto out_not_found;
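
The tg3 change fixes an operator-precedence bug: cnt == -ETIMEDOUT || -EINTR parses as (cnt == -ETIMEDOUT) || (-EINTR), and because -EINTR is a nonzero constant the condition holds for every cnt, zeroing out even successful VPD reads. A runnable demonstration:

    #include <errno.h>
    #include <stdio.h>

    int main(void)
    {
            int cnt = 42;   /* a successful read, not an error */

            /* Buggy form: '|| -EINTR' is a truth test of the constant
             * -4, so this branch is taken for every value of cnt. */
            if (cnt == -ETIMEDOUT || -EINTR)
                    printf("buggy: always true (cnt=%d)\n", cnt);

            /* Fixed form compares cnt against both error codes. */
            if (cnt == -ETIMEDOUT || cnt == -EINTR)
                    printf("fixed: only for the two errors\n");
            else
                    printf("fixed: %d treated as valid\n", cnt);
            return 0;
    }
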
index 5b83c3f35f47c3421a518aaa04fb39224dac2ed3..a3c46f6a15e7c5b02a91542406208928cdb23a6d 100644 (file)
@@ -1004,7 +1004,6 @@ typhoon_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
        }
 
        strcpy(info->driver, KBUILD_MODNAME);
-       strcpy(info->version, UTS_RELEASE);
        strcpy(info->bus_info, pci_name(pci_dev));
 }
 
index aea4645be7f68956472e6f6fbefeadb2e049f060..6140b56cce53d2205638803ce13702bcbfeb8910 100644 (file)
@@ -1507,6 +1507,10 @@ static const struct usb_device_id        products [] = {
        // ASIX AX88178 10/100/1000
        USB_DEVICE (0x0b95, 0x1780),
        .driver_info = (unsigned long) &ax88178_info,
+}, {
+       // Logitec LAN-GTJ/U2A
+       USB_DEVICE (0x0789, 0x0160),
+       .driver_info = (unsigned long) &ax88178_info,
 }, {
        // Linksys USB200M Rev 2
        USB_DEVICE (0x13b1, 0x0018),
index a6281e3987b5c47d6a061e1bb45538000bd6745e..2b791392e788ef893657d04056df673fc3ded672 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * MOSCHIP MCS7830 based USB 2.0 Ethernet Devices
+ * MOSCHIP MCS7830 based (7730/7830/7832) USB 2.0 Ethernet Devices
  *
  * based on usbnet.c, asix.c and the vendor provided mcs7830 driver
  *
@@ -11,6 +11,9 @@
  *
  * Definitions gathered from MOSCHIP, Data Sheet_7830DA.pdf (thanks!).
  *
+ * 2010-12-19: add 7832 USB PID ("functionality same as MCS7830"),
+ *             per active notification by manufacturer
+ *
  * TODO:
  * - support HIF_REG_CONFIG_SLEEPMODE/HIF_REG_CONFIG_TXENABLE (via autopm?)
  * - implement ethtool_ops get_pauseparam/set_pauseparam
@@ -60,6 +63,7 @@
 #define MCS7830_MAX_MCAST      64
 
 #define MCS7830_VENDOR_ID      0x9710
+#define MCS7832_PRODUCT_ID     0x7832
 #define MCS7830_PRODUCT_ID     0x7830
 #define MCS7730_PRODUCT_ID     0x7730
 
@@ -351,7 +355,7 @@ static int mcs7830_set_autoneg(struct usbnet *dev, int ptrUserPhyMode)
        if (!ret)
                ret = mcs7830_write_phy(dev, MII_BMCR,
                                BMCR_ANENABLE | BMCR_ANRESTART  );
-       return ret < 0 ? : 0;
+       return ret;
 }
 
 
@@ -626,7 +630,7 @@ static int mcs7830_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 }
 
 static const struct driver_info moschip_info = {
-       .description    = "MOSCHIP 7830/7730 usb-NET adapter",
+       .description    = "MOSCHIP 7830/7832/7730 usb-NET adapter",
        .bind           = mcs7830_bind,
        .rx_fixup       = mcs7830_rx_fixup,
        .flags          = FLAG_ETHER,
@@ -644,6 +648,10 @@ static const struct driver_info sitecom_info = {
 };
 
 static const struct usb_device_id products[] = {
+       {
+               USB_DEVICE(MCS7830_VENDOR_ID, MCS7832_PRODUCT_ID),
+               .driver_info = (unsigned long) &moschip_info,
+       },
        {
                USB_DEVICE(MCS7830_VENDOR_ID, MCS7830_PRODUCT_ID),
                .driver_info = (unsigned long) &moschip_info,
index 0bbc0c3231358de20f395d9fd34b05279e0e13fd..cc83fa71c3ffb970b5107aa812e1e1956c9c7c82 100644 (file)
@@ -166,7 +166,9 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        if (!(rcv->flags & IFF_UP))
                goto tx_drop;
 
-       if (dev->features & NETIF_F_NO_CSUM)
+       /* don't change ip_summed == CHECKSUM_PARTIAL, as that
+          will cause bad checksum on forwarded packets */
+       if (skb->ip_summed == CHECKSUM_NONE)
                skb->ip_summed = rcv_priv->ip_summed;
 
        length = skb->len + ETH_HLEN;
index 25a2722c8a986ce35a8a37a9076814095cc70ee3..1d9aed6457234fe0cc18c34fe198dc6ca4e4d62b 100644 (file)
@@ -891,7 +891,6 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local,
 
        SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops);
 
-       netif_stop_queue(dev);
 }
 
 static int hostap_enable_hostapd(local_info_t *local, int rtnl_locked)
index db540910b1104d42b5d20d15e542c3fc5da7649c..0e027f787fbce6452b0c78fefc24dbd30cb75100 100644 (file)
@@ -315,6 +315,7 @@ struct iwl_cfg iwl100_bgn_cfg = {
        .mod_params = &iwlagn_mod_params,
        .base_params = &iwl1000_base_params,
        .ht_params = &iwl1000_ht_params,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl100_bg_cfg = {
@@ -330,6 +331,7 @@ struct iwl_cfg iwl100_bg_cfg = {
        .ops = &iwl1000_ops,
        .mod_params = &iwlagn_mod_params,
        .base_params = &iwl1000_base_params,
+       .use_new_eeprom_reading = true,
 };
 
 MODULE_FIRMWARE(IWL1000_MODULE_FIRMWARE(IWL1000_UCODE_API_MAX));
index 11e6532fc573d1ed93d5ec4aaf886342d0fe8073..0ceeaac85eda1ab8a8dd8a046044b47dc5e5df63 100644 (file)
@@ -561,6 +561,7 @@ struct iwl_cfg iwl6000g2a_2agn_cfg = {
        .ht_params = &iwl6000_ht_params,
        .need_dc_calib = true,
        .need_temp_offset_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2a_2abg_cfg = {
@@ -578,6 +579,7 @@ struct iwl_cfg iwl6000g2a_2abg_cfg = {
        .base_params = &iwl6000_base_params,
        .need_dc_calib = true,
        .need_temp_offset_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2a_2bg_cfg = {
@@ -595,6 +597,7 @@ struct iwl_cfg iwl6000g2a_2bg_cfg = {
        .base_params = &iwl6000_base_params,
        .need_dc_calib = true,
        .need_temp_offset_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2agn_cfg = {
@@ -616,6 +619,7 @@ struct iwl_cfg iwl6000g2b_2agn_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2abg_cfg = {
@@ -636,6 +640,7 @@ struct iwl_cfg iwl6000g2b_2abg_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2bgn_cfg = {
@@ -657,6 +662,7 @@ struct iwl_cfg iwl6000g2b_2bgn_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_2bg_cfg = {
@@ -677,6 +683,7 @@ struct iwl_cfg iwl6000g2b_2bg_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_bgn_cfg = {
@@ -698,6 +705,7 @@ struct iwl_cfg iwl6000g2b_bgn_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6000g2b_bg_cfg = {
@@ -718,6 +726,7 @@ struct iwl_cfg iwl6000g2b_bg_cfg = {
        .need_temp_offset_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 /*
@@ -804,6 +813,7 @@ struct iwl_cfg iwl6050g2_bgn_cfg = {
        .base_params = &iwl6050_base_params,
        .ht_params = &iwl6000_ht_params,
        .need_dc_calib = true,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl6050_2abg_cfg = {
@@ -857,6 +867,7 @@ struct iwl_cfg iwl130_bgn_cfg = {
        .need_dc_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 struct iwl_cfg iwl130_bg_cfg = {
@@ -876,6 +887,7 @@ struct iwl_cfg iwl130_bg_cfg = {
        .need_dc_calib = true,
        /* Due to bluetooth, we transmit 2.4 GHz probes only on antenna A */
        .scan_tx_antennas[IEEE80211_BAND_2GHZ] = ANT_A,
+       .use_new_eeprom_reading = true,
 };
 
 MODULE_FIRMWARE(IWL6000_MODULE_FIRMWARE(IWL6000_UCODE_API_MAX));
index a650baba0809d1bf1d56d86f687f7dc07cf53d1b..9eeeda18748de6fb12a09cc5f5b74c90553a4bf4 100644 (file)
@@ -392,7 +392,7 @@ static s8 iwl_update_channel_txpower(struct iwl_priv *priv,
 /**
  * iwlcore_eeprom_enhanced_txpower: process enhanced tx power info
  */
-void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
+static void iwlcore_eeprom_enhanced_txpower_old(struct iwl_priv *priv)
 {
        int eeprom_section_count = 0;
        int section, element;
@@ -419,7 +419,8 @@ void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
                 * always check for valid entry before process
                 * the information
                 */
-               if (!enhanced_txpower->common || enhanced_txpower->reserved)
+               if (!(enhanced_txpower->flags || enhanced_txpower->channel) ||
+                   enhanced_txpower->delta_20_in_40)
                        continue;
 
                for (element = 0; element < eeprom_section_count; element++) {
@@ -452,3 +453,86 @@ void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
                }
        }
 }
+
+static void
+iwlcore_eeprom_enh_txp_read_element(struct iwl_priv *priv,
+                                   struct iwl_eeprom_enhanced_txpwr *txp,
+                                   s8 max_txpower_avg)
+{
+       int ch_idx;
+       bool is_ht40 = txp->flags & IWL_EEPROM_ENH_TXP_FL_40MHZ;
+       enum ieee80211_band band;
+
+       band = txp->flags & IWL_EEPROM_ENH_TXP_FL_BAND_52G ?
+               IEEE80211_BAND_5GHZ : IEEE80211_BAND_2GHZ;
+
+       for (ch_idx = 0; ch_idx < priv->channel_count; ch_idx++) {
+               struct iwl_channel_info *ch_info = &priv->channel_info[ch_idx];
+
+               /* update matching channel or from common data only */
+               if (txp->channel != 0 && ch_info->channel != txp->channel)
+                       continue;
+
+               /* update matching band only */
+               if (band != ch_info->band)
+                       continue;
+
+               if (ch_info->max_power_avg < max_txpower_avg && !is_ht40) {
+                       ch_info->max_power_avg = max_txpower_avg;
+                       ch_info->curr_txpow = max_txpower_avg;
+                       ch_info->scan_power = max_txpower_avg;
+               }
+
+               if (is_ht40 && ch_info->ht40_max_power_avg < max_txpower_avg)
+                       ch_info->ht40_max_power_avg = max_txpower_avg;
+       }
+}
+
+#define EEPROM_TXP_OFFS        (0x00 | INDIRECT_ADDRESS | INDIRECT_TXP_LIMIT)
+#define EEPROM_TXP_ENTRY_LEN sizeof(struct iwl_eeprom_enhanced_txpwr)
+#define EEPROM_TXP_SZ_OFFS (0x00 | INDIRECT_ADDRESS | INDIRECT_TXP_LIMIT_SIZE)
+
+static void iwlcore_eeprom_enhanced_txpower_new(struct iwl_priv *priv)
+{
+       struct iwl_eeprom_enhanced_txpwr *txp_array, *txp;
+       int idx, entries;
+       __le16 *txp_len;
+       s8 max_txp_avg, max_txp_avg_halfdbm;
+
+       BUILD_BUG_ON(sizeof(struct iwl_eeprom_enhanced_txpwr) != 8);
+
+       /* the length is in 16-bit words, but we want entries */
+       txp_len = (__le16 *) iwlagn_eeprom_query_addr(priv, EEPROM_TXP_SZ_OFFS);
+       entries = le16_to_cpup(txp_len) * 2 / EEPROM_TXP_ENTRY_LEN;
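+       /* e.g. a reported length of 24 words = 48 bytes = 6 entries of 8 bytes */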
+
+       txp_array = (void *) iwlagn_eeprom_query_addr(priv, EEPROM_TXP_OFFS);
+       for (idx = 0; idx < entries; idx++) {
+               txp = &txp_array[idx];
+
+               /* skip invalid entries */
+               if (!(txp->flags & IWL_EEPROM_ENH_TXP_FL_VALID))
+                       continue;
+
+               max_txp_avg = iwl_get_max_txpower_avg(priv, txp_array, idx,
+                                                     &max_txp_avg_halfdbm);
+
+               /*
+                * Update the user limit values to the highest
+                * power supported by any channel
+                */
+               if (max_txp_avg > priv->tx_power_user_lmt)
+                       priv->tx_power_user_lmt = max_txp_avg;
+               if (max_txp_avg_halfdbm > priv->tx_power_lmt_in_half_dbm)
+                       priv->tx_power_lmt_in_half_dbm = max_txp_avg_halfdbm;
+
+               iwlcore_eeprom_enh_txp_read_element(priv, txp, max_txp_avg);
+       }
+}
+
+void iwlcore_eeprom_enhanced_txpower(struct iwl_priv *priv)
+{
+       if (priv->cfg->use_new_eeprom_reading)
+               iwlcore_eeprom_enhanced_txpower_new(priv);
+       else
+               iwlcore_eeprom_enhanced_txpower_old(priv);
+}
index b555edd533547e3bf98eed8e948a276ef141aace..554afb7d9670dff564873b9509cd59c3058a346e 100644 (file)
@@ -569,6 +569,12 @@ static u32 eeprom_indirect_address(const struct iwl_priv *priv, u32 address)
        case INDIRECT_REGULATORY:
                offset = iwl_eeprom_query16(priv, EEPROM_LINK_REGULATORY);
                break;
+       case INDIRECT_TXP_LIMIT:
+               offset = iwl_eeprom_query16(priv, EEPROM_LINK_TXP_LIMIT);
+               break;
+       case INDIRECT_TXP_LIMIT_SIZE:
+               offset = iwl_eeprom_query16(priv, EEPROM_LINK_TXP_LIMIT_SIZE);
+               break;
        case INDIRECT_CALIBRATION:
                offset = iwl_eeprom_query16(priv, EEPROM_LINK_CALIBRATION);
                break;
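
For orientation: an indirect EEPROM address such as EEPROM_TXP_OFFS carries a section selector (INDIRECT_TXP_LIMIT) in its high bits, which the switch above maps to the link word giving that section's location; the low bits are an offset within the section. A hedged sketch — the mask handling lives outside this hunk and is an assumption here:

	u32 addr = EEPROM_TXP_OFFS;	/* 0x00 | INDIRECT_ADDRESS | INDIRECT_TXP_LIMIT */
	u16 base = iwl_eeprom_query16(priv, EEPROM_LINK_TXP_LIMIT);
	/* final location = base (scaled to bytes) + the in-section offset in
	 * the low bits of addr; the exact masks are not shown in this hunk */
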
index 64527def059f7de76ac0e4bdcadd0147576805c1..954ecc2c34c41f239dc1d24932d67be68aa741fa 100644 (file)
@@ -390,6 +390,7 @@ struct iwl_cfg {
        const bool need_temp_offset_calib; /* if used set to true */
        u8 scan_rx_antennas[IEEE80211_NUM_BANDS];
        u8 scan_tx_antennas[IEEE80211_NUM_BANDS];
+       const bool use_new_eeprom_reading; /* temporary, remove later */
 };
 
 /***************************
index d9b590625ae4eb16ba996d0bbd8d794acc30dae9..e3a279d2d0b6307fcf5d38948454cb7ba8da29c7 100644 (file)
@@ -120,6 +120,17 @@ struct iwl_eeprom_channel {
        s8 max_power_avg;       /* max power (dBm) on this chnl, limit 31 */
 } __packed;
 
+enum iwl_eeprom_enhanced_txpwr_flags {
+       IWL_EEPROM_ENH_TXP_FL_VALID             = BIT(0),
+       IWL_EEPROM_ENH_TXP_FL_BAND_52G          = BIT(1),
+       IWL_EEPROM_ENH_TXP_FL_OFDM              = BIT(2),
+       IWL_EEPROM_ENH_TXP_FL_40MHZ             = BIT(3),
+       IWL_EEPROM_ENH_TXP_FL_HT_AP             = BIT(4),
+       IWL_EEPROM_ENH_TXP_FL_RES1              = BIT(5),
+       IWL_EEPROM_ENH_TXP_FL_RES2              = BIT(6),
+       IWL_EEPROM_ENH_TXP_FL_COMMON_TYPE       = BIT(7),
+};
+
 /**
  * iwl_eeprom_enhanced_txpwr structure
  *    This structure presents the enhanced regulatory tx power limit layout
@@ -127,21 +138,23 @@ struct iwl_eeprom_channel {
  *    Enhanced regulatory tx power portion of eeprom image can be broken down
 *    into individual structures; each one is 8 bytes in size and contains the
  *    following information
- * @common: (desc + channel) not used by driver, should _NOT_ be "zero"
+ * @flags: entry flags
+ * @channel: channel number
  * @chain_a_max_pwr: chain a max power in 1/2 dBm
  * @chain_b_max_pwr: chain b max power in 1/2 dBm
  * @chain_c_max_pwr: chain c max power in 1/2 dBm
- * @reserved: not used, should be "zero"
+ * @delta_20_in_40: 20-in-40 deltas (hi/lo)
  * @mimo2_max_pwr: mimo2 max power in 1/2 dBm
  * @mimo3_max_pwr: mimo3 max power in 1/2 dBm
  *
  */
 struct iwl_eeprom_enhanced_txpwr {
-       __le16 common;
+       u8 flags;
+       u8 channel;
        s8 chain_a_max;
        s8 chain_b_max;
        s8 chain_c_max;
-       s8 reserved;
+       u8 delta_20_in_40;
        s8 mimo2_max;
        s8 mimo3_max;
 } __packed;
@@ -186,6 +199,8 @@ struct iwl_eeprom_enhanced_txpwr {
 #define EEPROM_LINK_CALIBRATION      (2*0x67)
 #define EEPROM_LINK_PROCESS_ADJST    (2*0x68)
 #define EEPROM_LINK_OTHERS           (2*0x69)
+#define EEPROM_LINK_TXP_LIMIT        (2*0x6a)
+#define EEPROM_LINK_TXP_LIMIT_SIZE   (2*0x6b)
 
 /* agn regulatory - indirect access */
 #define EEPROM_REG_BAND_1_CHANNELS       ((0x08)\
@@ -389,6 +404,8 @@ struct iwl_eeprom_calib_info {
 #define INDIRECT_CALIBRATION        0x00040000
 #define INDIRECT_PROCESS_ADJST      0x00050000
 #define INDIRECT_OTHERS             0x00060000
+#define INDIRECT_TXP_LIMIT          0x00070000
+#define INDIRECT_TXP_LIMIT_SIZE     0x00080000
 #define INDIRECT_ADDRESS            0x00100000
 
 /* General */
index 373930afc26b50f2d7cddeb9027b07f8e5ad7775..113f4f204657ec911b6fe01c112f83f57d0d7fa8 100644 (file)
@@ -619,7 +619,7 @@ static int lbs_ret_scan(struct lbs_private *priv, unsigned long dummy,
                                     print_ssid(ssid_buf, ssid, ssid_len),
                                     LBS_SCAN_RSSI_TO_MBM(rssi)/100);
 
-                       if (channel ||
+                       if (channel &&
                            !(channel->flags & IEEE80211_CHAN_DISABLED))
                                cfg80211_inform_bss(wiphy, channel,
                                        bssid, le64_to_cpu(*(__le64 *)tsfdesc),
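
The `||` to `&&` change above is a NULL-pointer fix: with `||`, a NULL channel could still reach the flags test, whereas `&&` short-circuits:

	/* flags is only dereferenced once channel is known to be non-NULL */
	if (channel && !(channel->flags & IEEE80211_CHAN_DISABLED))
		/* ... inform cfg80211 only for known, enabled channels */
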
index d5bc21e5a02c7520d6599ad5822eafe85b267823..2325e56a9b0bd8cc35c7ee958be19ed5ff2adf2f 100644 (file)
@@ -43,6 +43,7 @@ MODULE_FIRMWARE("isl3887usb");
 
 static struct usb_device_id p54u_table[] __devinitdata = {
        /* Version 1 devices (pci chip + net2280) */
+       {USB_DEVICE(0x0411, 0x0050)},   /* Buffalo WLI2-USB2-G54 */
        {USB_DEVICE(0x045e, 0x00c2)},   /* Microsoft MN-710 */
        {USB_DEVICE(0x0506, 0x0a11)},   /* 3COM 3CRWE254G72 */
        {USB_DEVICE(0x06b9, 0x0120)},   /* Thomson SpeedTouch 120g */
@@ -56,9 +57,13 @@ static struct usb_device_id p54u_table[] __devinitdata = {
        {USB_DEVICE(0x0846, 0x4220)},   /* Netgear WG111 */
        {USB_DEVICE(0x09aa, 0x1000)},   /* Spinnaker Proto board */
        {USB_DEVICE(0x0cde, 0x0006)},   /* Medion 40900, Roper Europe */
+       {USB_DEVICE(0x0db0, 0x6826)},   /* MSI UB54G (MS-6826) */
        {USB_DEVICE(0x107b, 0x55f2)},   /* Gateway WGU-210 (Gemtek) */
        {USB_DEVICE(0x124a, 0x4023)},   /* Shuttle PN15, Airvast WM168g, IOGear GWU513 */
+       {USB_DEVICE(0x1435, 0x0210)},   /* Inventel UR054G */
+       {USB_DEVICE(0x15a9, 0x0002)},   /* Gemtek WUBI-100GW 802.11g */
        {USB_DEVICE(0x1630, 0x0005)},   /* 2Wire 802.11g USB (v1) / Z-Com */
+       {USB_DEVICE(0x182d, 0x096b)},   /* Sitecom WL-107 */
        {USB_DEVICE(0x1915, 0x2234)},   /* Linksys WUSB54G OEM */
        {USB_DEVICE(0x1915, 0x2235)},   /* Linksys WUSB54G Portable OEM */
        {USB_DEVICE(0x2001, 0x3701)},   /* DLink DWL-G120 Spinnaker */
@@ -94,6 +99,7 @@ static struct usb_device_id p54u_table[] __devinitdata = {
        {USB_DEVICE(0x1435, 0x0427)},   /* Inventel UR054G */
        {USB_DEVICE(0x1668, 0x1050)},   /* Actiontec 802UIG-1 */
        {USB_DEVICE(0x2001, 0x3704)},   /* DLink DWL-G122 rev A2 */
+       {USB_DEVICE(0x2001, 0x3705)},   /* D-Link DWL-G120 rev C1 */
        {USB_DEVICE(0x413c, 0x5513)},   /* Dell WLA3310 USB Wireless Adapter */
        {USB_DEVICE(0x413c, 0x8102)},   /* Spinnaker DUT */
        {USB_DEVICE(0x413c, 0x8104)},   /* Cohiba Proto board */
index b267395359863ca10a55fe8a39f186e5bf08deb9..09a67905c230e5f9a9029ba2c786a707d73b5108 100644 (file)
@@ -912,6 +912,7 @@ static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev)
        __set_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags);
        __set_bit(DRIVER_REQUIRE_L2PAD, &rt2x00dev->flags);
        __set_bit(DRIVER_REQUIRE_TXSTATUS_FIFO, &rt2x00dev->flags);
+       __set_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags);
        if (!modparam_nohwcrypt)
                __set_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags);
        __set_bit(DRIVER_SUPPORT_LINK_TUNING, &rt2x00dev->flags);
index 94fe589acfaabff06eac097fe7e3ae3e2f7ea310..ab43e7ca2a231000ba0d40489860f950883be16d 100644 (file)
@@ -664,6 +664,7 @@ enum rt2x00_flags {
        DRIVER_REQUIRE_COPY_IV,
        DRIVER_REQUIRE_L2PAD,
        DRIVER_REQUIRE_TXSTATUS_FIFO,
+       DRIVER_REQUIRE_TASKLET_CONTEXT,
 
        /*
         * Driver features
index 5ba79b935f09f5bed56f999653338501584cf141..d019830ca8407c2aa10ed0ae95abe8679c654e55 100644 (file)
@@ -390,9 +390,12 @@ void rt2x00lib_txdone(struct queue_entry *entry,
         * through a mac80211 library call (RTS/CTS) then we should not
         * send the status report back.
         */
-       if (!(skbdesc_flags & SKBDESC_NOT_MAC80211))
-               ieee80211_tx_status(rt2x00dev->hw, entry->skb);
-       else
+       if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) {
+               if (test_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags))
+                       ieee80211_tx_status(rt2x00dev->hw, entry->skb);
+               else
+                       ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb);
+       } else
                dev_kfree_skb_any(entry->skb);
 
        /*
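
The new branch reflects the mac80211 calling convention: ieee80211_tx_status() expects to run with bottom halves already disabled (irq or tasklet context), while ieee80211_tx_status_ni() is the process-context variant that disables them internally. Drivers that complete TX from a tasklet, such as rt2800pci above, now advertise that via the new flag:

	if (test_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags))
		ieee80211_tx_status(rt2x00dev->hw, entry->skb);    /* BH context */
	else
		ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb); /* process context */
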
index cd1b3dcd61db67671e28df47cae7a19293fa881f..ec47e22fa186ce89ed7c2758998e6ff008d140aa 100644 (file)
@@ -744,7 +744,7 @@ static int yellowfin_init_ring(struct net_device *dev)
        }
 
        for (i = 0; i < RX_RING_SIZE; i++) {
-               struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz);
+               struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz + 2);
                yp->rx_skbuff[i] = skb;
                if (skb == NULL)
                        break;
@@ -1157,7 +1157,7 @@ static int yellowfin_rx(struct net_device *dev)
        for (; yp->cur_rx - yp->dirty_rx > 0; yp->dirty_rx++) {
                entry = yp->dirty_rx % RX_RING_SIZE;
                if (yp->rx_skbuff[entry] == NULL) {
-                       struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz);
+                       struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz + 2);
                        if (skb == NULL)
                                break;                          /* Better luck next round. */
                        yp->rx_skbuff[entry] = skb;
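
The two extra bytes are the usual receive-alignment slack: offsetting the buffer start by 2 puts the IP header, which follows the 14-byte Ethernet header, on a 4-byte boundary. The larger allocation is paired with an skb_reserve; the reserve call sits outside this hunk:

	struct sk_buff *skb = dev_alloc_skb(yp->rx_buf_sz + 2); /* +2 alignment slack */
	if (skb)
		skb_reserve(skb, 2);	/* 2 + 14-byte header = 16: IP header aligned */
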
index c85d3c7421fc94796c6814d0c453b33f831a9279..f37fbeb66a4400c2c7fa28bc45cd8be5af7c7c47 100644 (file)
@@ -61,7 +61,7 @@ void of_i2c_register_devices(struct i2c_adapter *adap)
                info.of_node = of_node_get(node);
                info.archdata = &dev_ad;
 
-               request_module("%s", info.type);
+               request_module("%s%s", I2C_MODULE_PREFIX, info.type);
 
                result = i2c_new_device(adap, &info);
                if (result == NULL) {
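
The old call asked userspace to load a module literally named after the device type, but I2C client drivers export their aliases with the "i2c:" prefix (I2C_MODULE_PREFIX), so the request could never match. For a hypothetical node of type "foo":

	request_module("%s%s", I2C_MODULE_PREFIX, info.type);
	/* issues a request for "i2c:foo", matching the MODULE_ALIAS("i2c:foo")
	 * entries generated by MODULE_DEVICE_TABLE(i2c, ...) in the client driver */
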
index 2574700db461559717a95e0623c1c58c5821c687..5f7226223a62c9067bcc4a47f1b3e6bd47b69c40 100644 (file)
@@ -115,7 +115,8 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
 static int __init select_detection_mode(void)
 {
        struct dummy_slot *slot, *tmp;
-       pcie_port_service_register(&dummy_driver);
+       if (pcie_port_service_register(&dummy_driver))
+               return PCIEHP_DETECT_ACPI;
        pcie_port_service_unregister(&dummy_driver);
        list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
                list_del(&slot->list);
index c44a5e8b8b82da9d06706d9cd3a3ec0fcb2b883c..f0b3ad13c273fb73394cc78ca8fb36521a8a4c90 100644 (file)
@@ -75,6 +75,7 @@
 #include <drm/i915_drm.h>
 #include <asm/msr.h>
 #include <asm/processor.h>
+#include "intel_ips.h"
 
 #define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
 
 #define thm_writel(off, val) writel((val), ips->regmap + (off))
 
 static const int IPS_ADJUST_PERIOD = 5000; /* ms */
+static bool late_i915_load = false;
 
 /* For initial average collection */
 static const int IPS_SAMPLE_PERIOD = 200; /* ms */
@@ -339,6 +341,9 @@ struct ips_driver {
        u64 orig_turbo_ratios;
 };
 
+static bool ips_gpu_turbo_enabled(struct ips_driver *ips);
+
 /**
  * ips_cpu_busy - is CPU busy?
  * @ips: IPS driver struct
@@ -517,7 +522,7 @@ static void ips_disable_cpu_turbo(struct ips_driver *ips)
  */
 static bool ips_gpu_busy(struct ips_driver *ips)
 {
-       if (!ips->gpu_turbo_enabled)
+       if (!ips_gpu_turbo_enabled(ips))
                return false;
 
        return ips->gpu_busy();
@@ -532,7 +537,7 @@ static bool ips_gpu_busy(struct ips_driver *ips)
  */
 static void ips_gpu_raise(struct ips_driver *ips)
 {
-       if (!ips->gpu_turbo_enabled)
+       if (!ips_gpu_turbo_enabled(ips))
                return;
 
        if (!ips->gpu_raise())
@@ -549,7 +554,7 @@ static void ips_gpu_raise(struct ips_driver *ips)
  */
 static void ips_gpu_lower(struct ips_driver *ips)
 {
-       if (!ips->gpu_turbo_enabled)
+       if (!ips_gpu_turbo_enabled(ips))
                return;
 
        if (!ips->gpu_lower())
@@ -1454,6 +1459,31 @@ out_err:
        return false;
 }
 
+static bool
+ips_gpu_turbo_enabled(struct ips_driver *ips)
+{
+       if (!ips->gpu_busy && late_i915_load) {
+               if (ips_get_i915_syms(ips)) {
+                       dev_info(&ips->dev->dev,
+                                "i915 driver attached, reenabling gpu turbo\n");
+                       ips->gpu_turbo_enabled = !(thm_readl(THM_HTS) & HTS_GTD_DIS);
+               }
+       }
+
+       return ips->gpu_turbo_enabled;
+}
+
+void
+ips_link_to_i915_driver(void)
+{
+       /* We can't cleanly get at the various ips_driver structs from
+        * this caller (the i915 driver), so just set a flag saying
+        * that it's time to try getting the symbols again.
+        */
+       late_i915_load = true;
+}
+EXPORT_SYMBOL_GPL(ips_link_to_i915_driver);
+
 static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
                     PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
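
The exported hook closes a module load-ordering gap: if i915 loads after intel_ips, the initial symbol lookup has already failed and GPU turbo stays off. i915 is expected to call the hook once it is up (that call site is not part of this diff), and ips_gpu_turbo_enabled() then retries ips_get_i915_syms() lazily:

	/* i915 side, after successful initialisation (sketch; not in this diff): */
	ips_link_to_i915_driver();	/* ask a probed intel_ips to retry its symbol lookup */
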
diff --git a/drivers/platform/x86/intel_ips.h b/drivers/platform/x86/intel_ips.h
new file mode 100644 (file)
index 0000000..73299be
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ */
+
+void ips_link_to_i915_driver(void);
index 41a9e34899ac5f81da6dde61f6aa2a1d34e2e134..ca35b0ce944a58ca017ff5b5678b73a468d7a42a 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/sfi.h>
 #include <asm/mrst.h>
 #include <asm/intel_scu_ipc.h>
 
 /* IPC defines the following message types */
 #define IPCMSG_WATCHDOG_TIMER 0xF8 /* Set Kernel Watchdog Threshold */
@@ -699,6 +700,9 @@ static int ipc_probe(struct pci_dev *dev, const struct pci_device_id *id)
                iounmap(ipcdev.ipc_base);
                return -ENOMEM;
        }
+
+       intel_scu_devices_create();
+
        return 0;
 }
 
@@ -720,6 +724,7 @@ static void ipc_remove(struct pci_dev *pdev)
        iounmap(ipcdev.ipc_base);
        iounmap(ipcdev.i2c_base);
        ipcdev.pdev = NULL;
+       intel_scu_devices_destroy();
 }
 
 static const struct pci_device_id pci_ids[] = {
index 2883428d5ac806408b5082221036ffa30915356c..4941cade319f5cef06d508d0b1f1354d951c1034 100644 (file)
@@ -463,6 +463,18 @@ config RTC_DRV_CMOS
          This driver can also be built as a module. If so, the module
          will be called rtc-cmos.
 
+config RTC_DRV_VRTC
+       tristate "Virtual RTC for Moorestown platforms"
+       depends on X86_MRST
+       default y if X86_MRST
+       help
+         Say "yes" here to get direct support for the real time clock
+         found on Moorestown platforms. The VRTC is an emulated RTC that
+         derives its clock source from a real RTC in the PMIC. The
+         MC146818-style programming interface is mostly preserved, but
+         any updates are done via IPC calls to the system controller FW.
+
 config RTC_DRV_DS1216
        tristate "Dallas DS1216"
        depends on SNI_RM
index 4c2832df4697d3cdba4af02990c683dc13118776..2afdaf3ff98660f53c72a189503786ebeac0a27c 100644 (file)
@@ -30,6 +30,7 @@ obj-$(CONFIG_RTC_DRV_CMOS)    += rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_COH901331)        += rtc-coh901331.o
 obj-$(CONFIG_RTC_DRV_DAVINCI)  += rtc-davinci.o
 obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o
+obj-$(CONFIG_RTC_DRV_VRTC)     += rtc-mrst.o
 obj-$(CONFIG_RTC_DRV_DS1216)   += rtc-ds1216.o
 obj-$(CONFIG_RTC_DRV_DS1286)   += rtc-ds1286.o
 obj-$(CONFIG_RTC_DRV_DS1302)   += rtc-ds1302.o
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
new file mode 100644 (file)
index 0000000..bcd0cf6
--- /dev/null
@@ -0,0 +1,582 @@
+/*
+ * rtc-mrst.c: Driver for Moorestown virtual RTC
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ *        Feng Tang (feng.tang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Note:
+ * VRTC is emulated by system controller firmware, the real HW
+ * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
+ * in a memory mapped IO space that is visible to the host IA
+ * processor.
+ *
+ * This driver is based upon drivers/rtc/rtc-cmos.c
+ */
+
+/*
+ * Note:
+ *  * vRTC only supports binary mode and 24H mode
+ *  * vRTC only supports PIE and AIE, no UIE; its PIE only fires
+ *    at 23:59:59 every day, with no support for an adjustable frequency
+ *  * Alarm function is also limited to hr/min/sec.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+
+#include <asm-generic/rtc.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
+
+struct mrst_rtc {
+       struct rtc_device       *rtc;
+       struct device           *dev;
+       int                     irq;
+       struct resource         *iomem;
+
+       u8                      enabled_wake;
+       u8                      suspend_ctrl;
+};
+
+static const char driver_name[] = "rtc_mrst";
+
+#define        RTC_IRQMASK     (RTC_PF | RTC_AF)
+
+static inline int is_intr(u8 rtc_intr)
+{
+       if (!(rtc_intr & RTC_IRQF))
+               return 0;
+       return rtc_intr & RTC_IRQMASK;
+}
+
+/*
+ * rtc_time's year contains the increment over 1900, but the vRTC's YEAR
+ * register can't be programmed to a value larger than 0x64, so the vRTC
+ * driver uses 1960 (1970 is the UNIX time start point) as the base and
+ * does the translation at read/write time.
+ *
+ * Why not just use 1970 as the offset? Because using 1960 keeps the
+ * leap-year handling consistent between the vRTC and the low-level
+ * physical RTC devices.
+ */
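+/*
+ * Example: 2010 is tm_year = 110 over the 1900 base; the vRTC stores
+ * 110 - 60 = 50 (0x32), well under the 0x64 limit, and reads add the
+ * 60 back.
+ */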
+static int mrst_read_time(struct device *dev, struct rtc_time *time)
+{
+       unsigned long flags;
+
+       if (rtc_is_updating())
+               mdelay(20);
+
+       spin_lock_irqsave(&rtc_lock, flags);
+       time->tm_sec = vrtc_cmos_read(RTC_SECONDS);
+       time->tm_min = vrtc_cmos_read(RTC_MINUTES);
+       time->tm_hour = vrtc_cmos_read(RTC_HOURS);
+       time->tm_mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
+       time->tm_mon = vrtc_cmos_read(RTC_MONTH);
+       time->tm_year = vrtc_cmos_read(RTC_YEAR);
+       spin_unlock_irqrestore(&rtc_lock, flags);
+
+       /* Adjust for the 1960/1900 */
+       time->tm_year += 60;
+       time->tm_mon--;
+       return RTC_24H;
+}
+
+static int mrst_set_time(struct device *dev, struct rtc_time *time)
+{
+       int ret;
+       unsigned long flags;
+       unsigned char mon, day, hrs, min, sec;
+       unsigned int yrs;
+
+       yrs = time->tm_year;
+       mon = time->tm_mon + 1;   /* tm_mon starts at zero */
+       day = time->tm_mday;
+       hrs = time->tm_hour;
+       min = time->tm_min;
+       sec = time->tm_sec;
+
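+       /* Accept 1970..2038 (tm_year 70..138); over the 1960 base this
+        * becomes 10..78, safely below the 0x64 register limit. */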
+       if (yrs < 70 || yrs > 138)
+               return -EINVAL;
+       yrs -= 60;
+
+       spin_lock_irqsave(&rtc_lock, flags);
+
+       vrtc_cmos_write(yrs, RTC_YEAR);
+       vrtc_cmos_write(mon, RTC_MONTH);
+       vrtc_cmos_write(day, RTC_DAY_OF_MONTH);
+       vrtc_cmos_write(hrs, RTC_HOURS);
+       vrtc_cmos_write(min, RTC_MINUTES);
+       vrtc_cmos_write(sec, RTC_SECONDS);
+
+       spin_unlock_irqrestore(&rtc_lock, flags);
+
+       ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETTIME);
+       return ret;
+}
+
+static int mrst_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char rtc_control;
+
+       if (mrst->irq <= 0)
+               return -EIO;
+
+       /* Basic alarms only support the hour, minute, and second fields.
+        * Some also support day and month, for alarms up to a year in
+        * the future.
+        */
+       t->time.tm_mday = -1;
+       t->time.tm_mon = -1;
+       t->time.tm_year = -1;
+
+       /* vRTC only supports binary mode */
+       spin_lock_irq(&rtc_lock);
+       t->time.tm_sec = vrtc_cmos_read(RTC_SECONDS_ALARM);
+       t->time.tm_min = vrtc_cmos_read(RTC_MINUTES_ALARM);
+       t->time.tm_hour = vrtc_cmos_read(RTC_HOURS_ALARM);
+
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       spin_unlock_irq(&rtc_lock);
+
+       t->enabled = !!(rtc_control & RTC_AIE);
+       t->pending = 0;
+
+       return 0;
+}
+
+static void mrst_checkintr(struct mrst_rtc *mrst, unsigned char rtc_control)
+{
+       unsigned char   rtc_intr;
+
+       /*
+        * NOTE after changing RTC_xIE bits we always read INTR_FLAGS;
+        * allegedly some older rtcs need that to handle irqs properly
+        */
+       rtc_intr = vrtc_cmos_read(RTC_INTR_FLAGS);
+       rtc_intr &= (rtc_control & RTC_IRQMASK) | RTC_IRQF;
+       if (is_intr(rtc_intr))
+               rtc_update_irq(mrst->rtc, 1, rtc_intr);
+}
+
+static void mrst_irq_enable(struct mrst_rtc *mrst, unsigned char mask)
+{
+       unsigned char   rtc_control;
+
+       /*
+        * Flush any pending IRQ status, notably for update irqs,
+        * before we enable new IRQs
+        */
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       mrst_checkintr(mrst, rtc_control);
+
+       rtc_control |= mask;
+       vrtc_cmos_write(rtc_control, RTC_CONTROL);
+
+       mrst_checkintr(mrst, rtc_control);
+}
+
+static void mrst_irq_disable(struct mrst_rtc *mrst, unsigned char mask)
+{
+       unsigned char   rtc_control;
+
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       rtc_control &= ~mask;
+       vrtc_cmos_write(rtc_control, RTC_CONTROL);
+       mrst_checkintr(mrst, rtc_control);
+}
+
+static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char hrs, min, sec;
+       int ret = 0;
+
+       if (!mrst->irq)
+               return -EIO;
+
+       hrs = t->time.tm_hour;
+       min = t->time.tm_min;
+       sec = t->time.tm_sec;
+
+       spin_lock_irq(&rtc_lock);
+       /* Next rtc irq must not be from previous alarm setting */
+       mrst_irq_disable(mrst, RTC_AIE);
+
+       /* Update alarm */
+       vrtc_cmos_write(hrs, RTC_HOURS_ALARM);
+       vrtc_cmos_write(min, RTC_MINUTES_ALARM);
+       vrtc_cmos_write(sec, RTC_SECONDS_ALARM);
+
+       spin_unlock_irq(&rtc_lock);
+
+       ret = intel_scu_ipc_simple_command(IPCMSG_VRTC, IPC_CMD_VRTC_SETALARM);
+       if (ret)
+               return ret;
+
+       spin_lock_irq(&rtc_lock);
+       if (t->enabled)
+               mrst_irq_enable(mrst, RTC_AIE);
+
+       spin_unlock_irq(&rtc_lock);
+
+       return 0;
+}
+
+static int mrst_irq_set_state(struct device *dev, int enabled)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned long   flags;
+
+       if (!mrst->irq)
+               return -ENXIO;
+
+       spin_lock_irqsave(&rtc_lock, flags);
+
+       if (enabled)
+               mrst_irq_enable(mrst, RTC_PIE);
+       else
+               mrst_irq_disable(mrst, RTC_PIE);
+
+       spin_unlock_irqrestore(&rtc_lock, flags);
+       return 0;
+}
+
+#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
+
+/* Currently, the vRTC doesn't support UIE ON/OFF */
+static int
+mrst_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned long   flags;
+
+       switch (cmd) {
+       case RTC_AIE_OFF:
+       case RTC_AIE_ON:
+               if (!mrst->irq)
+                       return -EINVAL;
+               break;
+       default:
+               /* PIE ON/OFF is handled by mrst_irq_set_state() */
+               return -ENOIOCTLCMD;
+       }
+
+       spin_lock_irqsave(&rtc_lock, flags);
+       switch (cmd) {
+       case RTC_AIE_OFF:       /* alarm off */
+               mrst_irq_disable(mrst, RTC_AIE);
+               break;
+       case RTC_AIE_ON:        /* alarm on */
+               mrst_irq_enable(mrst, RTC_AIE);
+               break;
+       }
+       spin_unlock_irqrestore(&rtc_lock, flags);
+       return 0;
+}
+
+#else
+#define        mrst_rtc_ioctl  NULL
+#endif
+
+#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
+
+static int mrst_procfs(struct device *dev, struct seq_file *seq)
+{
+       unsigned char   rtc_control, valid;
+
+       spin_lock_irq(&rtc_lock);
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       valid = vrtc_cmos_read(RTC_VALID);
+       spin_unlock_irq(&rtc_lock);
+
+       return seq_printf(seq,
+                       "periodic_IRQ\t: %s\n"
+                       "alarm\t\t: %s\n"
+                       "BCD\t\t: no\n"
+                       "periodic_freq\t: daily (not adjustable)\n",
+                       (rtc_control & RTC_PIE) ? "on" : "off",
+                       (rtc_control & RTC_AIE) ? "on" : "off");
+}
+
+#else
+#define        mrst_procfs     NULL
+#endif
+
+static const struct rtc_class_ops mrst_rtc_ops = {
+       .ioctl          = mrst_rtc_ioctl,
+       .read_time      = mrst_read_time,
+       .set_time       = mrst_set_time,
+       .read_alarm     = mrst_read_alarm,
+       .set_alarm      = mrst_set_alarm,
+       .proc           = mrst_procfs,
+       .irq_set_state  = mrst_irq_set_state,
+};
+
+static struct mrst_rtc mrst_rtc;
+
+/*
+ * When vRTC IRQ is captured by SCU FW, FW will clear the AIE bit in
+ * Reg B, so no need for this driver to clear it
+ */
+static irqreturn_t mrst_rtc_irq(int irq, void *p)
+{
+       u8 irqstat;
+
+       spin_lock(&rtc_lock);
+       /* This read will clear all IRQ flags inside Reg C */
+       irqstat = vrtc_cmos_read(RTC_INTR_FLAGS);
+       spin_unlock(&rtc_lock);
+
+       irqstat &= RTC_IRQMASK | RTC_IRQF;
+       if (is_intr(irqstat)) {
+               rtc_update_irq(p, 1, irqstat);
+               return IRQ_HANDLED;
+       }
+       return IRQ_NONE;
+}
+
+static int __init
+vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, int rtc_irq)
+{
+       int retval = 0;
+       unsigned char rtc_control;
+
+       /* There can be only one ... */
+       if (mrst_rtc.dev)
+               return -EBUSY;
+
+       if (!iomem)
+               return -ENODEV;
+
+       iomem = request_mem_region(iomem->start,
+                       iomem->end + 1 - iomem->start,
+                       driver_name);
+       if (!iomem) {
+               dev_dbg(dev, "i/o mem already in use.\n");
+               return -EBUSY;
+       }
+
+       mrst_rtc.irq = rtc_irq;
+       mrst_rtc.iomem = iomem;
+
+       mrst_rtc.rtc = rtc_device_register(driver_name, dev,
+                               &mrst_rtc_ops, THIS_MODULE);
+       if (IS_ERR(mrst_rtc.rtc)) {
+               retval = PTR_ERR(mrst_rtc.rtc);
+               goto cleanup0;
+       }
+
+       mrst_rtc.dev = dev;
+       dev_set_drvdata(dev, &mrst_rtc);
+       rename_region(iomem, dev_name(&mrst_rtc.rtc->dev));
+
+       spin_lock_irq(&rtc_lock);
+       mrst_irq_disable(&mrst_rtc, RTC_PIE | RTC_AIE);
+       rtc_control = vrtc_cmos_read(RTC_CONTROL);
+       spin_unlock_irq(&rtc_lock);
+
+       if (!(rtc_control & RTC_24H) || (rtc_control & (RTC_DM_BINARY)))
+               dev_dbg(dev, "TODO: support more than 24-hr BCD mode\n");
+
+       if (rtc_irq) {
+               retval = request_irq(rtc_irq, mrst_rtc_irq,
+                               IRQF_DISABLED, dev_name(&mrst_rtc.rtc->dev),
+                               mrst_rtc.rtc);
+               if (retval < 0) {
+                       dev_dbg(dev, "IRQ %d is already in use, err %d\n",
+                               rtc_irq, retval);
+                       goto cleanup1;
+               }
+       }
+       dev_dbg(dev, "initialised\n");
+       return 0;
+
+cleanup1:
+       mrst_rtc.dev = NULL;
+       rtc_device_unregister(mrst_rtc.rtc);
+cleanup0:
+       release_region(iomem->start, iomem->end + 1 - iomem->start);
+       dev_err(dev, "rtc-mrst: unable to initialise\n");
+       return retval;
+}
+
+static void rtc_mrst_do_shutdown(void)
+{
+       spin_lock_irq(&rtc_lock);
+       mrst_irq_disable(&mrst_rtc, RTC_IRQMASK);
+       spin_unlock_irq(&rtc_lock);
+}
+
+static void __exit rtc_mrst_do_remove(struct device *dev)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       struct resource *iomem;
+
+       rtc_mrst_do_shutdown();
+
+       if (mrst->irq)
+               free_irq(mrst->irq, mrst->rtc);
+
+       rtc_device_unregister(mrst->rtc);
+       mrst->rtc = NULL;
+
+       iomem = mrst->iomem;
+       release_region(iomem->start, iomem->end + 1 - iomem->start);
+       mrst->iomem = NULL;
+
+       mrst->dev = NULL;
+       dev_set_drvdata(dev, NULL);
+}
+
+#ifdef CONFIG_PM
+static int mrst_suspend(struct device *dev, pm_message_t mesg)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char   tmp;
+
+       /* Only the alarm might be a wakeup event source */
+       spin_lock_irq(&rtc_lock);
+       mrst->suspend_ctrl = tmp = vrtc_cmos_read(RTC_CONTROL);
+       if (tmp & (RTC_PIE | RTC_AIE)) {
+               unsigned char   mask;
+
+               if (device_may_wakeup(dev))
+                       mask = RTC_IRQMASK & ~RTC_AIE;
+               else
+                       mask = RTC_IRQMASK;
+               tmp &= ~mask;
+               vrtc_cmos_write(tmp, RTC_CONTROL);
+
+               mrst_checkintr(mrst, tmp);
+       }
+       spin_unlock_irq(&rtc_lock);
+
+       if (tmp & RTC_AIE) {
+               mrst->enabled_wake = 1;
+               enable_irq_wake(mrst->irq);
+       }
+
+       dev_dbg(&mrst_rtc.rtc->dev, "suspend%s, ctrl %02x\n",
+                       (tmp & RTC_AIE) ? ", alarm may wake" : "",
+                       tmp);
+
+       return 0;
+}
+
+/*
+ * We want RTC alarms to wake us from the deep power saving state
+ */
+static inline int mrst_poweroff(struct device *dev)
+{
+       return mrst_suspend(dev, PMSG_HIBERNATE);
+}
+
+static int mrst_resume(struct device *dev)
+{
+       struct mrst_rtc *mrst = dev_get_drvdata(dev);
+       unsigned char tmp = mrst->suspend_ctrl;
+
+       /* Re-enable any irqs previously active */
+       if (tmp & RTC_IRQMASK) {
+               unsigned char   mask;
+
+               if (mrst->enabled_wake) {
+                       disable_irq_wake(mrst->irq);
+                       mrst->enabled_wake = 0;
+               }
+
+               spin_lock_irq(&rtc_lock);
+               do {
+                       vrtc_cmos_write(tmp, RTC_CONTROL);
+
+                       mask = vrtc_cmos_read(RTC_INTR_FLAGS);
+                       mask &= (tmp & RTC_IRQMASK) | RTC_IRQF;
+                       if (!is_intr(mask))
+                               break;
+
+                       rtc_update_irq(mrst->rtc, 1, mask);
+                       tmp &= ~RTC_AIE;
+               } while (mask & RTC_AIE);
+               spin_unlock_irq(&rtc_lock);
+       }
+
+       dev_dbg(&mrst_rtc.rtc->dev, "resume, ctrl %02x\n", tmp);
+
+       return 0;
+}
+
+#else
+#define        mrst_suspend    NULL
+#define        mrst_resume     NULL
+
+static inline int mrst_poweroff(struct device *dev)
+{
+       return -ENOSYS;
+}
+
+#endif
+
+static int __init vrtc_mrst_platform_probe(struct platform_device *pdev)
+{
+       return vrtc_mrst_do_probe(&pdev->dev,
+                       platform_get_resource(pdev, IORESOURCE_MEM, 0),
+                       platform_get_irq(pdev, 0));
+}
+
+static int __exit vrtc_mrst_platform_remove(struct platform_device *pdev)
+{
+       rtc_mrst_do_remove(&pdev->dev);
+       return 0;
+}
+
+static void vrtc_mrst_platform_shutdown(struct platform_device *pdev)
+{
+       if (system_state == SYSTEM_POWER_OFF && !mrst_poweroff(&pdev->dev))
+               return;
+
+       rtc_mrst_do_shutdown();
+}
+
+MODULE_ALIAS("platform:vrtc_mrst");
+
+static struct platform_driver vrtc_mrst_platform_driver = {
+       .probe          = vrtc_mrst_platform_probe,
+       .remove         = __exit_p(vrtc_mrst_platform_remove),
+       .shutdown       = vrtc_mrst_platform_shutdown,
+       .driver = {
+               .name           = (char *) driver_name,
+               .suspend        = mrst_suspend,
+               .resume         = mrst_resume,
+       }
+};
+
+static int __init vrtc_mrst_init(void)
+{
+       return platform_driver_register(&vrtc_mrst_platform_driver);
+}
+
+static void __exit vrtc_mrst_exit(void)
+{
+       platform_driver_unregister(&vrtc_mrst_platform_driver);
+}
+
+module_init(vrtc_mrst_init);
+module_exit(vrtc_mrst_exit);
+
+MODULE_AUTHOR("Jacob Pan; Feng Tang");
+MODULE_DESCRIPTION("Driver for Moorestown virtual RTC");
+MODULE_LICENSE("GPL");
index 90cf0a6ff23e3cce363958cb0c3e81cde08c14af..dd14e202c2c8cbaafe4ed261cab01835a16f8f89 100644 (file)
@@ -207,7 +207,7 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 {
        struct rs5c372  *rs5c = i2c_get_clientdata(client);
-       unsigned char   buf[8];
+       unsigned char   buf[7];
        int             addr;
 
        dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d "
index c94502dfac664f55fd42080695989d9e1cb1697b..045d7e87b6323e36213d57ee6615c6362f53bab7 100644 (file)
@@ -677,7 +677,7 @@ bfa_fcs_fabric_sm_isolated(struct bfa_fcs_fabric_s *fabric,
        bfa_trc(fabric->fcs, event);
        wwn2str(pwwn_ptr, fabric->bport.port_cfg.pwwn);
 
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Port is isolated due to VF_ID mismatch. "
                "PWWN: %s Port VF_ID: %04x switch port VF_ID: %04x.",
                pwwn_ptr, fabric->fcs->port_vfid,
@@ -1411,7 +1411,7 @@ bfa_fcs_fabric_set_fabric_name(struct bfa_fcs_fabric_s *fabric,
                wwn2str(pwwn_ptr, bfa_fcs_lport_get_pwwn(&fabric->bport));
                wwn2str(fwwn_ptr,
                        bfa_fcs_lport_get_fabric_name(&fabric->bport));
-               BFA_LOG(KERN_WARNING, bfad, log_level,
+               BFA_LOG(KERN_WARNING, bfad, bfa_log_level,
                        "Base port WWN = %s Fabric WWN = %s\n",
                        pwwn_ptr, fwwn_ptr);
        }
index 9662bcdeb41d64c50ab7530fd34ab17d543fc44c..413b58eef93a08088cc7957d63fc9c815d58027c 100644 (file)
@@ -261,7 +261,7 @@ bfa_fcs_itnim_sm_hcb_online(struct bfa_fcs_itnim_s *itnim,
                bfa_fcb_itnim_online(itnim->itnim_drv);
                wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(itnim->rport->port));
                wwn2str(rpwwn_buf, itnim->rport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Target (WWN = %s) is online for initiator (WWN = %s)\n",
                rpwwn_buf, lpwwn_buf);
                break;
@@ -301,11 +301,11 @@ bfa_fcs_itnim_sm_online(struct bfa_fcs_itnim_s *itnim,
                wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(itnim->rport->port));
                wwn2str(rpwwn_buf, itnim->rport->pwwn);
                if (bfa_fcs_lport_is_online(itnim->rport->port) == BFA_TRUE)
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                        "Target (WWN = %s) connectivity lost for "
                        "initiator (WWN = %s)\n", rpwwn_buf, lpwwn_buf);
                else
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Target (WWN = %s) offlined by initiator (WWN = %s)\n",
                        rpwwn_buf, lpwwn_buf);
                break;
index 377cbfff6f2ec88a467b6e8014d27c91d3b4a0c3..8d651309302b1f745ef842869ce782d8c9f490b6 100644 (file)
@@ -491,7 +491,7 @@ bfa_fcs_lport_online_actions(struct bfa_fcs_lport_s *port)
        __port_action[port->fabric->fab_type].online(port);
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Logical port online: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
@@ -512,11 +512,11 @@ bfa_fcs_lport_offline_actions(struct bfa_fcs_lport_s *port)
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
        if (bfa_fcs_fabric_is_online(port->fabric) == BFA_TRUE)
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                "Logical port lost fabric connectivity: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
        else
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Logical port taken offline: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
@@ -573,7 +573,7 @@ bfa_fcs_lport_deleted(struct bfa_fcs_lport_s *port)
        char    lpwwn_buf[BFA_STRING_32];
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Logical port deleted: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
@@ -878,7 +878,7 @@ bfa_fcs_lport_init(struct bfa_fcs_lport_s *lport,
                                        vport ? vport->vport_drv : NULL);
 
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(lport));
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "New logical port created: WWN = %s Role = %s\n",
                lpwwn_buf, "Initiator");
 
index 47f35c0ef29a0c69f5d6842a6db0d173a0195e84..cf4a6e73e60d7dfa5bcaf9b7d94ea986d7389a57 100644 (file)
@@ -2056,7 +2056,7 @@ bfa_fcs_rport_online_action(struct bfa_fcs_rport_s *rport)
        wwn2str(lpwwn_buf, bfa_fcs_lport_get_pwwn(port));
        wwn2str(rpwwn_buf, rport->pwwn);
        if (!BFA_FCS_PID_IS_WKA(rport->pid))
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "Remote port (WWN = %s) online for logical port (WWN = %s)\n",
                rpwwn_buf, lpwwn_buf);
 }
@@ -2075,12 +2075,12 @@ bfa_fcs_rport_offline_action(struct bfa_fcs_rport_s *rport)
        wwn2str(rpwwn_buf, rport->pwwn);
        if (!BFA_FCS_PID_IS_WKA(rport->pid)) {
                if (bfa_fcs_lport_is_online(rport->port) == BFA_TRUE)
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Remote port (WWN = %s) connectivity lost for "
                                "logical port (WWN = %s)\n",
                                rpwwn_buf, lpwwn_buf);
                else
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Remote port (WWN = %s) offlined by "
                                "logical port (WWN = %s)\n",
                                rpwwn_buf, lpwwn_buf);
index 54475b53a5ab1494490f2561fdc0c9d7884c0a55..9f4aa391ea9deca5508a0ae90060572ec8a53889 100644 (file)
@@ -402,7 +402,7 @@ bfa_ioc_sm_op_entry(struct bfa_ioc_s *ioc)
 
        ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_OK);
        bfa_ioc_hb_monitor(ioc);
-       BFA_LOG(KERN_INFO, bfad, log_level, "IOC enabled\n");
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "IOC enabled\n");
 }
 
 static void
@@ -444,7 +444,7 @@ bfa_ioc_sm_disabling_entry(struct bfa_ioc_s *ioc)
 {
        struct bfad_s *bfad = (struct bfad_s *)ioc->bfa->bfad;
        bfa_iocpf_disable(ioc);
-       BFA_LOG(KERN_INFO, bfad, log_level, "IOC disabled\n");
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "IOC disabled\n");
 }
 
 /*
@@ -565,7 +565,7 @@ bfa_ioc_sm_fail_entry(struct bfa_ioc_s *ioc)
                notify->cbfn(notify->cbarg);
        }
 
-       BFA_LOG(KERN_CRIT, bfad, log_level,
+       BFA_LOG(KERN_CRIT, bfad, bfa_log_level,
                "Heart Beat of IOC has failed\n");
 }
 
@@ -1812,7 +1812,7 @@ bfa_ioc_pf_fwmismatch(struct bfa_ioc_s *ioc)
         * Provide enable completion callback.
         */
        ioc->cbfn->enable_cbfn(ioc->bfa, BFA_STATUS_IOC_FAILURE);
-       BFA_LOG(KERN_WARNING, bfad, log_level,
+       BFA_LOG(KERN_WARNING, bfad, bfa_log_level,
                "Running firmware version is incompatible "
                "with the driver version\n");
 }
index c768143f4805a7cb5f0094682a39c30b73ea433d..37e16ac8f249a33a4045506df74c360d3c5eb698 100644 (file)
@@ -2138,7 +2138,7 @@ bfa_fcport_sm_enabling_qwait(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2198,7 +2198,7 @@ bfa_fcport_sm_enabling(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2251,7 +2251,7 @@ bfa_fcport_sm_linkdown(struct bfa_fcport_s *fcport,
 
                bfa_fcport_scn(fcport, BFA_PORT_LINKUP, BFA_FALSE);
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port online: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2277,7 +2277,7 @@ bfa_fcport_sm_linkdown(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2322,9 +2322,9 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_DISABLE, 0, "Port Disable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port offline: WWN = %s\n", pwwn_buf);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port disabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2336,10 +2336,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                                BFA_PL_EID_PORT_ST_CHANGE, 0, "Port Linkdown");
                wwn2str(pwwn_buf, fcport->pwwn);
                if (BFA_PORT_IS_DISABLED(fcport->bfa))
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Base port offline: WWN = %s\n", pwwn_buf);
                else
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Base port (WWN = %s) "
                                "lost fabric connectivity\n", pwwn_buf);
                break;
@@ -2349,10 +2349,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                bfa_fcport_reset_linkinfo(fcport);
                wwn2str(pwwn_buf, fcport->pwwn);
                if (BFA_PORT_IS_DISABLED(fcport->bfa))
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Base port offline: WWN = %s\n", pwwn_buf);
                else
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Base port (WWN = %s) "
                                "lost fabric connectivity\n", pwwn_buf);
                break;
@@ -2363,10 +2363,10 @@ bfa_fcport_sm_linkup(struct bfa_fcport_s *fcport,
                bfa_fcport_scn(fcport, BFA_PORT_LINKDOWN, BFA_FALSE);
                wwn2str(pwwn_buf, fcport->pwwn);
                if (BFA_PORT_IS_DISABLED(fcport->bfa))
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "Base port offline: WWN = %s\n", pwwn_buf);
                else
-                       BFA_LOG(KERN_ERR, bfad, log_level,
+                       BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "Base port (WWN = %s) "
                                "lost fabric connectivity\n", pwwn_buf);
                break;
@@ -2497,7 +2497,7 @@ bfa_fcport_sm_disabling(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_ENABLE, 0, "Port Enable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port enabled: WWN = %s\n", pwwn_buf);
                break;
 
@@ -2551,7 +2551,7 @@ bfa_fcport_sm_disabled(struct bfa_fcport_s *fcport,
                bfa_plog_str(fcport->bfa->plog, BFA_PL_MID_HAL,
                                BFA_PL_EID_PORT_ENABLE, 0, "Port Enable");
                wwn2str(pwwn_buf, fcport->pwwn);
-               BFA_LOG(KERN_INFO, bfad, log_level,
+               BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                        "Base port enabled: WWN = %s\n", pwwn_buf);
                break;
 
index 1f938974b84876978600b40d97c265ddd7c2c7c0..6797720213b233bf444937445be983630e60d50d 100644 (file)
@@ -50,7 +50,7 @@ int           reqq_size, rspq_size, num_sgpgs;
 int            rport_del_timeout = BFA_FCS_RPORT_DEF_DEL_TIMEOUT;
 int            bfa_lun_queue_depth = BFAD_LUN_QUEUE_DEPTH;
 int            bfa_io_max_sge = BFAD_IO_MAX_SGE;
-int            log_level = 3; /* WARNING log level */
+int            bfa_log_level = 3; /* WARNING log level */
 int            ioc_auto_recover = BFA_TRUE;
 int            bfa_linkup_delay = -1;
 int            fdmi_enable = BFA_TRUE;
@@ -108,8 +108,8 @@ module_param(bfa_lun_queue_depth, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(bfa_lun_queue_depth, "Lun queue depth, default=32, Range[>0]");
 module_param(bfa_io_max_sge, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(bfa_io_max_sge, "Max io scatter/gather elements, default=255");
-module_param(log_level, int, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(log_level, "Driver log level, default=3, "
+module_param(bfa_log_level, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(bfa_log_level, "Driver log level, default=3, "
                                "Range[Critical:1|Error:2|Warning:3|Info:4]");
 module_param(ioc_auto_recover, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ioc_auto_recover, "IOC auto recovery, default=1, "
@@ -1112,7 +1112,7 @@ bfad_start_ops(struct bfad_s *bfad) {
        } else
                bfad_os_rport_online_wait(bfad);
 
-       BFA_LOG(KERN_INFO, bfad, log_level, "bfa device claimed\n");
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "bfa device claimed\n");
 
        return BFA_STATUS_OK;
 }
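
Note the rename is userspace-visible: the module parameter itself moves from the collision-prone generic name to the driver-prefixed one.

	module_param(bfa_log_level, int, S_IRUGO | S_IWUSR);
	/* loaded as: modprobe bfa bfa_log_level=4   (previously log_level=4) */
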
index 97f9b6c0937e75cf3a16762092ac46ac6f6cb890..d5ce2349ac59fa5c13c6d3b89d3ec2927561b84c 100644 (file)
@@ -337,7 +337,7 @@ extern int  num_sgpgs;
 extern int      rport_del_timeout;
 extern int      bfa_lun_queue_depth;
 extern int      bfa_io_max_sge;
-extern int      log_level;
+extern int      bfa_log_level;
 extern int      ioc_auto_recover;
 extern int      bfa_linkup_delay;
 extern int      msix_disable_cb;
index 8ca967dee66d4eac1b551c82ff5416454e0ffcb6..fbad5e9b240218bb349ac4c5217e328e785106c3 100644 (file)
@@ -225,7 +225,8 @@ bfad_im_abort_handler(struct scsi_cmnd *cmnd)
        }
 
        bfa_trc(bfad, hal_io->iotag);
-       BFA_LOG(KERN_INFO, bfad, log_level, "scsi%d: abort cmnd %p iotag %x\n",
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
+               "scsi%d: abort cmnd %p iotag %x\n",
                im_port->shost->host_no, cmnd, hal_io->iotag);
        (void) bfa_ioim_abort(hal_io);
        spin_unlock_irqrestore(&bfad->bfad_lock, flags);
@@ -241,7 +242,7 @@ bfad_im_abort_handler(struct scsi_cmnd *cmnd)
 
        cmnd->scsi_done(cmnd);
        bfa_trc(bfad, hal_io->iotag);
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "scsi%d: complete abort 0x%p iotag 0x%x\n",
                im_port->shost->host_no, cmnd, hal_io->iotag);
        return SUCCESS;
@@ -260,7 +261,7 @@ bfad_im_target_reset_send(struct bfad_s *bfad, struct scsi_cmnd *cmnd,
 
        tskim = bfa_tskim_alloc(&bfad->bfa, (struct bfad_tskim_s *) cmnd);
        if (!tskim) {
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                        "target reset, fail to allocate tskim\n");
                rc = BFA_STATUS_FAILED;
                goto out;
@@ -311,7 +312,7 @@ bfad_im_reset_lun_handler(struct scsi_cmnd *cmnd)
 
        tskim = bfa_tskim_alloc(&bfad->bfa, (struct bfad_tskim_s *) cmnd);
        if (!tskim) {
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                "LUN reset, fail to allocate tskim");
                spin_unlock_irqrestore(&bfad->bfad_lock, flags);
                rc = FAILED;
@@ -336,7 +337,7 @@ bfad_im_reset_lun_handler(struct scsi_cmnd *cmnd)
 
        task_status = cmnd->SCp.Status >> 1;
        if (task_status != BFI_TSKIM_STS_OK) {
-               BFA_LOG(KERN_ERR, bfad, log_level,
+               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                        "LUN reset failure, status: %d\n", task_status);
                rc = FAILED;
        }
@@ -380,7 +381,7 @@ bfad_im_reset_bus_handler(struct scsi_cmnd *cmnd)
 
                        task_status = cmnd->SCp.Status >> 1;
                        if (task_status != BFI_TSKIM_STS_OK) {
-                               BFA_LOG(KERN_ERR, bfad, log_level,
+                               BFA_LOG(KERN_ERR, bfad, bfa_log_level,
                                        "target reset failure,"
                                        " status: %d\n", task_status);
                                err_cnt++;
@@ -460,7 +461,7 @@ bfa_fcb_itnim_free(struct bfad_s *bfad, struct bfad_itnim_s *itnim_drv)
        fcid = bfa_fcs_itnim_get_fcid(&itnim_drv->fcs_itnim);
        wwn2str(wwpn_str, wwpn);
        fcid2str(fcid_str, fcid);
-       BFA_LOG(KERN_INFO, bfad, log_level,
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                "ITNIM FREE scsi%d: FCID: %s WWPN: %s\n",
                port->im_port->shost->host_no,
                fcid_str, wwpn_str);
@@ -589,7 +590,7 @@ void
 bfad_im_scsi_host_free(struct bfad_s *bfad, struct bfad_im_port_s *im_port)
 {
        bfa_trc(bfad, bfad->inst_no);
-       BFA_LOG(KERN_INFO, bfad, log_level, "Free scsi%d\n",
+       BFA_LOG(KERN_INFO, bfad, bfa_log_level, "Free scsi%d\n",
                        im_port->shost->host_no);
 
        fc_remove_host(im_port->shost);
@@ -1048,7 +1049,7 @@ bfad_im_itnim_work_handler(struct work_struct *work)
                        fcid2str(fcid_str, fcid);
                        list_add_tail(&itnim->list_entry,
                                &im_port->itnim_mapped_list);
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "ITNIM ONLINE Target: %d:0:%d "
                                "FCID: %s WWPN: %s\n",
                                im_port->shost->host_no,
@@ -1081,7 +1082,7 @@ bfad_im_itnim_work_handler(struct work_struct *work)
                        wwn2str(wwpn_str, wwpn);
                        fcid2str(fcid_str, fcid);
                        list_del(&itnim->list_entry);
-                       BFA_LOG(KERN_INFO, bfad, log_level,
+                       BFA_LOG(KERN_INFO, bfad, bfa_log_level,
                                "ITNIM OFFLINE Target: %d:0:%d "
                                "FCID: %s WWPN: %s\n",
                                im_port->shost->host_no,
index e5e9e6735f7d62e3d585505de7be4fe379fbbd40..9739431092d126aefdef78ec03888c66b40b256d 100644 (file)
@@ -198,6 +198,7 @@ int __init register_intc_controller(struct intc_desc *desc)
        list_add_tail(&d->list, &intc_list);
 
        raw_spin_lock_init(&d->lock);
+       INIT_RADIX_TREE(&d->tree, GFP_ATOMIC);
 
        d->index = nr_intc_controllers;
 
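Editor's note: initializing the radix tree with GFP_ATOMIC matters because it is later populated under the raw spinlock set up just above, where sleeping allocations are forbidden. A hedged sketch of that pattern (names illustrative):

    #include <linux/radix-tree.h>
    #include <linux/spinlock.h>

    /* A radix tree whose inserts happen under a spinlock must be
     * initialized with an atomic gfp mask so node allocation cannot sleep. */
    static RADIX_TREE(map, GFP_ATOMIC);
    static DEFINE_SPINLOCK(map_lock);

    static int map_add(unsigned long key, void *val)
    {
            int err;

            spin_lock(&map_lock);
            err = radix_tree_insert(&map, key, val);
            spin_unlock(&map_lock);
            return err;
    }
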
index 052b3c7fa6a0f644d26c613e67e69ffe00ad05be..8856bcca9d2933db4a8f98526bc716c6640bb075 100644 (file)
@@ -317,7 +317,7 @@ static void mcfqspi_work(struct work_struct *work)
                msg = container_of(mcfqspi->msgq.next, struct spi_message,
                                   queue);
 
-               list_del_init(&mcfqspi->msgq);
+               list_del_init(&msg->queue);
                spin_unlock_irqrestore(&mcfqspi->lock, flags);
 
                spi = msg->spi;
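
Editor's note: the one-liner fixes a classic list-API slip — the code deleted the queue *head* (`&mcfqspi->msgq`) instead of the first message's own linkage, corrupting the queue. The intended dequeue, sketched (assuming the `queue` list_head inside struct spi_message, as in the hunk):

    struct spi_message *msg;

    /* Take the first message off the queue: delete the message's own
     * list_head, never the queue head itself. */
    msg = list_first_entry(&mcfqspi->msgq, struct spi_message, queue);
    list_del_init(&msg->queue);
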
index ec9f0b1bf86494da73bd8d14cc69fe5927db8065..84439f655601f71577483acd423aed7622fae807 100644 (file)
@@ -563,7 +563,7 @@ static struct of_platform_driver mpc52xx_spi_of_driver = {
                .of_match_table = mpc52xx_spi_match,
        },
        .probe = mpc52xx_spi_probe,
-       .remove = __exit_p(mpc52xx_spi_remove),
+       .remove = __devexit_p(mpc52xx_spi_remove),
 };
 
 static int __init mpc52xx_spi_init(void)
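
Editor's note: `__exit_p(fn)` evaluates to NULL unless the code is built as a module, so a built-in driver silently lost its remove hook; `__devexit_p(fn)` is the hotplug-aware variant. Roughly, simplified from include/linux/init.h of this era:

    #ifdef MODULE
    #define __exit_p(x)     x
    #else
    #define __exit_p(x)     NULL    /* discarded when built in */
    #endif

    #ifdef CONFIG_HOTPLUG
    #define __devexit_p(x)  x       /* kept even when built in */
    #else
    #define __devexit_p(x)  NULL
    #endif
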
index 2a651e61bfbff30f23e8e44dddfcb70ab8a05173..951a160fc27fbe2614376776ba3b712646e305ff 100644 (file)
@@ -1305,10 +1305,49 @@ static int __exit omap2_mcspi_remove(struct platform_device *pdev)
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:omap2_mcspi");
 
+#ifdef CONFIG_SUSPEND
+/*
+ * When the SPI controller wakes up from off-mode, CS is in the active
+ * state. If CS was inactive when the driver was suspended, force it back
+ * to the inactive state on wake-up.
+ */
+static int omap2_mcspi_resume(struct device *dev)
+{
+       struct spi_master       *master = dev_get_drvdata(dev);
+       struct omap2_mcspi      *mcspi = spi_master_get_devdata(master);
+       struct omap2_mcspi_cs *cs;
+
+       omap2_mcspi_enable_clocks(mcspi);
+       list_for_each_entry(cs, &omap2_mcspi_ctx[master->bus_num - 1].cs,
+                           node) {
+               if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) {
+
+                       /*
+                        * Toggle the CHCONF_FORCE bit so the OMAP takes
+                        * the CS change into account.
+                        */
+                       MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 1);
+                       __raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
+                       MOD_REG_BIT(cs->chconf0, OMAP2_MCSPI_CHCONF_FORCE, 0);
+                       __raw_writel(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
+               }
+       }
+       omap2_mcspi_disable_clocks(mcspi);
+       return 0;
+}
+#else
+#define        omap2_mcspi_resume      NULL
+#endif
+
+static const struct dev_pm_ops omap2_mcspi_pm_ops = {
+       .resume = omap2_mcspi_resume,
+};
+
 static struct platform_driver omap2_mcspi_driver = {
        .driver = {
                .name =         "omap2_mcspi",
                .owner =        THIS_MODULE,
+               .pm =           &omap2_mcspi_pm_ops
        },
        .remove =       __exit_p(omap2_mcspi_remove),
 };
index 709c836607de23636e6c567a38833cceff752b08..b02d0cbce89049e003fe8300421adeed847e92ce 100644 (file)
@@ -584,8 +584,7 @@ void spi_unregister_master(struct spi_master *master)
        list_del(&master->list);
        mutex_unlock(&board_lock);
 
-       dummy = device_for_each_child(master->dev.parent, &master->dev,
-                                       __unregister);
+       dummy = device_for_each_child(&master->dev, NULL, __unregister);
        device_unregister(&master->dev);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_master);
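
Editor's note: the fix matters because `device_for_each_child(parent, data, fn)` iterates the children *of its first argument*; the old call walked the children of the master's parent rather than the spi_device children hanging off the master itself. The corrected traversal, sketched (callback body assumed, per the driver's conventions):

    static int __unregister(struct device *dev, void *unused)
    {
            /* each child of the master's device is a struct spi_device */
            spi_unregister_device(to_spi_device(dev));
            return 0;
    }

    /* walk the master's own children and unregister each one */
    device_for_each_child(&master->dev, NULL, __unregister);
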
index e3b4f645196603e517e22ef782205a02fb224b22..a99e2333b949efdbc10e391253ae952137238b7d 100644 (file)
@@ -258,18 +258,18 @@ static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
        return mpc8xxx_spi->count;
 }
 
-static void fsl_espi_addr2cmd(unsigned int addr, u8 *cmd)
+static inline void fsl_espi_addr2cmd(unsigned int addr, u8 *cmd)
 {
-       if (cmd[1] && cmd[2] && cmd[3]) {
+       if (cmd) {
                cmd[1] = (u8)(addr >> 16);
                cmd[2] = (u8)(addr >> 8);
                cmd[3] = (u8)(addr >> 0);
        }
 }
 
-static unsigned int fsl_espi_cmd2addr(u8 *cmd)
+static inline unsigned int fsl_espi_cmd2addr(u8 *cmd)
 {
-       if (cmd[1] && cmd[2] && cmd[3])
+       if (cmd)
                return cmd[1] << 16 | cmd[2] << 8 | cmd[3] << 0;
 
        return 0;
@@ -395,9 +395,11 @@ static void fsl_espi_rw_trans(struct spi_message *m,
                        }
                }
 
-               addr = fsl_espi_cmd2addr(local_buf);
-               addr += pos;
-               fsl_espi_addr2cmd(addr, local_buf);
+               if (pos > 0) {
+                       addr = fsl_espi_cmd2addr(local_buf);
+                       addr += pos;
+                       fsl_espi_addr2cmd(addr, local_buf);
+               }
 
                espi_trans->n_tx = n_tx;
                espi_trans->n_rx = trans_len;
@@ -507,16 +509,29 @@ void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
 
        /* We need handle RX first */
        if (events & SPIE_NE) {
-               u32 rx_data;
+               u32 rx_data, tmp;
+               u8 rx_data_8;
 
                /* Spin until RX is done */
                while (SPIE_RXCNT(events) < min(4, mspi->len)) {
                        cpu_relax();
                        events = mpc8xxx_spi_read_reg(&reg_base->event);
                }
-               mspi->len -= 4;
 
-               rx_data = mpc8xxx_spi_read_reg(&reg_base->receive);
+               if (mspi->len >= 4) {
+                       rx_data = mpc8xxx_spi_read_reg(&reg_base->receive);
+               } else {
+                       tmp = mspi->len;
+                       rx_data = 0;
+                       while (tmp--) {
+                               rx_data_8 = in_8((u8 *)&reg_base->receive);
+                               rx_data |= (rx_data_8 << (tmp * 8));
+                       }
+
+                       rx_data <<= (4 - mspi->len) * 8;
+               }
+
+               mspi->len -= 4;
 
                if (mspi->rx)
                        mspi->get_rx(rx_data, mspi);
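
Editor's note: a quick userspace model of the new short-read path (illustrative types, not driver code): bytes are popped MSB-first and then left-justified, so a 2-byte burst lands in the top half of the 32-bit word, which is what `get_rx()` expects.

    #include <stdint.h>

    static uint32_t pack_rx(const uint8_t *fifo, unsigned int len /* < 4 */)
    {
            uint32_t rx = 0;
            unsigned int tmp = len;

            while (tmp--)                   /* first byte gets the highest shift */
                    rx |= (uint32_t)*fifo++ << (tmp * 8);

            return rx << ((4 - len) * 8);   /* len=2: 0x0000B0B1 -> 0xB0B10000 */
    }
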
index 8c3c057aa8478fb2a436edfd571843513544ca71..d0e9e0207539e2c9491fc6c1de25c75dda4bd9c6 100644 (file)
@@ -435,12 +435,6 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio)
        int ret = 0;
        struct zram *zram = queue->queuedata;
 
-       if (unlikely(!zram->init_done)) {
-               set_bit(BIO_UPTODATE, &bio->bi_flags);
-               bio_endio(bio, 0);
-               return 0;
-       }
-
        if (!valid_io_request(zram, bio)) {
                zram_stat64_inc(zram, &zram->stats.invalid_io);
                bio_io_error(bio);
index 44447f54942f6d6e1c7c7e2a777b4c882f6b546d..99ac70e32556f841b83c909108767dedf444237e 100644 (file)
@@ -2206,8 +2206,11 @@ static int uea_boot(struct uea_softc *sc)
                goto err1;
        }
 
-       sc->kthread = kthread_run(uea_kthread, sc, "ueagle-atm");
-       if (sc->kthread == ERR_PTR(-ENOMEM)) {
+       /* Create worker thread, but don't start it here.  Start it after
+        * all usbatm generic initialization is done.
+        */
+       sc->kthread = kthread_create(uea_kthread, sc, "ueagle-atm");
+       if (IS_ERR(sc->kthread)) {
                uea_err(INS_TO_USBDEV(sc), "failed to create thread\n");
                goto err2;
        }
@@ -2624,6 +2627,7 @@ static struct usbatm_driver uea_usbatm_driver = {
 static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id)
 {
        struct usb_device *usb = interface_to_usbdev(intf);
+       int ret;
 
        uea_enters(usb);
        uea_info(usb, "ADSL device founded vid (%#X) pid (%#X) Rev (%#X): %s\n",
@@ -2637,7 +2641,19 @@ static int uea_probe(struct usb_interface *intf, const struct usb_device_id *id)
        if (UEA_IS_PREFIRM(id))
                return uea_load_firmware(usb, UEA_CHIP_VERSION(id));
 
-       return usbatm_usb_probe(intf, id, &uea_usbatm_driver);
+       ret = usbatm_usb_probe(intf, id, &uea_usbatm_driver);
+       if (ret == 0) {
+               struct usbatm_data *usbatm = usb_get_intfdata(intf);
+               struct uea_softc *sc = usbatm->driver_data;
+
+               /* Ensure carrier is initialized to off as early as possible */
+               UPDATE_ATM_SIGNAL(ATM_PHY_SIG_LOST);
+
+               /* Only start the worker thread when all init is done */
+               wake_up_process(sc->kthread);
+       }
+
+       return ret;
 }
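
Editor's note: two fixes in one hunk — `IS_ERR()` catches every `kthread_create()` failure rather than only `-ENOMEM`, and splitting creation from `wake_up_process()` closes the window where the thread ran before usbatm initialization finished. The general shape, sketched (struct my_ctx and worker_fn are hypothetical):

    #include <linux/kthread.h>
    #include <linux/sched.h>
    #include <linux/err.h>

    struct my_ctx { struct task_struct *kthread; /* ... */ };
    static int worker_fn(void *data);

    static int start_worker(struct my_ctx *ctx)
    {
            struct task_struct *t;

            t = kthread_create(worker_fn, ctx, "my-worker");
            if (IS_ERR(t))                  /* any error, not just -ENOMEM */
                    return PTR_ERR(t);
            ctx->kthread = t;

            /* finish whatever state worker_fn depends on, then let it run */
            wake_up_process(t);
            return 0;
    }
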
 
 static void uea_disconnect(struct usb_interface *intf)
index a4f4546f0be08555f8a2d793b6b94beaae91186f..397d15eb1ea8aee3d4fe870ae87f8b14f161654f 100644 (file)
@@ -242,6 +242,7 @@ static int cr_backlight_remove(struct platform_device *pdev)
        backlight_device_unregister(crp->cr_backlight_device);
        lcd_device_unregister(crp->cr_lcd_device);
        pci_dev_put(lpc_dev);
+       kfree(crp);
 
        return 0;
 }
index 0e6aa3d96a4246f7c05b0274a2295a3275d6d2ac..4ac1201ad6c2fb3229a60e1377f29b4eef78a2eb 100644 (file)
@@ -1458,7 +1458,7 @@ static bool apertures_overlap(struct aperture *gen, struct aperture *hw)
        if (gen->base == hw->base)
                return true;
        /* is the generic aperture base inside the hw base->hw base+size */
-       if (gen->base > hw->base && gen->base <= hw->base + hw->size)
+       if (gen->base > hw->base && gen->base < hw->base + hw->size)
                return true;
        return false;
 }
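
Editor's note: the aperture interval is half-open — `hw->base` of `hw->size` bytes covers [base, base + size). A quick numeric check of the off-by-one (illustrative values):

    /* hw: [0x1000, 0x2000)           gen->base = 0x2000
     * old: 0x2000 <= 0x2000  ->  overlap    (wrong)
     * new: 0x2000 <  0x2000  ->  no overlap (right) */
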
index 5c363d026f64c2856b44e748ecb149d9242be6b6..1ab2c25886757616c4faa324620ed7536fac9350 100644 (file)
 #define LCDC_SIZE      0x04
 #define SIZE_XMAX(x)   ((((x) >> 4) & 0x3f) << 20)
 
-#ifdef CONFIG_ARCH_MX1
-#define SIZE_YMAX(y)   ((y) & 0x1ff)
-#else
-#define SIZE_YMAX(y)   ((y) & 0x3ff)
-#endif
+#define YMAX_MASK       (cpu_is_mx1() ? 0x1ff : 0x3ff)
+#define SIZE_YMAX(y)   ((y) & YMAX_MASK)
 
 #define LCDC_VPW       0x08
 #define VPW_VPW(x)     ((x) & 0x3ff)
@@ -623,7 +620,7 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
        if (var->right_margin > 255)
                printk(KERN_ERR "%s: invalid right_margin %d\n",
                        info->fix.id, var->right_margin);
-       if (var->yres < 1 || var->yres > 511)
+       if (var->yres < 1 || var->yres > YMAX_MASK)
                printk(KERN_ERR "%s: invalid yres %d\n",
                        info->fix.id, var->yres);
        if (var->vsync_len > 100)
index d7df10315d8d6f51aba676328985a52128e81876..fcda0e97011384bd7ee55eaeeed5377f328cf016 100644 (file)
@@ -787,6 +787,9 @@ static int sh_hdmi_read_edid(struct sh_hdmi *hdmi)
                found_rate_error = rate_error;
        }
 
+       hdmi->var.width = hdmi->monspec.max_x * 10;
+       hdmi->var.height = hdmi->monspec.max_y * 10;
+
        /*
         * TODO 1: if no ->info is present, postpone running the config until
         * after ->info first gets registered.
@@ -960,8 +963,12 @@ static bool sh_hdmi_must_reconfigure(struct sh_hdmi *hdmi)
        dev_dbg(info->dev, "Old %ux%u, new %ux%u\n",
                mode1.xres, mode1.yres, mode2.xres, mode2.yres);
 
-       if (fb_mode_is_equal(&mode1, &mode2))
+       if (fb_mode_is_equal(&mode1, &mode2)) {
+               /* The same video mode may come from a different monitor */
+               old_var->width = new_var->width;
+               old_var->height = new_var->height;
                return false;
+       }
 
        dev_dbg(info->dev, "Switching %u -> %u lines\n",
                mode1.yres, mode2.yres);
@@ -1057,8 +1064,11 @@ static void sh_hdmi_edid_work_fn(struct work_struct *work)
                         * on, if we run a resume here, the logo disappears
                         */
                        if (lock_fb_info(hdmi->info)) {
-                               sh_hdmi_display_on(hdmi, hdmi->info);
-                               unlock_fb_info(hdmi->info);
+                               struct fb_info *info = hdmi->info;
+                               info->var.width = hdmi->var.width;
+                               info->var.height = hdmi->var.height;
+                               sh_hdmi_display_on(hdmi, info);
+                               unlock_fb_info(info);
                        }
                } else {
                        /* New monitor or have to wake up */
index b02d97a879d67256992a3941536fcd7e88626f63..c05326b61235e724e18ea288d88aa66138751899 100644 (file)
@@ -54,8 +54,8 @@ static int lcdc_shared_regs[] = {
 };
 #define NR_SHARED_REGS ARRAY_SIZE(lcdc_shared_regs)
 
-#define DEFAULT_XRES 1280
-#define DEFAULT_YRES 1024
+#define MAX_XRES 1920
+#define MAX_YRES 1080
 
 static unsigned long lcdc_offs_mainlcd[NR_CH_REGS] = {
        [LDDCKPAT1R] = 0x400,
@@ -914,22 +914,12 @@ static int sh_mobile_check_var(struct fb_var_screeninfo *var, struct fb_info *in
 {
        struct sh_mobile_lcdc_chan *ch = info->par;
 
-       if (var->xres < 160 || var->xres > 1920 ||
-           var->yres < 120 || var->yres > 1080 ||
-           var->left_margin < 32 || var->left_margin > 320 ||
-           var->right_margin < 12 || var->right_margin > 240 ||
-           var->upper_margin < 12 || var->upper_margin > 120 ||
-           var->lower_margin < 1 || var->lower_margin > 64 ||
-           var->hsync_len < 32 || var->hsync_len > 240 ||
-           var->vsync_len < 2 || var->vsync_len > 64 ||
-           var->pixclock < 6000 || var->pixclock > 40000 ||
+       if (var->xres > MAX_XRES || var->yres > MAX_YRES ||
            var->xres * var->yres * (ch->cfg.bpp / 8) * 2 > info->fix.smem_len) {
-               dev_warn(info->dev, "Invalid info: %u %u %u %u %u %u %u %u %u!\n",
-                        var->xres, var->yres,
-                        var->left_margin, var->right_margin,
-                        var->upper_margin, var->lower_margin,
-                        var->hsync_len, var->vsync_len,
-                        var->pixclock);
+               dev_warn(info->dev, "Invalid info: %u-%u-%u-%u x %u-%u-%u-%u @ %ukHz!\n",
+                        var->left_margin, var->xres, var->right_margin, var->hsync_len,
+                        var->upper_margin, var->yres, var->lower_margin, var->vsync_len,
+                        PICOS2KHZ(var->pixclock));
                return -EINVAL;
        }
        return 0;
@@ -1226,7 +1216,7 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev)
                }
 
                if (!mode)
-                       max_size = DEFAULT_XRES * DEFAULT_YRES;
+                       max_size = MAX_XRES * MAX_YRES;
                else if (max_cfg)
                        dev_dbg(&pdev->dev, "Found largest videomode %ux%u\n",
                                max_cfg->xres, max_cfg->yres);
@@ -1238,12 +1228,14 @@ static int __devinit sh_mobile_lcdc_probe(struct platform_device *pdev)
                        mode = &default_720p;
                        num_cfg = 1;
                } else {
-                       num_cfg = ch->cfg.num_cfg;
+                       num_cfg = cfg->num_cfg;
                }
 
                fb_videomode_to_modelist(mode, num_cfg, &info->modelist);
 
                fb_videomode_to_var(var, mode);
+               var->width = cfg->lcd_size_cfg.width;
+               var->height = cfg->lcd_size_cfg.height;
                /* Default Y virtual resolution is 2x panel size */
                var->yres_virtual = var->yres * 2;
                var->activate = FB_ACTIVATE_NOW;
index 3d77116e463410dac81b6c530c74d5a55f9c7afc..dea7b5bf6e2ccd986cf99840db504d6a38382293 100644 (file)
@@ -642,19 +642,14 @@ static struct notifier_block die_notifier = {
  */
 
 #ifdef CONFIG_HPWDT_NMI_DECODING
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
 {
        /*
         * If nmi_watchdog is turned off then we can turn on
         * our nmi decoding capability.
         */
-       if (!nmi_watchdog_active())
-               hpwdt_nmi_decoding = 1;
-       else
-               dev_warn(&dev->dev, "NMI decoding is disabled. To enable this "
-                       "functionality you must reboot with nmi_watchdog=0 "
-                       "and load the hpwdt driver with priority=1.\n");
+       hpwdt_nmi_decoding = 1;
 }
 #else
 static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
@@ -662,7 +657,7 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev)
        dev_warn(&dev->dev, "NMI decoding is disabled. "
                "Your kernel does not support a NMI Watchdog.\n");
 }
-#endif /* ARCH_HAS_NMI_WATCHDOG */
+#endif /* CONFIG_X86_LOCAL_APIC */
 
 static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
index 428f8a1583e8598ae69414f8171e21a3bbbf2ba8..3939e53f5f981d327ff344d522ab59f4df43238b 100644 (file)
@@ -231,7 +231,7 @@ static int __devinit rdc321x_wdt_probe(struct platform_device *pdev)
        struct resource *r;
        struct rdc321x_wdt_pdata *pdata;
 
-       pdata = pdev->dev.platform_data;
+       pdata = platform_get_drvdata(pdev);
        if (!pdata) {
                dev_err(&pdev->dev, "no platform data supplied\n");
                return -ENODEV;
index dc963929de652cb997550e38338855823832e53c..981c8477adab6dcde6708d2b8bfde8bf83c8d53b 100644 (file)
@@ -232,6 +232,8 @@ static int setup_new_group_blocks(struct super_block *sb,
                               GFP_NOFS);
        if (err)
                goto exit_bh;
+       for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
+               ext4_set_bit(bit, bh->b_data);
 
        ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
                   input->block_bitmap - start);
@@ -247,6 +249,9 @@ static int setup_new_group_blocks(struct super_block *sb,
        err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
        if (err)
                goto exit_bh;
+       for (i = 0, bit = input->inode_table - start;
+            i < sbi->s_itb_per_group; i++, bit++)
+               ext4_set_bit(bit, bh->b_data);
 
        if ((err = extend_or_restart_transaction(handle, 2, bh)))
                goto exit_bh;
index 5476c066d4ee336733445eda2f804561179ecb41..3c4039d5eef12d1b35ffd93c3f1861e43cc9b520 100644 (file)
@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        int metadata;
        unsigned int revokes = 0;
        int x;
-       int error;
+       int error = 0;
 
        if (!*top)
                sm->sm_first = 0;
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        if (metadata)
                revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
 
-       error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+       if (ip != GFS2_I(sdp->sd_rindex))
+               error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+       else if (!sdp->sd_rgrps)
+               error = gfs2_ri_update(ip);
+
        if (error)
                return error;
 
@@ -879,7 +883,8 @@ out_rg_gunlock:
 out_rlist:
        gfs2_rlist_free(&rlist);
 out:
-       gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
+       if (ip != GFS2_I(sdp->sd_rindex))
+               gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
        return error;
 }
 
index f92c1770416981df8b625b6f918bac6c061c6e5e..08a8beb152e60d6aa4dd0b38ea852973e99263d4 100644 (file)
@@ -541,21 +541,6 @@ out_locked:
        spin_unlock(&gl->gl_spin);
 }
 
-static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
-                                unsigned int req_state,
-                                unsigned int flags)
-{
-       int ret = LM_OUT_ERROR;
-
-       if (!sdp->sd_lockstruct.ls_ops->lm_lock)
-               return req_state == LM_ST_UNLOCKED ? 0 : req_state;
-
-       if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-               ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
-                                                        req_state, flags);
-       return ret;
-}
-
 /**
  * do_xmote - Calls the DLM to change the state of a lock
  * @gl: The lock state
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin)
 
        lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
                      LM_FLAG_PRIORITY);
-       BUG_ON(gl->gl_state == target);
-       BUG_ON(gl->gl_state == gl->gl_target);
+       GLOCK_BUG_ON(gl, gl->gl_state == target);
+       GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
        if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
            glops->go_inval) {
                set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
                do_error(gl, 0); /* Fail queued try locks */
        }
+       gl->gl_req = target;
        spin_unlock(&gl->gl_spin);
        if (glops->go_xmote_th)
                glops->go_xmote_th(gl);
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin)
            gl->gl_state == LM_ST_DEFERRED) &&
            !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
                lck_flags |= LM_FLAG_TRY_1CB;
-       ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
 
-       if (!(ret & LM_OUT_ASYNC)) {
-               finish_xmote(gl, ret);
+       if (sdp->sd_lockstruct.ls_ops->lm_lock) {
+               /* lock_dlm */
+               ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
+               GLOCK_BUG_ON(gl, ret);
+       } else { /* lock_nolock */
+               finish_xmote(gl, target);
                if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                        gfs2_glock_put(gl);
-       } else {
-               GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
        }
+
        spin_lock(&gl->gl_spin);
 }
 
@@ -951,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
 
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 {
+       struct va_format vaf;
        va_list args;
 
        va_start(args, fmt);
+
        if (seq) {
                struct gfs2_glock_iter *gi = seq->private;
                vsprintf(gi->string, fmt, args);
                seq_printf(seq, gi->string);
        } else {
-               printk(KERN_ERR " ");
-               vprintk(fmt, args);
+               vaf.fmt = fmt;
+               vaf.va = &args;
+
+               printk(KERN_ERR " %pV", &vaf);
        }
+
        va_end(args);
 }
 
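Editor's note: `%pV` (a then-recent printk extension) expands a nested format/va_list pair in a single pass, so the KERN_ERR path no longer needs a separate vprintk call after printing its prefix. A small hedged sketch:

    #include <linux/kernel.h>

    static void my_err(const char *fmt, ...)
    {
            struct va_format vaf;
            va_list args;

            va_start(args, fmt);
            vaf.fmt = fmt;
            vaf.va = &args;
            /* prefix and caller-supplied format emitted in one printk */
            printk(KERN_ERR "myfs: %pV", &vaf);
            va_end(args);
    }
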
@@ -1361,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
  * @gl: Pointer to the glock
  * @ret: The return value from the dlm
  *
+ * The gl_reply field is protected by the gl_spin lock, so it is safe
+ * for it to share a bitfield word with the other glock state fields.
  */
 
 void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 {
        struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
 
+       spin_lock(&gl->gl_spin);
        gl->gl_reply = ret;
 
        if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
-               spin_lock(&gl->gl_spin);
                if (gfs2_should_freeze(gl)) {
                        set_bit(GLF_FROZEN, &gl->gl_flags);
                        spin_unlock(&gl->gl_spin);
                        return;
                }
-               spin_unlock(&gl->gl_spin);
        }
+
+       spin_unlock(&gl->gl_spin);
        set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+       smp_wmb();
        gfs2_glock_hold(gl);
        if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
                gfs2_glock_put(gl);
@@ -1626,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
 static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
 {
        struct task_struct *gh_owner = NULL;
-       char buffer[KSYM_SYMBOL_LEN];
        char flags_buf[32];
 
-       sprint_symbol(buffer, gh->gh_ip);
        if (gh->gh_owner_pid)
                gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
-       gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n",
-                 state2str(gh->gh_state),
-                 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
-                 gh->gh_error, 
-                 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
-                 gh_owner ? gh_owner->comm : "(ended)", buffer);
+       gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
+                      state2str(gh->gh_state),
+                      hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
+                      gh->gh_error,
+                      gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
+                      gh_owner ? gh_owner->comm : "(ended)",
+                      (void *)gh->gh_ip);
        return 0;
 }
 
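Editor's note: likewise, `%pS` resolves a code address to symbol+offset inside vsnprintf itself, replacing the on-stack `KSYM_SYMBOL_LEN` buffer and the explicit `sprint_symbol()` call. For instance (illustrative output):

    /* prints e.g. "acquired at gfs2_glock_nq+0x2a/0x1b0" */
    printk(KERN_DEBUG "acquired at %pS\n", (void *)gh->gh_ip);
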
@@ -1782,12 +1778,13 @@ int __init gfs2_glock_init(void)
        }
 #endif
 
-       glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER |
+       glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
                                          WQ_HIGHPRI | WQ_FREEZEABLE, 0);
        if (IS_ERR(glock_workqueue))
                return PTR_ERR(glock_workqueue);
-       gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER |
-                                               WQ_FREEZEABLE, 0);
+       gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
+                                               WQ_MEM_RECLAIM | WQ_FREEZEABLE,
+                                               0);
        if (IS_ERR(gfs2_delete_workqueue)) {
                destroy_workqueue(glock_workqueue);
                return PTR_ERR(gfs2_delete_workqueue);
index db1c26d6d2206c8f9e9b68396380ed8791f3c720..691851ceb6153f59b91cd64d1ce0fecea46904e0 100644 (file)
@@ -87,11 +87,10 @@ enum {
 #define GL_ASYNC               0x00000040
 #define GL_EXACT               0x00000080
 #define GL_SKIP                        0x00000100
-#define GL_ATIME               0x00000200
 #define GL_NOCACHE             0x00000400
   
 /*
- * lm_lock() and lm_async_cb return flags
+ * lm_async_cb return flags
  *
  * LM_OUT_ST_MASK
  * Masks the lower two bits of lock state in the returned value.
@@ -99,15 +98,11 @@ enum {
  * LM_OUT_CANCELED
  * The lock request was canceled.
  *
- * LM_OUT_ASYNC
- * The result of the request will be returned in an LM_CB_ASYNC callback.
- *
  */
 
 #define LM_OUT_ST_MASK         0x00000003
 #define LM_OUT_CANCELED                0x00000008
-#define LM_OUT_ASYNC           0x00000080
-#define LM_OUT_ERROR           0x00000100
+#define LM_OUT_ERROR           0x00000004
 
 /*
  * lm_recovery_done() messages
@@ -124,25 +119,12 @@ struct lm_lockops {
        void (*lm_unmount) (struct gfs2_sbd *sdp);
        void (*lm_withdraw) (struct gfs2_sbd *sdp);
        void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
-       unsigned int (*lm_lock) (struct gfs2_glock *gl,
-                                unsigned int req_state, unsigned int flags);
+       int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
+                       unsigned int flags);
        void (*lm_cancel) (struct gfs2_glock *gl);
        const match_table_t *lm_tokens;
 };
 
-#define LM_FLAG_TRY            0x00000001
-#define LM_FLAG_TRY_1CB                0x00000002
-#define LM_FLAG_NOEXP          0x00000004
-#define LM_FLAG_ANY            0x00000008
-#define LM_FLAG_PRIORITY       0x00000010
-
-#define GL_ASYNC               0x00000040
-#define GL_EXACT               0x00000080
-#define GL_SKIP                        0x00000100
-#define GL_NOCACHE             0x00000400
-
-#define GLR_TRYFAILED          13
-
 extern struct workqueue_struct *gfs2_delete_workqueue;
 static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 {
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
 void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
+
+__attribute__ ((format(printf, 2, 3)))
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
 
 /**
index 0d149dcc04e515adfaaeb632a6677e5e3b555f45..263561bf1a5059b4bf644340faa6a4435c62d14f 100644 (file)
@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl)
 
        if (gl->gl_state != LM_ST_UNLOCKED &&
            test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
-               flush_workqueue(gfs2_delete_workqueue);
                gfs2_meta_syncfs(sdp);
                gfs2_log_shutdown(sdp);
        }
index 764fbb49efc8e3adbdeda7f83f178b0fd6ea70f8..8d3d2b4a0a7d64431d63edff082cbedbd5b2543b 100644 (file)
@@ -207,12 +207,14 @@ struct gfs2_glock {
 
        spinlock_t gl_spin;
 
-       unsigned int gl_state;
-       unsigned int gl_target;
-       unsigned int gl_reply;
+       /* State fields protected by gl_spin */
+       unsigned int gl_state:2,        /* Current state */
+                    gl_target:2,       /* Target state */
+                    gl_demote_state:2, /* State requested by remote node */
+                    gl_req:2,          /* State in last dlm request */
+                    gl_reply:8;        /* Last reply from the dlm */
+
        unsigned int gl_hash;
-       unsigned int gl_req;
-       unsigned int gl_demote_state; /* state requested by remote node */
        unsigned long gl_demote_time; /* time of first demote request */
        struct list_head gl_holders;
 
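Editor's note: packing these fields into one word shrinks the glock, but C bitfield updates are read-modify-write on the whole word, so every writer of any of the five fields must hold gl_spin — which is exactly why gfs2_glock_complete() above now takes the spinlock before touching gl_reply. Schematically:

    /* Adjacent bitfields share a machine word; concurrent writers would
     * clobber each other without a common lock. */
    spin_lock(&gl->gl_spin);
    gl->gl_reply = ret;     /* RMW on the word also holding gl_state etc. */
    spin_unlock(&gl->gl_spin);
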
index e1213f7f92179aa2472304ff0db4294be66040d8..14e682dbe8bff4bd4063e4a54a5445cbcfe17937 100644 (file)
@@ -916,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
        if (error)
                return error;
 
-       if ((attr->ia_valid & ATTR_SIZE) &&
-           attr->ia_size != i_size_read(inode)) {
-               error = vmtruncate(inode, attr->ia_size);
-               if (error)
-                       return error;
-       }
-
        setattr_copy(inode, attr);
        mark_inode_dirty(inode);
-
-       gfs2_assert_warn(GFS2_SB(inode), !error);
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
index 1c09425b45fd728ba52c1f5f49c3feac187640a2..6e493aee28f82dfb593574f751ed81025207645c 100644 (file)
@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
        return lkf;
 }
 
-static unsigned int gdlm_lock(struct gfs2_glock *gl,
-                             unsigned int req_state, unsigned int flags)
+static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
+                    unsigned int flags)
 {
        struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
-       int error;
        int req;
        u32 lkf;
 
-       gl->gl_req = req_state;
        req = make_mode(req_state);
        lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
 
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
         * Submit the actual lock request.
         */
 
-       error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
-                        GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
-       if (error == -EAGAIN)
-               return 0;
-       if (error)
-               return LM_OUT_ERROR;
-       return LM_OUT_ASYNC;
+       return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
+                       GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
 }
 
 static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
index 12cbea7502c26040fb90db5750e764bdd831079a..1db6b73432298d4092c0e8684483b8fff29c78ee 100644 (file)
@@ -1069,7 +1069,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
-       struct buffer_head *dibh;
        u32 ouid, ogid, nuid, ngid;
        int error;
 
@@ -1100,25 +1099,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
        if (error)
                goto out_gunlock_q;
 
-       error = gfs2_meta_inode_buffer(ip, &dibh);
+       error = gfs2_setattr_simple(ip, attr);
        if (error)
                goto out_end_trans;
 
-       if ((attr->ia_valid & ATTR_SIZE) &&
-           attr->ia_size != i_size_read(inode)) {
-               int error;
-
-               error = vmtruncate(inode, attr->ia_size);
-               gfs2_assert_warn(sdp, !error);
-       }
-
-       setattr_copy(inode, attr);
-       mark_inode_dirty(inode);
-
-       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-       gfs2_dinode_out(ip, dibh->b_data);
-       brelse(dibh);
-
        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
                u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
                gfs2_quota_change(ip, -blocks, ouid, ogid);
index f606baf9ba7247e9a5fd9ccfb2cc9426019e589e..a689901963dea43c82b6178a4451c09560061e76 100644 (file)
@@ -666,6 +666,10 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                        qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
                        qd->qd_qb.qb_limit = qp->qu_limit;
                }
+               if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
+                       qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+                       qd->qd_qb.qb_value = qp->qu_value;
+               }
        }
 
        /* Write the quota into the quota file on disk */
@@ -1509,7 +1513,7 @@ out:
 }
 
 /* GFS2 only supports a subset of the XFS fields */
-#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
+#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
 
 static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
                          struct fs_disk_quota *fdq)
@@ -1569,9 +1573,15 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
        if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
            ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
                fdq->d_fieldmask ^= FS_DQ_BSOFT;
+
        if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
            ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
                fdq->d_fieldmask ^= FS_DQ_BHARD;
+
+       if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
+           ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
+               fdq->d_fieldmask ^= FS_DQ_BCOUNT;
+
        if (fdq->d_fieldmask == 0)
                goto out_i;
 
@@ -1620,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = {
        .get_dqblk      = gfs2_get_dqblk,
        .set_dqblk      = gfs2_set_dqblk,
 };
-
index 33c8407b876f00ceef0741221ebae4ba46ecb426..7293ea27020c680307e0145e863ebbb7eb0d6949 100644 (file)
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
        for (rgrps = 0;; rgrps++) {
                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
 
-               if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode))
+               if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
                        break;
                error = gfs2_internal_read(ip, &ra_state, buf, &pos,
                                           sizeof(struct gfs2_rindex));
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
  * Returns: 0 on successful update, error code otherwise
  */
 
-static int gfs2_ri_update(struct gfs2_inode *ip)
+int gfs2_ri_update(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct inode *inode = &ip->i_inode;
@@ -613,46 +613,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
        return 0;
 }
 
-/**
- * gfs2_ri_update_special - Pull in a new resource index from the disk
- *
- * This is a special version that's safe to call from gfs2_inplace_reserve_i.
- * In this case we know that we don't have any resource groups in memory yet.
- *
- * @ip: pointer to the rindex inode
- *
- * Returns: 0 on successful update, error code otherwise
- */
-static int gfs2_ri_update_special(struct gfs2_inode *ip)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct inode *inode = &ip->i_inode;
-       struct file_ra_state ra_state;
-       struct gfs2_rgrpd *rgd;
-       unsigned int max_data = 0;
-       int error;
-
-       file_ra_state_init(&ra_state, inode->i_mapping);
-       for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
-               /* Ignore partials */
-               if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
-                   i_size_read(inode))
-                       break;
-               error = read_rindex_entry(ip, &ra_state);
-               if (error) {
-                       clear_rgrpdi(sdp);
-                       return error;
-               }
-       }
-       list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
-               if (rgd->rd_data > max_data)
-                       max_data = rgd->rd_data;
-       sdp->sd_max_rg_data = max_data;
-
-       sdp->sd_rindex_uptodate = 1;
-       return 0;
-}
-
 /**
  * gfs2_rindex_hold - Grab a lock on the rindex
  * @sdp: The GFS2 superblock
@@ -1226,16 +1186,25 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
                        error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
                else if (!sdp->sd_rgrps) /* We may not have the rindex read
                                            in, so: */
-                       error = gfs2_ri_update_special(ip);
+                       error = gfs2_ri_update(ip);
                if (error)
                        return error;
        }
 
+try_again:
        do {
                error = get_local_rgrp(ip, &last_unlinked);
                /* If there is no space, flushing the log may release some */
-               if (error)
+               if (error) {
+                       if (ip == GFS2_I(sdp->sd_rindex) &&
+                           !sdp->sd_rindex_uptodate) {
+                               error = gfs2_ri_update(ip);
+                               if (error)
+                                       return error;
+                               goto try_again;
+                       }
                        gfs2_log_flush(sdp, NULL);
+               }
        } while (error && tries++ < 3);
 
        if (error) {
index 0e35c0466f9a6c5979a3fe8c339def323bc37fad..50c2bb04369c8dd617fed95513461f6dc3651d0d 100644 (file)
@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
 
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
 
+extern int gfs2_ri_update(struct gfs2_inode *ip);
 extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
 extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
 
index 30b58f07c8a6b219fc964efe101ce5f861397885..439b61c03262b767956e23f761b637e0b6905383 100644 (file)
@@ -1296,10 +1296,8 @@ fail:
 
 int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 {
-       struct inode *inode = &ip->i_inode;
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_ea_location el;
-       struct buffer_head *dibh;
        int error;
 
        error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
@@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
        if (error)
                return error;
 
-       error = gfs2_meta_inode_buffer(ip, &dibh);
-       if (error)
-               goto out_trans_end;
-
-       if ((attr->ia_valid & ATTR_SIZE) &&
-           attr->ia_size != i_size_read(inode)) {
-               int error;
-
-               error = vmtruncate(inode, attr->ia_size);
-               gfs2_assert_warn(GFS2_SB(inode), !error);
-       }
-
-       setattr_copy(inode, attr);
-       mark_inode_dirty(inode);
-
-       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-       gfs2_dinode_out(ip, dibh->b_data);
-       brelse(dibh);
-
-out_trans_end:
+       error = gfs2_setattr_simple(ip, attr);
        gfs2_trans_end(sdp);
        return error;
 }
index f46ee8b0e135eb62d00e45dc30bbee31c61c213a..9da29706f91cd74772169e6391333ae877a1fa05 100644 (file)
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
                super->s_journal_seg[i] = segno;
                super->s_journal_ec[i] = ec;
                logfs_set_segment_reserved(sb, segno);
-               err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
+               err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
                BUG_ON(err); /* mempool should prevent this */
                err = logfs_erase_segment(sb, segno, 1);
                BUG_ON(err); /* FIXME: remount-ro would be nicer */
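
Editor's note: GFP_KERNEL here could recurse — under memory pressure the allocation may trigger writeback into logfs itself while the journal pass holds its locks. GFP_NOFS keeps direct reclaim out of filesystem paths. The general rule, sketched:

    /* While holding fs locks or inside a transaction, allocate with
     * GFP_NOFS so direct reclaim cannot re-enter the filesystem. */
    void *p = kmalloc(len, GFP_NOFS);
    if (!p)
            return -ENOMEM;
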
index 6127baf0e1884760e757d656a772cf13d3953be2..ee99a9f5dfd3ac251b6b824a1032ce45b68b6ed7 100644 (file)
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
 
        /* FIXME: transaction is part of logfs_block now.  Is that enough? */
        err = logfs_write_buf(master_inode, page, 0);
+       if (err)
+               move_page_to_inode(inode, page);
+
        logfs_put_write_page(page);
        return err;
 }
index f1e962cb3b73084699a182933b7760926c36880e..0d7c5540ad669a55d4def39f21bbe7b42bfb83cc 100644 (file)
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
        /* this io's submitter should not have unlocked this before we could */
        BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
+       if (ocfs2_iocb_is_sem_locked(iocb)) {
+               up_read(&inode->i_alloc_sem);
+               ocfs2_iocb_clear_sem_locked(iocb);
+       }
+
        ocfs2_iocb_clear_rw_locked(iocb);
 
        level = ocfs2_iocb_rw_locked_level(iocb);
-       if (!level)
-               up_read(&inode->i_alloc_sem);
        ocfs2_rw_unlock(inode, level);
 
        if (is_async)
index 76bfdfda691a03b1790ac9670a8bb79d042e7584..eceb456037c11c7a2e68cc2d510908ef781cfdbf 100644 (file)
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
        else
                clear_bit(1, (unsigned long *)&iocb->private);
 }
+
+/*
+ * A named enum for the lock-type bit numbers stored in iocb->private,
+ * used to communicate between ocfs2_dio_end_io() and
+ * ocfs2_file_aio_write/read().
+ */
+enum ocfs2_iocb_lock_bits {
+       OCFS2_IOCB_RW_LOCK = 0,
+       OCFS2_IOCB_RW_LOCK_LEVEL,
+       OCFS2_IOCB_SEM,
+       OCFS2_IOCB_NUM_LOCKS
+};
+
 #define ocfs2_iocb_clear_rw_locked(iocb) \
-       clear_bit(0, (unsigned long *)&iocb->private)
+       clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
 #define ocfs2_iocb_rw_locked_level(iocb) \
-       test_bit(1, (unsigned long *)&iocb->private)
+       test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_sem_locked(iocb) \
+       set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_sem_locked(iocb) \
+       clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_sem_locked(iocb) \
+       test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
 #endif /* OCFS2_FILE_H */
index c7fba396392d8c0a866c5194204eb1abb49d827d..6c61771469af24ef9e2610e93e65092768b59691 100644 (file)
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
        define_mask(QUOTA),
        define_mask(REFCOUNT),
        define_mask(BASTS),
+       define_mask(RESERVATIONS),
+       define_mask(CLUSTER),
        define_mask(ERROR),
        define_mask(NOTICE),
        define_mask(KTHREAD),
-       define_mask(RESERVATIONS),
 };
 
 static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
index ea2ed9f56c94ad0654a5f79259e50a1f046bed97..34d6544357d9718699af25e25aecc48a1f26c9c0 100644 (file)
@@ -81,7 +81,7 @@
 #include <linux/sched.h>
 
 /* bits that are frequently given and infrequently matched in the low word */
-/* NOTE: If you add a flag, you need to also update mlog.c! */
+/* NOTE: If you add a flag, you need to also update masklog.c! */
 #define ML_ENTRY       0x0000000000000001ULL /* func call entry */
 #define ML_EXIT                0x0000000000000002ULL /* func call exit */
 #define ML_TCP         0x0000000000000004ULL /* net cluster/tcp.c */
 #define ML_XATTR       0x0000000020000000ULL /* ocfs2 extended attributes */
 #define ML_QUOTA       0x0000000040000000ULL /* ocfs2 quota operations */
 #define ML_REFCOUNT    0x0000000080000000ULL /* refcount tree operations */
-#define ML_BASTS       0x0000001000000000ULL /* dlmglue asts and basts */
+#define ML_BASTS       0x0000000100000000ULL /* dlmglue asts and basts */
+#define ML_RESERVATIONS        0x0000000200000000ULL /* ocfs2 alloc reservations */
+#define ML_CLUSTER     0x0000000400000000ULL /* cluster stack */
+
 /* bits that are infrequently given and frequently matched in the high word */
-#define ML_ERROR       0x0000000100000000ULL /* sent to KERN_ERR */
-#define ML_NOTICE      0x0000000200000000ULL /* setn to KERN_NOTICE */
-#define ML_KTHREAD     0x0000000400000000ULL /* kernel thread activity */
-#define ML_RESERVATIONS        0x0000000800000000ULL /* ocfs2 alloc reservations */
-#define ML_CLUSTER     0x0000001000000000ULL /* cluster stack */
+#define ML_ERROR       0x1000000000000000ULL /* sent to KERN_ERR */
+#define ML_NOTICE      0x2000000000000000ULL /* sent to KERN_NOTICE */
+#define ML_KTHREAD     0x4000000000000000ULL /* kernel thread activity */
 
 #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
 #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
index c49f6de0e7abb6e096ddc56e795957e0194bb8dd..d417b3f9b0c730e5cbb8d475358e6d8891fdc05c 100644 (file)
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 
        di->i_dx_root = cpu_to_le64(dr_blkno);
 
+       spin_lock(&OCFS2_I(dir)->ip_lock);
        OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
        di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+       spin_unlock(&OCFS2_I(dir)->ip_lock);
 
        ocfs2_journal_dirty(handle, di_bh);
 
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
                goto out_commit;
        }
 
+       spin_lock(&OCFS2_I(dir)->ip_lock);
        OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
        di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+       spin_unlock(&OCFS2_I(dir)->ip_lock);
        di->i_dx_root = cpu_to_le64(0ULL);
 
        ocfs2_journal_dirty(handle, di_bh);
index f564b0e5f80d8c89e08eeba4a5e133b24cb67373..59f0f6bdfc62110141ca7ad413dc51abadd4b3c8 100644 (file)
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
  */
 static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
                                      struct dlm_lock_resource *res,
-                                     int *numlocks)
+                                     int *numlocks,
+                                     int *hasrefs)
 {
        int ret;
        int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
 
        assert_spin_locked(&res->spinlock);
 
+       *numlocks = 0;
+       *hasrefs = 0;
+
        ret = -EINVAL;
        if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
                mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
        }
 
        *numlocks = count;
-       mlog(0, "migrateable lockres having %d locks\n", *numlocks);
+
+       count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+       if (count < O2NM_MAX_NODES)
+               *hasrefs = 1;
+
+       mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
+            res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
 
 leave:
        return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
        const char *name;
        unsigned int namelen;
        int mle_added = 0;
-       int numlocks;
+       int numlocks, hasrefs;
        int wake = 0;
 
        if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
        name = res->lockname.name;
        namelen = res->lockname.len;
 
-       mlog(0, "migrating %.*s to %u\n", namelen, name, target);
+       mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
 
        /*
         * ensure this lockres is a proper candidate for migration
         */
        spin_lock(&res->spinlock);
-       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
+       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
        if (ret < 0) {
                spin_unlock(&res->spinlock);
                goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
        spin_unlock(&res->spinlock);
 
        /* no work to do */
-       if (numlocks == 0) {
-               mlog(0, "no locks were found on this lockres! done!\n");
+       if (numlocks == 0 && !hasrefs)
                goto leave;
-       }
 
        /*
         * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
         * find a node to migrate the lockres to
         */
 
-       mlog(0, "picking a migration node\n");
        spin_lock(&dlm->spinlock);
        /* pick a new node */
        if (!test_bit(target, dlm->domain_map) ||
            target >= O2NM_MAX_NODES) {
                target = dlm_pick_migration_target(dlm, res);
        }
-       mlog(0, "node %u chosen for migration\n", target);
+       mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
+            namelen, name, target);
 
        if (target >= O2NM_MAX_NODES ||
            !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
        int ret;
        int lock_dropped = 0;
-       int numlocks;
+       int numlocks, hasrefs;
 
        spin_lock(&res->spinlock);
        if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
        }
 
        /* No need to migrate a lockres having no locks */
-       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
-       if (ret >= 0 && numlocks == 0) {
+       ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
+       if (ret >= 0 && numlocks == 0 && !hasrefs) {
                spin_unlock(&res->spinlock);
                goto leave;
        }
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
                }
                queue++;
        }
+
+       nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+       if (nodenum < O2NM_MAX_NODES) {
+               spin_unlock(&res->spinlock);
+               return nodenum;
+       }
        spin_unlock(&res->spinlock);
        mlog(0, "have not found a suitable target yet! checking domain map\n");
 
index 77b4c04a2809831e7209d1be3f8e860a0ba7edc8..f6cba566429d314fe82f9f433889d1e22111462a 100644 (file)
@@ -2241,11 +2241,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 
        mutex_lock(&inode->i_mutex);
 
+       ocfs2_iocb_clear_sem_locked(iocb);
+
 relock:
        /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
        if (direct_io) {
                down_read(&inode->i_alloc_sem);
                have_alloc_sem = 1;
+               /* communicate with ocfs2_dio_end_io */
+               ocfs2_iocb_set_sem_locked(iocb);
        }
 
        /*
@@ -2382,8 +2386,10 @@ out:
                ocfs2_rw_unlock(inode, rw_level);
 
 out_sems:
-       if (have_alloc_sem)
+       if (have_alloc_sem) {
                up_read(&inode->i_alloc_sem);
+               ocfs2_iocb_clear_sem_locked(iocb);
+       }
 
        mutex_unlock(&inode->i_mutex);
 
@@ -2527,6 +2533,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
                goto bail;
        }
 
+       ocfs2_iocb_clear_sem_locked(iocb);
+
        /*
         * buffered reads protect themselves in ->readpage().  O_DIRECT reads
         * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2542,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
        if (filp->f_flags & O_DIRECT) {
                down_read(&inode->i_alloc_sem);
                have_alloc_sem = 1;
+               ocfs2_iocb_set_sem_locked(iocb);
 
                ret = ocfs2_rw_lock(inode, 0);
                if (ret < 0) {
@@ -2575,8 +2584,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
        }
 
 bail:
-       if (have_alloc_sem)
+       if (have_alloc_sem) {
                up_read(&inode->i_alloc_sem);
+               ocfs2_iocb_clear_sem_locked(iocb);
+       }
        if (rw_level != -1)
                ocfs2_rw_unlock(inode, rw_level);
        mlog_exit(ret);
index c2e4f8222e2f0ac707de217555bee7edd51f76d6..bf2e7764920e92e3d621a94caac7124b3fa15914 100644 (file)
@@ -350,7 +350,7 @@ enum {
 #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
        NUM_SYSTEM_INODES
 };
-#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE
+#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
 #define NUM_LOCAL_SYSTEM_INODES        \
                (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
 
index 182845147fe45bde8f5607a799f23cc1e2818117..08cba2c3b61240e085b9861967af5bd0adb4d227 100644 (file)
@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = {
 
 #endif
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+/*
+ * Print out autogroup-related information:
+ */
+static int sched_autogroup_show(struct seq_file *m, void *v)
+{
+       struct inode *inode = m->private;
+       struct task_struct *p;
+
+       p = get_proc_task(inode);
+       if (!p)
+               return -ESRCH;
+       proc_sched_autogroup_show_task(p, m);
+
+       put_task_struct(p);
+
+       return 0;
+}
+
+static ssize_t
+sched_autogroup_write(struct file *file, const char __user *buf,
+           size_t count, loff_t *offset)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct task_struct *p;
+       char buffer[PROC_NUMBUF];
+       long nice;
+       int err;
+
+       memset(buffer, 0, sizeof(buffer));
+       if (count > sizeof(buffer) - 1)
+               count = sizeof(buffer) - 1;
+       if (copy_from_user(buffer, buf, count))
+               return -EFAULT;
+
+       err = strict_strtol(strstrip(buffer), 0, &nice);
+       if (err)
+               return -EINVAL;
+
+       p = get_proc_task(inode);
+       if (!p)
+               return -ESRCH;
+
+       err = nice;
+       err = proc_sched_autogroup_set_nice(p, &err);
+       if (err)
+               count = err;
+
+       put_task_struct(p);
+
+       return count;
+}
+
+static int sched_autogroup_open(struct inode *inode, struct file *filp)
+{
+       int ret;
+
+       ret = single_open(filp, sched_autogroup_show, NULL);
+       if (!ret) {
+               struct seq_file *m = filp->private_data;
+
+               m->private = inode;
+       }
+       return ret;
+}
+
+static const struct file_operations proc_pid_sched_autogroup_operations = {
+       .open           = sched_autogroup_open,
+       .read           = seq_read,
+       .write          = sched_autogroup_write,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
+
 static ssize_t comm_write(struct file *file, const char __user *buf,
                                size_t count, loff_t *offset)
 {
@@ -2732,6 +2808,9 @@ static const struct pid_entry tgid_base_stuff[] = {
        INF("limits",     S_IRUGO, proc_pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
+#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+       REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
 #endif
        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
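
Usage sketch (not part of the patch): the new /proc/<pid>/autogroup file prints the task's autogroup and its nice level on read, and accepts a new nice level on write (parsed via strict_strtol() and applied through proc_sched_autogroup_set_nice()). A minimal userspace reader/writer, assuming CONFIG_SCHED_AUTOGROUP is enabled:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/self/autogroup", "r");
            char line[64];

            if (f && fgets(line, sizeof(line), f))
                    fputs(line, stdout);    /* e.g. "/autogroup-42 nice 0" */
            if (f)
                    fclose(f);

            f = fopen("/proc/self/autogroup", "w");
            if (f) {
                    fprintf(f, "10\n");     /* new nice level for the group */
                    fclose(f);
            }
            return 0;
    }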
index 36d57f74cd01c6c126ee2f7c2ea2c98f66868b2f..51494e6b55487f30496c8870165dd75f8ba4c7b1 100644 (file)
@@ -81,10 +81,10 @@ extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
 extern unsigned long wait_for_completion_timeout(struct completion *x,
                                                   unsigned long timeout);
-extern unsigned long wait_for_completion_interruptible_timeout(
-                       struct completion *x, unsigned long timeout);
-extern unsigned long wait_for_completion_killable_timeout(
-                       struct completion *x, unsigned long timeout);
+extern long wait_for_completion_interruptible_timeout(
+       struct completion *x, unsigned long timeout);
+extern long wait_for_completion_killable_timeout(
+       struct completion *x, unsigned long timeout);
 extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
 
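Sketch of the calling convention this return-type fix enables (not part of the patch; assumes a struct completion done initialized elsewhere). With the old unsigned long return, the -ERESTARTSYS case was indistinguishable from a huge remaining timeout:

    long ret;

    ret = wait_for_completion_interruptible_timeout(&done,
                                                    msecs_to_jiffies(100));
    if (ret < 0)
            return ret;             /* interrupted: -ERESTARTSYS */
    if (ret == 0)
            return -ETIMEDOUT;      /* timed out, never completed */
    /* ret > 0: completed with ret jiffies to spare */
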
index 9d8688b92d8b02c46980f9a4397e4b2d0c313413..8cd00ad98d3773a4afa44e5e7ad6eaac184e6d71 100644 (file)
@@ -824,6 +824,8 @@ enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 #ifdef CONFIG_DMA_ENGINE
 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
 void dma_issue_pending_all(void);
+struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
+void dma_release_channel(struct dma_chan *chan);
 #else
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
@@ -831,7 +833,14 @@ static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descript
 }
 static inline void dma_issue_pending_all(void)
 {
-       do { } while (0);
+}
+static inline struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask,
+                                             dma_filter_fn fn, void *fn_param)
+{
+       return NULL;
+}
+static inline void dma_release_channel(struct dma_chan *chan)
+{
 }
 #endif
 
@@ -842,8 +851,6 @@ void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
 #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
-struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, void *fn_param);
-void dma_release_channel(struct dma_chan *chan);
 
 /* --- Helper iov-locking functions --- */
 
index 8beabb958f61d5147c8893f1e780415a91fcb2e6..47e3997f7b5cf39233283ff43d84937daa502c2f 100644 (file)
@@ -154,12 +154,14 @@ enum {
        TRACE_EVENT_FL_ENABLED_BIT,
        TRACE_EVENT_FL_FILTERED_BIT,
        TRACE_EVENT_FL_RECORDED_CMD_BIT,
+       TRACE_EVENT_FL_CAP_ANY_BIT,
 };
 
 enum {
        TRACE_EVENT_FL_ENABLED          = (1 << TRACE_EVENT_FL_ENABLED_BIT),
        TRACE_EVENT_FL_FILTERED         = (1 << TRACE_EVENT_FL_FILTERED_BIT),
        TRACE_EVENT_FL_RECORDED_CMD     = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
+       TRACE_EVENT_FL_CAP_ANY          = (1 << TRACE_EVENT_FL_CAP_ANY_BIT),
 };
 
 struct ftrace_event_call {
@@ -196,6 +198,14 @@ struct ftrace_event_call {
 #endif
 };
 
+#define __TRACE_EVENT_FLAGS(name, value)                               \
+       static int __init trace_init_flags_##name(void)                 \
+       {                                                               \
+               event_##name.flags = value;                             \
+               return 0;                                               \
+       }                                                               \
+       early_initcall(trace_init_flags_##name);
+
 #define PERF_MAX_TRACE_SIZE    2048
 
 #define MAX_FILTER_PRED                32
@@ -215,6 +225,10 @@ enum {
        FILTER_PTR_STRING,
 };
 
+#define EVENT_STORAGE_SIZE 128
+extern struct mutex event_storage_mutex;
+extern char event_storage[EVENT_STORAGE_SIZE];
+
 extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
                              const char *name, int offset, int size,
index fd0c1b857d3dbcd9c074e461ded81b69f3ab897d..330586ffffbbccad534b1f81b7309d766f8d48fb 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/wait.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
-
+#include <linux/timerqueue.h>
 
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
@@ -79,8 +79,8 @@ enum hrtimer_restart {
 
 /**
  * struct hrtimer - the basic hrtimer structure
- * @node:      red black tree node for time ordered insertion
- * @_expires:  the absolute expiry time in the hrtimers internal
+ * @node:      timerqueue node, which also manages node.expires,
+ *             the absolute expiry time in the hrtimers internal
  *             representation. The time is related to the clock on
  *             which the timer is based. Is setup by adding
  *             slack to the _softexpires value. For non range timers
@@ -101,8 +101,7 @@ enum hrtimer_restart {
  * The hrtimer structure must be initialized by hrtimer_init()
  */
 struct hrtimer {
-       struct rb_node                  node;
-       ktime_t                         _expires;
+       struct timerqueue_node          node;
        ktime_t                         _softexpires;
        enum hrtimer_restart            (*function)(struct hrtimer *);
        struct hrtimer_clock_base       *base;
@@ -141,8 +140,7 @@ struct hrtimer_sleeper {
 struct hrtimer_clock_base {
        struct hrtimer_cpu_base *cpu_base;
        clockid_t               index;
-       struct rb_root          active;
-       struct rb_node          *first;
+       struct timerqueue_head  active;
        ktime_t                 resolution;
        ktime_t                 (*get_time)(void);
        ktime_t                 softirq_time;
@@ -158,7 +156,6 @@ struct hrtimer_clock_base {
  * @lock:              lock protecting the base and associated clock bases
  *                     and timers
  * @clock_base:                array of clock bases for this cpu
- * @curr_timer:                the timer which is executing a callback right now
  * @expires_next:      absolute time of the next event which was scheduled
  *                     via clock_set_next_event()
  * @hres_active:       State of high resolution mode
@@ -184,43 +181,43 @@ struct hrtimer_cpu_base {
 
 static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
 {
-       timer->_expires = time;
+       timer->node.expires = time;
        timer->_softexpires = time;
 }
 
 static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta)
 {
        timer->_softexpires = time;
-       timer->_expires = ktime_add_safe(time, delta);
+       timer->node.expires = ktime_add_safe(time, delta);
 }
 
 static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta)
 {
        timer->_softexpires = time;
-       timer->_expires = ktime_add_safe(time, ns_to_ktime(delta));
+       timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta));
 }
 
 static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64)
 {
-       timer->_expires.tv64 = tv64;
+       timer->node.expires.tv64 = tv64;
        timer->_softexpires.tv64 = tv64;
 }
 
 static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time)
 {
-       timer->_expires = ktime_add_safe(timer->_expires, time);
+       timer->node.expires = ktime_add_safe(timer->node.expires, time);
        timer->_softexpires = ktime_add_safe(timer->_softexpires, time);
 }
 
 static inline void hrtimer_add_expires_ns(struct hrtimer *timer, u64 ns)
 {
-       timer->_expires = ktime_add_ns(timer->_expires, ns);
+       timer->node.expires = ktime_add_ns(timer->node.expires, ns);
        timer->_softexpires = ktime_add_ns(timer->_softexpires, ns);
 }
 
 static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer)
 {
-       return timer->_expires;
+       return timer->node.expires;
 }
 
 static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
@@ -230,7 +227,7 @@ static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer)
 
 static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer)
 {
-       return timer->_expires.tv64;
+       return timer->node.expires.tv64;
 }
 static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
 {
@@ -239,12 +236,12 @@ static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer)
 
 static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer)
 {
-       return ktime_to_ns(timer->_expires);
+       return ktime_to_ns(timer->node.expires);
 }
 
 static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer)
 {
-    return ktime_sub(timer->_expires, timer->base->get_time());
+       return ktime_sub(timer->node.expires, timer->base->get_time());
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
index 1f8c06ce0fa66b83760863735eaf1209908205d7..caa151fbebb74c661289a69ffb52762435178d53 100644 (file)
 #include <linux/securebits.h>
 #include <net/net_namespace.h>
 
+#ifdef CONFIG_SMP
+# define INIT_PUSHABLE_TASKS(tsk)                                      \
+       .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
+#else
+# define INIT_PUSHABLE_TASKS(tsk)
+#endif
+
 extern struct files_struct init_files;
 extern struct fs_struct init_fs;
 
@@ -83,6 +90,12 @@ extern struct group_info init_groups;
  */
 # define CAP_INIT_BSET  CAP_FULL_SET
 
+#ifdef CONFIG_RCU_BOOST
+#define INIT_TASK_RCU_BOOST()                                          \
+       .rcu_boost_mutex = NULL,
+#else
+#define INIT_TASK_RCU_BOOST()
+#endif
 #ifdef CONFIG_TREE_PREEMPT_RCU
 #define INIT_TASK_RCU_TREE_PREEMPT()                                   \
        .rcu_blocked_node = NULL,
@@ -94,7 +107,8 @@ extern struct group_info init_groups;
        .rcu_read_lock_nesting = 0,                                     \
        .rcu_read_unlock_special = 0,                                   \
        .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry),           \
-       INIT_TASK_RCU_TREE_PREEMPT()
+       INIT_TASK_RCU_TREE_PREEMPT()                                    \
+       INIT_TASK_RCU_BOOST()
 #else
 #define INIT_TASK_RCU_PREEMPT(tsk)
 #endif
@@ -137,7 +151,7 @@ extern struct cred init_cred;
                .nr_cpus_allowed = NR_CPUS,                             \
        },                                                              \
        .tasks          = LIST_HEAD_INIT(tsk.tasks),                    \
-       .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \
+       INIT_PUSHABLE_TASKS(tsk)                                        \
        .ptraced        = LIST_HEAD_INIT(tsk.ptraced),                  \
        .ptrace_entry   = LIST_HEAD_INIT(tsk.ptrace_entry),             \
        .real_parent    = &tsk,                                         \
index 79d0c4f6d0719452c20494b1439d0d695e212e90..55e0d4253e4927eb67254f38137b2a9e787afa9d 100644 (file)
@@ -114,15 +114,15 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
 struct irqaction {
        irq_handler_t handler;
        unsigned long flags;
-       const char *name;
        void *dev_id;
        struct irqaction *next;
        int irq;
-       struct proc_dir_entry *dir;
        irq_handler_t thread_fn;
        struct task_struct *thread;
        unsigned long thread_flags;
-};
+       const char *name;
+       struct proc_dir_entry *dir;
+} ____cacheline_internodealigned_in_smp;
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
 
index e7d1b2e0070d3570b7022877a79fa2f0ed081507..b78edb58ee66164e756b4789baf71ab86e8684c4 100644 (file)
@@ -275,7 +275,9 @@ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn);
 extern int arch_check_optimized_kprobe(struct optimized_kprobe *op);
 extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op);
 extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op);
-extern int  arch_optimize_kprobe(struct optimized_kprobe *op);
+extern void arch_optimize_kprobes(struct list_head *oplist);
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+                                   struct list_head *done_list);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
 extern kprobe_opcode_t *get_optinsn_slot(void);
 extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
index 685ea65eb803fddf1c47ee3e420fe3056278685b..ce0775aa64c376b8980a6af70ef59f14d19f1ba0 100644 (file)
@@ -81,16 +81,41 @@ struct kthread_work {
 #define DEFINE_KTHREAD_WORK(work, fn)                                  \
        struct kthread_work work = KTHREAD_WORK_INIT(work, fn)
 
-static inline void init_kthread_worker(struct kthread_worker *worker)
-{
-       *worker = (struct kthread_worker)KTHREAD_WORKER_INIT(*worker);
-}
-
-static inline void init_kthread_work(struct kthread_work *work,
-                                    kthread_work_func_t fn)
-{
-       *work = (struct kthread_work)KTHREAD_WORK_INIT(*work, fn);
-}
+/*
+ * kthread_worker.lock and kthread_work.done need their own lockdep class
+ * keys if they are defined on stack with lockdep enabled.  Use the
+ * following macros when defining them on stack.
+ */
+#ifdef CONFIG_LOCKDEP
+# define KTHREAD_WORKER_INIT_ONSTACK(worker)                           \
+       ({ init_kthread_worker(&worker); worker; })
+# define DEFINE_KTHREAD_WORKER_ONSTACK(worker)                         \
+       struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker)
+# define KTHREAD_WORK_INIT_ONSTACK(work, fn)                           \
+       ({ init_kthread_work((&work), fn); work; })
+# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn)                         \
+       struct kthread_work work = KTHREAD_WORK_INIT_ONSTACK(work, fn)
+#else
+# define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker)
+# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) DEFINE_KTHREAD_WORK(work, fn)
+#endif
+
+extern void __init_kthread_worker(struct kthread_worker *worker,
+                       const char *name, struct lock_class_key *key);
+
+#define init_kthread_worker(worker)                                    \
+       do {                                                            \
+               static struct lock_class_key __key;                     \
+               __init_kthread_worker((worker), "("#worker")->lock", &__key); \
+       } while (0)
+
+#define init_kthread_work(work, fn)                                    \
+       do {                                                            \
+               memset((work), 0, sizeof(struct kthread_work));         \
+               INIT_LIST_HEAD(&(work)->node);                          \
+               (work)->func = (fn);                                    \
+               init_waitqueue_head(&(work)->done);                     \
+       } while (0)
 
 int kthread_worker_fn(void *worker_ptr);
 
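The point of the _ONSTACK variants is that each on-stack instance gets its own lockdep class key instead of sharing one static key per definition site. A hedged sketch (my_work_fn and the already-running worker are assumptions):

    static void my_work_fn(struct kthread_work *work)
    {
            /* executes in the worker kthread's context */
    }

    void example(struct kthread_worker *worker)
    {
            /* worker's kthread is assumed to run kthread_worker_fn() */
            DEFINE_KTHREAD_WORK_ONSTACK(work, my_work_fn);

            queue_kthread_work(worker, &work);
            flush_kthread_work(&work);  /* finish before the frame dies */
    }
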
index 7575bbbdf2a2b8e6a716fb6252c3d8e958756f52..8b17fd8c790d8601f8aff0a33c7b909984e30545 100644 (file)
@@ -308,6 +308,9 @@ struct module
        /* The size of the executable code in each section.  */
        unsigned int init_text_size, core_text_size;
 
+       /* Size of RO sections of the module (text+rodata) */
+       unsigned int init_ro_size, core_ro_size;
+
        /* Arch-specific module values */
        struct mod_arch_specific arch;
 
@@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
 {
        return 0;
 }
-
 #endif /* CONFIG_MODULES */
 
 #ifdef CONFIG_SYSFS
@@ -687,6 +689,13 @@ extern int module_sysfs_initialized;
 
 #define __MODULE_STRING(x) __stringify(x)
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+extern void set_all_modules_text_rw(void);
+extern void set_all_modules_text_ro(void);
+#else
+static inline void set_all_modules_text_rw(void) { }
+static inline void set_all_modules_text_ro(void) { }
+#endif
 
 #ifdef CONFIG_GENERIC_BUG
 void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
index f363bc8fdc74c821c99aa59d5bfcb9554c012c9a..94b48bd40dd735f77963fcd31797d32bb68b3379 100644 (file)
@@ -160,4 +160,8 @@ extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
+#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
+#define arch_mutex_cpu_relax() cpu_relax()
+#endif
+
 #endif
index 123566912d7312f276bf975cd1a48c97b2ffa830..e2b9e63afa68b53f45fdc60c1a3f76cf0f338705 100644 (file)
@@ -70,7 +70,7 @@ struct nlmsghdr {
    Check               NLM_F_EXCL
  */
 
-#define NLMSG_ALIGNTO  4
+#define NLMSG_ALIGNTO  4U
 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
 #define NLMSG_HDRLEN    ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
 #define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
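
The U suffix keeps NLMSG_ALIGN()'s mask arithmetic in unsigned types, avoiding implicit sign-conversion warnings (gcc -Wconversion). Usage is unchanged; for example:

    unsigned int payload = 13;                  /* attribute bytes (arbitrary) */
    unsigned int len = NLMSG_LENGTH(payload);   /* 16-byte header + 13 = 29 */
    unsigned int total = NLMSG_ALIGN(len);      /* rounded up to 32 */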
index 06aab5eee134cd56c4bade9005912fa3a785a327..c536f8545f74c11e345943187f201bfc25e48baa 100644 (file)
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
  */
-#ifdef ARCH_HAS_NMI_WATCHDOG
+#if defined(ARCH_HAS_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 #include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
-extern void acpi_nmi_disable(void);
-extern void acpi_nmi_enable(void);
 #else
-#ifndef CONFIG_HARDLOCKUP_DETECTOR
 static inline void touch_nmi_watchdog(void)
 {
        touch_softlockup_watchdog();
 }
-#else
-extern void touch_nmi_watchdog(void);
-#endif
-static inline void acpi_nmi_disable(void) { }
-static inline void acpi_nmi_enable(void) { }
 #endif
 
 /*
index 4f1279e105ee143e4317219b3cb093bc8bbdd954..dda5b0a3ff6014b8a0741a186ed0e3968b63d298 100644 (file)
@@ -215,8 +215,9 @@ struct perf_event_attr {
                                 */
                                precise_ip     :  2, /* skid constraint       */
                                mmap_data      :  1, /* non-exec mmap data    */
+                               sample_id_all  :  1, /* sample_type all events */
 
-                               __reserved_1   : 46;
+                               __reserved_1   : 45;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -327,6 +328,15 @@ struct perf_event_header {
 enum perf_event_type {
 
        /*
+        * If perf_event_attr.sample_id_all is set then all event types will
+        * carry the sample_type-selected fields that identify where/when an
+        * event took place (TID, TIME, ID, CPU, STREAM_ID), as described for
+        * PERF_RECORD_SAMPLE below. These fields are stashed just after the
+        * perf_event_header and the fields already present for each record
+        * type, i.e. at the end of the payload. That way a newer perf.data
+        * file will be supported by older perf tools, which simply ignore
+        * the new optional fields.
+        *
         * The MMAP events record the PROT_EXEC mappings so that we can
         * correlate userspace IPs to code. They have the following structure:
         *
@@ -578,6 +588,10 @@ struct perf_event;
 struct pmu {
        struct list_head                entry;
 
+       struct device                   *dev;
+       char                            *name;
+       int                             type;
+
        int * __percpu                  pmu_disable_count;
        struct perf_cpu_context * __percpu pmu_cpu_context;
        int                             task_ctx_nr;
@@ -758,6 +772,9 @@ struct perf_event {
        u64                             shadow_ctx_time;
 
        struct perf_event_attr          attr;
+       u16                             header_size;
+       u16                             id_header_size;
+       u16                             read_size;
        struct hw_perf_event            hw;
 
        struct perf_event_context       *ctx;
@@ -903,7 +920,7 @@ struct perf_output_handle {
 
 #ifdef CONFIG_PERF_EVENTS
 
-extern int perf_pmu_register(struct pmu *pmu);
+extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
 extern void perf_pmu_unregister(struct pmu *pmu);
 
 extern int perf_num_counters(void);
@@ -970,6 +987,11 @@ extern int perf_event_overflow(struct perf_event *event, int nmi,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs);
 
+static inline bool is_sampling_event(struct perf_event *event)
+{
+       return event->attr.sample_period != 0;
+}
+
 /*
  * Return 1 for a software event, 0 for a hardware event
  */
index f31ef61f1c650b585bd6faf969f7cec754dffe2d..2dea94fc44026a1048f912913be298b8df179873 100644 (file)
@@ -241,11 +241,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
 #define list_first_entry_rcu(ptr, type, member) \
        list_entry_rcu((ptr)->next, type, member)
 
-#define __list_for_each_rcu(pos, head) \
-       for (pos = rcu_dereference_raw(list_next_rcu(head)); \
-               pos != (head); \
-               pos = rcu_dereference_raw(list_next_rcu((pos)))
-
 /**
  * list_for_each_entry_rcu     -       iterate over rcu list of given type
  * @pos:       the type * to use as a loop cursor.
index 03cda7bed98587b128c5a9953316644a8debb4d2..af5614856285d32e0f07d3ca7e7294b03b9b27b7 100644 (file)
@@ -47,6 +47,8 @@
 extern int rcutorture_runnable; /* for sysctl */
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
+#define UINT_CMP_GE(a, b)      (UINT_MAX / 2 >= (a) - (b))
+#define UINT_CMP_LT(a, b)      (UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)     (ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)     (ULONG_MAX / 2 < (a) - (b))
 
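UINT_CMP_GE()/UINT_CMP_LT() are the 32-bit counterparts of the ULONG macros: they order counters modulo wraparound by testing whether the unsigned difference fits in half the value space. A small demonstration:

    static void uint_cmp_demo(void)
    {
            unsigned int older = UINT_MAX - 1;  /* just before the wrap */
            unsigned int newer = 2;             /* just after the wrap  */

            /* newer - older wraps around to 4, and 4 <= UINT_MAX / 2,
             * so the macros order the counters correctly even though
             * newer < older numerically. */
            WARN_ON(!UINT_CMP_GE(newer, older));
            WARN_ON(!UINT_CMP_LT(older, newer));
    }
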
@@ -66,7 +68,6 @@ extern void call_rcu_sched(struct rcu_head *head,
 extern void synchronize_sched(void);
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
-extern void synchronize_sched_expedited(void);
 extern int sched_expedited_torture_stats(char *page);
 
 static inline void __rcu_read_lock_bh(void)
@@ -118,7 +119,6 @@ static inline int rcu_preempt_depth(void)
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
 /* Internal to kernel */
-extern void rcu_init(void);
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern void rcu_check_callbacks(int cpu, int user);
index 13877cb93a6000043f11a6704f2d90b0cc04552d..30ebd7c8d874b4dfeb9c9c9e5c5e857fbb43ab62 100644 (file)
@@ -27,7 +27,9 @@
 
 #include <linux/cache.h>
 
-#define rcu_init_sched()       do { } while (0)
+static inline void rcu_init(void)
+{
+}
 
 #ifdef CONFIG_TINY_RCU
 
@@ -58,6 +60,11 @@ static inline void synchronize_rcu_bh_expedited(void)
        synchronize_sched();
 }
 
+static inline void synchronize_sched_expedited(void)
+{
+       synchronize_sched();
+}
+
 #ifdef CONFIG_TINY_RCU
 
 static inline void rcu_preempt_note_context_switch(void)
@@ -125,16 +132,12 @@ static inline void rcu_cpu_stall_reset(void)
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-
 extern int rcu_scheduler_active __read_mostly;
 extern void rcu_scheduler_starting(void);
-
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
 static inline void rcu_scheduler_starting(void)
 {
 }
-
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #endif /* __LINUX_RCUTINY_H */
index 95518e6287946177e0eceb5cbf201ebfcaf0e072..3a933482734aeccbafc7a0bb735be11ede47cbd2 100644 (file)
@@ -30,6 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
+extern void rcu_init(void);
 extern void rcu_note_context_switch(int cpu);
 extern int rcu_needs_cpu(int cpu);
 extern void rcu_cpu_stall_reset(void);
@@ -47,6 +48,7 @@ static inline void exit_rcu(void)
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 extern void synchronize_rcu_bh(void);
+extern void synchronize_sched_expedited(void);
 extern void synchronize_rcu_expedited(void);
 
 static inline void synchronize_rcu_bh_expedited(void)
index 223874538b33208e3c5ff11710f3161d58b4aef2..777cd01e240ee0fca7a8b6a76c74137d0dcfaaa9 100644 (file)
@@ -316,6 +316,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
                                  size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
+void lockup_detector_init(void);
 #else
 static inline void touch_softlockup_watchdog(void)
 {
@@ -326,6 +327,9 @@ static inline void touch_softlockup_watchdog_sync(void)
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
+static inline void lockup_detector_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
@@ -509,6 +513,8 @@ struct thread_group_cputimer {
        spinlock_t lock;
 };
 
+struct autogroup;
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -576,6 +582,9 @@ struct signal_struct {
 
        struct tty_struct *tty; /* NULL if no tty */
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+       struct autogroup *autogroup;
+#endif
        /*
         * Cumulative resource counters for dead threads in the group,
         * and for reaped dead child processes forked by this group.
@@ -1229,13 +1238,18 @@ struct task_struct {
 #ifdef CONFIG_TREE_PREEMPT_RCU
        struct rcu_node *rcu_blocked_node;
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+       struct rt_mutex *rcu_boost_mutex;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
        struct sched_info sched_info;
 #endif
 
        struct list_head tasks;
+#ifdef CONFIG_SMP
        struct plist_node pushable_tasks;
+#endif
 
        struct mm_struct *mm, *active_mm;
 #if defined(SPLIT_RSS_COUNTING)
@@ -1759,7 +1773,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #ifdef CONFIG_PREEMPT_RCU
 
 #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
+#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
@@ -1767,7 +1782,10 @@ static inline void rcu_copy_process(struct task_struct *p)
        p->rcu_read_unlock_special = 0;
 #ifdef CONFIG_TREE_PREEMPT_RCU
        p->rcu_blocked_node = NULL;
-#endif
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+       p->rcu_boost_mutex = NULL;
+#endif /* #ifdef CONFIG_RCU_BOOST */
        INIT_LIST_HEAD(&p->rcu_node_entry);
 }
 
@@ -1872,14 +1890,11 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
-extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
 #endif
 
-extern void sched_idle_next(void);
-
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
 extern void wake_up_idle_cpu(int cpu);
 #else
@@ -1889,8 +1904,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_shares_ratelimit;
-extern unsigned int sysctl_sched_shares_thresh;
 extern unsigned int sysctl_sched_child_runs_first;
 
 enum sched_tunable_scaling {
@@ -1906,6 +1919,7 @@ extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
+extern unsigned int sysctl_sched_shares_window;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *length,
@@ -1931,6 +1945,24 @@ int sched_rt_handler(struct ctl_table *table, int write,
 
 extern unsigned int sysctl_sched_compat_yield;
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+#ifdef CONFIG_PROC_FS
+extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+#endif
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
@@ -1949,9 +1981,10 @@ extern int task_nice(const struct task_struct *p);
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
-extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
+extern int sched_setscheduler(struct task_struct *, int,
+                             const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int,
-                                     struct sched_param *);
+                                     const struct sched_param *);
 extern struct task_struct *idle_task(int cpu);
 extern struct task_struct *curr_task(int cpu);
 extern void set_curr_task(int cpu, struct task_struct *p);
index 7f770c638e99d670840ed99856dce0b07a660f8b..fe817918b30e49ff96648394d99d8fc85eebaac8 100644 (file)
@@ -77,6 +77,8 @@
 #define SFI_OEM_ID_SIZE                6
 #define SFI_OEM_TABLE_ID_SIZE  8
 
+#define SFI_NAME_LEN           16
+
 #define SFI_SYST_SEARCH_BEGIN          0x000E0000
 #define SFI_SYST_SEARCH_END            0x000FFFFF
 
@@ -156,13 +158,13 @@ struct sfi_device_table_entry {
        u16     addr;
        u8      irq;
        u32     max_freq;
-       char    name[16];
+       char    name[SFI_NAME_LEN];
 } __packed;
 
 struct sfi_gpio_table_entry {
-       char    controller_name[16];
+       char    controller_name[SFI_NAME_LEN];
        u16     pin_no;
-       char    pin_name[16];
+       char    pin_name[SFI_NAME_LEN];
 } __packed;
 
 typedef int (*sfi_table_handler) (struct sfi_table_header *table);
index 51efbef38fb0e204cfddb61b56619d52cefab623..25310f1d7f3773c540e51e7103a4edfa98db7a33 100644 (file)
@@ -2,6 +2,7 @@
 #define __LINUX_STACKTRACE_H
 
 struct task_struct;
+struct pt_regs;
 
 #ifdef CONFIG_STACKTRACE
 struct task_struct;
@@ -13,7 +14,8 @@ struct stack_trace {
 };
 
 extern void save_stack_trace(struct stack_trace *trace);
-extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp);
+extern void save_stack_trace_regs(struct stack_trace *trace,
+                                 struct pt_regs *regs);
 extern void save_stack_trace_tsk(struct task_struct *tsk,
                                struct stack_trace *trace);
 
index cacc27a0e285163d9a8727a4131ffed478b8f46c..18cd0684fc4ec4bb2e6fb52ed6a7838737688c17 100644 (file)
@@ -127,8 +127,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 #define SYSCALL_TRACE_ENTER_EVENT(sname)                               \
        static struct syscall_metadata                                  \
        __attribute__((__aligned__(4))) __syscall_meta_##sname;         \
-       static struct ftrace_event_call                                 \
-       __attribute__((__aligned__(4))) event_enter_##sname;            \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
@@ -137,13 +135,12 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .class                  = &event_class_syscall_enter,   \
                .event.funcs            = &enter_syscall_print_funcs,   \
                .data                   = (void *)&__syscall_meta_##sname,\
-       }
+       };                                                              \
+       __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)                                        \
        static struct syscall_metadata                                  \
        __attribute__((__aligned__(4))) __syscall_meta_##sname;         \
-       static struct ftrace_event_call                                 \
-       __attribute__((__aligned__(4))) event_exit_##sname;             \
        static struct ftrace_event_call __used                          \
          __attribute__((__aligned__(4)))                               \
          __attribute__((section("_ftrace_events")))                    \
@@ -152,7 +149,8 @@ extern struct trace_event_functions exit_syscall_print_funcs;
                .class                  = &event_class_syscall_exit,    \
                .event.funcs            = &exit_syscall_print_funcs,    \
                .data                   = (void *)&__syscall_meta_##sname,\
-       }
+       };                                                              \
+       __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
 
 #define SYSCALL_METADATA(sname, nb)                            \
        SYSCALL_TRACE_ENTER_EVENT(sname);                       \
index 341dddb55090853430c39bd76a081bc2125c8fe5..2466e550a41d40beefe6aa1b5ed4560ac076bc29 100644 (file)
@@ -33,7 +33,7 @@
  */
 
 
-#define TASKSTATS_VERSION      7
+#define TASKSTATS_VERSION      8
 #define TS_COMM_LEN            32      /* should be >= TASK_COMM_LEN
                                         * in linux/sched.h */
 
@@ -188,6 +188,7 @@ enum {
        TASKSTATS_TYPE_STATS,           /* taskstats structure */
        TASKSTATS_TYPE_AGGR_PID,        /* contains pid + stats */
        TASKSTATS_TYPE_AGGR_TGID,       /* contains tgid + stats */
+       TASKSTATS_TYPE_NULL,            /* contains nothing */
        __TASKSTATS_TYPE_MAX,
 };
 
index 38cf093ef62c745d9f06e1038127ef61834796a1..6abd9138beda57f7555b96b9fa0d51c60edaa50f 100644 (file)
@@ -24,9 +24,9 @@ struct timer_list {
        int slack;
 
 #ifdef CONFIG_TIMER_STATS
+       int start_pid;
        void *start_site;
        char start_comm[16];
-       int start_pid;
 #endif
 #ifdef CONFIG_LOCKDEP
        struct lockdep_map lockdep_map;
@@ -48,12 +48,38 @@ extern struct tvec_base boot_tvec_bases;
 #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
 #endif
 
+/*
+ * Note that all tvec_bases are 2-byte aligned and the lower bit of
+ * base in timer_list is guaranteed to be zero. Use the LSB to
+ * indicate whether the timer is deferrable.
+ *
+ * A deferrable timer will work normally when the system is busy, but
+ * will not cause a CPU to come out of idle just to service it; instead,
+ * the timer will be serviced when the CPU eventually wakes up with a
+ * subsequent non-deferrable timer.
+ */
+#define TBASE_DEFERRABLE_FLAG          (0x1)
+
 #define TIMER_INITIALIZER(_function, _expires, _data) {                \
                .entry = { .prev = TIMER_ENTRY_STATIC },        \
                .function = (_function),                        \
                .expires = (_expires),                          \
                .data = (_data),                                \
                .base = &boot_tvec_bases,                       \
+               .slack = -1,                                    \
+               __TIMER_LOCKDEP_MAP_INITIALIZER(                \
+                       __FILE__ ":" __stringify(__LINE__))     \
+       }
+
+#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *)         \
+                 ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG))
+
+#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\
+               .entry = { .prev = TIMER_ENTRY_STATIC },        \
+               .function = (_function),                        \
+               .expires = (_expires),                          \
+               .data = (_data),                                \
+               .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases),  \
                __TIMER_LOCKDEP_MAP_INITIALIZER(                \
                        __FILE__ ":" __stringify(__LINE__))     \
        }
@@ -248,11 +274,11 @@ static inline void timer_stats_timer_clear_start_info(struct timer_list *timer)
 
 extern void add_timer(struct timer_list *timer);
 
+extern int try_to_del_timer_sync(struct timer_list *timer);
+
 #ifdef CONFIG_SMP
-  extern int try_to_del_timer_sync(struct timer_list *timer);
   extern int del_timer_sync(struct timer_list *timer);
 #else
-# define try_to_del_timer_sync(t)      del_timer(t)
 # define del_timer_sync(t)             del_timer(t)
 #endif
 
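A statically defined deferrable timer then looks like the sketch below (my_timeout is hypothetical); the low bit of .base carries TBASE_DEFERRABLE_FLAG, so the timer is serviced on the next non-deferrable wakeup rather than waking an idle CPU:

    static void my_timeout(unsigned long data)
    {
            /* housekeeping that can ride along with the next wakeup */
    }

    static struct timer_list my_timer =
            TIMER_DEFERRED_INITIALIZER(my_timeout, 0, 0);

    static void arm_it(void)
    {
            mod_timer(&my_timer, jiffies + 10 * HZ);    /* armed as usual */
    }
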
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
new file mode 100644 (file)
index 0000000..d24aaba
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef _LINUX_TIMERQUEUE_H
+#define _LINUX_TIMERQUEUE_H
+
+#include <linux/rbtree.h>
+#include <linux/ktime.h>
+
+
+struct timerqueue_node {
+       struct rb_node node;
+       ktime_t expires;
+};
+
+struct timerqueue_head {
+       struct rb_root head;
+       struct timerqueue_node *next;
+};
+
+
+extern void timerqueue_add(struct timerqueue_head *head,
+                               struct timerqueue_node *node);
+extern void timerqueue_del(struct timerqueue_head *head,
+                               struct timerqueue_node *node);
+extern struct timerqueue_node *timerqueue_iterate_next(
+                                               struct timerqueue_node *node);
+
+/**
+ * timerqueue_getnext - Returns the timer with the earliest expiration time
+ *
+ * @head: head of timerqueue
+ *
+ * Returns a pointer to the timer node that has the
+ * earliest expiration time.
+ */
+static inline
+struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
+{
+       return head->next;
+}
+
+static inline void timerqueue_init(struct timerqueue_node *node)
+{
+       RB_CLEAR_NODE(&node->node);
+}
+
+static inline void timerqueue_init_head(struct timerqueue_head *head)
+{
+       head->head = RB_ROOT;
+       head->next = NULL;
+}
+#endif /* _LINUX_TIMERQUEUE_H */
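
The new timerqueue keeps expiry-ordered nodes in an rbtree and caches the earliest one, which is what the hrtimer conversion above relies on. A self-contained usage sketch:

    static void timerqueue_demo(void)
    {
            struct timerqueue_head q;
            struct timerqueue_node a, b, *next;

            timerqueue_init_head(&q);
            timerqueue_init(&a);
            timerqueue_init(&b);

            a.expires = ktime_set(1, 0);            /* 1 s    */
            b.expires = ktime_set(0, 500 * 1000);   /* 0.5 ms */

            timerqueue_add(&q, &a);
            timerqueue_add(&q, &b);

            next = timerqueue_getnext(&q);  /* &b: earliest expiry, O(1) */
            timerqueue_del(&q, next);       /* head->next is recomputed  */
    }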
index a4a90b6726ce6129b43174609fb3e35a2bd088ae..d3e4f87e95c0fa67236f92c2a688fdaa640cfaae 100644 (file)
@@ -106,6 +106,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 
 #define TP_PROTO(args...)      args
 #define TP_ARGS(args...)       args
+#define TP_CONDITION(args...)  args
 
 #ifdef CONFIG_TRACEPOINTS
 
@@ -119,12 +120,14 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args)                                    \
+#define __DO_TRACE(tp, proto, args, cond)                              \
        do {                                                            \
                struct tracepoint_func *it_func_ptr;                    \
                void *it_func;                                          \
                void *__data;                                           \
                                                                        \
+               if (!(cond))                                            \
+                       return;                                         \
                rcu_read_lock_sched_notrace();                          \
                it_func_ptr = rcu_dereference_sched((tp)->funcs);       \
                if (it_func_ptr) {                                      \
@@ -142,7 +145,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)      \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
        extern struct tracepoint __tracepoint_##name;                   \
        static inline void trace_##name(proto)                          \
        {                                                               \
@@ -151,7 +154,8 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
 do_trace:                                                              \
                        __DO_TRACE(&__tracepoint_##name,                \
                                TP_PROTO(data_proto),                   \
-                               TP_ARGS(data_args));                    \
+                               TP_ARGS(data_args),                     \
+                               TP_CONDITION(cond));                    \
        }                                                               \
        static inline int                                               \
        register_trace_##name(void (*probe)(data_proto), void *data)    \
@@ -186,7 +190,7 @@ do_trace:                                                           \
        EXPORT_SYMBOL(__tracepoint_##name)
 
 #else /* !CONFIG_TRACEPOINTS */
-#define __DECLARE_TRACE(name, proto, args, data_proto, data_args)      \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args)        \
        static inline void trace_##name(proto)                          \
        { }                                                             \
        static inline int                                               \
@@ -227,13 +231,20 @@ do_trace:                                                         \
  * "void *__data, proto" as the callback prototype.
  */
 #define DECLARE_TRACE_NOARGS(name)                                     \
-               __DECLARE_TRACE(name, void, , void *__data, __data)
+               __DECLARE_TRACE(name, void, , 1, void *__data, __data)
 
 #define DECLARE_TRACE(name, proto, args)                               \
-               __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),      \
+               __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), 1,   \
                                PARAMS(void *__data, proto),            \
                                PARAMS(__data, args))
 
+#define DECLARE_TRACE_CONDITION(name, proto, args, cond)               \
+       __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), PARAMS(cond), \
+                       PARAMS(void *__data, proto),                    \
+                       PARAMS(__data, args))
+
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #endif /* DECLARE_TRACE */
 
 #ifndef TRACE_EVENT
@@ -347,11 +358,21 @@ do_trace:                                                         \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define DEFINE_EVENT_CONDITION(template, name, proto,          \
+                              args, cond)                      \
+       DECLARE_TRACE_CONDITION(name, PARAMS(proto),            \
+                               PARAMS(args), PARAMS(cond))
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)  \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
 #define TRACE_EVENT_FN(name, proto, args, struct,              \
                assign, print, reg, unreg)                      \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#define TRACE_EVENT_CONDITION(name, proto, args, cond,         \
+                             struct, assign, print)            \
+       DECLARE_TRACE_CONDITION(name, PARAMS(proto),            \
+                               PARAMS(args), PARAMS(cond))
+
+#define TRACE_EVENT_FLAGS(event, flag)
 
 #endif /* ifdef TRACE_EVENT (see note above) */
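
With TP_CONDITION() wired through __DO_TRACE(), the probe body is skipped whenever the condition evaluates false, even while the tracepoint is enabled. A hypothetical event defined in a trace header would look like:

    TRACE_EVENT_CONDITION(my_wakeup,

            TP_PROTO(int cpu, int pid),

            TP_ARGS(cpu, pid),

            TP_CONDITION(pid != 0),         /* never record the idle task */

            TP_STRUCT__entry(
                    __field(int, cpu)
                    __field(int, pid)
            ),

            TP_fast_assign(
                    __entry->cpu = cpu;
                    __entry->pid = pid;
            ),

            TP_printk("cpu=%d pid=%d", __entry->cpu, __entry->pid)
    );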
index 2498bb9fe002a4bafee70389e70c57b842ede507..c9a6abd972a142e7aea4129da8a8e5b9bc64b6ce 100644 (file)
@@ -3,9 +3,9 @@
 
 #include <linux/kernel.h>
 
-struct __una_u16 { u16 x __attribute__((packed)); };
-struct __una_u32 { u32 x __attribute__((packed)); };
-struct __una_u64 { u64 x __attribute__((packed)); };
+struct __una_u16 { u16 x; } __attribute__((packed));
+struct __una_u32 { u32 x; } __attribute__((packed));
+struct __una_u64 { u64 x; } __attribute__((packed));
 
 static inline u16 __get_unaligned_cpu16(const void *p)
 {
index 0c0771f06bfa745e8e4e5add4ec4823cf52eb813..bd257fee60310184b52d0f8c15f1206f6a4f5dad 100644 (file)
@@ -127,12 +127,20 @@ struct execute_work {
        .timer = TIMER_INITIALIZER(NULL, 0, 0),                 \
        }
 
+#define __DEFERRED_WORK_INITIALIZER(n, f) {                    \
+       .work = __WORK_INITIALIZER((n).work, (f)),              \
+       .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0),        \
+       }
+
 #define DECLARE_WORK(n, f)                                     \
        struct work_struct n = __WORK_INITIALIZER(n, f)
 
 #define DECLARE_DELAYED_WORK(n, f)                             \
        struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
 
+#define DECLARE_DEFERRED_WORK(n, f)                            \
+       struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f)
+
 /*
  * initialize a work item's function pointer
  */
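
DECLARE_DEFERRED_WORK() pairs the work item with the deferrable timer introduced above, so periodic background work stops dragging CPUs out of idle. A sketch with a hypothetical gc_fn:

    static void gc_fn(struct work_struct *work);
    static DECLARE_DEFERRED_WORK(gc_work, gc_fn);

    static void gc_fn(struct work_struct *work)
    {
            /* ... reclaim something, then re-arm lazily ... */
            schedule_delayed_work(&gc_work, round_jiffies_relative(60 * HZ));
    }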
index a1c4d417dfa205e8d5c2cf1d4f9d6bbd7a6ec419..60739c5a23ae3e30e943b55dc45cd53289720db4 100644 (file)
@@ -32,7 +32,4 @@
 #define WM8775_AIN3 4
 #define WM8775_AIN4 8
 
-/* subdev group ID */
-#define WM8775_GID (1 << 0)
-
 #endif
index 0ac3fb5e0973460f3046cdf1f00a0e1114a13dff..bb08692a20b08841ead94acce78c9ead82293d94 100644 (file)
@@ -49,7 +49,6 @@ struct flowi {
        __u8    proto;
        __u8    flags;
 #define FLOWI_FLAG_ANYSRC 0x01
-#define FLOWI_FLAG_MATCH_ANY_IIF 0x02
        union {
                struct {
                        __be16  sport;
index 278312c95f9600bd863fa0dc76c52a7c048a9d19..2ab926860cd855b6ec7861a998d19612624980e6 100644 (file)
@@ -164,5 +164,15 @@ static inline int ipv6_unicast_destination(struct sk_buff *skb)
        return rt->rt6i_flags & RTF_LOCAL;
 }
 
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+
+static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+{
+       struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+       return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
+              skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
+}
+
 #endif
 #endif
index 9fdf982d1286a95c02a73bae4131746f05de9343..365359b24177a1a52310679293c43dba7b97533f 100644 (file)
@@ -2024,8 +2024,8 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw,
  *
  * This function may not be called in IRQ context. Calls to this function
  * for a single hardware must be synchronized against each other. Calls
- * to this function and ieee80211_tx_status_irqsafe() may not be mixed
- * for a single hardware.
+ * to this function, ieee80211_tx_status_ni() and ieee80211_tx_status_irqsafe()
+ * may not be mixed for a single hardware.
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
@@ -2033,14 +2033,34 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw,
 void ieee80211_tx_status(struct ieee80211_hw *hw,
                         struct sk_buff *skb);
 
+/**
+ * ieee80211_tx_status_ni - transmit status callback (in process context)
+ *
+ * Like ieee80211_tx_status() but can be called in process context.
+ *
+ * Calls to this function, ieee80211_tx_status() and
+ * ieee80211_tx_status_irqsafe() may not be mixed
+ * for a single hardware.
+ *
+ * @hw: the hardware the frame was transmitted by
+ * @skb: the frame that was transmitted, owned by mac80211 after this call
+ */
+static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw,
+                                         struct sk_buff *skb)
+{
+       local_bh_disable();
+       ieee80211_tx_status(hw, skb);
+       local_bh_enable();
+}
+
 /**
  * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback
  *
  * Like ieee80211_tx_status() but can be called in IRQ context
  * (internally defers to a tasklet.)
  *
- * Calls to this function and ieee80211_tx_status() may not be mixed for a
- * single hardware.
+ * Calls to this function, ieee80211_tx_status() and
+ * ieee80211_tx_status_ni() may not be mixed for a single hardware.
  *
  * @hw: the hardware the frame was transmitted by
  * @skb: the frame that was transmitted, owned by mac80211 after this call
index dd3031aed9d52e7c3bf5d2c6ad464dbdc23477c0..9fcc680ab6b9d1f4de4d15583275efe7acf66975 100644 (file)
@@ -323,7 +323,9 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 static inline int tcf_valid_offset(const struct sk_buff *skb,
                                   const unsigned char *ptr, const int len)
 {
-       return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head);
+       return likely((ptr + len) <= skb_tail_pointer(skb) &&
+                     ptr >= skb->head &&
+                     (ptr <= (ptr + len)));
 }
 
 #ifdef CONFIG_NET_CLS_IND
index ea1f8a83160df419e986d51ad864a14a215cbea3..79f34e2b752f61479a4b8aae9bc29551cff3b7d0 100644 (file)
@@ -610,11 +610,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
 {
        struct sk_buff *n;
 
-       if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) &&
-           !skb_shared(skb))
-               n = skb_get(skb);
-       else
-               n = skb_clone(skb, gfp_mask);
+       n = skb_clone(skb, gfp_mask);
 
        if (n) {
                n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
index 659d968d95c54afe6df51c3289a0d2d6fb5b34ed..7d3f7ce239b5b831c66b20b7df61e593384604b2 100644 (file)
@@ -754,6 +754,7 @@ struct proto {
        void                    (*unhash)(struct sock *sk);
        void                    (*rehash)(struct sock *sk);
        int                     (*get_port)(struct sock *sk, unsigned short snum);
+       void                    (*clear_sk)(struct sock *sk, int size);
 
        /* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
@@ -852,6 +853,8 @@ static inline void __sk_prot_rehash(struct sock *sk)
        sk->sk_prot->hash(sk);
 }
 
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
+
 /* About 10 seconds */
 #define SOCK_DESTROY_TIME (10*HZ)
 
index 1dfab54015113b83bce9f3302470c3a5ed95b5e7..b0b4eb24d592fb1f8ecba11294c10e802ff7cd2b 100644 (file)
 #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
        DEFINE_TRACE(name)
 
+#undef TRACE_EVENT_CONDITION
+#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
+       TRACE_EVENT(name,                                               \
+               PARAMS(proto),                                          \
+               PARAMS(args),                                           \
+               PARAMS(tstruct),                                        \
+               PARAMS(assign),                                         \
+               PARAMS(print))
+
 #undef TRACE_EVENT_FN
 #define TRACE_EVENT_FN(name, proto, args, tstruct,             \
                assign, print, reg, unreg)                      \
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_TRACE(name)
 
+#undef DEFINE_EVENT_CONDITION
+#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(name, proto, args)       \
        DEFINE_TRACE(name)
 
 #undef TRACE_EVENT
 #undef TRACE_EVENT_FN
+#undef TRACE_EVENT_CONDITION
 #undef DECLARE_EVENT_CLASS
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
+#undef DEFINE_EVENT_CONDITION
 #undef TRACE_HEADER_MULTI_READ
 #undef DECLARE_TRACE
 
index 286784d69b8f480343244d8046327a5a7d9883d9..1bcc2a8c00e29966aa8e48f1f135a3330364b668 100644 (file)
@@ -7,16 +7,67 @@
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
-#ifndef _TRACE_POWER_ENUM_
-#define _TRACE_POWER_ENUM_
-enum {
-       POWER_NONE      = 0,
-       POWER_CSTATE    = 1,    /* C-State */
-       POWER_PSTATE    = 2,    /* Fequency change or DVFS */
-       POWER_SSTATE    = 3,    /* Suspend */
-};
+DECLARE_EVENT_CLASS(cpu,
+
+       TP_PROTO(unsigned int state, unsigned int cpu_id),
+
+       TP_ARGS(state, cpu_id),
+
+       TP_STRUCT__entry(
+               __field(        u32,            state           )
+               __field(        u32,            cpu_id          )
+       ),
+
+       TP_fast_assign(
+               __entry->state = state;
+               __entry->cpu_id = cpu_id;
+       ),
+
+       TP_printk("state=%lu cpu_id=%lu", (unsigned long)__entry->state,
+                 (unsigned long)__entry->cpu_id)
+);
+
+DEFINE_EVENT(cpu, cpu_idle,
+
+       TP_PROTO(unsigned int state, unsigned int cpu_id),
+
+       TP_ARGS(state, cpu_id)
+);
+
+/* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
+#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
+#define _PWR_EVENT_AVOID_DOUBLE_DEFINING
+
+#define PWR_EVENT_EXIT -1
 #endif
 
+DEFINE_EVENT(cpu, cpu_frequency,
+
+       TP_PROTO(unsigned int frequency, unsigned int cpu_id),
+
+       TP_ARGS(frequency, cpu_id)
+);
+
+TRACE_EVENT(machine_suspend,
+
+       TP_PROTO(unsigned int state),
+
+       TP_ARGS(state),
+
+       TP_STRUCT__entry(
+               __field(        u32,            state           )
+       ),
+
+       TP_fast_assign(
+               __entry->state = state;
+       ),
+
+       TP_printk("state=%lu", (unsigned long)__entry->state)
+);
+
+/* This code will be removed once the deprecation period has passed (2.6.41) */
+#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED
+
 /*
  * The power events are used for cpuidle & suspend (power_start, power_end)
  *  and for cpufreq (power_frequency)
@@ -75,6 +126,36 @@ TRACE_EVENT(power_end,
 
 );
 
+/* Deprecated dummy functions must be protected against multi-declaration */
+#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+
+enum {
+       POWER_NONE = 0,
+       POWER_CSTATE = 1,
+       POWER_PSTATE = 2,
+};
+#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
+
+#else /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
+
+#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED
+enum {
+       POWER_NONE = 0,
+       POWER_CSTATE = 1,
+       POWER_PSTATE = 2,
+};
+
+/* These dummy declarations have to be ripped out when the deprecated
+   events get removed */
+static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {};
+static inline void trace_power_end(u64 cpuid) {};
+static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {};
+#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */
+
+#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */
+
 /*
  * The clock events are used for clock enable/disable and for
  *  clock rate change
@@ -153,7 +234,6 @@ DEFINE_EVENT(power_domain, power_domain_target,
 
        TP_ARGS(name, state, cpu_id)
 );
-
 #endif /* _TRACE_POWER_H */
 
 /* This part must be outside protection */
index fb726ac7caee4f465033ff5d314d788db929ded5..5a4c04a75b3d369fc9665eca1deee12a2d442d61 100644 (file)
@@ -40,6 +40,8 @@ TRACE_EVENT_FN(sys_enter,
        syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_enter, TRACE_EVENT_FL_CAP_ANY)
+
 TRACE_EVENT_FN(sys_exit,
 
        TP_PROTO(struct pt_regs *regs, long ret),
@@ -62,6 +64,8 @@ TRACE_EVENT_FN(sys_exit,
        syscall_regfunc, syscall_unregfunc
 );
 
+TRACE_EVENT_FLAGS(sys_exit, TRACE_EVENT_FL_CAP_ANY)
+
 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */
 
 #endif /* _TRACE_EVENTS_SYSCALLS_H */
index a9377c0083ad3ed612547f783647132a8268ef09..e16610c208c954541587684c8af64584b01dbfda 100644 (file)
        TRACE_EVENT(name, PARAMS(proto), PARAMS(args),                  \
                PARAMS(tstruct), PARAMS(assign), PARAMS(print))         \
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(name, value)                                 \
+       __TRACE_EVENT_FLAGS(name, value)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
        DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
 
+#undef TRACE_EVENT_FLAGS
+#define TRACE_EVENT_FLAGS(event, flag)
+
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 /*
@@ -289,13 +296,19 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {    \
 
 #undef __array
 #define __array(type, item, len)                                       \
-       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
-       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+       do {                                                            \
+               mutex_lock(&event_storage_mutex);                       \
+               BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                 \
+               snprintf(event_storage, sizeof(event_storage),          \
+                        "%s[%d]", #type, len);                         \
+               ret = trace_define_field(event_call, event_storage, #item, \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
                                 is_signed_type(type), FILTER_OTHER);   \
-       if (ret)                                                        \
-               return ret;
+               mutex_unlock(&event_storage_mutex);                     \
+               if (ret)                                                \
+                       return ret;                                     \
+       } while (0);
 
 #undef __dynamic_array
 #define __dynamic_array(type, item, len)                                      \
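The rewritten __array() macro above formats the "type[len]" string into a single shared static buffer, so the new mutex is what keeps two events registering concurrently from clobbering each other's string. A minimal stand-alone sketch of that pattern; event_storage and the define_array_field() helper here are illustrative, not the actual tracing code:

#include <linux/kernel.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(event_storage_mutex);
static char event_storage[128];

static int define_array_field(const char *type, int len,
			      int (*define)(const char *type_str))
{
	int ret;

	mutex_lock(&event_storage_mutex);
	snprintf(event_storage, sizeof(event_storage), "%s[%d]", type, len);
	ret = define(event_storage);	/* consume while still holding the lock */
	mutex_unlock(&event_storage_mutex);
	return ret;
}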
index c9728992a776356e043d21df7b33aa045c2d7904..8dfd094e68753dd919dd50299c82fa7385d6621e 100644 (file)
@@ -393,7 +393,6 @@ config PREEMPT_RCU
 
 config RCU_TRACE
        bool "Enable tracing for RCU"
-       depends on TREE_RCU || TREE_PREEMPT_RCU
        help
          This option provides tracing in RCU which presents stats
          in debugfs for debugging RCU implementation.
@@ -459,6 +458,60 @@ config TREE_RCU_TRACE
          TREE_PREEMPT_RCU implementations, permitting Makefile to
          trivially select kernel/rcutree_trace.c.
 
+config RCU_BOOST
+       bool "Enable RCU priority boosting"
+       depends on RT_MUTEXES && TINY_PREEMPT_RCU
+       default n
+       help
+         This option boosts the priority of preempted RCU readers that
+         block the current preemptible RCU grace period for too long.
+         This option also prevents heavy loads from blocking RCU
+         callback invocation for all flavors of RCU.
+
+         Say Y here if you are working with real-time apps or heavy loads.
+         Say N here if you are unsure.
+
+config RCU_BOOST_PRIO
+       int "Real-time priority to boost RCU readers to"
+       range 1 99
+       depends on RCU_BOOST
+       default 1
+       help
+         This option specifies the real-time priority to which preempted
+         RCU readers are to be boosted.  If you are working with CPU-bound
+         real-time applications, you should specify a priority higher than
+         the highest-priority CPU-bound application.
+
+         Specify the real-time priority, or take the default if unsure.
+
+config RCU_BOOST_DELAY
+       int "Milliseconds to delay boosting after RCU grace-period start"
+       range 0 3000
+       depends on RCU_BOOST
+       default 500
+       help
+         This option specifies the time to wait after the beginning of
+         a given grace period before priority-boosting preempted RCU
+         readers blocking that grace period.  Note that any RCU reader
+         blocking an expedited RCU grace period is boosted immediately.
+
+         Accept the default if unsure.
+
+config SRCU_SYNCHRONIZE_DELAY
+       int "Microseconds to delay before waiting for readers"
+       range 0 20
+       default 10
+       help
+         This option controls how long SRCU delays before entering its
+         loop waiting on SRCU readers.  The purpose of this loop is
+         to avoid the unconditional context-switch penalty that would
+         otherwise be incurred if there was an active SRCU reader,
+         in a manner similar to adaptive locking schemes.  This should
+         be set to be a bit longer than the common-case SRCU read-side
+         critical-section overhead.
+
+         Accept the default if unsure.
+
 endmenu # "RCU Subsystem"
 
 config IKCONFIG
@@ -741,6 +794,19 @@ config NET_NS
 
 endif # NAMESPACES
 
+config SCHED_AUTOGROUP
+       bool "Automatic process group scheduling"
+       select EVENTFD
+       select CGROUPS
+       select CGROUP_SCHED
+       select FAIR_GROUP_SCHED
+       help
+         This option optimizes the scheduler for common desktop workloads by
+         automatically creating and populating task groups.  This separation
+         of workloads isolates aggressive CPU burners (like build jobs) from
+         desktop applications.  Task group autogeneration is currently based
+         upon task session.
+
 config MM_OWNER
        bool
 
index 830aaec9c7d5e0cb8df39af760791b5335772768..2b54bef33b55c65f8fda4ebf3129db3453c8f53d 100644 (file)
@@ -93,7 +93,7 @@ no_match:
  *
  * Returns the matching dev_t on success or 0 on failure.
  */
-static dev_t __init devt_from_partuuid(char *uuid_str)
+static dev_t devt_from_partuuid(char *uuid_str)
 {
        dev_t res = 0;
        struct device *dev = NULL;
index 8646401f7a0e4b77579aa13f8de6ac191787be73..ea51770c01701e312f70f9a81357babbf93fb865 100644 (file)
@@ -67,6 +67,7 @@
 #include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
+#include <linux/perf_event.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -603,6 +604,8 @@ asmlinkage void __init start_kernel(void)
                                "enabled *very* early, fixing it\n");
                local_irq_disable();
        }
+       idr_init_cache();
+       perf_event_init();
        rcu_init();
        radix_tree_init();
        /* init some links before init_ISA_irqs() */
@@ -658,7 +661,6 @@ asmlinkage void __init start_kernel(void)
        enable_debug_pagealloc();
        kmemleak_init();
        debug_objects_mem_init();
-       idr_init_cache();
        setup_per_cpu_pageset();
        numa_policy_init();
        if (late_time_init)
@@ -882,6 +884,7 @@ static int __init kernel_init(void * unused)
        smp_prepare_cpus(setup_max_cpus);
 
        do_pre_smp_initcalls();
+       lockup_detector_init();
 
        smp_init();
        sched_init_smp();
index f6e726f184916029e2d1cfdbcd4acb2b26f14e69..156cc555614089345553a6e7710580c4f069be0e 100644 (file)
@@ -189,7 +189,6 @@ static inline void check_for_tasks(int cpu)
 }
 
 struct take_cpu_down_param {
-       struct task_struct *caller;
        unsigned long mod;
        void *hcpu;
 };
@@ -198,7 +197,6 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
        struct take_cpu_down_param *param = _param;
-       unsigned int cpu = (unsigned long)param->hcpu;
        int err;
 
        /* Ensure this CPU doesn't handle any more interrupts. */
@@ -208,11 +206,6 @@ static int __ref take_cpu_down(void *_param)
 
        cpu_notify(CPU_DYING | param->mod, param->hcpu);
 
-       if (task_cpu(param->caller) == cpu)
-               move_task_off_dead_cpu(cpu, param->caller);
-       /* Force idle task to run as soon as we yield: it should
-          immediately notice cpu is offline and die quickly. */
-       sched_idle_next();
        return 0;
 }
 
@@ -223,7 +216,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        void *hcpu = (void *)(long)cpu;
        unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
        struct take_cpu_down_param tcd_param = {
-               .caller = current,
                .mod = mod,
                .hcpu = hcpu,
        };
@@ -253,9 +245,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        }
        BUG_ON(cpu_online(cpu));
 
-       /* Wait for it to sleep (leaving idle task). */
+       /*
+        * The migration_call() CPU_DYING callback will have removed all
+        * runnable tasks from the cpu; only the idle task is left now
+        * that the migration thread has finished its stop_machine work.
+        *
+        * Wait for the stop thread to go away.
+        */
        while (!idle_cpu(cpu))
-               yield();
+               cpu_relax();
 
        /* This actually kills the CPU. */
        __cpu_die(cpu);
@@ -386,6 +384,14 @@ out:
 #ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_var_t frozen_cpus;
 
+void __weak arch_disable_nonboot_cpus_begin(void)
+{
+}
+
+void __weak arch_disable_nonboot_cpus_end(void)
+{
+}
+
 int disable_nonboot_cpus(void)
 {
        int cpu, first_cpu, error = 0;
@@ -397,6 +403,7 @@ int disable_nonboot_cpus(void)
         * with the userspace trying to use the CPU hotplug at the same time
         */
        cpumask_clear(frozen_cpus);
+       arch_disable_nonboot_cpus_begin();
 
        printk("Disabling non-boot CPUs ...\n");
        for_each_online_cpu(cpu) {
@@ -412,6 +419,8 @@ int disable_nonboot_cpus(void)
                }
        }
 
+       arch_disable_nonboot_cpus_end();
+
        if (!error) {
                BUG_ON(num_online_cpus() > 1);
                /* Make sure the CPUs won't be enabled by someone else */
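An architecture that needs to do work around the offlining loop can now override the two weak hooks. A minimal sketch: the hook names come from the patch above, but the bodies are purely illustrative (the motivating user is the suspend-time SMP-alternatives switch mentioned in the merge log):

#include <linux/kernel.h>

void arch_disable_nonboot_cpus_begin(void)
{
	/* e.g. prepare UP-safe code patching before the CPUs go away */
	pr_info("offlining non-boot CPUs: begin\n");
}

void arch_disable_nonboot_cpus_end(void)
{
	pr_info("offlining non-boot CPUs: done\n");
}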
index 5447dc7defa95b8f0e13acb80b45487df7dc2e73..7d164e25b0f0ea42498d748f389824773e2a0c78 100644 (file)
@@ -174,8 +174,10 @@ static inline void free_signal_struct(struct signal_struct *sig)
 
 static inline void put_signal_struct(struct signal_struct *sig)
 {
-       if (atomic_dec_and_test(&sig->sigcnt))
+       if (atomic_dec_and_test(&sig->sigcnt)) {
+               sched_autogroup_exit(sig);
                free_signal_struct(sig);
+       }
 }
 
 void __put_task_struct(struct task_struct *tsk)
@@ -905,6 +907,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        posix_cpu_timers_init_group(sig);
 
        tty_audit_fork(sig);
+       sched_autogroup_fork(sig);
 
        sig->oom_adj = current->signal->oom_adj;
        sig->oom_score_adj = current->signal->oom_score_adj;
@@ -1315,7 +1318,7 @@ bad_fork_cleanup_mm:
        }
 bad_fork_cleanup_signal:
        if (!(clone_flags & CLONE_THREAD))
-               free_signal_struct(p->signal);
+               put_signal_struct(p->signal);
 bad_fork_cleanup_sighand:
        __cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
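The error-path change above follows the usual put-style idiom: drop the reference and let the last dropper run the teardown (here sched_autogroup_exit()) before freeing, rather than freeing unconditionally. A generic sketch of the idiom, with a hypothetical object type:

#include <linux/slab.h>
#include <asm/atomic.h>

struct obj {
	atomic_t refcnt;
	/* ... payload ... */
};

static void put_obj(struct obj *o)
{
	if (atomic_dec_and_test(&o->refcnt)) {
		/* last reference gone: run teardown, then free */
		kfree(o);
	}
}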
index 40a8777a27d0d85e173f4b7a3efbd4ecff1c9654..3019b92e691744169b3ac50bb3836afae5ab1085 100644 (file)
@@ -68,6 +68,14 @@ int __read_mostly futex_cmpxchg_enabled;
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
+/*
+ * Futex flags used to encode options to functions and preserve them across
+ * restarts.
+ */
+#define FLAGS_SHARED           0x01
+#define FLAGS_CLOCKRT          0x02
+#define FLAGS_HAS_TIMEOUT      0x04
+
 /*
  * Priority Inheritance state:
  */
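For orientation, the flags word that the rest of this patch threads through every futex function is the one do_futex() builds near the end of the file; an illustrative helper capturing just that mapping (the function itself is not part of the patch):

static unsigned int futex_op_to_flags(int op)
{
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;
	if (op & FUTEX_CLOCK_REALTIME)
		flags |= FLAGS_CLOCKRT;
	/* FLAGS_HAS_TIMEOUT is managed separately on the syscall-restart path */
	return flags;
}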
@@ -123,6 +131,12 @@ struct futex_q {
        u32 bitset;
 };
 
+static const struct futex_q futex_q_init = {
+       /* list gets initialized in queue_me()*/
+       .key = FUTEX_KEY_INIT,
+       .bitset = FUTEX_BITSET_MATCH_ANY
+};
+
 /*
  * Hash buckets are shared by all the futex_keys that hash to the same
  * location.  Each key may have multiple futex_q structures, one for each task
@@ -283,8 +297,7 @@ again:
        return 0;
 }
 
-static inline
-void put_futex_key(int fshared, union futex_key *key)
+static inline void put_futex_key(union futex_key *key)
 {
        drop_futex_key_refs(key);
 }
@@ -870,7 +883,8 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
 /*
  * Wake up waiters matching bitset queued on this futex (uaddr).
  */
-static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
+static int
+futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
@@ -881,7 +895,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
        if (!bitset)
                return -EINVAL;
 
-       ret = get_futex_key(uaddr, fshared, &key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
        if (unlikely(ret != 0))
                goto out;
 
@@ -907,7 +921,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
        }
 
        spin_unlock(&hb->lock);
-       put_futex_key(fshared, &key);
+       put_futex_key(&key);
 out:
        return ret;
 }
@@ -917,7 +931,7 @@ out:
  * to this virtual address:
  */
 static int
-futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
+futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
              int nr_wake, int nr_wake2, int op)
 {
        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
@@ -927,10 +941,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
        int ret, op_ret;
 
 retry:
-       ret = get_futex_key(uaddr1, fshared, &key1);
+       ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
        if (unlikely(ret != 0))
                goto out;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -962,11 +976,11 @@ retry_private:
                if (ret)
                        goto out_put_keys;
 
-               if (!fshared)
+               if (!(flags & FLAGS_SHARED))
                        goto retry_private;
 
-               put_futex_key(fshared, &key2);
-               put_futex_key(fshared, &key1);
+               put_futex_key(&key2);
+               put_futex_key(&key1);
                goto retry;
        }
 
@@ -996,9 +1010,9 @@ retry_private:
 
        double_unlock_hb(hb1, hb2);
 out_put_keys:
-       put_futex_key(fshared, &key2);
+       put_futex_key(&key2);
 out_put_key1:
-       put_futex_key(fshared, &key1);
+       put_futex_key(&key1);
 out:
        return ret;
 }
@@ -1133,13 +1147,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 /**
  * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
  * @uaddr1:    source futex user address
- * @fshared:   0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
+ * @flags:     futex flags (FLAGS_SHARED, etc.)
  * @uaddr2:    target futex user address
  * @nr_wake:   number of waiters to wake (must be 1 for requeue_pi)
  * @nr_requeue:        number of waiters to requeue (0-INT_MAX)
  * @cmpval:    @uaddr1 expected value (or %NULL)
  * @requeue_pi:        if we are attempting to requeue from a non-pi futex to a
- *             pi futex (pi to pi requeue is not supported)
+ *             pi futex (pi to pi requeue is not supported)
  *
  * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
  * uaddr2 atomically on behalf of the top waiter.
@@ -1148,9 +1162,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
  * >=0 - on success, the number of tasks requeued or woken
  *  <0 - on error
  */
-static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-                        int nr_wake, int nr_requeue, u32 *cmpval,
-                        int requeue_pi)
+static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+                        u32 __user *uaddr2, int nr_wake, int nr_requeue,
+                        u32 *cmpval, int requeue_pi)
 {
        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
        int drop_count = 0, task_count = 0, ret;
@@ -1191,10 +1205,10 @@ retry:
                pi_state = NULL;
        }
 
-       ret = get_futex_key(uaddr1, fshared, &key1);
+       ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
        if (unlikely(ret != 0))
                goto out;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -1216,11 +1230,11 @@ retry_private:
                        if (ret)
                                goto out_put_keys;
 
-                       if (!fshared)
+                       if (!(flags & FLAGS_SHARED))
                                goto retry_private;
 
-                       put_futex_key(fshared, &key2);
-                       put_futex_key(fshared, &key1);
+                       put_futex_key(&key2);
+                       put_futex_key(&key1);
                        goto retry;
                }
                if (curval != *cmpval) {
@@ -1260,8 +1274,8 @@ retry_private:
                        break;
                case -EFAULT:
                        double_unlock_hb(hb1, hb2);
-                       put_futex_key(fshared, &key2);
-                       put_futex_key(fshared, &key1);
+                       put_futex_key(&key2);
+                       put_futex_key(&key1);
                        ret = fault_in_user_writeable(uaddr2);
                        if (!ret)
                                goto retry;
@@ -1269,8 +1283,8 @@ retry_private:
                case -EAGAIN:
                        /* The owner was exiting, try again. */
                        double_unlock_hb(hb1, hb2);
-                       put_futex_key(fshared, &key2);
-                       put_futex_key(fshared, &key1);
+                       put_futex_key(&key2);
+                       put_futex_key(&key1);
                        cond_resched();
                        goto retry;
                default:
@@ -1352,9 +1366,9 @@ out_unlock:
                drop_futex_key_refs(&key1);
 
 out_put_keys:
-       put_futex_key(fshared, &key2);
+       put_futex_key(&key2);
 out_put_key1:
-       put_futex_key(fshared, &key1);
+       put_futex_key(&key1);
 out:
        if (pi_state != NULL)
                free_pi_state(pi_state);
@@ -1494,7 +1508,7 @@ static void unqueue_me_pi(struct futex_q *q)
  * private futexes.
  */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-                               struct task_struct *newowner, int fshared)
+                               struct task_struct *newowner)
 {
        u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
        struct futex_pi_state *pi_state = q->pi_state;
@@ -1587,20 +1601,11 @@ handle_fault:
        goto retry;
 }
 
-/*
- * In case we must use restart_block to restart a futex_wait,
- * we encode in the 'flags' shared capability
- */
-#define FLAGS_SHARED           0x01
-#define FLAGS_CLOCKRT          0x02
-#define FLAGS_HAS_TIMEOUT      0x04
-
 static long futex_wait_restart(struct restart_block *restart);
 
 /**
  * fixup_owner() - Post lock pi_state and corner case management
  * @uaddr:     user address of the futex
- * @fshared:   whether the futex is shared (1) or not (0)
  * @q:         futex_q (contains pi_state and access to the rt_mutex)
  * @locked:    if the attempt to take the rt_mutex succeeded (1) or not (0)
  *
@@ -1613,8 +1618,7 @@ static long futex_wait_restart(struct restart_block *restart);
  *  0 - success, lock not taken
  * <0 - on error (-EFAULT)
  */
-static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
-                      int locked)
+static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
        struct task_struct *owner;
        int ret = 0;
@@ -1625,7 +1629,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
                 * did a lock-steal - fix up the PI-state in that case:
                 */
                if (q->pi_state->owner != current)
-                       ret = fixup_pi_state_owner(uaddr, q, current, fshared);
+                       ret = fixup_pi_state_owner(uaddr, q, current);
                goto out;
        }
 
@@ -1652,7 +1656,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
                 * lock. Fix the state up.
                 */
                owner = rt_mutex_owner(&q->pi_state->pi_mutex);
-               ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
+               ret = fixup_pi_state_owner(uaddr, q, owner);
                goto out;
        }
 
@@ -1715,7 +1719,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
  * futex_wait_setup() - Prepare to wait on a futex
  * @uaddr:     the futex userspace address
  * @val:       the expected value
- * @fshared:   whether the futex is shared (1) or not (0)
+ * @flags:     futex flags (FLAGS_SHARED, etc.)
  * @q:         the associated futex_q
  * @hb:                storage for hash_bucket pointer to be returned to caller
  *
@@ -1728,7 +1732,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
  *  0 - uaddr contains val and hb has been locked
  * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
  */
-static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
+static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
                           struct futex_q *q, struct futex_hash_bucket **hb)
 {
        u32 uval;
@@ -1752,8 +1756,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
         * rare, but normal.
         */
 retry:
-       q->key = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr, fshared, &q->key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
        if (unlikely(ret != 0))
                return ret;
 
@@ -1769,10 +1772,10 @@ retry_private:
                if (ret)
                        goto out;
 
-               if (!fshared)
+               if (!(flags & FLAGS_SHARED))
                        goto retry_private;
 
-               put_futex_key(fshared, &q->key);
+               put_futex_key(&q->key);
                goto retry;
        }
 
@@ -1783,32 +1786,29 @@ retry_private:
 
 out:
        if (ret)
-               put_futex_key(fshared, &q->key);
+               put_futex_key(&q->key);
        return ret;
 }
 
-static int futex_wait(u32 __user *uaddr, int fshared,
-                     u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+                     ktime_t *abs_time, u32 bitset)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct restart_block *restart;
        struct futex_hash_bucket *hb;
-       struct futex_q q;
+       struct futex_q q = futex_q_init;
        int ret;
 
        if (!bitset)
                return -EINVAL;
-
-       q.pi_state = NULL;
        q.bitset = bitset;
-       q.rt_waiter = NULL;
-       q.requeue_pi_key = NULL;
 
        if (abs_time) {
                to = &timeout;
 
-               hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
-                                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+               hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
+                                     CLOCK_REALTIME : CLOCK_MONOTONIC,
+                                     HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
                hrtimer_set_expires_range_ns(&to->timer, *abs_time,
                                             current->timer_slack_ns);
@@ -1819,7 +1819,7 @@ retry:
         * Prepare to wait on uaddr. On success, holds hb lock and increments
         * q.key refs.
         */
-       ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+       ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
                goto out;
 
@@ -1852,12 +1852,7 @@ retry:
        restart->futex.val = val;
        restart->futex.time = abs_time->tv64;
        restart->futex.bitset = bitset;
-       restart->futex.flags = FLAGS_HAS_TIMEOUT;
-
-       if (fshared)
-               restart->futex.flags |= FLAGS_SHARED;
-       if (clockrt)
-               restart->futex.flags |= FLAGS_CLOCKRT;
+       restart->futex.flags = flags;
 
        ret = -ERESTART_RESTARTBLOCK;
 
@@ -1873,7 +1868,6 @@ out:
 static long futex_wait_restart(struct restart_block *restart)
 {
        u32 __user *uaddr = restart->futex.uaddr;
-       int fshared = 0;
        ktime_t t, *tp = NULL;
 
        if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
@@ -1881,11 +1875,9 @@ static long futex_wait_restart(struct restart_block *restart)
                tp = &t;
        }
        restart->fn = do_no_restart_syscall;
-       if (restart->futex.flags & FLAGS_SHARED)
-               fshared = 1;
-       return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
-                               restart->futex.bitset,
-                               restart->futex.flags & FLAGS_CLOCKRT);
+
+       return (long)futex_wait(uaddr, restart->futex.flags,
+                               restart->futex.val, tp, restart->futex.bitset);
 }
 
 
@@ -1895,12 +1887,12 @@ static long futex_wait_restart(struct restart_block *restart)
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int futex_lock_pi(u32 __user *uaddr, int fshared,
-                        int detect, ktime_t *time, int trylock)
+static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
+                        ktime_t *time, int trylock)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct futex_hash_bucket *hb;
-       struct futex_q q;
+       struct futex_q q = futex_q_init;
        int res, ret;
 
        if (refill_pi_state_cache())
@@ -1914,12 +1906,8 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
                hrtimer_set_expires(&to->timer, *time);
        }
 
-       q.pi_state = NULL;
-       q.rt_waiter = NULL;
-       q.requeue_pi_key = NULL;
 retry:
-       q.key = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr, fshared, &q.key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
        if (unlikely(ret != 0))
                goto out;
 
@@ -1941,7 +1929,7 @@ retry_private:
                         * exit to complete.
                         */
                        queue_unlock(&q, hb);
-                       put_futex_key(fshared, &q.key);
+                       put_futex_key(&q.key);
                        cond_resched();
                        goto retry;
                default:
@@ -1971,7 +1959,7 @@ retry_private:
         * Fixup the pi_state owner and possibly acquire the lock if we
         * haven't already.
         */
-       res = fixup_owner(uaddr, fshared, &q, !ret);
+       res = fixup_owner(uaddr, &q, !ret);
        /*
         * If fixup_owner() returned an error, propagate that.  If it acquired
         * the lock, clear our -ETIMEDOUT or -EINTR.
@@ -1995,7 +1983,7 @@ out_unlock_put_key:
        queue_unlock(&q, hb);
 
 out_put_key:
-       put_futex_key(fshared, &q.key);
+       put_futex_key(&q.key);
 out:
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
@@ -2008,10 +1996,10 @@ uaddr_faulted:
        if (ret)
                goto out_put_key;
 
-       if (!fshared)
+       if (!(flags & FLAGS_SHARED))
                goto retry_private;
 
-       put_futex_key(fshared, &q.key);
+       put_futex_key(&q.key);
        goto retry;
 }
 
@@ -2020,7 +2008,7 @@ uaddr_faulted:
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(u32 __user *uaddr, int fshared)
+static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 {
        struct futex_hash_bucket *hb;
        struct futex_q *this, *next;
@@ -2038,7 +2026,7 @@ retry:
        if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
                return -EPERM;
 
-       ret = get_futex_key(uaddr, fshared, &key);
+       ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
        if (unlikely(ret != 0))
                goto out;
 
@@ -2093,14 +2081,14 @@ retry:
 
 out_unlock:
        spin_unlock(&hb->lock);
-       put_futex_key(fshared, &key);
+       put_futex_key(&key);
 
 out:
        return ret;
 
 pi_faulted:
        spin_unlock(&hb->lock);
-       put_futex_key(fshared, &key);
+       put_futex_key(&key);
 
        ret = fault_in_user_writeable(uaddr);
        if (!ret)
@@ -2160,7 +2148,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 /**
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
  * @uaddr:     the futex we initially wait on (non-pi)
- * @fshared:   whether the futexes are shared (1) or not (0).  They must be
+ * @flags:     futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.); the futexes must be
  *             the same type, no requeueing from private to shared, etc.
  * @val:       the expected value of uaddr
  * @abs_time:  absolute timeout
@@ -2198,16 +2186,16 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  *  0 - On success
  * <0 - On error
  */
-static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
+static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
                                 u32 val, ktime_t *abs_time, u32 bitset,
-                                int clockrt, u32 __user *uaddr2)
+                                u32 __user *uaddr2)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
        struct rt_mutex_waiter rt_waiter;
        struct rt_mutex *pi_mutex = NULL;
        struct futex_hash_bucket *hb;
-       union futex_key key2;
-       struct futex_q q;
+       union futex_key key2 = FUTEX_KEY_INIT;
+       struct futex_q q = futex_q_init;
        int res, ret;
 
        if (!bitset)
@@ -2215,8 +2203,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 
        if (abs_time) {
                to = &timeout;
-               hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
-                                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+               hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
+                                     CLOCK_REALTIME : CLOCK_MONOTONIC,
+                                     HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
                hrtimer_set_expires_range_ns(&to->timer, *abs_time,
                                             current->timer_slack_ns);
@@ -2229,12 +2218,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
        debug_rt_mutex_init_waiter(&rt_waiter);
        rt_waiter.task = NULL;
 
-       key2 = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr2, fshared, &key2);
+       ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
        if (unlikely(ret != 0))
                goto out;
 
-       q.pi_state = NULL;
        q.bitset = bitset;
        q.rt_waiter = &rt_waiter;
        q.requeue_pi_key = &key2;
@@ -2243,7 +2230,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
         * Prepare to wait on uaddr. On success, increments q.key (key1) ref
         * count.
         */
-       ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+       ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
        if (ret)
                goto out_key2;
 
@@ -2273,8 +2260,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
                 */
                if (q.pi_state && (q.pi_state->owner != current)) {
                        spin_lock(q.lock_ptr);
-                       ret = fixup_pi_state_owner(uaddr2, &q, current,
-                                                  fshared);
+                       ret = fixup_pi_state_owner(uaddr2, &q, current);
                        spin_unlock(q.lock_ptr);
                }
        } else {
@@ -2293,7 +2279,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
                 * Fixup the pi_state owner and possibly acquire the lock if we
                 * haven't already.
                 */
-               res = fixup_owner(uaddr2, fshared, &q, !ret);
+               res = fixup_owner(uaddr2, &q, !ret);
                /*
                 * If fixup_owner() returned an error, propagate that.  If it
                 * acquired the lock, clear -ETIMEDOUT or -EINTR.
@@ -2324,9 +2310,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
        }
 
 out_put_keys:
-       put_futex_key(fshared, &q.key);
+       put_futex_key(&q.key);
 out_key2:
-       put_futex_key(fshared, &key2);
+       put_futex_key(&key2);
 
 out:
        if (to) {
@@ -2551,58 +2537,57 @@ void exit_robust_list(struct task_struct *curr)
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                u32 __user *uaddr2, u32 val2, u32 val3)
 {
-       int clockrt, ret = -ENOSYS;
-       int cmd = op & FUTEX_CMD_MASK;
-       int fshared = 0;
+       int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
+       unsigned int flags = 0;
 
        if (!(op & FUTEX_PRIVATE_FLAG))
-               fshared = 1;
+               flags |= FLAGS_SHARED;
 
-       clockrt = op & FUTEX_CLOCK_REALTIME;
-       if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
-               return -ENOSYS;
+       if (op & FUTEX_CLOCK_REALTIME) {
+               flags |= FLAGS_CLOCKRT;
+               if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
+                       return -ENOSYS;
+       }
 
        switch (cmd) {
        case FUTEX_WAIT:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAIT_BITSET:
-               ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt);
+               ret = futex_wait(uaddr, flags, val, timeout, val3);
                break;
        case FUTEX_WAKE:
                val3 = FUTEX_BITSET_MATCH_ANY;
        case FUTEX_WAKE_BITSET:
-               ret = futex_wake(uaddr, fshared, val, val3);
+               ret = futex_wake(uaddr, flags, val, val3);
                break;
        case FUTEX_REQUEUE:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
                break;
        case FUTEX_CMP_REQUEUE:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
-                                   0);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
                break;
        case FUTEX_WAKE_OP:
-               ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
+               ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
                break;
        case FUTEX_LOCK_PI:
                if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
+                       ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
                break;
        case FUTEX_UNLOCK_PI:
                if (futex_cmpxchg_enabled)
-                       ret = futex_unlock_pi(uaddr, fshared);
+                       ret = futex_unlock_pi(uaddr, flags);
                break;
        case FUTEX_TRYLOCK_PI:
                if (futex_cmpxchg_enabled)
-                       ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
+                       ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
                break;
        case FUTEX_WAIT_REQUEUE_PI:
                val3 = FUTEX_BITSET_MATCH_ANY;
-               ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
-                                           clockrt, uaddr2);
+               ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
+                                           uaddr2);
                break;
        case FUTEX_CMP_REQUEUE_PI:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
-                                   1);
+               ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
                break;
        default:
                ret = -ENOSYS;
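From userspace, the two commands that may legally carry FUTEX_CLOCK_REALTIME after this change are FUTEX_WAIT_BITSET and FUTEX_WAIT_REQUEUE_PI. A minimal, illustrative wrapper for the first (the helper name is made up; the constants come from linux/futex.h):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

static int futex_wait_abs_rt(int *uaddr, int val, const struct timespec *abstime)
{
	return syscall(SYS_futex, uaddr,
		       FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME,
		       val, abstime, NULL, FUTEX_BITSET_MATCH_ANY);
}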
index 72206cf5c6cf854898d889a6a645e44febdd526f..f2429fc3438c4f1c2094e59fe54415dc30e4bb51 100644 (file)
@@ -516,10 +516,13 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
                struct hrtimer *timer;
+               struct timerqueue_node *next;
 
-               if (!base->first)
+               next = timerqueue_getnext(&base->active);
+               if (!next)
                        continue;
-               timer = rb_entry(base->first, struct hrtimer, node);
+               timer = container_of(next, struct hrtimer, node);
+
                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
                /*
                 * clock_was_set() has changed base->offset so the
@@ -840,48 +843,17 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
 static int enqueue_hrtimer(struct hrtimer *timer,
                           struct hrtimer_clock_base *base)
 {
-       struct rb_node **link = &base->active.rb_node;
-       struct rb_node *parent = NULL;
-       struct hrtimer *entry;
-       int leftmost = 1;
-
        debug_activate(timer);
 
-       /*
-        * Find the right place in the rbtree:
-        */
-       while (*link) {
-               parent = *link;
-               entry = rb_entry(parent, struct hrtimer, node);
-               /*
-                * We dont care about collisions. Nodes with
-                * the same expiry time stay together.
-                */
-               if (hrtimer_get_expires_tv64(timer) <
-                               hrtimer_get_expires_tv64(entry)) {
-                       link = &(*link)->rb_left;
-               } else {
-                       link = &(*link)->rb_right;
-                       leftmost = 0;
-               }
-       }
-
-       /*
-        * Insert the timer to the rbtree and check whether it
-        * replaces the first pending timer
-        */
-       if (leftmost)
-               base->first = &timer->node;
+       timerqueue_add(&base->active, &timer->node);
 
-       rb_link_node(&timer->node, parent, link);
-       rb_insert_color(&timer->node, &base->active);
        /*
         * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
         * state of a possibly running callback.
         */
        timer->state |= HRTIMER_STATE_ENQUEUED;
 
-       return leftmost;
+       return (&timer->node == base->active.next);
 }
 
 /*
@@ -901,12 +873,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
        if (!(timer->state & HRTIMER_STATE_ENQUEUED))
                goto out;
 
-       /*
-        * Remove the timer from the rbtree and replace the first
-        * entry pointer if necessary.
-        */
-       if (base->first == &timer->node) {
-               base->first = rb_next(&timer->node);
+       if (&timer->node == timerqueue_getnext(&base->active)) {
 #ifdef CONFIG_HIGH_RES_TIMERS
                /* Reprogram the clock event device. if enabled */
                if (reprogram && hrtimer_hres_active()) {
@@ -919,7 +886,7 @@ static void __remove_hrtimer(struct hrtimer *timer,
                }
 #endif
        }
-       rb_erase(&timer->node, &base->active);
+       timerqueue_del(&base->active, &timer->node);
 out:
        timer->state = newstate;
 }
@@ -1128,11 +1095,13 @@ ktime_t hrtimer_get_next_event(void)
        if (!hrtimer_hres_active()) {
                for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
                        struct hrtimer *timer;
+                       struct timerqueue_node *next;
 
-                       if (!base->first)
+                       next = timerqueue_getnext(&base->active);
+                       if (!next)
                                continue;
 
-                       timer = rb_entry(base->first, struct hrtimer, node);
+                       timer = container_of(next, struct hrtimer, node);
                        delta.tv64 = hrtimer_get_expires_tv64(timer);
                        delta = ktime_sub(delta, base->get_time());
                        if (delta.tv64 < mindelta.tv64)
@@ -1162,6 +1131,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 
        timer->base = &cpu_base->clock_base[clock_id];
        hrtimer_init_timer_hres(timer);
+       timerqueue_init(&timer->node);
 
 #ifdef CONFIG_TIMER_STATS
        timer->start_site = NULL;
@@ -1278,14 +1248,14 @@ retry:
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                ktime_t basenow;
-               struct rb_node *node;
+               struct timerqueue_node *node;
 
                basenow = ktime_add(now, base->offset);
 
-               while ((node = base->first)) {
+               while ((node = timerqueue_getnext(&base->active))) {
                        struct hrtimer *timer;
 
-                       timer = rb_entry(node, struct hrtimer, node);
+                       timer = container_of(node, struct hrtimer, node);
 
                        /*
                         * The immediate goal for using the softexpires is
@@ -1441,7 +1411,7 @@ void hrtimer_run_pending(void)
  */
 void hrtimer_run_queues(void)
 {
-       struct rb_node *node;
+       struct timerqueue_node *node;
        struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
        struct hrtimer_clock_base *base;
        int index, gettime = 1;
@@ -1451,8 +1421,7 @@ void hrtimer_run_queues(void)
 
        for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
                base = &cpu_base->clock_base[index];
-
-               if (!base->first)
+               if (!timerqueue_getnext(&base->active))
                        continue;
 
                if (gettime) {
@@ -1462,10 +1431,10 @@ void hrtimer_run_queues(void)
 
                raw_spin_lock(&cpu_base->lock);
 
-               while ((node = base->first)) {
+               while ((node = timerqueue_getnext(&base->active))) {
                        struct hrtimer *timer;
 
-                       timer = rb_entry(node, struct hrtimer, node);
+                       timer = container_of(node, struct hrtimer, node);
                        if (base->softirq_time.tv64 <=
                                        hrtimer_get_expires_tv64(timer))
                                break;
@@ -1630,8 +1599,10 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 
        raw_spin_lock_init(&cpu_base->lock);
 
-       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++)
+       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                cpu_base->clock_base[i].cpu_base = cpu_base;
+               timerqueue_init_head(&cpu_base->clock_base[i].active);
+       }
 
        hrtimer_init_hres(cpu_base);
 }
@@ -1642,10 +1613,10 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
                                struct hrtimer_clock_base *new_base)
 {
        struct hrtimer *timer;
-       struct rb_node *node;
+       struct timerqueue_node *node;
 
-       while ((node = rb_first(&old_base->active))) {
-               timer = rb_entry(node, struct hrtimer, node);
+       while ((node = timerqueue_getnext(&old_base->active))) {
+               timer = container_of(node, struct hrtimer, node);
                BUG_ON(hrtimer_callback_running(timer));
                debug_deactivate(timer);
 
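A compact sketch of the timerqueue API this conversion leans on, exactly as used in the hunks above; struct my_timer and the example function are hypothetical:

#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/timerqueue.h>

struct my_timer {
	struct timerqueue_node node;	/* rb_node plus a cached expires value */
};

static struct timerqueue_head head;

static void my_timer_example(struct my_timer *t, ktime_t expires)
{
	struct timerqueue_node *next;
	struct my_timer *first;

	timerqueue_init_head(&head);
	timerqueue_init(&t->node);
	t->node.expires = expires;
	timerqueue_add(&head, &t->node);

	/* constant-time lookup of the earliest-expiring node */
	next = timerqueue_getnext(&head);
	first = container_of(next, struct my_timer, node);

	timerqueue_del(&head, &first->node);
}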
index e5325825aeb6e1e4ea0514ee37cfa53412ec4e3c..086adf25a55e3aaecf3eb3172a7569b2c1a209e0 100644 (file)
@@ -641,7 +641,7 @@ int __init init_hw_breakpoint(void)
 
        constraints_initialized = 1;
 
-       perf_pmu_register(&perf_breakpoint);
+       perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);
 
        return register_die_notifier(&hw_breakpoint_exceptions_nb);
 
index 5f92acc5f952e0afb0489017c265a943a4a7d464..91a5fa25054e1d14d62339749f3229fae49f3766 100644 (file)
@@ -577,7 +577,9 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
  */
 static int irq_thread(void *data)
 {
-       struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+       static struct sched_param param = {
+               .sched_priority = MAX_USER_RT_PRIO/2,
+       };
        struct irqaction *action = data;
        struct irq_desc *desc = irq_to_desc(action->irq);
        int wake, oneshot = desc->status & IRQ_ONESHOT;
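Making the sched_param static moves it out of every irq_thread() stack frame; since the priority is a compile-time constant and the struct is only read, a single shared copy is safe. Assuming the usual consumer, the call site then looks like this sketch:

	static struct sched_param param = {
		.sched_priority = MAX_USER_RT_PRIO/2,
	};

	sched_setscheduler(current, SCHED_FIFO, &param);	/* read-only use */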
index 9737a76e106ff1554ecc2174f0e49a92b5badf45..7663e5df0e6f731f1804201a5e6cdf9b6162dd05 100644 (file)
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
        return p->pre_handler == aggr_pre_handler;
 }
 
+/* Return true(!0) if the kprobe is unused */
+static inline int kprobe_unused(struct kprobe *p)
+{
+       return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
+              list_empty(&p->list);
+}
+
 /*
  * Keep all fields in the kprobe consistent
  */
-static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
+static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
 {
-       memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
-       memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
+       memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
+       memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
 }
 
 #ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
        }
 }
 
+/* Free optimized instructions and optimized_kprobe */
+static __kprobes void free_aggr_kprobe(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       op = container_of(p, struct optimized_kprobe, kp);
+       arch_remove_optimized_kprobe(op);
+       arch_remove_kprobe(p);
+       kfree(op);
+}
+
 /* Return true(!0) if the kprobe is ready for optimization. */
 static inline int kprobe_optready(struct kprobe *p)
 {
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
        return 0;
 }
 
+/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+static inline int kprobe_disarmed(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
+       if (!kprobe_aggrprobe(p))
+               return kprobe_disabled(p);
+
+       op = container_of(p, struct optimized_kprobe, kp);
+
+       return kprobe_disabled(p) && list_empty(&op->list);
+}
+
+/* Return true(!0) if the probe is queued on (un)optimizing lists */
+static int __kprobes kprobe_queued(struct kprobe *p)
+{
+       struct optimized_kprobe *op;
+
+       if (kprobe_aggrprobe(p)) {
+               op = container_of(p, struct optimized_kprobe, kp);
+               if (!list_empty(&op->list))
+                       return 1;
+       }
+       return 0;
+}
+
 /*
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including addr (exclude breakpoint).
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
 
 /* Optimization staging list, protected by kprobe_mutex */
 static LIST_HEAD(optimizing_list);
+static LIST_HEAD(unoptimizing_list);
 
 static void kprobe_optimizer(struct work_struct *work);
 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
+static DECLARE_COMPLETION(optimizer_comp);
 #define OPTIMIZE_DELAY 5
 
-/* Kprobe jump optimizer */
-static __kprobes void kprobe_optimizer(struct work_struct *work)
+/*
+ * Optimize (replace a breakpoint with a jump) kprobes listed on
+ * optimizing_list.
+ */
+static __kprobes void do_optimize_kprobes(void)
 {
-       struct optimized_kprobe *op, *tmp;
-
-       /* Lock modules while optimizing kprobes */
-       mutex_lock(&module_mutex);
-       mutex_lock(&kprobe_mutex);
-       if (kprobes_all_disarmed || !kprobes_allow_optimization)
-               goto end;
-
-       /*
-        * Wait for quiesence period to ensure all running interrupts
-        * are done. Because optprobe may modify multiple instructions
-        * there is a chance that Nth instruction is interrupted. In that
-        * case, running interrupt can return to 2nd-Nth byte of jump
-        * instruction. This wait is for avoiding it.
-        */
-       synchronize_sched();
+       /* Optimization is never done while kprobes are disarmed */
+       if (kprobes_all_disarmed || !kprobes_allow_optimization ||
+           list_empty(&optimizing_list))
+               return;
 
        /*
         * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
         */
        get_online_cpus();
        mutex_lock(&text_mutex);
-       list_for_each_entry_safe(op, tmp, &optimizing_list, list) {
-               WARN_ON(kprobe_disabled(&op->kp));
-               if (arch_optimize_kprobe(op) < 0)
-                       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-               list_del_init(&op->list);
+       arch_optimize_kprobes(&optimizing_list);
+       mutex_unlock(&text_mutex);
+       put_online_cpus();
+}
+
+/*
+ * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
+ * if need) kprobes listed on unoptimizing_list.
+ */
+static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+{
+       struct optimized_kprobe *op, *tmp;
+
+       /* Unlike optimization, unoptimization must always be carried out */
+       if (list_empty(&unoptimizing_list))
+               return;
+
+       /* Ditto to do_optimize_kprobes */
+       get_online_cpus();
+       mutex_lock(&text_mutex);
+       arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+       /* Loop free_list for disarming */
+       list_for_each_entry_safe(op, tmp, free_list, list) {
+               /* Disarm probes if marked disabled */
+               if (kprobe_disabled(&op->kp))
+                       arch_disarm_kprobe(&op->kp);
+               if (kprobe_unused(&op->kp)) {
+                       /*
+                        * Remove unused probes from hash list. After waiting
+                        * for synchronization, these probes are reclaimed.
+                        * (reclaiming is done by do_free_cleaned_kprobes.)
+                        */
+                       hlist_del_rcu(&op->kp.hlist);
+               } else
+                       list_del_init(&op->list);
        }
        mutex_unlock(&text_mutex);
        put_online_cpus();
-end:
+}
+
+/* Reclaim all kprobes on the free_list */
+static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+{
+       struct optimized_kprobe *op, *tmp;
+
+       list_for_each_entry_safe(op, tmp, free_list, list) {
+               BUG_ON(!kprobe_unused(&op->kp));
+               list_del_init(&op->list);
+               free_aggr_kprobe(&op->kp);
+       }
+}
+
+/* Start optimizer after OPTIMIZE_DELAY passed */
+static __kprobes void kick_kprobe_optimizer(void)
+{
+       if (!delayed_work_pending(&optimizing_work))
+               schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+}
+
+/* Kprobe jump optimizer */
+static __kprobes void kprobe_optimizer(struct work_struct *work)
+{
+       LIST_HEAD(free_list);
+
+       /* Lock modules while optimizing kprobes */
+       mutex_lock(&module_mutex);
+       mutex_lock(&kprobe_mutex);
+
+       /*
+        * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
+        * kprobes before waiting for the quiescence period.
+        */
+       do_unoptimize_kprobes(&free_list);
+
+       /*
+        * Step 2: Wait for the quiescence period to ensure all running
+        * interrupts are done. Because an optprobe may modify multiple
+        * instructions, there is a chance that the Nth instruction is
+        * interrupted; in that case, the interrupted context could return
+        * into the 2nd-Nth byte of the jump instruction. This wait avoids
+        * that.
+        */
+       synchronize_sched();
+
+       /* Step 3: Optimize kprobes after the quiescence period */
+       do_optimize_kprobes();
+
+       /* Step 4: Free cleaned kprobes after the quiescence period */
+       do_free_cleaned_kprobes(&free_list);
+
        mutex_unlock(&kprobe_mutex);
        mutex_unlock(&module_mutex);
+
+       /* Step 5: Kick optimizer again if needed */
+       if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
+               kick_kprobe_optimizer();
+       else
+               /* Wake up all waiters */
+               complete_all(&optimizer_comp);
+}
+
+/* Wait for optimization and unoptimization to complete */
+static __kprobes void wait_for_kprobe_optimizer(void)
+{
+       if (delayed_work_pending(&optimizing_work))
+               wait_for_completion(&optimizer_comp);
 }
 
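The kick/wait pair above is a small reusable pattern: batched work hides behind a delayed work item, and waiters block on a completion that the worker fires once its queues drain. Stripped of the kprobe specifics (all names below, including the more_work_queued() predicate, are illustrative):

#include <linux/completion.h>
#include <linux/types.h>
#include <linux/workqueue.h>

static void batch_worker(struct work_struct *work);
static DECLARE_DELAYED_WORK(batch_work, batch_worker);
static DECLARE_COMPLETION(batch_done);

static bool more_work_queued(void);	/* illustrative predicate */

static void kick_batch(void)
{
	if (!delayed_work_pending(&batch_work))
		schedule_delayed_work(&batch_work, 5);	/* delay in jiffies */
}

static void wait_for_batch(void)
{
	if (delayed_work_pending(&batch_work))
		wait_for_completion(&batch_done);
}

static void batch_worker(struct work_struct *work)
{
	/* ... drain the queues under the relevant locks ... */
	if (more_work_queued())
		kick_batch();			/* re-arm for the next round */
	else
		complete_all(&batch_done);	/* wake all waiters */
}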
 /* Optimize kprobe if p is ready to be optimized */
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
        /* Check if it is already optimized. */
        if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
                return;
-
        op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
-       list_add(&op->list, &optimizing_list);
-       if (!delayed_work_pending(&optimizing_work))
-               schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
+
+       if (!list_empty(&op->list))
+               /* This probe is being unoptimized. Just dequeue it */
+               list_del_init(&op->list);
+       else {
+               list_add(&op->list, &optimizing_list);
+               kick_kprobe_optimizer();
+       }
+}
+
+/* Shortcut for immediate (forced) unoptimization */
+static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+       get_online_cpus();
+       arch_unoptimize_kprobe(op);
+       put_online_cpus();
+       if (kprobe_disabled(&op->kp))
+               arch_disarm_kprobe(&op->kp);
 }
 
 /* Unoptimize a kprobe if p is optimized */
-static __kprobes void unoptimize_kprobe(struct kprobe *p)
+static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
 {
        struct optimized_kprobe *op;
 
-       if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) {
-               op = container_of(p, struct optimized_kprobe, kp);
-               if (!list_empty(&op->list))
-                       /* Dequeue from the optimization queue */
+       if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
+               return; /* Neither an optprobe nor an optimized probe */
+
+       op = container_of(p, struct optimized_kprobe, kp);
+       if (!kprobe_optimized(p)) {
+               /* Unoptimized or unoptimizing case */
+               if (force && !list_empty(&op->list)) {
+                       /*
+                        * If this kprobe is queued for unoptimization and
+                        * force is set, unoptimize it right away. (There is
+                        * no need to unoptimize an already-unoptimized
+                        * kprobe again.)
+                        */
                        list_del_init(&op->list);
-               else
-                       /* Replace jump with break */
-                       arch_unoptimize_kprobe(op);
-               op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+                       force_unoptimize_kprobe(op);
+               }
+               return;
+       }
+
+       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+       if (!list_empty(&op->list)) {
+               /* Dequeue from the optimization queue */
+               list_del_init(&op->list);
+               return;
+       }
+       /* Optimized kprobe case */
+       if (force)
+               /* Forcibly update the code: this is a special case */
+               force_unoptimize_kprobe(op);
+       else {
+               list_add(&op->list, &unoptimizing_list);
+               kick_kprobe_optimizer();
        }
 }
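
Because both directions share op->list, a probe queued in one direction can be rescued simply by dequeuing it. A compact state model of the two functions above (a sketch; the enum and field names are made up, and only the list/flag bookkeeping is modeled, not the arch-level patching):

#include <stdbool.h>
#include <stdio.h>

enum queue { NONE, OPT_Q, UNOPT_Q };
struct probe { bool optimized; enum queue q; };

static void optimize(struct probe *p)
{
	if (p->optimized)
		return;			/* already optimized */
	p->optimized = true;
	if (p->q == UNOPT_Q)
		p->q = NONE;		/* jump still armed: dequeue rescues it */
	else
		p->q = OPT_Q;		/* hand it to the optimizer */
}

static void unoptimize(struct probe *p, bool force)
{
	if (!p->optimized) {
		if (force && p->q == UNOPT_Q)
			p->q = NONE;	/* forced: undo the jump right now */
		return;
	}
	p->optimized = false;
	if (p->q == OPT_Q) {
		p->q = NONE;		/* never armed; nothing to undo */
		return;
	}
	p->q = force ? NONE : UNOPT_Q;	/* forced path patches inline */
}

int main(void)
{
	struct probe p = { false, NONE };

	optimize(&p);
	unoptimize(&p, false);
	optimize(&p);			/* rescued off the unoptimizing queue */
	printf("optimized=%d queue=%d\n", p.optimized, p.q);
	return 0;
}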
 
+/* Cancel unoptimization so the probe can be reused */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+       struct optimized_kprobe *op;
+
+       BUG_ON(!kprobe_unused(ap));
+       /*
+        * An unused kprobe MUST be in the middle of delayed unoptimization
+        * (meaning the relative jump is still in place) and disabled.
+        */
+       op = container_of(ap, struct optimized_kprobe, kp);
+       if (unlikely(list_empty(&op->list)))
+               printk(KERN_WARNING "Warning: found a stray unused "
+                       "aggrprobe@%p\n", ap->addr);
+       /* Enable the probe again */
+       ap->flags &= ~KPROBE_FLAG_DISABLED;
+       /* Optimize it again (remove from op->list) */
+       BUG_ON(!kprobe_optready(ap));
+       optimize_kprobe(ap);
+}
+
 /* Remove optimized instructions */
 static void __kprobes kill_optimized_kprobe(struct kprobe *p)
 {
        struct optimized_kprobe *op;
 
        op = container_of(p, struct optimized_kprobe, kp);
-       if (!list_empty(&op->list)) {
-               /* Dequeue from the optimization queue */
+       if (!list_empty(&op->list))
+               /* Dequeue from the (un)optimization queue */
                list_del_init(&op->list);
-               op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
-       }
-       /* Don't unoptimize, because the target code will be freed. */
+
+       op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+       /* Don't touch the code, because it is already freed. */
        arch_remove_optimized_kprobe(op);
 }
 
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
        arch_prepare_optimized_kprobe(op);
 }
 
-/* Free optimized instructions and optimized_kprobe */
-static __kprobes void free_aggr_kprobe(struct kprobe *p)
-{
-       struct optimized_kprobe *op;
-
-       op = container_of(p, struct optimized_kprobe, kp);
-       arch_remove_optimized_kprobe(op);
-       kfree(op);
-}
-
 /* Allocate new optimized_kprobe and try to prepare optimized instructions */
 static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
        op = container_of(ap, struct optimized_kprobe, kp);
        if (!arch_prepared_optinsn(&op->optinsn)) {
                /* If failed to setup optimizing, fallback to kprobe */
-               free_aggr_kprobe(ap);
+               arch_remove_optimized_kprobe(op);
+               kfree(op);
                return;
        }
 
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void)
                return;
 
        kprobes_allow_optimization = false;
-       printk(KERN_INFO "Kprobes globally unoptimized\n");
-       get_online_cpus();      /* For avoiding text_mutex deadlock */
-       mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
                        if (!kprobe_disabled(p))
-                               unoptimize_kprobe(p);
+                               unoptimize_kprobe(p, false);
                }
        }
-
-       mutex_unlock(&text_mutex);
-       put_online_cpus();
-       /* Allow all currently running kprobes to complete */
-       synchronize_sched();
+       /* Wait for unoptimization to complete */
+       wait_for_kprobe_optimizer();
+       printk(KERN_INFO "Kprobes globally unoptimized\n");
 }
 
 int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 }
 #endif /* CONFIG_SYSCTL */
 
+/* Put a breakpoint for a probe. Must be called with text_mutex locked */
 static void __kprobes __arm_kprobe(struct kprobe *p)
 {
-       struct kprobe *old_p;
+       struct kprobe *_p;
 
        /* Check collision with other optimized kprobes */
-       old_p = get_optimized_kprobe((unsigned long)p->addr);
-       if (unlikely(old_p))
-               unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */
+       _p = get_optimized_kprobe((unsigned long)p->addr);
+       if (unlikely(_p))
+               /* Fallback to unoptimized kprobe */
+               unoptimize_kprobe(_p, true);
 
        arch_arm_kprobe(p);
        optimize_kprobe(p);     /* Try to optimize (add kprobe to a list) */
 }
 
-static void __kprobes __disarm_kprobe(struct kprobe *p)
+/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
 {
-       struct kprobe *old_p;
+       struct kprobe *_p;
 
-       unoptimize_kprobe(p);   /* Try to unoptimize */
-       arch_disarm_kprobe(p);
+       unoptimize_kprobe(p, false);    /* Try to unoptimize */
 
-       /* If another kprobe was blocked, optimize it. */
-       old_p = get_optimized_kprobe((unsigned long)p->addr);
-       if (unlikely(old_p))
-               optimize_kprobe(old_p);
+       if (!kprobe_queued(p)) {
+               arch_disarm_kprobe(p);
+               /* If another kprobe was blocked, optimize it. */
+               _p = get_optimized_kprobe((unsigned long)p->addr);
+               if (unlikely(_p) && reopt)
+                       optimize_kprobe(_p);
+       }
+       /* TODO: reoptimize other probes after this one is unoptimized */
 }
 
 #else /* !CONFIG_OPTPROBES */
 
 #define optimize_kprobe(p)                     do {} while (0)
-#define unoptimize_kprobe(p)                   do {} while (0)
+#define unoptimize_kprobe(p, f)                        do {} while (0)
 #define kill_optimized_kprobe(p)               do {} while (0)
 #define prepare_optimized_kprobe(p)            do {} while (0)
 #define try_to_optimize_kprobe(p)              do {} while (0)
 #define __arm_kprobe(p)                                arch_arm_kprobe(p)
-#define __disarm_kprobe(p)                     arch_disarm_kprobe(p)
+#define __disarm_kprobe(p, o)                  arch_disarm_kprobe(p)
+#define kprobe_disarmed(p)                     kprobe_disabled(p)
+#define wait_for_kprobe_optimizer()            do {} while (0)
+
+/* Without optimization there should be no unused kprobes to reuse */
+static void reuse_unused_kprobe(struct kprobe *ap)
+{
+       printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+       BUG_ON(kprobe_unused(ap));
+}
 
 static __kprobes void free_aggr_kprobe(struct kprobe *p)
 {
+       arch_remove_kprobe(p);
        kfree(p);
 }
 
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
 /* Disarm a kprobe with text_mutex */
 static void __kprobes disarm_kprobe(struct kprobe *kp)
 {
-       get_online_cpus();      /* For avoiding text_mutex deadlock */
+       /* Ditto */
        mutex_lock(&text_mutex);
-       __disarm_kprobe(kp);
+       __disarm_kprobe(kp, true);
        mutex_unlock(&text_mutex);
-       put_online_cpus();
 }
 
 /*
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
        BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 
        if (p->break_handler || p->post_handler)
-               unoptimize_kprobe(ap);  /* Fall back to normal kprobe */
+               unoptimize_kprobe(ap, true);    /* Fall back to normal kprobe */
 
        if (p->break_handler) {
                if (ap->break_handler)
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
  * This is the second or subsequent kprobe at the address - handle
  * the intricacies
  */
-static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
+static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
                                          struct kprobe *p)
 {
        int ret = 0;
-       struct kprobe *ap = old_p;
+       struct kprobe *ap = orig_p;
 
-       if (!kprobe_aggrprobe(old_p)) {
-               /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */
-               ap = alloc_aggr_kprobe(old_p);
+       if (!kprobe_aggrprobe(orig_p)) {
+               /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
+               ap = alloc_aggr_kprobe(orig_p);
                if (!ap)
                        return -ENOMEM;
-               init_aggr_kprobe(ap, old_p);
-       }
+               init_aggr_kprobe(ap, orig_p);
+       } else if (kprobe_unused(ap))
+               /* This probe is going to die. Rescue it */
+               reuse_unused_kprobe(ap);
 
        if (kprobe_gone(ap)) {
                /*
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
        return add_new_kprobe(ap, p);
 }
 
-/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
-static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
-{
-       struct kprobe *kp;
-
-       list_for_each_entry_rcu(kp, &p->list, list) {
-               if (!kprobe_disabled(kp))
-                       /*
-                        * There is an active probe on the list.
-                        * We can't disable aggr_kprobe.
-                        */
-                       return 0;
-       }
-       p->flags |= KPROBE_FLAG_DISABLED;
-       return 1;
-}
-
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
        struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
 /* Check passed kprobe is valid and return kprobe in kprobe_table. */
 static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
 {
-       struct kprobe *old_p, *list_p;
+       struct kprobe *ap, *list_p;
 
-       old_p = get_kprobe(p->addr);
-       if (unlikely(!old_p))
+       ap = get_kprobe(p->addr);
+       if (unlikely(!ap))
                return NULL;
 
-       if (p != old_p) {
-               list_for_each_entry_rcu(list_p, &old_p->list, list)
+       if (p != ap) {
+               list_for_each_entry_rcu(list_p, &ap->list, list)
                        if (list_p == p)
                        /* kprobe p is a valid probe */
                                goto valid;
                return NULL;
        }
 valid:
-       return old_p;
+       return ap;
 }
 
 /* Return error if the kprobe is being re-registered */
 static inline int check_kprobe_rereg(struct kprobe *p)
 {
        int ret = 0;
-       struct kprobe *old_p;
 
        mutex_lock(&kprobe_mutex);
-       old_p = __get_valid_kprobe(p);
-       if (old_p)
+       if (__get_valid_kprobe(p))
                ret = -EINVAL;
        mutex_unlock(&kprobe_mutex);
+
        return ret;
 }
 
@@ -1229,67 +1403,121 @@ fail_with_jump_label:
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
+/* Check if all probes on the aggrprobe are disabled */
+static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
+{
+       struct kprobe *kp;
+
+       list_for_each_entry_rcu(kp, &ap->list, list)
+               if (!kprobe_disabled(kp))
+                       /*
+                        * There is an active probe on the list.
+                        * We can't disable this aggrprobe.
+                        */
+                       return 0;
+
+       return 1;
+}
+
+/* Disable one kprobe: must be called with kprobe_mutex held */
+static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
+{
+       struct kprobe *orig_p;
+
+       /* Get the original kprobe to return */
+       orig_p = __get_valid_kprobe(p);
+       if (unlikely(orig_p == NULL))
+               return NULL;
+
+       if (!kprobe_disabled(p)) {
+               /* Disable probe if it is a child probe */
+               if (p != orig_p)
+                       p->flags |= KPROBE_FLAG_DISABLED;
+
+               /* Try to disarm and disable this/parent probe */
+               if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
+                       disarm_kprobe(orig_p);
+                       orig_p->flags |= KPROBE_FLAG_DISABLED;
+               }
+       }
+
+       return orig_p;
+}
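
__disable_kprobe() folds the old try_to_disable_aggr_kprobe() logic in: a child probe is merely flagged, and the shared breakpoint is disarmed only once every sibling at that address is disabled. A minimal model of that rule (userspace C, hypothetical types):

#include <stdbool.h>
#include <stdio.h>

struct child { bool disabled; };

static bool all_disabled(const struct child *kids, int n)
{
	for (int i = 0; i < n; i++)
		if (!kids[i].disabled)
			return false;
	return true;
}

int main(void)
{
	struct child kids[2] = { { false }, { false } };
	bool parent_armed = true;

	kids[0].disabled = true;
	if (all_disabled(kids, 2))
		parent_armed = false;	/* not yet: kids[1] is still active */
	kids[1].disabled = true;
	if (all_disabled(kids, 2))
		parent_armed = false;	/* now the breakpoint can go */
	printf("parent_armed=%d\n", parent_armed);
	return 0;
}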
+
 /*
  * Unregister a kprobe without a scheduler synchronization.
  */
 static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 {
-       struct kprobe *old_p, *list_p;
+       struct kprobe *ap, *list_p;
 
-       old_p = __get_valid_kprobe(p);
-       if (old_p == NULL)
+       /* Disable kprobe. This will disarm it if needed. */
+       ap = __disable_kprobe(p);
+       if (ap == NULL)
                return -EINVAL;
 
-       if (old_p == p ||
-           (kprobe_aggrprobe(old_p) &&
-            list_is_singular(&old_p->list))) {
+       if (ap == p)
                /*
-                * Only probe on the hash list. Disarm only if kprobes are
-                * enabled and not gone - otherwise, the breakpoint would
-                * already have been removed. We save on flushing icache.
+                * This probe is an independent (and non-optimized) kprobe
+                * (not an aggrprobe). Remove from the hash list.
                 */
-               if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
-                       disarm_kprobe(old_p);
-               hlist_del_rcu(&old_p->hlist);
-       } else {
+               goto disarmed;
+
+       /* The following process expects this probe to be an aggrprobe */
+       WARN_ON(!kprobe_aggrprobe(ap));
+
+       if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
+               /*
+                * !disarmed can happen if the probe is under delayed
+                * unoptimization.
+                */
+               goto disarmed;
+       else {
+               /* If the probe being disabled has special handlers, update the aggrprobe */
                if (p->break_handler && !kprobe_gone(p))
-                       old_p->break_handler = NULL;
+                       ap->break_handler = NULL;
                if (p->post_handler && !kprobe_gone(p)) {
-                       list_for_each_entry_rcu(list_p, &old_p->list, list) {
+                       list_for_each_entry_rcu(list_p, &ap->list, list) {
                                if ((list_p != p) && (list_p->post_handler))
                                        goto noclean;
                        }
-                       old_p->post_handler = NULL;
+                       ap->post_handler = NULL;
                }
 noclean:
+               /*
+                * Remove from the aggrprobe: this path will do nothing in
+                * __unregister_kprobe_bottom().
+                */
                list_del_rcu(&p->list);
-               if (!kprobe_disabled(old_p)) {
-                       try_to_disable_aggr_kprobe(old_p);
-                       if (!kprobes_all_disarmed) {
-                               if (kprobe_disabled(old_p))
-                                       disarm_kprobe(old_p);
-                               else
-                                       /* Try to optimize this probe again */
-                                       optimize_kprobe(old_p);
-                       }
-               }
+               if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
+                       /*
+                        * Try to optimize this probe again, because the post
+                        * handler may have been changed.
+                        */
+                       optimize_kprobe(ap);
        }
        return 0;
+
+disarmed:
+       BUG_ON(!kprobe_disarmed(ap));
+       hlist_del_rcu(&ap->hlist);
+       return 0;
 }
 
 static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
 {
-       struct kprobe *old_p;
+       struct kprobe *ap;
 
        if (list_empty(&p->list))
+               /* This is an independent kprobe */
                arch_remove_kprobe(p);
        else if (list_is_singular(&p->list)) {
-               /* "p" is the last child of an aggr_kprobe */
-               old_p = list_entry(p->list.next, struct kprobe, list);
+               /* This is the last child of an aggrprobe */
+               ap = list_entry(p->list.next, struct kprobe, list);
                list_del(&p->list);
-               arch_remove_kprobe(old_p);
-               free_aggr_kprobe(old_p);
+               free_aggr_kprobe(ap);
        }
+       /* Otherwise, do nothing. */
 }
 
 int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
 int __kprobes disable_kprobe(struct kprobe *kp)
 {
        int ret = 0;
-       struct kprobe *p;
 
        mutex_lock(&kprobe_mutex);
 
-       /* Check whether specified probe is valid. */
-       p = __get_valid_kprobe(kp);
-       if (unlikely(p == NULL)) {
+       /* Disable this kprobe */
+       if (__disable_kprobe(kp) == NULL)
                ret = -EINVAL;
-               goto out;
-       }
 
-       /* If the probe is already disabled (or gone), just return */
-       if (kprobe_disabled(kp))
-               goto out;
-
-       kp->flags |= KPROBE_FLAG_DISABLED;
-       if (p != kp)
-               /* When kp != p, p is always enabled. */
-               try_to_disable_aggr_kprobe(p);
-
-       if (!kprobes_all_disarmed && kprobe_disabled(p))
-               disarm_kprobe(p);
-out:
        mutex_unlock(&kprobe_mutex);
        return ret;
 }
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void)
        mutex_lock(&kprobe_mutex);
 
        /* If kprobes are already disarmed, just return */
-       if (kprobes_all_disarmed)
-               goto already_disabled;
+       if (kprobes_all_disarmed) {
+               mutex_unlock(&kprobe_mutex);
+               return;
+       }
 
        kprobes_all_disarmed = true;
        printk(KERN_INFO "Kprobes globally disabled\n");
 
-       /*
-        * Here we call get_online_cpus() for avoiding text_mutex deadlock,
-        * because disarming may also unoptimize kprobes.
-        */
-       get_online_cpus();
        mutex_lock(&text_mutex);
        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                head = &kprobe_table[i];
                hlist_for_each_entry_rcu(p, node, head, hlist) {
                        if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
-                               __disarm_kprobe(p);
+                               __disarm_kprobe(p, false);
                }
        }
-
        mutex_unlock(&text_mutex);
-       put_online_cpus();
        mutex_unlock(&kprobe_mutex);
-       /* Allow all currently running kprobes to complete */
-       synchronize_sched();
-       return;
 
-already_disabled:
-       mutex_unlock(&kprobe_mutex);
-       return;
+       /* Wait for the optimizer to finish disarming all kprobes */
+       wait_for_kprobe_optimizer();
 }
 
 /*
index 2dc3786349d1723394c25e512b9b570bcf5a4f7e..5355cfd44a3fd21cd767c13d053410ec338f2ede 100644 (file)
@@ -148,7 +148,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
        wait_for_completion(&create.done);
 
        if (!IS_ERR(create.result)) {
-               struct sched_param param = { .sched_priority = 0 };
+               static struct sched_param param = { .sched_priority = 0 };
                va_list args;
 
                va_start(args, namefmt);
@@ -265,6 +265,17 @@ int kthreadd(void *unused)
        return 0;
 }
 
+void __init_kthread_worker(struct kthread_worker *worker,
+                               const char *name,
+                               struct lock_class_key *key)
+{
+       spin_lock_init(&worker->lock);
+       lockdep_set_class_and_name(&worker->lock, key, name);
+       INIT_LIST_HEAD(&worker->work_list);
+       worker->task = NULL;
+}
+EXPORT_SYMBOL_GPL(__init_kthread_worker);
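
The extra lock_class_key parameter exists so that each statically declared worker gets its own lockdep class. The matching header change is not part of this hunk, but the usual wrapper pattern would look roughly like this (a sketch, assuming a macro named init_kthread_worker):

#define init_kthread_worker(worker)					\
do {									\
	static struct lock_class_key __key;				\
	__init_kthread_worker((worker), "("#worker")->lock", &__key);	\
} while (0)

The static key lives at a distinct address per expansion site, which is exactly what lockdep_set_class_and_name() needs to tell workers apart.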
+
 /**
  * kthread_worker_fn - kthread function to process kthread_worker
  * @worker_ptr: pointer to initialized kthread_worker
index 59b76c8ce9d7172e8176f355da9719495077a133..1969d2fc4b36328cf48798620506ddcd0ec330d0 100644 (file)
@@ -494,7 +494,6 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                namelen += 2;
 
        for (i = 0; i < LOCKSTAT_POINTS; i++) {
-               char sym[KSYM_SYMBOL_LEN];
                char ip[32];
 
                if (class->contention_point[i] == 0)
@@ -503,15 +502,13 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                if (!i)
                        seq_line(m, '-', 40-namelen, namelen);
 
-               sprint_symbol(sym, class->contention_point[i]);
                snprintf(ip, sizeof(ip), "[<%p>]",
                                (void *)class->contention_point[i]);
-               seq_printf(m, "%40s %14lu %29s %s\n", name,
-                               stats->contention_point[i],
-                               ip, sym);
+               seq_printf(m, "%40s %14lu %29s %pS\n",
+                          name, stats->contention_point[i],
+                          ip, (void *)class->contention_point[i]);
        }
        for (i = 0; i < LOCKSTAT_POINTS; i++) {
-               char sym[KSYM_SYMBOL_LEN];
                char ip[32];
 
                if (class->contending_point[i] == 0)
@@ -520,12 +517,11 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
                if (!i)
                        seq_line(m, '-', 40-namelen, namelen);
 
-               sprint_symbol(sym, class->contending_point[i]);
                snprintf(ip, sizeof(ip), "[<%p>]",
                                (void *)class->contending_point[i]);
-               seq_printf(m, "%40s %14lu %29s %s\n", name,
-                               stats->contending_point[i],
-                               ip, sym);
+               seq_printf(m, "%40s %14lu %29s %pS\n",
+                          name, stats->contending_point[i],
+                          ip, (void *)class->contending_point[i]);
        }
        if (i) {
                seq_puts(m, "\n");
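
The conversion drops the on-stack KSYM_SYMBOL_LEN buffer entirely: the kernel's vsnprintf() resolves a %pS argument to symbol+offset by itself. The before/after shape, reduced to its essentials:

	/* Before: resolve the symbol by hand into a large stack buffer. */
	char sym[KSYM_SYMBOL_LEN];
	sprint_symbol(sym, addr);
	seq_printf(m, "... %s\n", sym);

	/* After: hand the raw address to the format string instead. */
	seq_printf(m, "... %pS\n", (void *)addr);
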
index d190664f25ff3fa10dca29f37b483f08ad07eae1..34e00b708fad2c79b260ab3d8d4cc199cece8eca 100644 (file)
@@ -56,6 +56,7 @@
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
 #include <linux/jump_label.h>
+#include <linux/pfn.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/module.h>
 #define ARCH_SHF_SMALL 0
 #endif
 
+/*
+ * Modules' sections will be aligned on page boundaries
+ * to ensure complete separation of code and data, but
+ * only when CONFIG_DEBUG_SET_MODULE_RONX=y
+ */
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+# define debug_align(X) ALIGN(X, PAGE_SIZE)
+#else
+# define debug_align(X) (X)
+#endif
+
+/*
+ * Given BASE and SIZE this macro calculates the number of pages the
+ * memory region occupies
+ */
+#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ?                \
+               (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \
+                        PFN_DOWN((unsigned long)BASE) + 1)     \
+               : (0UL))
+
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
@@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod)
        return 0;
 }
 
+#ifdef CONFIG_DEBUG_SET_MODULE_RONX
+/*
+ * LKM RO/NX protection: protect module's text/ro-data
+ * from modification and any data from execution.
+ */
+void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages))
+{
+       unsigned long begin_pfn = PFN_DOWN((unsigned long)start);
+       unsigned long end_pfn = PFN_DOWN((unsigned long)end);
+
+       if (end_pfn > begin_pfn)
+               set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+}
+
+static void set_section_ro_nx(void *base,
+                       unsigned long text_size,
+                       unsigned long ro_size,
+                       unsigned long total_size)
+{
+       /* begin and end PFNs of the current subsection */
+       unsigned long begin_pfn;
+       unsigned long end_pfn;
+
+       /*
+        * Set RO for module text and RO-data:
+        * - Always protect first page.
+        * - Do not protect last partial page.
+        */
+       if (ro_size > 0)
+               set_page_attributes(base, base + ro_size, set_memory_ro);
+
+       /*
+        * Set NX permissions for module data:
+        * - Do not protect first partial page.
+        * - Always protect last page.
+        */
+       if (total_size > text_size) {
+               begin_pfn = PFN_UP((unsigned long)base + text_size);
+               end_pfn = PFN_UP((unsigned long)base + total_size);
+               if (end_pfn > begin_pfn)
+                       set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+       }
+}
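
The asymmetric rounding is the point: the RO pass rounds both ends down so a leading partial page is still protected, the NX pass rounds both ends up so a trailing partial page is still made non-executable, and any mixed page in between is left alone. A self-contained demonstration of the arithmetic (the addresses and sizes are made up, and PAGE_SHIFT is assumed to be 12):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
	unsigned long base = 0x10000800UL;	/* deliberately unaligned */
	unsigned long text = 0x1000, ro_size = 0x1800, total = 0x3000;

	/* RO: PFN_DOWN on both ends, pages [begin, end) as in
	 * set_page_attributes() above. */
	printf("RO pages %#lx..%#lx\n",
	       PFN_DOWN(base), PFN_DOWN(base + ro_size) - 1);
	/* NX: PFN_UP on both ends skips the leading partial page but
	 * covers the last page of data. */
	printf("NX pages %#lx..%#lx\n",
	       PFN_UP(base + text), PFN_UP(base + total) - 1);
	return 0;
}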
+
+/* Set memory back to RW+NX before releasing it */
+void unset_section_ro_nx(struct module *mod, void *module_region)
+{
+       unsigned long total_pages;
+
+       if (mod->module_core == module_region) {
+               /* Set core as NX+RW */
+               total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size);
+               set_memory_nx((unsigned long)mod->module_core, total_pages);
+               set_memory_rw((unsigned long)mod->module_core, total_pages);
+
+       } else if (mod->module_init == module_region) {
+               /* Set init as NX+RW */
+               total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size);
+               set_memory_nx((unsigned long)mod->module_init, total_pages);
+               set_memory_rw((unsigned long)mod->module_init, total_pages);
+       }
+}
+
+/* Iterate through all modules and set each module's text as RW */
+void set_all_modules_text_rw(void)
+{
+       struct module *mod;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry_rcu(mod, &modules, list) {
+               if ((mod->module_core) && (mod->core_text_size)) {
+                       set_page_attributes(mod->module_core,
+                                               mod->module_core + mod->core_text_size,
+                                               set_memory_rw);
+               }
+               if ((mod->module_init) && (mod->init_text_size)) {
+                       set_page_attributes(mod->module_init,
+                                               mod->module_init + mod->init_text_size,
+                                               set_memory_rw);
+               }
+       }
+       mutex_unlock(&module_mutex);
+}
+
+/* Iterate through all modules and set each module's text as RO */
+void set_all_modules_text_ro(void)
+{
+       struct module *mod;
+
+       mutex_lock(&module_mutex);
+       list_for_each_entry_rcu(mod, &modules, list) {
+               if ((mod->module_core) && (mod->core_text_size)) {
+                       set_page_attributes(mod->module_core,
+                                               mod->module_core + mod->core_text_size,
+                                               set_memory_ro);
+               }
+               if ((mod->module_init) && (mod->init_text_size)) {
+                       set_page_attributes(mod->module_init,
+                                               mod->module_init + mod->init_text_size,
+                                               set_memory_ro);
+               }
+       }
+       mutex_unlock(&module_mutex);
+}
+#else
+static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { }
+static inline void unset_section_ro_nx(struct module *mod, void *module_region) { }
+#endif
+
 /* Free a module, remove from lists, etc. */
 static void free_module(struct module *mod)
 {
@@ -1566,6 +1696,7 @@ static void free_module(struct module *mod)
        destroy_params(mod->kp, mod->num_kp);
 
        /* This may be NULL, but that's OK */
+       unset_section_ro_nx(mod, mod->module_init);
        module_free(mod, mod->module_init);
        kfree(mod->args);
        percpu_modfree(mod);
@@ -1574,6 +1705,7 @@ static void free_module(struct module *mod)
        lockdep_free_key_range(mod->module_core, mod->core_size);
 
        /* Finally, free the core (containing the module structure) */
+       unset_section_ro_nx(mod, mod->module_core);
        module_free(mod, mod->module_core);
 
 #ifdef CONFIG_MPU
@@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
                        s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
                        DEBUGP("\t%s\n", name);
                }
-               if (m == 0)
+               switch (m) {
+               case 0: /* executable */
+                       mod->core_size = debug_align(mod->core_size);
                        mod->core_text_size = mod->core_size;
+                       break;
+               case 1: /* RO: text and ro-data */
+                       mod->core_size = debug_align(mod->core_size);
+                       mod->core_ro_size = mod->core_size;
+                       break;
+               case 3: /* whole core */
+                       mod->core_size = debug_align(mod->core_size);
+                       break;
+               }
        }
 
        DEBUGP("Init section allocation order:\n");
@@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info)
                                         | INIT_OFFSET_MASK);
                        DEBUGP("\t%s\n", sname);
                }
-               if (m == 0)
+               switch (m) {
+               case 0: /* executable */
+                       mod->init_size = debug_align(mod->init_size);
                        mod->init_text_size = mod->init_size;
+                       break;
+               case 1: /* RO: text and ro-data */
+                       mod->init_size = debug_align(mod->init_size);
+                       mod->init_ro_size = mod->init_size;
+                       break;
+               case 3: /* whole init */
+                       mod->init_size = debug_align(mod->init_size);
+                       break;
+               }
        }
 }
 
@@ -2722,6 +2876,18 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
        blocking_notifier_call_chain(&module_notify_list,
                        MODULE_STATE_COMING, mod);
 
+       /* Set RO and NX regions for core */
+       set_section_ro_nx(mod->module_core,
+                               mod->core_text_size,
+                               mod->core_ro_size,
+                               mod->core_size);
+
+       /* Set RO and NX regions for init */
+       set_section_ro_nx(mod->module_init,
+                               mod->init_text_size,
+                               mod->init_ro_size,
+                               mod->init_size);
+
        do_mod_ctors(mod);
        /* Start the module */
        if (mod->init != NULL)
@@ -2765,6 +2931,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
        mod->symtab = mod->core_symtab;
        mod->strtab = mod->core_strtab;
 #endif
+       unset_section_ro_nx(mod, mod->module_init);
        module_free(mod, mod->module_init);
        mod->module_init = NULL;
        mod->init_size = 0;
index 200407c1502f509ee3f9d8a665bc4d3b78a27f74..a5889fb28ecff33eaf5fae64c9d2a50ca03cb2f7 100644 (file)
@@ -199,7 +199,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * memory barriers as we'll eventually observe the right
                 * values at the cost of a few extra spins.
                 */
-               cpu_relax();
+               arch_mutex_cpu_relax();
        }
 #endif
        spin_lock_mutex(&lock->wait_lock, flags);
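
This is the only change to the spin-on-owner loop; the header side of the series (in <linux/mutex.h> and arch/s390, not shown in this hunk) is expected to make the hook a no-op rename everywhere else, roughly:

/* Default, for architectures that don't provide their own version: */
#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
#define arch_mutex_cpu_relax()	cpu_relax()
#endif

The override point exists because s390's cpu_relax() can yield the CPU to the hypervisor, which is far too expensive inside this hot spin loop.
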
index 2870feee81dd7a046703645c9ec50022d4339f39..11847bf1e8cc254db7f2a2a255511fd36eea4a68 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/mm.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/idr.h>
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
@@ -21,7 +22,9 @@
 #include <linux/dcache.h>
 #include <linux/percpu.h>
 #include <linux/ptrace.h>
+#include <linux/reboot.h>
 #include <linux/vmstat.h>
+#include <linux/device.h>
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/rculist.h>
@@ -133,6 +136,28 @@ static void unclone_ctx(struct perf_event_context *ctx)
        }
 }
 
+static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
+{
+       /*
+        * only top level events have the pid namespace they were created in
+        */
+       if (event->parent)
+               event = event->parent;
+
+       return task_tgid_nr_ns(p, event->ns);
+}
+
+static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
+{
+       /*
+        * only top level events have the pid namespace they were created in
+        */
+       if (event->parent)
+               event = event->parent;
+
+       return task_pid_nr_ns(p, event->ns);
+}
+
 /*
  * If we inherit events we want to return the parent event id
  * to userspace.
@@ -312,9 +337,84 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                ctx->nr_stat++;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__read_size(struct perf_event *event)
+{
+       int entry = sizeof(u64); /* value */
+       int size = 0;
+       int nr = 1;
+
+       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+               size += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+               size += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_ID)
+               entry += sizeof(u64);
+
+       if (event->attr.read_format & PERF_FORMAT_GROUP) {
+               nr += event->group_leader->nr_siblings;
+               size += sizeof(u64);
+       }
+
+       size += entry * nr;
+       event->read_size = size;
+}
+
+static void perf_event__header_size(struct perf_event *event)
+{
+       struct perf_sample_data *data;
+       u64 sample_type = event->attr.sample_type;
+       u16 size = 0;
+
+       perf_event__read_size(event);
+
+       if (sample_type & PERF_SAMPLE_IP)
+               size += sizeof(data->ip);
+
+       if (sample_type & PERF_SAMPLE_ADDR)
+               size += sizeof(data->addr);
+
+       if (sample_type & PERF_SAMPLE_PERIOD)
+               size += sizeof(data->period);
+
+       if (sample_type & PERF_SAMPLE_READ)
+               size += event->read_size;
+
+       event->header_size = size;
+}
+
+static void perf_event__id_header_size(struct perf_event *event)
+{
+       struct perf_sample_data *data;
+       u64 sample_type = event->attr.sample_type;
+       u16 size = 0;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               size += sizeof(data->tid_entry);
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               size += sizeof(data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               size += sizeof(data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               size += sizeof(data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               size += sizeof(data->cpu_entry);
+
+       event->id_header_size = size;
+}
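
All three helpers exist so these sizes are computed once, at event creation or group-membership change, instead of on every sample. The pattern is a straight sum over flag bits; a standalone rendition (the flag values and field sizes here are illustrative, not the kernel ABI):

#include <stdint.h>
#include <stdio.h>

enum {
	SAMPLE_TID       = 1 << 0,
	SAMPLE_TIME      = 1 << 1,
	SAMPLE_ID        = 1 << 2,
	SAMPLE_STREAM_ID = 1 << 3,
	SAMPLE_CPU       = 1 << 4,
};

static uint16_t id_header_size(uint64_t sample_type)
{
	uint16_t size = 0;

	if (sample_type & SAMPLE_TID)
		size += 2 * sizeof(uint32_t);	/* pid + tid */
	if (sample_type & SAMPLE_TIME)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_ID)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_STREAM_ID)
		size += sizeof(uint64_t);
	if (sample_type & SAMPLE_CPU)
		size += 2 * sizeof(uint32_t);	/* cpu + reserved */
	return size;
}

int main(void)
{
	printf("TID|TIME -> %u bytes\n",
	       id_header_size(SAMPLE_TID | SAMPLE_TIME));
	return 0;
}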
+
 static void perf_group_attach(struct perf_event *event)
 {
-       struct perf_event *group_leader = event->group_leader;
+       struct perf_event *group_leader = event->group_leader, *pos;
 
        /*
         * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +433,11 @@ static void perf_group_attach(struct perf_event *event)
 
        list_add_tail(&event->group_entry, &group_leader->sibling_list);
        group_leader->nr_siblings++;
+
+       perf_event__header_size(group_leader);
+
+       list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
+               perf_event__header_size(pos);
 }
 
 /*
@@ -391,7 +496,7 @@ static void perf_group_detach(struct perf_event *event)
        if (event->group_leader != event) {
                list_del_init(&event->group_entry);
                event->group_leader->nr_siblings--;
-               return;
+               goto out;
        }
 
        if (!list_empty(&event->group_entry))
@@ -410,6 +515,12 @@ static void perf_group_detach(struct perf_event *event)
                /* Inherit group flags from the previous leader */
                sibling->group_flags = event->group_flags;
        }
+
+out:
+       perf_event__header_size(event->group_leader);
+
+       list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
+               perf_event__header_size(tmp);
 }
 
 static inline int
@@ -1073,7 +1184,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
        /*
         * not supported on inherited events
         */
-       if (event->attr.inherit)
+       if (event->attr.inherit || !is_sampling_event(event))
                return -EINVAL;
 
        atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2400,6 @@ static int perf_release(struct inode *inode, struct file *file)
        return perf_event_release_kernel(event);
 }
 
-static int perf_event_read_size(struct perf_event *event)
-{
-       int entry = sizeof(u64); /* value */
-       int size = 0;
-       int nr = 1;
-
-       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               size += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               size += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_ID)
-               entry += sizeof(u64);
-
-       if (event->attr.read_format & PERF_FORMAT_GROUP) {
-               nr += event->group_leader->nr_siblings;
-               size += sizeof(u64);
-       }
-
-       size += entry * nr;
-
-       return size;
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
        struct perf_event *child;
@@ -2428,7 +2514,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
        if (event->state == PERF_EVENT_STATE_ERROR)
                return 0;
 
-       if (count < perf_event_read_size(event))
+       if (count < event->read_size)
                return -ENOSPC;
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2600,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
        int ret = 0;
        u64 value;
 
-       if (!event->attr.sample_period)
+       if (!is_sampling_event(event))
                return -EINVAL;
 
        if (copy_from_user(&value, arg, sizeof(value)))
@@ -3305,6 +3391,73 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
        } while (len);
 }
 
+static void __perf_event_header__init_id(struct perf_event_header *header,
+                                        struct perf_sample_data *data,
+                                        struct perf_event *event)
+{
+       u64 sample_type = event->attr.sample_type;
+
+       data->type = sample_type;
+       header->size += event->id_header_size;
+
+       if (sample_type & PERF_SAMPLE_TID) {
+               /* namespace issues */
+               data->tid_entry.pid = perf_event_pid(event, current);
+               data->tid_entry.tid = perf_event_tid(event, current);
+       }
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               data->time = perf_clock();
+
+       if (sample_type & PERF_SAMPLE_ID)
+               data->id = primary_event_id(event);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               data->stream_id = event->id;
+
+       if (sample_type & PERF_SAMPLE_CPU) {
+               data->cpu_entry.cpu      = raw_smp_processor_id();
+               data->cpu_entry.reserved = 0;
+       }
+}
+
+static void perf_event_header__init_id(struct perf_event_header *header,
+                                      struct perf_sample_data *data,
+                                      struct perf_event *event)
+{
+       if (event->attr.sample_id_all)
+               __perf_event_header__init_id(header, data, event);
+}
+
+static void __perf_event__output_id_sample(struct perf_output_handle *handle,
+                                          struct perf_sample_data *data)
+{
+       u64 sample_type = data->type;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               perf_output_put(handle, data->tid_entry);
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               perf_output_put(handle, data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               perf_output_put(handle, data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               perf_output_put(handle, data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               perf_output_put(handle, data->cpu_entry);
+}
+
+static void perf_event__output_id_sample(struct perf_event *event,
+                                        struct perf_output_handle *handle,
+                                        struct perf_sample_data *sample)
+{
+       if (event->attr.sample_id_all)
+               __perf_event__output_id_sample(handle, sample);
+}
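
The invariant tying these helpers together: whatever __perf_event_header__init_id() counts into header->size must be exactly what __perf_event__output_id_sample() later writes, field for field. Every side-band record in the hunks below therefore follows the same three-step shape (sketch; ev stands in for the record being emitted):

	struct perf_output_handle handle;
	struct perf_sample_data sample;

	perf_event_header__init_id(&ev.header, &sample, event);  /* grow size */
	if (!perf_output_begin(&handle, event, ev.header.size, 0, 0)) {
		perf_output_put(&handle, ev);                     /* fixed part */
		perf_event__output_id_sample(event, &handle, &sample);
		perf_output_end(&handle);
	}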
+
 int perf_output_begin(struct perf_output_handle *handle,
                      struct perf_event *event, unsigned int size,
                      int nmi, int sample)
@@ -3312,6 +3465,7 @@ int perf_output_begin(struct perf_output_handle *handle,
        struct perf_buffer *buffer;
        unsigned long tail, offset, head;
        int have_lost;
+       struct perf_sample_data sample_data;
        struct {
                struct perf_event_header header;
                u64                      id;
@@ -3338,8 +3492,12 @@ int perf_output_begin(struct perf_output_handle *handle,
                goto out;
 
        have_lost = local_read(&buffer->lost);
-       if (have_lost)
-               size += sizeof(lost_event);
+       if (have_lost) {
+               lost_event.header.size = sizeof(lost_event);
+               perf_event_header__init_id(&lost_event.header, &sample_data,
+                                          event);
+               size += lost_event.header.size;
+       }
 
        perf_output_get_handle(handle);
 
@@ -3370,11 +3528,11 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (have_lost) {
                lost_event.header.type = PERF_RECORD_LOST;
                lost_event.header.misc = 0;
-               lost_event.header.size = sizeof(lost_event);
                lost_event.id          = event->id;
                lost_event.lost        = local_xchg(&buffer->lost, 0);
 
                perf_output_put(handle, lost_event);
+               perf_event__output_id_sample(event, handle, &sample_data);
        }
 
        return 0;
@@ -3407,28 +3565,6 @@ void perf_output_end(struct perf_output_handle *handle)
        rcu_read_unlock();
 }
 
-static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
-{
-       /*
-        * only top level events have the pid namespace they were created in
-        */
-       if (event->parent)
-               event = event->parent;
-
-       return task_tgid_nr_ns(p, event->ns);
-}
-
-static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
-{
-       /*
-        * only top level events have the pid namespace they were created in
-        */
-       if (event->parent)
-               event = event->parent;
-
-       return task_pid_nr_ns(p, event->ns);
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
                                 struct perf_event *event,
                                 u64 enabled, u64 running)
@@ -3603,61 +3739,16 @@ void perf_prepare_sample(struct perf_event_header *header,
 {
        u64 sample_type = event->attr.sample_type;
 
-       data->type = sample_type;
-
        header->type = PERF_RECORD_SAMPLE;
-       header->size = sizeof(*header);
+       header->size = sizeof(*header) + event->header_size;
 
        header->misc = 0;
        header->misc |= perf_misc_flags(regs);
 
-       if (sample_type & PERF_SAMPLE_IP) {
-               data->ip = perf_instruction_pointer(regs);
-
-               header->size += sizeof(data->ip);
-       }
-
-       if (sample_type & PERF_SAMPLE_TID) {
-               /* namespace issues */
-               data->tid_entry.pid = perf_event_pid(event, current);
-               data->tid_entry.tid = perf_event_tid(event, current);
-
-               header->size += sizeof(data->tid_entry);
-       }
-
-       if (sample_type & PERF_SAMPLE_TIME) {
-               data->time = perf_clock();
-
-               header->size += sizeof(data->time);
-       }
-
-       if (sample_type & PERF_SAMPLE_ADDR)
-               header->size += sizeof(data->addr);
-
-       if (sample_type & PERF_SAMPLE_ID) {
-               data->id = primary_event_id(event);
-
-               header->size += sizeof(data->id);
-       }
-
-       if (sample_type & PERF_SAMPLE_STREAM_ID) {
-               data->stream_id = event->id;
-
-               header->size += sizeof(data->stream_id);
-       }
-
-       if (sample_type & PERF_SAMPLE_CPU) {
-               data->cpu_entry.cpu             = raw_smp_processor_id();
-               data->cpu_entry.reserved        = 0;
-
-               header->size += sizeof(data->cpu_entry);
-       }
-
-       if (sample_type & PERF_SAMPLE_PERIOD)
-               header->size += sizeof(data->period);
+       __perf_event_header__init_id(header, data, event);
 
-       if (sample_type & PERF_SAMPLE_READ)
-               header->size += perf_event_read_size(event);
+       if (sample_type & PERF_SAMPLE_IP)
+               data->ip = perf_instruction_pointer(regs);
 
        if (sample_type & PERF_SAMPLE_CALLCHAIN) {
                int size = 1;
@@ -3722,23 +3813,26 @@ perf_event_read_event(struct perf_event *event,
                        struct task_struct *task)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        struct perf_read_event read_event = {
                .header = {
                        .type = PERF_RECORD_READ,
                        .misc = 0,
-                       .size = sizeof(read_event) + perf_event_read_size(event),
+                       .size = sizeof(read_event) + event->read_size,
                },
                .pid = perf_event_pid(event, task),
                .tid = perf_event_tid(event, task),
        };
        int ret;
 
+       perf_event_header__init_id(&read_event.header, &sample, event);
        ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
        if (ret)
                return;
 
        perf_output_put(&handle, read_event);
        perf_output_read(&handle, event);
+       perf_event__output_id_sample(event, &handle, &sample);
 
        perf_output_end(&handle);
 }
@@ -3768,14 +3862,16 @@ static void perf_event_task_output(struct perf_event *event,
                                     struct perf_task_event *task_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        struct task_struct *task = task_event->task;
-       int size, ret;
+       int ret, size = task_event->event_id.header.size;
 
-       size  = task_event->event_id.header.size;
-       ret = perf_output_begin(&handle, event, size, 0, 0);
+       perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
+       ret = perf_output_begin(&handle, event,
+                               task_event->event_id.header.size, 0, 0);
        if (ret)
-               return;
+               goto out;
 
        task_event->event_id.pid = perf_event_pid(event, task);
        task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3785,7 +3881,11 @@ static void perf_event_task_output(struct perf_event *event,
 
        perf_output_put(&handle, task_event->event_id);
 
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       task_event->event_id.header.size = size;
 }
 
 static int perf_event_task_match(struct perf_event *event)
@@ -3900,11 +4000,16 @@ static void perf_event_comm_output(struct perf_event *event,
                                     struct perf_comm_event *comm_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int size = comm_event->event_id.header.size;
-       int ret = perf_output_begin(&handle, event, size, 0, 0);
+       int ret;
+
+       perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
+       ret = perf_output_begin(&handle, event,
+                               comm_event->event_id.header.size, 0, 0);
 
        if (ret)
-               return;
+               goto out;
 
        comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
        comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
@@ -3912,7 +4017,12 @@ static void perf_event_comm_output(struct perf_event *event,
        perf_output_put(&handle, comm_event->event_id);
        perf_output_copy(&handle, comm_event->comm,
                                   comm_event->comm_size);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       comm_event->event_id.header.size = size;
 }
 
 static int perf_event_comm_match(struct perf_event *event)
@@ -3957,7 +4067,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
        comm_event->comm_size = size;
 
        comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
-
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
@@ -4038,11 +4147,15 @@ static void perf_event_mmap_output(struct perf_event *event,
                                     struct perf_mmap_event *mmap_event)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int size = mmap_event->event_id.header.size;
-       int ret = perf_output_begin(&handle, event, size, 0, 0);
+       int ret;
 
+       perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
+       ret = perf_output_begin(&handle, event,
+                               mmap_event->event_id.header.size, 0, 0);
        if (ret)
-               return;
+               goto out;
 
        mmap_event->event_id.pid = perf_event_pid(event, current);
        mmap_event->event_id.tid = perf_event_tid(event, current);
@@ -4050,7 +4163,12 @@ static void perf_event_mmap_output(struct perf_event *event,
        perf_output_put(&handle, mmap_event->event_id);
        perf_output_copy(&handle, mmap_event->file_name,
                                   mmap_event->file_size);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
        perf_output_end(&handle);
+out:
+       mmap_event->event_id.header.size = size;
 }
 
 static int perf_event_mmap_match(struct perf_event *event,
@@ -4205,6 +4323,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
 static void perf_log_throttle(struct perf_event *event, int enable)
 {
        struct perf_output_handle handle;
+       struct perf_sample_data sample;
        int ret;
 
        struct {
@@ -4226,11 +4345,15 @@ static void perf_log_throttle(struct perf_event *event, int enable)
        if (enable)
                throttle_event.header.type = PERF_RECORD_UNTHROTTLE;
 
-       ret = perf_output_begin(&handle, event, sizeof(throttle_event), 1, 0);
+       perf_event_header__init_id(&throttle_event.header, &sample, event);
+
+       ret = perf_output_begin(&handle, event,
+                               throttle_event.header.size, 1, 0);
        if (ret)
                return;
 
        perf_output_put(&handle, throttle_event);
+       perf_event__output_id_sample(event, &handle, &sample);
        perf_output_end(&handle);
 }
 
@@ -4246,6 +4369,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
        struct hw_perf_event *hwc = &event->hw;
        int ret = 0;
 
+       /*
+        * Non-sampling counters might still use the PMI to fold short
+        * hardware counters; ignore those.
+        */
+       if (unlikely(!is_sampling_event(event)))
+               return 0;
+
        if (!throttle) {
                hwc->interrupts++;
        } else {
@@ -4391,7 +4521,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
        if (!regs)
                return;
 
-       if (!hwc->sample_period)
+       if (!is_sampling_event(event))
                return;
 
        if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4554,7 +4684,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
        struct hw_perf_event *hwc = &event->hw;
        struct hlist_head *head;
 
-       if (hwc->sample_period) {
+       if (is_sampling_event(event)) {
                hwc->last_period = hwc->sample_period;
                perf_swevent_set_period(event);
        }
@@ -4811,15 +4941,6 @@ static int perf_tp_event_init(struct perf_event *event)
        if (event->attr.type != PERF_TYPE_TRACEPOINT)
                return -ENOENT;
 
-       /*
-        * Raw tracepoint data is a severe data leak, only allow root to
-        * have these.
-        */
-       if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
-                       perf_paranoid_tracepoint_raw() &&
-                       !capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
        err = perf_trace_init(event);
        if (err)
                return err;
@@ -4842,7 +4963,7 @@ static struct pmu perf_tracepoint = {
 
 static inline void perf_tp_register(void)
 {
-       perf_pmu_register(&perf_tracepoint);
+       perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
@@ -4932,31 +5053,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
 static void perf_swevent_start_hrtimer(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
+       s64 period;
+
+       if (!is_sampling_event(event))
+               return;
 
        hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        hwc->hrtimer.function = perf_swevent_hrtimer;
-       if (hwc->sample_period) {
-               s64 period = local64_read(&hwc->period_left);
 
-               if (period) {
-                       if (period < 0)
-                               period = 10000;
+       period = local64_read(&hwc->period_left);
+       if (period) {
+               if (period < 0)
+                       period = 10000;
 
-                       local64_set(&hwc->period_left, 0);
-               } else {
-                       period = max_t(u64, 10000, hwc->sample_period);
-               }
-               __hrtimer_start_range_ns(&hwc->hrtimer,
+               local64_set(&hwc->period_left, 0);
+       } else {
+               period = max_t(u64, 10000, hwc->sample_period);
+       }
+       __hrtimer_start_range_ns(&hwc->hrtimer,
                                ns_to_ktime(period), 0,
                                HRTIMER_MODE_REL_PINNED, 0);
-       }
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
 
-       if (hwc->sample_period) {
+       if (is_sampling_event(event)) {
                ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
                local64_set(&hwc->period_left, ktime_to_ns(remaining));
 
@@ -5184,8 +5307,61 @@ static void free_pmu_context(struct pmu *pmu)
 out:
        mutex_unlock(&pmus_lock);
 }
+static struct idr pmu_idr;
+
+static ssize_t
+type_show(struct device *dev, struct device_attribute *attr, char *page)
+{
+       struct pmu *pmu = dev_get_drvdata(dev);
+
+       return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
+}
+
+static struct device_attribute pmu_dev_attrs[] = {
+       __ATTR_RO(type),
+       __ATTR_NULL,
+};
+
+static int pmu_bus_running;
+static struct bus_type pmu_bus = {
+       .name           = "event_source",
+       .dev_attrs      = pmu_dev_attrs,
+};
+
+static void pmu_dev_release(struct device *dev)
+{
+       kfree(dev);
+}
+
+static int pmu_dev_alloc(struct pmu *pmu)
+{
+       int ret = -ENOMEM;
+
+       pmu->dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+       if (!pmu->dev)
+               goto out;
+
+       device_initialize(pmu->dev);
+       ret = dev_set_name(pmu->dev, "%s", pmu->name);
+       if (ret)
+               goto free_dev;
+
+       dev_set_drvdata(pmu->dev, pmu);
+       pmu->dev->bus = &pmu_bus;
+       pmu->dev->release = pmu_dev_release;
+       ret = device_add(pmu->dev);
+       if (ret)
+               goto free_dev;
+
+out:
+       return ret;
+
+free_dev:
+       put_device(pmu->dev);
+       goto out;
+}
 
-int perf_pmu_register(struct pmu *pmu)
+int perf_pmu_register(struct pmu *pmu, char *name, int type)
 {
        int cpu, ret;
 
@@ -5195,13 +5371,38 @@ int perf_pmu_register(struct pmu *pmu)
        if (!pmu->pmu_disable_count)
                goto unlock;
 
+       pmu->type = -1;
+       if (!name)
+               goto skip_type;
+       pmu->name = name;
+
+       if (type < 0) {
+               int err = idr_pre_get(&pmu_idr, GFP_KERNEL);
+               if (!err)
+                       goto free_pdc;
+
+               err = idr_get_new_above(&pmu_idr, pmu, PERF_TYPE_MAX, &type);
+               if (err) {
+                       ret = err;
+                       goto free_pdc;
+               }
+       }
+       pmu->type = type;
+
+       if (pmu_bus_running) {
+               ret = pmu_dev_alloc(pmu);
+               if (ret)
+                       goto free_idr;
+       }
+
+skip_type:
        pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
        if (pmu->pmu_cpu_context)
                goto got_cpu_context;
 
        pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
        if (!pmu->pmu_cpu_context)
-               goto free_pdc;
+               goto free_dev;
 
        for_each_possible_cpu(cpu) {
                struct perf_cpu_context *cpuctx;
@@ -5245,6 +5446,14 @@ unlock:
 
        return ret;
 
+free_dev:
+       device_del(pmu->dev);
+       put_device(pmu->dev);
+
+free_idr:
+       if (pmu->type >= PERF_TYPE_MAX)
+               idr_remove(&pmu_idr, pmu->type);
+
 free_pdc:
        free_percpu(pmu->pmu_disable_count);
        goto unlock;
@@ -5264,6 +5473,10 @@ void perf_pmu_unregister(struct pmu *pmu)
        synchronize_rcu();
 
        free_percpu(pmu->pmu_disable_count);
+       if (pmu->type >= PERF_TYPE_MAX)
+               idr_remove(&pmu_idr, pmu->type);
+       device_del(pmu->dev);
+       put_device(pmu->dev);
        free_pmu_context(pmu);
 }
 
@@ -5273,6 +5486,13 @@ struct pmu *perf_init_event(struct perf_event *event)
        int idx;
 
        idx = srcu_read_lock(&pmus_srcu);
+
+       rcu_read_lock();
+       pmu = idr_find(&pmu_idr, event->attr.type);
+       rcu_read_unlock();
+       if (pmu)
+               goto unlock;
+
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                int ret = pmu->event_init(event);
                if (!ret)
@@ -5737,6 +5957,12 @@ SYSCALL_DEFINE5(perf_event_open,
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
 
+       /*
+        * Precalculate sample_data sizes
+        */
+       perf_event__header_size(event);
+       perf_event__id_header_size(event);
+
        /*
         * Drop the reference on the group_event after placing the
         * new event on the sibling_list. This ensures destruction
@@ -6089,6 +6315,12 @@ inherit_event(struct perf_event *parent_event,
        child_event->ctx = child_ctx;
        child_event->overflow_handler = parent_event->overflow_handler;
 
+       /*
+        * Precalculate sample_data sizes
+        */
+       perf_event__header_size(child_event);
+       perf_event__id_header_size(child_event);
+
        /*
         * Link it up in the child's context:
         */
@@ -6320,7 +6552,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
        mutex_unlock(&swhash->hlist_mutex);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
 static void perf_pmu_rotate_stop(struct pmu *pmu)
 {
        struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -6374,6 +6606,26 @@ static void perf_event_exit_cpu(int cpu)
 static inline void perf_event_exit_cpu(int cpu) { }
 #endif
 
+static int
+perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu)
+               perf_event_exit_cpu(cpu);
+
+       return NOTIFY_OK;
+}
+
+/*
+ * Run the perf reboot notifier at the very last possible moment so that
+ * the generic watchdog code runs as long as possible.
+ */
+static struct notifier_block perf_reboot_notifier = {
+       .notifier_call = perf_reboot,
+       .priority = INT_MIN,
+};
+
 static int __cpuinit
 perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
@@ -6402,14 +6654,45 @@ void __init perf_event_init(void)
 {
        int ret;
 
+       idr_init(&pmu_idr);
+
        perf_event_init_all_cpus();
        init_srcu_struct(&pmus_srcu);
-       perf_pmu_register(&perf_swevent);
-       perf_pmu_register(&perf_cpu_clock);
-       perf_pmu_register(&perf_task_clock);
+       perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
+       perf_pmu_register(&perf_cpu_clock, NULL, -1);
+       perf_pmu_register(&perf_task_clock, NULL, -1);
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
+       register_reboot_notifier(&perf_reboot_notifier);
 
        ret = init_hw_breakpoint();
        WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
+
+static int __init perf_event_sysfs_init(void)
+{
+       struct pmu *pmu;
+       int ret;
+
+       mutex_lock(&pmus_lock);
+
+       ret = bus_register(&pmu_bus);
+       if (ret)
+               goto unlock;
+
+       list_for_each_entry(pmu, &pmus, entry) {
+               if (!pmu->name || pmu->type < 0)
+                       continue;
+
+               ret = pmu_dev_alloc(pmu);
+               WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
+       }
+       pmu_bus_running = 1;
+       ret = 0;
+
+unlock:
+       mutex_unlock(&pmus_lock);
+
+       return ret;
+}
+device_initcall(perf_event_sysfs_init);
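Once pmu_bus is registered, every named PMU shows up as a device on the event_source bus, and the type attribute defined earlier exposes its (possibly dynamically allocated) type number. A sketch of how a user-space tool could resolve that number before calling perf_event_open(); the path layout follows the bus and attribute names in this patch, the helper itself is illustrative:

	/* Hypothetical user-space helper (sketch): read a PMU's type from sysfs. */
	#include <stdio.h>

	static int read_pmu_type(const char *name)
	{
		char path[128];
		int type = -1;
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/bus/event_source/devices/%s/type", name);
		f = fopen(path, "r");
		if (!f)
			return -1;
		if (fscanf(f, "%d", &type) != 1)
			type = -1;
		fclose(f);
		return type;	/* goes into perf_event_attr.type */
	}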
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9ca4973f736d53b04bf4eea9373ce635cf7098c3..93bd2eb2bc53efe76dd120501b0cbda115b71bfd 100644 (file)

@@ -145,7 +145,13 @@ static int common_timer_del(struct k_itimer *timer);
 
 static enum hrtimer_restart posix_timer_fn(struct hrtimer *data);
 
-static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags);
+static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
+
+#define lock_timer(tid, flags)                                            \
+({     struct k_itimer *__timr;                                           \
+       __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
+       __timr;                                                            \
+})
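The wrapper exists for the sake of sparse: __cond_lock() tells the checker that timr->it_lock is acquired exactly when __lock_timer() returns non-NULL, silencing conditional-locking warnings, while non-checker builds see a plain call. Callers keep the usual pattern (a sketch using the functions defined in this file):

	/* Sketch: typical caller of lock_timer()/unlock_timer(). */
	unsigned long flags;
	struct k_itimer *timr = lock_timer(timer_id, &flags);

	if (!timr)
		return -EINVAL;
	/* ... timr->it_lock is held here ... */
	unlock_timer(timr, flags);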
 
 static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
 {
@@ -619,7 +625,7 @@ out:
  * the find to the timer lock.  To avoid a deadlock, the timer id MUST
  * be released without holding the timer lock.
  */
-static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags)
+static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
 {
        struct k_itimer *timr;
        /*
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index ecf770509d0d1bb9dce2381716d6e7583ee6c492..031d5e3a61973464eec7e0e72791e423bfb3396f 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/suspend.h>
+#include <trace/events/power.h>
 
 #include "power.h"
 
@@ -201,6 +202,7 @@ int suspend_devices_and_enter(suspend_state_t state)
        if (!suspend_ops)
                return -ENOSYS;
 
+       trace_machine_suspend(state);
        if (suspend_ops->begin) {
                error = suspend_ops->begin(state);
                if (error)
@@ -229,6 +231,7 @@ int suspend_devices_and_enter(suspend_state_t state)
  Close:
        if (suspend_ops->end)
                suspend_ops->end();
+       trace_machine_suspend(PWR_EVENT_EXIT);
        return error;
 
  Recover_platform:
diff --git a/kernel/printk.c b/kernel/printk.c
index a23315dc4498844c113cecc9792eabd063e1d87b..ab3ffc5b3b64613507134573dbb94af132c4adff 100644 (file)
@@ -1074,17 +1074,17 @@ static DEFINE_PER_CPU(int, printk_pending);
 
 void printk_tick(void)
 {
-       if (__get_cpu_var(printk_pending)) {
-               __get_cpu_var(printk_pending) = 0;
+       if (__this_cpu_read(printk_pending)) {
+               __this_cpu_write(printk_pending, 0);
                wake_up_interruptible(&log_wait);
        }
 }
 
 int printk_needs_cpu(int cpu)
 {
-       if (unlikely(cpu_is_offline(cpu)))
+       if (cpu_is_offline(cpu))
                printk_tick();
-       return per_cpu(printk_pending, cpu);
+       return __this_cpu_read(printk_pending);
 }
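The switch from __get_cpu_var() to the __this_cpu_*() accessors drops the explicit per-CPU address calculation; on x86 these compile down to a single %gs-relative instruction. A rough before/after sketch (illustrative, not from this commit):

	DEFINE_PER_CPU(int, flag);

	/* old: form a pointer to this CPU's copy, then dereference it */
	__get_cpu_var(flag) = 1;

	/* new: one segment-relative store, no address materialized */
	__this_cpu_write(flag, 1);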
 
 void wake_up_klogd(void)
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index d806735342acb10bc3e3ae787e62ade34f1d5955..0344937247495d69b3ef5255ace0d94b2250fac2 100644 (file)
 #include <linux/time.h>
 #include <linux/cpu.h>
 
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
-       struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
-       struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
-       struct rcu_head **curtail;      /* ->next pointer of last CB. */
-};
-
-/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-       .donetail       = &rcu_sched_ctrlblk.rcucblist,
-       .curtail        = &rcu_sched_ctrlblk.rcucblist,
-};
-
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-       .donetail       = &rcu_bh_ctrlblk.rcucblist,
-       .curtail        = &rcu_bh_ctrlblk.rcucblist,
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
+static struct task_struct *rcu_kthread_task;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
+static unsigned long have_rcu_kthread_work;
+static void invoke_rcu_kthread(void);
 
 /* Forward declarations for rcutiny_plugin.h. */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+struct rcu_ctrlblk;
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static int rcu_kthread(void *arg);
 static void __call_rcu(struct rcu_head *head,
                       void (*func)(struct rcu_head *rcu),
                       struct rcu_ctrlblk *rcp);
@@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu)
 {
        if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
            rcu_qsctr_help(&rcu_bh_ctrlblk))
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
 }
 
 /*
@@ -132,7 +117,7 @@ void rcu_sched_qs(int cpu)
 void rcu_bh_qs(int cpu)
 {
        if (rcu_qsctr_help(&rcu_bh_ctrlblk))
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
 }
 
 /*
@@ -152,13 +137,14 @@ void rcu_check_callbacks(int cpu, int user)
 }
 
 /*
- * Helper function for rcu_process_callbacks() that operates on the
- * specified rcu_ctrlkblk structure.
+ * Invoke the RCU callbacks on the specified rcu_ctrlblk structure
+ * whose grace period has elapsed.
  */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 {
        struct rcu_head *next, *list;
        unsigned long flags;
+       RCU_TRACE(int cb_count = 0);
 
        /* If no RCU callbacks ready to invoke, just return. */
        if (&rcp->rcucblist == rcp->donetail)
@@ -180,19 +166,58 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
                next = list->next;
                prefetch(next);
                debug_rcu_head_unqueue(list);
+               local_bh_disable();
                list->func(list);
+               local_bh_enable();
                list = next;
+               RCU_TRACE(cb_count++);
        }
+       RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
 }
 
 /*
- * Invoke any callbacks whose grace period has completed.
+ * This kthread invokes RCU callbacks whose grace periods have
+ * elapsed.  It is awakened as needed, and takes the place of the
+ * RCU_SOFTIRQ that was used previously for this purpose.
+ * This is a kthread, but it is never stopped, at least not until
+ * the system goes down.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static int rcu_kthread(void *arg)
 {
-       __rcu_process_callbacks(&rcu_sched_ctrlblk);
-       __rcu_process_callbacks(&rcu_bh_ctrlblk);
-       rcu_preempt_process_callbacks();
+       unsigned long work;
+       unsigned long morework;
+       unsigned long flags;
+
+       for (;;) {
+               wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
+               morework = rcu_boost();
+               local_irq_save(flags);
+               work = have_rcu_kthread_work;
+               have_rcu_kthread_work = morework;
+               local_irq_restore(flags);
+               if (work) {
+                       rcu_process_callbacks(&rcu_sched_ctrlblk);
+                       rcu_process_callbacks(&rcu_bh_ctrlblk);
+                       rcu_preempt_process_callbacks();
+               }
+               schedule_timeout_interruptible(1); /* Leave CPU for others. */
+       }
+
+       return 0;  /* Not reached, but needed to shut gcc up. */
+}
+
+/*
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
+ */
+static void invoke_rcu_kthread(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       have_rcu_kthread_work = 1;
+       wake_up(&rcu_kthread_wq);
+       local_irq_restore(flags);
 }
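Note how the handoff avoids lost wakeups: invoke_rcu_kthread() sets have_rcu_kthread_work before calling wake_up(), with irqs disabled because it can run from interrupt context, and rcu_kthread() re-reads the flag both through the wait_event() condition and under local_irq_save(), so a request that races with the kthread going to sleep is still seen on the next pass.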
 
 /*
@@ -230,6 +255,7 @@ static void __call_rcu(struct rcu_head *head,
        local_irq_save(flags);
        *rcp->curtail = head;
        rcp->curtail = &head->next;
+       RCU_TRACE(rcp->qlen++);
        local_irq_restore(flags);
 }
 
@@ -282,7 +308,16 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-void __init rcu_init(void)
+/*
+ * Spawn the kthread that invokes RCU callbacks.
+ */
+static int __init rcu_spawn_kthreads(void)
 {
-       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+       struct sched_param sp;
+
+       rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
+       sp.sched_priority = RCU_BOOST_PRIO;
+       sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
+       return 0;
 }
+early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 6ceca4f745ffa1f4535c69467ea59704e2ddbe97..015abaea962ad4087130014506b72dc19b33b43d 100644 (file)
  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
+#include <linux/kthread.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_RCU_TRACE
+#define RCU_TRACE(stmt)        stmt
+#else /* #ifdef CONFIG_RCU_TRACE */
+#define RCU_TRACE(stmt)
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+       struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
+       struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
+       struct rcu_head **curtail;      /* ->next pointer of last CB. */
+       RCU_TRACE(long qlen);           /* Number of pending CBs. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+       .donetail       = &rcu_sched_ctrlblk.rcucblist,
+       .curtail        = &rcu_sched_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+       .donetail       = &rcu_bh_ctrlblk.rcucblist,
+       .curtail        = &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 #ifdef CONFIG_TINY_PREEMPT_RCU
 
 #include <linux/delay.h>
@@ -46,17 +80,45 @@ struct rcu_preempt_ctrlblk {
        struct list_head *gp_tasks;
                                /* Pointer to the first task blocking the */
                                /*  current grace period, or NULL if there */
-                               /*  is not such task. */
+                               /*  is no such task. */
        struct list_head *exp_tasks;
                                /* Pointer to first task blocking the */
                                /*  current expedited grace period, or NULL */
                                /*  if there is no such task.  If there */
                                /*  is no current expedited grace period, */
                                /*  then there cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+       struct list_head *boost_tasks;
+                               /* Pointer to first task that needs to be */
+                               /*  priority-boosted, or NULL if no priority */
+                               /*  boosting is needed.  If there is no */
+                               /*  current or expedited grace period, there */
+                               /*  can be no such task. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
        u8 gpnum;               /* Current grace period. */
        u8 gpcpu;               /* Last grace period blocked by the CPU. */
        u8 completed;           /* Last grace period completed. */
                                /*  If all three are equal, RCU is idle. */
+#ifdef CONFIG_RCU_BOOST
+       s8 boosted_this_gp;     /* Has boosting already happened? */
+       unsigned long boost_time; /* When to start boosting (jiffies) */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+#ifdef CONFIG_RCU_TRACE
+       unsigned long n_grace_periods;
+#ifdef CONFIG_RCU_BOOST
+       unsigned long n_tasks_boosted;
+       unsigned long n_exp_boosts;
+       unsigned long n_normal_boosts;
+       unsigned long n_normal_balk_blkd_tasks;
+       unsigned long n_normal_balk_gp_tasks;
+       unsigned long n_normal_balk_boost_tasks;
+       unsigned long n_normal_balk_boosted;
+       unsigned long n_normal_balk_notyet;
+       unsigned long n_normal_balk_nos;
+       unsigned long n_exp_balk_blkd_tasks;
+       unsigned long n_exp_balk_nos;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+#endif /* #ifdef CONFIG_RCU_TRACE */
 };
 
 static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -121,6 +183,210 @@ static int rcu_preempt_gp_in_progress(void)
        return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
 }
 
+/*
+ * Advance a ->blkd_tasks-list pointer to the next entry, returning
+ * NULL instead if we are at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t)
+{
+       struct list_head *np;
+
+       np = t->rcu_node_entry.next;
+       if (np == &rcu_preempt_ctrlblk.blkd_tasks)
+               np = NULL;
+       return np;
+}
+
+#ifdef CONFIG_RCU_TRACE
+
+#ifdef CONFIG_RCU_BOOST
+static void rcu_initiate_boost_trace(void);
+static void rcu_initiate_exp_boost_trace(void);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * Dump additional statistics for TINY_PREEMPT_RCU.
+ */
+static void show_tiny_preempt_stats(struct seq_file *m)
+{
+       seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
+                  rcu_preempt_ctrlblk.rcb.qlen,
+                  rcu_preempt_ctrlblk.n_grace_periods,
+                  rcu_preempt_ctrlblk.gpnum,
+                  rcu_preempt_ctrlblk.gpcpu,
+                  rcu_preempt_ctrlblk.completed,
+                  "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
+                  "N."[!rcu_preempt_ctrlblk.gp_tasks],
+                  "E."[!rcu_preempt_ctrlblk.exp_tasks]);
+#ifdef CONFIG_RCU_BOOST
+       seq_printf(m, "             ttb=%c btg=",
+                  "B."[!rcu_preempt_ctrlblk.boost_tasks]);
+       switch (rcu_preempt_ctrlblk.boosted_this_gp) {
+       case -1:
+               seq_puts(m, "exp");
+               break;
+       case 0:
+               seq_puts(m, "no");
+               break;
+       case 1:
+               seq_puts(m, "begun");
+               break;
+       case 2:
+               seq_puts(m, "done");
+               break;
+       default:
+               seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
+       }
+       seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
+                  rcu_preempt_ctrlblk.n_tasks_boosted,
+                  rcu_preempt_ctrlblk.n_exp_boosts,
+                  rcu_preempt_ctrlblk.n_normal_boosts,
+                  (int)(jiffies & 0xffff),
+                  (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
+       seq_printf(m, "             %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n",
+                  "normal balk",
+                  rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
+                  rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
+                  rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
+                  rcu_preempt_ctrlblk.n_normal_balk_boosted,
+                  rcu_preempt_ctrlblk.n_normal_balk_notyet,
+                  rcu_preempt_ctrlblk.n_normal_balk_nos);
+       seq_printf(m, "             exp balk: bt=%lu nos=%lu\n",
+                  rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
+                  rcu_preempt_ctrlblk.n_exp_balk_nos);
+#endif /* #ifdef CONFIG_RCU_BOOST */
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
+ * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
+ */
+static int rcu_boost(void)
+{
+       unsigned long flags;
+       struct rt_mutex mtx;
+       struct list_head *np;
+       struct task_struct *t;
+
+       if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+               return 0;  /* Nothing to boost. */
+       raw_local_irq_save(flags);
+       rcu_preempt_ctrlblk.boosted_this_gp++;
+       t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
+                        rcu_node_entry);
+       np = rcu_next_node_entry(t);
+       rt_mutex_init_proxy_locked(&mtx, t);
+       t->rcu_boost_mutex = &mtx;
+       t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+       raw_local_irq_restore(flags);
+       rt_mutex_lock(&mtx);
+       RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
+       rcu_preempt_ctrlblk.boosted_this_gp++;
+       rt_mutex_unlock(&mtx);
+       return rcu_preempt_ctrlblk.boost_tasks != NULL;
+}
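The boost itself is plain rt_mutex priority inheritance: rcu_boost() builds a mutex that is proxy-locked on behalf of the preempted reader via rt_mutex_init_proxy_locked(), then blocks on it with rt_mutex_lock(), which lends the booster's real-time priority to the reader. When the reader finally leaves its critical section, rcu_read_unlock_special() sees RCU_READ_UNLOCK_BOOSTED, unlocks the mutex, and thereby both drops the boost and releases the booster to move on to the next blocked task.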
+
+/*
+ * Check to see if it is now time to start boosting RCU readers blocking
+ * the current grace period, and, if so, tell the rcu_kthread_task to
+ * start boosting them.  If there is an expedited boost in progress,
+ * we wait for it to complete.
+ *
+ * If there are no blocked readers blocking the current grace period,
+ * return 0 to let the caller know, otherwise return 1.  Note that this
+ * return value is independent of whether or not boosting was done.
+ */
+static int rcu_initiate_boost(void)
+{
+       if (!rcu_preempt_blocked_readers_cgp()) {
+               RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
+               return 0;
+       }
+       if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+           rcu_preempt_ctrlblk.boost_tasks == NULL &&
+           rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
+           ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
+               rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
+               invoke_rcu_kthread();
+               RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
+       } else
+               RCU_TRACE(rcu_initiate_boost_trace());
+       return 1;
+}
+
+/*
+ * Initiate boosting for an expedited grace period.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+       unsigned long flags;
+
+       raw_local_irq_save(flags);
+       if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
+               rcu_preempt_ctrlblk.boost_tasks =
+                       rcu_preempt_ctrlblk.blkd_tasks.next;
+               rcu_preempt_ctrlblk.boosted_this_gp = -1;
+               invoke_rcu_kthread();
+               RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
+       } else
+               RCU_TRACE(rcu_initiate_exp_boost_trace());
+       raw_local_irq_restore(flags);
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+       rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+       if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
+               rcu_preempt_ctrlblk.boosted_this_gp = 0;
+}
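As a worked example of the delay macro above: with CONFIG_RCU_BOOST_DELAY=500 (milliseconds) and HZ=250, boosting becomes eligible DIV_ROUND_UP(500 * 250, 1000) = 125 jiffies, that is half a second, after the grace period starts.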
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * If there is no RCU priority boosting, we don't boost.
+ */
+static int rcu_boost(void)
+{
+       return 0;
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate boosting,
+ * but we do indicate whether there are blocked readers blocking the
+ * current grace period.
+ */
+static int rcu_initiate_boost(void)
+{
+       return rcu_preempt_blocked_readers_cgp();
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate expedited boosting.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, nothing to do at grace-period start.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+}
+
+#endif /* else #ifdef CONFIG_RCU_BOOST */
+
 /*
  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
@@ -148,11 +414,14 @@ static void rcu_preempt_cpu_qs(void)
        rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 
+       /* If there is no GP then there is nothing more to do.  */
+       if (!rcu_preempt_gp_in_progress())
+               return;
        /*
-        * If there is no GP, or if blocked readers are still blocking GP,
-        * then there is nothing more to do.
+        * Check up on boosting.  If there are no readers blocking the
+        * current grace period, leave.
         */
-       if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
+       if (rcu_initiate_boost())
                return;
 
        /* Advance callbacks. */
@@ -164,9 +433,9 @@ static void rcu_preempt_cpu_qs(void)
        if (!rcu_preempt_blocked_readers_any())
                rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
 
-       /* If there are done callbacks, make RCU_SOFTIRQ process them. */
+       /* If there are done callbacks, cause them to be invoked. */
        if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
 }
 
 /*
@@ -178,12 +447,16 @@ static void rcu_preempt_start_gp(void)
 
                /* Official start of GP. */
                rcu_preempt_ctrlblk.gpnum++;
+               RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
 
                /* Any blocked RCU readers block new GP. */
                if (rcu_preempt_blocked_readers_any())
                        rcu_preempt_ctrlblk.gp_tasks =
                                rcu_preempt_ctrlblk.blkd_tasks.next;
 
+               /* Set up for RCU priority boosting. */
+               rcu_preempt_boost_start_gp();
+
                /* If there is no running reader, CPU is done with GP. */
                if (!rcu_preempt_running_reader())
                        rcu_preempt_cpu_qs();
@@ -304,14 +577,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
                 */
                empty = !rcu_preempt_blocked_readers_cgp();
                empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
-               np = t->rcu_node_entry.next;
-               if (np == &rcu_preempt_ctrlblk.blkd_tasks)
-                       np = NULL;
+               np = rcu_next_node_entry(t);
                list_del(&t->rcu_node_entry);
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
                        rcu_preempt_ctrlblk.gp_tasks = np;
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
                        rcu_preempt_ctrlblk.exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+               if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
+                       rcu_preempt_ctrlblk.boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
                INIT_LIST_HEAD(&t->rcu_node_entry);
 
                /*
@@ -331,6 +606,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
                if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
                        rcu_report_exp_done();
        }
+#ifdef CONFIG_RCU_BOOST
+       /* Unboost self if was boosted. */
+       if (special & RCU_READ_UNLOCK_BOOSTED) {
+               t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+               rt_mutex_unlock(t->rcu_boost_mutex);
+               t->rcu_boost_mutex = NULL;
+       }
+#endif /* #ifdef CONFIG_RCU_BOOST */
        local_irq_restore(flags);
 }
 
@@ -374,7 +657,7 @@ static void rcu_preempt_check_callbacks(void)
                rcu_preempt_cpu_qs();
        if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
            rcu_preempt_ctrlblk.rcb.donetail)
-               raise_softirq(RCU_SOFTIRQ);
+               invoke_rcu_kthread();
        if (rcu_preempt_gp_in_progress() &&
            rcu_cpu_blocking_cur_gp() &&
            rcu_preempt_running_reader())
@@ -383,7 +666,7 @@ static void rcu_preempt_check_callbacks(void)
 
 /*
  * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
- * update, so this is invoked from __rcu_process_callbacks() to
+ * update, so this is invoked from rcu_process_callbacks() to
  * handle that case.  Of course, it is invoked for all flavors of
  * RCU, but RCU callbacks can appear only on one of the lists, and
  * neither ->nexttail nor ->donetail can possibly be NULL, so there
@@ -400,7 +683,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
  */
 static void rcu_preempt_process_callbacks(void)
 {
-       __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
+       rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
 }
 
 /*
@@ -417,6 +700,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
        local_irq_save(flags);
        *rcu_preempt_ctrlblk.nexttail = head;
        rcu_preempt_ctrlblk.nexttail = &head->next;
+       RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
        rcu_preempt_start_gp();  /* checks to see if GP needed. */
        local_irq_restore(flags);
 }
@@ -532,6 +816,7 @@ void synchronize_rcu_expedited(void)
 
        /* Wait for tail of ->blkd_tasks list to drain. */
        if (rcu_preempted_readers_exp())
+               rcu_initiate_expedited_boost();
                wait_event(sync_rcu_preempt_exp_wq,
                           !rcu_preempted_readers_exp());
 
@@ -572,6 +857,27 @@ void exit_rcu(void)
 
 #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
 
+#ifdef CONFIG_RCU_TRACE
+
+/*
+ * Because preemptible RCU does not exist, it is not necessary to
+ * dump out its statistics.
+ */
+static void show_tiny_preempt_stats(struct seq_file *m)
+{
+}
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
+
+/*
+ * Because preemptible RCU does not exist, it is never necessary to
+ * boost preempted RCU readers.
+ */
+static int rcu_boost(void)
+{
+       return 0;
+}
+
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -599,17 +905,116 @@ static void rcu_preempt_process_callbacks(void)
 #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-
 #include <linux/kernel_stat.h>
 
 /*
  * During boot, we forgive RCU lockdep issues.  After this function is
  * invoked, we start taking RCU lockdep issues seriously.
  */
-void rcu_scheduler_starting(void)
+void __init rcu_scheduler_starting(void)
 {
        WARN_ON(nr_context_switches() > 0);
        rcu_scheduler_active = 1;
 }
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_RCU_BOOST
+#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
+#else /* #ifdef CONFIG_RCU_BOOST */
+#define RCU_BOOST_PRIO 1
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+#ifdef CONFIG_RCU_TRACE
+
+#ifdef CONFIG_RCU_BOOST
+
+static void rcu_initiate_boost_trace(void)
+{
+       if (rcu_preempt_ctrlblk.gp_tasks == NULL)
+               rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++;
+       else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
+               rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++;
+       else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
+               rcu_preempt_ctrlblk.n_normal_balk_boosted++;
+       else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
+               rcu_preempt_ctrlblk.n_normal_balk_notyet++;
+       else
+               rcu_preempt_ctrlblk.n_normal_balk_nos++;
+}
+
+static void rcu_initiate_exp_boost_trace(void)
+{
+       if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
+               rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
+       else
+               rcu_preempt_ctrlblk.n_exp_balk_nos++;
+}
+
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
+{
+       unsigned long flags;
+
+       raw_local_irq_save(flags);
+       rcp->qlen -= n;
+       raw_local_irq_restore(flags);
+}
+
+/*
+ * Dump statistics for TINY_RCU, such as they are.
+ */
+static int show_tiny_stats(struct seq_file *m, void *unused)
+{
+       show_tiny_preempt_stats(m);
+       seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
+       seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
+       return 0;
+}
+
+static int show_tiny_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_tiny_stats, NULL);
+}
+
+static const struct file_operations show_tiny_stats_fops = {
+       .owner = THIS_MODULE,
+       .open = show_tiny_stats_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static struct dentry *rcudir;
+
+static int __init rcutiny_trace_init(void)
+{
+       struct dentry *retval;
+
+       rcudir = debugfs_create_dir("rcu", NULL);
+       if (!rcudir)
+               goto free_out;
+       retval = debugfs_create_file("rcudata", 0444, rcudir,
+                                    NULL, &show_tiny_stats_fops);
+       if (!retval)
+               goto free_out;
+       return 0;
+free_out:
+       debugfs_remove_recursive(rcudir);
+       return 1;
+}
+
+static void __exit rcutiny_trace_cleanup(void)
+{
+       debugfs_remove_recursive(rcudir);
+}
+
+module_init(rcutiny_trace_init);
+module_exit(rcutiny_trace_cleanup);
+
+MODULE_AUTHOR("Paul E. McKenney");
+MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
+MODULE_LICENSE("GPL");
+
+#endif /* #ifdef CONFIG_RCU_TRACE */
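Usage note: with CONFIG_RCU_TRACE=y and debugfs mounted in the usual place, reading /sys/kernel/debug/rcu/rcudata (for example with cat) dumps the per-flavor queue lengths formatted above, plus the TINY_PREEMPT_RCU boost statistics when that flavor is configured.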
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9d8e8fb2515f4e4801c214841a7f8c95b8b45ffe..89613f97ff264e35cac497419bd0a4dac798ce5f 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/srcu.h>
 #include <linux/slab.h>
 #include <asm/byteorder.h>
+#include <linux/sched.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
@@ -64,6 +65,9 @@ static int irqreader = 1;     /* RCU readers from irq (timers). */
 static int fqs_duration = 0;   /* Duration of bursts (us), 0 to disable. */
 static int fqs_holdoff = 0;    /* Hold time within burst (us). */
 static int fqs_stutter = 3;    /* Wait time between bursts (s). */
+static int test_boost = 1;     /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
+static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
+static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
 static char *torture_type = "rcu"; /* What RCU implementation to torture. */
 
 module_param(nreaders, int, 0444);
@@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444);
 MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
 module_param(fqs_stutter, int, 0444);
 MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
+module_param(test_boost, int, 0444);
+MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
+module_param(test_boost_interval, int, 0444);
+MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
+module_param(test_boost_duration, int, 0444);
+MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
 module_param(torture_type, charp, 0444);
 MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
 
@@ -109,6 +119,7 @@ static struct task_struct *stats_task;
 static struct task_struct *shuffler_task;
 static struct task_struct *stutter_task;
 static struct task_struct *fqs_task;
+static struct task_struct *boost_tasks[NR_CPUS];
 
 #define RCU_TORTURE_PIPE_LEN 10
 
@@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail;
 static atomic_t n_rcu_torture_free;
 static atomic_t n_rcu_torture_mberror;
 static atomic_t n_rcu_torture_error;
+static long n_rcu_torture_boost_ktrerror;
+static long n_rcu_torture_boost_rterror;
+static long n_rcu_torture_boost_allocerror;
+static long n_rcu_torture_boost_afferror;
+static long n_rcu_torture_boost_failure;
+static long n_rcu_torture_boosts;
 static long n_rcu_torture_timers;
 static struct list_head rcu_torture_removed;
 static cpumask_var_t shuffle_tmp_mask;
@@ -147,6 +164,16 @@ static int stutter_pause_test;
 #endif
 int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
 
+#ifdef CONFIG_RCU_BOOST
+#define rcu_can_boost() 1
+#else /* #ifdef CONFIG_RCU_BOOST */
+#define rcu_can_boost() 0
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
+static unsigned long boost_starttime;  /* jiffies of next boost test start. */
+DEFINE_MUTEX(boost_mutex);             /* protect setting boost_starttime */
+                                       /*  and boost task create/destroy. */
+
 /* Mediate rmmod and system shutdown.  Concurrent rmmod & shutdown illegal! */
 
 #define FULLSTOP_DONTSTOP 0    /* Normal operation. */
@@ -277,6 +304,7 @@ struct rcu_torture_ops {
        void (*fqs)(void);
        int (*stats)(char *page);
        int irq_capable;
+       int can_boost;
        char *name;
 };
 
@@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = {
        .fqs            = rcu_force_quiescent_state,
        .stats          = NULL,
        .irq_capable    = 1,
+       .can_boost      = rcu_can_boost(),
        .name           = "rcu"
 };
 
@@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
        .fqs            = rcu_force_quiescent_state,
        .stats          = NULL,
        .irq_capable    = 1,
+       .can_boost      = rcu_can_boost(),
        .name           = "rcu_sync"
 };
 
@@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
        .fqs            = rcu_force_quiescent_state,
        .stats          = NULL,
        .irq_capable    = 1,
+       .can_boost      = rcu_can_boost(),
        .name           = "rcu_expedited"
 };
 
@@ -683,6 +714,110 @@ static struct rcu_torture_ops sched_expedited_ops = {
        .name           = "sched_expedited"
 };
 
+/*
+ * RCU torture priority-boost testing.  Runs one real-time thread per
+ * CPU for moderate bursts, repeatedly registering RCU callbacks and
+ * spinning waiting for them to be invoked.  If a given callback takes
+ * too long to be invoked, we assume that priority inversion has occurred.
+ */
+
+struct rcu_boost_inflight {
+       struct rcu_head rcu;
+       int inflight;
+};
+
+static void rcu_torture_boost_cb(struct rcu_head *head)
+{
+       struct rcu_boost_inflight *rbip =
+               container_of(head, struct rcu_boost_inflight, rcu);
+
+       smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
+       rbip->inflight = 0;
+}
+
+static int rcu_torture_boost(void *arg)
+{
+       unsigned long call_rcu_time;
+       unsigned long endtime;
+       unsigned long oldstarttime;
+       struct rcu_boost_inflight rbi = { .inflight = 0 };
+       struct sched_param sp;
+
+       VERBOSE_PRINTK_STRING("rcu_torture_boost started");
+
+       /* Set real-time priority. */
+       sp.sched_priority = 1;
+       if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
+               VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!");
+               n_rcu_torture_boost_rterror++;
+       }
+
+       /* Each pass through the following loop does one boost-test cycle. */
+       do {
+               /* Wait for the next test interval. */
+               oldstarttime = boost_starttime;
+               while (jiffies - oldstarttime > ULONG_MAX / 2) {
+                       schedule_timeout_uninterruptible(1);
+                       rcu_stutter_wait("rcu_torture_boost");
+                       if (kthread_should_stop() ||
+                           fullstop != FULLSTOP_DONTSTOP)
+                               goto checkwait;
+               }
+
+               /* Do one boost-test interval. */
+               endtime = oldstarttime + test_boost_duration * HZ;
+               call_rcu_time = jiffies;
+               while (jiffies - endtime > ULONG_MAX / 2) {
+                       /* If we don't have a callback in flight, post one. */
+                       if (!rbi.inflight) {
+                               smp_mb(); /* RCU core before ->inflight = 1. */
+                               rbi.inflight = 1;
+                               call_rcu(&rbi.rcu, rcu_torture_boost_cb);
+                               if (jiffies - call_rcu_time >
+                                        test_boost_duration * HZ - HZ / 2) {
+                                       VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed");
+                                       n_rcu_torture_boost_failure++;
+                               }
+                               call_rcu_time = jiffies;
+                       }
+                       cond_resched();
+                       rcu_stutter_wait("rcu_torture_boost");
+                       if (kthread_should_stop() ||
+                           fullstop != FULLSTOP_DONTSTOP)
+                               goto checkwait;
+               }
+
+               /*
+                * Set the start time of the next test interval.
+                * Yes, this is vulnerable to long delays, but such
+                * delays simply cause a false negative for the next
+                * interval.  Besides, we are running at RT priority,
+                * so delays should be relatively rare.
+                */
+               while (oldstarttime == boost_starttime) {
+                       if (mutex_trylock(&boost_mutex)) {
+                               boost_starttime = jiffies +
+                                                 test_boost_interval * HZ;
+                               n_rcu_torture_boosts++;
+                               mutex_unlock(&boost_mutex);
+                               break;
+                       }
+                       schedule_timeout_uninterruptible(1);
+               }
+
+               /* Go do the stutter. */
+checkwait:     rcu_stutter_wait("rcu_torture_boost");
+       } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+
+       /* Clean up and exit. */
+       VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
+       rcutorture_shutdown_absorb("rcu_torture_boost");
+       while (!kthread_should_stop() || rbi.inflight)
+               schedule_timeout_uninterruptible(1);
+       smp_mb(); /* order accesses to ->inflight before stack-frame death. */
+       return 0;
+}
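Both waiting loops above use the unsigned-wraparound idiom jiffies - x > ULONG_MAX / 2, which is true exactly when jiffies is still before x. Worked example: with oldstarttime == 1000 and jiffies == 990, the subtraction wraps to ULONG_MAX - 9, which exceeds ULONG_MAX / 2, so the thread keeps waiting; once jiffies reaches 1000 the difference becomes small and the loop exits.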
+
 /*
  * RCU torture force-quiescent-state kthread.  Repeatedly induces
  * bursts of calls to force_quiescent_state(), increasing the probability
@@ -933,7 +1068,8 @@ rcu_torture_printk(char *page)
        cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
        cnt += sprintf(&page[cnt],
                       "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
-                      "rtmbe: %d nt: %ld",
+                      "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld "
+                      "rtbf: %ld rtb: %ld nt: %ld",
                       rcu_torture_current,
                       rcu_torture_current_version,
                       list_empty(&rcu_torture_freelist),
@@ -941,8 +1077,19 @@ rcu_torture_printk(char *page)
                       atomic_read(&n_rcu_torture_alloc_fail),
                       atomic_read(&n_rcu_torture_free),
                       atomic_read(&n_rcu_torture_mberror),
+                      n_rcu_torture_boost_ktrerror,
+                      n_rcu_torture_boost_rterror,
+                      n_rcu_torture_boost_allocerror,
+                      n_rcu_torture_boost_afferror,
+                      n_rcu_torture_boost_failure,
+                      n_rcu_torture_boosts,
                       n_rcu_torture_timers);
-       if (atomic_read(&n_rcu_torture_mberror) != 0)
+       if (atomic_read(&n_rcu_torture_mberror) != 0 ||
+           n_rcu_torture_boost_ktrerror != 0 ||
+           n_rcu_torture_boost_rterror != 0 ||
+           n_rcu_torture_boost_allocerror != 0 ||
+           n_rcu_torture_boost_afferror != 0 ||
+           n_rcu_torture_boost_failure != 0)
                cnt += sprintf(&page[cnt], " !!!");
        cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
        if (i > 1) {
@@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg)
 }
 
 static inline void
-rcu_torture_print_module_parms(char *tag)
+rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
 {
        printk(KERN_ALERT "%s" TORTURE_FLAG
                "--- %s: nreaders=%d nfakewriters=%d "
                "stat_interval=%d verbose=%d test_no_idle_hz=%d "
                "shuffle_interval=%d stutter=%d irqreader=%d "
-               "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
+               "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
+               "test_boost=%d/%d test_boost_interval=%d "
+               "test_boost_duration=%d\n",
                torture_type, tag, nrealreaders, nfakewriters,
                stat_interval, verbose, test_no_idle_hz, shuffle_interval,
-               stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
+               stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
+               test_boost, cur_ops->can_boost,
+               test_boost_interval, test_boost_duration);
 }
 
-static struct notifier_block rcutorture_nb = {
+static struct notifier_block rcutorture_shutdown_nb = {
        .notifier_call = rcutorture_shutdown_notify,
 };
 
+static void rcutorture_booster_cleanup(int cpu)
+{
+       struct task_struct *t;
+
+       if (boost_tasks[cpu] == NULL)
+               return;
+       mutex_lock(&boost_mutex);
+       VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
+       t = boost_tasks[cpu];
+       boost_tasks[cpu] = NULL;
+       mutex_unlock(&boost_mutex);
+
+       /* This must be outside of the mutex, otherwise deadlock! */
+       kthread_stop(t);
+}
+
+static int rcutorture_booster_init(int cpu)
+{
+       int retval;
+
+       if (boost_tasks[cpu] != NULL)
+               return 0;  /* Already created, nothing more to do. */
+
+       /* Don't allow time recalculation while creating a new task. */
+       mutex_lock(&boost_mutex);
+       VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
+       boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL,
+                                         "rcu_torture_boost");
+       if (IS_ERR(boost_tasks[cpu])) {
+               retval = PTR_ERR(boost_tasks[cpu]);
+               VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
+               n_rcu_torture_boost_ktrerror++;
+               boost_tasks[cpu] = NULL;
+               mutex_unlock(&boost_mutex);
+               return retval;
+       }
+       kthread_bind(boost_tasks[cpu], cpu);
+       wake_up_process(boost_tasks[cpu]);
+       mutex_unlock(&boost_mutex);
+       return 0;
+}
+
+static int rcutorture_cpu_notify(struct notifier_block *self,
+                                unsigned long action, void *hcpu)
+{
+       long cpu = (long)hcpu;
+
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+               (void)rcutorture_booster_init(cpu);
+               break;
+       case CPU_DOWN_PREPARE:
+               rcutorture_booster_cleanup(cpu);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block rcutorture_cpu_nb = {
+       .notifier_call = rcutorture_cpu_notify,
+};
+
 static void
 rcu_torture_cleanup(void)
 {
@@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void)
        }
        fullstop = FULLSTOP_RMMOD;
        mutex_unlock(&fullstop_mutex);
-       unregister_reboot_notifier(&rcutorture_nb);
+       unregister_reboot_notifier(&rcutorture_shutdown_nb);
        if (stutter_task) {
                VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
                kthread_stop(stutter_task);
@@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void)
                kthread_stop(fqs_task);
        }
        fqs_task = NULL;
+       if ((test_boost == 1 && cur_ops->can_boost) ||
+           test_boost == 2) {
+               unregister_cpu_notifier(&rcutorture_cpu_nb);
+               for_each_possible_cpu(i)
+                       rcutorture_booster_cleanup(i);
+       }
 
        /* Wait for all RCU callbacks to fire.  */
 
@@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void)
        if (cur_ops->cleanup)
                cur_ops->cleanup();
        if (atomic_read(&n_rcu_torture_error))
-               rcu_torture_print_module_parms("End of test: FAILURE");
+               rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
        else
-               rcu_torture_print_module_parms("End of test: SUCCESS");
+               rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
 }
 
 static int __init
@@ -1242,7 +1464,7 @@ rcu_torture_init(void)
                nrealreaders = nreaders;
        else
                nrealreaders = 2 * num_online_cpus();
-       rcu_torture_print_module_parms("Start of test");
+       rcu_torture_print_module_parms(cur_ops, "Start of test");
        fullstop = FULLSTOP_DONTSTOP;
 
        /* Set up the freelist. */
@@ -1263,6 +1485,12 @@ rcu_torture_init(void)
        atomic_set(&n_rcu_torture_free, 0);
        atomic_set(&n_rcu_torture_mberror, 0);
        atomic_set(&n_rcu_torture_error, 0);
+       n_rcu_torture_boost_ktrerror = 0;
+       n_rcu_torture_boost_rterror = 0;
+       n_rcu_torture_boost_allocerror = 0;
+       n_rcu_torture_boost_afferror = 0;
+       n_rcu_torture_boost_failure = 0;
+       n_rcu_torture_boosts = 0;
        for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
                atomic_set(&rcu_torture_wcount[i], 0);
        for_each_possible_cpu(cpu) {
@@ -1376,7 +1604,27 @@ rcu_torture_init(void)
                        goto unwind;
                }
        }
-       register_reboot_notifier(&rcutorture_nb);
+       if (test_boost_interval < 1)
+               test_boost_interval = 1;
+       if (test_boost_duration < 2)
+               test_boost_duration = 2;
+       if ((test_boost == 1 && cur_ops->can_boost) ||
+           test_boost == 2) {
+               int retval;
+
+               boost_starttime = jiffies + test_boost_interval * HZ;
+               register_cpu_notifier(&rcutorture_cpu_nb);
+               for_each_possible_cpu(i) {
+                       if (cpu_is_offline(i))
+                               continue;  /* Heuristic: CPU can go offline. */
+                       retval = rcutorture_booster_init(i);
+                       if (retval < 0) {
+                               firsterr = retval;
+                               goto unwind;
+                       }
+               }
+       }
+       register_reboot_notifier(&rcutorture_shutdown_nb);
        mutex_unlock(&fullstop_mutex);
        return 0;
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ccdc04c479815addc8dbacea69643174a4636670..d0ddfea6579d027809cfb0bce885289bac0f957e 100644 (file)
@@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
        .gpnum = -300, \
        .completed = -300, \
        .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
-       .orphan_cbs_list = NULL, \
-       .orphan_cbs_tail = &structname.orphan_cbs_list, \
-       .orphan_qlen = 0, \
        .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
        .n_force_qs = 0, \
        .n_force_qs_ngp = 0, \
@@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void)
 static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
 {
        if (rdp->gpnum != rnp->gpnum) {
-               rdp->qs_pending = 1;
-               rdp->passed_quiesc = 0;
+               /*
+                * If the current grace period is waiting for this CPU,
+                * set up to detect a quiescent state, otherwise don't
+                * go looking for one.
+                */
                rdp->gpnum = rnp->gpnum;
+               if (rnp->qsmask & rdp->grpmask) {
+                       rdp->qs_pending = 1;
+                       rdp->passed_quiesc = 0;
+               } else
+                       rdp->qs_pending = 0;
        }
 }
 
@@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
 
                /* Remember that we saw this grace-period completion. */
                rdp->completed = rnp->completed;
+
+               /*
+                * If we were in an extended quiescent state, we may have
+                * missed some grace periods that other CPUs handled on
+                * our behalf. Catch up with this state to avoid noting
+                * spurious new grace periods.  If another grace period
+                * has started, then rnp->gpnum will have advanced, so
+                * we will detect this later on.
+                */
+               if (ULONG_CMP_LT(rdp->gpnum, rdp->completed))
+                       rdp->gpnum = rdp->completed;
+
+               /*
+                * If RCU does not need a quiescent state from this CPU,
+                * then make sure that this CPU doesn't go looking for one.
+                */
+               if ((rnp->qsmask & rdp->grpmask) == 0)
+                       rdp->qs_pending = 0;
        }
 }
 
@@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the
- * specified flavor of RCU.  The callbacks will be adopted by the next
- * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever
- * comes first.  Because this is invoked from the CPU_DYING notifier,
- * irqs are already disabled.
+ * Move a dying CPU's RCU callbacks to an online CPU's callback list.
+ * Synchronization is not required because this function executes
+ * in stop_machine() context.
  */
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
        int i;
+       /* The dying CPU has already been cleared from cpu_online_mask. */
+       int receive_cpu = cpumask_any(cpu_online_mask);
        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+       struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
 
        if (rdp->nxtlist == NULL)
                return;  /* irqs disabled, so comparison is stable. */
-       raw_spin_lock(&rsp->onofflock);  /* irqs already disabled. */
-       *rsp->orphan_cbs_tail = rdp->nxtlist;
-       rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
+
+       *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+       receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+       receive_rdp->qlen += rdp->qlen;
+       receive_rdp->n_cbs_adopted += rdp->qlen;
+       rdp->n_cbs_orphaned += rdp->qlen;
+
        rdp->nxtlist = NULL;
        for (i = 0; i < RCU_NEXT_SIZE; i++)
                rdp->nxttail[i] = &rdp->nxtlist;
-       rsp->orphan_qlen += rdp->qlen;
-       rdp->n_cbs_orphaned += rdp->qlen;
        rdp->qlen = 0;
-       raw_spin_unlock(&rsp->onofflock);  /* irqs remain disabled. */
-}
-
-/*
- * Adopt previously orphaned RCU callbacks.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
-{
-       unsigned long flags;
-       struct rcu_data *rdp;
-
-       raw_spin_lock_irqsave(&rsp->onofflock, flags);
-       rdp = this_cpu_ptr(rsp->rda);
-       if (rsp->orphan_cbs_list == NULL) {
-               raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
-               return;
-       }
-       *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
-       rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail;
-       rdp->qlen += rsp->orphan_qlen;
-       rdp->n_cbs_adopted += rsp->orphan_qlen;
-       rsp->orphan_cbs_list = NULL;
-       rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
-       rsp->orphan_qlen = 0;
-       raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
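The splice in rcu_send_cbs_to_online() is O(1) because each callback list keeps, besides its head, a pointer to the ->next slot of its last element (or to the head itself when empty). A self-contained sketch of that head/tail-pointer splice, with illustrative names rather than the kernel's rdp fields:

#include <stdio.h>
#include <stddef.h>

struct cb { struct cb *next; };

/* One list per CPU: head pointer plus a tail pointer addressing the
 * final ->next slot (or the head itself when the list is empty). */
struct cblist { struct cb *head; struct cb **tail; };

static void splice(struct cblist *dst, struct cblist *src)
{
	if (!src->head)
		return;
	*dst->tail = src->head;	/* hook donor list onto receiver's tail */
	dst->tail = src->tail;	/* receiver's tail is now donor's tail */
	src->head = NULL;
	src->tail = &src->head;	/* donor is empty again */
}

int main(void)
{
	struct cb b = { NULL }, a = { &b };		/* two queued callbacks */
	struct cblist dying = { &a, &b.next };
	struct cblist online = { NULL, &online.head };	/* empty receiver */
	int n = 0;

	splice(&online, &dying);
	for (struct cb *p = online.head; p; p = p->next)
		n++;
	printf("%d callbacks adopted\n", n);	/* prints 2 */
	return 0;
}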
 
 /*
@@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
        if (need_report & RCU_OFL_TASKS_EXP_GP)
                rcu_report_exp_rnp(rsp, rnp);
-
-       rcu_adopt_orphan_cbs(rsp);
 }
 
 /*
@@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu)
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
-static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
-{
-}
-
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_send_cbs_to_online(struct rcu_state *rsp)
 {
 }
 
@@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
         */
        local_irq_save(flags);
        rdp = this_cpu_ptr(rsp->rda);
-       rcu_process_gp_end(rsp, rdp);
-       check_for_new_grace_period(rsp, rdp);
 
        /* Add the callback to our list. */
        *rdp->nxttail[RCU_NEXT_TAIL] = head;
        rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 
-       /* Start a new grace period if one not already started. */
-       if (!rcu_gp_in_progress(rsp)) {
-               unsigned long nestflag;
-               struct rcu_node *rnp_root = rcu_get_root(rsp);
-
-               raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-               rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock. */
-       }
-
        /*
         * Force the grace period if too many callbacks or too long waiting.
         * Enforce hysteresis, and don't invoke force_quiescent_state()
@@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
         * is the only one waiting for a grace period to complete.
         */
        if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
-               rdp->blimit = LONG_MAX;
-               if (rsp->n_force_qs == rdp->n_force_qs_snap &&
-                   *rdp->nxttail[RCU_DONE_TAIL] != head)
-                       force_quiescent_state(rsp, 0);
-               rdp->n_force_qs_snap = rsp->n_force_qs;
-               rdp->qlen_last_fqs_check = rdp->qlen;
+
+               /* Are we ignoring a completed grace period? */
+               rcu_process_gp_end(rsp, rdp);
+               check_for_new_grace_period(rsp, rdp);
+
+               /* Start a new grace period if one not already started. */
+               if (!rcu_gp_in_progress(rsp)) {
+                       unsigned long nestflag;
+                       struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+                       raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
+                       rcu_start_gp(rsp, nestflag);  /* releases rnp_root->lock */
+               } else {
+                       /* Give the grace period a kick. */
+                       rdp->blimit = LONG_MAX;
+                       if (rsp->n_force_qs == rdp->n_force_qs_snap &&
+                           *rdp->nxttail[RCU_DONE_TAIL] != head)
+                               force_quiescent_state(rsp, 0);
+                       rdp->n_force_qs_snap = rsp->n_force_qs;
+                       rdp->qlen_last_fqs_check = rdp->qlen;
+               }
        } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
                force_quiescent_state(rsp, 1);
        local_irq_restore(flags);
@@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp,
         * decrement rcu_barrier_cpu_count -- otherwise the first CPU
         * might complete its grace period before all of the other CPUs
         * did their increment, causing this function to return too
-        * early.
+        * early.  Note that on_each_cpu() disables irqs, which prevents
+        * any CPUs from coming online or going offline until each online
+        * CPU has queued its RCU-barrier callback.
         */
        atomic_set(&rcu_barrier_cpu_count, 1);
-       preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */
-       rcu_adopt_orphan_cbs(rsp);
        on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
-       preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */
        if (atomic_dec_and_test(&rcu_barrier_cpu_count))
                complete(&rcu_barrier_completion);
        wait_for_completion(&rcu_barrier_completion);
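The atomic_set(&rcu_barrier_cpu_count, 1) above biases the count by one, so that only the final atomic_dec_and_test() -- performed by _rcu_barrier() itself, after every CPU has queued its callback -- signals the completion. The same bias-by-one pattern, sketched with C11 atomics in userspace:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int pending = 1;		/* bias: hold one reference ourselves */

static void cb_done(void)		/* run once per queued callback */
{
	if (atomic_fetch_sub(&pending, 1) == 1)
		printf("complete\n");
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		atomic_fetch_add(&pending, 1);	/* queue 4 "callbacks" */
	for (int i = 0; i < 4; i++)
		cb_done();			/* none of these fires "complete" */
	cb_done();				/* drop our bias: now it fires */
	return 0;
}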
@@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
        case CPU_DYING:
        case CPU_DYING_FROZEN:
                /*
-                * preempt_disable() in _rcu_barrier() prevents stop_machine(),
-                * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
-                * returns, all online cpus have queued rcu_barrier_func().
-                * The dying CPU clears its cpu_online_mask bit and
-                * moves all of its RCU callbacks to ->orphan_cbs_list
-                * in the context of stop_machine(), so subsequent calls
-                * to _rcu_barrier() will adopt these callbacks and only
-                * then queue rcu_barrier_func() on all remaining CPUs.
+                * The whole machine is "stopped" except this CPU, so we can
+                * touch any data without introducing corruption. We send the
+                * dying CPU's callbacks to an arbitrarily chosen online CPU.
                 */
-               rcu_send_cbs_to_orphanage(&rcu_bh_state);
-               rcu_send_cbs_to_orphanage(&rcu_sched_state);
-               rcu_preempt_send_cbs_to_orphanage();
+               rcu_send_cbs_to_online(&rcu_bh_state);
+               rcu_send_cbs_to_online(&rcu_sched_state);
+               rcu_preempt_send_cbs_to_online();
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
@@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
        int i;
 
-       for (i = NUM_RCU_LVLS - 1; i >= 0; i--)
+       for (i = NUM_RCU_LVLS - 1; i > 0; i--)
                rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+       rsp->levelspread[0] = RCU_FANOUT_LEAF;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
index 91d4170c5c13afd2e8997bd59b28e7cc2a4385e8..e8f057e44e3ee00466e840593983ca5062302545 100644 (file)
 /*
  * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
  * In theory, it should be possible to add more levels straightforwardly.
- * In practice, this has not been tested, so there is probably some
- * bug somewhere.
+ * In practice, this did work well going from three levels to four.
+ * Of course, your mileage may vary.
  */
 #define MAX_RCU_LVLS 4
-#define RCU_FANOUT           (CONFIG_RCU_FANOUT)
-#define RCU_FANOUT_SQ        (RCU_FANOUT * RCU_FANOUT)
-#define RCU_FANOUT_CUBE              (RCU_FANOUT_SQ * RCU_FANOUT)
-#define RCU_FANOUT_FOURTH     (RCU_FANOUT_CUBE * RCU_FANOUT)
-
-#if NR_CPUS <= RCU_FANOUT
+#if CONFIG_RCU_FANOUT > 16
+#define RCU_FANOUT_LEAF       16
+#else /* #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_LEAF       (CONFIG_RCU_FANOUT)
+#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
+#define RCU_FANOUT_1         (RCU_FANOUT_LEAF)
+#define RCU_FANOUT_2         (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_3         (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
+#define RCU_FANOUT_4         (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
+
+#if NR_CPUS <= RCU_FANOUT_1
 #  define NUM_RCU_LVLS       1
 #  define NUM_RCU_LVL_0              1
 #  define NUM_RCU_LVL_1              (NR_CPUS)
 #  define NUM_RCU_LVL_2              0
 #  define NUM_RCU_LVL_3              0
 #  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_SQ
+#elif NR_CPUS <= RCU_FANOUT_2
 #  define NUM_RCU_LVLS       2
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
 #  define NUM_RCU_LVL_2              (NR_CPUS)
 #  define NUM_RCU_LVL_3              0
 #  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_CUBE
+#elif NR_CPUS <= RCU_FANOUT_3
 #  define NUM_RCU_LVLS       3
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
-#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
-#  define NUM_RCU_LVL_3              NR_CPUS
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_3              (NR_CPUS)
 #  define NUM_RCU_LVL_4              0
-#elif NR_CPUS <= RCU_FANOUT_FOURTH
+#elif NR_CPUS <= RCU_FANOUT_4
 #  define NUM_RCU_LVLS       4
 #  define NUM_RCU_LVL_0              1
-#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE)
-#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ)
-#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT)
-#  define NUM_RCU_LVL_4              NR_CPUS
+#  define NUM_RCU_LVL_1              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+#  define NUM_RCU_LVL_2              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+#  define NUM_RCU_LVL_3              DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+#  define NUM_RCU_LVL_4              (NR_CPUS)
 #else
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
-#endif /* #if (NR_CPUS) <= RCU_FANOUT */
+#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
 
 #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
 #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)
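A worked instance of the renamed geometry macros, assuming the hypothetical configuration CONFIG_RCU_FANOUT=64 and NR_CPUS=4096 (values computed by hand from the #defines above):

/* CONFIG_RCU_FANOUT = 64 exceeds 16, so RCU_FANOUT_LEAF = 16, giving:
 *   RCU_FANOUT_1 = 16
 *   RCU_FANOUT_2 = 16 * 64   = 1024
 *   RCU_FANOUT_3 = 1024 * 64 = 65536
 * NR_CPUS = 4096 satisfies RCU_FANOUT_2 < 4096 <= RCU_FANOUT_3, so:
 *   NUM_RCU_LVLS  = 3
 *   NUM_RCU_LVL_0 = 1
 *   NUM_RCU_LVL_1 = DIV_ROUND_UP(4096, 1024) = 4
 *   NUM_RCU_LVL_2 = DIV_ROUND_UP(4096, 16)   = 256
 *   NUM_RCU_LVL_3 = 4096
 * NUM_RCU_NODES = (1 + 4 + 256 + 4096) - 4096 = 261 rcu_node structures.
 */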
@@ -203,8 +208,8 @@ struct rcu_data {
        long            qlen_last_fqs_check;
                                        /* qlen at last check for QS forcing */
        unsigned long   n_cbs_invoked;  /* count of RCU cbs invoked. */
-       unsigned long   n_cbs_orphaned; /* RCU cbs sent to orphanage. */
-       unsigned long   n_cbs_adopted;  /* RCU cbs adopted from orphanage. */
+       unsigned long   n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */
+       unsigned long   n_cbs_adopted;  /* RCU cbs adopted from dying CPU */
        unsigned long   n_force_qs_snap;
                                        /* did other CPU force QS recently? */
        long            blimit;         /* Upper limit on a processed batch */
@@ -309,15 +314,7 @@ struct rcu_state {
        /* End of fields guarded by root rcu_node's lock. */
 
        raw_spinlock_t onofflock;               /* exclude on/offline and */
-                                               /*  starting new GP.  Also */
-                                               /*  protects the following */
-                                               /*  orphan_cbs fields. */
-       struct rcu_head *orphan_cbs_list;       /* list of rcu_head structs */
-                                               /*  orphaned by all CPUs in */
-                                               /*  a given leaf rcu_node */
-                                               /*  going offline. */
-       struct rcu_head **orphan_cbs_tail;      /* And tail pointer. */
-       long orphan_qlen;                       /* Number of orphaned cbs. */
+                                               /*  starting new GP. */
        raw_spinlock_t fqslock;                 /* Only one task forcing */
                                                /*  quiescent states. */
        unsigned long jiffies_force_qs;         /* Time at which to invoke */
@@ -390,7 +387,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
 static int rcu_preempt_pending(int cpu);
 static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
-static void rcu_preempt_send_cbs_to_orphanage(void);
+static void rcu_preempt_send_cbs_to_online(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_needs_cpu_flush(void);
 
index 71a4147473f95f51d2b2e88db4c14372dafe375f..a3638710dc67f4627f5cdb88e1cafb43b500d24a 100644 (file)
@@ -25,6 +25,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/stop_machine.h>
 
 /*
  * Check the RCU kernel configuration parameters and print informative
@@ -773,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Move preemptable RCU's callbacks to ->orphan_cbs_list.
+ * Move preemptable RCU's callbacks from dying CPU to other online CPU.
  */
-static void rcu_preempt_send_cbs_to_orphanage(void)
+static void rcu_preempt_send_cbs_to_online(void)
 {
-       rcu_send_cbs_to_orphanage(&rcu_preempt_state);
+       rcu_send_cbs_to_online(&rcu_preempt_state);
 }
 
 /*
@@ -1001,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 /*
  * Because there is no preemptable RCU, there are no callbacks to move.
  */
-static void rcu_preempt_send_cbs_to_orphanage(void)
+static void rcu_preempt_send_cbs_to_online(void)
 {
 }
 
@@ -1014,6 +1015,132 @@ static void __init __rcu_init_preempt(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+#ifndef CONFIG_SMP
+
+void synchronize_sched_expedited(void)
+{
+       cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#else /* #ifndef CONFIG_SMP */
+
+static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
+
+static int synchronize_sched_expedited_cpu_stop(void *data)
+{
+       /*
+        * There must be a full memory barrier on each affected CPU
+        * between the time that try_stop_cpus() is called and the
+        * time that it returns.
+        *
+        * In the current initial implementation of cpu_stop, the
+        * above condition is already met when the control reaches
+        * this point and the following smp_mb() is not strictly
+        * necessary.  Do smp_mb() anyway for documentation and
+        * robustness against future implementation changes.
+        */
+       smp_mb(); /* See above comment block. */
+       return 0;
+}
+
+/*
+ * Wait for an rcu-sched grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly.  This consumes
+ * significant time on all CPUs, and is thus not recommended for
+ * any sort of common-case code.
+ *
+ * Note that it is illegal to call this function while holding any
+ * lock that is acquired by a CPU-hotplug notifier.  Failing to
+ * observe this restriction will result in deadlock.
+ *
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with sync_sched_expedited_started and
+ * sync_sched_expedited_done taking on the roles of the halves
+ * of the ticket-lock word.  Each task atomically increments
+ * sync_sched_expedited_started upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs.  If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period.  We are then done, so we use atomic_cmpxchg() to
+ * update sync_sched_expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of sync_sched_expedited_done.  If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot.  In this case, our work is
+ * done for us, and we can simply return.  Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
+ */
+void synchronize_sched_expedited(void)
+{
+       int firstsnap, s, snap, trycount = 0;
+
+       /* Note that atomic_inc_return() implies full memory barrier. */
+       firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+       get_online_cpus();
+
+       /*
+        * Each pass through the following loop attempts to force a
+        * context switch on each CPU.
+        */
+       while (try_stop_cpus(cpu_online_mask,
+                            synchronize_sched_expedited_cpu_stop,
+                            NULL) == -EAGAIN) {
+               put_online_cpus();
+
+               /* No joy, try again later.  Or just synchronize_sched(). */
+               if (trycount++ < 10)
+                       udelay(trycount * num_online_cpus());
+               else {
+                       synchronize_sched();
+                       return;
+               }
+
+               /* Check to see if someone else did our work for us. */
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
+                       smp_mb(); /* ensure test happens before caller kfree */
+                       return;
+               }
+
+               /*
+                * Refetching sync_sched_expedited_started allows later
+                * callers to piggyback on our grace period.  We subtract
+                * 1 to get the same token that the last incrementer got.
+                * We retry after they started, so our grace period works
+                * for them, and they started after our first try, so their
+                * grace period works for us.
+                */
+               get_online_cpus();
+               snap = atomic_read(&sync_sched_expedited_started) - 1;
+               smp_mb(); /* ensure read is before try_stop_cpus(). */
+       }
+
+       /*
+        * Everyone up to our most recent fetch is covered by our grace
+        * period.  Update the counter, but only if our work is still
+        * relevant -- which it won't be if someone who started later
+        * than we did beat us to the punch.
+        */
+       do {
+               s = atomic_read(&sync_sched_expedited_done);
+               if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+                       smp_mb(); /* ensure test happens before caller kfree */
+                       break;
+               }
+       } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+
+       put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#endif /* #else #ifndef CONFIG_SMP */
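As the comment block above says, sync_sched_expedited_started and sync_sched_expedited_done act like the two halves of a ticket lock. A compact userspace sketch of that protocol using C11 atomics; UINT_CMP_GE() is reproduced from memory of include/linux/rcupdate.h, and the actual grace-period forcing (try_stop_cpus() in the kernel) is elided:

#include <stdatomic.h>
#include <limits.h>
#include <stdbool.h>

#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (a) - (b))	/* wrap-safe a >= b */

static atomic_uint started, done;

/* True if a grace period covering our snapshot already completed,
 * i.e. someone else did our work for us. */
static bool gp_already_done(unsigned snap)
{
	return UINT_CMP_GE(atomic_load(&done), snap);
}

static void gp_force_and_publish(void)
{
	unsigned snap = atomic_fetch_add(&started, 1) + 1; /* take a ticket */
	unsigned s;

	/* ... force the grace period here ... */

	s = atomic_load(&done);
	while (!UINT_CMP_GE(s, snap) &&
	       !atomic_compare_exchange_weak(&done, &s, snap))
		;	/* advance done to our ticket unless already past it */
}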
+
 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 
 /*
index d15430b9d122f4d619e76fb6b5069aa1f494a575..c8e97853b970f71ad662732ef46da011cf46ac1d 100644 (file)
@@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 
        gpnum = rsp->gpnum;
        seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x "
-                     "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n",
+                     "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n",
                   rsp->completed, gpnum, rsp->signaled,
                   (long)(rsp->jiffies_force_qs - jiffies),
                   (int)(jiffies & 0xffff),
                   rsp->n_force_qs, rsp->n_force_qs_ngp,
                   rsp->n_force_qs - rsp->n_force_qs_ngp,
-                  rsp->n_force_qs_lh, rsp->orphan_qlen);
+                  rsp->n_force_qs_lh);
        for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) {
                if (rnp->level != level) {
                        seq_puts(m, "\n");
@@ -300,7 +300,7 @@ static const struct file_operations rcu_pending_fops = {
 
 static struct dentry *rcudir;
 
-static int __init rcuclassic_trace_init(void)
+static int __init rcutree_trace_init(void)
 {
        struct dentry *retval;
 
@@ -337,14 +337,14 @@ free_out:
        return 1;
 }
 
-static void __exit rcuclassic_trace_cleanup(void)
+static void __exit rcutree_trace_cleanup(void)
 {
        debugfs_remove_recursive(rcudir);
 }
 
 
-module_init(rcuclassic_trace_init);
-module_exit(rcuclassic_trace_cleanup);
+module_init(rcutree_trace_init);
+module_exit(rcutree_trace_cleanup);
 
 MODULE_AUTHOR("Paul E. McKenney");
 MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation");
index 297d1a0eedb0e68d8b9327f530ba477c93b1222e..04949089e7601ccd2a9b82f0f30c5905cbc9777b 100644 (file)
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
+#include <asm/mutex.h>
 
 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
+#include "sched_autogroup.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -253,6 +255,8 @@ struct task_group {
        /* runqueue "owned" by this group on each cpu */
        struct cfs_rq **cfs_rq;
        unsigned long shares;
+
+       atomic_t load_weight;
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -268,24 +272,19 @@ struct task_group {
        struct task_group *parent;
        struct list_head siblings;
        struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+       struct autogroup *autogroup;
+#endif
 };
 
 #define root_task_group init_task_group
 
-/* task_group_lock serializes add/remove of task groups and also changes to
- * a task group's cpu shares.
- */
+/* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-       return list_empty(&root_task_group.children);
-}
-#endif
-
 # define INIT_TASK_GROUP_LOAD  NICE_0_LOAD
 
 /*
@@ -342,6 +341,7 @@ struct cfs_rq {
         * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
         * list is used during load balance.
         */
+       int on_list;
        struct list_head leaf_cfs_rq_list;
        struct task_group *tg;  /* group that "owns" this runqueue */
 
@@ -360,14 +360,17 @@ struct cfs_rq {
        unsigned long h_load;
 
        /*
-        * this cpu's part of tg->shares
+        * Maintaining per-cpu shares distribution for group scheduling
+        *
+        * load_stamp is the last time we updated the load average
+        * load_last is the last time we updated the load average and saw load
+        * load_unacc_exec_time is currently unaccounted execution time
         */
-       unsigned long shares;
+       u64 load_avg;
+       u64 load_period;
+       u64 load_stamp, load_last, load_unacc_exec_time;
 
-       /*
-        * load.weight at the time we set shares
-        */
-       unsigned long rq_weight;
+       unsigned long load_contribution;
 #endif
 #endif
 };
@@ -605,11 +608,14 @@ static inline int cpu_of(struct rq *rq)
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
+       struct task_group *tg;
        struct cgroup_subsys_state *css;
 
        css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
                        lockdep_is_held(&task_rq(p)->lock));
-       return container_of(css, struct task_group, css);
+       tg = container_of(css, struct task_group, css);
+
+       return autogroup_task_group(p, tg);
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -792,20 +798,6 @@ late_initcall(sched_init_debug);
  */
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
-/*
- * ratelimit for updating the group shares.
- * default: 0.25ms
- */
-unsigned int sysctl_sched_shares_ratelimit = 250000;
-unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
-
-/*
- * Inject some fuzzyness into changing the per-cpu group shares
- * this avoids remote rq-locks at the expense of fairness.
- * default: 4
- */
-unsigned int sysctl_sched_shares_thresh = 4;
-
 /*
  * period over which we average the RT time consumption, measured
  * in ms.
@@ -1355,6 +1347,12 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
        lw->inv_weight = 0;
 }
 
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{
+       lw->weight = w;
+       lw->inv_weight = 0;
+}
+
 /*
  * To aid in avoiding the subversion of "niceness" due to uneven distribution
  * of tasks with abnormal "nice" values across CPUs the contribution that
@@ -1543,101 +1541,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static __read_mostly unsigned long __percpu *update_shares_data;
-
-static void __set_se_shares(struct sched_entity *se, unsigned long shares);
-
-/*
- * Calculate and set the cpu's group shares.
- */
-static void update_group_shares_cpu(struct task_group *tg, int cpu,
-                                   unsigned long sd_shares,
-                                   unsigned long sd_rq_weight,
-                                   unsigned long *usd_rq_weight)
-{
-       unsigned long shares, rq_weight;
-       int boost = 0;
-
-       rq_weight = usd_rq_weight[cpu];
-       if (!rq_weight) {
-               boost = 1;
-               rq_weight = NICE_0_LOAD;
-       }
-
-       /*
-        *             \Sum_j shares_j * rq_weight_i
-        * shares_i =  -----------------------------
-        *                  \Sum_j rq_weight_j
-        */
-       shares = (sd_shares * rq_weight) / sd_rq_weight;
-       shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
-
-       if (abs(shares - tg->se[cpu]->load.weight) >
-                       sysctl_sched_shares_thresh) {
-               struct rq *rq = cpu_rq(cpu);
-               unsigned long flags;
-
-               raw_spin_lock_irqsave(&rq->lock, flags);
-               tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
-               tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-               __set_se_shares(tg->se[cpu], shares);
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
-       }
-}
-
-/*
- * Re-compute the task group their per cpu shares over the given domain.
- * This needs to be done in a bottom-up fashion because the rq weight of a
- * parent group depends on the shares of its child groups.
- */
-static int tg_shares_up(struct task_group *tg, void *data)
-{
-       unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
-       unsigned long *usd_rq_weight;
-       struct sched_domain *sd = data;
-       unsigned long flags;
-       int i;
-
-       if (!tg->se[0])
-               return 0;
-
-       local_irq_save(flags);
-       usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
-
-       for_each_cpu(i, sched_domain_span(sd)) {
-               weight = tg->cfs_rq[i]->load.weight;
-               usd_rq_weight[i] = weight;
-
-               rq_weight += weight;
-               /*
-                * If there are currently no tasks on the cpu pretend there
-                * is one of average load so that when a new task gets to
-                * run here it will not get delayed by group starvation.
-                */
-               if (!weight)
-                       weight = NICE_0_LOAD;
-
-               sum_weight += weight;
-               shares += tg->cfs_rq[i]->shares;
-       }
-
-       if (!rq_weight)
-               rq_weight = sum_weight;
-
-       if ((!shares && rq_weight) || shares > tg->shares)
-               shares = tg->shares;
-
-       if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
-               shares = tg->shares;
-
-       for_each_cpu(i, sched_domain_span(sd))
-               update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
-
-       local_irq_restore(flags);
-
-       return 0;
-}
-
 /*
  * Compute the cpu's hierarchical load factor for each task group.
  * This needs to be done in a top-down fashion because the load of a child
@@ -1652,7 +1555,7 @@ static int tg_load_down(struct task_group *tg, void *data)
                load = cpu_rq(cpu)->load.weight;
        } else {
                load = tg->parent->cfs_rq[cpu]->h_load;
-               load *= tg->cfs_rq[cpu]->shares;
+               load *= tg->se[cpu]->load.weight;
                load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
        }
 
@@ -1661,34 +1564,11 @@ static int tg_load_down(struct task_group *tg, void *data)
        return 0;
 }
 
-static void update_shares(struct sched_domain *sd)
-{
-       s64 elapsed;
-       u64 now;
-
-       if (root_task_group_empty())
-               return;
-
-       now = local_clock();
-       elapsed = now - sd->last_update;
-
-       if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
-               sd->last_update = now;
-               walk_tg_tree(tg_nop, tg_shares_up, sd);
-       }
-}
-
 static void update_h_load(long cpu)
 {
        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
-#else
-
-static inline void update_shares(struct sched_domain *sd)
-{
-}
-
 #endif
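tg_load_down() now apportions a parent's hierarchical load by the group entity's load.weight instead of the removed ->shares field. A sketch of one step of that recursion, with purely illustrative numbers:

/* One level of the per-cpu hierarchical load recursion in tg_load_down(). */
static unsigned long h_load_step(unsigned long parent_h_load,
				 unsigned long se_weight,
				 unsigned long parent_cfs_load)
{
	/* The +1 guards against division by zero on an idle cpu. */
	return parent_h_load * se_weight / (parent_cfs_load + 1);
}

/* Example: root h_load = rq load = 2048; a child group entity of weight
 * 1024 on a parent cfs_rq of total weight 2048 gets
 *   h_load = 2048 * 1024 / 2049 ~= 1023,
 * i.e. roughly half the cpu's load is attributed to that group. */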
 
 #ifdef CONFIG_PREEMPT
@@ -1810,15 +1690,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 
 #endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
-{
-#ifdef CONFIG_SMP
-       cfs_rq->shares = shares;
-#endif
-}
-#endif
-
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
@@ -2063,6 +1934,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "sched_autogroup.c"
 #include "sched_stoptask.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
@@ -2255,10 +2127,8 @@ static int migration_cpu_stop(void *data);
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static bool migrate_task(struct task_struct *p, int dest_cpu)
+static bool migrate_task(struct task_struct *p, struct rq *rq)
 {
-       struct rq *rq = task_rq(p);
-
        /*
         * If the task is not on a runqueue (and not running), then
         * the next wake-up will properly place the task.
@@ -2438,18 +2308,15 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
                return dest_cpu;
 
        /* No more Mr. Nice Guy. */
-       if (unlikely(dest_cpu >= nr_cpu_ids)) {
-               dest_cpu = cpuset_cpus_allowed_fallback(p);
-               /*
-                * Don't tell them about moving exiting tasks or
-                * kernel threads (both mm NULL), since they never
-                * leave kernel.
-                */
-               if (p->mm && printk_ratelimit()) {
-                       printk(KERN_INFO "process %d (%s) no "
-                              "longer affine to cpu%d\n",
-                              task_pid_nr(p), p->comm, cpu);
-               }
+       dest_cpu = cpuset_cpus_allowed_fallback(p);
+       /*
+        * Don't tell them about moving exiting tasks or
+        * kernel threads (both mm NULL), since they never
+        * leave kernel.
+        */
+       if (p->mm && printk_ratelimit()) {
+               printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n",
+                               task_pid_nr(p), p->comm, cpu);
        }
 
        return dest_cpu;
@@ -2785,7 +2652,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
        /* Want to start with kernel preemption disabled. */
        task_thread_info(p)->preempt_count = 1;
 #endif
+#ifdef CONFIG_SMP
        plist_node_init(&p->pushable_tasks, MAX_PRIO);
+#endif
 
        put_cpu();
 }
@@ -3549,7 +3418,7 @@ void sched_exec(void)
         * select_task_rq() can race against ->cpus_allowed
         */
        if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-           likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
+           likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
                struct migration_arg arg = { p, dest_cpu };
 
                task_rq_unlock(rq, &flags);
@@ -4214,7 +4083,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
                if (task_thread_info(rq->curr) != owner || need_resched())
                        return 0;
 
-               cpu_relax();
+               arch_mutex_cpu_relax();
        }
 
        return 1;
@@ -4526,7 +4395,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. It is interruptible. The timeout is in jiffies.
  */
-unsigned long __sched
+long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
                                          unsigned long timeout)
 {
@@ -4559,7 +4428,7 @@ EXPORT_SYMBOL(wait_for_completion_killable);
  * signaled or for a specified timeout to expire. It can be
  * interrupted by a kill signal. The timeout is in jiffies.
  */
-unsigned long __sched
+long __sched
 wait_for_completion_killable_timeout(struct completion *x,
                                     unsigned long timeout)
 {
@@ -4901,7 +4770,7 @@ static bool check_same_owner(struct task_struct *p)
 }
 
 static int __sched_setscheduler(struct task_struct *p, int policy,
-                               struct sched_param *param, bool user)
+                               const struct sched_param *param, bool user)
 {
        int retval, oldprio, oldpolicy = -1, on_rq, running;
        unsigned long flags;
@@ -5056,7 +4925,7 @@ recheck:
  * NOTE that the task may be already dead.
  */
 int sched_setscheduler(struct task_struct *p, int policy,
-                      struct sched_param *param)
+                      const struct sched_param *param)
 {
        return __sched_setscheduler(p, policy, param, true);
 }
@@ -5074,7 +4943,7 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
  * but our caller might not have that capability.
  */
 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
-                              struct sched_param *param)
+                              const struct sched_param *param)
 {
        return __sched_setscheduler(p, policy, param, false);
 }
@@ -5590,7 +5459,7 @@ void sched_show_task(struct task_struct *p)
        unsigned state;
 
        state = p->state ? __ffs(p->state) + 1 : 0;
-       printk(KERN_INFO "%-13.13s %c", p->comm,
+       printk(KERN_INFO "%-15.15s %c", p->comm,
                state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if BITS_PER_LONG == 32
        if (state == TASK_RUNNING)
@@ -5754,7 +5623,6 @@ static void update_sysctl(void)
        SET_SYSCTL(sched_min_granularity);
        SET_SYSCTL(sched_latency);
        SET_SYSCTL(sched_wakeup_granularity);
-       SET_SYSCTL(sched_shares_ratelimit);
 #undef SET_SYSCTL
 }
 
@@ -5830,7 +5698,7 @@ again:
                goto out;
 
        dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-       if (migrate_task(p, dest_cpu)) {
+       if (migrate_task(p, rq)) {
                struct migration_arg arg = { p, dest_cpu };
                /* Need help from migration thread: drop lock and wait. */
                task_rq_unlock(rq, &flags);
@@ -5912,29 +5780,20 @@ static int migration_cpu_stop(void *data)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
 /*
- * Figure out where task on dead CPU should go, use force if necessary.
+ * Ensures that the idle task is using init_mm right before its cpu goes
+ * offline.
  */
-void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void idle_task_exit(void)
 {
-       struct rq *rq = cpu_rq(dead_cpu);
-       int needs_cpu, uninitialized_var(dest_cpu);
-       unsigned long flags;
+       struct mm_struct *mm = current->active_mm;
 
-       local_irq_save(flags);
+       BUG_ON(cpu_online(smp_processor_id()));
 
-       raw_spin_lock(&rq->lock);
-       needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
-       if (needs_cpu)
-               dest_cpu = select_fallback_rq(dead_cpu, p);
-       raw_spin_unlock(&rq->lock);
-       /*
-        * It can only fail if we race with set_cpus_allowed(),
-        * in the racer should migrate the task anyway.
-        */
-       if (needs_cpu)
-               __migrate_task(p, dead_cpu, dest_cpu);
-       local_irq_restore(flags);
+       if (mm != &init_mm)
+               switch_mm(mm, &init_mm, current);
+       mmdrop(mm);
 }
 
 /*
@@ -5947,128 +5806,69 @@ void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
        struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
-       unsigned long flags;
 
-       local_irq_save(flags);
-       double_rq_lock(rq_src, rq_dest);
        rq_dest->nr_uninterruptible += rq_src->nr_uninterruptible;
        rq_src->nr_uninterruptible = 0;
-       double_rq_unlock(rq_src, rq_dest);
-       local_irq_restore(flags);
-}
-
-/* Run through task list and migrate tasks from the dead cpu. */
-static void migrate_live_tasks(int src_cpu)
-{
-       struct task_struct *p, *t;
-
-       read_lock(&tasklist_lock);
-
-       do_each_thread(t, p) {
-               if (p == current)
-                       continue;
-
-               if (task_cpu(p) == src_cpu)
-                       move_task_off_dead_cpu(src_cpu, p);
-       } while_each_thread(t, p);
-
-       read_unlock(&tasklist_lock);
 }
 
 /*
- * Schedules idle task to be the next runnable task on current CPU.
- * It does so by boosting its priority to highest possible.
- * Used by CPU offline code.
+ * Remove the tasks that were accounted by rq from calc_load_tasks.
  */
-void sched_idle_next(void)
+static void calc_global_load_remove(struct rq *rq)
 {
-       int this_cpu = smp_processor_id();
-       struct rq *rq = cpu_rq(this_cpu);
-       struct task_struct *p = rq->idle;
-       unsigned long flags;
-
-       /* cpu has to be offline */
-       BUG_ON(cpu_online(this_cpu));
-
-       /*
-        * Strictly not necessary since rest of the CPUs are stopped by now
-        * and interrupts disabled on the current cpu.
-        */
-       raw_spin_lock_irqsave(&rq->lock, flags);
-
-       __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
-
-       activate_task(rq, p, 0);
-
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+       rq->calc_load_active = 0;
 }
 
 /*
- * Ensures that the idle task is using init_mm right before its cpu goes
- * offline.
+ * Migrate all tasks from the rq, sleeping tasks will be migrated by
+ * try_to_wake_up()->select_task_rq().
+ *
+ * Called with rq->lock held even though we're in stop_machine() and
+ * there's no concurrency possible; we hold the required locks anyway
+ * because of lock validation efforts.
  */
-void idle_task_exit(void)
-{
-       struct mm_struct *mm = current->active_mm;
-
-       BUG_ON(cpu_online(smp_processor_id()));
-
-       if (mm != &init_mm)
-               switch_mm(mm, &init_mm, current);
-       mmdrop(mm);
-}
-
-/* called under rq->lock with disabled interrupts */
-static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
+static void migrate_tasks(unsigned int dead_cpu)
 {
        struct rq *rq = cpu_rq(dead_cpu);
-
-       /* Must be exiting, otherwise would be on tasklist. */
-       BUG_ON(!p->exit_state);
-
-       /* Cannot have done final schedule yet: would have vanished. */
-       BUG_ON(p->state == TASK_DEAD);
-
-       get_task_struct(p);
+       struct task_struct *next, *stop = rq->stop;
+       int dest_cpu;
 
        /*
-        * Drop lock around migration; if someone else moves it,
-        * that's OK. No task can be added to this CPU, so iteration is
-        * fine.
+        * Fudge the rq selection such that the below task selection loop
+        * doesn't get stuck on the currently eligible stop task.
+        *
+        * We're currently inside stop_machine() and the rq is either stuck
+        * in the stop_machine_cpu_stop() loop, or we're executing this code;
+        * either way we should never end up calling schedule() until we're
+        * done here.
         */
-       raw_spin_unlock_irq(&rq->lock);
-       move_task_off_dead_cpu(dead_cpu, p);
-       raw_spin_lock_irq(&rq->lock);
-
-       put_task_struct(p);
-}
-
-/* release_task() removes task from tasklist, so we won't find dead tasks. */
-static void migrate_dead_tasks(unsigned int dead_cpu)
-{
-       struct rq *rq = cpu_rq(dead_cpu);
-       struct task_struct *next;
+       rq->stop = NULL;
 
        for ( ; ; ) {
-               if (!rq->nr_running)
+               /*
+                * There's this thread running; bail when it's the only
+                * remaining thread.
+                */
+               if (rq->nr_running == 1)
                        break;
+
                next = pick_next_task(rq);
-               if (!next)
-                       break;
+               BUG_ON(!next);
                next->sched_class->put_prev_task(rq, next);
-               migrate_dead(dead_cpu, next);
 
+               /* Find suitable destination for @next, with force if needed. */
+               dest_cpu = select_fallback_rq(dead_cpu, next);
+               raw_spin_unlock(&rq->lock);
+
+               __migrate_task(next, dead_cpu, dest_cpu);
+
+               raw_spin_lock(&rq->lock);
        }
-}
 
-/*
- * remove the tasks which were accounted by rq from calc_load_tasks.
- */
-static void calc_global_load_remove(struct rq *rq)
-{
-       atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
-       rq->calc_load_active = 0;
+       rq->stop = stop;
 }
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -6278,15 +6078,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
        unsigned long flags;
        struct rq *rq = cpu_rq(cpu);
 
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
 
        case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
                rq->calc_load_update = calc_load_update;
                break;
 
        case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
                /* Update our root-domain */
                raw_spin_lock_irqsave(&rq->lock, flags);
                if (rq->rd) {
@@ -6298,30 +6096,19 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               migrate_live_tasks(cpu);
-               /* Idle task back to normal (off runqueue, low prio) */
-               raw_spin_lock_irq(&rq->lock);
-               deactivate_task(rq, rq->idle, 0);
-               __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
-               rq->idle->sched_class = &idle_sched_class;
-               migrate_dead_tasks(cpu);
-               raw_spin_unlock_irq(&rq->lock);
-               migrate_nr_uninterruptible(rq);
-               BUG_ON(rq->nr_running != 0);
-               calc_global_load_remove(rq);
-               break;
-
        case CPU_DYING:
-       case CPU_DYING_FROZEN:
                /* Update our root-domain */
                raw_spin_lock_irqsave(&rq->lock, flags);
                if (rq->rd) {
                        BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
                        set_rq_offline(rq);
                }
+               migrate_tasks(cpu);
+               BUG_ON(rq->nr_running != 1); /* the migration thread */
                raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+               migrate_nr_uninterruptible(rq);
+               calc_global_load_remove(rq);
                break;
 #endif
        }
@@ -8052,15 +7839,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
-                               struct sched_entity *se, int cpu, int add,
+                               struct sched_entity *se, int cpu,
                                struct sched_entity *parent)
 {
        struct rq *rq = cpu_rq(cpu);
        tg->cfs_rq[cpu] = cfs_rq;
        init_cfs_rq(cfs_rq, rq);
        cfs_rq->tg = tg;
-       if (add)
-               list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
        tg->se[cpu] = se;
        /* se could be NULL for init_task_group */
@@ -8073,15 +7858,14 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                se->cfs_rq = parent->my_q;
 
        se->my_q = cfs_rq;
-       se->load.weight = tg->shares;
-       se->load.inv_weight = 0;
+       update_load_set(&se->load, 0);
        se->parent = parent;
 }
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
 static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
-               struct sched_rt_entity *rt_se, int cpu, int add,
+               struct sched_rt_entity *rt_se, int cpu,
                struct sched_rt_entity *parent)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -8090,8 +7874,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
        init_rt_rq(rt_rq, rq);
        rt_rq->tg = tg;
        rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
-       if (add)
-               list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 
        tg->rt_se[cpu] = rt_se;
        if (!rt_se)
@@ -8164,13 +7946,9 @@ void __init sched_init(void)
 #ifdef CONFIG_CGROUP_SCHED
        list_add(&init_task_group.list, &task_groups);
        INIT_LIST_HEAD(&init_task_group.children);
-
+       autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-       update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
-                                           __alignof__(unsigned long));
-#endif
        for_each_possible_cpu(i) {
                struct rq *rq;
 
@@ -8184,7 +7962,6 @@ void __init sched_init(void)
 #ifdef CONFIG_FAIR_GROUP_SCHED
                init_task_group.shares = init_task_group_load;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
-#ifdef CONFIG_CGROUP_SCHED
                /*
                 * How much cpu bandwidth does init_task_group get?
                 *
@@ -8204,16 +7981,13 @@ void __init sched_init(void)
                 * We achieve this by letting init_task_group's tasks sit
                 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
                 */
-               init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, 1, NULL);
-#endif
+               init_tg_cfs_entry(&init_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
                rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
                INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
-#ifdef CONFIG_CGROUP_SCHED
-               init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
-#endif
+               init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, NULL);
 #endif
 
                for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -8293,8 +8067,6 @@ void __init sched_init(void)
                zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-       perf_event_init();
-
        scheduler_running = 1;
 }
 
@@ -8488,7 +8260,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
                if (!se)
                        goto err_free_rq;
 
-               init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
+               init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
        }
 
        return 1;
@@ -8499,15 +8271,21 @@ err:
        return 0;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-       list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
-                       &cpu_rq(cpu)->leaf_cfs_rq_list);
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
-       list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+       struct rq *rq = cpu_rq(cpu);
+       unsigned long flags;
+
+       /*
+        * Only empty task groups can be destroyed, so we can speculatively
+        * check on_list without danger of it being re-added.
+        */
+       if (!tg->cfs_rq[cpu]->on_list)
+               return;
+
+       raw_spin_lock_irqsave(&rq->lock, flags);
+       list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 #else /* !CONFIG_FAIR_GROUP_SCHED */
 static inline void free_fair_sched_group(struct task_group *tg)
@@ -8520,10 +8298,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
        return 1;
 }
 
-static inline void register_fair_sched_group(struct task_group *tg, int cpu)
-{
-}
-
 static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
 {
 }
@@ -8578,7 +8352,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
                if (!rt_se)
                        goto err_free_rq;
 
-               init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
+               init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
        }
 
        return 1;
@@ -8588,17 +8362,6 @@ err_free_rq:
 err:
        return 0;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-       list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
-                       &cpu_rq(cpu)->leaf_rt_rq_list);
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-       list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
-}
 #else /* !CONFIG_RT_GROUP_SCHED */
 static inline void free_rt_sched_group(struct task_group *tg)
 {
@@ -8609,14 +8372,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
        return 1;
 }
-
-static inline void register_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
-
-static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
-{
-}
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_CGROUP_SCHED
@@ -8632,7 +8387,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 {
        struct task_group *tg;
        unsigned long flags;
-       int i;
 
        tg = kzalloc(sizeof(*tg), GFP_KERNEL);
        if (!tg)
@@ -8645,10 +8399,6 @@ struct task_group *sched_create_group(struct task_group *parent)
                goto err;
 
        spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
-               register_fair_sched_group(tg, i);
-               register_rt_sched_group(tg, i);
-       }
        list_add_rcu(&tg->list, &task_groups);
 
        WARN_ON(!parent); /* root should already exist */
@@ -8678,11 +8428,11 @@ void sched_destroy_group(struct task_group *tg)
        unsigned long flags;
        int i;
 
-       spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
+       /* end participation in shares distribution */
+       for_each_possible_cpu(i)
                unregister_fair_sched_group(tg, i);
-               unregister_rt_sched_group(tg, i);
-       }
+
+       spin_lock_irqsave(&task_group_lock, flags);
        list_del_rcu(&tg->list);
        list_del_rcu(&tg->siblings);
        spin_unlock_irqrestore(&task_group_lock, flags);
@@ -8729,33 +8479,6 @@ void sched_move_task(struct task_struct *tsk)
 #endif /* CONFIG_CGROUP_SCHED */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void __set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-       struct cfs_rq *cfs_rq = se->cfs_rq;
-       int on_rq;
-
-       on_rq = se->on_rq;
-       if (on_rq)
-               dequeue_entity(cfs_rq, se, 0);
-
-       se->load.weight = shares;
-       se->load.inv_weight = 0;
-
-       if (on_rq)
-               enqueue_entity(cfs_rq, se, 0);
-}
-
-static void set_se_shares(struct sched_entity *se, unsigned long shares)
-{
-       struct cfs_rq *cfs_rq = se->cfs_rq;
-       struct rq *rq = cfs_rq->rq;
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&rq->lock, flags);
-       __set_se_shares(se, shares);
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
 static DEFINE_MUTEX(shares_mutex);
 
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
@@ -8778,37 +8501,19 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
        if (tg->shares == shares)
                goto done;
 
-       spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i)
-               unregister_fair_sched_group(tg, i);
-       list_del_rcu(&tg->siblings);
-       spin_unlock_irqrestore(&task_group_lock, flags);
-
-       /* wait for any ongoing reference to this group to finish */
-       synchronize_sched();
-
-       /*
-        * Now we are free to modify the group's share on each cpu
-        * w/o tripping rebalance_share or load_balance_fair.
-        */
        tg->shares = shares;
        for_each_possible_cpu(i) {
-               /*
-                * force a rebalance
-                */
-               cfs_rq_set_shares(tg->cfs_rq[i], 0);
-               set_se_shares(tg->se[i], shares);
+               struct rq *rq = cpu_rq(i);
+               struct sched_entity *se;
+
+               se = tg->se[i];
+               /* Propagate contribution to hierarchy */
+               raw_spin_lock_irqsave(&rq->lock, flags);
+               for_each_sched_entity(se)
+                       update_cfs_shares(group_cfs_rq(se), 0);
+               raw_spin_unlock_irqrestore(&rq->lock, flags);
        }
 
-       /*
-        * Enable load balance activity on this group, by inserting it back on
-        * each cpu's rq->leaf_cfs_rq_list.
-        */
-       spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i)
-               register_fair_sched_group(tg, i);
-       list_add_rcu(&tg->siblings, &tg->parent->children);
-       spin_unlock_irqrestore(&task_group_lock, flags);
 done:
        mutex_unlock(&shares_mutex);
        return 0;
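The new sched_group_set_shares() body pushes a shares change up the hierarchy by walking each entity's ->parent chain under rq->lock. For reference, the walker used by that loop is, from memory of kernel/sched_fair.c under CONFIG_FAIR_GROUP_SCHED, roughly:

struct sched_entity;	/* has a ->parent back-pointer per level */

/* Upward walk over a group's scheduling entities; each iteration
 * re-weighs one level of the hierarchy via update_cfs_shares().
 * Sketch only -- verify against kernel/sched_fair.c. */
#define for_each_sched_entity(se) \
	for (; se; se = se->parent)

/* Without CONFIG_FAIR_GROUP_SCHED the same macro degenerates to a
 * single iteration, so the loop body runs exactly once per cpu. */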
@@ -9534,72 +9239,3 @@ struct cgroup_subsys cpuacct_subsys = {
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
 
-#ifndef CONFIG_SMP
-
-void synchronize_sched_expedited(void)
-{
-       barrier();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#else /* #ifndef CONFIG_SMP */
-
-static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
-
-static int synchronize_sched_expedited_cpu_stop(void *data)
-{
-       /*
-        * There must be a full memory barrier on each affected CPU
-        * between the time that try_stop_cpus() is called and the
-        * time that it returns.
-        *
-        * In the current initial implementation of cpu_stop, the
-        * above condition is already met when the control reaches
-        * this point and the following smp_mb() is not strictly
-        * necessary.  Do smp_mb() anyway for documentation and
-        * robustness against future implementation changes.
-        */
-       smp_mb(); /* See above comment block. */
-       return 0;
-}
-
-/*
- * Wait for an rcu-sched grace period to elapse, but use "big hammer"
- * approach to force grace period to end quickly.  This consumes
- * significant time on all CPUs, and is thus not recommended for
- * any sort of common-case code.
- *
- * Note that it is illegal to call this function while holding any
- * lock that is acquired by a CPU-hotplug notifier.  Failing to
- * observe this restriction will result in deadlock.
- */
-void synchronize_sched_expedited(void)
-{
-       int snap, trycount = 0;
-
-       smp_mb();  /* ensure prior mod happens before capturing snap. */
-       snap = atomic_read(&synchronize_sched_expedited_count) + 1;
-       get_online_cpus();
-       while (try_stop_cpus(cpu_online_mask,
-                            synchronize_sched_expedited_cpu_stop,
-                            NULL) == -EAGAIN) {
-               put_online_cpus();
-               if (trycount++ < 10)
-                       udelay(trycount * num_online_cpus());
-               else {
-                       synchronize_sched();
-                       return;
-               }
-               if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
-                       smp_mb(); /* ensure test happens before caller kfree */
-                       return;
-               }
-               get_online_cpus();
-       }
-       atomic_inc(&synchronize_sched_expedited_count);
-       smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */
-       put_online_cpus();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
-#endif /* #else #ifndef CONFIG_SMP */
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
new file mode 100644 (file)
index 0000000..c80fedc
--- /dev/null
+++ b/kernel/sched_autogroup.c
@@ -0,0 +1,238 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/utsname.h>
+
+unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
+static struct autogroup autogroup_default;
+static atomic_t autogroup_seq_nr;
+
+static void autogroup_init(struct task_struct *init_task)
+{
+       autogroup_default.tg = &init_task_group;
+       init_task_group.autogroup = &autogroup_default;
+       kref_init(&autogroup_default.kref);
+       init_rwsem(&autogroup_default.lock);
+       init_task->signal->autogroup = &autogroup_default;
+}
+
+static inline void autogroup_free(struct task_group *tg)
+{
+       kfree(tg->autogroup);
+}
+
+static inline void autogroup_destroy(struct kref *kref)
+{
+       struct autogroup *ag = container_of(kref, struct autogroup, kref);
+
+       sched_destroy_group(ag->tg);
+}
+
+static inline void autogroup_kref_put(struct autogroup *ag)
+{
+       kref_put(&ag->kref, autogroup_destroy);
+}
+
+static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
+{
+       kref_get(&ag->kref);
+       return ag;
+}
+
+static inline struct autogroup *autogroup_task_get(struct task_struct *p)
+{
+       struct autogroup *ag;
+       unsigned long flags;
+
+       if (!lock_task_sighand(p, &flags))
+               return autogroup_kref_get(&autogroup_default);
+
+       ag = autogroup_kref_get(p->signal->autogroup);
+       unlock_task_sighand(p, &flags);
+
+       return ag;
+}
+
+static inline struct autogroup *autogroup_create(void)
+{
+       struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
+       struct task_group *tg;
+
+       if (!ag)
+               goto out_fail;
+
+       tg = sched_create_group(&init_task_group);
+
+       if (IS_ERR(tg))
+               goto out_free;
+
+       kref_init(&ag->kref);
+       init_rwsem(&ag->lock);
+       ag->id = atomic_inc_return(&autogroup_seq_nr);
+       ag->tg = tg;
+       tg->autogroup = ag;
+
+       return ag;
+
+out_free:
+       kfree(ag);
+out_fail:
+       if (printk_ratelimit()) {
+               printk(KERN_WARNING "autogroup_create: %s failure.\n",
+                       ag ? "sched_create_group()" : "kmalloc()");
+       }
+
+       return autogroup_kref_get(&autogroup_default);
+}
+
+static inline bool
+task_wants_autogroup(struct task_struct *p, struct task_group *tg)
+{
+       if (tg != &root_task_group)
+               return false;
+
+       if (p->sched_class != &fair_sched_class)
+               return false;
+
+       /*
+        * We can only assume the task group can't go away on us if
+        * autogroup_move_group() can see us on ->thread_group list.
+        */
+       if (p->flags & PF_EXITING)
+               return false;
+
+       return true;
+}
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+       int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+       if (enabled && task_wants_autogroup(p, tg))
+               return p->signal->autogroup->tg;
+
+       return tg;
+}
+
+static void
+autogroup_move_group(struct task_struct *p, struct autogroup *ag)
+{
+       struct autogroup *prev;
+       struct task_struct *t;
+       unsigned long flags;
+
+       BUG_ON(!lock_task_sighand(p, &flags));
+
+       prev = p->signal->autogroup;
+       if (prev == ag) {
+               unlock_task_sighand(p, &flags);
+               return;
+       }
+
+       p->signal->autogroup = autogroup_kref_get(ag);
+
+       t = p;
+       do {
+               sched_move_task(t);
+       } while_each_thread(p, t);
+
+       unlock_task_sighand(p, &flags);
+       autogroup_kref_put(prev);
+}
+
+/* Allocates GFP_KERNEL, cannot be called under any spinlock */
+void sched_autogroup_create_attach(struct task_struct *p)
+{
+       struct autogroup *ag = autogroup_create();
+
+       autogroup_move_group(p, ag);
+       /* drop extra reference added by autogroup_create() */
+       autogroup_kref_put(ag);
+}
+EXPORT_SYMBOL(sched_autogroup_create_attach);
+
+/* Cannot be called under siglock.  Currently has no users */
+void sched_autogroup_detach(struct task_struct *p)
+{
+       autogroup_move_group(p, &autogroup_default);
+}
+EXPORT_SYMBOL(sched_autogroup_detach);
+
+void sched_autogroup_fork(struct signal_struct *sig)
+{
+       sig->autogroup = autogroup_task_get(current);
+}
+
+void sched_autogroup_exit(struct signal_struct *sig)
+{
+       autogroup_kref_put(sig->autogroup);
+}
+
+static int __init setup_autogroup(char *str)
+{
+       sysctl_sched_autogroup_enabled = 0;
+
+       return 1;
+}
+
+__setup("noautogroup", setup_autogroup);
+
+#ifdef CONFIG_PROC_FS
+
+int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
+{
+       static unsigned long next = INITIAL_JIFFIES;
+       struct autogroup *ag;
+       int err;
+
+       if (*nice < -20 || *nice > 19)
+               return -EINVAL;
+
+       err = security_task_setnice(current, *nice);
+       if (err)
+               return err;
+
+       if (*nice < 0 && !can_nice(current, *nice))
+               return -EPERM;
+
+       /* this is a heavy operation taking global locks... */
+       if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
+               return -EAGAIN;
+
+       next = HZ / 10 + jiffies;
+       ag = autogroup_task_get(p);
+
+       down_write(&ag->lock);
+       err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
+       if (!err)
+               ag->nice = *nice;
+       up_write(&ag->lock);
+
+       autogroup_kref_put(ag);
+
+       return err;
+}
+
+void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
+{
+       struct autogroup *ag = autogroup_task_get(p);
+
+       down_read(&ag->lock);
+       seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
+       up_read(&ag->lock);
+
+       autogroup_kref_put(ag);
+}
+#endif /* CONFIG_PROC_FS */
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+       return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
+}
+#endif /* CONFIG_SCHED_DEBUG */
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
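
Everything in the new sched_autogroup.c above hangs off the kref embedded in struct autogroup: each signal_struct holding a pointer owns one reference, autogroup_create() returns with an extra creator reference that sched_autogroup_create_attach() drops once the group is attached, and the final put runs autogroup_destroy() to tear down the task group. A small userspace model of that embedded-refcount idiom, with C11 atomics standing in for the kernel's kref (names are hypothetical):

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdlib.h>

    struct ref { atomic_int count; };

    struct group {
            struct ref ref;           /* embedded, like autogroup.kref */
            long id;
    };

    /* release: recover the outer object, as autogroup_destroy() does
     * via container_of(), then free it */
    static void group_release(struct ref *r)
    {
            struct group *g = (struct group *)
                    ((char *)r - offsetof(struct group, ref));
            free(g);
    }

    static struct group *group_get(struct group *g)
    {
            atomic_fetch_add(&g->ref.count, 1);  /* autogroup_kref_get() */
            return g;
    }

    static void group_put(struct group *g)
    {
            if (atomic_fetch_sub(&g->ref.count, 1) == 1)
                    group_release(&g->ref);      /* last reference gone */
    }

    static struct group *group_create(long id)
    {
            struct group *g = calloc(1, sizeof(*g));

            if (g) {
                    atomic_store(&g->ref.count, 1);  /* creator's ref */
                    g->id = id;
            }
            return g;
    }

sched_autogroup_create_attach() follows the same shape: create (one reference), attach (the signal struct takes a second), then drop the creator's reference, leaving ownership with the attached tasks.
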
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
new file mode 100644 (file)
index 0000000..5358e24
--- /dev/null
@@ -0,0 +1,32 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+struct autogroup {
+       struct kref             kref;
+       struct task_group       *tg;
+       struct rw_semaphore     lock;
+       unsigned long           id;
+       int                     nice;
+};
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg);
+
+#else /* !CONFIG_SCHED_AUTOGROUP */
+
+static inline void autogroup_init(struct task_struct *init_task) {  }
+static inline void autogroup_free(struct task_group *tg) { }
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+       return tg;
+}
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+       return 0;
+}
+#endif
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
index 52f1a149bfb15a871a362255498fadf90e357c57..9d8af0b3fb64544d9ca7076f3478d2239b46540e 100644 (file)
@@ -79,7 +79,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 }
 EXPORT_SYMBOL_GPL(sched_clock);
 
-static __read_mostly int sched_clock_running;
+__read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 __read_mostly int sched_clock_stable;
index 2e1b0d17dd9b6a8b4ac48891a988c025a8b07ed5..1dfae3d014b5934eba4b3be25bfbad746196b4b7 100644 (file)
@@ -54,8 +54,7 @@ static unsigned long nsec_low(unsigned long long nsec)
 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void print_cfs_group_stats(struct seq_file *m, int cpu,
-               struct task_group *tg)
+static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
 {
        struct sched_entity *se = tg->se[cpu];
        if (!se)
@@ -110,16 +109,6 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
                0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
 
-#ifdef CONFIG_CGROUP_SCHED
-       {
-               char path[64];
-
-               rcu_read_lock();
-               cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
-               rcu_read_unlock();
-               SEQ_printf(m, " %s", path);
-       }
-#endif
        SEQ_printf(m, "\n");
 }
 
@@ -147,19 +136,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
        read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
-#if defined(CONFIG_CGROUP_SCHED) && \
-       (defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
-static void task_group_path(struct task_group *tg, char *buf, int buflen)
-{
-       /* may be NULL if the underlying cgroup isn't fully-created yet */
-       if (!tg->css.cgroup) {
-               buf[0] = '\0';
-               return;
-       }
-       cgroup_path(tg->css.cgroup, buf, buflen);
-}
-#endif
-
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
        s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -168,16 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
        struct sched_entity *last;
        unsigned long flags;
 
-#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
-       char path[128];
-       struct task_group *tg = cfs_rq->tg;
-
-       task_group_path(tg, path, sizeof(path));
-
-       SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
-#else
        SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
-#endif
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
                        SPLIT_NS(cfs_rq->exec_clock));
 
@@ -202,32 +169,29 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
        spread0 = min_vruntime - rq0_min_vruntime;
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
                        SPLIT_NS(spread0));
-       SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
-       SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
-
        SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
                        cfs_rq->nr_spread_over);
+       SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
+       SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
-       SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_avg",
+                       SPLIT_NS(cfs_rq->load_avg));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_period",
+                       SPLIT_NS(cfs_rq->load_period));
+       SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
+                       cfs_rq->load_contribution);
+       SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
+                       atomic_read(&cfs_rq->tg->load_weight));
 #endif
+
        print_cfs_group_stats(m, cpu, cfs_rq->tg);
 #endif
 }
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
-#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
-       char path[128];
-       struct task_group *tg = rt_rq->tg;
-
-       task_group_path(tg, path, sizeof(path));
-
-       SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
-#else
        SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
-#endif
-
 
 #define P(x) \
        SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -243,6 +207,8 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 #undef P
 }
 
+extern __read_mostly int sched_clock_running;
+
 static void print_cpu(struct seq_file *m, int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -314,21 +280,42 @@ static const char *sched_tunable_scaling_names[] = {
 
 static int sched_debug_show(struct seq_file *m, void *v)
 {
-       u64 now = ktime_to_ns(ktime_get());
+       u64 ktime, sched_clk, cpu_clk;
+       unsigned long flags;
        int cpu;
 
-       SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
+       local_irq_save(flags);
+       ktime = ktime_to_ns(ktime_get());
+       sched_clk = sched_clock();
+       cpu_clk = local_clock();
+       local_irq_restore(flags);
+
+       SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
 
-       SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now));
+#define P(x) \
+       SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
+#define PN(x) \
+       SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
+       PN(ktime);
+       PN(sched_clk);
+       PN(cpu_clk);
+       P(jiffies);
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+       P(sched_clock_stable);
+#endif
+#undef PN
+#undef P
+
+       SEQ_printf(m, "\n");
+       SEQ_printf(m, "sysctl_sched\n");
 
 #define P(x) \
        SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 #define PN(x) \
        SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
-       P(jiffies);
        PN(sysctl_sched_latency);
        PN(sysctl_sched_min_granularity);
        PN(sysctl_sched_wakeup_granularity);
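
The reworked sched_debug_show() above gets its one-line-per-tunable output from the preprocessor: P(x) and PN(x) stringify the expression with the # operator, so the label in /proc/sched_debug is always the symbol's own name and adding a knob never needs a matching format string. A standalone sketch of the same trick, approximating SPLIT_NS() as value/10^6 with six fractional digits (sample values only):

    #include <stdio.h>

    #define P(x) \
            printf("%-40s: %lld\n", #x, (long long)(x))
    /* nanoseconds printed as msec.usec, approximating SPLIT_NS() */
    #define PN(x) \
            printf("%-40s: %lld.%06lld\n", #x, \
                   (long long)((x) / 1000000LL), \
                   (long long)((x) % 1000000LL))

    int main(void)
    {
            long long sysctl_sched_latency = 6000000LL;  /* 6 ms */
            unsigned long jiffies = 4294937296UL;

            P(jiffies);                /* "jiffies ...: 4294937296" */
            PN(sysctl_sched_latency);  /* "sysctl_sched_latency ...: 6.000000" */
            return 0;
    }
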
index 00ebd7686676bd87a6e5b3be513d27028e518796..c62ebae65cf0c5e5d1628b0368692a94cda37568 100644 (file)
@@ -89,6 +89,13 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+/*
+ * The exponential sliding window over which load is averaged for shares
+ * distribution.
+ * (default: 10msec)
+ */
+unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
+
 static const struct sched_class fair_sched_class;
 
 /**************************************************************
@@ -143,6 +150,36 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
        return cfs_rq->tg->cfs_rq[this_cpu];
 }
 
+static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+       if (!cfs_rq->on_list) {
+               /*
+                * Ensure we either appear before our parent (if already
+                * enqueued) or force our parent to appear after us when it is
+                * enqueued.  The fact that we always enqueue bottom-up
+                * reduces this to two cases.
+                */
+               if (cfs_rq->tg->parent &&
+                   cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
+                       list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
+                               &rq_of(cfs_rq)->leaf_cfs_rq_list);
+               } else {
+                       list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
+                               &rq_of(cfs_rq)->leaf_cfs_rq_list);
+               }
+
+               cfs_rq->on_list = 1;
+       }
+}
+
+static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+       if (cfs_rq->on_list) {
+               list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+               cfs_rq->on_list = 0;
+       }
+}
+
 /* Iterate thr' all leaf cfs_rq's on a runqueue */
 #define for_each_leaf_cfs_rq(rq, cfs_rq) \
        list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
@@ -246,6 +283,14 @@ static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
        return &cpu_rq(this_cpu)->cfs;
 }
 
+static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+}
+
+static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
+{
+}
+
 #define for_each_leaf_cfs_rq(rq, cfs_rq) \
                for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
 
@@ -417,7 +462,6 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
        WRT_SYSCTL(sched_min_granularity);
        WRT_SYSCTL(sched_latency);
        WRT_SYSCTL(sched_wakeup_granularity);
-       WRT_SYSCTL(sched_shares_ratelimit);
 #undef WRT_SYSCTL
 
        return 0;
@@ -495,6 +539,9 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
        return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
+static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
+static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta);
+
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -514,6 +561,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 
        curr->vruntime += delta_exec_weighted;
        update_min_vruntime(cfs_rq);
+
+#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+       cfs_rq->load_unacc_exec_time += delta_exec;
+#endif
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -633,7 +684,6 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
                list_add(&se->group_node, &cfs_rq->tasks);
        }
        cfs_rq->nr_running++;
-       se->on_rq = 1;
 }
 
 static void
@@ -647,9 +697,140 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
                list_del_init(&se->group_node);
        }
        cfs_rq->nr_running--;
-       se->on_rq = 0;
 }
 
+#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
+                                           int global_update)
+{
+       struct task_group *tg = cfs_rq->tg;
+       long load_avg;
+
+       load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
+       load_avg -= cfs_rq->load_contribution;
+
+       if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
+               atomic_add(load_avg, &tg->load_weight);
+               cfs_rq->load_contribution += load_avg;
+       }
+}
+
+static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
+{
+       u64 period = sysctl_sched_shares_window;
+       u64 now, delta;
+       unsigned long load = cfs_rq->load.weight;
+
+       if (!cfs_rq)
+               return;
+
+       now = rq_of(cfs_rq)->clock;
+       delta = now - cfs_rq->load_stamp;
+
+       /* truncate load history at 4 idle periods */
+       if (cfs_rq->load_stamp > cfs_rq->load_last &&
+           now - cfs_rq->load_last > 4 * period) {
+               cfs_rq->load_period = 0;
+               cfs_rq->load_avg = 0;
+       }
+
+       cfs_rq->load_stamp = now;
+       cfs_rq->load_unacc_exec_time = 0;
+       cfs_rq->load_period += delta;
+       if (load) {
+               cfs_rq->load_last = now;
+               cfs_rq->load_avg += delta * load;
+       }
+
+       /* consider updating load contribution on each fold or truncate */
+       if (global_update || cfs_rq->load_period > period
+           || !cfs_rq->load_period)
+               update_cfs_rq_load_contribution(cfs_rq, global_update);
+
+       while (cfs_rq->load_period > period) {
+               /*
+                * Inline assembly required to prevent the compiler
+                * optimising this loop into a divmod call.
+                * See __iter_div_u64_rem() for another example of this.
+                */
+               asm("" : "+rm" (cfs_rq->load_period));
+               cfs_rq->load_period /= 2;
+               cfs_rq->load_avg /= 2;
+       }
+
+       if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
+               list_del_leaf_cfs_rq(cfs_rq);
+}
+
+static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
+                           unsigned long weight)
+{
+       if (se->on_rq) {
+               /* commit outstanding execution time */
+               if (cfs_rq->curr == se)
+                       update_curr(cfs_rq);
+               account_entity_dequeue(cfs_rq, se);
+       }
+
+       update_load_set(&se->load, weight);
+
+       if (se->on_rq)
+               account_entity_enqueue(cfs_rq, se);
+}
+
+static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+{
+       struct task_group *tg;
+       struct sched_entity *se;
+       long load_weight, load, shares;
+
+       if (!cfs_rq)
+               return;
+
+       tg = cfs_rq->tg;
+       se = tg->se[cpu_of(rq_of(cfs_rq))];
+       if (!se)
+               return;
+
+       load = cfs_rq->load.weight + weight_delta;
+
+       load_weight = atomic_read(&tg->load_weight);
+       load_weight -= cfs_rq->load_contribution;
+       load_weight += load;
+
+       shares = (tg->shares * load);
+       if (load_weight)
+               shares /= load_weight;
+
+       if (shares < MIN_SHARES)
+               shares = MIN_SHARES;
+       if (shares > tg->shares)
+               shares = tg->shares;
+
+       reweight_entity(cfs_rq_of(se), se, shares);
+}
+
+static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
+{
+       if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
+               update_cfs_load(cfs_rq, 0);
+               update_cfs_shares(cfs_rq, 0);
+       }
+}
+#else /* CONFIG_FAIR_GROUP_SCHED */
+static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
+{
+}
+
+static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+{
+}
+
+static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
+{
+}
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHEDSTATS
@@ -771,6 +952,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         * Update run-time statistics of the 'current'.
         */
        update_curr(cfs_rq);
+       update_cfs_load(cfs_rq, 0);
+       update_cfs_shares(cfs_rq, se->load.weight);
        account_entity_enqueue(cfs_rq, se);
 
        if (flags & ENQUEUE_WAKEUP) {
@@ -782,6 +965,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
        check_spread(cfs_rq, se);
        if (se != cfs_rq->curr)
                __enqueue_entity(cfs_rq, se);
+       se->on_rq = 1;
+
+       if (cfs_rq->nr_running == 1)
+               list_add_leaf_cfs_rq(cfs_rq);
 }
 
 static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -825,8 +1012,11 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
        if (se != cfs_rq->curr)
                __dequeue_entity(cfs_rq, se);
+       se->on_rq = 0;
+       update_cfs_load(cfs_rq, 0);
        account_entity_dequeue(cfs_rq, se);
        update_min_vruntime(cfs_rq);
+       update_cfs_shares(cfs_rq, 0);
 
        /*
         * Normalize the entity after updating the min_vruntime because the
@@ -955,6 +1145,11 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
         */
        update_curr(cfs_rq);
 
+       /*
+        * Update share accounting for long-running entities.
+        */
+       update_entity_shares_tick(cfs_rq);
+
 #ifdef CONFIG_SCHED_HRTICK
        /*
         * queued ticks are scheduled to match the slice, so don't bother
@@ -1055,6 +1250,13 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                flags = ENQUEUE_WAKEUP;
        }
 
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+               update_cfs_load(cfs_rq, 0);
+               update_cfs_shares(cfs_rq, 0);
+       }
+
        hrtick_update(rq);
 }
 
@@ -1071,12 +1273,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
                dequeue_entity(cfs_rq, se, flags);
+
                /* Don't dequeue parent if it has other entities besides us */
                if (cfs_rq->load.weight)
                        break;
                flags |= DEQUEUE_SLEEP;
        }
 
+       for_each_sched_entity(se) {
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+               update_cfs_load(cfs_rq, 0);
+               update_cfs_shares(cfs_rq, 0);
+       }
+
        hrtick_update(rq);
 }
 
@@ -1143,51 +1353,20 @@ static void task_waking_fair(struct rq *rq, struct task_struct *p)
  * Adding load to a group doesn't make a group heavier, but can cause movement
  * of group shares between cpus. Assuming the shares were perfectly aligned one
  * can calculate the shift in shares.
- *
- * The problem is that perfectly aligning the shares is rather expensive, hence
- * we try to avoid doing that too often - see update_shares(), which ratelimits
- * this change.
- *
- * We compensate this by not only taking the current delta into account, but
- * also considering the delta between when the shares were last adjusted and
- * now.
- *
- * We still saw a performance dip, some tracing learned us that between
- * cgroup:/ and cgroup:/foo balancing the number of affine wakeups increased
- * significantly. Therefore try to bias the error in direction of failing
- * the affine wakeup.
- *
  */
-static long effective_load(struct task_group *tg, int cpu,
-               long wl, long wg)
+static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 {
        struct sched_entity *se = tg->se[cpu];
 
        if (!tg->parent)
                return wl;
 
-       /*
-        * By not taking the decrease of shares on the other cpu into
-        * account our error leans towards reducing the affine wakeups.
-        */
-       if (!wl && sched_feat(ASYM_EFF_LOAD))
-               return wl;
-
        for_each_sched_entity(se) {
                long S, rw, s, a, b;
-               long more_w;
-
-               /*
-                * Instead of using this increment, also add the difference
-                * between when the shares were last updated and now.
-                */
-               more_w = se->my_q->load.weight - se->my_q->rq_weight;
-               wl += more_w;
-               wg += more_w;
 
                S = se->my_q->tg->shares;
-               s = se->my_q->shares;
-               rw = se->my_q->rq_weight;
+               s = se->load.weight;
+               rw = se->my_q->load.weight;
 
                a = S*(rw + wl);
                b = S*rw + s*wg;
@@ -1508,23 +1687,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
                        sd = tmp;
        }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-       if (sched_feat(LB_SHARES_UPDATE)) {
-               /*
-                * Pick the largest domain to update shares over
-                */
-               tmp = sd;
-               if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
-                       tmp = affine_sd;
-
-               if (tmp) {
-                       raw_spin_unlock(&rq->lock);
-                       update_shares(tmp);
-                       raw_spin_lock(&rq->lock);
-               }
-       }
-#endif
-
        if (affine_sd) {
                if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
                        return select_idle_sibling(p, cpu);
@@ -1909,6 +2071,48 @@ out:
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * update tg->load_weight by folding this cpu's load_avg
+ */
+static int update_shares_cpu(struct task_group *tg, int cpu)
+{
+       struct cfs_rq *cfs_rq;
+       unsigned long flags;
+       struct rq *rq;
+
+       if (!tg->se[cpu])
+               return 0;
+
+       rq = cpu_rq(cpu);
+       cfs_rq = tg->cfs_rq[cpu];
+
+       raw_spin_lock_irqsave(&rq->lock, flags);
+
+       update_rq_clock(rq);
+       update_cfs_load(cfs_rq, 1);
+
+       /*
+        * We need to update shares after updating tg->load_weight in
+        * order to adjust the weight of groups with long running tasks.
+        */
+       update_cfs_shares(cfs_rq, 0);
+
+       raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+       return 0;
+}
+
+static void update_shares(int cpu)
+{
+       struct cfs_rq *cfs_rq;
+       struct rq *rq = cpu_rq(cpu);
+
+       rcu_read_lock();
+       for_each_leaf_cfs_rq(rq, cfs_rq)
+               update_shares_cpu(cfs_rq->tg, cpu);
+       rcu_read_unlock();
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  unsigned long max_load_move,
@@ -1956,6 +2160,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
        return max_load_move - rem_load_move;
 }
 #else
+static inline void update_shares(int cpu)
+{
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  unsigned long max_load_move,
@@ -3032,7 +3240,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
        schedstat_inc(sd, lb_count[idle]);
 
 redo:
-       update_shares(sd);
        group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
                                   cpus, balance);
 
@@ -3174,8 +3381,6 @@ out_one_pinned:
        else
                ld_moved = 0;
 out:
-       if (ld_moved)
-               update_shares(sd);
        return ld_moved;
 }
 
@@ -3199,6 +3404,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
         */
        raw_spin_unlock(&this_rq->lock);
 
+       update_shares(this_cpu);
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
                int balance = 1;
@@ -3569,6 +3775,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
        int update_next_balance = 0;
        int need_serialize;
 
+       update_shares(cpu);
+
        for_each_domain(cpu, sd) {
                if (!(sd->flags & SD_LOAD_BALANCE))
                        continue;
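
The heart of the sched_fair.c rework above is update_cfs_load()'s decaying window: runnable time is folded in as load_avg += delta * weight, and once load_period exceeds sysctl_sched_shares_window both the period and the sum are halved repeatedly, so older activity is geometrically discounted (the empty asm in the kernel's loop only stops the compiler turning the halvings into one slow 64-bit division). update_cfs_shares() then gives the group's entity shares = tg->shares * local_load / total_load, clamped to [MIN_SHARES, tg->shares]. A compact userspace model of both steps (constants and names are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define WINDOW_NS 10000000ULL  /* sysctl_sched_shares_window default */

    struct cfs_model {
            uint64_t load_period;  /* ns covered by load_avg */
            uint64_t load_avg;     /* sum of delta * weight  */
    };

    /* fold 'delta' ns at 'weight' into the window, then decay */
    static void update_load(struct cfs_model *c, uint64_t delta,
                            unsigned long weight)
    {
            c->load_period += delta;
            c->load_avg += delta * weight;

            /* halving both keeps the ratio (the average) while
             * geometrically discounting older contributions */
            while (c->load_period > WINDOW_NS) {
                    c->load_period /= 2;
                    c->load_avg /= 2;
            }
    }

    /* shares = tg_shares * local / total, clamped as in
     * update_cfs_shares() */
    static long compute_shares(long tg_shares, long local, long total)
    {
            long shares = total ? tg_shares * local / total : tg_shares;

            if (shares < 2)        /* MIN_SHARES */
                    shares = 2;
            if (shares > tg_shares)
                    shares = tg_shares;
            return shares;
    }

    int main(void)
    {
            struct cfs_model c = { 0, 0 };

            update_load(&c, 8000000, 1024);  /* 8 ms runnable, weight 1024 */
            update_load(&c, 8000000, 0);     /* 8 ms idle */
            printf("avg weight ~%llu\n",     /* ~512: the idle half decayed it */
                   (unsigned long long)(c.load_avg / (c.load_period + 1)));
            printf("shares: %ld\n", compute_shares(1024, 512, 2048));
            return 0;
    }
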
index 185f920ec1a2e923b0d966f787c610ff26a7b6cb..68e69acc29b9570b10ecf8a5892ea62c48700c5c 100644 (file)
@@ -52,8 +52,6 @@ SCHED_FEAT(ARCH_POWER, 0)
 SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
 SCHED_FEAT(LB_BIAS, 1)
-SCHED_FEAT(LB_SHARES_UPDATE, 1)
-SCHED_FEAT(ASYM_EFF_LOAD, 1)
 
 /*
  * Spin-wait on mutex acquisition when the mutex owner is running on
index bea7d79f7e9ca958bba514cbd8eb48ceab47bab3..c914ec747ca6709e25a177eb3c4152c75cb40aee 100644 (file)
@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
        return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+       list_add_rcu(&rt_rq->leaf_rt_rq_list,
+                       &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+       list_del_rcu(&rt_rq->leaf_rt_rq_list);
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
        list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
 
@@ -276,6 +287,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
        return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
        for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
 
@@ -825,6 +844,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
        if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                return;
 
+       if (!rt_rq->rt_nr_running)
+               list_add_leaf_rt_rq(rt_rq);
+
        if (head)
                list_add(&rt_se->run_list, queue);
        else
@@ -844,6 +866,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
                __clear_bit(rt_se_prio(rt_se), array->bitmap);
 
        dec_rt_tasks(rt_se, rt_rq);
+       if (!rt_rq->rt_nr_running)
+               list_del_leaf_rt_rq(rt_rq);
 }
 
 /*
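
The sched_rt.c hunks above give the RT class the same occupancy-driven bookkeeping the fair class gained: an rt_rq is linked into the per-rq leaf list on the 0 -> 1 enqueue transition and unlinked on the 1 -> 0 dequeue, so iteration only ever visits groups that actually have runnable tasks. The transition logic in isolation (hand-rolled intrusive list, illustrative names):

    struct node { struct node *prev, *next; };

    static void node_add(struct node *n, struct node *head)
    {
            n->next = head->next;
            n->prev = head;
            head->next->prev = n;
            head->next = n;
    }

    static void node_del(struct node *n)
    {
            n->prev->next = n->next;
            n->next->prev = n->prev;
    }

    struct rt_rq_model {
            unsigned int nr_running;
            struct node leaf;      /* membership in the rq's leaf list */
    };

    /* mirrors __enqueue_rt_entity(): link before the count goes 0 -> 1 */
    static void enqueue(struct rt_rq_model *rt, struct node *leaf_list)
    {
            if (!rt->nr_running)
                    node_add(&rt->leaf, leaf_list);
            rt->nr_running++;
    }

    /* mirrors __dequeue_rt_entity(): unlink once the count hits zero */
    static void dequeue(struct rt_rq_model *rt)
    {
            if (!--rt->nr_running)
                    node_del(&rt->leaf);
    }
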
index 18f4be0d5fe0bbf853935972d9b441e95bc61c5a..d4d918a91881407acd8abbde6691f77197cd013c 100644 (file)
@@ -853,7 +853,9 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
                             cpumask_any(cpu_online_mask));
        case CPU_DEAD:
        case CPU_DEAD_FROZEN: {
-               struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+               static struct sched_param param = {
+                       .sched_priority = MAX_RT_PRIO-1
+               };
 
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
index c71e075005368eceff3aab4340f94beca4aee249..98d8c1e80edbcb106ba8e87c34777459aa4eff55 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/delay.h>
 #include <linux/srcu.h>
 
 static int init_srcu_struct_fields(struct srcu_struct *sp)
@@ -203,9 +204,14 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
         * all srcu_read_lock() calls using the old counters have completed.
         * Their corresponding critical sections might well be still
         * executing, but the srcu_read_lock() primitives themselves
-        * will have finished executing.
+        * will have finished executing.  We initially give readers
+        * an arbitrarily chosen 10 microseconds to get out of their
+        * SRCU read-side critical sections, then loop waiting 1/HZ
+        * seconds per iteration.
         */
 
+       if (srcu_readers_active_idx(sp, idx))
+               udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY);
        while (srcu_readers_active_idx(sp, idx))
                schedule_timeout_interruptible(1);
 
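
The srcu.c change above turns a pure sleep loop into a two-stage wait: readers normally leave their SRCU read-side sections within microseconds, so one short udelay() usually lets the updater proceed without scheduling away, and only a still-active reader forces the fall-back of sleeping a jiffy per check. The same spin-then-block shape as a userspace sketch (the predicate and delays are stand-ins, not the kernel's):

    #include <stdatomic.h>
    #include <unistd.h>

    static atomic_int readers_active;  /* models srcu_readers_active_idx() */

    static void wait_for_readers(void)
    {
            /* stage 1: readers are usually gone in ~10us, so one short
             * busy-wait often saves a sleep/wakeup round trip */
            if (atomic_load(&readers_active))
                    usleep(10);

            /* stage 2: still busy -- back off to coarse sleeping */
            while (atomic_load(&readers_active))
                    usleep(10000);     /* roughly 1/HZ at HZ=100 */
    }
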
index 7f5a0cd296a96ca44e43f0db028026094dbbb57a..2745dcdb6c6c5756a7bafd19e6497c31cc077d60 100644 (file)
@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid)
        err = session;
 out:
        write_unlock_irq(&tasklist_lock);
-       if (err > 0)
+       if (err > 0) {
                proc_sid_connector(group_leader);
+               sched_autogroup_create_attach(group_leader);
+       }
        return err;
 }
 
index 5abfa151855493735a91fd45a255a45727c8ba97..ae5cbb1e3ced15b8cc2e00b052496953709cf4c2 100644 (file)
@@ -259,8 +259,6 @@ static int min_wakeup_granularity_ns;                       /* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;   /* 1 second */
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-static int min_sched_shares_ratelimit = 100000; /* 100 usec */
-static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
 #endif
 
 #ifdef CONFIG_COMPACTION
@@ -304,15 +302,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = &min_wakeup_granularity_ns,
                .extra2         = &max_wakeup_granularity_ns,
        },
-       {
-               .procname       = "sched_shares_ratelimit",
-               .data           = &sysctl_sched_shares_ratelimit,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = sched_proc_update_handler,
-               .extra1         = &min_sched_shares_ratelimit,
-               .extra2         = &max_sched_shares_ratelimit,
-       },
        {
                .procname       = "sched_tunable_scaling",
                .data           = &sysctl_sched_tunable_scaling,
@@ -322,14 +311,6 @@ static struct ctl_table kern_table[] = {
                .extra1         = &min_sched_tunable_scaling,
                .extra2         = &max_sched_tunable_scaling,
        },
-       {
-               .procname       = "sched_shares_thresh",
-               .data           = &sysctl_sched_shares_thresh,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-       },
        {
                .procname       = "sched_migration_cost",
                .data           = &sysctl_sched_migration_cost,
@@ -351,6 +332,13 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "sched_shares_window",
+               .data           = &sysctl_sched_shares_window,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
        {
                .procname       = "timer_migration",
                .data           = &sysctl_timer_migration,
@@ -382,6 +370,17 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+#ifdef CONFIG_SCHED_AUTOGROUP
+       {
+               .procname       = "sched_autogroup_enabled",
+               .data           = &sysctl_sched_autogroup_enabled,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif
 #ifdef CONFIG_PROVE_LOCKING
        {
                .procname       = "prove_locking",
@@ -745,21 +744,21 @@ static struct ctl_table kern_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
-#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
        {
-               .procname       = "unknown_nmi_panic",
-               .data           = &unknown_nmi_panic,
+               .procname       = "nmi_watchdog",
+               .data           = &watchdog_enabled,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dowatchdog_enabled,
        },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
        {
-               .procname       = "nmi_watchdog",
-               .data           = &nmi_watchdog_enabled,
+               .procname       = "unknown_nmi_panic",
+               .data           = &unknown_nmi_panic,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = proc_nmi_enabled,
+               .proc_handler   = proc_dointvec,
        },
 #endif
 #if defined(CONFIG_X86)
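
The kern_table edits above follow the standard ctl_table recipe: .data points at the variable, .maxlen/.mode describe it, and .proc_handler parses and formats. Note that .extra1/.extra2 bounds only take effect with a _minmax handler; the sched_autogroup_enabled entry pairs them with plain proc_dointvec, which ignores the bounds (later kernels switched it to proc_dointvec_minmax). A hedged sketch of registering one bounded knob from a module, using the register_sysctl_table() interface of this era (my_knob and the table names are hypothetical):

    #include <linux/module.h>
    #include <linux/sysctl.h>

    static int my_knob = 1;
    static int zero, one = 1;

    static struct ctl_table my_table[] = {
            {
                    .procname     = "my_knob",
                    .data         = &my_knob,
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = proc_dointvec_minmax,
                    .extra1       = &zero,   /* clamp writes to [0, 1] */
                    .extra2       = &one,
            },
            {}
    };

    static struct ctl_table my_root[] = {
            { .procname = "kernel", .mode = 0555, .child = my_table, },
            {}
    };

    static struct ctl_table_header *my_hdr;

    static int __init my_sysctl_init(void)
    {
            my_hdr = register_sysctl_table(my_root);
            return my_hdr ? 0 : -ENOMEM;
    }

    static void __exit my_sysctl_exit(void)
    {
            unregister_sysctl_table(my_hdr);
    }

    module_init(my_sysctl_init);
    module_exit(my_sysctl_exit);
    MODULE_LICENSE("GPL");
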
index 1357c5786064e6c8f030defbbb7f76f690dc3c15..4b2545a136ffcec72d5073b579cf3f41c49fcbd3 100644 (file)
@@ -136,7 +136,6 @@ static const struct bin_table bin_kern_table[] = {
        { CTL_INT,      KERN_IA64_UNALIGNED,            "ignore-unaligned-usertrap" },
        { CTL_INT,      KERN_COMPAT_LOG,                "compat-log" },
        { CTL_INT,      KERN_MAX_LOCK_DEPTH,            "max_lock_depth" },
-       { CTL_INT,      KERN_NMI_WATCHDOG,              "nmi_watchdog" },
        { CTL_INT,      KERN_PANIC_ON_NMI,              "panic_on_unrecovered_nmi" },
        {}
 };
index c8231fb1570831d78215ab9967958a1663424ed0..3308fd7f1b52f170e4da5dd7e26749aca652ef1b 100644 (file)
@@ -349,25 +349,47 @@ static int parse(struct nlattr *na, struct cpumask *mask)
        return ret;
 }
 
+#ifdef CONFIG_IA64
+#define TASKSTATS_NEEDS_PADDING 1
+#endif
+
 static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
 {
        struct nlattr *na, *ret;
        int aggr;
 
-       /* If we don't pad, we end up with alignment on a 4 byte boundary.
-        * This causes lots of runtime warnings on systems requiring 8 byte
-        * alignment */
-       u32 pids[2] = { pid, 0 };
-       int pid_size = ALIGN(sizeof(pid), sizeof(long));
-
        aggr = (type == TASKSTATS_TYPE_PID)
                        ? TASKSTATS_TYPE_AGGR_PID
                        : TASKSTATS_TYPE_AGGR_TGID;
 
+       /*
+        * The taskstats structure is internally aligned on 8 byte
+        * boundaries but the layout of the aggregate reply, with
+        * two NLA headers and the pid (each 4 bytes), actually
+        * forces the entire structure to be unaligned. This causes
+        * the kernel to issue unaligned access warnings on some
+        * architectures like ia64. Unfortunately, some software out there
+        * doesn't properly unroll the NLA packet and assumes that the start
+        * of the taskstats structure will always be 20 bytes from the start
+        * of the netlink payload. Aligning the start of the taskstats
+        * structure breaks this software, which we don't want. So, for now
+        * the alignment only happens on architectures that require it
+        * and those users will have to update to fixed versions of those
+        * packages. Space is reserved in the packet only when needed.
+        * This ifdef should be removed in several years e.g. 2012 once
+        * we can be confident that fixed versions are installed on most
+        * systems. We add the padding before the aggregate since the
+        * aggregate is already a defined type.
+        */
+#ifdef TASKSTATS_NEEDS_PADDING
+       if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0)
+               goto err;
+#endif
        na = nla_nest_start(skb, aggr);
        if (!na)
                goto err;
-       if (nla_put(skb, type, pid_size, pids) < 0)
+
+       if (nla_put(skb, type, sizeof(pid), &pid) < 0)
                goto err;
        ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
        if (!ret)
@@ -456,6 +478,18 @@ out:
        return rc;
 }
 
+static size_t taskstats_packet_size(void)
+{
+       size_t size;
+
+       size = nla_total_size(sizeof(u32)) +
+               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+#ifdef TASKSTATS_NEEDS_PADDING
+       size += nla_total_size(0); /* Padding for alignment */
+#endif
+       return size;
+}
+
 static int cmd_attr_pid(struct genl_info *info)
 {
        struct taskstats *stats;
@@ -464,8 +498,7 @@ static int cmd_attr_pid(struct genl_info *info)
        u32 pid;
        int rc;
 
-       size = nla_total_size(sizeof(u32)) +
-               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+       size = taskstats_packet_size();
 
        rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
        if (rc < 0)
@@ -494,8 +527,7 @@ static int cmd_attr_tgid(struct genl_info *info)
        u32 tgid;
        int rc;
 
-       size = nla_total_size(sizeof(u32)) +
-               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+       size = taskstats_packet_size();
 
        rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
        if (rc < 0)
@@ -570,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
        /*
         * Size includes space for nested attributes
         */
-       size = nla_total_size(sizeof(u32)) +
-               nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
+       size = taskstats_packet_size();
 
        is_thread_group = !!taskstats_tgid_alloc(tsk);
        if (is_thread_group) {
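
taskstats_packet_size() above centralizes the reply sizing: each attribute is charged via nla_total_size(), i.e. the 4-byte netlink attribute header plus the payload rounded up to 4-byte NLA alignment, and on architectures that define TASKSTATS_NEEDS_PADDING one extra zero-length TASKSTATS_TYPE_NULL attribute buys the 4 bytes that push struct taskstats onto an 8-byte boundary. The arithmetic, worked in a userspace model (the 328-byte taskstats size is a version-dependent sample value):

    #include <stdio.h>

    #define NLA_ALIGNTO  4
    #define NLA_ALIGN(n) (((n) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
    #define NLA_HDRLEN   NLA_ALIGN(4)      /* struct nlattr is 4 bytes */

    /* model of the kernel's nla_total_size(): header + aligned payload */
    static int nla_total_size_model(int payload)
    {
            return NLA_ALIGN(NLA_HDRLEN + payload);
    }

    int main(void)
    {
            int stats_len = 328;             /* sizeof(struct taskstats) */
            int size;

            size = nla_total_size_model(sizeof(int)) /* pid attribute  */
                 + nla_total_size_model(stats_len)   /* stats payload  */
                 + nla_total_size_model(0);          /* aggregate nest */
    #ifdef NEEDS_PADDING                             /* ia64 and friends */
            size += nla_total_size_model(0);         /* alignment pad  */
    #endif
            printf("reply needs %d bytes\n", size);
            return 0;
    }
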
index ac38fbb176ccd0bb598b1eaaa7f2a703b17ec565..a9ae369925ce14fa4cf7ca1674d9f4903a6f4a24 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/math64.h>
+#include <linux/kernel.h>
 
 /*
  * fixed point arithmetic scale factor for skew
@@ -57,11 +58,11 @@ int timecompare_offset(struct timecompare *sync,
        int index;
        int num_samples = sync->num_samples;
 
-       if (num_samples > sizeof(buffer)/sizeof(buffer[0])) {
+       if (num_samples > ARRAY_SIZE(buffer)) {
                samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
                if (!samples) {
                        samples = buffer;
-                       num_samples = sizeof(buffer)/sizeof(buffer[0]);
+                       num_samples = ARRAY_SIZE(buffer);
                }
        } else {
                samples = buffer;
index 49010d822f725b47726742fa7e1b45aad076ef90..5bb86da8200373a2e6cd64fdcbe0355f43f5a27f 100644 (file)
@@ -32,6 +32,8 @@ struct timekeeper {
        cycle_t cycle_interval;
        /* Number of clock shifted nano seconds in one NTP interval. */
        u64     xtime_interval;
+       /* shifted nano seconds left over when rounding cycle_interval */
+       s64     xtime_remainder;
        /* Raw nano seconds accumulated per NTP interval. */
        u32     raw_interval;
 
@@ -62,7 +64,7 @@ struct timekeeper timekeeper;
 static void timekeeper_setup_internals(struct clocksource *clock)
 {
        cycle_t interval;
-       u64 tmp;
+       u64 tmp, ntpinterval;
 
        timekeeper.clock = clock;
        clock->cycle_last = clock->read(clock);
@@ -70,6 +72,7 @@ static void timekeeper_setup_internals(struct clocksource *clock)
        /* Do the ns -> cycle conversion first, using original mult */
        tmp = NTP_INTERVAL_LENGTH;
        tmp <<= clock->shift;
+       ntpinterval = tmp;
        tmp += clock->mult/2;
        do_div(tmp, clock->mult);
        if (tmp == 0)
@@ -80,6 +83,7 @@ static void timekeeper_setup_internals(struct clocksource *clock)
 
        /* Go back from cycles -> shifted ns */
        timekeeper.xtime_interval = (u64) interval * clock->mult;
+       timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval;
        timekeeper.raw_interval =
                ((u64) interval * clock->mult) >> clock->shift;
 
@@ -719,7 +723,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
 
        /* Accumulate error between NTP and clock interval */
        timekeeper.ntp_error += tick_length << shift;
-       timekeeper.ntp_error -= timekeeper.xtime_interval <<
+       timekeeper.ntp_error -=
+           (timekeeper.xtime_interval + timekeeper.xtime_remainder) <<
                                (timekeeper.ntp_error_shift + shift);
 
        return offset;
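
The xtime_remainder fix above closes a rounding leak: cycle_interval is the NTP interval converted to whole clocksource cycles (rounded to nearest), so converting those cycles back to shifted nanoseconds can fall short of the true interval by up to half a cycle, and that shortfall was silently charged to ntp_error every tick. Keeping the difference and adding it back makes the error feedback exact. The arithmetic in a userspace sketch (the mult/shift pair models a hypothetical ~1.5 GHz clocksource):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t mult = 2863311531u;   /* ns-per-cycle * 2^shift */
            uint32_t shift = 32;
            uint64_t ntp_interval_ns = 999999ULL;

            /* ns -> whole cycles, rounded to nearest (as the kernel does) */
            uint64_t ntpinterval = ntp_interval_ns << shift;
            uint64_t cycle_interval = (ntpinterval + mult / 2) / mult;

            /* cycles -> shifted ns; the remainder is what rounding lost */
            uint64_t xtime_interval = cycle_interval * mult;
            int64_t xtime_remainder = (int64_t)(ntpinterval - xtime_interval);

            printf("cycle_interval  = %llu cycles\n",
                   (unsigned long long)cycle_interval);
            printf("xtime_remainder = %lld shifted ns\n",
                   (long long)xtime_remainder);
            return 0;
    }
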
index ab8f5e33fa92c76db813d1419e6a339f3a7aca52..32a19f9397fc347c3144a01e142308c026f49c70 100644 (file)
@@ -79,26 +79,26 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
 {
        struct hrtimer *timer, tmp;
        unsigned long next = 0, i;
-       struct rb_node *curr;
+       struct timerqueue_node *curr;
        unsigned long flags;
 
 next_one:
        i = 0;
        raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
 
-       curr = base->first;
+       curr = timerqueue_getnext(&base->active);
        /*
         * Crude but we have to do this O(N*N) thing, because
         * we have to unlock the base when printing:
         */
        while (curr && i < next) {
-               curr = rb_next(curr);
+               curr = timerqueue_iterate_next(curr);
                i++;
        }
 
        if (curr) {
 
-               timer = rb_entry(curr, struct hrtimer, node);
+               timer = container_of(curr, struct hrtimer, node);
                tmp = *timer;
                raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
 
index 353b9227c2ecfe11793a17b0a41f534ebdbd14f8..43ca9936f2d06a2cba572f1c877406f9281e68a3 100644 (file)
@@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
-/*
- * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB to
- * indicate whether the timer is deferrable.
- *
- * A deferrable timer will work normally when the system is busy, but
- * will not cause a CPU to come out of idle just to service it; instead,
- * the timer will be serviced when the CPU eventually wakes up with a
- * subsequent non-deferrable timer.
- */
-#define TBASE_DEFERRABLE_FLAG          (0x1)
-
 /* Functions below help us manage 'deferrable' flag */
 static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
 {
@@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
 
 static inline void timer_set_deferrable(struct timer_list *timer)
 {
-       timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
-                                      TBASE_DEFERRABLE_FLAG));
+       timer->base = TBASE_MAKE_DEFERRED(timer->base);
 }
 
 static inline void
@@ -343,15 +330,6 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
 }
 EXPORT_SYMBOL_GPL(set_timer_slack);
 
-
-static inline void set_running_timer(struct tvec_base *base,
-                                       struct timer_list *timer)
-{
-#ifdef CONFIG_SMP
-       base->running_timer = timer;
-#endif
-}
-
 static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
        unsigned long expires = timer->expires;
@@ -936,15 +914,12 @@ int del_timer(struct timer_list *timer)
 }
 EXPORT_SYMBOL(del_timer);
 
-#ifdef CONFIG_SMP
 /**
  * try_to_del_timer_sync - Try to deactivate a timer
  * @timer: timer do del
  *
  * This function tries to deactivate a timer. Upon successful (ret >= 0)
  * exit the timer is not queued and the handler is not running on any CPU.
- *
- * It must not be called from interrupt contexts.
  */
 int try_to_del_timer_sync(struct timer_list *timer)
 {
@@ -973,6 +948,7 @@ out:
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
+#ifdef CONFIG_SMP
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -983,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  *
  * Synchronization rules: Callers must prevent restarting of the timer,
  * otherwise this function is meaningless. It must not be called from
- * interrupt contexts. The caller must not hold locks which would prevent
+ * hardirq contexts. The caller must not hold locks which would prevent
  * completion of the timer's handler. The timer's handler must not call
  * add_timer_on(). Upon exit the timer is not queued and the handler is
  * not running on any CPU.
@@ -993,14 +969,16 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
 int del_timer_sync(struct timer_list *timer)
 {
 #ifdef CONFIG_LOCKDEP
-       unsigned long flags;
-
-       local_irq_save(flags);
+       local_bh_disable();
        lock_map_acquire(&timer->lockdep_map);
        lock_map_release(&timer->lockdep_map);
-       local_irq_restore(flags);
+       local_bh_enable();
 #endif
-
+       /*
+        * don't use it in hardirq context, because it
+        * could lead to deadlock.
+        */
+       WARN_ON(in_irq());
        for (;;) {
                int ret = try_to_del_timer_sync(timer);
                if (ret >= 0)
@@ -1111,7 +1089,7 @@ static inline void __run_timers(struct tvec_base *base)
 
                        timer_stats_account_timer(timer);
 
-                       set_running_timer(base, timer);
+                       base->running_timer = timer;
                        detach_timer(timer, 1);
 
                        spin_unlock_irq(&base->lock);
@@ -1119,7 +1097,7 @@ static inline void __run_timers(struct tvec_base *base)
                        spin_lock_irq(&base->lock);
                }
        }
-       set_running_timer(base, NULL);
+       base->running_timer = NULL;
        spin_unlock_irq(&base->lock);
 }
 
@@ -1249,7 +1227,7 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
  */
 unsigned long get_next_timer_interrupt(unsigned long now)
 {
-       struct tvec_base *base = __get_cpu_var(tvec_bases);
+       struct tvec_base *base = __this_cpu_read(tvec_bases);
        unsigned long expires;
 
        /*
@@ -1298,7 +1276,7 @@ void update_process_times(int user_tick)
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-       struct tvec_base *base = __get_cpu_var(tvec_bases);
+       struct tvec_base *base = __this_cpu_read(tvec_bases);
 
        hrtimer_run_pending();
 
index ea37e2ff416429d04cea3829e835ced7f0f35fc0..14674dce77a6c5a9cb8f17ffe2056f8e3ece1cc2 100644 (file)
@@ -69,6 +69,21 @@ config EVENT_TRACING
        select CONTEXT_SWITCH_TRACER
        bool
 
+config EVENT_POWER_TRACING_DEPRECATED
+       depends on EVENT_TRACING
+       bool "Deprecated power event trace API, to be removed"
+       default y
+       help
+         Provides old power event types:
+         C-state/idle accounting events:
+         power:power_start
+         power:power_end
+         and old cpufreq accounting event:
+         power:power_frequency
+         This is for userspace compatibility
+         and will vanish after 5 kernel iterations,
+         namely 2.6.41.
+
 config CONTEXT_SWITCH_TRACER
        bool
 
index a22582a061618cea52acee544d9e783ef1d9868e..f55fcf61b223d87e547568512ef2ae89e17107c5 100644 (file)
@@ -13,5 +13,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/power.h>
 
-EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency);
+#ifdef EVENT_POWER_TRACING_DEPRECATED
+EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
+#endif
+EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
 
index 9ed509a015d81a69cd452b3697658dd7fde9cb71..bd1c35a4fbccf31c0531f0667545011f704c561f 100644 (file)
@@ -3853,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 
                /* Need to copy one event at a time */
                do {
+                       /* We need the size of one event, because
+                        * rb_advance_reader only advances by one event,
+                        * whereas rb_event_ts_length may include the size of
+                        * one or two events.
+                        * We have already ensured there's enough space if this
+                        * is a time extend. */
+                       size = rb_event_length(event);
                        memcpy(bpage->data + pos, rpage->data + rpos, size);
 
                        len -= size;
@@ -3867,7 +3874,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
                        event = rb_reader_event(cpu_buffer);
                        /* Always keep the time extend and data together */
                        size = rb_event_ts_length(event);
-               } while (len > size);
+               } while (len >= size);
 
                /* update bpage */
                local_set(&bpage->commit, pos);
index 39c059ca670e64156e6681782ffa708c6b8d720f..19a359d5e6d58573cc1c74326e488a419b01b342 100644 (file)
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
 /* Count the events in use (per event id, not per instance) */
 static int     total_ref_count;
 
+static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+                                struct perf_event *p_event)
+{
+       /* No tracing, just counting, so no obvious leak */
+       if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
+               return 0;
+
+       /* Some events are ok to be traced by non-root users... */
+       if (p_event->attach_state == PERF_ATTACH_TASK) {
+               if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
+                       return 0;
+       }
+
+       /*
+        * ...otherwise raw tracepoint data can be a severe data leak,
+        * only allow root to have these.
+        */
+       if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       return 0;
+}
+
 static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
 {
        struct hlist_head __percpu *list;
-       int ret = -ENOMEM;
+       int ret;
        int cpu;
 
+       ret = perf_trace_event_perm(tp_event, p_event);
+       if (ret)
+               return ret;
+
        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;
 
+       ret = -ENOMEM;
+
        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;
index 0725eeab1937ef24a301f2c0b0404f64ce95e026..35fde09b81dee7f386c111766b1fdbefac2d7414 100644 (file)
 
 DEFINE_MUTEX(event_mutex);
 
+DEFINE_MUTEX(event_storage_mutex);
+EXPORT_SYMBOL_GPL(event_storage_mutex);
+
+char event_storage[EVENT_STORAGE_SIZE];
+EXPORT_SYMBOL_GPL(event_storage);
+
 LIST_HEAD(ftrace_events);
 LIST_HEAD(ftrace_common_fields);
 
index 4ba44deaac259d05fb67d5d31e8ce5371844a414..4b74d71705c0d2be2a9adf67246823584bd34fcd 100644 (file)
@@ -83,13 +83,19 @@ static void __always_unused ____ftrace_check_##name(void)   \
 
 #undef __array
 #define __array(type, item, len)                                       \
-       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
-       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+       do {                                                            \
+               BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                 \
+               mutex_lock(&event_storage_mutex);                       \
+               snprintf(event_storage, sizeof(event_storage),          \
+                        "%s[%d]", #type, len);                         \
+               ret = trace_define_field(event_call, event_storage, #item, \
                                 offsetof(typeof(field), item),         \
                                 sizeof(field.item),                    \
                                 is_signed_type(type), FILTER_OTHER);   \
-       if (ret)                                                        \
-               return ret;
+               mutex_unlock(&event_storage_mutex);                     \
+               if (ret)                                                \
+                       return ret;                                     \
+       } while (0);
 
 #undef __array_desc
 #define __array_desc(type, container, item, len)                       \
index 155a415b3209c0c4e65936be1b593678aea42d27..562c56e048fdbc34b18dded38cd21ccb3cd87f08 100644 (file)
@@ -558,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
 static int trace_wakeup_test_thread(void *data)
 {
        /* Make this a RT thread, doesn't need to be too high */
-       struct sched_param param = { .sched_priority = 5 };
+       static struct sched_param param = { .sched_priority = 5 };
        struct completion *x = data;
 
        sched_setscheduler(current, SCHED_FIFO, &param);
index 2c7d8d5914b188be65c36a686a81fba7eed07d8c..5c598ca781df4bf6f907043ed1d43f98b3b7ef58 100644 (file)
@@ -158,6 +158,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
                spin_lock_irq(&uidhash_lock);
                up = uid_hash_find(uid, hashent);
                if (up) {
+                       put_user_ns(ns);
                        key_put(new->uid_keyring);
                        key_put(new->session_keyring);
                        kmem_cache_free(uid_cachep, new);
index 6e3c41a4024c1cc66be01218e2c37498498f2469..6e7b575ac33cf2dcba3f9dc749f7039e6805a3f0 100644 (file)
@@ -57,6 +57,8 @@ static int __init hardlockup_panic_setup(char *str)
 {
        if (!strncmp(str, "panic", 5))
                hardlockup_panic = 1;
+       else if (!strncmp(str, "0", 1))
+               no_watchdog = 1;
        return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -307,7 +309,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  */
 static int watchdog(void *unused)
 {
-       struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+       static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
        struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
        sched_setscheduler(current, SCHED_FIFO, &param);
@@ -364,7 +366,8 @@ static int watchdog_nmi_enable(int cpu)
                goto out_save;
        }
 
-       printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
+       printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n",
+              cpu, PTR_ERR(event));
        return PTR_ERR(event);
 
        /* success path */
@@ -547,13 +550,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
 };
 
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
 
        if (no_watchdog)
-               return 0;
+               return;
 
        err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        WARN_ON(notifier_to_errno(err));
@@ -561,6 +564,5 @@ static int __init spawn_watchdog_task(void)
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
 
-       return 0;
+       return;
 }
-early_initcall(spawn_watchdog_task);
index 28b42b9274d0b5fe47522d9df8158498be12319a..2d05adb984018776610f573de126f14bda4c9d2e 100644 (file)
@@ -173,7 +173,8 @@ config LOCKUP_DETECTOR
          An NMI is generated every 60 seconds or so to check for hardlockups.
 
 config HARDLOCKUP_DETECTOR
-       def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+       def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
+                !ARCH_HAS_NMI_WATCHDOG
 
 config BOOTPARAM_SOFTLOCKUP_PANIC
        bool "Panic (Reboot) On Soft Lockups"
index e6a3763b82126729ecad6636caec9686cd7dec5f..9e2db72d128e6f22ec560ff7f04584777c293dd7 100644 (file)
@@ -8,7 +8,7 @@ KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-        rbtree.o radix-tree.o dump_stack.o \
+        rbtree.o radix-tree.o dump_stack.o timerqueue.o \
         idr.o int_sqrt.o extable.o prio_tree.o \
         sha1.o irq_regs.o reciprocal_div.o argv_split.o \
         proportions.o prio_heap.o ratelimit.o show_mem.o \
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
new file mode 100644 (file)
index 0000000..e3a1050
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ *  Generic Timer-queue
+ *
+ *  Manages a simple queue of timers, ordered by expiration time.
+ *  Uses rbtrees for quick list adds and expiration.
+ *
+ *  NOTE: All of the following functions need to be serialized
+ *  to avoid races. No locking is done by this library code.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/timerqueue.h>
+#include <linux/rbtree.h>
+#include <linux/module.h>
+
+/**
+ * timerqueue_add - Adds timer to timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be added
+ *
+ * Adds the timer node to the timerqueue, sorted by the
+ * node's expires value.
+ */
+void timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+       struct rb_node **p = &head->head.rb_node;
+       struct rb_node *parent = NULL;
+       struct timerqueue_node  *ptr;
+
+       /* Make sure we don't add nodes that are already added */
+       WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
+
+       while (*p) {
+               parent = *p;
+               ptr = rb_entry(parent, struct timerqueue_node, node);
+               if (node->expires.tv64 < ptr->expires.tv64)
+                       p = &(*p)->rb_left;
+               else
+                       p = &(*p)->rb_right;
+       }
+       rb_link_node(&node->node, parent, p);
+       rb_insert_color(&node->node, &head->head);
+
+       if (!head->next || node->expires.tv64 < head->next->expires.tv64)
+               head->next = node;
+}
+EXPORT_SYMBOL_GPL(timerqueue_add);
+
+/**
+ * timerqueue_del - Removes a timer from the timerqueue.
+ *
+ * @head: head of timerqueue
+ * @node: timer node to be removed
+ *
+ * Removes the timer node from the timerqueue.
+ */
+void timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
+{
+       WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
+
+       /* update next pointer */
+       if (head->next == node) {
+               struct rb_node *rbn = rb_next(&node->node);
+
+               head->next = rbn ?
+                       rb_entry(rbn, struct timerqueue_node, node) : NULL;
+       }
+       rb_erase(&node->node, &head->head);
+       RB_CLEAR_NODE(&node->node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_del);
+
+/**
+ * timerqueue_iterate_next - Returns the timer after the provided timer
+ *
+ * @node: Pointer to a timer.
+ *
+ * Provides the timer that is after the given node. This is used, when
+ * necessary, to iterate through the list of timers in a timer list
+ * without modifying the list.
+ */
+struct timerqueue_node *timerqueue_iterate_next(struct timerqueue_node *node)
+{
+       struct rb_node *next;
+
+       if (!node)
+               return NULL;
+       next = rb_next(&node->node);
+       if (!next)
+               return NULL;
+       return container_of(next, struct timerqueue_node, node);
+}
+EXPORT_SYMBOL_GPL(timerqueue_iterate_next);
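A minimal usage sketch for the new library (timerqueue_init() and timerqueue_init_head() are assumed to live in the companion include/linux/timerqueue.h added by this patch; all calls must be serialized by the caller, per the NOTE above):

	static struct timerqueue_head queue;
	static struct timerqueue_node timer_a;

	static void example(void)
	{
		struct timerqueue_node *next;

		timerqueue_init_head(&queue);
		timerqueue_init(&timer_a);

		timer_a.expires = ktime_set(1, 0);	/* fires in 1s */
		timerqueue_add(&queue, &timer_a);

		next = queue.next;	/* always the earliest deadline */

		timerqueue_del(&queue, &timer_a);
	}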
index 4d709ee5901370842534224a9f81e7d13943e196..1a8894eadf7275fc2a41f002cadaca4f86684cbf 100644 (file)
@@ -279,7 +279,6 @@ static unsigned long isolate_migratepages(struct zone *zone,
                /* Successfully isolated */
                del_page_from_lru_list(zone, page, page_lru(page));
                list_add(&page->lru, migratelist);
-               mem_cgroup_del_lru(page);
                cc->nr_migratepages++;
 
                /* Avoid isolating too much */
index 7a22b41292115f78dc039ebf5e271090dc5a8f9d..00bb8a64d028f945d50346c2332c6d4b54373621 100644 (file)
@@ -1925,19 +1925,18 @@ again:
 
                rcu_read_lock();
                p = rcu_dereference(mm->owner);
-               VM_BUG_ON(!p);
                /*
-                * because we don't have task_lock(), "p" can exit while
-                * we're here. In that case, "mem" can point to root
-                * cgroup but never be NULL. (and task_struct itself is freed
-                * by RCU, cgroup itself is RCU safe.) Then, we have small
-                * risk here to get wrong cgroup. But such kind of mis-account
-                * by race always happens because we don't have cgroup_mutex().
-                * It's overkill and we allow that small race, here.
+                * Because we don't have task_lock(), "p" can exit while
+                * we're here. In that case, "mem" can point to root, or p can
+                * be NULL due to a race with swapoff. Then, we have a small
+                * risk of mis-accounting. But such mis-accounting by race
+                * always happens because we don't have cgroup_mutex(); it's
+                * overkill, and we allow that small race, here.
+                * (*) swapoff et al. will charge against the mm_struct, not
+                * the task_struct, so mm->owner can be NULL.
                 */
                mem = mem_cgroup_from_task(p);
-               VM_BUG_ON(!mem);
-               if (mem_cgroup_is_root(mem)) {
+               if (!mem || mem_cgroup_is_root(mem)) {
                        rcu_read_unlock();
                        goto done;
                }
index fe5a3c6a54260f2ae9999fbaabe03082a818982f..6ae8a66a704575764ecf068c3f9a02bcc0a14575 100644 (file)
@@ -35,6 +35,8 @@
 #include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
+#include <asm/tlbflush.h>
+
 #include "internal.h"
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
index 27a9ac58851678d30c7c477d00a857cf2f63bc35..ef4045d010d5f37631c3ad2f9de0e824e1de65a7 100644 (file)
@@ -10,7 +10,7 @@
  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
- *  Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org>
+ *  Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org>
  */
 
 #include <linux/module.h>
@@ -328,6 +328,7 @@ void *vmalloc_node(unsigned long size, int node)
 {
        return vmalloc(size);
 }
+EXPORT_SYMBOL(vmalloc_node);
 
 /**
  * vzalloc_node - allocate memory on a specific node with zero fill
@@ -440,6 +441,31 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
 {
 }
 
+/**
+ *     alloc_vm_area - allocate a range of kernel address space
+ *     @size:          size of the area
+ *
+ *     Returns:        NULL on failure, vm_struct on success
+ *
+ *     This function reserves a range of kernel address space, and
+ *     allocates pagetables to map that range.  No actual mappings
+ *     are created.  If the kernel address space is not shared
+ *     between processes, it syncs the pagetable across all
+ *     processes.
+ */
+struct vm_struct *alloc_vm_area(size_t size)
+{
+       BUG();
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(alloc_vm_area);
+
+void free_vm_area(struct vm_struct *area)
+{
+       BUG();
+}
+EXPORT_SYMBOL_GPL(free_vm_area);
+
 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
                   struct page *page)
 {
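On CONFIG_MMU kernels alloc_vm_area()/free_vm_area() are implemented in mm/vmalloc.c; the stubs above exist only so nommu builds link, and any caller actually reaching them is a bug. The intended MMU-side calling pattern looks roughly like this (a sketch; how the caller populates the page tables varies):

	struct vm_struct *area;

	area = alloc_vm_area(PAGE_SIZE);	/* reserve VA + page tables */
	if (!area)
		return -ENOMEM;
	/* ... install mappings at area->addr, e.g. via apply_to_page_range() ... */
	free_vm_area(area);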
index b840afa89761ce0d83690ff963385399246fa6d8..b4edfe7ce06c1bf5f6d692235eac4e07f9068490 100644 (file)
@@ -563,7 +563,7 @@ static void balance_dirty_pages(struct address_space *mapping,
                                break;          /* We've done our duty */
                }
                trace_wbc_balance_dirty_wait(&wbc, bdi);
-               __set_current_state(TASK_INTERRUPTIBLE);
+               __set_current_state(TASK_UNINTERRUPTIBLE);
                io_schedule_timeout(pause);
 
                /*
index efe816856a9d777b284f8bf25cc7548154f374d6..02ba91230b99269f45beea6387c165e042800347 100644 (file)
@@ -1268,7 +1268,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
        /* we're done parsing the input, undefine BUG macro and dump config */
 #undef PCPU_SETUP_BUG_ON
-       pcpu_dump_alloc_info(KERN_INFO, ai);
+       pcpu_dump_alloc_info(KERN_DEBUG, ai);
 
        pcpu_nr_groups = ai->nr_groups;
        pcpu_group_offsets = group_offsets;
index fa642aa652bdba0d4b0b3f47c77dc9e55571c19f..432a9a633e8d8da4ddcabdb1a9a3cef1a55a3253 100644 (file)
@@ -311,6 +311,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
        d->state      = BT_OPEN;
        d->flags      = 0;
        d->mscex      = 0;
+       d->sec_level  = BT_SECURITY_LOW;
        d->mtu        = RFCOMM_DEFAULT_MTU;
        d->v24_sig    = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
 
index eb5b256ffc8801ff7e187c64ee3dde5da268f570..543b3262d002cdca0213348b4878c364622394b1 100644 (file)
@@ -437,7 +437,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
        ip6h = ipv6_hdr(skb);
 
        *(__force __be32 *)ip6h = htonl(0x60000000);
-       ip6h->payload_len = 8 + sizeof(*mldq);
+       ip6h->payload_len = htons(8 + sizeof(*mldq));
        ip6h->nexthdr = IPPROTO_HOPOPTS;
        ip6h->hop_limit = 1;
        ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
@@ -1430,7 +1430,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
                                 struct net_bridge_port *port,
                                 struct sk_buff *skb)
 {
-       struct sk_buff *skb2 = skb;
+       struct sk_buff *skb2;
        struct ipv6hdr *ip6h;
        struct icmp6hdr *icmp6h;
        u8 nexthdr;
@@ -1469,15 +1469,15 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
        if (!skb2)
                return -ENOMEM;
 
+       err = -EINVAL;
+       if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr)))
+               goto out;
+
        len -= offset - skb_network_offset(skb2);
 
        __skb_pull(skb2, offset);
        skb_reset_transport_header(skb2);
 
-       err = -EINVAL;
-       if (!pskb_may_pull(skb2, sizeof(*icmp6h)))
-               goto out;
-
        icmp6h = icmp6_hdr(skb2);
 
        switch (icmp6h->icmp6_type) {
@@ -1516,7 +1516,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
        switch (icmp6h->icmp6_type) {
        case ICMPV6_MGM_REPORT:
            {
-               struct mld_msg *mld = (struct mld_msg *)icmp6h;
+               struct mld_msg *mld;
+               if (!pskb_may_pull(skb2, sizeof(*mld))) {
+                       err = -EINVAL;
+                       goto out;
+               }
+               mld = (struct mld_msg *)skb_transport_header(skb2);
                BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
                err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
                break;
@@ -1529,15 +1534,18 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
                break;
        case ICMPV6_MGM_REDUCTION:
            {
-               struct mld_msg *mld = (struct mld_msg *)icmp6h;
+               struct mld_msg *mld;
+               if (!pskb_may_pull(skb2, sizeof(*mld))) {
+                       err = -EINVAL;
+                       goto out;
+               }
+               mld = (struct mld_msg *)skb_transport_header(skb2);
                br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
            }
        }
 
 out:
-       __skb_push(skb2, offset);
-       if (skb2 != skb)
-               kfree_skb(skb2);
+       kfree_skb(skb2);
        return err;
 }
 #endif
index 35cf27087b561d6e9955fd75b4b03213a6e9e8d8..e3d7aefa91811d8945d9283d5de7ede9b168da34 100644 (file)
@@ -50,6 +50,8 @@ static void br_send_bpdu(struct net_bridge_port *p,
 
        llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr);
 
+       skb_reset_mac_header(skb);
+
        NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
                dev_queue_xmit);
 }
index 6faa8256e10ca22d6fb0b2005d74c5c5580a404d..9d5e8accfab1d73f25e8a00ae45523a400b820ce 100644 (file)
@@ -125,7 +125,7 @@ struct bcm_sock {
        struct list_head tx_ops;
        unsigned long dropped_usr_msgs;
        struct proc_dir_entry *bcm_proc_read;
-       char procname [20]; /* pointer printed in ASCII with \0 */
+       char procname [32]; /* inode number in decimal with \0 */
 };
 
 static inline struct bcm_sock *bcm_sk(const struct sock *sk)
@@ -1521,7 +1521,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
 
        if (proc_dir) {
                /* unique socket address as filename */
-               sprintf(bo->procname, "%p", sock);
+               sprintf(bo->procname, "%lu", sock_i_ino(sk));
                bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
                                                     proc_dir,
                                                     &bcm_proc_fops, sk);
index 82a4369ae15091520d99effdcbb36dec7b3bab42..a20e5d3bbfa017db76959cfd448962c134d5d309 100644 (file)
@@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
        int ret = 0;
 
-       if (rule->iifindex && (rule->iifindex != fl->iif) &&
-           !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
+       if (rule->iifindex && (rule->iifindex != fl->iif))
                goto out;
 
        if (rule->oifindex && (rule->oifindex != fl->oif))
index fb6080111461546953b34979db77f5e2d516e060..e5af8d5d5b505d79e3102a0aa879ba3d3e17f82b 100644 (file)
@@ -1009,6 +1009,36 @@ static void sock_copy(struct sock *nsk, const struct sock *osk)
 #endif
 }
 
+/*
+ * Caches using SLAB_DESTROY_BY_RCU should leave the .next pointer of nulls
+ * nodes unmodified. Special care is taken when initializing the object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+       if (offsetof(struct sock, sk_node.next) != 0)
+               memset(sk, 0, offsetof(struct sock, sk_node.next));
+       memset(&sk->sk_node.pprev, 0,
+              size - offsetof(struct sock, sk_node.pprev));
+}
+
+void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
+{
+       unsigned long nulls1, nulls2;
+
+       nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
+       nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
+       if (nulls1 > nulls2)
+               swap(nulls1, nulls2);
+
+       if (nulls1 != 0)
+               memset((char *)sk, 0, nulls1);
+       memset((char *)sk + nulls1 + sizeof(void *), 0,
+              nulls2 - nulls1 - sizeof(void *));
+       memset((char *)sk + nulls2 + sizeof(void *), 0,
+              size - nulls2 - sizeof(void *));
+}
+EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
+
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                int family)
 {
@@ -1021,19 +1051,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                if (!sk)
                        return sk;
                if (priority & __GFP_ZERO) {
-                       /*
-                        * caches using SLAB_DESTROY_BY_RCU should let
-                        * sk_node.next un-modified. Special care is taken
-                        * when initializing object to zero.
-                        */
-                       if (offsetof(struct sock, sk_node.next) != 0)
-                               memset(sk, 0, offsetof(struct sock, sk_node.next));
-                       memset(&sk->sk_node.pprev, 0,
-                              prot->obj_size - offsetof(struct sock,
-                                                        sk_node.pprev));
+                       if (prot->clear_sk)
+                               prot->clear_sk(sk, prot->obj_size);
+                       else
+                               sk_prot_clear_nulls(sk, prot->obj_size);
                }
-       }
-       else
+       } else
                sk = kmalloc(prot->obj_size, priority);
 
        if (sk != NULL) {
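Protocols opt in to the new hook by pointing .clear_sk at the helper, as the UDP and UDP-Lite hunks later in this merge do; condensed to the relevant fields (all other struct proto members omitted, names illustrative):

	struct proto example_prot = {
		.name     = "EXAMPLE",
		.obj_size = sizeof(struct sock),
		/* keep skc_node.next *and* skc_portaddr_node.next intact
		 * across the __GFP_ZERO clear in sk_prot_alloc() */
		.clear_sk = sk_prot_clear_portaddr_nulls,
	};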
index eb6f69a8f27aff4db2de494389dd7d9584b93ece..c19c1f739fbadc48d39a87c25c550f49de100db4 100644 (file)
@@ -163,13 +163,19 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
                                .daddr = addr
                        }
                },
-               .flags = FLOWI_FLAG_MATCH_ANY_IIF
        };
        struct fib_result res = { 0 };
        struct net_device *dev = NULL;
+       struct fib_table *local_table;
+
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+       res.r = NULL;
+#endif
 
        rcu_read_lock();
-       if (fib_lookup(net, &fl, &res)) {
+       local_table = fib_get_table(net, RT_TABLE_LOCAL);
+       if (!local_table ||
+           fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
                rcu_read_unlock();
                return NULL;
        }
index 987bf9adb31833c19a0db04ce76060306d8e6994..93bfd95584f4656605eb963919de2d74b7c729f0 100644 (file)
@@ -2585,9 +2585,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                        goto out;
 
                /* RACE: Check return value of inet_select_addr instead. */
-               if (rcu_dereference(dev_out->ip_ptr) == NULL)
-                       goto out;       /* Wrong error code */
-
+               if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
+                       err = -ENETUNREACH;
+                       goto out;
+               }
                if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
                    ipv4_is_lbcast(oldflp->fl4_dst)) {
                        if (!fl.fl4_src)
@@ -2648,8 +2649,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
        }
 
        if (res.type == RTN_LOCAL) {
-               if (!fl.fl4_src)
-                       fl.fl4_src = fl.fl4_dst;
+               if (!fl.fl4_src) {
+                       if (res.fi->fib_prefsrc)
+                               fl.fl4_src = res.fi->fib_prefsrc;
+                       else
+                               fl.fl4_src = fl.fl4_dst;
+               }
                dev_out = net->loopback_dev;
                fl.oif = dev_out->ifindex;
                res.fi = NULL;
index e13da6de1fc79e26dc6d03b671e1becf9e6c8754..d978bb2f748b34d99efb0162e4e083a460275630 100644 (file)
@@ -2030,7 +2030,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 get_req:
                        req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
                }
-               sk        = sk_next(st->syn_wait_sk);
+               sk        = sk_nulls_next(st->syn_wait_sk);
                st->state = TCP_SEQ_STATE_LISTENING;
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
        } else {
@@ -2039,7 +2039,7 @@ get_req:
                if (reqsk_queue_len(&icsk->icsk_accept_queue))
                        goto start_req;
                read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-               sk = sk_next(sk);
+               sk = sk_nulls_next(sk);
        }
 get_sk:
        sk_nulls_for_each_from(sk, node) {
index 5e0a3a582a59a05466468b244371dc5675bedc67..2d3ded4d078684298f9116f5c225f92b8908120e 100644 (file)
@@ -1899,6 +1899,7 @@ struct proto udp_prot = {
        .compat_setsockopt = compat_udp_setsockopt,
        .compat_getsockopt = compat_udp_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udp_prot);
 
index ab76aa928fa98d6f1b8a8f6371d2b898ebdccb24..aee9963f7f5a497efc06429d3ab730e9f3efc999 100644 (file)
@@ -57,6 +57,7 @@ struct proto  udplite_prot = {
        .compat_setsockopt = compat_udp_setsockopt,
        .compat_getsockopt = compat_udp_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udplite_prot);
 
index 93b7a933a7758254ed70ed849d333ad4d19114a7..848b355910424e75d42df6b9218eae3824ee213c 100644 (file)
@@ -2669,7 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
        ASSERT_RTNL();
 
-       rt6_ifdown(net, dev);
+       /* Flush routes if device is being removed or it is not loopback */
+       if (how || !(dev->flags & IFF_LOOPBACK))
+               rt6_ifdown(net, dev);
        neigh_ifdown(&nd_tbl, dev);
 
        idev = __in6_dev_get(dev);
index 99157b4cd56e2fa619939a17aa5b716680fc8799..94b5bf132b2e33a467f662b8c0e402a459f00a3d 100644 (file)
@@ -56,7 +56,7 @@
 #include <net/checksum.h>
 #include <linux/mroute6.h>
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 int __ip6_local_out(struct sk_buff *skb)
 {
@@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb)
        return -EINVAL;
 }
 
-static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
-{
-       struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
-
-       return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
-              skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
-}
-
 static int ip6_finish_output(struct sk_buff *skb)
 {
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
@@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
        return offset;
 }
 
-static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
index 96455ffb76fb8b92aa90c3a711aa6635d45b91fa..7659d6f16e6bae4b7e8a1701161181f627c0e41f 100644 (file)
@@ -1565,11 +1565,16 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
 {
        struct rt6_info *rt, *nrt;
        int allfrag = 0;
-
+again:
        rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
        if (rt == NULL)
                return;
 
+       if (rt6_check_expired(rt)) {
+               ip6_del_rt(rt);
+               goto again;
+       }
+
        if (pmtu >= dst_mtu(&rt->dst))
                goto out;
 
index 91def93bec85060e7571218c20439cabd4ec824f..cd6cb7c3e5636e5911a36b5e1b4ebb4cb96b4be4 100644 (file)
@@ -1477,6 +1477,7 @@ struct proto udpv6_prot = {
        .compat_setsockopt = compat_udpv6_setsockopt,
        .compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udpv6_protosw = {
index 5f48fadc27f7a62aa13632fc31bea73e1d1e6aec..986c4de5292eedf715b25b8bae98ee640917d97e 100644 (file)
@@ -55,6 +55,7 @@ struct proto udplitev6_prot = {
        .compat_setsockopt = compat_udpv6_setsockopt,
        .compat_getsockopt = compat_udpv6_getsockopt,
 #endif
+       .clear_sk          = sk_prot_clear_portaddr_nulls,
 };
 
 static struct inet_protosw udplite6_protosw = {
index 6434bd5ce0885ad20408a2f79eb198b8f30c269a..8e688b3de9abc62ec03c7dd90b33c951791c5587 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/dst.h>
 #include <net/ipv6.h>
+#include <net/ip6_route.h>
 #include <net/xfrm.h>
 
 int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb)
        return xfrm_output(skb);
 }
 
+static int __xfrm6_output(struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct xfrm_state *x = dst->xfrm;
+
+       if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
+           ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+               dst_allfrag(skb_dst(skb)))) {
+                       return ip6_fragment(skb, xfrm6_output_finish);
+       }
+       return xfrm6_output_finish(skb);
+}
+
 int xfrm6_output(struct sk_buff *skb)
 {
        return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
-                      skb_dst(skb)->dev, xfrm6_output_finish);
+                      skb_dst(skb)->dev, __xfrm6_output);
 }
index a6de3059746ddd3f3039d564f20acf394225e3ad..c9890e25cd4c9681b14f838976a97ed4db560ac7 100644 (file)
@@ -2280,6 +2280,16 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
 
        switch (optname) {
        case IRLMP_ENUMDEVICES:
+
+               /* Offset to first device entry */
+               offset = sizeof(struct irda_device_list) -
+                       sizeof(struct irda_device_info);
+
+               if (len < offset) {
+                       err = -EINVAL;
+                       goto out;
+               }
+
                /* Ask lmp for the current discovery log */
                discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
                                                    self->nslots);
@@ -2290,15 +2300,9 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
                }
 
                /* Write total list length back to client */
-               if (copy_to_user(optval, &list,
-                                sizeof(struct irda_device_list) -
-                                sizeof(struct irda_device_info)))
+               if (copy_to_user(optval, &list, offset))
                        err = -EFAULT;
 
-               /* Offset to first device entry */
-               offset = sizeof(struct irda_device_list) -
-                       sizeof(struct irda_device_info);
-
                /* Copy the list itself - watch for overflow */
                if (list.len > 2048) {
                        err = -EINVAL;
index 239c4836a946601f27c69f965ae1a0935e71b434..077a93dd1671a841b9db85fe4cf6981aa90c1edf 100644 (file)
@@ -780,6 +780,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 
        mutex_lock(&sdata->u.ibss.mtx);
 
+       if (!sdata->u.ibss.ssid_len)
+               goto mgmt_out; /* not ready to merge yet */
+
        switch (fc & IEEE80211_FCTL_STYPE) {
        case IEEE80211_STYPE_PROBE_REQ:
                ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
@@ -797,6 +800,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
                break;
        }
 
+ mgmt_out:
        mutex_unlock(&sdata->u.ibss.mtx);
 }
 
index 54fb4a0e76f03d835c6038cffeabf18874cb2d59..b01e467b76c69f15aa8de00434deca874b89ae48 100644 (file)
@@ -1788,9 +1788,11 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 
                        fwd_skb = skb_copy(skb, GFP_ATOMIC);
 
-                       if (!fwd_skb && net_ratelimit())
+                       if (!fwd_skb && net_ratelimit()) {
                                printk(KERN_DEBUG "%s: failed to clone mesh frame\n",
                                                   sdata->name);
+                               goto out;
+                       }
 
                        fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data;
                        memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
@@ -1828,6 +1830,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
                }
        }
 
+ out:
        if (is_multicast_ether_addr(hdr->addr1) ||
            sdata->dev->flags & IFF_PROMISC)
                return RX_CONTINUE;
index ae344d1ba0560480665e1b939b15b058f5fddc4c..146097cb43a710d756ab997770faa812293ba85e 100644 (file)
@@ -1051,11 +1051,13 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_work *wk;
+       bool cleanup = false;
 
        mutex_lock(&local->mtx);
        list_for_each_entry(wk, &local->work_list, list) {
                if (wk->sdata != sdata)
                        continue;
+               cleanup = true;
                wk->type = IEEE80211_WORK_ABORT;
                wk->started = true;
                wk->timeout = jiffies;
@@ -1063,7 +1065,8 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
        mutex_unlock(&local->mtx);
 
        /* run cleanups etc. */
-       ieee80211_work_work(&local->work_work);
+       if (cleanup)
+               ieee80211_work_work(&local->work_work);
 
        mutex_lock(&local->mtx);
        list_for_each_entry(wk, &local->work_list, list) {
index 3cf478d012dd4f29b7a67f951c2cfa901b64248f..7150705f1d0b8b7aa12de87067928c25605ffbbb 100644 (file)
@@ -270,7 +270,6 @@ static unsigned int sfq_drop(struct Qdisc *sch)
                /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
                d = q->next[q->tail];
                q->next[q->tail] = q->next[d];
-               q->allot[q->next[d]] += q->quantum;
                skb = q->qs[d].prev;
                len = qdisc_pkt_len(skb);
                __skb_unlink(skb, &q->qs[d]);
@@ -321,14 +320,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
        sfq_inc(q, x);
        if (q->qs[x].qlen == 1) {               /* The flow is new */
                if (q->tail == SFQ_DEPTH) {     /* It is the first flow */
-                       q->tail = x;
                        q->next[x] = x;
-                       q->allot[x] = q->quantum;
                } else {
                        q->next[x] = q->next[q->tail];
                        q->next[q->tail] = x;
-                       q->tail = x;
                }
+               q->tail = x;
+               q->allot[x] = q->quantum;
        }
        if (++sch->q.qlen <= q->limit) {
                sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -359,13 +357,13 @@ sfq_dequeue(struct Qdisc *sch)
 {
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
-       sfq_index a, old_a;
+       sfq_index a, next_a;
 
        /* No active slots */
        if (q->tail == SFQ_DEPTH)
                return NULL;
 
-       a = old_a = q->next[q->tail];
+       a = q->next[q->tail];
 
        /* Grab packet */
        skb = __skb_dequeue(&q->qs[a]);
@@ -376,17 +374,15 @@ sfq_dequeue(struct Qdisc *sch)
        /* Is the slot empty? */
        if (q->qs[a].qlen == 0) {
                q->ht[q->hash[a]] = SFQ_DEPTH;
-               a = q->next[a];
-               if (a == old_a) {
+               next_a = q->next[a];
+               if (a == next_a) {
                        q->tail = SFQ_DEPTH;
                        return skb;
                }
-               q->next[q->tail] = a;
-               q->allot[a] += q->quantum;
+               q->next[q->tail] = next_a;
        } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-               q->tail = a;
-               a = q->next[a];
                q->allot[a] += q->quantum;
+               q->tail = a;
        }
        return skb;
 }
index 0b9ee34ad35ceb31ff764baf9cec73fba010f55b..fff0926b11112a8dd04d11e1e7f9ba378c40fcc3 100644 (file)
@@ -5053,7 +5053,7 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
        if (copy_to_user(optval, &val, len))
                return -EFAULT;
 
-       return -ENOTSUPP;
+       return 0;
 }
 
 /*
index 5ad25e17b6cb2782a2101b59ad7cfd442a8af2ea..4eb99ab34053769f5b2b644594427b2bdc108c82 100644 (file)
@@ -214,17 +214,22 @@ ifdef BUILD_C_RECORDMCOUNT
 # The empty.o file is created in the make process in order to determine
 #  the target endianness and word size. It is made before all other C
 #  files, including recordmcount.
-cmd_record_mcount = if [ $(@) != "scripts/mod/empty.o" ]; then                 \
-                       $(objtree)/scripts/recordmcount "$(@)";                 \
-                   fi;
+sub_cmd_record_mcount =                                        \
+       if [ $(@) != "scripts/mod/empty.o" ]; then      \
+               $(objtree)/scripts/recordmcount "$(@)"; \
+       fi;
 else
-cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
+sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
        "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \
        "$(if $(CONFIG_64BIT),64,32)" \
        "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \
        "$(LD)" "$(NM)" "$(RM)" "$(MV)" \
        "$(if $(part-of-module),1,0)" "$(@)";
 endif
+cmd_record_mcount =                                            \
+       if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then    \
+               $(sub_cmd_record_mcount)                        \
+       fi;
 endif
 
 define rule_cc_o_c
index b9d9aa18e6d62bbaa40c34cf7062c00c8bb15e35..5f77dcb8977e0632b5875167d5a652ef62f4be1c 100644 (file)
@@ -140,6 +140,20 @@ struct property *menu_add_prop(enum prop_type type, char *prompt, struct expr *e
                }
                if (current_entry->prompt && current_entry != &rootmenu)
                        prop_warn(prop, "prompt redefined");
+
+               /* Apply all upper menus' visibilities to actual prompts. */
+               if (type == P_PROMPT) {
+                       struct menu *menu = current_entry;
+
+                       while ((menu = menu->parent) != NULL) {
+                               if (!menu->visibility)
+                                       continue;
+                               prop->visible.expr
+                                       = expr_alloc_and(prop->visible.expr,
+                                                        menu->visibility);
+                       }
+               }
+
                current_entry->prompt = prop;
        }
        prop->text = prompt;
index 39580a5dc5df6083a5766ae8853c6610e417474f..9f85012acf0d2fb749a4ded8bb962d514d524c03 100755 (executable)
@@ -155,6 +155,8 @@ use strict;
 # '@parameter' - name of a parameter
 # '%CONST' - name of a constant.
 
+## init lots of data
+
 my $errors = 0;
 my $warnings = 0;
 my $anon_struct_union = 0;
@@ -218,21 +220,14 @@ my %highlights_list = ( $type_constant, "\$1",
                        $type_param, "\$1" );
 my $blankline_list = "";
 
-sub usage {
-    print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n";
-    print "         [ -no-doc-sections ]\n";
-    print "         [ -function funcname [ -function funcname ...] ]\n";
-    print "         [ -nofunction funcname [ -nofunction funcname ...] ]\n";
-    print "         c source file(s) > outputfile\n";
-    print "         -v : verbose output, more warnings & other info listed\n";
-    exit 1;
-}
-
 # read arguments
 if ($#ARGV == -1) {
     usage();
 }
 
+my $kernelversion;
+my $dohighlight = "";
+
 my $verbose = 0;
 my $output_mode = "man";
 my $no_doc_sections = 0;
@@ -245,7 +240,7 @@ my $man_date = ('January', 'February', 'March', 'April', 'May', 'June',
                'November', 'December')[(localtime)[4]] .
   " " . ((localtime)[5]+1900);
 
-# Essentially these are globals
+# Essentially these are globals.
 # They probably want to be tidied up, made more localised or something.
 # CAVEAT EMPTOR!  Some of the others I localised may not want to be, which
 # could cause "use of undefined value" or other bugs.
@@ -353,6 +348,18 @@ while ($ARGV[0] =~ m/^-(.*)/) {
     }
 }
 
+# main execution continues near EOF
+
+sub usage {
+    print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n";
+    print "         [ -no-doc-sections ]\n";
+    print "         [ -function funcname [ -function funcname ...] ]\n";
+    print "         [ -nofunction funcname [ -nofunction funcname ...] ]\n";
+    print "         c source file(s) > outputfile\n";
+    print "         -v : verbose output, more warnings & other info listed\n";
+    exit 1;
+}
+
 # get kernel version from env
 sub get_kernel_version() {
     my $version = 'unknown kernel version';
@@ -362,15 +369,6 @@ sub get_kernel_version() {
     }
     return $version;
 }
-my $kernelversion = get_kernel_version();
-
-# generate a sequence of code that will splice in highlighting information
-# using the s// operator.
-my $dohighlight = "";
-foreach my $pattern (keys %highlights) {
-#   print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n";
-    $dohighlight .=  "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n";
-}
 
 ##
 # dumps section contents to arrays/hashes intended for that purpose.
@@ -1851,34 +1849,6 @@ sub dump_function($$) {
                       });
 }
 
-sub process_file($);
-
-# Read the file that maps relative names to absolute names for
-# separate source and object directories and for shadow trees.
-if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
-       my ($relname, $absname);
-       while(<SOURCE_MAP>) {
-               chop();
-               ($relname, $absname) = (split())[0..1];
-               $relname =~ s:^/+::;
-               $source_map{$relname} = $absname;
-       }
-       close(SOURCE_MAP);
-}
-
-foreach (@ARGV) {
-    chomp;
-    process_file($_);
-}
-if ($verbose && $errors) {
-  print STDERR "$errors errors\n";
-}
-if ($verbose && $warnings) {
-  print STDERR "$warnings warnings\n";
-}
-
-exit($errors);
-
 sub reset_state {
     $function = "";
     %constants = ();
@@ -2285,3 +2255,39 @@ sub process_file($) {
        }
     }
 }
+
+
+$kernelversion = get_kernel_version();
+
+# generate a sequence of code that will splice in highlighting information
+# using the s// operator.
+foreach my $pattern (keys %highlights) {
+#   print STDERR "scanning pattern:$pattern, highlight:($highlights{$pattern})\n";
+    $dohighlight .=  "\$contents =~ s:$pattern:$highlights{$pattern}:gs;\n";
+}
+
+# Read the file that maps relative names to absolute names for
+# separate source and object directories and for shadow trees.
+if (open(SOURCE_MAP, "<.tmp_filelist.txt")) {
+       my ($relname, $absname);
+       while(<SOURCE_MAP>) {
+               chop();
+               ($relname, $absname) = (split())[0..1];
+               $relname =~ s:^/+::;
+               $source_map{$relname} = $absname;
+       }
+       close(SOURCE_MAP);
+}
+
+foreach (@ARGV) {
+    chomp;
+    process_file($_);
+}
+if ($verbose && $errors) {
+  print STDERR "$errors errors\n";
+}
+if ($verbose && $warnings) {
+  print STDERR "$warnings warnings\n";
+}
+
+exit($errors);
index aef8c0a923ab8e2276f97e1fe2aa61b7748e25e4..d661afbe474c2fce8f74907869be0a644c5789c9 100644 (file)
@@ -253,6 +253,8 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry,
        result = security_filter_rule_init(entry->lsm[lsm_rule].type,
                                           Audit_equal, args,
                                           &entry->lsm[lsm_rule].rule);
+       if (!entry->lsm[lsm_rule].rule)
+               return -EINVAL;
        return result;
 }
 
index 0088dd8bf68a77b29637527733ff0b8985b8ecb7..0ea52d25a6bda8b3568fdb7752539403e93086b6 100644 (file)
@@ -403,7 +403,6 @@ link_check_failed:
        return ret;
 
 link_prealloc_failed:
-       up_write(&dest_keyring->sem);
        mutex_unlock(&user->cons_lock);
        kleave(" = %d [prelink]", ret);
        return ret;
index b75db8e9cc0f36fff03d0ecf1884b23b3bad724b..11446a1506dad244b2d11c447e6dc39e906427f1 100644 (file)
@@ -1070,8 +1070,10 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
                struct snd_pcm_hw_rule *new;
                unsigned int new_rules = constrs->rules_all + 16;
                new = kcalloc(new_rules, sizeof(*c), GFP_KERNEL);
-               if (!new)
+               if (!new) {
+                       va_end(args);
                        return -ENOMEM;
+               }
                if (constrs->rules) {
                        memcpy(new, constrs->rules,
                               constrs->rules_num * sizeof(*c));
@@ -1087,8 +1089,10 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
        c->private = private;
        k = 0;
        while (1) {
-               if (snd_BUG_ON(k >= ARRAY_SIZE(c->deps)))
+               if (snd_BUG_ON(k >= ARRAY_SIZE(c->deps))) {
+                       va_end(args);
                        return -EINVAL;
+               }
                c->deps[k++] = dep;
                if (dep < 0)
                        break;
@@ -1097,7 +1101,7 @@ int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond,
        constrs->rules_num++;
        va_end(args);
        return 0;
-}                                  
+}
 
 EXPORT_SYMBOL(snd_pcm_hw_rule_add);
 
index 46c0d03dbecced68ca318dbbe20df52d1da4ab0b..fcb14a09982262d473842aeb55f365b031ebeb70 100644 (file)
@@ -87,7 +87,7 @@ int *load_mixer_volumes(char *name, int *levels, int present)
        int             i, n;
 
        for (i = 0; i < num_mixer_volumes; i++) {
-               if (strcmp(name, mixer_vols[i].name) == 0) {
+               if (strncmp(name, mixer_vols[i].name, 32) == 0) {
                        if (present)
                                mixer_vols[i].num = i;
                        return mixer_vols[i].levels;
@@ -99,7 +99,7 @@ int *load_mixer_volumes(char *name, int *levels, int present)
        }
        n = num_mixer_volumes++;
 
-       strcpy(mixer_vols[n].name, name);
+       strncpy(mixer_vols[n].name, name, 32);
 
        if (present)
                mixer_vols[n].num = n;
index 644e3f14f8ca5aea7af2e465450caab86d2f90cb..98b6d02a36c9c0600a7b26dffbbf1f4f378fccd8 100644 (file)
@@ -1919,6 +1919,16 @@ struct snd_kcontrol *snd_hda_find_mixer_ctl(struct hda_codec *codec,
 }
 EXPORT_SYMBOL_HDA(snd_hda_find_mixer_ctl);
 
+static int find_empty_mixer_ctl_idx(struct hda_codec *codec, const char *name)
+{
+       int idx;
+       for (idx = 0; idx < 16; idx++) { /* 16 ctls should be large enough */
+               if (!_snd_hda_find_mixer_ctl(codec, name, idx))
+                       return idx;
+       }
+       return -EBUSY;
+}
+
 /**
  * snd_hda_ctl_add - Add a control element and assign to the codec
  * @codec: HD-audio codec
@@ -2654,8 +2664,6 @@ static struct snd_kcontrol_new dig_mixes[] = {
        { } /* end */
 };
 
-#define SPDIF_MAX_IDX  4       /* 4 instances should be enough to probe */
-
 /**
  * snd_hda_create_spdif_out_ctls - create Output SPDIF-related controls
  * @codec: the HDA codec
@@ -2673,12 +2681,8 @@ int snd_hda_create_spdif_out_ctls(struct hda_codec *codec, hda_nid_t nid)
        struct snd_kcontrol_new *dig_mix;
        int idx;
 
-       for (idx = 0; idx < SPDIF_MAX_IDX; idx++) {
-               if (!_snd_hda_find_mixer_ctl(codec, "IEC958 Playback Switch",
-                                            idx))
-                       break;
-       }
-       if (idx >= SPDIF_MAX_IDX) {
+       idx = find_empty_mixer_ctl_idx(codec, "IEC958 Playback Switch");
+       if (idx < 0) {
                printk(KERN_ERR "hda_codec: too many IEC958 outputs\n");
                return -EBUSY;
        }
@@ -2829,12 +2833,8 @@ int snd_hda_create_spdif_in_ctls(struct hda_codec *codec, hda_nid_t nid)
        struct snd_kcontrol_new *dig_mix;
        int idx;
 
-       for (idx = 0; idx < SPDIF_MAX_IDX; idx++) {
-               if (!_snd_hda_find_mixer_ctl(codec, "IEC958 Capture Switch",
-                                            idx))
-                       break;
-       }
-       if (idx >= SPDIF_MAX_IDX) {
+       idx = find_empty_mixer_ctl_idx(codec, "IEC958 Capture Switch");
+       if (idx < 0) {
                printk(KERN_ERR "hda_codec: too many IEC958 inputs\n");
                return -EBUSY;
        }
@@ -3808,21 +3808,32 @@ int snd_hda_add_new_ctls(struct hda_codec *codec, struct snd_kcontrol_new *knew)
 
        for (; knew->name; knew++) {
                struct snd_kcontrol *kctl;
+               int addr = 0, idx = 0;
                if (knew->iface == -1)  /* skip this codec private value */
                        continue;
-               kctl = snd_ctl_new1(knew, codec);
-               if (!kctl)
-                       return -ENOMEM;
-               err = snd_hda_ctl_add(codec, 0, kctl);
-               if (err < 0) {
-                       if (!codec->addr)
-                               return err;
+               for (;;) {
                        kctl = snd_ctl_new1(knew, codec);
                        if (!kctl)
                                return -ENOMEM;
-                       kctl->id.device = codec->addr;
+                       if (addr > 0)
+                               kctl->id.device = addr;
+                       if (idx > 0)
+                               kctl->id.index = idx;
                        err = snd_hda_ctl_add(codec, 0, kctl);
-                       if (err < 0)
+                       if (!err)
+                               break;
+                       /* try first with another device index corresponding to
+                        * the codec addr; if it still fails (or it's the
+                        * primary codec), then try another control index
+                        */
+                       if (!addr && codec->addr)
+                               addr = codec->addr;
+                       else if (!idx && !knew->index) {
+                               idx = find_empty_mixer_ctl_idx(codec,
+                                                              knew->name);
+                               if (idx <= 0)
+                                       return err;
+                       } else
                                return err;
                }
        }
index b030c8eba21fdc618fe0b5bfb32cf5b156c39bba..a1c4008af89185449567331db04014a31b87007c 100644 (file)
@@ -2300,6 +2300,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = {
        SND_PCI_QUIRK(0x1028, 0x01cc, "Dell D820", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1028, 0x01de, "Dell Precision 390", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1028, 0x01f6, "Dell Latitude 131L", POS_FIX_LPIB),
+       SND_PCI_QUIRK(0x1028, 0x0470, "Dell Inspiron 1120", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x103c, 0x306d, "HP dv3", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB),
        SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB),
index 427da45d7906560f517e04de74c84ab971c0a50a..552a09e9211ff355c185314339d48b99d5a0cb65 100644 (file)
@@ -14806,8 +14806,9 @@ static int alc269_resume(struct hda_codec *codec)
 
 enum {
        ALC269_FIXUP_SONY_VAIO,
+       ALC275_FIX_SONY_VAIO_GPIO2,
        ALC269_FIXUP_DELL_M101Z,
-       ALC269_FIXUP_LENOVO_EDGE14,
+       ALC269_FIXUP_SKU_IGNORE,
        ALC269_FIXUP_ASUS_G73JW,
 };
 
@@ -14818,6 +14819,14 @@ static const struct alc_fixup alc269_fixups[] = {
                        {}
                }
        },
+       [ALC275_FIX_SONY_VAIO_GPIO2] = {
+               .verbs = (const struct hda_verb[]) {
+                       {0x01, AC_VERB_SET_GPIO_MASK, 0x04},
+                       {0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04},
+                       {0x01, AC_VERB_SET_GPIO_DATA, 0x00},
+                       { }
+               }
+       },
        [ALC269_FIXUP_DELL_M101Z] = {
                .verbs = (const struct hda_verb[]) {
                        /* Enables internal speaker */
@@ -14826,7 +14835,7 @@ static const struct alc_fixup alc269_fixups[] = {
                        {}
                }
        },
-       [ALC269_FIXUP_LENOVO_EDGE14] = {
+       [ALC269_FIXUP_SKU_IGNORE] = {
                .sku = ALC_FIXUP_SKU_IGNORE,
        },
        [ALC269_FIXUP_ASUS_G73JW] = {
@@ -14838,9 +14847,13 @@ static const struct alc_fixup alc269_fixups[] = {
 };
 
 static struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x104d, 0x9073, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
+       SND_PCI_QUIRK(0x104d, 0x907b, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
+       SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIX_SONY_VAIO_GPIO2),
        SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO),
        SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
-       SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_LENOVO_EDGE14),
+       SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
+       SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
        SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
        {}
 };
@@ -15091,28 +15104,29 @@ static int patch_alc269(struct hda_codec *codec)
 
        alc_auto_parse_customize_define(codec);
 
-       coef = alc_read_coef_idx(codec, 0);
-       if ((coef & 0x00f0) == 0x0010) {
-               if (codec->bus->pci->subsystem_vendor == 0x1025 &&
-                   spec->cdefine.platform_type == 1) {
-                       alc_codec_rename(codec, "ALC271X");
-                       spec->codec_variant = ALC269_TYPE_ALC271X;
-               } else if ((coef & 0xf000) == 0x1000) {
-                       spec->codec_variant = ALC269_TYPE_ALC270;
-               } else if ((coef & 0xf000) == 0x2000) {
-                       alc_codec_rename(codec, "ALC259");
-                       spec->codec_variant = ALC269_TYPE_ALC259;
-               } else if ((coef & 0xf000) == 0x3000) {
-                       alc_codec_rename(codec, "ALC258");
-                       spec->codec_variant = ALC269_TYPE_ALC258;
-               } else {
-                       alc_codec_rename(codec, "ALC269VB");
-                       spec->codec_variant = ALC269_TYPE_ALC269VB;
-               }
-       } else
-               alc_fix_pll_init(codec, 0x20, 0x04, 15);
-
-       alc269_fill_coef(codec);
+       if (codec->vendor_id == 0x10ec0269) {
+               coef = alc_read_coef_idx(codec, 0);
+               if ((coef & 0x00f0) == 0x0010) {
+                       if (codec->bus->pci->subsystem_vendor == 0x1025 &&
+                           spec->cdefine.platform_type == 1) {
+                               alc_codec_rename(codec, "ALC271X");
+                               spec->codec_variant = ALC269_TYPE_ALC271X;
+                       } else if ((coef & 0xf000) == 0x1000) {
+                               spec->codec_variant = ALC269_TYPE_ALC270;
+                       } else if ((coef & 0xf000) == 0x2000) {
+                               alc_codec_rename(codec, "ALC259");
+                               spec->codec_variant = ALC269_TYPE_ALC259;
+                       } else if ((coef & 0xf000) == 0x3000) {
+                               alc_codec_rename(codec, "ALC258");
+                               spec->codec_variant = ALC269_TYPE_ALC258;
+                       } else {
+                               alc_codec_rename(codec, "ALC269VB");
+                               spec->codec_variant = ALC269_TYPE_ALC269VB;
+                       }
+               } else
+                       alc_fix_pll_init(codec, 0x20, 0x04, 15);
+               alc269_fill_coef(codec);
+       }
 
        board_config = snd_hda_check_board_config(codec, ALC269_MODEL_LAST,
                                                  alc269_models,
index efa4225f5fd6c95f9f0218d38002d1ca12d88bd3..f03b2ff90496f86151b01cec6131e9cc94996a42 100644 (file)
@@ -3481,6 +3481,8 @@ static int stac92xx_auto_create_dmic_input_ctls(struct hda_codec *codec,
 
                label = hda_get_input_pin_label(codec, nid, 1);
                snd_hda_add_imux_item(dimux, label, index, &type_idx);
+               if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1)
+                       snd_hda_add_imux_item(imux, label, index, &type_idx);
 
                err = create_elem_capture_vol(codec, nid, label, type_idx,
                                              HDA_INPUT);
@@ -3492,9 +3494,6 @@ static int stac92xx_auto_create_dmic_input_ctls(struct hda_codec *codec,
                        if (err < 0)
                                return err;
                }
-
-               if (snd_hda_get_bool_hint(codec, "separate_dmux") != 1)
-                       snd_hda_add_imux_item(imux, label, index, NULL);
        }
 
        return 0;
index d63e28773eb1841ac7bdd06af0d37a0d5c03013b..6447dbb2f1238f1288c6ed6a797757a08ddd69fa 100644 (file)
@@ -40,7 +40,6 @@ struct max98088_cdata {
 };
 
 struct max98088_priv {
-       u8 reg_cache[M98088_REG_CNT];
        enum max98088_type devtype;
        void *control_data;
        struct max98088_pdata *pdata;
@@ -1588,7 +1587,7 @@ static int max98088_dai2_set_fmt(struct snd_soc_dai *codec_dai,
 
 static void max98088_sync_cache(struct snd_soc_codec *codec)
 {
-       struct max98088_priv *max98088 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
@@ -1599,14 +1598,14 @@ static void max98088_sync_cache(struct snd_soc_codec *codec)
        /* write back cached values if they're writeable and
         * different from the hardware default.
         */
-       for (i = 1; i < ARRAY_SIZE(max98088->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                if (!max98088_access[i].writable)
                        continue;
 
-               if (max98088->reg_cache[i] == max98088_reg[i])
+               if (reg_cache[i] == max98088_reg[i])
                        continue;
 
-               snd_soc_write(codec, i, max98088->reg_cache[i]);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        codec->cache_sync = 0;
@@ -1951,7 +1950,6 @@ static int max98088_probe(struct snd_soc_codec *codec)
        int ret = 0;
 
        codec->cache_sync = 1;
-       memcpy(codec->reg_cache, max98088_reg, sizeof(max98088_reg));
 
        ret = snd_soc_codec_set_cache_io(codec, 8, 8, SND_SOC_I2C);
        if (ret != 0) {
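
The max98088 change is the template for the codec patches that follow: the driver's private reg_cache array goes away, and all accesses go through the core-allocated codec->reg_cache, bounded by codec->driver->reg_cache_size. A minimal sketch of the shared sync loop, assuming the 2.6.37-era ASoC API (the function name is illustrative; real drivers also skip read-only registers, as the hunk above does):

    static void example_sync_cache(struct snd_soc_codec *codec,
                                   const u16 *defaults)
    {
            u16 *reg_cache = codec->reg_cache;  /* core-owned cache */
            int i;

            /* register 0 is usually reset/ID, so start at 1 */
            for (i = 1; i < codec->driver->reg_cache_size; i++) {
                    if (reg_cache[i] == defaults[i])
                            continue;   /* hardware already matches */
                    snd_soc_write(codec, i, reg_cache[i]);
            }
    }
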
index 9a433a5396cb781727e39a4c1c6005f1bbc9a421..deca79ea2b4b98ab6d3232c9cb3c1b4c3e6858dd 100644 (file)
@@ -41,7 +41,6 @@ static const char *wm8523_supply_names[WM8523_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8523_priv {
        enum snd_soc_control_type control_type;
-       u16 reg_cache[WM8523_REGISTER_COUNT];
        struct regulator_bulk_data supplies[WM8523_NUM_SUPPLIES];
        unsigned int sysclk;
        unsigned int rate_constraint_list[WM8523_NUM_RATES];
@@ -314,6 +313,7 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec,
                                 enum snd_soc_bias_level level)
 {
        struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        switch (level) {
@@ -344,7 +344,7 @@ static int wm8523_set_bias_level(struct snd_soc_codec *codec,
                        /* Sync back default/cached values */
                        for (i = WM8523_AIF_CTRL1;
                             i < WM8523_MAX_REGISTER; i++)
-                               snd_soc_write(codec, i, wm8523->reg_cache[i]);
+                               snd_soc_write(codec, i, reg_cache[i]);
 
 
                        msleep(100);
@@ -414,6 +414,7 @@ static int wm8523_resume(struct snd_soc_codec *codec)
 static int wm8523_probe(struct snd_soc_codec *codec)
 {
        struct wm8523_priv *wm8523 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        codec->hw_write = (hw_write_t)i2c_master_send;
@@ -470,8 +471,8 @@ static int wm8523_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU and enable ZC */
-       wm8523->reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU;
-       wm8523->reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC;
+       reg_cache[WM8523_DAC_GAINR] |= WM8523_DACR_VU;
+       reg_cache[WM8523_DAC_CTRL3] |= WM8523_ZC;
 
        wm8523_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
index 90e31e9aa6f7c66346c8360a3c1ab900519e4a98..aea60ef8aba73da14e7cd17f62b72955adfd645a 100644 (file)
@@ -41,7 +41,6 @@ static const char *wm8741_supply_names[WM8741_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8741_priv {
        enum snd_soc_control_type control_type;
-       u16 reg_cache[WM8741_REGISTER_COUNT];
        struct regulator_bulk_data supplies[WM8741_NUM_SUPPLIES];
        unsigned int sysclk;
        struct snd_pcm_hw_constraint_list *sysclk_constraints;
@@ -422,6 +421,7 @@ static int wm8741_resume(struct snd_soc_codec *codec)
 static int wm8741_probe(struct snd_soc_codec *codec)
 {
        struct wm8741_priv *wm8741 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret = 0;
 
        ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8741->control_type);
@@ -437,10 +437,10 @@ static int wm8741_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU */
-       wm8741->reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL;
-       wm8741->reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM;
-       wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL;
-       wm8741->reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERM;
+       reg_cache[WM8741_DACLLSB_ATTENUATION] |= WM8741_UPDATELL;
+       reg_cache[WM8741_DACLMSB_ATTENUATION] |= WM8741_UPDATELM;
+       reg_cache[WM8741_DACRLSB_ATTENUATION] |= WM8741_UPDATERL;
+       reg_cache[WM8741_DACRMSB_ATTENUATION] |= WM8741_UPDATERM;
 
        snd_soc_add_controls(codec, wm8741_snd_controls,
                             ARRAY_SIZE(wm8741_snd_controls));
index 8f679a13f2bcaae23653adaffafff6fedcd8e66f..87caae59e939c78465750f470d5d4ce35ee505d3 100644 (file)
@@ -65,22 +65,22 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec,
  * are using 2 wire for device control, so we cache them instead.
  */
 static const u16 wm8753_reg[] = {
-       0x0008, 0x0000, 0x000a, 0x000a,
-       0x0033, 0x0000, 0x0007, 0x00ff,
-       0x00ff, 0x000f, 0x000f, 0x007b,
-       0x0000, 0x0032, 0x0000, 0x00c3,
-       0x00c3, 0x00c0, 0x0000, 0x0000,
+       0x0000, 0x0008, 0x0000, 0x000a,
+       0x000a, 0x0033, 0x0000, 0x0007,
+       0x00ff, 0x00ff, 0x000f, 0x000f,
+       0x007b, 0x0000, 0x0032, 0x0000,
+       0x00c3, 0x00c3, 0x00c0, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000,
-       0x0000, 0x0000, 0x0000, 0x0055,
-       0x0005, 0x0050, 0x0055, 0x0050,
-       0x0055, 0x0050, 0x0055, 0x0079,
-       0x0079, 0x0079, 0x0079, 0x0079,
        0x0000, 0x0000, 0x0000, 0x0000,
-       0x0097, 0x0097, 0x0000, 0x0004,
-       0x0000, 0x0083, 0x0024, 0x01ba,
-       0x0000, 0x0083, 0x0024, 0x01ba,
-       0x0000, 0x0000, 0x0000
+       0x0055, 0x0005, 0x0050, 0x0055,
+       0x0050, 0x0055, 0x0050, 0x0055,
+       0x0079, 0x0079, 0x0079, 0x0079,
+       0x0079, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0097, 0x0097, 0x0000,
+       0x0004, 0x0000, 0x0083, 0x0024,
+       0x01ba, 0x0000, 0x0083, 0x0024,
+       0x01ba, 0x0000, 0x0000, 0x0000
 };
 
 /* codec private data */
@@ -88,57 +88,10 @@ struct wm8753_priv {
        enum snd_soc_control_type control_type;
        unsigned int sysclk;
        unsigned int pcmclk;
-       u16 reg_cache[ARRAY_SIZE(wm8753_reg)];
        int dai_func;
 };
 
-/*
- * read wm8753 register cache
- */
-static inline unsigned int wm8753_read_reg_cache(struct snd_soc_codec *codec,
-       unsigned int reg)
-{
-       u16 *cache = codec->reg_cache;
-       if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1))
-               return -1;
-       return cache[reg - 1];
-}
-
-/*
- * write wm8753 register cache
- */
-static inline void wm8753_write_reg_cache(struct snd_soc_codec *codec,
-       unsigned int reg, unsigned int value)
-{
-       u16 *cache = codec->reg_cache;
-       if (reg < 1 || reg >= (ARRAY_SIZE(wm8753_reg) + 1))
-               return;
-       cache[reg - 1] = value;
-}
-
-/*
- * write to the WM8753 register space
- */
-static int wm8753_write(struct snd_soc_codec *codec, unsigned int reg,
-       unsigned int value)
-{
-       u8 data[2];
-
-       /* data is
-        *   D15..D9 WM8753 register offset
-        *   D8...D0 register data
-        */
-       data[0] = (reg << 1) | ((value >> 8) & 0x0001);
-       data[1] = value & 0x00ff;
-
-       wm8753_write_reg_cache(codec, reg, value);
-       if (codec->hw_write(codec->control_data, data, 2) == 2)
-               return 0;
-       else
-               return -EIO;
-}
-
-#define wm8753_reset(c) wm8753_write(c, WM8753_RESET, 0)
+#define wm8753_reset(c) snd_soc_write(c, WM8753_RESET, 0)
 
 /*
  * WM8753 Controls
@@ -218,7 +171,7 @@ static int wm8753_get_dai(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec =  snd_kcontrol_chip(kcontrol);
-       int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL);
+       int mode = snd_soc_read(codec, WM8753_IOCTL);
 
        ucontrol->value.integer.value[0] = (mode & 0xc) >> 2;
        return 0;
@@ -228,7 +181,7 @@ static int wm8753_set_dai(struct snd_kcontrol *kcontrol,
        struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec =  snd_kcontrol_chip(kcontrol);
-       int mode = wm8753_read_reg_cache(codec, WM8753_IOCTL);
+       int mode = snd_soc_read(codec, WM8753_IOCTL);
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
        if (((mode & 0xc) >> 2) == ucontrol->value.integer.value[0])
@@ -738,17 +691,17 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id,
        if (pll_id == WM8753_PLL1) {
                offset = 0;
                enable = 0x10;
-               reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xffef;
+               reg = snd_soc_read(codec, WM8753_CLOCK) & 0xffef;
        } else {
                offset = 4;
                enable = 0x8;
-               reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfff7;
+               reg = snd_soc_read(codec, WM8753_CLOCK) & 0xfff7;
        }
 
        if (!freq_in || !freq_out) {
                /* disable PLL  */
-               wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0026);
-               wm8753_write(codec, WM8753_CLOCK, reg);
+               snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0026);
+               snd_soc_write(codec, WM8753_CLOCK, reg);
                return 0;
        } else {
                u16 value = 0;
@@ -759,20 +712,20 @@ static int wm8753_set_dai_pll(struct snd_soc_dai *codec_dai, int pll_id,
                /* set up N and K PLL divisor ratios */
                /* bits 8:5 = PLL_N, bits 3:0 = PLL_K[21:18] */
                value = (pll_div.n << 5) + ((pll_div.k & 0x3c0000) >> 18);
-               wm8753_write(codec, WM8753_PLL1CTL2 + offset, value);
+               snd_soc_write(codec, WM8753_PLL1CTL2 + offset, value);
 
                /* bits 8:0 = PLL_K[17:9] */
                value = (pll_div.k & 0x03fe00) >> 9;
-               wm8753_write(codec, WM8753_PLL1CTL3 + offset, value);
+               snd_soc_write(codec, WM8753_PLL1CTL3 + offset, value);
 
                /* bits 8:0 = PLL_K[8:0] */
                value = pll_div.k & 0x0001ff;
-               wm8753_write(codec, WM8753_PLL1CTL4 + offset, value);
+               snd_soc_write(codec, WM8753_PLL1CTL4 + offset, value);
 
                /* set PLL as input and enable */
-               wm8753_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 |
+               snd_soc_write(codec, WM8753_PLL1CTL1 + offset, 0x0027 |
                        (pll_div.div2 << 3));
-               wm8753_write(codec, WM8753_CLOCK, reg | enable);
+               snd_soc_write(codec, WM8753_CLOCK, reg | enable);
        }
        return 0;
 }
@@ -879,7 +832,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai,
                unsigned int fmt)
 {
        struct snd_soc_codec *codec = codec_dai->codec;
-       u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01ec;
+       u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01ec;
 
        /* interface format */
        switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
@@ -901,7 +854,7 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_PCM, voice);
+       snd_soc_write(codec, WM8753_PCM, voice);
        return 0;
 }
 
@@ -922,8 +875,8 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream,
        struct snd_soc_pcm_runtime *rtd = substream->private_data;
        struct snd_soc_codec *codec = rtd->codec;
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
-       u16 voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x01f3;
-       u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x017f;
+       u16 voice = snd_soc_read(codec, WM8753_PCM) & 0x01f3;
+       u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x017f;
 
        /* bit size */
        switch (params_format(params)) {
@@ -943,9 +896,9 @@ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream,
        /* sample rate */
        if (params_rate(params) * 384 == wm8753->pcmclk)
                srate |= 0x80;
-       wm8753_write(codec, WM8753_SRATE1, srate);
+       snd_soc_write(codec, WM8753_SRATE1, srate);
 
-       wm8753_write(codec, WM8753_PCM, voice);
+       snd_soc_write(codec, WM8753_PCM, voice);
        return 0;
 }
 
@@ -958,8 +911,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai,
        struct snd_soc_codec *codec = codec_dai->codec;
        u16 voice, ioctl;
 
-       voice = wm8753_read_reg_cache(codec, WM8753_PCM) & 0x011f;
-       ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x015d;
+       voice = snd_soc_read(codec, WM8753_PCM) & 0x011f;
+       ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x015d;
 
        /* set master/slave audio interface */
        switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -1013,8 +966,8 @@ static int wm8753_pcm_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_PCM, voice);
-       wm8753_write(codec, WM8753_IOCTL, ioctl);
+       snd_soc_write(codec, WM8753_PCM, voice);
+       snd_soc_write(codec, WM8753_IOCTL, ioctl);
        return 0;
 }
 
@@ -1026,16 +979,16 @@ static int wm8753_set_dai_clkdiv(struct snd_soc_dai *codec_dai,
 
        switch (div_id) {
        case WM8753_PCMDIV:
-               reg = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0x003f;
-               wm8753_write(codec, WM8753_CLOCK, reg | div);
+               reg = snd_soc_read(codec, WM8753_CLOCK) & 0x003f;
+               snd_soc_write(codec, WM8753_CLOCK, reg | div);
                break;
        case WM8753_BCLKDIV:
-               reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x01c7;
-               wm8753_write(codec, WM8753_SRATE2, reg | div);
+               reg = snd_soc_read(codec, WM8753_SRATE2) & 0x01c7;
+               snd_soc_write(codec, WM8753_SRATE2, reg | div);
                break;
        case WM8753_VXCLKDIV:
-               reg = wm8753_read_reg_cache(codec, WM8753_SRATE2) & 0x003f;
-               wm8753_write(codec, WM8753_SRATE2, reg | div);
+               reg = snd_soc_read(codec, WM8753_SRATE2) & 0x003f;
+               snd_soc_write(codec, WM8753_SRATE2, reg | div);
                break;
        default:
                return -EINVAL;
@@ -1050,7 +1003,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai,
                unsigned int fmt)
 {
        struct snd_soc_codec *codec = codec_dai->codec;
-       u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01e0;
+       u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01e0;
 
        /* interface format */
        switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
@@ -1072,7 +1025,7 @@ static int wm8753_hdac_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_HIFI, hifi);
+       snd_soc_write(codec, WM8753_HIFI, hifi);
        return 0;
 }
 
@@ -1085,8 +1038,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai,
        struct snd_soc_codec *codec = codec_dai->codec;
        u16 ioctl, hifi;
 
-       hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x011f;
-       ioctl = wm8753_read_reg_cache(codec, WM8753_IOCTL) & 0x00ae;
+       hifi = snd_soc_read(codec, WM8753_HIFI) & 0x011f;
+       ioctl = snd_soc_read(codec, WM8753_IOCTL) & 0x00ae;
 
        /* set master/slave audio interface */
        switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
@@ -1140,8 +1093,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai,
                return -EINVAL;
        }
 
-       wm8753_write(codec, WM8753_HIFI, hifi);
-       wm8753_write(codec, WM8753_IOCTL, ioctl);
+       snd_soc_write(codec, WM8753_HIFI, hifi);
+       snd_soc_write(codec, WM8753_IOCTL, ioctl);
        return 0;
 }
 
@@ -1162,8 +1115,8 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
        struct snd_soc_pcm_runtime *rtd = substream->private_data;
        struct snd_soc_codec *codec = rtd->codec;
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
-       u16 srate = wm8753_read_reg_cache(codec, WM8753_SRATE1) & 0x01c0;
-       u16 hifi = wm8753_read_reg_cache(codec, WM8753_HIFI) & 0x01f3;
+       u16 srate = snd_soc_read(codec, WM8753_SRATE1) & 0x01c0;
+       u16 hifi = snd_soc_read(codec, WM8753_HIFI) & 0x01f3;
        int coeff;
 
        /* is digital filter coefficient valid ? */
@@ -1172,7 +1125,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
                printk(KERN_ERR "wm8753 invalid MCLK or rate\n");
                return coeff;
        }
-       wm8753_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) |
+       snd_soc_write(codec, WM8753_SRATE1, srate | (coeff_div[coeff].sr << 1) |
                coeff_div[coeff].usb);
 
        /* bit size */
@@ -1190,7 +1143,7 @@ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream,
                break;
        }
 
-       wm8753_write(codec, WM8753_HIFI, hifi);
+       snd_soc_write(codec, WM8753_HIFI, hifi);
        return 0;
 }
 
@@ -1201,8 +1154,8 @@ static int wm8753_mode1v_set_dai_fmt(struct snd_soc_dai *codec_dai,
        u16 clock;
 
        /* set clk source as pcmclk */
-       clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb;
-       wm8753_write(codec, WM8753_CLOCK, clock);
+       clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
+       snd_soc_write(codec, WM8753_CLOCK, clock);
 
        if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0)
                return -EINVAL;
@@ -1224,8 +1177,8 @@ static int wm8753_mode2_set_dai_fmt(struct snd_soc_dai *codec_dai,
        u16 clock;
 
        /* set clk source as pcmclk */
-       clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb;
-       wm8753_write(codec, WM8753_CLOCK, clock);
+       clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
+       snd_soc_write(codec, WM8753_CLOCK, clock);
 
        if (wm8753_vdac_adc_set_dai_fmt(codec_dai, fmt) < 0)
                return -EINVAL;
@@ -1239,8 +1192,8 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai,
        u16 clock;
 
        /* set clk source as mclk */
-       clock = wm8753_read_reg_cache(codec, WM8753_CLOCK) & 0xfffb;
-       wm8753_write(codec, WM8753_CLOCK, clock | 0x4);
+       clock = snd_soc_read(codec, WM8753_CLOCK) & 0xfffb;
+       snd_soc_write(codec, WM8753_CLOCK, clock | 0x4);
 
        if (wm8753_hdac_set_dai_fmt(codec_dai, fmt) < 0)
                return -EINVAL;
@@ -1252,19 +1205,19 @@ static int wm8753_mode3_4_set_dai_fmt(struct snd_soc_dai *codec_dai,
 static int wm8753_mute(struct snd_soc_dai *dai, int mute)
 {
        struct snd_soc_codec *codec = dai->codec;
-       u16 mute_reg = wm8753_read_reg_cache(codec, WM8753_DAC) & 0xfff7;
+       u16 mute_reg = snd_soc_read(codec, WM8753_DAC) & 0xfff7;
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
 
        /* the digital mute covers the HiFi and Voice DAC's on the WM8753.
         * make sure we check if they are not both active when we mute */
        if (mute && wm8753->dai_func == 1) {
                if (!codec->active)
-                       wm8753_write(codec, WM8753_DAC, mute_reg | 0x8);
+                       snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8);
        } else {
                if (mute)
-                       wm8753_write(codec, WM8753_DAC, mute_reg | 0x8);
+                       snd_soc_write(codec, WM8753_DAC, mute_reg | 0x8);
                else
-                       wm8753_write(codec, WM8753_DAC, mute_reg);
+                       snd_soc_write(codec, WM8753_DAC, mute_reg);
        }
 
        return 0;
@@ -1273,23 +1226,23 @@ static int wm8753_mute(struct snd_soc_dai *dai, int mute)
 static int wm8753_set_bias_level(struct snd_soc_codec *codec,
                                 enum snd_soc_bias_level level)
 {
-       u16 pwr_reg = wm8753_read_reg_cache(codec, WM8753_PWR1) & 0xfe3e;
+       u16 pwr_reg = snd_soc_read(codec, WM8753_PWR1) & 0xfe3e;
 
        switch (level) {
        case SND_SOC_BIAS_ON:
                /* set vmid to 50k and unmute dac */
-               wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x00c0);
+               snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x00c0);
                break;
        case SND_SOC_BIAS_PREPARE:
                /* set vmid to 5k for quick power up */
-               wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x01c1);
+               snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x01c1);
                break;
        case SND_SOC_BIAS_STANDBY:
                /* mute dac and set vmid to 500k, enable VREF */
-               wm8753_write(codec, WM8753_PWR1, pwr_reg | 0x0141);
+               snd_soc_write(codec, WM8753_PWR1, pwr_reg | 0x0141);
                break;
        case SND_SOC_BIAS_OFF:
-               wm8753_write(codec, WM8753_PWR1, 0x0001);
+               snd_soc_write(codec, WM8753_PWR1, 0x0001);
                break;
        }
        codec->bias_level = level;
@@ -1477,7 +1430,7 @@ static void wm8753_set_dai_mode(struct snd_soc_codec *codec,
                else
                        dai->driver = &wm8753_all_dai[(wm8753->dai_func << 1) + 1];
        }
-       wm8753_write(codec, WM8753_IOCTL, wm8753->dai_func);
+       snd_soc_write(codec, WM8753_IOCTL, wm8753->dai_func);
 }
 
 static void wm8753_work(struct work_struct *work)
@@ -1495,22 +1448,19 @@ static int wm8753_suspend(struct snd_soc_codec *codec, pm_message_t state)
 
 static int wm8753_resume(struct snd_soc_codec *codec)
 {
+       u16 *reg_cache = codec->reg_cache;
        int i;
-       u8 data[2];
-       u16 *cache = codec->reg_cache;
 
        /* Sync reg_cache with the hardware */
-       for (i = 0; i < ARRAY_SIZE(wm8753_reg); i++) {
-               if (i + 1 == WM8753_RESET)
+       for (i = 1; i < ARRAY_SIZE(wm8753_reg); i++) {
+               if (i == WM8753_RESET)
                        continue;
 
                /* No point in writing hardware default values back */
-               if (cache[i] == wm8753_reg[i])
+               if (reg_cache[i] == wm8753_reg[i])
                        continue;
 
-               data[0] = ((i + 1) << 1) | ((cache[i] >> 8) & 0x0001);
-               data[1] = cache[i] & 0x00ff;
-               codec->hw_write(codec->control_data, data, 2);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        wm8753_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
@@ -1548,7 +1498,7 @@ static int run_delayed_work(struct delayed_work *dwork)
 static int wm8753_probe(struct snd_soc_codec *codec)
 {
        struct wm8753_priv *wm8753 = snd_soc_codec_get_drvdata(codec);
-       int ret = 0, reg;
+       int ret;
 
        INIT_DELAYED_WORK(&codec->delayed_work, wm8753_work);
 
@@ -1573,26 +1523,16 @@ static int wm8753_probe(struct snd_soc_codec *codec)
                              msecs_to_jiffies(caps_charge));
 
        /* set the update bits */
-       reg = wm8753_read_reg_cache(codec, WM8753_LDAC);
-       wm8753_write(codec, WM8753_LDAC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_RDAC);
-       wm8753_write(codec, WM8753_RDAC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LADC);
-       wm8753_write(codec, WM8753_LADC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_RADC);
-       wm8753_write(codec, WM8753_RADC, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LOUT1V);
-       wm8753_write(codec, WM8753_LOUT1V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_ROUT1V);
-       wm8753_write(codec, WM8753_ROUT1V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LOUT2V);
-       wm8753_write(codec, WM8753_LOUT2V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_ROUT2V);
-       wm8753_write(codec, WM8753_ROUT2V, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_LINVOL);
-       wm8753_write(codec, WM8753_LINVOL, reg | 0x0100);
-       reg = wm8753_read_reg_cache(codec, WM8753_RINVOL);
-       wm8753_write(codec, WM8753_RINVOL, reg | 0x0100);
+       snd_soc_update_bits(codec, WM8753_LDAC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_RDAC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LADC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_RADC, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LOUT1V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_ROUT1V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LOUT2V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_ROUT2V, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_LINVOL, 0x0100, 0x0100);
+       snd_soc_update_bits(codec, WM8753_RINVOL, 0x0100, 0x0100);
 
        snd_soc_add_controls(codec, wm8753_snd_controls,
                             ARRAY_SIZE(wm8753_snd_controls));
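
The snd_soc_update_bits() conversion above folds each read-modify-write pair into a single call. Roughly, and only as a sketch (this is not the core's exact implementation), each converted pair now behaves like:

    static int example_latch_vu(struct snd_soc_codec *codec,
                                unsigned int reg)
    {
            unsigned int old, new;

            old = snd_soc_read(codec, reg);
            new = (old & ~0x0100) | 0x0100;     /* set the VU bit */
            if (old == new)
                    return 0;                   /* skip redundant write */
            return snd_soc_write(codec, reg, new);
    }
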
index 9001cc48ba1371596c890ce12465e314d9022a81..1ec12eff06205f9023b57c5da7b836d6e81a1f4b 100644 (file)
@@ -50,8 +50,6 @@ static const char *wm8904_supply_names[WM8904_NUM_SUPPLIES] = {
 /* codec private data */
 struct wm8904_priv {
 
-       u16 reg_cache[WM8904_MAX_REGISTER + 1];
-
        enum wm8904_type devtype;
        void *control_data;
 
@@ -2094,7 +2092,7 @@ static int wm8904_digital_mute(struct snd_soc_dai *codec_dai, int mute)
 
 static void wm8904_sync_cache(struct snd_soc_codec *codec)
 {
-       struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
@@ -2105,14 +2103,14 @@ static void wm8904_sync_cache(struct snd_soc_codec *codec)
        /* Sync back cached values if they're different from the
         * hardware default.
         */
-       for (i = 1; i < ARRAY_SIZE(wm8904->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                if (!wm8904_access[i].writable)
                        continue;
 
-               if (wm8904->reg_cache[i] == wm8904_reg[i])
+               if (reg_cache[i] == wm8904_reg[i])
                        continue;
 
-               snd_soc_write(codec, i, wm8904->reg_cache[i]);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        codec->cache_sync = 0;
@@ -2371,6 +2369,7 @@ static int wm8904_probe(struct snd_soc_codec *codec)
 {
        struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
        struct wm8904_pdata *pdata = wm8904->pdata;
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        codec->cache_sync = 1;
@@ -2437,19 +2436,19 @@ static int wm8904_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU and enable ZC */
-       wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU;
-       wm8904->reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU;
-       wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU;
-       wm8904->reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU |
+       reg_cache[WM8904_ADC_DIGITAL_VOLUME_LEFT] |= WM8904_ADC_VU;
+       reg_cache[WM8904_ADC_DIGITAL_VOLUME_RIGHT] |= WM8904_ADC_VU;
+       reg_cache[WM8904_DAC_DIGITAL_VOLUME_LEFT] |= WM8904_DAC_VU;
+       reg_cache[WM8904_DAC_DIGITAL_VOLUME_RIGHT] |= WM8904_DAC_VU;
+       reg_cache[WM8904_ANALOGUE_OUT1_LEFT] |= WM8904_HPOUT_VU |
                WM8904_HPOUTLZC;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU |
+       reg_cache[WM8904_ANALOGUE_OUT1_RIGHT] |= WM8904_HPOUT_VU |
                WM8904_HPOUTRZC;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU |
+       reg_cache[WM8904_ANALOGUE_OUT2_LEFT] |= WM8904_LINEOUT_VU |
                WM8904_LINEOUTLZC;
-       wm8904->reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU |
+       reg_cache[WM8904_ANALOGUE_OUT2_RIGHT] |= WM8904_LINEOUT_VU |
                WM8904_LINEOUTRZC;
-       wm8904->reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE;
+       reg_cache[WM8904_CLOCK_RATES_0] &= ~WM8904_SR_MODE;
 
        /* Apply configuration from the platform data. */
        if (wm8904->pdata) {
@@ -2457,23 +2456,23 @@ static int wm8904_probe(struct snd_soc_codec *codec)
                        if (!pdata->gpio_cfg[i])
                                continue;
 
-                       wm8904->reg_cache[WM8904_GPIO_CONTROL_1 + i]
+                       reg_cache[WM8904_GPIO_CONTROL_1 + i]
                                = pdata->gpio_cfg[i] & 0xffff;
                }
 
                /* Zero is the default value for these anyway */
                for (i = 0; i < WM8904_MIC_REGS; i++)
-                       wm8904->reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i]
+                       reg_cache[WM8904_MIC_BIAS_CONTROL_0 + i]
                                = pdata->mic_cfg[i];
        }
 
        /* Set Class W by default - this will be managed by the Class
         * G widget at runtime where bypass paths are available.
         */
-       wm8904->reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR;
+       reg_cache[WM8904_CLASS_W_0] |= WM8904_CP_DYN_PWR;
 
        /* Use normal bias source */
-       wm8904->reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL;
+       reg_cache[WM8904_BIAS_CONTROL_0] &= ~WM8904_POBCTRL;
 
        wm8904_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
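
wm8904 follows the same conversion with one extra wrinkle visible above: platform-data values (GPIO and mic-bias configuration) are patched into the cache before the first sync, so a single write-out pass applies both the changed defaults and the board configuration. A hypothetical helper capturing the idea (names assumed, not from the tree):

    static void example_apply_gpio_cfg(struct snd_soc_codec *codec,
                                       const u32 *gpio_cfg,
                                       int ngpio, int base_reg)
    {
            u16 *reg_cache = codec->reg_cache;
            int i;

            for (i = 0; i < ngpio; i++) {
                    if (!gpio_cfg[i])
                            continue;   /* zero means keep the default */
                    reg_cache[base_reg + i] = gpio_cfg[i] & 0xffff;
            }
    }
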
index 2cb16f895c4607c9d323ea3cbb9d8e31c4741cb1..23086e2c976abf686c62624da2fd00ff59417594 100644 (file)
@@ -768,6 +768,7 @@ static __devinit int wm8940_i2c_probe(struct i2c_client *i2c,
 
        i2c_set_clientdata(i2c, wm8940);
        wm8940->control_data = i2c;
+       wm8940->control_type = SND_SOC_I2C;
 
        ret = snd_soc_register_codec(&i2c->dev,
                        &soc_codec_dev_wm8940, &wm8940_dai, 1);
index 9cbab8e1de0149cd8b2063406ecbae29ca0c78e5..2ac35b0be86acb37faaabdb1c0f81b321a2bb741 100644 (file)
@@ -42,8 +42,6 @@ static const char *wm8955_supply_names[WM8955_NUM_SUPPLIES] = {
 struct wm8955_priv {
        enum snd_soc_control_type control_type;
 
-       u16 reg_cache[WM8955_MAX_REGISTER + 1];
-
        unsigned int mclk_rate;
 
        int deemph;
@@ -768,6 +766,7 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec,
                                 enum snd_soc_bias_level level)
 {
        struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        switch (level) {
@@ -800,14 +799,14 @@ static int wm8955_set_bias_level(struct snd_soc_codec *codec,
                        /* Sync back cached values if they're
                         * different from the hardware default.
                         */
-                       for (i = 0; i < ARRAY_SIZE(wm8955->reg_cache); i++) {
+                       for (i = 0; i < codec->driver->reg_cache_size; i++) {
                                if (i == WM8955_RESET)
                                        continue;
 
-                               if (wm8955->reg_cache[i] == wm8955_reg[i])
+                               if (reg_cache[i] == wm8955_reg[i])
                                        continue;
 
-                               snd_soc_write(codec, i, wm8955->reg_cache[i]);
+                               snd_soc_write(codec, i, reg_cache[i]);
                        }
 
                        /* Enable VREF and VMID */
@@ -902,6 +901,7 @@ static int wm8955_probe(struct snd_soc_codec *codec)
 {
        struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
        struct wm8955_pdata *pdata = dev_get_platdata(codec->dev);
+       u16 *reg_cache = codec->reg_cache;
        int ret, i;
 
        ret = snd_soc_codec_set_cache_io(codec, 7, 9, wm8955->control_type);
@@ -934,25 +934,25 @@ static int wm8955_probe(struct snd_soc_codec *codec)
        }
 
        /* Change some default settings - latch VU and enable ZC */
-       wm8955->reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU;
-       wm8955->reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU;
-       wm8955->reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC;
-       wm8955->reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC;
-       wm8955->reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC;
-       wm8955->reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC;
-       wm8955->reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC;
+       reg_cache[WM8955_LEFT_DAC_VOLUME] |= WM8955_LDVU;
+       reg_cache[WM8955_RIGHT_DAC_VOLUME] |= WM8955_RDVU;
+       reg_cache[WM8955_LOUT1_VOLUME] |= WM8955_LO1VU | WM8955_LO1ZC;
+       reg_cache[WM8955_ROUT1_VOLUME] |= WM8955_RO1VU | WM8955_RO1ZC;
+       reg_cache[WM8955_LOUT2_VOLUME] |= WM8955_LO2VU | WM8955_LO2ZC;
+       reg_cache[WM8955_ROUT2_VOLUME] |= WM8955_RO2VU | WM8955_RO2ZC;
+       reg_cache[WM8955_MONOOUT_VOLUME] |= WM8955_MOZC;
 
        /* Also enable adaptive bass boost by default */
-       wm8955->reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB;
+       reg_cache[WM8955_BASS_CONTROL] |= WM8955_BB;
 
        /* Set platform data values */
        if (pdata) {
                if (pdata->out2_speaker)
-                       wm8955->reg_cache[WM8955_ADDITIONAL_CONTROL_2]
+                       reg_cache[WM8955_ADDITIONAL_CONTROL_2]
                                |= WM8955_ROUT2INV;
 
                if (pdata->monoin_diff)
-                       wm8955->reg_cache[WM8955_MONO_OUT_MIX_1]
+                       reg_cache[WM8955_MONO_OUT_MIX_1]
                                |= WM8955_DMEN;
        }
 
@@ -1003,6 +1003,7 @@ static __devinit int wm8955_i2c_probe(struct i2c_client *i2c,
                return -ENOMEM;
 
        i2c_set_clientdata(i2c, wm8955);
+       wm8955->control_type = SND_SOC_I2C;
 
        ret = snd_soc_register_codec(&i2c->dev,
                        &soc_codec_dev_wm8955, &wm8955_dai, 1);
index 21986c42272f07ac693cd21d15f48cff20c43352..ff6ff2f529d2a97f9e2522ceff5df18355cecd5c 100644 (file)
@@ -1013,6 +1013,7 @@ static __devinit int wm8960_i2c_probe(struct i2c_client *i2c,
                return -ENOMEM;
 
        i2c_set_clientdata(i2c, wm8960);
+       wm8960->control_type = SND_SOC_I2C;
        wm8960->control_data = i2c;
 
        ret = snd_soc_register_codec(&i2c->dev,
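
The one-line control_type initialisations added for wm8940, wm8955, wm8960, wm8971 and wm9081 all address the same omission: the codec probe() hands this field to the cache-I/O setup, and an uninitialised (zero) value selects no valid control bus. A sketch of the consuming call, using the register layout the wm8741/wm8955 hunks above pass (7-bit address, 9-bit data):

    static int example_setup_io(struct snd_soc_codec *codec,
                                enum snd_soc_control_type control_type)
    {
            int ret = snd_soc_codec_set_cache_io(codec, 7, 9,
                                                 control_type);

            if (ret != 0)
                    dev_err(codec->dev,
                            "cache I/O setup failed: %d\n", ret);
            return ret;
    }
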
index 1304ca91a11c708c567f69c45052c95f1f7be17e..7c421cc837bd3d697969b6f0c0f94f0ecb7abc5f 100644 (file)
@@ -52,8 +52,6 @@ static const char *wm8962_supply_names[WM8962_NUM_SUPPLIES] = {
 struct wm8962_priv {
        struct snd_soc_codec *codec;
 
-       u16 reg_cache[WM8962_MAX_REGISTER + 1];
-
        int sysclk;
        int sysclk_rate;
 
@@ -1991,8 +1989,7 @@ static int wm8962_put_hp_sw(struct snd_kcontrol *kcontrol,
                            struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
-       u16 *reg_cache = wm8962->reg_cache;
+       u16 *reg_cache = codec->reg_cache;
        int ret;
 
        /* Apply the update (if any) */
@@ -2020,8 +2017,7 @@ static int wm8962_put_spk_sw(struct snd_kcontrol *kcontrol,
                            struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
-       u16 *reg_cache = wm8962->reg_cache;
+       u16 *reg_cache = codec->reg_cache;
        int ret;
 
        /* Apply the update (if any) */
@@ -2329,8 +2325,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w,
                         struct snd_kcontrol *kcontrol, int event)
 {
        struct snd_soc_codec *codec = w->codec;
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
-       u16 *reg_cache = wm8962->reg_cache;
+       u16 *reg_cache = codec->reg_cache;
        int reg;
 
        switch (w->shift) {
@@ -2719,7 +2714,7 @@ static int wm8962_add_widgets(struct snd_soc_codec *codec)
 
 static void wm8962_sync_cache(struct snd_soc_codec *codec)
 {
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
@@ -2732,13 +2727,13 @@ static void wm8962_sync_cache(struct snd_soc_codec *codec)
        /* Sync back cached values if they're different from the
         * hardware default.
         */
-       for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                if (i == WM8962_SOFTWARE_RESET)
                        continue;
-               if (wm8962->reg_cache[i] == wm8962_reg[i])
+               if (reg_cache[i] == wm8962_reg[i])
                        continue;
 
-               snd_soc_write(codec, i, wm8962->reg_cache[i]);
+               snd_soc_write(codec, i, reg_cache[i]);
        }
 
        codec->cache_sync = 0;
@@ -3406,12 +3401,11 @@ EXPORT_SYMBOL_GPL(wm8962_mic_detect);
 #ifdef CONFIG_PM
 static int wm8962_resume(struct snd_soc_codec *codec)
 {
-       struct wm8962_priv *wm8962 = snd_soc_codec_get_drvdata(codec);
        u16 *reg_cache = codec->reg_cache;
        int i;
 
        /* Restore the registers */
-       for (i = 1; i < ARRAY_SIZE(wm8962->reg_cache); i++) {
+       for (i = 1; i < codec->driver->reg_cache_size; i++) {
                switch (i) {
                case WM8962_SOFTWARE_RESET:
                        continue;
@@ -3705,6 +3699,7 @@ static int wm8962_probe(struct snd_soc_codec *codec)
        struct wm8962_pdata *pdata = dev_get_platdata(codec->dev);
        struct i2c_client *i2c = container_of(codec->dev, struct i2c_client,
                                              dev);
+       u16 *reg_cache = codec->reg_cache;
        int i, trigger, irq_pol;
 
        wm8962->codec = codec;
@@ -3804,7 +3799,7 @@ static int wm8962_probe(struct snd_soc_codec *codec)
 
                /* Put the speakers into mono mode? */
                if (pdata->spk_mono)
-                       wm8962->reg_cache[WM8962_CLASS_D_CONTROL_2]
+                       reg_cache[WM8962_CLASS_D_CONTROL_2]
                                |= WM8962_SPK_MONO;
 
                /* Micbias setup, detection enable and detection
@@ -3819,16 +3814,16 @@ static int wm8962_probe(struct snd_soc_codec *codec)
        }
 
        /* Latch volume update bits */
-       wm8962->reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU;
-       wm8962->reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU;
-       wm8962->reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU;
-       wm8962->reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU;    
-       wm8962->reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU;
-       wm8962->reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU;
-       wm8962->reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU;
-       wm8962->reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU;
-       wm8962->reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU;
-       wm8962->reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU;
+       reg_cache[WM8962_LEFT_INPUT_VOLUME] |= WM8962_IN_VU;
+       reg_cache[WM8962_RIGHT_INPUT_VOLUME] |= WM8962_IN_VU;
+       reg_cache[WM8962_LEFT_ADC_VOLUME] |= WM8962_ADC_VU;
+       reg_cache[WM8962_RIGHT_ADC_VOLUME] |= WM8962_ADC_VU;
+       reg_cache[WM8962_LEFT_DAC_VOLUME] |= WM8962_DAC_VU;
+       reg_cache[WM8962_RIGHT_DAC_VOLUME] |= WM8962_DAC_VU;
+       reg_cache[WM8962_SPKOUTL_VOLUME] |= WM8962_SPKOUT_VU;
+       reg_cache[WM8962_SPKOUTR_VOLUME] |= WM8962_SPKOUT_VU;
+       reg_cache[WM8962_HPOUTL_VOLUME] |= WM8962_HPOUT_VU;
+       reg_cache[WM8962_HPOUTR_VOLUME] |= WM8962_HPOUT_VU;
 
        wm8962_add_widgets(codec);
 
index 63f6dbf5d07021887084e57541591bf1fa570fb3..9f18db6e167c0c86924b08e434d671bea98c8ff1 100644 (file)
@@ -718,6 +718,7 @@ static __devinit int wm8971_i2c_probe(struct i2c_client *i2c,
        if (wm8971 == NULL)
                return -ENOMEM;
 
+       wm8971->control_type = SND_SOC_I2C;
        i2c_set_clientdata(i2c, wm8971);
 
        ret = snd_soc_register_codec(&i2c->dev,
index ecc7c37180c7ad2158f11b9de1579bde332cb44d..a486670966bd7e6a44470ece6918fab24c638c2b 100644 (file)
@@ -1335,6 +1335,7 @@ static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
                return -ENOMEM;
 
        i2c_set_clientdata(i2c, wm9081);
+       wm9081->control_type = SND_SOC_I2C;
        wm9081->control_data = i2c;
 
        ret = snd_soc_register_codec(&i2c->dev,
index 99c046ba46bb6ed637d356579a2f4af9d1841bc3..6e5f64f627cb82e6c7379ab389ec0a10e45d9b0a 100644 (file)
@@ -141,7 +141,6 @@ static const u16 wm9090_reg_defaults[] = {
 /* This struct is used to save the context */
 struct wm9090_priv {
        struct mutex mutex;
-       u16 reg_cache[WM9090_MAX_REGISTER + 1];
        struct wm9090_platform_data pdata;
        void *control_data;
 };
@@ -552,6 +551,7 @@ static int wm9090_set_bias_level(struct snd_soc_codec *codec,
 static int wm9090_probe(struct snd_soc_codec *codec)
 {
        struct wm9090_priv *wm9090 = snd_soc_codec_get_drvdata(codec);
+       u16 *reg_cache = codec->reg_cache;
        int ret;
 
        codec->control_data = wm9090->control_data;
@@ -576,22 +576,22 @@ static int wm9090_probe(struct snd_soc_codec *codec)
        /* Configure some defaults; they will be written out when we
         * bring the bias up.
         */
-       wm9090->reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU
+       reg_cache[WM9090_IN1_LINE_INPUT_A_VOLUME] |= WM9090_IN1_VU
                | WM9090_IN1A_ZC;
-       wm9090->reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU
+       reg_cache[WM9090_IN1_LINE_INPUT_B_VOLUME] |= WM9090_IN1_VU
                | WM9090_IN1B_ZC;
-       wm9090->reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU
+       reg_cache[WM9090_IN2_LINE_INPUT_A_VOLUME] |= WM9090_IN2_VU
                | WM9090_IN2A_ZC;
-       wm9090->reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU
+       reg_cache[WM9090_IN2_LINE_INPUT_B_VOLUME] |= WM9090_IN2_VU
                | WM9090_IN2B_ZC;
-       wm9090->reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |=
+       reg_cache[WM9090_SPEAKER_VOLUME_LEFT] |=
                WM9090_SPKOUT_VU | WM9090_SPKOUTL_ZC;
-       wm9090->reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |=
+       reg_cache[WM9090_LEFT_OUTPUT_VOLUME] |=
                WM9090_HPOUT1_VU | WM9090_HPOUT1L_ZC;
-       wm9090->reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |=
+       reg_cache[WM9090_RIGHT_OUTPUT_VOLUME] |=
                WM9090_HPOUT1_VU | WM9090_HPOUT1R_ZC;
 
-       wm9090->reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA;
+       reg_cache[WM9090_CLOCKING_1] |= WM9090_TOCLK_ENA;
 
        wm9090_set_bias_level(codec, SND_SOC_BIAS_STANDBY);
 
index b2c63309a65165b471822e99268c828bbdb07777..6f5a498608b292241e93dc9c498e8ce5f6a683cc 100644 (file)
@@ -24,12 +24,47 @@ OPTIONS
 --input=::
         Input file name. (default: perf.data)
 
+-d::
+--dsos=<dso[,dso...]>::
+        Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+        Symbol to annotate.
+
+-f::
+--force::
+        Don't complain, do it.
+
+-v::
+--verbose::
+        Be more verbose. (Show symbol address, etc)
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+        vmlinux pathname.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+        Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+        Don't shorten the displayed pathnames.
+
 --stdio:: Use the stdio interface.
 
 --tui:: Use the TUI interface Use of --tui requires a tty, if one is not
        present, as when piping to other commands, the stdio interface is
        used. This interfaces starts by centering on the line with more
-       samples, TAB/UNTAB cycles thru the lines with more samples.
+       samples, TAB/UNTAB cycles through the lines with more samples.
 
 SEE ALSO
 --------
index 01b642c0bf8f974aedf2560df4e91cc5dd514a37..5eaac6f26d51e861236cfa48ab7c0509107b472c 100644 (file)
@@ -18,6 +18,9 @@ perf report.
 
 OPTIONS
 -------
+-H::
+--with-hits::
+        Show only DSOs with hits.
 -i::
 --input=::
         Input file name. (default: perf.data)
index 20d97d84ea1c37164005a4f38b8af5e23104ef93..74d7481ed7a6916f8797ed67b91b5c5d80382edc 100644 (file)
@@ -19,6 +19,18 @@ If no parameters are passed it will assume perf.data.old and perf.data.
 
 OPTIONS
 -------
+-M::
+--displacement::
+        Show position displacement relative to baseline.
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+-m::
+--modules::
+        Load module symbols. WARNING: use only with -k and LIVE kernel.
+
 -d::
 --dsos=::
        Only consider symbols in these dsos. CSV that understands
@@ -42,7 +54,7 @@ OPTIONS
 --field-separator=::
 
        Use a special separator character and don't pad with spaces, replacing
-       all occurances of this separator in symbol names (and other output)
+       all occurrences of this separator in symbol names (and other output)
        with a '.' character, that thus it's the only non valid separator.
 
 -v::
@@ -50,6 +62,13 @@ OPTIONS
        Be verbose, for instance, show the raw counts in addition to the
        diff.
 
+-f::
+--force::
+       Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
index d004e19fe6d6ffcc9c9d8e25822d251d8dda9c6e..dd84cb2f0a8861dd8656b4058ae8ed906f17169c 100644 (file)
@@ -22,7 +22,7 @@ There are a couple of variants of perf kvm:
   a performance counter profile of guest os in realtime
   of an arbitrary workload.
 
-  'perf kvm record <command>' to record the performance couinter profile
+  'perf kvm record <command>' to record the performance counter profile
   of an arbitrary workload and save it into a perf data file. If both
   --host and --guest are input, the perf data file name is perf.data.kvm.
   If there is  no --host but --guest, the file name is perf.data.guest.
@@ -40,6 +40,12 @@ There are a couple of variants of perf kvm:
 
 OPTIONS
 -------
+-i::
+--input=::
+        Input file name.
+-o::
+--output=::
+        Output file name.
 --host=::
         Collect host side performance profile.
 --guest=::
index b317102138c82f2a78a58ada898888603f8cc802..921de259ea1086f36ce5b7e6a3426cec5618deba 100644 (file)
@@ -24,6 +24,21 @@ and statistics with this 'perf lock' command.
 
   'perf lock report' reports statistical data.
 
+OPTIONS
+-------
+
+-i::
+--input=<file>::
+        Input file name.
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 SEE ALSO
 --------
 linkperf:perf[1]
index 62de1b7f4e760367337042c52760e7a49ded50c7..86b797a35aa6acae540b652345937991257daa4d 100644 (file)
@@ -115,9 +115,9 @@ Each probe argument follows below syntax.
 
 LINE SYNTAX
 -----------
-Line range is descripted by following syntax.
+Line range is described by following syntax.
 
- "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
+ "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
 
 FUNC specifies the function name of showing lines. 'RLN' is the start line
 number from function entry line, and 'RLN2' is the end line number. As same as
index a91f9f9e6e5c27f96623fd10061f12a3041926b6..52462ae26455c264aa83130c7bc50c9ca97807cc 100644 (file)
@@ -39,15 +39,24 @@ OPTIONS
           be passed as follows: '\mem:addr[:[r][w][x]]'.
           If you want to profile read-write accesses in 0x1000, just set
           'mem:0x1000:rw'.
+
+--filter=<filter>::
+        Event filter.
+
 -a::
-        System-wide collection.
+--all-cpus::
+        System-wide collection from all CPUs.
 
 -l::
         Scale counter values.
 
 -p::
 --pid=::
-       Record events on existing pid.
+       Record events on existing process ID.
+
+-t::
+--tid=::
+        Record events on existing thread ID.
 
 -r::
 --realtime=::
@@ -99,6 +108,11 @@ OPTIONS
 --data::
        Sample addresses.
 
+-T::
+--timestamp::
+       Sample timestamps. Use it with 'perf report -D' to see the timestamps,
+       for instance.
+
 -n::
 --no-samples::
        Don't sample.
@@ -109,8 +123,8 @@ Collect raw sample records from all opened counters (default for tracepoint coun
 
 -C::
 --cpu::
-Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode with inheritance mode on (default), samples are captured only when
 the thread executes on the designated CPUs. Default is to monitor all CPUs.
 
index 12052c9ed0babfc3a1c93cc01758ec3b7747ee10..8ba03d6e5398d8387b11f9caf183bed81a0eb5a2 100644 (file)
@@ -20,6 +20,11 @@ OPTIONS
 -i::
 --input=::
         Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -d::
 --dsos=::
        Only consider symbols in these dsos. CSV that understands
@@ -27,6 +32,10 @@ OPTIONS
 -n::
 --show-nr-samples::
        Show the number of samples for each symbol
+
+--showcpuutilization::
+        Show sample percentage for different CPU modes.
+
 -T::
 --threads::
        Show per-thread event counters
@@ -39,12 +48,24 @@ OPTIONS
        Only consider these symbols. CSV that understands
        file://filename entries.
 
+-U::
+--hide-unresolved::
+        Only display entries resolved to a symbol.
+
 -s::
 --sort=::
        Sort by key(s): pid, comm, dso, symbol, parent.
 
+-p::
+--parent=<regex>::
+        Regex filter to identify the parent; see '--sort parent'.
+
+-x::
+--exclude-other::
+        Only display entries with parent-match.
+
 -w::
---field-width=::
+--column-widths=<width[,width...]>::
        Force each column width to the provided list, for large terminal
        readability.
 
@@ -52,19 +73,26 @@ OPTIONS
 --field-separator=::
 
        Use a special separator character and don't pad with spaces, replacing
-       all occurances of this separator in symbol names (and other output)
+       all occurrences of this separator in symbol names (and other output)
        with a '.' character, that thus it's the only non valid separator.
 
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
 -g [type,min]::
 --call-graph::
-        Display callchains using type and min percent threshold.
+        Display call chains using type and min percent threshold.
        type can be either:
-       - flat: single column, linear exposure of callchains.
+       - flat: single column, linear exposure of call chains.
        - graph: use a graph tree, displaying absolute overhead rates.
        - fractal: like graph, but displays relative rates. Each branch of
                 the tree is considered as a new profiled object. +
        Default: fractal,0.5.
 
+--pretty=<key>::
+        Pretty printing style.  key: normal, raw
+
 --stdio:: Use the stdio interface.
 
 --tui:: Use the TUI interface, that is integrated with annotate and allows
@@ -72,6 +100,25 @@ OPTIONS
        requires a tty, if one is not present, as when piping to other
        commands, the stdio interface is used.
 
+-k::
+--vmlinux=<file>::
+        vmlinux pathname.
+
+--kallsyms=<file>::
+        kallsyms pathname.
+
+-m::
+--modules::
+        Load module symbols. WARNING: This should only be used with -k and
+        a LIVE kernel.
+
+-f::
+--force::
+        Don't complain, do it.
+
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1]
index 8417644a6166b9fcd071b88eebb9dd46f00b8a1f..46822d5fde1c0328ca8af0f7687265adeeca7f0e 100644 (file)
@@ -8,11 +8,11 @@ perf-sched - Tool to trace/measure scheduler properties (latencies)
 SYNOPSIS
 --------
 [verse]
-'perf sched' {record|latency|replay|trace}
+'perf sched' {record|latency|map|replay|trace}
 
 DESCRIPTION
 -----------
-There are four variants of perf sched:
+There are five variants of perf sched:
 
   'perf sched record <command>' to record the scheduling events
   of an arbitrary workload.
@@ -30,8 +30,22 @@ There are four variants of perf sched:
   of the workload as it occurred when it was recorded - and can repeat
   it a number of times, measuring its performance.)
 
+  'perf sched map' to print a textual context-switching outline of
+  the workload captured via perf sched record.  Columns stand for
+  individual CPUs, and the two-letter shortcuts stand for tasks that
+  are running on a CPU. A '*' denotes the CPU that had the event, and
+  a dot signals an idle CPU.
+
 OPTIONS
 -------
+-i::
+--input=<file>::
+        Input file name. (default: perf.data)
+
+-v::
+--verbose::
+        Be more verbose. (show symbol address, etc)
+
 -D::
 --dump-raw-trace=::
         Display verbose dump of the sched data.
similarity index 90%
rename from tools/perf/Documentation/perf-trace-perl.txt
rename to tools/perf/Documentation/perf-script-perl.txt
index ee6525ee6d69ada5c5cae3eb928b6626d9cfae49..5bb41e55a3ac3cbf98d728ec4de3df655d77ff61 100644 (file)
@@ -1,19 +1,19 @@
-perf-trace-perl(1)
+perf-script-perl(1)
 ==================
 
 NAME
 ----
-perf-trace-perl - Process trace data with a Perl script
+perf-script-perl - Process trace data with a Perl script
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [Perl]:script[.pl] ]
+'perf script' [-s [Perl]:script[.pl] ]
 
 DESCRIPTION
 -----------
 
-This perf trace option is used to process perf trace data using perf's
+This perf script option is used to process perf script data using perf's
 built-in Perl interpreter.  It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Perl script, if any.
@@ -21,7 +21,7 @@ Perl script, if any.
 STARTER SCRIPTS
 ---------------
 
-You can avoid reading the rest of this document by running 'perf trace
+You can avoid reading the rest of this document by running 'perf script
 -g perl' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -30,13 +30,13 @@ field for each event in the trace file.
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/perl for typical examples showing how to
 do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.pl script, while not interesting for its results,
+the check-perf-script.pl script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -112,13 +112,13 @@ write a useful trace script.  The sections below cover the rest.
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Perl script should start by setting up a Perl module
+Every perf script Perl script should start by setting up a Perl module
 search path and 'use'ing a few support modules (see module
 descriptions below):
 
 ----
- use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
- use lib "./Perf-Trace-Util/lib";
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib";
+ use lib "./perf-script-Util/lib";
  use Perf::Trace::Core;
  use Perf::Trace::Context;
  use Perf::Trace::Util;
@@ -162,7 +162,7 @@ sub trace_unhandled
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Perl modules and their associated functions.
+built-in perf script Perl modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
@@ -170,7 +170,7 @@ AVAILABLE MODULES AND FUNCTIONS
 The following sections describe the functions and variables available
 via the various Perf::Trace::* Perl modules.  To use the functions and
 variables from the given module, add the corresponding 'use
-Perf::Trace::XXX' line to your perf trace script.
+Perf::Trace::XXX' line to your perf script script.
 
 Perf::Trace::Core Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -204,7 +204,7 @@ argument.
 Perf::Trace::Util Module
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
   nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
   nsecs_secs($nsecs) - returns whole secs portion given nsecs
@@ -214,4 +214,4 @@ Various utility functions for use with perf trace:
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-trace-python.txt b/tools/perf/Documentation/perf-script-python.txt
similarity index 89%
rename from tools/perf/Documentation/perf-trace-python.txt
rename to tools/perf/Documentation/perf-script-python.txt
index 693be804dd3d88a167b8d3cdaf3023f5fa2cd5c2..36b38277422c8a9973c3a3fec671271ffeb23ffa 100644 (file)
@@ -1,19 +1,19 @@
-perf-trace-python(1)
+perf-script-python(1)
 ====================
 
 NAME
 ----
-perf-trace-python - Process trace data with a Python script
+perf-script-python - Process trace data with a Python script
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [-s [Python]:script[.py] ]
+'perf script' [-s [Python]:script[.py] ]
 
 DESCRIPTION
 -----------
 
-This perf trace option is used to process perf trace data using perf's
+This perf script option is used to process perf script data using perf's
 built-in Python interpreter.  It reads and processes the input file and
 displays the results of the trace analysis implemented in the given
 Python script, if any.
@@ -23,15 +23,15 @@ A QUICK EXAMPLE
 
 This section shows the process, start to finish, of creating a working
 Python script that aggregates and extracts useful information from a
-raw perf trace stream.  You can avoid reading the rest of this
+raw perf script stream.  You can avoid reading the rest of this
 document if an example is enough for you; the rest of the document
 provides more details on each step and lists the library functions
 available to script writers.
 
 This example actually details the steps that were used to create the
-'syscall-counts' script you see when you list the available perf trace
-scripts via 'perf trace -l'.  As such, this script also shows how to
-integrate your script into the list of general-purpose 'perf trace'
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'.  As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
 scripts listed by that command.
 
 The syscall-counts script is a simple script, but demonstrates all the
@@ -105,31 +105,31 @@ That single stream will be recorded in a file in the current directory
 called perf.data.
 
 Once we have a perf.data file containing our data, we can use the -g
-'perf trace' option to generate a Python script that will contain a
+'perf script' option to generate a Python script that will contain a
 callback handler for each event type found in the perf.data trace
 stream (for more details, see the STARTER SCRIPTS section).
 
 ----
-# perf trace -g python
-generated Python script: perf-trace.py
+# perf script -g python
+generated Python script: perf-script.py
 
 The output file created also in the current directory is named
-perf-trace.py.  Here's the file in its entirety:
+perf-script.py.  Here's the file in its entirety:
 
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # Licensed under the terms of the GNU GPL License version 2
 
 # The common_* event handler fields are the most useful fields common to
 # all events.  They don't necessarily correspond to the 'common_*' fields
 # in the format files.  Those fields not available as handler params can
 # be retrieved using Python functions of the form common_*(context).
-# See the perf-trace-python Documentation for the list of available functions.
+# See the perf-script-python Documentation for the list of available functions.
 
 import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -160,7 +160,7 @@ def print_header(event_name, cpu, secs, nsecs, pid, comm):
 ----
 
 At the top is a comment block followed by some import statements and a
-path append which every perf trace script should include.
+path append which every perf script script should include.
 
 Following that are a couple generated functions, trace_begin() and
 trace_end(), which are called at the beginning and the end of the
@@ -189,8 +189,8 @@ simply a utility function used for that purpose.  Let's rename the
 script and run it to see the default output:
 
 ----
-# mv perf-trace.py syscall-counts.py
-# perf trace -s syscall-counts.py
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
 
 raw_syscalls__sys_enter     1 00840.847582083     7506 perf                  id=1, args=
 raw_syscalls__sys_enter     1 00840.847595764     7506 perf                  id=1, args=
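
Each output line above comes from a generated event handler plus a small print_header() helper. A trimmed, hypothetical reconstruction of that pair is sketched below; the generated perf-script.py itself is the authoritative form, and the column layout here is inferred from the sample output above:

----
def print_header(event_name, cpu, secs, nsecs, pid, comm):
        # column layout inferred from the sample output above
        print "%-20s %5u %05u.%09u %8u %-20s " % \
                (event_name, cpu, secs, nsecs, pid, comm),

def raw_syscalls__sys_enter(event_name, context, common_cpu,
                common_secs, common_nsecs, common_pid, common_comm,
                id, args):
        print_header(event_name, common_cpu, common_secs, common_nsecs,
                common_pid, common_comm)
        print "id=%d, args=%s" % (id, args)
----
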
@@ -216,7 +216,7 @@ import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -279,7 +279,7 @@ import os
 import sys
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+       '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
 from perf_trace_context import *
 from Core import *
@@ -315,7 +315,7 @@ def print_syscall_totals():
 
 The script can be run just as before:
 
-  # perf trace -s syscall-counts.py
+  # perf script -s syscall-counts.py
 
 So those are the essential steps in writing and running a script.  The
 process can be generalized to any tracepoint or set of tracepoints
@@ -324,17 +324,17 @@ interested in by looking at the list of available events shown by
 'perf list' and/or look in /sys/kernel/debug/tracing events for
 detailed event and field info, record the corresponding trace data
 using 'perf record', passing it the list of interesting events,
-generate a skeleton script using 'perf trace -g python' and modify the
+generate a skeleton script using 'perf script -g python' and modify the
 code to aggregate and display it for your particular needs.
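
For instance, the "aggregate and display" step for syscall counts can be little more than a dictionary fill plus a trace_end() dump, as in this sketch (the finished syscall-counts script in the tree is the reference implementation):

----
syscall_counts = {}

def raw_syscalls__sys_enter(event_name, context, common_cpu,
                common_secs, common_nsecs, common_pid, common_comm,
                id, args):
        # count one entry per syscall id
        syscall_counts[id] = syscall_counts.get(id, 0) + 1

def trace_end():
        # dump the totals, largest first
        for id, count in sorted(syscall_counts.items(),
                        key = lambda pair: pair[1], reverse = True):
                print "syscall %d: %d" % (id, count)
----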
 
 After you've done that you may end up with a general-purpose script
 that you want to keep around and have available for future use.  By
 writing a couple of very simple shell scripts and putting them in the
 right place, you can have your script listed alongside the other
-scripts listed by the 'perf trace -l' command e.g.:
+scripts listed by the 'perf script -l' command e.g.:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
   workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
@@ -365,14 +365,14 @@ perf record -a -e raw_syscalls:sys_enter
 The 'report' script is also a shell script with the same base name as
 your script, but with -report appended.  It should also be located in
 the perf/scripts/python/bin directory.  In that script, you write the
-'perf trace -s' command-line needed for running your script:
+'perf script -s' command-line needed for running your script:
 
 ----
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
 
 #!/bin/bash
 # description: system-wide syscall counts
-perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
 ----
 
 Note that the location of the Python script given in the shell script
@@ -390,17 +390,17 @@ total 32
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
 drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
 drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
--rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-trace.py
-drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util
 -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
 ----
 
 Once you've done that (don't forget to do a new 'make install',
-otherwise your script won't show up at run-time), 'perf trace -l'
+otherwise your script won't show up at run-time), 'perf script -l'
 should show a new entry for your script:
 
 ----
-root@tropicana:~# perf trace -l
+root@tropicana:~# perf script -l
 List of available trace scripts:
   workqueue-stats                      workqueue stats (ins/exe/create/destroy)
   wakeup-latency                       system-wide min/max/avg wakeup latency
@@ -409,19 +409,19 @@ List of available trace scripts:
   syscall-counts                       system-wide syscall counts
 ----
 
-You can now perform the record step via 'perf trace record':
+You can now perform the record step via 'perf script record':
 
-  # perf trace record syscall-counts
+  # perf script record syscall-counts
 
-and display the output using 'perf trace report':
+and display the output using 'perf script report':
 
-  # perf trace report syscall-counts
+  # perf script report syscall-counts
 
 STARTER SCRIPTS
 ---------------
 
 You can quickly get started writing a script for a particular set of
-trace data by generating a skeleton script using 'perf trace -g
+trace data by generating a skeleton script using 'perf script -g
 python' in the same directory as an existing perf.data trace file.
 That will generate a starter script containing a handler for each of
 the event types in the trace file; it simply prints every available
@@ -430,13 +430,13 @@ field for each event in the trace file.
 You can also look at the existing scripts in
 ~/libexec/perf-core/scripts/python for typical examples showing how to
 do basic things like aggregate event data, print results, etc.  Also,
-the check-perf-trace.py script, while not interesting for its results,
+the check-perf-script.py script, while not interesting for its results,
 attempts to exercise all of the main scripting features.
 
 EVENT HANDLERS
 --------------
 
-When perf trace is invoked using a trace script, a user-defined
+When perf script is invoked using a trace script, a user-defined
 'handler function' is called for each event in the trace.  If there's
 no handler function defined for a given event type, the event is
 ignored (or passed to a 'trace_handled' function, see below) and the
@@ -510,7 +510,7 @@ write a useful trace script.  The sections below cover the rest.
 SCRIPT LAYOUT
 -------------
 
-Every perf trace Python script should start by setting up a Python
+Every perf script Python script should start by setting up a Python
 module search path and 'import'ing a few support modules (see module
 descriptions below):
 
@@ -519,7 +519,7 @@ descriptions below):
  import sys
 
  sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-             '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+             '/scripts/python/perf-script-Util/lib/Perf/Trace')
 
  from perf_trace_context import *
  from Core import *
@@ -559,15 +559,15 @@ def trace_unhandled(event_name, context, common_cpu, common_secs,
 ----
 
 The remaining sections provide descriptions of each of the available
-built-in perf trace Python modules and their associated functions.
+built-in perf script Python modules and their associated functions.
 
 AVAILABLE MODULES AND FUNCTIONS
 -------------------------------
 
 The following sections describe the functions and variables available
-via the various perf trace Python modules.  To use the functions and
+via the various perf script Python modules.  To use the functions and
 variables from the given module, add the corresponding 'from XXXX
-import' line to your perf trace script.
+import' line to your perf script script.
 
 Core.py Module
 ~~~~~~~~~~~~~~
@@ -610,7 +610,7 @@ argument.
 Util.py Module
 ~~~~~~~~~~~~~~
 
-Various utility functions for use with perf trace:
+Various utility functions for use with perf script:
 
   nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
   nsecs_secs(nsecs) - returns whole secs portion given nsecs
@@ -620,4 +620,4 @@ Various utility functions for use with perf trace:
 
 SEE ALSO
 --------
-linkperf:perf-trace[1]
+linkperf:perf-script[1]
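
As a quick illustration of the Util.py helpers documented above, a script might normalize a handler's split timestamp arguments like this (a hypothetical snippet, not taken from the manpage):

----
from Util import *

def show_timestamp(common_secs, common_nsecs):
        ts = nsecs(common_secs, common_nsecs)   # (secs, nsecs) -> total nsecs
        print "%d nsecs total, %d whole secs" % (ts, nsecs_secs(ts))
----
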
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-script.txt
similarity index 62%
rename from tools/perf/Documentation/perf-trace.txt
rename to tools/perf/Documentation/perf-script.txt
index 26aff6bf9e500d0d6699e968a789db651deb9722..29ad94293cd26cf033c4962352f1518b45975652 100644 (file)
@@ -1,71 +1,71 @@
-perf-trace(1)
+perf-script(1)
 =============
 
 NAME
 ----
-perf-trace - Read perf.data (created by perf record) and display trace output
+perf-script - Read perf.data (created by perf record) and display trace output
 
 SYNOPSIS
 --------
 [verse]
-'perf trace' [<options>]
-'perf trace' [<options>] record <script> [<record-options>] <command>
-'perf trace' [<options>] report <script> [script-args]
-'perf trace' [<options>] <script> <required-script-args> [<record-options>] <command>
-'perf trace' [<options>] <top-script> [script-args]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
 
 DESCRIPTION
 -----------
 This command reads the input file and displays the trace recorded.
 
-There are several variants of perf trace:
+There are several variants of perf script:
 
-  'perf trace' to see a detailed trace of the workload that was
+  'perf script' to see a detailed trace of the workload that was
   recorded.
 
   You can also run a set of pre-canned scripts that aggregate and
   summarize the raw trace data in various ways (the list of scripts is
-  available via 'perf trace -l').  The following variants allow you to
+  available via 'perf script -l').  The following variants allow you to
   record and run those scripts:
 
-  'perf trace record <script> <command>' to record the events required
-  for 'perf trace report'.  <script> is the name displayed in the
-  output of 'perf trace --list' i.e. the actual script name minus any
+  'perf script record <script> <command>' to record the events required
+  for 'perf script report'.  <script> is the name displayed in the
+  output of 'perf script --list' i.e. the actual script name minus any
   language extension.  If <command> is not specified, the events are
   recorded using the -a (system-wide) 'perf record' option.
 
-  'perf trace report <script> [args]' to run and display the results
+  'perf script report <script> [args]' to run and display the results
   of <script>.  <script> is the name displayed in the output of 'perf
   trace --list' i.e. the actual script name minus any language
-  extension.  The perf.data output from a previous run of 'perf trace
+  extension.  The perf.data output from a previous run of 'perf script
   record <script>' is used and should be present for this command to
   succeed.  [args] refers to the (mainly optional) args expected by
   the script.
 
-  'perf trace <script> <required-script-args> <command>' to both
+  'perf script <script> <required-script-args> <command>' to both
   record the events required for <script> and to run the <script>
   using 'live-mode' i.e. without writing anything to disk.  <script>
-  is the name displayed in the output of 'perf trace --list' i.e. the
+  is the name displayed in the output of 'perf script --list' i.e. the
   actual script name minus any language extension.  If <command> is
   not specified, the events are recorded using the -a (system-wide)
   'perf record' option.  If <script> has any required args, they
   should be specified before <command>.  This mode doesn't allow for
   optional script args to be specified; if optional script args are
-  desired, they can be specified using separate 'perf trace record'
-  and 'perf trace report' commands, with the stdout of the record step
+  desired, they can be specified using separate 'perf script record'
+  and 'perf script report' commands, with the stdout of the record step
   piped to the stdin of the report script, using the '-o -' and '-i -'
   options of the corresponding commands.
 
-  'perf trace <top-script>' to both record the events required for
+  'perf script <top-script>' to both record the events required for
   <top-script> and to run the <top-script> using 'live-mode'
   i.e. without writing anything to disk.  <top-script> is the name
-  displayed in the output of 'perf trace --list' i.e. the actual
+  displayed in the output of 'perf script --list' i.e. the actual
   script name minus any language extension; a <top-script> is defined
   as any script name ending with the string 'top'.
 
-  [<record-options>] can be passed to the record steps of 'perf trace
+  [<record-options>] can be passed to the record steps of 'perf script
   record' and 'live-mode' variants; this isn't possible however for
-  <top-script> 'live-mode' or 'perf trace report' variants.
+  <top-script> 'live-mode' or 'perf script report' variants.
 
   See the 'SEE ALSO' section for links to language-specific
   information on how to write and run your own trace scripts.
@@ -76,7 +76,7 @@ OPTIONS
        Any command you can specify in a shell.
 
 -D::
---dump-raw-trace=::
+--dump-raw-script=::
         Display verbose dump of the trace data.
 
 -L::
@@ -95,7 +95,7 @@ OPTIONS
 
 -g::
 --gen-script=::
-        Generate perf-trace.[ext] starter script for given language,
+        Generate perf-script.[ext] starter script for given language,
         using current perf.data.
 
 -a::
@@ -104,8 +104,15 @@ OPTIONS
         normally don't - this option allows the latter to be run in
         system-wide mode.
 
+-i::
+--input=::
+        Input file name.
+
+-d::
+--debug-mode::
+        Do various checks like sample ordering and lost events.
 
 SEE ALSO
 --------
-linkperf:perf-record[1], linkperf:perf-trace-perl[1],
-linkperf:perf-trace-python[1]
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
index 4b3a2d46b4378607f5195d12328646f5b1d7a638..b6da7affbbeeb82533387e9ba3f7c788d3e28dec 100644 (file)
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
 SYNOPSIS
 --------
 [verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -35,24 +35,54 @@ OPTIONS
         child tasks do not inherit counters
 -p::
 --pid=<pid>::
-        stat events on existing pid
+        stat events on existing process id
+
+-t::
+--tid=<tid>::
+        stat events on existing thread id
+
 
 -a::
-        system-wide collection
+--all-cpus::
+        system-wide collection from all CPUs
 
 -c::
-        scale counter values
+--scale::
+       scale/normalize counter values
+
+-r::
+--repeat=<n>::
+       repeat command and print average + stddev (max: 100)
 
 -B::
+--big-num::
         print large numbers with thousands' separators according to locale
 
 -C::
 --cpu=::
-Count only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 In per-thread mode, this option is ignored. The -a option is still necessary
 to activate system-wide monitoring. Default is to count on all CPUs.
 
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
+This option is only valid in system-wide mode.
+
+-n::
+--null::
+        null run - don't start any counters
+
+-v::
+--verbose::
+        be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
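The separated output is easy to post-process; for example, a few lines of Python can re-tabulate it (a sketch that assumes ',' as SEP and a leading count column, both of which may vary):

----
# reads output captured with e.g.: perf stat -x, <command> 2> stats.csv
import csv

with open('stats.csv') as f:
        for row in csv.reader(f):
                if len(row) >= 2:
                        count, event = row[0], row[1]
                        print "%-30s %s" % (event, count)
----
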
 EXAMPLES
 --------
 
index 1c4b5f5b7f71ec7047be7f02a369e8e27fc8710d..2c3b462f64b00531b4c8a7e5fc5cb6224dfc88d6 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command does assorted sanity tests, initially thru linked routines but
+This command does assorted sanity tests, initially through linked routines but
 also will look for a directory with more tests in the form of scripts.
 
 OPTIONS
index 4b1788355ecac3d305bf72e6f58d5a477e08ba7b..d7b79e2ba2adbe2cc0cb6468a9d84d6b73b8ed0f 100644 (file)
@@ -38,6 +38,8 @@ OPTIONS
 --process::
         Select the processes to display, by name or PID
 
+--symfs=<directory>::
+        Look for files with symbols relative to this directory.
 
 SEE ALSO
 --------
index 1f9687663f2a9cd62d6cff9f597395523b3da931..f6eb1cdafb7758162463b0ca8a25525f94c44d19 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
 DESCRIPTION
 -----------
-This command generates and displays a performance counter profile in realtime.
+This command generates and displays a performance counter profile in real time.
 
 
 OPTIONS
@@ -27,8 +27,8 @@ OPTIONS
 
 -C <cpu-list>::
 --cpu=<cpu>::
-Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
-comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
 Default is to monitor all CPUS.
 
 -d <seconds>::
@@ -50,6 +50,10 @@ Default is to monitor all CPUS.
 --count-filter=<count>::
        Only display functions with more events than this.
 
+-g::
+--group::
+        Put the counters into a counter group.
+
 -F <freq>::
 --freq=<freq>::
        Profile at this frequency.
@@ -68,7 +72,11 @@ Default is to monitor all CPUS.
 
 -p <pid>::
 --pid=<pid>::
-       Profile events on existing pid.
+       Profile events on existing process ID.
+
+-t <tid>::
+--tid=<tid>::
+        Profile events on existing thread ID.
 
 -r <priority>::
 --realtime=<priority>::
@@ -78,6 +86,18 @@ Default is to monitor all CPUS.
 --sym-annotate=<symbol>::
         Annotate this symbol.
 
+-K::
+--hide_kernel_symbols::
+        Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+        Hide user symbols.
+
+-D::
+--dump-symtab::
+        Dump the symbol table used for profiling.
+
 -v::
 --verbose::
        Be more verbose (show counter open errors, etc).
index 8c7fc0c8f0b8cd0a77ddb58fb5b712379276ebc8..c12659d8cb26fcefd8449581921aef2962cb651c 100644 (file)
@@ -7,6 +7,7 @@ include/linux/stringify.h
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
+arch/*/lib/memcpy*.S
 include/linux/poison.h
 include/linux/magic.h
 include/linux/hw_breakpoint.h
index d1db0f676a4bf14850fa0264e78fe3d482d376dc..1b9b13ee2a726848bbfe2bdd9d81a9a98f8b5940 100644 (file)
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386)
         ARCH := x86
 endif
 ifeq ($(ARCH),x86_64)
+       RAW_ARCH := x86_64
         ARCH := x86
+       ARCH_CFLAGS := -DARCH_X86_64
+       ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
 endif
 
 # CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h
 LIB_H += util/include/linux/rbtree.h
 LIB_H += util/include/linux/string.h
 LIB_H += util/include/linux/types.h
+LIB_H += util/include/linux/linkage.h
 LIB_H += util/include/asm/asm-offsets.h
 LIB_H += util/include/asm/bug.h
 LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h
 LIB_H += util/include/asm/system.h
 LIB_H += util/include/asm/uaccess.h
 LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
 LIB_H += perf.h
 LIB_H += util/cache.h
 LIB_H += util/callchain.h
@@ -390,6 +396,7 @@ LIB_H += util/build-id.h
 LIB_H += util/debug.h
 LIB_H += util/debugfs.h
 LIB_H += util/event.h
+LIB_H += util/evsel.h
 LIB_H += util/exec_cmd.h
 LIB_H += util/types.h
 LIB_H += util/levenshtein.h
@@ -398,6 +405,7 @@ LIB_H += util/parse-options.h
 LIB_H += util/parse-events.h
 LIB_H += util/quote.h
 LIB_H += util/util.h
+LIB_H += util/xyarray.h
 LIB_H += util/header.h
 LIB_H += util/help.h
 LIB_H += util/session.h
@@ -417,6 +425,7 @@ LIB_H += util/probe-finder.h
 LIB_H += util/probe-event.h
 LIB_H += util/pstack.h
 LIB_H += util/cpumap.h
+LIB_H += $(ARCH_INCLUDE)
 
 LIB_OBJS += $(OUTPUT)util/abspath.o
 LIB_OBJS += $(OUTPUT)util/alias.o
@@ -426,6 +435,7 @@ LIB_OBJS += $(OUTPUT)util/ctype.o
 LIB_OBJS += $(OUTPUT)util/debugfs.o
 LIB_OBJS += $(OUTPUT)util/environment.o
 LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evsel.o
 LIB_OBJS += $(OUTPUT)util/exec_cmd.o
 LIB_OBJS += $(OUTPUT)util/help.o
 LIB_OBJS += $(OUTPUT)util/levenshtein.o
@@ -463,6 +473,7 @@ LIB_OBJS += $(OUTPUT)util/sort.o
 LIB_OBJS += $(OUTPUT)util/hist.o
 LIB_OBJS += $(OUTPUT)util/probe-event.o
 LIB_OBJS += $(OUTPUT)util/util.o
+LIB_OBJS += $(OUTPUT)util/xyarray.o
 LIB_OBJS += $(OUTPUT)util/cpumap.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
@@ -472,6 +483,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
 # Benchmark modules
 BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
 BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -485,7 +499,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-report.o
 BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
 BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
 BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
 BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
@@ -507,7 +521,7 @@ PERFLIBS = $(LIB_FILE)
 -include config.mak
 
 ifndef NO_DWARF
-FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
+FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
 ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
        msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
        NO_DWARF := 1
@@ -554,7 +568,7 @@ ifndef NO_DWARF
 ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
        msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
 else
-       BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT
+       BASIC_CFLAGS += -DDWARF_SUPPORT
        EXTLIBS += -lelf -ldw
        LIB_OBJS += $(OUTPUT)util/probe-finder.o
 endif # PERF_HAVE_DWARF_REGS
@@ -891,13 +905,14 @@ prefix_SQ = $(subst ','\'',$(prefix))
 SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
 
-LIBS = $(PERFLIBS) $(EXTLIBS)
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
 
 BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
        $(COMPAT_CFLAGS)
 LIB_OBJS += $(COMPAT_OBJS)
 
 ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_CFLAGS += $(ARCH_CFLAGS)
 ALL_LDFLAGS += $(BASIC_LDFLAGS)
 
 export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644 (file)
index 0000000..a72e36c
--- /dev/null
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc)              \
+       extern void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644 (file)
index 0000000..d588b87
--- /dev/null
@@ -0,0 +1,4 @@
+
+MEMCPY_FN(__memcpy,
+       "x86-64-unrolled",
+       "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644 (file)
index 0000000..a57b66e
--- /dev/null
@@ -0,0 +1,2 @@
+
+#include "../../../arch/x86/lib/memcpy_64.S"
index 38dae7465142fdb86037cbae95a85a3ede9e24ed..db82021f4b91c7172a2fe43b4902e67f022af7b9 100644 (file)
@@ -12,6 +12,7 @@
 #include "../util/parse-options.h"
 #include "../util/header.h"
 #include "bench.h"
+#include "mem-memcpy-arch.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 
 static const char      *length_str     = "1MB";
 static const char      *routine        = "default";
-static bool            use_clock       = false;
+static bool            use_clock;
 static int             clock_fd;
+static bool            only_prefault;
+static bool            no_prefault;
 
 static const struct option options[] = {
        OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@ static const struct option options[] = {
                    "Specify routine to copy"),
        OPT_BOOLEAN('c', "clock", &use_clock,
                    "Use CPU clock for measuring"),
+       OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+                   "Show only the result with page faults before memcpy()"),
+       OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+                   "Show only the result without page faults before memcpy()"),
        OPT_END()
 };
 
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+
 struct routine {
        const char *name;
        const char *desc;
-       void * (*fn)(void *dst, const void *src, size_t len);
+       memcpy_t fn;
 };
 
 struct routine routines[] = {
        { "default",
          "Default memcpy() provided by glibc",
          memcpy },
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memcpy-x86-64-asm-def.h"
+#undef MEMCPY_FN
+
+#endif
+
        { NULL,
          NULL,
          NULL   }
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts)
                (double)ts->tv_usec / (double)1000000;
 }
 
+static void alloc_mem(void **dst, void **src, size_t length)
+{
+       *dst = zalloc(length);
+       if (!*dst)
+               die("memory allocation failed - maybe length is too large?\n");
+
+       *src = zalloc(length);
+       if (!*src)
+               die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+{
+       u64 clock_start = 0ULL, clock_end = 0ULL;
+       void *src = NULL, *dst = NULL;
+
+       alloc_mem(&src, &dst, len);
+
+       if (prefault)
+               fn(dst, src, len);
+
+       clock_start = get_clock();
+       fn(dst, src, len);
+       clock_end = get_clock();
+
+       free(src);
+       free(dst);
+       return clock_end - clock_start;
+}
+
+static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+{
+       struct timeval tv_start, tv_end, tv_diff;
+       void *src = NULL, *dst = NULL;
+
+       alloc_mem(&src, &dst, len);
+
+       if (prefault)
+               fn(dst, src, len);
+
+       BUG_ON(gettimeofday(&tv_start, NULL));
+       fn(dst, src, len);
+       BUG_ON(gettimeofday(&tv_end, NULL));
+
+       timersub(&tv_end, &tv_start, &tv_diff);
+
+       free(src);
+       free(dst);
+       return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do {                                      \
+               if (x < K)                                      \
+                       printf(" %14lf B/Sec", x);              \
+               else if (x < K * K)                             \
+                       printf(" %14lfd KB/Sec", x / K);        \
+               else if (x < K * K * K)                         \
+                       printf(" %14lf MB/Sec", x / K / K);     \
+               else                                            \
+                       printf(" %14lf GB/Sec", x / K / K / K); \
+       } while (0)
+
 int bench_mem_memcpy(int argc, const char **argv,
                     const char *prefix __used)
 {
        int i;
-       void *dst, *src;
-       size_t length;
-       double bps = 0.0;
-       struct timeval tv_start, tv_end, tv_diff;
-       u64 clock_start, clock_end, clock_diff;
+       size_t len;
+       double result_bps[2];
+       u64 result_clock[2];
 
-       clock_start = clock_end = clock_diff = 0ULL;
        argc = parse_options(argc, argv, options,
                             bench_mem_memcpy_usage, 0);
 
-       tv_diff.tv_sec = 0;
-       tv_diff.tv_usec = 0;
-       length = (size_t)perf_atoll((char *)length_str);
+       if (use_clock)
+               init_clock();
+
+       len = (size_t)perf_atoll((char *)length_str);
 
-       if ((s64)length <= 0) {
+       result_clock[0] = result_clock[1] = 0ULL;
+       result_bps[0] = result_bps[1] = 0.0;
+
+       if ((s64)len <= 0) {
                fprintf(stderr, "Invalid length:%s\n", length_str);
                return 1;
        }
 
+       /* same as specifying neither prefault nor no-prefault */
+       if (only_prefault && no_prefault)
+               only_prefault = no_prefault = false;
+
        for (i = 0; routines[i].name; i++) {
                if (!strcmp(routines[i].name, routine))
                        break;
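
The two helpers above time a single copy, optionally preceded by one untimed "prefault" copy so that page-fault overhead can be included in or excluded from the measurement. The same idea restated as a rough Python sketch, for illustration only (not part of the benchmark):

----
import time

def timed_copy(length, prefault):
        src = bytearray(length)
        dst = bytearray(length)
        if prefault:
                dst[:] = src            # untimed first copy touches every page
        t0 = time.time()
        dst[:] = src                    # the measured copy
        t1 = time.time()
        return length / (t1 - t0)       # rough bytes per second
----
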
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv,
                return 1;
        }
 
-       dst = zalloc(length);
-       if (!dst)
-               die("memory allocation failed - maybe length is too large?\n");
-
-       src = zalloc(length);
-       if (!src)
-               die("memory allocation failed - maybe length is too large?\n");
-
-       if (bench_format == BENCH_FORMAT_DEFAULT) {
-               printf("# Copying %s Bytes from %p to %p ...\n\n",
-                      length_str, src, dst);
-       }
-
-       if (use_clock) {
-               init_clock();
-               clock_start = get_clock();
-       } else {
-               BUG_ON(gettimeofday(&tv_start, NULL));
-       }
-
-       routines[i].fn(dst, src, length);
+       if (bench_format == BENCH_FORMAT_DEFAULT)
+               printf("# Copying %s Bytes ...\n\n", length_str);
 
-       if (use_clock) {
-               clock_end = get_clock();
-               clock_diff = clock_end - clock_start;
+       if (!only_prefault && !no_prefault) {
+               /* show both of results */
+               if (use_clock) {
+                       result_clock[0] =
+                               do_memcpy_clock(routines[i].fn, len, false);
+                       result_clock[1] =
+                               do_memcpy_clock(routines[i].fn, len, true);
+               } else {
+                       result_bps[0] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, false);
+                       result_bps[1] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, true);
+               }
        } else {
-               BUG_ON(gettimeofday(&tv_end, NULL));
-               timersub(&tv_end, &tv_start, &tv_diff);
-               bps = (double)((double)length / timeval2double(&tv_diff));
+               if (use_clock) {
+                       result_clock[pf] =
+                               do_memcpy_clock(routines[i].fn,
+                                               len, only_prefault);
+               } else {
+                       result_bps[pf] =
+                               do_memcpy_gettimeofday(routines[i].fn,
+                                               len, only_prefault);
+               }
        }
 
        switch (bench_format) {
        case BENCH_FORMAT_DEFAULT:
-               if (use_clock) {
-                       printf(" %14lf Clock/Byte\n",
-                              (double)clock_diff / (double)length);
-               } else {
-                       if (bps < K)
-                               printf(" %14lf B/Sec\n", bps);
-                       else if (bps < K * K)
-                               printf(" %14lfd KB/Sec\n", bps / 1024);
-                       else if (bps < K * K * K)
-                               printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
-                       else {
-                               printf(" %14lf GB/Sec\n",
-                                      bps / 1024 / 1024 / 1024);
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte\n",
+                                       (double)result_clock[0]
+                                       / (double)len);
+                               printf(" %14lf Clock/Byte (with prefault)\n",
+                                       (double)result_clock[1]
+                                       / (double)len);
+                       } else {
+                               print_bps(result_bps[0]);
+                               printf("\n");
+                               print_bps(result_bps[1]);
+                               printf(" (with prefault)\n");
                        }
+               } else {
+                       if (use_clock) {
+                               printf(" %14lf Clock/Byte",
+                                       (double)result_clock[pf]
+                                       / (double)len);
+                       } else
+                               print_bps(result_bps[pf]);
+
+                       printf("%s\n", only_prefault ? " (with prefault)" : "");
                }
                break;
        case BENCH_FORMAT_SIMPLE:
-               if (use_clock) {
-                       printf("%14lf\n",
-                              (double)clock_diff / (double)length);
-               } else
-                       printf("%lf\n", bps);
+               if (!only_prefault && !no_prefault) {
+                       if (use_clock) {
+                               printf("%lf %lf\n",
+                                       (double)result_clock[0] / (double)len,
+                                       (double)result_clock[1] / (double)len);
+                       } else {
+                               printf("%lf %lf\n",
+                                       result_bps[0], result_bps[1]);
+                       }
+               } else {
+                       if (use_clock) {
+                               printf("%lf\n", (double)result_clock[pf]
+                                       / (double)len);
+                       } else
+                               printf("%lf\n", result_bps[pf]);
+               }
                break;
        default:
                /* reaching this means there's some disaster: */
index 6d5604d8df9599acb55d87017f5d58e19d906395..c056cdc0691258b159665ca3e8c74d2963543ccf 100644 (file)
@@ -58,12 +58,12 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
        return hist_entry__inc_addr_samples(he, al->addr);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
        struct addr_location al;
-       struct sample_data data;
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
                           event->header.type);
                return -1;
@@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = {
        .mmap   = event__process_mmap,
        .comm   = event__process_comm,
        .fork   = event__process_task,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static int __cmd_annotate(void)
@@ -382,7 +384,7 @@ static int __cmd_annotate(void)
        int ret;
        struct perf_session *session;
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
index 44a47e13bd673eb362f956b80bd02adbec367d45..5af32ae9031ec83fc2db926df9360d1ce8f1620c 100644 (file)
@@ -36,10 +36,10 @@ static const struct option options[] = {
 
 static int __cmd_buildid_list(void)
 {
-       int err = -1;
        struct perf_session *session;
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false,
+                                   &build_id__mark_dso_hit_ops);
        if (session == NULL)
                return -1;
 
@@ -49,7 +49,7 @@ static int __cmd_buildid_list(void)
        perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
 
        perf_session__delete(session);
-       return err;
+       return 0;
 }
 
 int cmd_buildid_list(int argc, const char **argv, const char *prefix __used)
index fca1d4402910ab13a6f7aa45299e31c0289cb27c..3153e492dbcc29e1593b6df29357424dd012da99 100644 (file)
@@ -30,12 +30,13 @@ static int hists__add_entry(struct hists *self,
        return -ENOMEM;
 }
 
-static int diff__process_sample_event(event_t *event, struct perf_session *session)
+static int diff__process_sample_event(event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_session *session)
 {
        struct addr_location al;
-       struct sample_data data = { .period = 1, };
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                pr_warning("problem processing %d event, skipping it.\n",
                           event->header.type);
                return -1;
@@ -44,12 +45,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
        if (al.filtered || al.sym == NULL)
                return 0;
 
-       if (hists__add_entry(&session->hists, &al, data.period)) {
+       if (hists__add_entry(&session->hists, &al, sample->period)) {
                pr_warning("problem incrementing symbol period, skipping event\n");
                return -1;
        }
 
-       session->hists.stats.total_period += data.period;
+       session->hists.stats.total_period += sample->period;
        return 0;
 }
 
@@ -60,6 +61,8 @@ static struct perf_event_ops event_ops = {
        .exit   = event__process_task,
        .fork   = event__process_task,
        .lost   = event__process_lost,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -141,8 +144,8 @@ static int __cmd_diff(void)
        int ret, i;
        struct perf_session *session[2];
 
-       session[0] = perf_session__new(input_old, O_RDONLY, force, false);
-       session[1] = perf_session__new(input_new, O_RDONLY, force, false);
+       session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
+       session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
        if (session[0] == NULL || session[1] == NULL)
                return -ENOMEM;
 
@@ -173,7 +176,7 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
-       OPT_BOOLEAN('m', "displacement", &show_displacement,
+       OPT_BOOLEAN('M', "displacement", &show_displacement,
                    "Show position displacement relative to baseline"),
        OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
                    "dump raw trace in ASCII"),
@@ -191,6 +194,8 @@ static const struct option options[] = {
        OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
                   "separator for columns, no spaces will be added between "
                   "columns '.' is reserved."),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
index 8e3e47b064cea7ddea4e98f0fb98202b35643632..0c78ffa7bf675f46c9e631d9fa8d51fbc71aded4 100644 (file)
@@ -16,8 +16,8 @@
 static char            const *input_name = "-";
 static bool            inject_build_ids;
 
-static int event__repipe(event_t *event __used,
-                        struct perf_session *session __used)
+static int event__repipe_synth(event_t *event,
+                              struct perf_session *session __used)
 {
        uint32_t size;
        void *buf = event;
@@ -36,22 +36,30 @@ static int event__repipe(event_t *event __used,
        return 0;
 }
 
-static int event__repipe_mmap(event_t *self, struct perf_session *session)
+static int event__repipe(event_t *event, struct sample_data *sample __used,
+                        struct perf_session *session)
+{
+       return event__repipe_synth(event, session);
+}
+
+static int event__repipe_mmap(event_t *self, struct sample_data *sample,
+                             struct perf_session *session)
 {
        int err;
 
-       err = event__process_mmap(self, session);
-       event__repipe(self, session);
+       err = event__process_mmap(self, sample, session);
+       event__repipe(self, sample, session);
 
        return err;
 }
 
-static int event__repipe_task(event_t *self, struct perf_session *session)
+static int event__repipe_task(event_t *self, struct sample_data *sample,
+                             struct perf_session *session)
 {
        int err;
 
-       err = event__process_task(self, session);
-       event__repipe(self, session);
+       err = event__process_task(self, sample, session);
+       event__repipe(self, sample, session);
 
        return err;
 }
@@ -61,7 +69,7 @@ static int event__repipe_tracing_data(event_t *self,
 {
        int err;
 
-       event__repipe(self, session);
+       event__repipe_synth(self, session);
        err = event__process_tracing_data(self, session);
 
        return err;
@@ -111,7 +119,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
        return 0;
 }
 
-static int event__inject_buildid(event_t *event, struct perf_session *session)
+static int event__inject_buildid(event_t *event, struct sample_data *sample,
+                                struct perf_session *session)
 {
        struct addr_location al;
        struct thread *thread;
@@ -146,7 +155,7 @@ static int event__inject_buildid(event_t *event, struct perf_session *session)
        }
 
 repipe:
-       event__repipe(event, session);
+       event__repipe(event, sample, session);
        return 0;
 }
 
@@ -160,10 +169,10 @@ struct perf_event_ops inject_ops = {
        .read           = event__repipe,
        .throttle       = event__repipe,
        .unthrottle     = event__repipe,
-       .attr           = event__repipe,
-       .event_type     = event__repipe,
-       .tracing_data   = event__repipe,
-       .build_id       = event__repipe,
+       .attr           = event__repipe_synth,
+       .event_type     = event__repipe_synth,
+       .tracing_data   = event__repipe_synth,
+       .build_id       = event__repipe_synth,
 };
 
 extern volatile int session_done;
@@ -187,7 +196,7 @@ static int __cmd_inject(void)
                inject_ops.tracing_data = event__repipe_tracing_data;
        }
 
-       session = perf_session__new(input_name, O_RDONLY, false, true);
+       session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
        if (session == NULL)
                return -ENOMEM;
 
index 31f60a2535e0ec95b60e6b90e3e24818fa0dd972..def7ddc2fd4fbc1b1c57795f4c10c519729b035d 100644 (file)
@@ -304,22 +304,11 @@ process_raw_event(event_t *raw_event __used, void *data,
        }
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(session, event->ip.pid);
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = 1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, event->ip.pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -328,8 +317,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-       process_raw_event(event, data.raw_data, data.cpu,
-                         data.time, thread);
+       process_raw_event(event, sample->raw_data, sample->cpu,
+                         sample->time, thread);
 
        return 0;
 }
@@ -492,7 +481,8 @@ static void sort_result(void)
 static int __cmd_kmem(void)
 {
        int err = -EINVAL;
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -747,6 +737,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
index 821c1586a22b7da92cd732ad1a4ee4e05b037085..b9c6e54329713e326d74da9b08164e562e521d03 100644 (file)
@@ -834,22 +834,18 @@ static void dump_info(void)
                die("Unknown type of information\n");
 }
 
-static int process_sample_event(event_t *self, struct perf_session *s)
+static int process_sample_event(event_t *self, struct sample_data *sample,
+                               struct perf_session *s)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(s, sample->tid);
 
-       bzero(&data, sizeof(data));
-       event__parse_sample(self, s->sample_type, &data);
-
-       thread = perf_session__findnew(s, data.tid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                        self->header.type);
                return -1;
        }
 
-       process_raw_event(data.raw_data, data.cpu, data.time, thread);
+       process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
 
        return 0;
 }
@@ -862,7 +858,7 @@ static struct perf_event_ops eops = {
 
 static int read_events(void)
 {
-       session = perf_session__new(input_name, O_RDONLY, 0, false);
+       session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
        if (!session)
                die("Initializing perf session failed\n");
 
@@ -947,6 +943,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
@@ -982,9 +981,9 @@ int cmd_lock(int argc, const char **argv, const char *prefix __used)
                                usage_with_options(report_usage, report_options);
                }
                __cmd_report();
-       } else if (!strcmp(argv[0], "trace")) {
-               /* Aliased to 'perf trace' */
-               return cmd_trace(argc, argv, prefix);
+       } else if (!strcmp(argv[0], "script")) {
+               /* Aliased to 'perf script' */
+               return cmd_script(argc, argv, prefix);
        } else if (!strcmp(argv[0], "info")) {
                if (argc) {
                        argc = parse_options(argc, argv,
index 2e000c068cc5a377d87923bb302a383abafd3a33..add163c9f0e7d4db3fe01afe704a72c5a923597b 100644 (file)
@@ -249,6 +249,11 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
             !params.show_lines))
                usage_with_options(probe_usage, options);
 
+       /*
+        * Only consider the user's kernel image path if given.
+        */
+       symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+
        if (params.list_events) {
                if (params.mod_events) {
                        pr_err("  Error: Don't use --list with --add/--del.\n");
index 564491fa18b27838dd79125954bc744f51f7fe2c..7bc0490354847a949d2f2105cca86e67f8e0b1b4 100644 (file)
@@ -18,6 +18,7 @@
 
 #include "util/header.h"
 #include "util/event.h"
+#include "util/evsel.h"
 #include "util/debug.h"
 #include "util/session.h"
 #include "util/symbol.h"
 #include <sched.h>
 #include <sys/mman.h>
 
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
 enum write_mode_t {
        WRITE_FORCE,
        WRITE_APPEND
 };
 
-static int                     *fd[MAX_NR_CPUS][MAX_COUNTERS];
-
 static u64                     user_interval                   = ULLONG_MAX;
 static u64                     default_interval                =      0;
+static u64                     sample_type;
 
-static int                     nr_cpus                         =      0;
+static struct cpu_map          *cpus;
 static unsigned int            page_size;
 static unsigned int            mmap_pages                      =    128;
 static unsigned int            user_freq                       = UINT_MAX;
@@ -48,11 +50,11 @@ static const char           *output_name                    = "perf.data";
 static int                     group                           =      0;
 static int                     realtime_prio                   =      0;
 static bool                    raw_samples                     =  false;
+static bool                    sample_id_all_avail             =   true;
 static bool                    system_wide                     =  false;
 static pid_t                   target_pid                      =     -1;
 static pid_t                   target_tid                      =     -1;
-static pid_t                   *all_tids                       =      NULL;
-static int                     thread_num                      =      0;
+static struct thread_map       *threads;
 static pid_t                   child_pid                       =     -1;
 static bool                    no_inherit                      =  false;
 static enum write_mode_t       write_mode                      = WRITE_FORCE;
@@ -60,7 +62,9 @@ static bool                   call_graph                      =  false;
 static bool                    inherit_stat                    =  false;
 static bool                    no_samples                      =  false;
 static bool                    sample_address                  =  false;
+static bool                    sample_time                     =  false;
 static bool                    no_buildid                      =  false;
+static bool                    no_buildid_cache                =  false;
 
 static long                    samples                         =      0;
 static u64                     bytes_written                   =      0;
@@ -77,7 +81,6 @@ static struct perf_session    *session;
 static const char              *cpu_list;
 
 struct mmap_data {
-       int                     counter;
        void                    *base;
        unsigned int            mask;
        unsigned int            prev;
@@ -128,6 +131,7 @@ static void write_output(void *buf, size_t size)
 }
 
 static int process_synthesized_event(event_t *event,
+                                    struct sample_data *sample __used,
                                     struct perf_session *self __used)
 {
        write_output(event, event->header.size);
@@ -224,12 +228,12 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
        return h_attr;
 }
 
-static void create_counter(int counter, int cpu)
+static void create_counter(struct perf_evsel *evsel, int cpu)
 {
-       char *filter = filters[counter];
-       struct perf_event_attr *attr = attrs + counter;
+       char *filter = evsel->filter;
+       struct perf_event_attr *attr = &evsel->attr;
        struct perf_header_attr *h_attr;
-       int track = !counter; /* only the first counter needs these */
+       int track = !evsel->idx; /* only the first counter needs these */
        int thread_index;
        int ret;
        struct {
@@ -238,6 +242,19 @@ static void create_counter(int counter, int cpu)
                u64 time_running;
                u64 id;
        } read_data;
+       /*
+        * Check if parse_single_tracepoint_event has already asked for
+        * PERF_SAMPLE_TIME.
+        *
+        * XXX this is kludgy but short term fix for problems introduced by
+        * eac23d1c that broke 'perf script' by having different sample_types
+        * when using multiple tracepoint events when we use a perf binary
+        * that tries to use sample_id_all on an older kernel.
+        *
+        * We need to move counter creation to perf_session, support
+        * different sample_types, etc.
+        */
+       bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
 
        attr->read_format       = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                  PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -280,6 +297,10 @@ static void create_counter(int counter, int cpu)
        if (system_wide)
                attr->sample_type       |= PERF_SAMPLE_CPU;
 
+       if (sample_id_all_avail &&
+           (sample_time || system_wide || !no_inherit || cpu_list))
+               attr->sample_type       |= PERF_SAMPLE_TIME;
+
        if (raw_samples) {
                attr->sample_type       |= PERF_SAMPLE_TIME;
                attr->sample_type       |= PERF_SAMPLE_RAW;
@@ -293,13 +314,14 @@ static void create_counter(int counter, int cpu)
                attr->disabled = 1;
                attr->enable_on_exec = 1;
        }
+retry_sample_id:
+       attr->sample_id_all = sample_id_all_avail ? 1 : 0;
 
-       for (thread_index = 0; thread_index < thread_num; thread_index++) {
+       for (thread_index = 0; thread_index < threads->nr; thread_index++) {
 try_again:
-               fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr,
-                               all_tids[thread_index], cpu, group_fd, 0);
+               FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);
 
-               if (fd[nr_cpu][counter][thread_index] < 0) {
+               if (FD(evsel, nr_cpu, thread_index) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES)
@@ -309,6 +331,15 @@ try_again:
                        else if (err ==  ENODEV && cpu_list) {
                                die("No such device - did you specify"
                                        " an out-of-range profile CPU?\n");
+                       } else if (err == EINVAL && sample_id_all_avail) {
+                               /*
+                                * Old kernel, no attr->sample_id_all field
+                                */
+                               sample_id_all_avail = false;
+                               if (!sample_time && !raw_samples && !time_needed)
+                                       attr->sample_type &= ~PERF_SAMPLE_TIME;
+
+                               goto retry_sample_id;
                        }
 
                        /*
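
The EINVAL fallback above is a run-time capability probe: kernels that predate the sample_id_all attribute bit reject it with EINVAL, so perf retries without it. The same probe distilled into a standalone helper (the function name is made up for this sketch):

        static bool kernel_supports_sample_id_all(void)
        {
                struct perf_event_attr attr = {
                        .type           = PERF_TYPE_SOFTWARE,
                        .config         = PERF_COUNT_SW_CPU_CLOCK,
                        .sample_id_all  = 1,    /* the bit old kernels reject */
                };
                int fd = sys_perf_event_open(&attr, 0, -1, -1, 0);

                if (fd >= 0) {
                        close(fd);
                        return true;
                }
                return errno != EINVAL; /* EINVAL => attr bit unknown */
        }
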
@@ -326,8 +357,8 @@ try_again:
                                goto try_again;
                        }
                        printf("\n");
-                       error("perfcounter syscall returned with %d (%s)\n",
-                                       fd[nr_cpu][counter][thread_index], strerror(err));
+                       error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
+                             FD(evsel, nr_cpu, thread_index), strerror(err));
 
 #if defined(__i386__) || defined(__x86_64__)
                        if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -341,7 +372,7 @@ try_again:
                        exit(-1);
                }
 
-               h_attr = get_header_attr(attr, counter);
+               h_attr = get_header_attr(attr, evsel->idx);
                if (h_attr == NULL)
                        die("nomem\n");
 
@@ -352,7 +383,7 @@ try_again:
                        }
                }
 
-               if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
+               if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
                        perror("Unable to read perf file descriptor");
                        exit(-1);
                }
@@ -362,43 +393,44 @@ try_again:
                        exit(-1);
                }
 
-               assert(fd[nr_cpu][counter][thread_index] >= 0);
-               fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK);
+               assert(FD(evsel, nr_cpu, thread_index) >= 0);
+               fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
 
                /*
                 * First counter acts as the group leader:
                 */
                if (group && group_fd == -1)
-                       group_fd = fd[nr_cpu][counter][thread_index];
-
-               if (counter || thread_index) {
-                       ret = ioctl(fd[nr_cpu][counter][thread_index],
-                                       PERF_EVENT_IOC_SET_OUTPUT,
-                                       fd[nr_cpu][0][0]);
+                       group_fd = FD(evsel, nr_cpu, thread_index);
+
+               if (evsel->idx || thread_index) {
+                       struct perf_evsel *first;
+                       first = list_entry(evsel_list.next, struct perf_evsel, node);
+                       ret = ioctl(FD(evsel, nr_cpu, thread_index),
+                                   PERF_EVENT_IOC_SET_OUTPUT,
+                                   FD(first, nr_cpu, 0));
                        if (ret) {
                                error("failed to set output: %d (%s)\n", errno,
                                                strerror(errno));
                                exit(-1);
                        }
                } else {
-                       mmap_array[nr_cpu].counter = counter;
                        mmap_array[nr_cpu].prev = 0;
                        mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
                        mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
-                               PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
+                               PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
                        if (mmap_array[nr_cpu].base == MAP_FAILED) {
                                error("failed to mmap with %d (%s)\n", errno, strerror(errno));
                                exit(-1);
                        }
 
-                       event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
+                       event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
                        event_array[nr_poll].events = POLLIN;
                        nr_poll++;
                }
 
                if (filter != NULL) {
-                       ret = ioctl(fd[nr_cpu][counter][thread_index],
-                                       PERF_EVENT_IOC_SET_FILTER, filter);
+                       ret = ioctl(FD(evsel, nr_cpu, thread_index),
+                                   PERF_EVENT_IOC_SET_FILTER, filter);
                        if (ret) {
                                error("failed to set filter with %d (%s)\n", errno,
                                                strerror(errno));
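
The ioctl above funnels every non-leader counter into the first counter's ring buffer, so one mmap per CPU serves all events. The pattern in isolation (illustrative only; attrs assumed already set up, error handling omitted):

        int leader = sys_perf_event_open(&attr0, pid, cpu, -1, 0);
        int member = sys_perf_event_open(&attr1, pid, cpu, -1, 0);

        /* map the ring buffer from the leader only */
        void *ring = mmap(NULL, (mmap_pages + 1) * page_size,
                          PROT_READ | PROT_WRITE, MAP_SHARED, leader, 0);

        /* redirect the member's samples into the leader's ring */
        ioctl(member, PERF_EVENT_IOC_SET_OUTPUT, leader);
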
@@ -406,15 +438,19 @@ try_again:
                        }
                }
        }
+
+       if (!sample_type)
+               sample_type = attr->sample_type;
 }
 
 static void open_counters(int cpu)
 {
-       int counter;
+       struct perf_evsel *pos;
 
        group_fd = -1;
-       for (counter = 0; counter < nr_counters; counter++)
-               create_counter(counter, cpu);
+
+       list_for_each_entry(pos, &evsel_list, node)
+               create_counter(pos, cpu);
 
        nr_cpu++;
 }
@@ -437,7 +473,8 @@ static void atexit_header(void)
        if (!pipe_output) {
                session->header.data_size += bytes_written;
 
-               process_buildids();
+               if (!no_buildid)
+                       process_buildids();
                perf_header__write(&session->header, output, true);
                perf_session__delete(session);
                symbol__exit();
@@ -500,7 +537,7 @@ static void mmap_read_all(void)
 
 static int __cmd_record(int argc, const char **argv)
 {
-       int i, counter;
+       int i;
        struct stat st;
        int flags;
        int err;
@@ -552,19 +589,22 @@ static int __cmd_record(int argc, const char **argv)
        }
 
        session = perf_session__new(output_name, O_WRONLY,
-                                   write_mode == WRITE_FORCE, false);
+                                   write_mode == WRITE_FORCE, false, NULL);
        if (session == NULL) {
                pr_err("Not enough memory for reading perf file header\n");
                return -1;
        }
 
+       if (!no_buildid)
+               perf_header__set_feat(&session->header, HEADER_BUILD_ID);
+
        if (!file_new) {
                err = perf_header__read(session, output);
                if (err < 0)
                        goto out_delete_session;
        }
 
-       if (have_tracepoints(attrs, nr_counters))
+       if (have_tracepoints(&evsel_list))
                perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
 
        /*
@@ -612,7 +652,7 @@ static int __cmd_record(int argc, const char **argv)
                }
 
                if (!system_wide && target_tid == -1 && target_pid == -1)
-                       all_tids[0] = child_pid;
+                       threads->map[0] = child_pid;
 
                close(child_ready_pipe[1]);
                close(go_pipe[0]);
@@ -626,19 +666,15 @@ static int __cmd_record(int argc, const char **argv)
                close(child_ready_pipe[0]);
        }
 
-       nr_cpus = read_cpu_map(cpu_list);
-       if (nr_cpus < 1) {
-               perror("failed to collect number of CPUs");
-               return -1;
-       }
-
        if (!system_wide && no_inherit && !cpu_list) {
                open_counters(-1);
        } else {
-               for (i = 0; i < nr_cpus; i++)
-                       open_counters(cpumap[i]);
+               for (i = 0; i < cpus->nr; i++)
+                       open_counters(cpus->map[i]);
        }
 
+       perf_session__set_sample_type(session, sample_type);
+
        if (pipe_output) {
                err = perf_header__write_pipe(output);
                if (err < 0)
@@ -651,6 +687,8 @@ static int __cmd_record(int argc, const char **argv)
 
        post_processing_offset = lseek(output, 0, SEEK_CUR);
 
+       perf_session__set_sample_id_all(session, sample_id_all_avail);
+
        if (pipe_output) {
                err = event__synthesize_attrs(&session->header,
                                              process_synthesized_event,
@@ -667,7 +705,7 @@ static int __cmd_record(int argc, const char **argv)
                        return err;
                }
 
-               if (have_tracepoints(attrs, nr_counters)) {
+               if (have_tracepoints(&evsel_list)) {
                        /*
                         * FIXME err <= 0 here actually means that
                         * there were no tracepoints so it's not really
@@ -676,8 +714,7 @@ static int __cmd_record(int argc, const char **argv)
                         * return this more properly and also
                         * propagate errors that now are calling die()
                         */
-                       err = event__synthesize_tracing_data(output, attrs,
-                                                            nr_counters,
+                       err = event__synthesize_tracing_data(output, &evsel_list,
                                                             process_synthesized_event,
                                                             session);
                        if (err <= 0) {
@@ -751,13 +788,13 @@ static int __cmd_record(int argc, const char **argv)
 
                if (done) {
                        for (i = 0; i < nr_cpu; i++) {
-                               for (counter = 0;
-                                       counter < nr_counters;
-                                       counter++) {
+                               struct perf_evsel *pos;
+
+                               list_for_each_entry(pos, &evsel_list, node) {
                                        for (thread = 0;
-                                               thread < thread_num;
+                                               thread < threads->nr;
                                                thread++)
-                                               ioctl(fd[i][counter][thread],
+                                               ioctl(FD(pos, i, thread),
                                                        PERF_EVENT_IOC_DISABLE);
                                }
                        }
@@ -831,16 +868,20 @@ const struct option record_options[] = {
                    "per thread counts"),
        OPT_BOOLEAN('d', "data", &sample_address,
                    "Sample addresses"),
+       OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
        OPT_BOOLEAN('n', "no-samples", &no_samples,
                    "don't sample"),
-       OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
+       OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
                    "do not update the buildid cache"),
+       OPT_BOOLEAN('B', "no-buildid", &no_buildid,
+                   "do not collect buildids in perf.data"),
        OPT_END()
 };
 
 int cmd_record(int argc, const char **argv, const char *prefix __used)
 {
-       int i, j, err = -ENOMEM;
+       int err = -ENOMEM;
+       struct perf_evsel *pos;
 
        argc = parse_options(argc, argv, record_options, record_usage,
                            PARSE_OPT_STOP_AT_NON_OPTION);
@@ -859,41 +900,36 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
        }
 
        symbol__init();
-       if (no_buildid)
+
+       if (no_buildid_cache || no_buildid)
                disable_buildid_cache();
 
-       if (!nr_counters) {
-               nr_counters     = 1;
-               attrs[0].type   = PERF_TYPE_HARDWARE;
-               attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
+       if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) {
+               pr_err("Not enough memory for event selector list\n");
+               goto out_symbol_exit;
        }
 
-       if (target_pid != -1) {
+       if (target_pid != -1)
                target_tid = target_pid;
-               thread_num = find_all_tid(target_pid, &all_tids);
-               if (thread_num <= 0) {
-                       fprintf(stderr, "Can't find all threads of pid %d\n",
-                                       target_pid);
-                       usage_with_options(record_usage, record_options);
-               }
-       } else {
-               all_tids=malloc(sizeof(pid_t));
-               if (!all_tids)
-                       goto out_symbol_exit;
 
-               all_tids[0] = target_tid;
-               thread_num = 1;
+       threads = thread_map__new(target_pid, target_tid);
+       if (threads == NULL) {
+               pr_err("Problem finding threads to monitor\n");
+               usage_with_options(record_usage, record_options);
        }
 
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++) {
-                       fd[i][j] = malloc(sizeof(int)*thread_num);
-                       if (!fd[i][j])
-                               goto out_free_fd;
-               }
+       cpus = cpu_map__new(cpu_list);
+       if (cpus == NULL) {
+               perror("failed to parse CPUs map");
+               return -1;
        }
-       event_array = malloc(
-               sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
+
+       list_for_each_entry(pos, &evsel_list, node) {
+               if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+                       goto out_free_fd;
+       }
+       event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS *
+                             MAX_COUNTERS * threads->nr));
        if (!event_array)
                goto out_free_fd;
 
@@ -920,12 +956,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 out_free_event_array:
        free(event_array);
 out_free_fd:
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++)
-                       free(fd[i][j]);
-       }
-       free(all_tids);
-       all_tids = NULL;
+       thread_map__delete(threads);
+       threads = NULL;
 out_symbol_exit:
        symbol__exit();
        return err;
index 5de405d452300318541338293563d8ebc41ccb87..75183a4518e60d23db05e36e585fc178df731a19 100644
@@ -150,13 +150,13 @@ static int add_event_total(struct perf_session *session,
        return 0;
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data = { .period = 1, };
        struct addr_location al;
        struct perf_event_attr *attr;
 
-       if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
+       if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
                fprintf(stderr, "problem processing %d event, skipping it.\n",
                        event->header.type);
                return -1;
@@ -165,14 +165,14 @@ static int process_sample_event(event_t *event, struct perf_session *session)
        if (al.filtered || (hide_unresolved && al.sym == NULL))
                return 0;
 
-       if (perf_session__add_hist_entry(session, &al, &data)) {
+       if (perf_session__add_hist_entry(session, &al, sample)) {
                pr_debug("problem incrementing symbol period, skipping event\n");
                return -1;
        }
 
-       attr = perf_header__find_attr(data.id, &session->header);
+       attr = perf_header__find_attr(sample->id, &session->header);
 
-       if (add_event_total(session, &data, attr)) {
+       if (add_event_total(session, sample, attr)) {
                pr_debug("problem adding event period\n");
                return -1;
        }
@@ -180,7 +180,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
        return 0;
 }
 
-static int process_read_event(event_t *event, struct perf_session *session __used)
+static int process_read_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        struct perf_event_attr *attr;
 
@@ -243,6 +244,8 @@ static struct perf_event_ops event_ops = {
        .event_type = event__process_event_type,
        .tracing_data = event__process_tracing_data,
        .build_id = event__process_build_id,
+       .ordered_samples = true,
+       .ordering_requires_timestamps = true,
 };
 
 extern volatile int session_done;
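
With this series the session layer parses each sample once and hands every handler the decoded sample_data, while the two ordering flags ask the session to queue samples and deliver them sorted by timestamp. A minimal consumer under the new signature might look like this sketch:

        static int my_sample(event_t *event __used, struct sample_data *sample,
                             struct perf_session *session __used)
        {
                /* sample->{pid,tid,time,cpu,period} arrive pre-parsed */
                printf("cpu %u pid %d time %llu period %llu\n",
                       sample->cpu, sample->pid,
                       (unsigned long long)sample->time,
                       (unsigned long long)sample->period);
                return 0;
        }

        static struct perf_event_ops my_ops = {
                .sample                         = my_sample,
                .ordered_samples                = true, /* sort by time */
                .ordering_requires_timestamps   = true,
        };
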
@@ -307,7 +310,7 @@ static int __cmd_report(void)
 
        signal(SIGINT, sig_handler);
 
-       session = perf_session__new(input_name, O_RDONLY, force, false);
+       session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -442,6 +445,8 @@ static const struct option options[] = {
                    "dump raw trace in ASCII"),
        OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
+       OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+                  "file", "kallsyms pathname"),
        OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
        OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
                    "load module symbols - WARNING: use only with -k and LIVE kernel"),
@@ -478,6 +483,8 @@ static const struct option options[] = {
                   "columns '.' is reserved."),
        OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
                    "Only display entries resolved to a symbol"),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
index 55f3b5dcc731417198a2e5fd29ac8eefd96e1a5e..7a4ebeb8b016b4ca01c14f393b3c5fab98567523 100644
@@ -1606,25 +1606,15 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
                process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
 }
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
        struct thread *thread;
 
        if (!(session->sample_type & PERF_SAMPLE_RAW))
                return 0;
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = -1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, data.pid);
+       thread = perf_session__findnew(session, sample->pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -1633,10 +1623,11 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-       if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
+       if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
                return 0;
 
-       process_raw_event(event, session, data.raw_data, data.cpu, data.time, thread);
+       process_raw_event(event, session, sample->raw_data, sample->cpu,
+                         sample->time, thread);
 
        return 0;
 }
@@ -1652,7 +1643,8 @@ static struct perf_event_ops event_ops = {
 static int read_events(void)
 {
        int err = -EINVAL;
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -1869,6 +1861,9 @@ static int __cmd_record(int argc, const char **argv)
        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);
 
@@ -1888,10 +1883,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
                usage_with_options(sched_usage, sched_options);
 
        /*
-        * Aliased to 'perf trace' for now:
+        * Aliased to 'perf script' for now:
         */
-       if (!strcmp(argv[0], "trace"))
-               return cmd_trace(argc, argv, prefix);
+       if (!strcmp(argv[0], "script"))
+               return cmd_script(argc, argv, prefix);
 
        symbol__init();
        if (!strncmp(argv[0], "rec", 3)) {
similarity index 83%
rename from tools/perf/builtin-trace.c
rename to tools/perf/builtin-script.c
index 86cfe3800e6bf5580718fd3df3c3788f9edbda31..150a606002eb7b4eb281e69c9c8287eed1931077 100644
@@ -56,29 +56,18 @@ static void setup_scripting(void)
 
 static int cleanup_scripting(void)
 {
-       pr_debug("\nperf trace script stopped\n");
+       pr_debug("\nperf script stopped\n");
 
        return scripting_ops->stop_script();
 }
 
 static char const              *input_name = "perf.data";
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event, struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
-       struct thread *thread;
+       struct thread *thread = perf_session__findnew(session, event->ip.pid);
 
-       memset(&data, 0, sizeof(data));
-       data.time = -1;
-       data.cpu = -1;
-       data.period = 1;
-
-       event__parse_sample(event, session->sample_type, &data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
-                   data.pid, data.tid, data.ip, data.period);
-
-       thread = perf_session__findnew(session, event->ip.pid);
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
@@ -87,13 +76,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
 
        if (session->sample_type & PERF_SAMPLE_RAW) {
                if (debug_mode) {
-                       if (data.time < last_timestamp) {
+                       if (sample->time < last_timestamp) {
                                pr_err("Samples misordered, previous: %llu "
                                        "this: %llu\n", last_timestamp,
-                                       data.time);
+                                       sample->time);
                                nr_unordered++;
                        }
-                       last_timestamp = data.time;
+                       last_timestamp = sample->time;
                        return 0;
                }
                /*
@@ -101,21 +90,12 @@ static int process_sample_event(event_t *event, struct perf_session *session)
                 * field, although it should be the same as this perf
                 * event pid
                 */
-               scripting_ops->process_event(data.cpu, data.raw_data,
-                                            data.raw_size,
-                                            data.time, thread->comm);
+               scripting_ops->process_event(sample->cpu, sample->raw_data,
+                                            sample->raw_size,
+                                            sample->time, thread->comm);
        }
 
-       session->hists.stats.total_period += data.period;
-       return 0;
-}
-
-static u64 nr_lost;
-
-static int process_lost_event(event_t *event, struct perf_session *session __used)
-{
-       nr_lost += event->lost.lost;
-
+       session->hists.stats.total_period += sample->period;
        return 0;
 }
 
@@ -126,7 +106,7 @@ static struct perf_event_ops event_ops = {
        .event_type = event__process_event_type,
        .tracing_data = event__process_tracing_data,
        .build_id = event__process_build_id,
-       .lost = process_lost_event,
+       .ordering_requires_timestamps = true,
        .ordered_samples = true,
 };
 
@@ -137,7 +117,7 @@ static void sig_handler(int sig __unused)
        session_done = 1;
 }
 
-static int __cmd_trace(struct perf_session *session)
+static int __cmd_script(struct perf_session *session)
 {
        int ret;
 
@@ -145,10 +125,8 @@ static int __cmd_trace(struct perf_session *session)
 
        ret = perf_session__process_events(session, &event_ops);
 
-       if (debug_mode) {
+       if (debug_mode)
                pr_err("Misordered timestamps: %llu\n", nr_unordered);
-               pr_err("Lost events: %llu\n", nr_lost);
-       }
 
        return ret;
 }
@@ -159,7 +137,7 @@ struct script_spec {
        char                    spec[0];
 };
 
-LIST_HEAD(script_specs);
+static LIST_HEAD(script_specs);
 
 static struct script_spec *script_spec__new(const char *spec,
                                            struct scripting_ops *ops)
@@ -247,7 +225,7 @@ static void list_available_languages(void)
 
        fprintf(stderr, "\n");
        fprintf(stderr, "Scripting language extensions (used in "
-               "perf trace -s [spec:]script.[spec]):\n\n");
+               "perf script -s [spec:]script.[spec]):\n\n");
 
        list_for_each_entry(s, &script_specs, node)
                fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
@@ -301,17 +279,34 @@ static int parse_scriptname(const struct option *opt __used,
        return 0;
 }
 
-#define for_each_lang(scripts_dir, lang_dirent, lang_next)             \
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+static int is_directory(const char *base_path, const struct dirent *dent)
+{
+       char path[PATH_MAX];
+       struct stat st;
+
+       snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+       if (stat(path, &st))
+               return 0;
+
+       return S_ISDIR(st.st_mode);
+}
+
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
        while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) &&     \
               lang_next)                                               \
-               if (lang_dirent.d_type == DT_DIR &&                     \
+               if ((lang_dirent.d_type == DT_DIR ||                    \
+                    (lang_dirent.d_type == DT_UNKNOWN &&               \
+                     is_directory(scripts_path, &lang_dirent))) &&     \
                    (strcmp(lang_dirent.d_name, ".")) &&                \
                    (strcmp(lang_dirent.d_name, "..")))
 
-#define for_each_script(lang_dir, script_dirent, script_next)          \
+#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
        while (!readdir_r(lang_dir, &script_dirent, &script_next) &&    \
               script_next)                                             \
-               if (script_dirent.d_type != DT_DIR)
+               if (script_dirent.d_type != DT_DIR &&                   \
+                   (script_dirent.d_type != DT_UNKNOWN ||              \
+                    !is_directory(lang_path, &script_dirent)))
 
 
 #define RECORD_SUFFIX                  "-record"
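
The is_directory() helper exists because readdir()'s d_type field is only a hint: some filesystems leave it DT_UNKNOWN, and then only a stat() call can tell directories apart. The fallback pattern on its own (NULL checks omitted for brevity):

        DIR *dir = opendir(scripts_path);
        struct dirent *d;

        while ((d = readdir(dir)) != NULL) {
                int is_dir = d->d_type == DT_DIR ||
                             (d->d_type == DT_UNKNOWN &&
                              is_directory(scripts_path, d));

                if (is_dir)
                        printf("language dir: %s\n", d->d_name);
        }
        closedir(dir);
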
@@ -324,7 +319,7 @@ struct script_desc {
        char                    *args;
 };
 
-LIST_HEAD(script_descs);
+static LIST_HEAD(script_descs);
 
 static struct script_desc *script_desc__new(const char *name)
 {
@@ -380,10 +375,10 @@ out_delete_desc:
        return NULL;
 }
 
-static char *ends_with(char *str, const char *suffix)
+static const char *ends_with(const char *str, const char *suffix)
 {
        size_t suffix_len = strlen(suffix);
-       char *p = str;
+       const char *p = str;
 
        if (strlen(str) > suffix_len) {
                p = str + strlen(str) - suffix_len;
@@ -466,16 +461,16 @@ static int list_available_scripts(const struct option *opt __used,
        if (!scripts_dir)
                return -1;
 
-       for_each_lang(scripts_dir, lang_dirent, lang_next) {
+       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
                         lang_dirent.d_name);
                lang_dir = opendir(lang_path);
                if (!lang_dir)
                        continue;
 
-               for_each_script(lang_dir, script_dirent, script_next) {
+               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
                        script_root = strdup(script_dirent.d_name);
-                       str = ends_with(script_root, REPORT_SUFFIX);
+                       str = (char *)ends_with(script_root, REPORT_SUFFIX);
                        if (str) {
                                *str = '\0';
                                desc = script_desc__findnew(script_root);
@@ -514,16 +509,16 @@ static char *get_script_path(const char *script_root, const char *suffix)
        if (!scripts_dir)
                return NULL;
 
-       for_each_lang(scripts_dir, lang_dirent, lang_next) {
+       for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
                snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
                         lang_dirent.d_name);
                lang_dir = opendir(lang_path);
                if (!lang_dir)
                        continue;
 
-               for_each_script(lang_dir, script_dirent, script_next) {
+               for_each_script(lang_path, lang_dir, script_dirent, script_next) {
                        __script_root = strdup(script_dirent.d_name);
-                       str = ends_with(__script_root, suffix);
+                       str = (char *)ends_with(__script_root, suffix);
                        if (str) {
                                *str = '\0';
                                if (strcmp(__script_root, script_root))
@@ -543,7 +538,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
 
 static bool is_top_script(const char *script_path)
 {
-       return ends_with((char *)script_path, "top") == NULL ? false : true;
+       return ends_with(script_path, "top") == NULL ? false : true;
 }
 
 static int has_required_arg(char *script_path)
@@ -569,12 +564,12 @@ out:
        return n_args;
 }
 
-static const char * const trace_usage[] = {
-       "perf trace [<options>]",
-       "perf trace [<options>] record <script> [<record-options>] <command>",
-       "perf trace [<options>] report <script> [script-args]",
-       "perf trace [<options>] <script> [<record-options>] <command>",
-       "perf trace [<options>] <top-script> [script-args]",
+static const char * const script_usage[] = {
+       "perf script [<options>]",
+       "perf script [<options>] record <script> [<record-options>] <command>",
+       "perf script [<options>] report <script> [script-args]",
+       "perf script [<options>] <script> [<record-options>] <command>",
+       "perf script [<options>] <top-script> [script-args]",
        NULL
 };
 
@@ -591,7 +586,7 @@ static const struct option options[] = {
                     "script file name (lang:script name, script name, or *)",
                     parse_scriptname),
        OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
-                  "generate perf-trace.xx script in specified language"),
+                  "generate perf-script.xx script in specified language"),
        OPT_STRING('i', "input", &input_name, "file",
                    "input file name"),
        OPT_BOOLEAN('d', "debug-mode", &debug_mode,
@@ -614,7 +609,7 @@ static bool have_cmd(int argc, const char **argv)
        return argc != 0;
 }
 
-int cmd_trace(int argc, const char **argv, const char *prefix __used)
+int cmd_script(int argc, const char **argv, const char *prefix __used)
 {
        char *rec_script_path = NULL;
        char *rep_script_path = NULL;
@@ -626,7 +621,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 
        setup_scripting();
 
-       argc = parse_options(argc, argv, options, trace_usage,
+       argc = parse_options(argc, argv, options, script_usage,
                             PARSE_OPT_STOP_AT_NON_OPTION);
 
        if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
@@ -640,7 +635,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                if (!rep_script_path) {
                        fprintf(stderr,
                                "Please specify a valid report script"
-                               "(see 'perf trace -l' for listing)\n");
+                               "(see 'perf script -l' for listing)\n");
                        return -1;
                }
        }
@@ -658,8 +653,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 
                if (!rec_script_path && !rep_script_path) {
                        fprintf(stderr, " Couldn't find script %s\n\n See perf"
-                               " trace -l for available scripts.\n", argv[0]);
-                       usage_with_options(trace_usage, options);
+                               " script -l for available scripts.\n", argv[0]);
+                       usage_with_options(script_usage, options);
                }
 
                if (is_top_script(argv[0])) {
@@ -671,9 +666,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                        rec_args = (argc - 1) - rep_args;
                        if (rec_args < 0) {
                                fprintf(stderr, " %s script requires options."
-                                       "\n\n See perf trace -l for available "
+                                       "\n\n See perf script -l for available "
                                        "scripts and options.\n", argv[0]);
-                               usage_with_options(trace_usage, options);
+                               usage_with_options(script_usage, options);
                        }
                }
 
@@ -772,7 +767,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
        if (!script_name)
                setup_pager();
 
-       session = perf_session__new(input_name, O_RDONLY, 0, false);
+       session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops);
        if (session == NULL)
                return -ENOMEM;
 
@@ -806,7 +801,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                        return -1;
                }
 
-               err = scripting_ops->generate_script("perf-trace");
+               err = scripting_ops->generate_script("perf-script");
                goto out;
        }
 
@@ -814,10 +809,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
                err = scripting_ops->start_script(script_name, argc, argv);
                if (err)
                        goto out;
-               pr_debug("perf trace started with script %s\n\n", script_name);
+               pr_debug("perf script started with script %s\n\n", script_name);
        }
 
-       err = __cmd_trace(session);
+       err = __cmd_script(session);
 
        perf_session__delete(session);
        cleanup_scripting();
index a6b4d44f950246e27d4cb6b0bc3e6d5afd27adcc..02b2d8013a61e537bf80e5d110d6c2f2c3f6b150 100644
@@ -43,6 +43,7 @@
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 #include "util/event.h"
+#include "util/evsel.h"
 #include "util/debug.h"
 #include "util/header.h"
 #include "util/cpumap.h"
@@ -52,6 +53,8 @@
 #include <math.h>
 #include <locale.h>
 
+#define DEFAULT_SEPARATOR      " "
+
 static struct perf_event_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK             },
@@ -69,25 +72,23 @@ static struct perf_event_attr default_attrs[] = {
 };
 
 static bool                    system_wide                     =  false;
-static int                     nr_cpus                         =  0;
+static struct cpu_map          *cpus;
 static int                     run_idx                         =  0;
 
 static int                     run_count                       =  1;
 static bool                    no_inherit                      = false;
 static bool                    scale                           =  true;
+static bool                    no_aggr                         = false;
 static pid_t                   target_pid                      = -1;
 static pid_t                   target_tid                      = -1;
-static pid_t                   *all_tids                       =  NULL;
-static int                     thread_num                      =  0;
+static struct thread_map       *threads;
 static pid_t                   child_pid                       = -1;
 static bool                    null_run                        =  false;
-static bool                    big_num                         =  false;
+static bool                    big_num                         =  true;
+static int                     big_num_opt                     =  -1;
 static const char              *cpu_list;
-
-
-static int                     *fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int                     event_scaled[MAX_COUNTERS];
+static const char              *csv_sep                        = NULL;
+static bool                    csv_output                      = false;
 
 static volatile int done = 0;
 
@@ -96,6 +97,22 @@ struct stats
        double n, mean, M2;
 };
 
+struct perf_stat {
+       struct stats      res_stats[3];
+};
+
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+{
+       evsel->priv = zalloc(sizeof(struct perf_stat));
+       return evsel->priv == NULL ? -ENOMEM : 0;
+}
+
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+{
+       free(evsel->priv);
+       evsel->priv = NULL;
+}
+
 static void update_stats(struct stats *stats, u64 val)
 {
        double delta;
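
The (n, mean, M2) fields in struct stats implement Welford's online mean/variance algorithm, and the hunk cuts update_stats() off right after this declaration. For reference, the complete update step, consistent with how avg_stats() and stddev_stats() consume the fields:

        static void update_stats(struct stats *stats, u64 val)
        {
                double delta;

                stats->n++;
                delta = val - stats->mean;
                stats->mean += delta / stats->n;
                /* M2 accumulates squared distance; variance = M2 / (n - 1) */
                stats->M2 += delta * (val - stats->mean);
        }
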
@@ -135,69 +152,38 @@ static double stddev_stats(struct stats *stats)
        return sqrt(variance_mean);
 }
 
-struct stats                   event_res_stats[MAX_COUNTERS][3];
-struct stats                   runtime_nsecs_stats;
+struct stats                   runtime_nsecs_stats[MAX_NR_CPUS];
+struct stats                   runtime_cycles_stats[MAX_NR_CPUS];
+struct stats                   runtime_branches_stats[MAX_NR_CPUS];
 struct stats                   walltime_nsecs_stats;
-struct stats                   runtime_cycles_stats;
-struct stats                   runtime_branches_stats;
 
-#define MATCH_EVENT(t, c, counter)                     \
-       (attrs[counter].type == PERF_TYPE_##t &&        \
-        attrs[counter].config == PERF_COUNT_##c)
-
-#define ERR_PERF_OPEN \
-"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
-
-static int create_perf_stat_counter(int counter)
+static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
-       struct perf_event_attr *attr = attrs + counter;
-       int thread;
-       int ncreated = 0;
+       struct perf_event_attr *attr = &evsel->attr;
 
        if (scale)
                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                    PERF_FORMAT_TOTAL_TIME_RUNNING;
 
-       if (system_wide) {
-               int cpu;
-
-               for (cpu = 0; cpu < nr_cpus; cpu++) {
-                       fd[cpu][counter][0] = sys_perf_event_open(attr,
-                                       -1, cpumap[cpu], -1, 0);
-                       if (fd[cpu][counter][0] < 0)
-                               pr_debug(ERR_PERF_OPEN, counter,
-                                        fd[cpu][counter][0], strerror(errno));
-                       else
-                               ++ncreated;
-               }
-       } else {
-               attr->inherit = !no_inherit;
-               if (target_pid == -1 && target_tid == -1) {
-                       attr->disabled = 1;
-                       attr->enable_on_exec = 1;
-               }
-               for (thread = 0; thread < thread_num; thread++) {
-                       fd[0][counter][thread] = sys_perf_event_open(attr,
-                               all_tids[thread], -1, -1, 0);
-                       if (fd[0][counter][thread] < 0)
-                               pr_debug(ERR_PERF_OPEN, counter,
-                                        fd[0][counter][thread],
-                                        strerror(errno));
-                       else
-                               ++ncreated;
-               }
+       if (system_wide)
+               return perf_evsel__open_per_cpu(evsel, cpus);
+
+       attr->inherit = !no_inherit;
+       if (target_pid == -1 && target_tid == -1) {
+               attr->disabled = 1;
+               attr->enable_on_exec = 1;
        }
 
-       return ncreated;
+       return perf_evsel__open_per_thread(evsel, threads);
 }
 
 /*
  * Does the counter have nsecs as a unit?
  */
-static inline int nsec_counter(int counter)
+static inline int nsec_counter(struct perf_evsel *evsel)
 {
-       if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
-           MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
+       if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+           perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                return 1;
 
        return 0;
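
perf_evsel__open_per_cpu() and perf_evsel__open_per_thread() come from the new util/evsel.c and simply walk one axis of the evsel's fd grid calling sys_perf_event_open(). A simplified sketch of the per-cpu flavour (the real version also unwinds already-opened fds on failure):

        static int open_per_cpu_sketch(struct perf_evsel *evsel,
                                       struct cpu_map *cpus)
        {
                int cpu;

                for (cpu = 0; cpu < cpus->nr; cpu++) {
                        /* pid == -1: count everything running on this CPU */
                        FD(evsel, cpu, 0) =
                                sys_perf_event_open(&evsel->attr, -1,
                                                    cpus->map[cpu], -1, 0);
                        if (FD(evsel, cpu, 0) < 0)
                                return -errno;
                }
                return 0;
        }
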
@@ -205,55 +191,19 @@ static inline int nsec_counter(int counter)
 
 /*
  * Read out the results of a single counter:
+ * aggregate counts across CPUs in system-wide mode
  */
-static void read_counter(int counter)
+static int read_counter_aggr(struct perf_evsel *counter)
 {
-       u64 count[3], single_count[3];
-       int cpu;
-       size_t res, nv;
-       int scaled;
-       int i, thread;
-
-       count[0] = count[1] = count[2] = 0;
-
-       nv = scale ? 3 : 1;
-       for (cpu = 0; cpu < nr_cpus; cpu++) {
-               for (thread = 0; thread < thread_num; thread++) {
-                       if (fd[cpu][counter][thread] < 0)
-                               continue;
-
-                       res = read(fd[cpu][counter][thread],
-                                       single_count, nv * sizeof(u64));
-                       assert(res == nv * sizeof(u64));
-
-                       close(fd[cpu][counter][thread]);
-                       fd[cpu][counter][thread] = -1;
-
-                       count[0] += single_count[0];
-                       if (scale) {
-                               count[1] += single_count[1];
-                               count[2] += single_count[2];
-                       }
-               }
-       }
-
-       scaled = 0;
-       if (scale) {
-               if (count[2] == 0) {
-                       event_scaled[counter] = -1;
-                       count[0] = 0;
-                       return;
-               }
+       struct perf_stat *ps = counter->priv;
+       u64 *count = counter->counts->aggr.values;
+       int i;
 
-               if (count[2] < count[1]) {
-                       event_scaled[counter] = 1;
-                       count[0] = (unsigned long long)
-                               ((double)count[0] * count[1] / count[2] + 0.5);
-               }
-       }
+       if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0)
+               return -1;
 
        for (i = 0; i < 3; i++)
-               update_stats(&event_res_stats[counter][i], count[i]);
+               update_stats(&ps->res_stats[i], count[i]);
 
        if (verbose) {
                fprintf(stderr, "%s: %Ld %Ld %Ld\n", event_name(counter),
@@ -263,26 +213,51 @@ static void read_counter(int counter)
        /*
         * Save the full runtime - to allow normalization during printout:
         */
-       if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
-               update_stats(&runtime_nsecs_stats, count[0]);
-       if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
-               update_stats(&runtime_cycles_stats, count[0]);
-       if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
-               update_stats(&runtime_branches_stats, count[0]);
+       if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+               update_stats(&runtime_nsecs_stats[0], count[0]);
+       if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+               update_stats(&runtime_cycles_stats[0], count[0]);
+       if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+               update_stats(&runtime_branches_stats[0], count[0]);
+
+       return 0;
+}
+
+/*
+ * Read out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static int read_counter(struct perf_evsel *counter)
+{
+       u64 *count;
+       int cpu;
+
+       for (cpu = 0; cpu < cpus->nr; cpu++) {
+               if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
+                       return -1;
+
+               count = counter->counts->cpu[cpu].values;
+
+               if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+                       update_stats(&runtime_nsecs_stats[cpu], count[0]);
+               if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+                       update_stats(&runtime_cycles_stats[cpu], count[0]);
+               if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+                       update_stats(&runtime_branches_stats[cpu], count[0]);
+       }
+
+       return 0;
 }
 
 static int run_perf_stat(int argc __used, const char **argv)
 {
        unsigned long long t0, t1;
+       struct perf_evsel *counter;
        int status = 0;
-       int counter, ncreated = 0;
        int child_ready_pipe[2], go_pipe[2];
        const bool forks = (argc > 0);
        char buf;
 
-       if (!system_wide)
-               nr_cpus = 1;
-
        if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
                perror("failed to create pipes");
                exit(1);
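
The open-coded scaling deleted in the hunk above moves behind __perf_evsel__read(), but the arithmetic is unchanged: with PERF_FORMAT_TOTAL_TIME_ENABLED/RUNNING the kernel reports how long the event was actually scheduled, and a multiplexed count is extrapolated to the full enabled time. As a worked helper (name invented for this sketch):

        static u64 scale_count(u64 raw, u64 enabled, u64 running)
        {
                if (running == 0)       /* never scheduled: <not counted> */
                        return 0;
                if (running < enabled)  /* multiplexed: extrapolate */
                        return (u64)((double)raw * enabled / running + 0.5);
                return raw;             /* counted the whole time */
        }
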
@@ -322,7 +297,7 @@ static int run_perf_stat(int argc __used, const char **argv)
                }
 
                if (target_tid == -1 && target_pid == -1 && !system_wide)
-                       all_tids[0] = child_pid;
+                       threads->map[0] = child_pid;
 
                /*
                 * Wait for the child to be ready to exec.
@@ -334,16 +309,23 @@ static int run_perf_stat(int argc __used, const char **argv)
                close(child_ready_pipe[0]);
        }
 
-       for (counter = 0; counter < nr_counters; counter++)
-               ncreated += create_perf_stat_counter(counter);
-
-       if (ncreated == 0) {
-               pr_err("No permission to collect %sstats.\n"
-                      "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n",
-                      system_wide ? "system-wide " : "");
-               if (child_pid != -1)
-                       kill(child_pid, SIGTERM);
-               return -1;
+       list_for_each_entry(counter, &evsel_list, node) {
+               if (create_perf_stat_counter(counter) < 0) {
+                       if (errno == EPERM || errno == EACCES) {
+                               error("You may not have permission to collect %sstats.\n"
+                                     "\t Consider tweaking"
+                                     " /proc/sys/kernel/perf_event_paranoid or running as root.",
+                                     system_wide ? "system-wide " : "");
+                       } else {
+                               error("open_counter returned with %d (%s). "
+                                     "/bin/dmesg may provide additional information.\n",
+                                      errno, strerror(errno));
+                       }
+                       if (child_pid != -1)
+                               kill(child_pid, SIGTERM);
+                       die("Not all events could be opened.\n");
+                       return -1;
+               }
        }
 
        /*
@@ -362,60 +344,97 @@ static int run_perf_stat(int argc __used, const char **argv)
 
        update_stats(&walltime_nsecs_stats, t1 - t0);
 
-       for (counter = 0; counter < nr_counters; counter++)
-               read_counter(counter);
+       if (no_aggr) {
+               list_for_each_entry(counter, &evsel_list, node) {
+                       read_counter(counter);
+                       perf_evsel__close_fd(counter, cpus->nr, 1);
+               }
+       } else {
+               list_for_each_entry(counter, &evsel_list, node) {
+                       read_counter_aggr(counter);
+                       perf_evsel__close_fd(counter, cpus->nr, threads->nr);
+               }
+       }
 
        return WEXITSTATUS(status);
 }
 
-static void print_noise(int counter, double avg)
+static void print_noise(struct perf_evsel *evsel, double avg)
 {
+       struct perf_stat *ps;
+
        if (run_count == 1)
                return;
 
+       ps = evsel->priv;
        fprintf(stderr, "   ( +- %7.3f%% )",
-                       100 * stddev_stats(&event_res_stats[counter][0]) / avg);
+                       100 * stddev_stats(&ps->res_stats[0]) / avg);
 }
 
-static void nsec_printout(int counter, double avg)
+static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
        double msecs = avg / 1e6;
+       char cpustr[16] = { '\0', };
+       const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
 
-       fprintf(stderr, " %18.6f  %-24s", msecs, event_name(counter));
+       if (no_aggr)
+               sprintf(cpustr, "CPU%*d%s",
+                       csv_output ? 0 : -4,
+                       cpus->map[cpu], csv_sep);
+
+       fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
+
+       if (csv_output)
+               return;
 
-       if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
+       if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                fprintf(stderr, " # %10.3f CPUs ",
                                avg / avg_stats(&walltime_nsecs_stats));
-       }
 }
 
-static void abs_printout(int counter, double avg)
+static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
        double total, ratio = 0.0;
+       char cpustr[16] = { '\0', };
+       const char *fmt;
+
+       if (csv_output)
+               fmt = "%s%.0f%s%s";
+       else if (big_num)
+               fmt = "%s%'18.0f%s%-24s";
+       else
+               fmt = "%s%18.0f%s%-24s";
 
-       if (big_num)
-               fprintf(stderr, " %'18.0f  %-24s", avg, event_name(counter));
+       if (no_aggr)
+               sprintf(cpustr, "CPU%*d%s",
+                       csv_output ? 0 : -4,
+                       cpus->map[cpu], csv_sep);
        else
-               fprintf(stderr, " %18.0f  %-24s", avg, event_name(counter));
+               cpu = 0;
+
+       fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
 
-       if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
-               total = avg_stats(&runtime_cycles_stats);
+       if (csv_output)
+               return;
+
+       if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+               total = avg_stats(&runtime_cycles_stats[cpu]);
 
                if (total)
                        ratio = avg / total;
 
                fprintf(stderr, " # %10.3f IPC  ", ratio);
-       } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
-                       runtime_branches_stats.n != 0) {
-               total = avg_stats(&runtime_branches_stats);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
+                       runtime_branches_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_branches_stats[cpu]);
 
                if (total)
                        ratio = avg * 100 / total;
 
                fprintf(stderr, " # %10.3f %%    ", ratio);
 
-       } else if (runtime_nsecs_stats.n != 0) {
-               total = avg_stats(&runtime_nsecs_stats);
+       } else if (runtime_nsecs_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_nsecs_stats[cpu]);
 
                if (total)
                        ratio = 1000.0 * avg / total;
@@ -426,30 +445,38 @@ static void abs_printout(int counter, double avg)
 
 /*
  * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
  */
-static void print_counter(int counter)
+static void print_counter_aggr(struct perf_evsel *counter)
 {
-       double avg = avg_stats(&event_res_stats[counter][0]);
-       int scaled = event_scaled[counter];
+       struct perf_stat *ps = counter->priv;
+       double avg = avg_stats(&ps->res_stats[0]);
+       int scaled = counter->counts->scaled;
 
        if (scaled == -1) {
-               fprintf(stderr, " %18s  %-24s\n",
-                       "<not counted>", event_name(counter));
+               fprintf(stderr, "%*s%s%-24s\n",
+                       csv_output ? 0 : 18,
+                       "<not counted>", csv_sep, event_name(counter));
                return;
        }
 
        if (nsec_counter(counter))
-               nsec_printout(counter, avg);
+               nsec_printout(-1, counter, avg);
        else
-               abs_printout(counter, avg);
+               abs_printout(-1, counter, avg);
+
+       if (csv_output) {
+               fputc('\n', stderr);
+               return;
+       }
 
        print_noise(counter, avg);
 
        if (scaled) {
                double avg_enabled, avg_running;
 
-               avg_enabled = avg_stats(&event_res_stats[counter][1]);
-               avg_running = avg_stats(&event_res_stats[counter][2]);
+               avg_enabled = avg_stats(&ps->res_stats[1]);
+               avg_running = avg_stats(&ps->res_stats[2]);
 
                fprintf(stderr, "  (scaled from %.2f%%)",
                                100 * avg_running / avg_enabled);
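
The enabled/running pair behind "(scaled from %.2f%%)" comes from opening counters with PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING: when the kernel multiplexes events, a counter runs for only part of the time it is enabled and the raw value is extrapolated. A standalone illustration of that arithmetic (numbers made up):

    #include <stdio.h>

    typedef unsigned long long u64;

    int main(void)
    {
            /* Illustrative: counter scheduled in for 80% of the window. */
            u64 val = 800000, ena = 1000000, run = 800000;

            if (run != 0 && run < ena)
                    printf("%.0f  (scaled from %.2f%%)\n",
                           (double)val * ena / run, /* extrapolated count */
                           100.0 * run / ena);      /* coverage shown above */
            return 0;
    }
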
@@ -458,40 +485,92 @@ static void print_counter(int counter)
        fprintf(stderr, "\n");
 }
 
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated counts in system-wide mode
+ */
+static void print_counter(struct perf_evsel *counter)
+{
+       u64 ena, run, val;
+       int cpu;
+
+       for (cpu = 0; cpu < cpus->nr; cpu++) {
+               val = counter->counts->cpu[cpu].val;
+               ena = counter->counts->cpu[cpu].ena;
+               run = counter->counts->cpu[cpu].run;
+               if (run == 0 || ena == 0) {
+                       fprintf(stderr, "CPU%*d%s%*s%s%-24s",
+                               csv_output ? 0 : -4,
+                               cpus->map[cpu], csv_sep,
+                               csv_output ? 0 : 18,
+                               "<not counted>", csv_sep,
+                               event_name(counter));
+
+                       fprintf(stderr, "\n");
+                       continue;
+               }
+
+               if (nsec_counter(counter))
+                       nsec_printout(cpu, counter, val);
+               else
+                       abs_printout(cpu, counter, val);
+
+               if (!csv_output) {
+                       print_noise(counter, 1.0);
+
+                       if (run != ena) {
+                               fprintf(stderr, "  (scaled from %.2f%%)",
+                                       100.0 * run / ena);
+                       }
+               }
+               fprintf(stderr, "\n");
+       }
+}
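
A note on the cpustr formatting used by both printers: printf's '*' consumes an int argument as the field width, so csv_output ? 0 : -4 collapses padding entirely in CSV mode and left-justifies the CPU number in a four-column field otherwise. A self-contained illustration:

    #include <stdio.h>

    int main(void)
    {
            /* Pretty mode: width -4 left-justifies within 4 columns. */
            printf("CPU%*d|\n", -4, 7);     /* prints "CPU7   |" */

            /* CSV mode: width 0 emits the number with no padding. */
            printf("CPU%*d|\n", 0, 7);      /* prints "CPU7|" */
            return 0;
    }
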
+
 static void print_stat(int argc, const char **argv)
 {
-       int i, counter;
+       struct perf_evsel *counter;
+       int i;
 
        fflush(stdout);
 
-       fprintf(stderr, "\n");
-       fprintf(stderr, " Performance counter stats for ");
-       if(target_pid == -1 && target_tid == -1) {
-               fprintf(stderr, "\'%s", argv[0]);
-               for (i = 1; i < argc; i++)
-                       fprintf(stderr, " %s", argv[i]);
-       } else if (target_pid != -1)
-               fprintf(stderr, "process id \'%d", target_pid);
-       else
-               fprintf(stderr, "thread id \'%d", target_tid);
-
-       fprintf(stderr, "\'");
-       if (run_count > 1)
-               fprintf(stderr, " (%d runs)", run_count);
-       fprintf(stderr, ":\n\n");
+       if (!csv_output) {
+               fprintf(stderr, "\n");
+               fprintf(stderr, " Performance counter stats for ");
+               if (target_pid == -1 && target_tid == -1) {
+                       fprintf(stderr, "\'%s", argv[0]);
+                       for (i = 1; i < argc; i++)
+                               fprintf(stderr, " %s", argv[i]);
+               } else if (target_pid != -1)
+                       fprintf(stderr, "process id \'%d", target_pid);
+               else
+                       fprintf(stderr, "thread id \'%d", target_tid);
+
+               fprintf(stderr, "\'");
+               if (run_count > 1)
+                       fprintf(stderr, " (%d runs)", run_count);
+               fprintf(stderr, ":\n\n");
+       }
 
-       for (counter = 0; counter < nr_counters; counter++)
-               print_counter(counter);
+       if (no_aggr) {
+               list_for_each_entry(counter, &evsel_list, node)
+                       print_counter(counter);
+       } else {
+               list_for_each_entry(counter, &evsel_list, node)
+                       print_counter_aggr(counter);
+       }
 
-       fprintf(stderr, "\n");
-       fprintf(stderr, " %18.9f  seconds time elapsed",
-                       avg_stats(&walltime_nsecs_stats)/1e9);
-       if (run_count > 1) {
-               fprintf(stderr, "   ( +- %7.3f%% )",
+       if (!csv_output) {
+               fprintf(stderr, "\n");
+               fprintf(stderr, " %18.9f  seconds time elapsed",
+                               avg_stats(&walltime_nsecs_stats)/1e9);
+               if (run_count > 1) {
+                       fprintf(stderr, "   ( +- %7.3f%% )",
                                100*stddev_stats(&walltime_nsecs_stats) /
                                avg_stats(&walltime_nsecs_stats));
+               }
+               fprintf(stderr, "\n\n");
        }
-       fprintf(stderr, "\n\n");
 }
 
 static volatile int signr = -1;
@@ -521,6 +600,13 @@ static const char * const stat_usage[] = {
        NULL
 };
 
+static int stat__set_big_num(const struct option *opt __used,
+                            const char *s __used, int unset)
+{
+       big_num_opt = unset ? 0 : 1;
+       return 0;
+}
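
Note the tri-state this callback creates: big_num_opt starts at -1 (neither flag given), becomes 1 for -B, and 0 for --no-big-num (parse-options invokes the callback with unset set for the negated spelling). That third state is what lets cmd_stat(), below, reject an explicit -B under -x while silently dropping thousands' separators when the user never asked for them.
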
+
 static const struct option options[] = {
        OPT_CALLBACK('e', "event", NULL, "event",
                     "event selector. use 'perf list' to list available events",
@@ -541,64 +627,96 @@ static const struct option options[] = {
                    "repeat command and print average + stddev (max: 100)"),
        OPT_BOOLEAN('n', "null", &null_run,
                    "null run - dont start any counters"),
-       OPT_BOOLEAN('B', "big-num", &big_num,
-                   "print large numbers with thousands\' separators"),
+       OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
+                          "print large numbers with thousands\' separators",
+                          stat__set_big_num),
        OPT_STRING('C', "cpu", &cpu_list, "cpu",
                    "list of cpus to monitor in system-wide"),
+       OPT_BOOLEAN('A', "no-aggr", &no_aggr,
+                   "disable CPU count aggregation"),
+       OPT_STRING('x', "field-separator", &csv_sep, "separator",
+                  "print counts with custom separator"),
        OPT_END()
 };
 
 int cmd_stat(int argc, const char **argv, const char *prefix __used)
 {
-       int status;
-       int i,j;
+       struct perf_evsel *pos;
+       int status = -ENOMEM;
 
        setlocale(LC_ALL, "");
 
        argc = parse_options(argc, argv, options, stat_usage,
                PARSE_OPT_STOP_AT_NON_OPTION);
+
+       if (csv_sep)
+               csv_output = true;
+       else
+               csv_sep = DEFAULT_SEPARATOR;
+
+       /*
+        * let the spreadsheet do the pretty-printing
+        */
+       if (csv_output) {
+               /* User explicitly passed -B? */
+               if (big_num_opt == 1) {
+                       fprintf(stderr, "-B option not supported with -x\n");
+                       usage_with_options(stat_usage, options);
+               } else /* Nope, so disable big number formatting */
+                       big_num = false;
+       } else if (big_num_opt == 0) /* User passed --no-big-num */
+               big_num = false;
+
        if (!argc && target_pid == -1 && target_tid == -1)
                usage_with_options(stat_usage, options);
        if (run_count <= 0)
                usage_with_options(stat_usage, options);
 
+       /* no_aggr is for system-wide only */
+       if (no_aggr && !system_wide)
+               usage_with_options(stat_usage, options);
+
        /* Set attrs and nr_counters if no event is selected and !null_run */
        if (!null_run && !nr_counters) {
-               memcpy(attrs, default_attrs, sizeof(default_attrs));
+               size_t c;
+
                nr_counters = ARRAY_SIZE(default_attrs);
+
+               for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
+                       pos = perf_evsel__new(default_attrs[c].type,
+                                             default_attrs[c].config,
+                                             nr_counters);
+                       if (pos == NULL)
+                               goto out;
+                       list_add(&pos->node, &evsel_list);
+               }
        }
 
-       if (system_wide)
-               nr_cpus = read_cpu_map(cpu_list);
-       else
-               nr_cpus = 1;
+       if (target_pid != -1)
+               target_tid = target_pid;
 
-       if (nr_cpus < 1)
+       threads = thread_map__new(target_pid, target_tid);
+       if (threads == NULL) {
+               pr_err("Problems finding threads to monitor\n");
                usage_with_options(stat_usage, options);
+       }
 
-       if (target_pid != -1) {
-               target_tid = target_pid;
-               thread_num = find_all_tid(target_pid, &all_tids);
-               if (thread_num <= 0) {
-                       fprintf(stderr, "Can't find all threads of pid %d\n",
-                                       target_pid);
-                       usage_with_options(stat_usage, options);
-               }
-       } else {
-               all_tids=malloc(sizeof(pid_t));
-               if (!all_tids)
-                       return -ENOMEM;
+       if (system_wide)
+               cpus = cpu_map__new(cpu_list);
+       else
+               cpus = cpu_map__dummy_new();
 
-               all_tids[0] = target_tid;
-               thread_num = 1;
+       if (cpus == NULL) {
+               perror("failed to parse CPUs map");
+               usage_with_options(stat_usage, options);
+               return -1;
        }
 
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++) {
-                       fd[i][j] = malloc(sizeof(int)*thread_num);
-                       if (!fd[i][j])
-                               return -ENOMEM;
-               }
+       list_for_each_entry(pos, &evsel_list, node) {
+               if (perf_evsel__alloc_stat_priv(pos) < 0 ||
+                   perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
+                   perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+                       goto out_free_fd;
        }
 
        /*
@@ -621,6 +739,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 
        if (status != -1)
                print_stat(argc, argv);
-
+out_free_fd:
+       list_for_each_entry(pos, &evsel_list, node)
+               perf_evsel__free_stat_priv(pos);
+out:
+       thread_map__delete(threads);
+       threads = NULL;
        return status;
 }
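
Taken together, cmd_stat() now follows an allocate/iterate/free pattern over evsel_list: per-event stat state, per-CPU counts, and the cpu-by-thread fd matrix are allocated up front, and out_free_fd unwinds on any failure. With the new flags wired up, a per-CPU CSV run would look something like "perf stat -a -A -x, -- sleep 1" (illustrative invocation; -A requires system-wide mode, as the no_aggr check above enforces).
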
index 035b9fa063a9453002873c00f654adcd13a99eb2..1c984342a5795090d2e863d0b2da1a995e0201d2 100644 (file)
@@ -119,10 +119,16 @@ static int test__vmlinux_matches_kallsyms(void)
         * end addresses too.
         */
        for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
-               struct symbol *pair;
+               struct symbol *pair, *first_pair;
+               bool backwards = true;
 
                sym  = rb_entry(nd, struct symbol, rb_node);
-               pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+
+               if (sym->start == sym->end)
+                       continue;
+
+               first_pair = machine__find_kernel_symbol(&kallsyms, type, sym->start, NULL, NULL);
+               pair = first_pair;
 
                if (pair && pair->start == sym->start) {
 next_pair:
@@ -143,8 +149,10 @@ next_pair:
                                pr_debug("%#Lx: diff end addr for %s v: %#Lx k: %#Lx\n",
                                         sym->start, sym->name, sym->end, pair->end);
                        } else {
-                               struct rb_node *nnd = rb_prev(&pair->rb_node);
-
+                               struct rb_node *nnd;
+detour:
+                               nnd = backwards ? rb_prev(&pair->rb_node) :
+                                                 rb_next(&pair->rb_node);
                                if (nnd) {
                                        struct symbol *next = rb_entry(nnd, struct symbol, rb_node);
 
@@ -153,6 +161,13 @@ next_pair:
                                                goto next_pair;
                                        }
                                }
+
+                               if (backwards) {
+                                       backwards = false;
+                                       pair = first_pair;
+                                       goto detour;
+                               }
+
                                pr_debug("%#Lx: diff name v: %s k: %s\n",
                                         sym->start, sym->name, pair->name);
                        }
@@ -219,6 +234,89 @@ out:
        return err;
 }
 
+#include "util/evsel.h"
+#include <sys/types.h>
+
+static int trace_event__id(const char *event_name)
+{
+       char *filename;
+       int err = -1, fd;
+
+       if (asprintf(&filename,
+                    "/sys/kernel/debug/tracing/events/syscalls/%s/id",
+                    event_name) < 0)
+               return -1;
+
+       fd = open(filename, O_RDONLY);
+       if (fd >= 0) {
+               char id[16];
+               if (read(fd, id, sizeof(id)) > 0)
+                       err = atoi(id);
+               close(fd);
+       }
+
+       free(filename);
+       return err;
+}
+
+static int test__open_syscall_event(void)
+{
+       int err = -1, fd;
+       struct thread_map *threads;
+       struct perf_evsel *evsel;
+       unsigned int nr_open_calls = 111, i;
+       int id = trace_event__id("sys_enter_open");
+
+       if (id < 0) {
+               pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+               return -1;
+       }
+
+       threads = thread_map__new(-1, getpid());
+       if (threads == NULL) {
+               pr_debug("thread_map__new\n");
+               return -1;
+       }
+
+       evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
+       if (evsel == NULL) {
+               pr_debug("perf_evsel__new\n");
+               goto out_thread_map_delete;
+       }
+
+       if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+               pr_debug("failed to open counter: %s, "
+                        "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+                        strerror(errno));
+               goto out_evsel_delete;
+       }
+
+       for (i = 0; i < nr_open_calls; ++i) {
+               fd = open("/etc/passwd", O_RDONLY);
+               close(fd);
+       }
+
+       if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
+               pr_debug("perf_evsel__open_read_on_cpu\n");
+               goto out_close_fd;
+       }
+
+       if (evsel->counts->cpu[0].val != nr_open_calls) {
+               pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %Ld\n",
+                        nr_open_calls, evsel->counts->cpu[0].val);
+               goto out_close_fd;
+       }
+
+       err = 0;
+out_close_fd:
+       perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+       perf_evsel__delete(evsel);
+out_thread_map_delete:
+       thread_map__delete(threads);
+       return err;
+}
+
 static struct test {
        const char *desc;
        int (*func)(void);
@@ -227,6 +325,10 @@ static struct test {
                .desc = "vmlinux symtab matches kallsyms",
                .func = test__vmlinux_matches_kallsyms,
        },
+       {
+               .desc = "detect open syscall event",
+               .func = test__open_syscall_event,
+       },
        {
                .func = NULL,
        },
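
The new test depends on perf_evsel__read_on_cpu() populating evsel->counts->cpu[0].val; that helper lives in util/evsel.c and is not part of this hunk. A hedged sketch of the core read, assuming the fd was opened with the enabled/running read format (without those format bits a single u64 comes back instead) -- names and error handling are assumptions:

    #include <unistd.h>

    typedef unsigned long long u64;

    static int sketch__read_counter(int fd, u64 *val, u64 *ena, u64 *run)
    {
            u64 count[3];   /* value, time_enabled, time_running */

            if (read(fd, count, sizeof(count)) != (ssize_t)sizeof(count))
                    return -1;

            *val = count[0];
            *ena = count[1];
            *run = count[2];
            return 0;
    }
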
index 9bcc38f0b706f91ca3701e440c15e3e9c8aa7bd3..746cf03cb05d86a2796c88fca27930ca6e0a8894 100644 (file)
 #include "util/session.h"
 #include "util/svghelper.h"
 
+#define SUPPORT_OLD_POWER_EVENTS 1
+#define PWR_EVENT_EXIT -1
+
+
 static char            const *input_name = "perf.data";
 static char            const *output_name = "output.svg";
 
@@ -272,19 +276,22 @@ static int cpus_cstate_state[MAX_CPUS];
 static u64 cpus_pstate_start_times[MAX_CPUS];
 static u64 cpus_pstate_state[MAX_CPUS];
 
-static int process_comm_event(event_t *event, struct perf_session *session __used)
+static int process_comm_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_set_comm(event->comm.tid, event->comm.comm);
        return 0;
 }
 
-static int process_fork_event(event_t *event, struct perf_session *session __used)
+static int process_fork_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
        return 0;
 }
 
-static int process_exit_event(event_t *event, struct perf_session *session __used)
+static int process_exit_event(event_t *event, struct sample_data *sample __used,
+                             struct perf_session *session __used)
 {
        pid_exit(event->fork.pid, event->fork.time);
        return 0;
@@ -298,12 +305,21 @@ struct trace_entry {
        int                     lock_depth;
 };
 
-struct power_entry {
+#ifdef SUPPORT_OLD_POWER_EVENTS
+static int use_old_power_events;
+struct power_entry_old {
        struct trace_entry te;
        u64     type;
        u64     value;
        u64     cpu_id;
 };
+#endif
+
+struct power_processor_entry {
+       struct trace_entry te;
+       u32     state;
+       u32     cpu_id;
+};
 
 #define TASK_COMM_LEN 16
 struct wakeup_entry {
@@ -470,48 +486,65 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
 }
 
 
-static int process_sample_event(event_t *event, struct perf_session *session)
+static int process_sample_event(event_t *event __used,
+                               struct sample_data *sample,
+                               struct perf_session *session)
 {
-       struct sample_data data;
        struct trace_entry *te;
 
-       memset(&data, 0, sizeof(data));
-
-       event__parse_sample(event, session->sample_type, &data);
-
        if (session->sample_type & PERF_SAMPLE_TIME) {
-               if (!first_time || first_time > data.time)
-                       first_time = data.time;
-               if (last_time < data.time)
-                       last_time = data.time;
+               if (!first_time || first_time > sample->time)
+                       first_time = sample->time;
+               if (last_time < sample->time)
+                       last_time = sample->time;
        }
 
-       te = (void *)data.raw_data;
-       if (session->sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
+       te = (void *)sample->raw_data;
+       if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
                char *event_str;
-               struct power_entry *pe;
-
-               pe = (void *)te;
-
+#ifdef SUPPORT_OLD_POWER_EVENTS
+               struct power_entry_old *peo;
+               peo = (void *)te;
+#endif
                event_str = perf_header__find_event(te->type);
 
                if (!event_str)
                        return 0;
 
-               if (strcmp(event_str, "power:power_start") == 0)
-                       c_state_start(pe->cpu_id, data.time, pe->value);
+               if (strcmp(event_str, "power:cpu_idle") == 0) {
+                       struct power_processor_entry *ppe = (void *)te;
+                       if (ppe->state == (u32)PWR_EVENT_EXIT)
+                               c_state_end(ppe->cpu_id, sample->time);
+                       else
+                               c_state_start(ppe->cpu_id, sample->time,
+                                             ppe->state);
+               }
+               else if (strcmp(event_str, "power:cpu_frequency") == 0) {
+                       struct power_processor_entry *ppe = (void *)te;
+                       p_state_change(ppe->cpu_id, sample->time, ppe->state);
+               }
+
+               else if (strcmp(event_str, "sched:sched_wakeup") == 0)
+                       sched_wakeup(sample->cpu, sample->time, sample->pid, te);
 
-               if (strcmp(event_str, "power:power_end") == 0)
-                       c_state_end(pe->cpu_id, data.time);
+               else if (strcmp(event_str, "sched:sched_switch") == 0)
+                       sched_switch(sample->cpu, sample->time, te);
 
-               if (strcmp(event_str, "power:power_frequency") == 0)
-                       p_state_change(pe->cpu_id, data.time, pe->value);
+#ifdef SUPPORT_OLD_POWER_EVENTS
+               if (use_old_power_events) {
+                       if (strcmp(event_str, "power:power_start") == 0)
+                               c_state_start(peo->cpu_id, sample->time,
+                                             peo->value);
 
-               if (strcmp(event_str, "sched:sched_wakeup") == 0)
-                       sched_wakeup(data.cpu, data.time, data.pid, te);
+                       else if (strcmp(event_str, "power:power_end") == 0)
+                               c_state_end(sample->cpu, sample->time);
 
-               if (strcmp(event_str, "sched:sched_switch") == 0)
-                       sched_switch(data.cpu, data.time, te);
+                       else if (strcmp(event_str,
+                                       "power:power_frequency") == 0)
+                               p_state_change(peo->cpu_id, sample->time,
+                                              peo->value);
+               }
+#endif
        }
        return 0;
 }
@@ -937,7 +970,8 @@ static struct perf_event_ops event_ops = {
 
 static int __cmd_timechart(void)
 {
-       struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0, false);
+       struct perf_session *session = perf_session__new(input_name, O_RDONLY,
+                                                        0, false, &event_ops);
        int ret = -EINVAL;
 
        if (session == NULL)
@@ -968,7 +1002,8 @@ static const char * const timechart_usage[] = {
        NULL
 };
 
-static const char *record_args[] = {
+#ifdef SUPPORT_OLD_POWER_EVENTS
+static const char * const record_old_args[] = {
        "record",
        "-a",
        "-R",
@@ -980,16 +1015,43 @@ static const char *record_args[] = {
        "-e", "sched:sched_wakeup",
        "-e", "sched:sched_switch",
 };
+#endif
+
+static const char * const record_new_args[] = {
+       "record",
+       "-a",
+       "-R",
+       "-f",
+       "-c", "1",
+       "-e", "power:cpu_frequency",
+       "-e", "power:cpu_idle",
+       "-e", "sched:sched_wakeup",
+       "-e", "sched:sched_switch",
+};
 
 static int __cmd_record(int argc, const char **argv)
 {
        unsigned int rec_argc, i, j;
        const char **rec_argv;
+       const char * const *record_args = record_new_args;
+       unsigned int record_elems = ARRAY_SIZE(record_new_args);
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+       if (!is_valid_tracepoint("power:cpu_idle") &&
+           is_valid_tracepoint("power:power_start")) {
+               use_old_power_events = 1;
+               record_args = record_old_args;
+               record_elems = ARRAY_SIZE(record_old_args);
+       }
+#endif
 
-       rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+       rec_argc = record_elems + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
-       for (i = 0; i < ARRAY_SIZE(record_args); i++)
+       if (rec_argv == NULL)
+               return -ENOMEM;
+
+       for (i = 0; i < record_elems; i++)
                rec_argv[i] = strdup(record_args[i]);
 
        for (j = 1; j < (unsigned int)argc; j++, i++)
@@ -1018,6 +1080,8 @@ static const struct option options[] = {
        OPT_CALLBACK('p', "process", NULL, "process",
                      "process selector. Pass a pid or process name.",
                       parse_process),
+       OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+                   "Look for files with symbols relative to this directory"),
        OPT_END()
 };
 
index dd625808c2a5332c4f733a59acfb1ee881faae3f..1e67ab9c7ebc46c5df87e6219ad023b5509207f2 100644 (file)
@@ -21,6 +21,7 @@
 #include "perf.h"
 
 #include "util/color.h"
+#include "util/evsel.h"
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/thread.h"
@@ -29,6 +30,7 @@
 #include "util/parse-options.h"
 #include "util/parse-events.h"
 #include "util/cpumap.h"
+#include "util/xyarray.h"
 
 #include "util/debug.h"
 
@@ -55,7 +57,7 @@
 #include <linux/unistd.h>
 #include <linux/types.h>
 
-static int                     *fd[MAX_NR_CPUS][MAX_COUNTERS];
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 
 static bool                    system_wide                     =  false;
 
@@ -66,10 +68,9 @@ static int                   print_entries;
 
 static int                     target_pid                      =     -1;
 static int                     target_tid                      =     -1;
-static pid_t                   *all_tids                       =      NULL;
-static int                     thread_num                      =      0;
+static struct thread_map       *threads;
 static bool                    inherit                         =  false;
-static int                     nr_cpus                         =      0;
+static struct cpu_map          *cpus;
 static int                     realtime_prio                   =      0;
 static bool                    group                           =  false;
 static unsigned int            page_size;
@@ -100,6 +101,7 @@ struct sym_entry            *sym_filter_entry               =   NULL;
 struct sym_entry               *sym_filter_entry_sched         =   NULL;
 static int                     sym_pcnt_filter                 =      5;
 static int                     sym_counter                     =      0;
+static struct perf_evsel       *sym_evsel                      =   NULL;
 static int                     display_weighted                =     -1;
 static const char              *cpu_list;
 
@@ -353,7 +355,7 @@ static void show_details(struct sym_entry *syme)
                return;
 
        symbol = sym_entry__symbol(syme);
-       printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
+       printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name);
        printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
 
        pthread_mutex_lock(&syme->src->lock);
@@ -460,7 +462,8 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
 static void print_sym_table(void)
 {
        int printed = 0, j;
-       int counter, snap = !display_weighted ? sym_counter : 0;
+       struct perf_evsel *counter;
+       int snap = !display_weighted ? sym_counter : 0;
        float samples_per_sec = samples/delay_secs;
        float ksamples_per_sec = kernel_samples/delay_secs;
        float us_samples_per_sec = (us_samples)/delay_secs;
@@ -532,7 +535,9 @@ static void print_sym_table(void)
        }
 
        if (nr_counters == 1 || !display_weighted) {
-               printf("%Ld", (u64)attrs[0].sample_period);
+               struct perf_evsel *first;
+               first = list_entry(evsel_list.next, struct perf_evsel, node);
+               printf("%Ld", first->attr.sample_period);
                if (freq)
                        printf("Hz ");
                else
@@ -540,9 +545,9 @@ static void print_sym_table(void)
        }
 
        if (!display_weighted)
-               printf("%s", event_name(sym_counter));
-       else for (counter = 0; counter < nr_counters; counter++) {
-               if (counter)
+               printf("%s", event_name(sym_evsel));
+       else list_for_each_entry(counter, &evsel_list, node) {
+               if (counter->idx)
                        printf("/");
 
                printf("%s", event_name(counter));
@@ -558,12 +563,12 @@ static void print_sym_table(void)
                printf(" (all");
 
        if (cpu_list)
-               printf(", CPU%s: %s)\n", nr_cpus > 1 ? "s" : "", cpu_list);
+               printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
        else {
                if (target_tid != -1)
                        printf(")\n");
                else
-                       printf(", %d CPU%s)\n", nr_cpus, nr_cpus > 1 ? "s" : "");
+                       printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
        }
 
        printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
@@ -739,7 +744,7 @@ static void print_mapped_keys(void)
        fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", print_entries);
 
        if (nr_counters > 1)
-               fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_counter));
+               fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(sym_evsel));
 
        fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", count_filter);
 
@@ -826,19 +831,23 @@ static void handle_keypress(struct perf_session *session, int c)
                        break;
                case 'E':
                        if (nr_counters > 1) {
-                               int i;
-
                                fprintf(stderr, "\nAvailable events:");
-                               for (i = 0; i < nr_counters; i++)
-                                       fprintf(stderr, "\n\t%d %s", i, event_name(i));
+
+                               list_for_each_entry(sym_evsel, &evsel_list, node)
+                                       fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
 
                                prompt_integer(&sym_counter, "Enter details event counter");
 
                                if (sym_counter >= nr_counters) {
-                                       fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
+                                       sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
                                        sym_counter = 0;
+                                       fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
                                        sleep(1);
+                                       break;
                                }
+                               list_for_each_entry(sym_evsel, &evsel_list, node)
+                                       if (sym_evsel->idx == sym_counter)
+                                               break;
                        } else sym_counter = 0;
                        break;
                case 'f':
@@ -977,12 +986,13 @@ static int symbol_filter(struct map *map, struct symbol *sym)
 }
 
 static void event__process_sample(const event_t *self,
-                                struct perf_session *session, int counter)
+                                 struct sample_data *sample,
+                                 struct perf_session *session,
+                                 struct perf_evsel *evsel)
 {
        u64 ip = self->ip.ip;
        struct sym_entry *syme;
        struct addr_location al;
-       struct sample_data data;
        struct machine *machine;
        u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
@@ -1025,7 +1035,7 @@ static void event__process_sample(const event_t *self,
        if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
                exact_samples++;
 
-       if (event__preprocess_sample(self, session, &al, &data,
+       if (event__preprocess_sample(self, session, &al, sample,
                                     symbol_filter) < 0 ||
            al.filtered)
                return;
@@ -1071,9 +1081,9 @@ static void event__process_sample(const event_t *self,
 
        syme = symbol__priv(al.sym);
        if (!syme->skip) {
-               syme->count[counter]++;
+               syme->count[evsel->idx]++;
                syme->origin = origin;
-               record_precise_ip(syme, counter, ip);
+               record_precise_ip(syme, evsel->idx, ip);
                pthread_mutex_lock(&active_symbols_lock);
                if (list_empty(&syme->node) || !syme->node.next)
                        __list_insert_active_sym(syme);
@@ -1082,12 +1092,24 @@ static void event__process_sample(const event_t *self,
 }
 
 struct mmap_data {
-       int                     counter;
        void                    *base;
        int                     mask;
        unsigned int            prev;
 };
 
+static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
+                                            int ncpus, int nthreads)
+{
+       evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
+       return evsel->priv != NULL ? 0 : -ENOMEM;
+}
+
+static void perf_evsel__free_mmap(struct perf_evsel *evsel)
+{
+       xyarray__delete(evsel->priv);
+       evsel->priv = NULL;
+}
+
 static unsigned int mmap_read_head(struct mmap_data *md)
 {
        struct perf_event_mmap_page *pc = md->base;
@@ -1100,11 +1122,15 @@ static unsigned int mmap_read_head(struct mmap_data *md)
 }
 
 static void perf_session__mmap_read_counter(struct perf_session *self,
-                                           struct mmap_data *md)
+                                           struct perf_evsel *evsel,
+                                           int cpu, int thread_idx)
 {
+       struct xyarray *mmap_array = evsel->priv;
+       struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
        unsigned int head = mmap_read_head(md);
        unsigned int old = md->prev;
        unsigned char *data = md->base + page_size;
+       struct sample_data sample;
        int diff;
 
        /*
@@ -1152,10 +1178,11 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
                        event = &event_copy;
                }
 
+               event__parse_sample(event, self, &sample);
                if (event->header.type == PERF_RECORD_SAMPLE)
-                       event__process_sample(event, self, md->counter);
+                       event__process_sample(event, &sample, self, evsel);
                else
-                       event__process(event, self);
+                       event__process(event, &sample, self);
                old += size;
        }
 
@@ -1163,36 +1190,39 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
 }
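
mmap_read_head(), called at the top of perf_session__mmap_read_counter(), reads the kernel-maintained data_head from the shared perf_event_mmap_page; the canonical pattern pairs that load with a read barrier before touching sample bytes. A sketch of the idiom (the barrier macro is arch-specific in perf and only approximated here):

    #include <linux/perf_event.h>

    /* Assumption: a full barrier standing in for perf's arch rmb(). */
    #define sketch_rmb() __sync_synchronize()

    static unsigned int sketch__mmap_read_head(void *base)
    {
            struct perf_event_mmap_page *pc = base;
            unsigned int head = pc->data_head;

            sketch_rmb();   /* order the head load before reading samples */
            return head;
    }
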
 
 static struct pollfd *event_array;
-static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
 
 static void perf_session__mmap_read(struct perf_session *self)
 {
-       int i, counter, thread_index;
+       struct perf_evsel *counter;
+       int i, thread_index;
 
-       for (i = 0; i < nr_cpus; i++) {
-               for (counter = 0; counter < nr_counters; counter++)
+       for (i = 0; i < cpus->nr; i++) {
+               list_for_each_entry(counter, &evsel_list, node) {
                        for (thread_index = 0;
-                               thread_index < thread_num;
+                               thread_index < threads->nr;
                                thread_index++) {
                                perf_session__mmap_read_counter(self,
-                                       &mmap_array[i][counter][thread_index]);
+                                       counter, i, thread_index);
                        }
+               }
        }
 }
 
 int nr_poll;
 int group_fd;
 
-static void start_counter(int i, int counter)
+static void start_counter(int i, struct perf_evsel *evsel)
 {
+       struct xyarray *mmap_array = evsel->priv;
+       struct mmap_data *mm;
        struct perf_event_attr *attr;
        int cpu = -1;
        int thread_index;
 
        if (target_tid == -1)
-               cpu = cpumap[i];
+               cpu = cpus->map[i];
 
-       attr = attrs + counter;
+       attr = &evsel->attr;
 
        attr->sample_type       = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 
@@ -1205,16 +1235,18 @@ static void start_counter(int i, int counter)
        attr->inherit           = (cpu < 0) && inherit;
        attr->mmap              = 1;
 
-       for (thread_index = 0; thread_index < thread_num; thread_index++) {
+       for (thread_index = 0; thread_index < threads->nr; thread_index++) {
 try_again:
-               fd[i][counter][thread_index] = sys_perf_event_open(attr,
-                               all_tids[thread_index], cpu, group_fd, 0);
+               FD(evsel, i, thread_index) = sys_perf_event_open(attr,
+                               threads->map[thread_index], cpu, group_fd, 0);
 
-               if (fd[i][counter][thread_index] < 0) {
+               if (FD(evsel, i, thread_index) < 0) {
                        int err = errno;
 
                        if (err == EPERM || err == EACCES)
-                               die("No permission - are you root?\n");
+                               die("Permission error - are you root?\n"
+                                       "\t Consider tweaking"
+                                       " /proc/sys/kernel/perf_event_paranoid.\n");
                        /*
                         * If it's cycles then fall back to hrtimer
                         * based cpu-clock-tick sw counter, which
@@ -1231,30 +1263,30 @@ try_again:
                                goto try_again;
                        }
                        printf("\n");
-                       error("perfcounter syscall returned with %d (%s)\n",
-                                       fd[i][counter][thread_index], strerror(err));
+                       error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
+                                       FD(evsel, i, thread_index), strerror(err));
                        die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
                        exit(-1);
                }
-               assert(fd[i][counter][thread_index] >= 0);
-               fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK);
+               assert(FD(evsel, i, thread_index) >= 0);
+               fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
 
                /*
                 * First counter acts as the group leader:
                 */
                if (group && group_fd == -1)
-                       group_fd = fd[i][counter][thread_index];
+                       group_fd = FD(evsel, i, thread_index);
 
-               event_array[nr_poll].fd = fd[i][counter][thread_index];
+               event_array[nr_poll].fd = FD(evsel, i, thread_index);
                event_array[nr_poll].events = POLLIN;
                nr_poll++;
 
-               mmap_array[i][counter][thread_index].counter = counter;
-               mmap_array[i][counter][thread_index].prev = 0;
-               mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1;
-               mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
-                               PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0);
-               if (mmap_array[i][counter][thread_index].base == MAP_FAILED)
+               mm = xyarray__entry(mmap_array, i, thread_index);
+               mm->prev = 0;
+               mm->mask = mmap_pages*page_size - 1;
+               mm->base = mmap(NULL, (mmap_pages+1)*page_size,
+                               PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
+               if (mm->base == MAP_FAILED)
                        die("failed to mmap with %d (%s)\n", errno, strerror(errno));
        }
 }
@@ -1262,13 +1294,13 @@ try_again:
 static int __cmd_top(void)
 {
        pthread_t thread;
-       int i, counter;
-       int ret;
+       struct perf_evsel *counter;
+       int i, ret;
        /*
         * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
         * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
         */
-       struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false);
+       struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
        if (session == NULL)
                return -ENOMEM;
 
@@ -1277,9 +1309,9 @@ static int __cmd_top(void)
        else
                event__synthesize_threads(event__process, session);
 
-       for (i = 0; i < nr_cpus; i++) {
+       for (i = 0; i < cpus->nr; i++) {
                group_fd = -1;
-               for (counter = 0; counter < nr_counters; counter++)
+               list_for_each_entry(counter, &evsel_list, node)
                        start_counter(i, counter);
        }
 
@@ -1368,8 +1400,8 @@ static const struct option options[] = {
 
 int cmd_top(int argc, const char **argv, const char *prefix __used)
 {
-       int counter;
-       int i,j;
+       struct perf_evsel *pos;
+       int status = -ENOMEM;
 
        page_size = sysconf(_SC_PAGE_SIZE);
 
@@ -1377,34 +1409,17 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
        if (argc)
                usage_with_options(top_usage, options);
 
-       if (target_pid != -1) {
+       if (target_pid != -1)
                target_tid = target_pid;
-               thread_num = find_all_tid(target_pid, &all_tids);
-               if (thread_num <= 0) {
-                       fprintf(stderr, "Can't find all threads of pid %d\n",
-                               target_pid);
-                       usage_with_options(top_usage, options);
-               }
-       } else {
-               all_tids=malloc(sizeof(pid_t));
-               if (!all_tids)
-                       return -ENOMEM;
 
-               all_tids[0] = target_tid;
-               thread_num = 1;
+       threads = thread_map__new(target_pid, target_tid);
+       if (threads == NULL) {
+               pr_err("Problems finding threads to monitor\n");
+               usage_with_options(top_usage, options);
        }
 
-       for (i = 0; i < MAX_NR_CPUS; i++) {
-               for (j = 0; j < MAX_COUNTERS; j++) {
-                       fd[i][j] = malloc(sizeof(int)*thread_num);
-                       mmap_array[i][j] = zalloc(
-                               sizeof(struct mmap_data)*thread_num);
-                       if (!fd[i][j] || !mmap_array[i][j])
-                               return -ENOMEM;
-               }
-       }
-       event_array = malloc(
-               sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
+       event_array = malloc((sizeof(struct pollfd) *
+                             MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
        if (!event_array)
                return -ENOMEM;
 
@@ -1415,15 +1430,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                cpu_list = NULL;
        }
 
-       if (!nr_counters)
-               nr_counters = 1;
-
-       symbol_conf.priv_size = (sizeof(struct sym_entry) +
-                                (nr_counters + 1) * sizeof(unsigned long));
-
-       symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
-       if (symbol__init() < 0)
-               return -1;
+       if (!nr_counters && perf_evsel_list__create_default() < 0) {
+               pr_err("Not enough memory for event selector list\n");
+               return -ENOMEM;
+       }
 
        if (delay_secs < 1)
                delay_secs = 1;
@@ -1440,23 +1450,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                exit(EXIT_FAILURE);
        }
 
-       /*
-        * Fill in the ones not specifically initialized via -c:
-        */
-       for (counter = 0; counter < nr_counters; counter++) {
-               if (attrs[counter].sample_period)
+       if (target_tid != -1)
+               cpus = cpu_map__dummy_new();
+       else
+               cpus = cpu_map__new(cpu_list);
+
+       if (cpus == NULL)
+               usage_with_options(top_usage, options);
+
+       list_for_each_entry(pos, &evsel_list, node) {
+               if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
+                   perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+                       goto out_free_fd;
+               /*
+                * Fill in the ones not specifically initialized via -c:
+                */
+               if (pos->attr.sample_period)
                        continue;
 
-               attrs[counter].sample_period = default_interval;
+               pos->attr.sample_period = default_interval;
        }
 
-       if (target_tid != -1)
-               nr_cpus = 1;
-       else
-               nr_cpus = read_cpu_map(cpu_list);
+       symbol_conf.priv_size = (sizeof(struct sym_entry) +
+                                (nr_counters + 1) * sizeof(unsigned long));
 
-       if (nr_cpus < 1)
-               usage_with_options(top_usage, options);
+       symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+       if (symbol__init() < 0)
+               return -1;
 
        get_term_dimensions(&winsize);
        if (print_entries == 0) {
@@ -1464,5 +1484,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
                signal(SIGWINCH, sig_winch_handler);
        }
 
-       return __cmd_top();
+       status = __cmd_top();
+out_free_fd:
+       list_for_each_entry(pos, &evsel_list, node)
+               perf_evsel__free_mmap(pos);
+
+       return status;
 }
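
Both the FD() macro and the mmap bookkeeping above index into struct xyarray (util/xyarray.h, introduced elsewhere in this series). Its probable shape, reconstructed from the accessors used in this file -- treat the layout as an assumption:

    #include <stdlib.h>

    struct xyarray {
            size_t row_size;        /* ylen * entry_size */
            size_t entry_size;
            char contents[];        /* xlen rows of ylen entries */
    };

    static struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
    {
            size_t row_size = ylen * entry_size;
            struct xyarray *xy = calloc(1, sizeof(*xy) + xlen * row_size);

            if (xy != NULL) {
                    xy->entry_size = entry_size;
                    xy->row_size = row_size;
            }
            return xy;
    }

    static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
    {
            return &xy->contents[x * xy->row_size + y * xy->entry_size];
    }

A flat cpu-by-thread matrix like this is what lets the old fd[MAX_NR_CPUS][MAX_COUNTERS] static arrays go away.
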
index 921245b28583e448cce49008b2482e0f59193454..c7798c7f24ed737f03a4376485be5a2e14b3aef4 100644 (file)
@@ -27,7 +27,7 @@ extern int cmd_report(int argc, const char **argv, const char *prefix);
 extern int cmd_stat(int argc, const char **argv, const char *prefix);
 extern int cmd_timechart(int argc, const char **argv, const char *prefix);
 extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_trace(int argc, const char **argv, const char *prefix);
+extern int cmd_script(int argc, const char **argv, const char *prefix);
 extern int cmd_version(int argc, const char **argv, const char *prefix);
 extern int cmd_probe(int argc, const char **argv, const char *prefix);
 extern int cmd_kmem(int argc, const char **argv, const char *prefix);
index 949d77fc0b9718d812a8906883b7a89ef99c49f0..16b5088cf8f4bbb2259aee8ad90483cd926a0935 100644 (file)
@@ -16,7 +16,7 @@ perf-report                   mainporcelain common
 perf-stat                      mainporcelain common
 perf-timechart                 mainporcelain common
 perf-top                       mainporcelain common
-perf-trace                     mainporcelain common
+perf-script                    mainporcelain common
 perf-probe                     mainporcelain common
 perf-kmem                      mainporcelain common
 perf-lock                      mainporcelain common
index b253db634f04b7e8ddfddd1cc33bb3ce8343a49a..b041ca67a2cbdee01c87ff92ec21deda82ae1b5c 100644 (file)
@@ -9,8 +9,8 @@ endef
 ifndef NO_DWARF
 define SOURCE_DWARF
 #include <dwarf.h>
-#include <libdw.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
 #ifndef _ELFUTILS_PREREQ
 #error
 #endif
index cdd6c03f1e14c132e550b85e07b22e7621a710d2..5b1ecd66bb36a053b6427020f9aa3361d4ec1265 100644 (file)
@@ -286,6 +286,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
        status = p->fn(argc, argv, prefix);
        exit_browser(status);
 
+       perf_evsel_list__delete();
+
        if (status)
                return status & 0xff;
 
@@ -323,7 +325,7 @@ static void handle_internal_command(int argc, const char **argv)
                { "top",        cmd_top,        0 },
                { "annotate",   cmd_annotate,   0 },
                { "version",    cmd_version,    0 },
-               { "trace",      cmd_trace,      0 },
+               { "script",     cmd_script,     0 },
                { "sched",      cmd_sched,      0 },
                { "probe",      cmd_probe,      0 },
                { "kmem",       cmd_kmem,       0 },
index 01a64ad693f2a7c4ac8600e8d37f9a90ffad1b00..790ceba6ad3f4a4102a1affa81a637ef774d7d43 100644 (file)
@@ -8,7 +8,7 @@
 
 #line 1 "Context.xs"
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
index 549cf0467d309eda5be9c0faee7897a5608371b4..c1e2ed1ed34e4e16e3398acd12f3e5723a81ddee 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Context.xs.  XS interfaces for perf trace.
+ * Context.xs.  XS interfaces for perf script.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
@@ -23,7 +23,7 @@
 #include "perl.h"
 #include "XSUB.h"
 #include "../../../perf.h"
 #include "../../../util/trace-event.h"
 
 MODULE = Perf::Trace::Context          PACKAGE = Perf::Trace::Context
 PROTOTYPES: ENABLE
index 9a970763079144666b9bebb3f6d626bd7009f298..2f0c7f3043ee5d992727b50f11c404d33cfb009e 100644 (file)
@@ -1,7 +1,7 @@
 Perf-Trace-Util version 0.01
 ============================
 
-This module contains utility functions for use with perf trace.
+This module contains utility functions for use with perf script.
 
 Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
 that the core perf support for Perl calls on and should always be
@@ -33,7 +33,7 @@ After you do that:
 
 INSTALLATION
 
-Building perf with perf trace Perl scripting should install this
+Building perf with perf script Perl scripting should install this
 module in the right place.
 
 You should make sure libperl and ExtUtils/Embed.pm are installed first
index 6c7f3659cb1769ca8d40bd19816d54be94dd47ca..4e2f6039ac920f60192a7cee033339afbdf26042 100644 (file)
@@ -34,7 +34,7 @@ Perf::Trace::Context - Perl extension for accessing functions in perf.
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index 9df376a9f62971e355de96b14324c52fa9b469ea..9158458d3eeb118c35357a29af763f533e370247 100644 (file)
@@ -163,7 +163,7 @@ sub dump_symbolic_fields
 __END__
 =head1 NAME
 
-Perf::Trace::Core - Perl extension for perf trace
+Perf::Trace::Core - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -171,7 +171,7 @@ Perf::Trace::Core - Perl extension for perf trace
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index d94b40c8ac857227516b6f408bf5b13a9fea03cd..053500114625515d7745757178274e097c0d6aa2 100644 (file)
@@ -65,7 +65,7 @@ sub clear_term
 __END__
 =head1 NAME
 
-Perf::Trace::Util - Perl extension for perf trace
+Perf::Trace::Util - Perl extension for perf script
 
 =head1 SYNOPSIS
 
@@ -73,7 +73,7 @@ Perf::Trace::Util - Perl extension for perf trace
 
 =head1 SEE ALSO
 
-Perf (trace) documentation
+Perf (script) documentation
 
 =head1 AUTHOR
 
index 4028d92dc4ae6602927d3c82c02f3973506d995e..9f83cc1ad8ba253acabff31a32987566976e6d7c 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
index ba25f4d41fb02a1d40303ebcdb8ba8f17ff8470d..77200b3f31003c7bd8c3a137106f461223b67a43 100644 (file)
@@ -7,7 +7,4 @@ if [ $# -lt 1 ] ; then
 fi
 comm=$1
 shift
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
index 641a3f5d085c6148e9437e10704f6953d7d1eca5..a27b9f311f959a626d9e481f39cbb65b060d743f 100644 (file)
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide r/w activity
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
index 4918dba77021e676fdfeaf6c7d3e90105979ccc8..83e11ec2e190988c142aab5d4644778b812b9447 100644 (file)
@@ -17,7 +17,4 @@ if [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
index 49052ebcb6326d8aa13ea309994d965ca045f58b..889e8130cca55c7235ae749c83c5a9aedb92d4fd 100644 (file)
@@ -1,6 +1,3 @@
 #!/bin/bash
 # description: system-wide min/max/avg wakeup latency
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
index df0c65f4ca93de35b07bbe9e24f5a966d8b75ea8..6d91411d248caa1a0f6ade3ee39645cfb51b631d 100644 (file)
@@ -1,7 +1,3 @@
 #!/bin/bash
 # description: workqueue stats (ins/exe/create/destroy)
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
-
-
-
-
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
index 4e7dc0a407a5fbf65d0bcfab434e6dfad23f0733..4e7076c2061610044f766dddff136bf785166256 100644 (file)
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g perl
+# perf script event handlers, generated by perf script -g perl
 # (c) 2009, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 
index 2a39097687b9f70dffb2fe83f4477539ff8e98b4..74844ee2be3ef691ce9fd1c7129a158b5080a975 100644 (file)
@@ -18,7 +18,7 @@ use lib "./Perf-Trace-Util/lib";
 use Perf::Trace::Core;
 use Perf::Trace::Util;
 
-my $usage = "perf trace -s rw-by-file.pl <comm>\n";
+my $usage = "perf script -s rw-by-file.pl <comm>\n";
 
 my $for_comm = shift or die $usage;
 
index b84b12699b70ba0731bdbfdd4115d62558990184..a8eaff5119e09fa953626d0ed58ddfabae18bb00 100644 (file)
@@ -10,7 +10,7 @@
 #     workqueue:workqueue_destruction -e workqueue:workqueue_execution
 #     -e workqueue:workqueue_insertion
 #
-#   perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl
+#   perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl
 
 use 5.010000;
 use strict;
index 957085dd5d8d1a2ff17ee4bb6ad3c3160f8df508..315067b8f5522ae9cafbef1df506aca814e93012 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Context.c.  Python interfaces for perf trace.
+ * Context.c.  Python interfaces for perf script.
  *
  * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
  *
index aad7525bca1dc5a45ca72cd4be79eec7411a2bb0..de7211e4fa471ac0a0475f15097c0e175fdf4da9 100644 (file)
@@ -1,4 +1,4 @@
-# Core.py - Python extension for perf trace, core functions
+# Core.py - Python extension for perf script, core functions
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
index ae9a56e43e05e37981774e9dc9ec3d0ed59f6ba2..fdd92f699055713e2d1fec1c99a61489e5812a64 100644 (file)
@@ -1,4 +1,4 @@
-# SchedGui.py - Python extension for perf trace, basic GUI code for
+# SchedGui.py - Python extension for perf script, basic GUI code for
 #              traces drawing and overview.
 #
 # Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
index 13cc02b5893a7ee0a248b040eddc32cdc675a258..15c8400240fd9029ae34fca077304337d9c75ca6 100644 (file)
@@ -1,4 +1,4 @@
-# Util.py - Python extension for perf trace, miscellaneous utility code
+# Util.py - Python extension for perf script, miscellaneous utility code
 #
 # Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
 #
index 03587021463d4ef6c7d25b4d0a852178fded5a86..fda5096d0cbf81a29792819c9648a43f89497d3c 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
index c8268138fb7e3e6d431be07bada51abed3022294..6c44271091abbb977b2a0ef725dea470626b1337 100644 (file)
@@ -1,4 +1,4 @@
 #!/bin/bash
 # description: futex contention measurement
 
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
index 4ad361b31249c03f10d424b842b92b6d9911acee..8f759291da86c07435a62e7fa044f8c75f9c2749 100644 (file)
@@ -2,4 +2,4 @@
 # description: display packet processing flow and time
 # args: [tx] [rx] [dev=] [debug]
 
-perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
+perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
index df1791f07c24233c638e445d48ff3ab52955889c..68b037a1849b1aeb71ec86d21fb05af4e208fc87 100644 (file)
@@ -1,3 +1,3 @@
 #!/bin/bash
 # description: sched migration overview
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
index 36b409c05e50ac5e6f80f7b82189aaa0d2ba096a..c32db294124da91d2654302c9d8e32673ec2bc4c 100644 (file)
@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
     interval=$1
     shift
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
index 4eb88c9fc83ce7e99e14e8b004f71930b422d89a..16eb8d65c54335e08d1a95e7068e6d0df55be6b5 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
index cb2f9c5cf17e825972870c5c934500672e8bd15e..0f0e9d453bb48a606b3c6522a104bd16499fede8 100644 (file)
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
        shift
     fi
 fi
-perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
index d9f7893e315c0d5aa064df04a8cc3ac870aebdad..4647a7694cf60a77835f3c80aeb79d54578df69d 100644 (file)
@@ -1,4 +1,4 @@
-# perf trace event handlers, generated by perf trace -g python
+# perf script event handlers, generated by perf script -g python
 # (c) 2010, Tom Zanussi <tzanussi@gmail.com>
 # Licensed under the terms of the GNU GPL License version 2
 #
index acd7848717b35ea7c0c46ae61dc01241673f936d..85805fac41167b9e531c09f51ca21595287e08aa 100644 (file)
@@ -15,7 +15,7 @@ from perf_trace_context import *
 from Core import *
 from Util import *
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm|pid]\n";
 
 for_comm = None
 for_pid = None
index b934383c3364e63ed7bd6147bf5509867c65e7e2..74d55ec08aed5ec27867b1d74682a5a0bb320748 100644 (file)
@@ -4,7 +4,7 @@
 #
 # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
 #
-# perf trace event handlers have been generated by perf trace -g python
+# perf script event handlers have been generated by perf script -g python
 #
 # This software is distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
index 7a6ec2c7d8abe7bf01b660210811829f37eb572d..42c267e292fa36155f5d6b8270fb4b6943d9f6d0 100644 (file)
@@ -17,7 +17,7 @@ from perf_trace_context import *
 from Core import *
 from Util import *
 
-usage = "perf trace -s sctop.py [comm] [interval]\n";
+usage = "perf script -s sctop.py [comm] [interval]\n";
 
 for_comm = None
 default_interval = 3
index d1ee3ec10cf2b911776df81df7a5d4d66a5f5fc1..c64d1c55d745b7437e4e26f77446b87567c67ed1 100644 (file)
@@ -14,7 +14,7 @@ from perf_trace_context import *
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
+usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
 
 for_comm = None
 for_pid = None
index ea183dc82d29e54a005f28648201a2219feac224..b435d3f188e84c421819802cb2efcefd62cff0d2 100644 (file)
@@ -15,7 +15,7 @@ from perf_trace_context import *
 from Core import *
 from Util import syscall_name
 
-usage = "perf trace -s syscall-counts.py [comm]\n";
+usage = "perf script -s syscall-counts.py [comm]\n";
 
 for_comm = None
 
index e437edb72417ba2f12e90b810f8851caef2c24fb..deffb8c960716213124b7fd36f56edf41bf8207d 100644 (file)
@@ -14,7 +14,9 @@
 #include <linux/kernel.h>
 #include "debug.h"
 
-static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
+static int build_id__mark_dso_hit(event_t *event,
+                                 struct sample_data *sample __used,
+                                 struct perf_session *session)
 {
        struct addr_location al;
        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -35,7 +37,8 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
        return 0;
 }
 
-static int event__exit_del_thread(event_t *self, struct perf_session *session)
+static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
+                                 struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->fork.tid);
 
index 0f9b8d7a7d7e7d62f38c48ec8846251beb532918..3ccaa10433830503325bb7625527839f4586b93e 100644 (file)
@@ -4,32 +4,53 @@
 #include <assert.h>
 #include <stdio.h>
 
-int cpumap[MAX_NR_CPUS];
-
-static int default_cpu_map(void)
+static struct cpu_map *cpu_map__default_new(void)
 {
-       int nr_cpus, i;
+       struct cpu_map *cpus;
+       int nr_cpus;
 
        nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-       assert(nr_cpus <= MAX_NR_CPUS);
-       assert((int)nr_cpus >= 0);
+       if (nr_cpus < 0)
+               return NULL;
+
+       cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+       if (cpus != NULL) {
+               int i;
+               for (i = 0; i < nr_cpus; ++i)
+                       cpus->map[i] = i;
 
-       for (i = 0; i < nr_cpus; ++i)
-               cpumap[i] = i;
+               cpus->nr = nr_cpus;
+       }
 
-       return nr_cpus;
+       return cpus;
 }
 
-static int read_all_cpu_map(void)
+static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
 {
+       size_t payload_size = nr_cpus * sizeof(int);
+       struct cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+
+       if (cpus != NULL) {
+               cpus->nr = nr_cpus;
+               memcpy(cpus->map, tmp_cpus, payload_size);
+       }
+
+       return cpus;
+}
+
+static struct cpu_map *cpu_map__read_all_cpu_map(void)
+{
+       struct cpu_map *cpus = NULL;
        FILE *onlnf;
        int nr_cpus = 0;
+       int *tmp_cpus = NULL, *tmp;
+       int max_entries = 0;
        int n, cpu, prev;
        char sep;
 
        onlnf = fopen("/sys/devices/system/cpu/online", "r");
        if (!onlnf)
-               return default_cpu_map();
+               return cpu_map__default_new();
 
        sep = 0;
        prev = -1;
@@ -38,12 +59,28 @@ static int read_all_cpu_map(void)
                if (n <= 0)
                        break;
                if (prev >= 0) {
-                       assert(nr_cpus + cpu - prev - 1 < MAX_NR_CPUS);
+                       int new_max = nr_cpus + cpu - prev - 1;
+
+                       if (new_max >= max_entries) {
+                               max_entries = new_max + MAX_NR_CPUS / 2;
+                               tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+                               if (tmp == NULL)
+                                       goto out_free_tmp;
+                               tmp_cpus = tmp;
+                       }
+
                        while (++prev < cpu)
-                               cpumap[nr_cpus++] = prev;
+                               tmp_cpus[nr_cpus++] = prev;
+               }
+               if (nr_cpus == max_entries) {
+                       max_entries += MAX_NR_CPUS;
+                       tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+                       if (tmp == NULL)
+                               goto out_free_tmp;
+                       tmp_cpus = tmp;
                }
-               assert (nr_cpus < MAX_NR_CPUS);
-               cpumap[nr_cpus++] = cpu;
+
+               tmp_cpus[nr_cpus++] = cpu;
                if (n == 2 && sep == '-')
                        prev = cpu;
                else
@@ -51,24 +88,31 @@ static int read_all_cpu_map(void)
                if (n == 1 || sep == '\n')
                        break;
        }
-       fclose(onlnf);
-       if (nr_cpus > 0)
-               return nr_cpus;
 
-       return default_cpu_map();
+       if (nr_cpus > 0)
+               cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+       else
+               cpus = cpu_map__default_new();
+out_free_tmp:
+       free(tmp_cpus);
+       fclose(onlnf);
+       return cpus;
 }
 
-int read_cpu_map(const char *cpu_list)
+struct cpu_map *cpu_map__new(const char *cpu_list)
 {
+       struct cpu_map *cpus = NULL;
        unsigned long start_cpu, end_cpu = 0;
        char *p = NULL;
        int i, nr_cpus = 0;
+       int *tmp_cpus = NULL, *tmp;
+       int max_entries = 0;
 
        if (!cpu_list)
-               return read_all_cpu_map();
+               return cpu_map__read_all_cpu_map();
 
        if (!isdigit(*cpu_list))
-               goto invalid;
+               goto out;
 
        while (isdigit(*cpu_list)) {
                p = NULL;
@@ -94,21 +138,42 @@ int read_cpu_map(const char *cpu_list)
                for (; start_cpu <= end_cpu; start_cpu++) {
                        /* check for duplicates */
                        for (i = 0; i < nr_cpus; i++)
-                               if (cpumap[i] == (int)start_cpu)
+                               if (tmp_cpus[i] == (int)start_cpu)
                                        goto invalid;
 
-                       assert(nr_cpus < MAX_NR_CPUS);
-                       cpumap[nr_cpus++] = (int)start_cpu;
+                       if (nr_cpus == max_entries) {
+                               max_entries += MAX_NR_CPUS;
+                               tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+                               if (tmp == NULL)
+                                       goto invalid;
+                               tmp_cpus = tmp;
+                       }
+                       tmp_cpus[nr_cpus++] = (int)start_cpu;
                }
                if (*p)
                        ++p;
 
                cpu_list = p;
        }
-       if (nr_cpus > 0)
-               return nr_cpus;
 
-       return default_cpu_map();
+       if (nr_cpus > 0)
+               cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+       else
+               cpus = cpu_map__default_new();
 invalid:
-       return -1;
+       free(tmp_cpus);
+out:
+       return cpus;
+}
+
+struct cpu_map *cpu_map__dummy_new(void)
+{
+       struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+
+       if (cpus != NULL) {
+               cpus->nr = 1;
+               cpus->map[0] = -1;
+       }
+
+       return cpus;
 }
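
The rewrite above drops the fixed cpumap[MAX_NR_CPUS] global in favour of a
temporary array grown with realloc() in chunks, so parsing no longer asserts
on machines with more CPUs than the compile-time limit. A standalone sketch of
the grow-then-append idiom in isolation (the chunk size and names here are
illustrative, not perf's):

#include <stdio.h>
#include <stdlib.h>

#define GROW_CHUNK 16	/* arbitrary; perf grows in MAX_NR_CPUS-sized steps */

static int append(int **arr, int *nr, int *max, int value)
{
	if (*nr == *max) {
		/* realloc into a temporary so the old buffer can still
		 * be freed if allocation fails */
		int *tmp = realloc(*arr, (*max + GROW_CHUNK) * sizeof(int));

		if (tmp == NULL)
			return -1;
		*arr = tmp;
		*max += GROW_CHUNK;
	}
	(*arr)[(*nr)++] = value;
	return 0;
}

int main(void)
{
	int *cpus = NULL, nr = 0, max = 0, i;

	for (i = 0; i < 40; i++)
		if (append(&cpus, &nr, &max, i))
			break;
	printf("stored %d entries, capacity %d\n", nr, max);
	free(cpus);
	return 0;
}

Assigning realloc()'s result to a temporary before overwriting the pointer is
what lets the out_free_tmp paths above release the original buffer when
allocation fails.
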
index 3e60f56e490eb10f8cf08981e703bf5699d6b20c..f7a4f42f6307fb522299ea48d1126e6d08ffda71 100644 (file)
@@ -1,7 +1,13 @@
 #ifndef __PERF_CPUMAP_H
 #define __PERF_CPUMAP_H
 
-extern int read_cpu_map(const char *cpu_list);
-extern int cpumap[];
+struct cpu_map {
+       int nr;
+       int map[];
+};
+
+struct cpu_map *cpu_map__new(const char *cpu_list);
+struct cpu_map *cpu_map__dummy_new(void);
+void cpu_map__delete(struct cpu_map *map);
 
 #endif /* __PERF_CPUMAP_H */
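
struct cpu_map ends in a C99 flexible array member, so the header and the CPU
list share a single allocation and a single free. A minimal self-contained
sketch of that pattern, mirroring cpu_map__trim_new() above with illustrative
values:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cpu_map {
	int nr;
	int map[];	/* flexible array member, sized at allocation time */
};

static struct cpu_map *cpu_map__from_array(int nr_cpus, const int *cpus_in)
{
	/* one malloc covers the header plus the nr_cpus payload slots */
	size_t payload_size = nr_cpus * sizeof(int);
	struct cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);

	if (cpus != NULL) {
		cpus->nr = nr_cpus;
		memcpy(cpus->map, cpus_in, payload_size);
	}
	return cpus;
}

int main(void)
{
	int online[] = { 0, 1, 4, 5 };	/* illustrative CPU list */
	struct cpu_map *cpus = cpu_map__from_array(4, online);
	int i;

	if (cpus == NULL)
		return 1;
	for (i = 0; i < cpus->nr; i++)
		printf("cpu %d\n", cpus->map[i]);
	free(cpus);	/* a single free releases header and payload */
	return 0;
}

cpu_map__delete() can then be little more than a free() of the single block.
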
index c8d81b00089d6d9d26ca431aea91453a86ef2351..01bbe8ecec3f7eda9088e9e59ad78ea53b7b5ce6 100644 (file)
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...)
        return ret;
 }
 
-static int dump_printf_color(const char *fmt, const char *color, ...)
+#ifdef NO_NEWT_SUPPORT
+void ui__warning(const char *format, ...)
 {
        va_list args;
-       int ret = 0;
 
-       if (dump_trace) {
-               va_start(args, color);
-               ret = color_vfprintf(stdout, color, fmt, args);
-               va_end(args);
-       }
-
-       return ret;
+       va_start(args, format);
+       vfprintf(stderr, format, args);
+       va_end(args);
 }
-
+#endif
 
 void trace_event(event_t *event)
 {
@@ -70,29 +66,29 @@ void trace_event(event_t *event)
        if (!dump_trace)
                return;
 
-       dump_printf(".");
-       dump_printf_color("\n. ... raw event: size %d bytes\n", color,
-                         event->header.size);
+       printf(".");
+       color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+                     event->header.size);
 
        for (i = 0; i < event->header.size; i++) {
                if ((i & 15) == 0) {
-                       dump_printf(".");
-                       dump_printf_color("  %04x: ", color, i);
+                       printf(".");
+                       color_fprintf(stdout, color, "  %04x: ", i);
                }
 
-               dump_printf_color(" %02x", color, raw_event[i]);
+               color_fprintf(stdout, color, " %02x", raw_event[i]);
 
                if (((i & 15) == 15) || i == event->header.size-1) {
-                       dump_printf_color("  ", color);
+                       color_fprintf(stdout, color, "  ");
                        for (j = 0; j < 15-(i & 15); j++)
-                               dump_printf_color("   ", color);
+                               color_fprintf(stdout, color, "   ");
                        for (j = i & ~15; j <= i; j++) {
-                               dump_printf_color("%c", color,
-                                               isprint(raw_event[j]) ?
-                                               raw_event[j] : '.');
+                               color_fprintf(stdout, color, "%c",
+                                             isprint(raw_event[j]) ?
+                                             raw_event[j] : '.');
                        }
-                       dump_printf_color("\n", color);
+                       color_fprintf(stdout, color, "\n");
                }
        }
-       dump_printf(".\n");
+       printf(".\n");
 }
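
trace_event() now writes its raw dump through printf()/color_fprintf()
directly instead of routing everything through the removed dump_printf_color().
The layout is a conventional 16-bytes-per-row hex dump with a printable
character gutter; a standalone sketch of the same layout, with the colors and
the leading dump dots omitted:

#include <ctype.h>
#include <stdio.h>

static void hexdump(const unsigned char *buf, size_t size)
{
	size_t i, j;

	for (i = 0; i < size; i++) {
		if ((i & 15) == 0)
			printf("  %04zx: ", i);		/* row offset */
		printf(" %02x", buf[i]);
		/* close the row at 16 bytes or at the end of the buffer */
		if (((i & 15) == 15) || i == size - 1) {
			for (j = 0; j < 15 - (i & 15); j++)
				printf("   ");	/* pad a short last row */
			printf("  ");
			for (j = i & ~(size_t)15; j <= i; j++)
				printf("%c", isprint(buf[j]) ? buf[j] : '.');
			printf("\n");
		}
	}
}

int main(void)
{
	hexdump((const unsigned char *)"raw event bytes\x01\x02\x03", 18);
	return 0;
}
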
index 7b514082bbaff4992c31c590cd6d5b3153149f85..ca35fd66b5dfc8c238f5a4be3cb28ce402cf0bf6 100644 (file)
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap);
 #include "ui/progress.h"
 #endif
 
+void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
 #endif /* __PERF_DEBUG_H */
index dab9e754a28103b1727d6dee29669aaa2d7f89da..2302ec051bb4f1b5171bb543b33284b29bf6f8b6 100644 (file)
@@ -7,7 +7,7 @@
 #include "strlist.h"
 #include "thread.h"
 
-const char *event__name[] = {
+static const char *event__name[] = {
        [0]                      = "TOTAL",
        [PERF_RECORD_MMAP]       = "MMAP",
        [PERF_RECORD_LOST]       = "LOST",
@@ -22,13 +22,31 @@ const char *event__name[] = {
        [PERF_RECORD_HEADER_EVENT_TYPE]  = "EVENT_TYPE",
        [PERF_RECORD_HEADER_TRACING_DATA]        = "TRACING_DATA",
        [PERF_RECORD_HEADER_BUILD_ID]    = "BUILD_ID",
+       [PERF_RECORD_FINISHED_ROUND]     = "FINISHED_ROUND",
 };
 
-static pid_t event__synthesize_comm(pid_t pid, int full,
+const char *event__get_event_name(unsigned int id)
+{
+       if (id >= ARRAY_SIZE(event__name))
+               return "INVALID";
+       if (!event__name[id])
+               return "UNKNOWN";
+       return event__name[id];
+}
+
+static struct sample_data synth_sample = {
+       .pid       = -1,
+       .tid       = -1,
+       .time      = -1,
+       .stream_id = -1,
+       .cpu       = -1,
+       .period    = 1,
+};
+
+static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
                                    event__handler_t process,
                                    struct perf_session *session)
 {
-       event_t ev;
        char filename[PATH_MAX];
        char bf[BUFSIZ];
        FILE *fp;
@@ -49,34 +67,39 @@ out_race:
                return 0;
        }
 
-       memset(&ev.comm, 0, sizeof(ev.comm));
-       while (!ev.comm.comm[0] || !ev.comm.pid) {
-               if (fgets(bf, sizeof(bf), fp) == NULL)
-                       goto out_failure;
+       memset(&event->comm, 0, sizeof(event->comm));
+
+       while (!event->comm.comm[0] || !event->comm.pid) {
+               if (fgets(bf, sizeof(bf), fp) == NULL) {
+                       pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
+                       goto out;
+               }
 
                if (memcmp(bf, "Name:", 5) == 0) {
                        char *name = bf + 5;
                        while (*name && isspace(*name))
                                ++name;
                        size = strlen(name) - 1;
-                       memcpy(ev.comm.comm, name, size++);
+                       memcpy(event->comm.comm, name, size++);
                } else if (memcmp(bf, "Tgid:", 5) == 0) {
                        char *tgids = bf + 5;
                        while (*tgids && isspace(*tgids))
                                ++tgids;
-                       tgid = ev.comm.pid = atoi(tgids);
+                       tgid = event->comm.pid = atoi(tgids);
                }
        }
 
-       ev.comm.header.type = PERF_RECORD_COMM;
+       event->comm.header.type = PERF_RECORD_COMM;
        size = ALIGN(size, sizeof(u64));
-       ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size);
-
+       memset(event->comm.comm + size, 0, session->id_hdr_size);
+       event->comm.header.size = (sizeof(event->comm) -
+                               (sizeof(event->comm.comm) - size) +
+                               session->id_hdr_size);
        if (!full) {
-               ev.comm.tid = pid;
+               event->comm.tid = pid;
 
-               process(&ev, session);
-               goto out_fclose;
+               process(event, &synth_sample, session);
+               goto out;
        }
 
        snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -91,22 +114,19 @@ out_race:
                if (*end)
                        continue;
 
-               ev.comm.tid = pid;
+               event->comm.tid = pid;
 
-               process(&ev, session);
+               process(event, &synth_sample, session);
        }
-       closedir(tasks);
 
-out_fclose:
+       closedir(tasks);
+out:
        fclose(fp);
-       return tgid;
 
-out_failure:
-       pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
-       return -1;
+       return tgid;
 }
 
-static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
+static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
                                         event__handler_t process,
                                         struct perf_session *session)
 {
@@ -124,29 +144,25 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
                return -1;
        }
 
+       event->header.type = PERF_RECORD_MMAP;
+       /*
+        * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+        */
+       event->header.misc = PERF_RECORD_MISC_USER;
+
        while (1) {
                char bf[BUFSIZ], *pbf = bf;
-               event_t ev = {
-                       .header = {
-                               .type = PERF_RECORD_MMAP,
-                               /*
-                                * Just like the kernel, see __perf_event_mmap
-                                * in kernel/perf_event.c
-                                */
-                               .misc = PERF_RECORD_MISC_USER,
-                        },
-               };
                int n;
                size_t size;
                if (fgets(bf, sizeof(bf), fp) == NULL)
                        break;
 
                /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               n = hex2u64(pbf, &ev.mmap.start);
+               n = hex2u64(pbf, &event->mmap.start);
                if (n < 0)
                        continue;
                pbf += n + 1;
-               n = hex2u64(pbf, &ev.mmap.len);
+               n = hex2u64(pbf, &event->mmap.len);
                if (n < 0)
                        continue;
                pbf += n + 3;
@@ -161,19 +177,21 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
                                continue;
 
                        pbf += 3;
-                       n = hex2u64(pbf, &ev.mmap.pgoff);
+                       n = hex2u64(pbf, &event->mmap.pgoff);
 
                        size = strlen(execname);
                        execname[size - 1] = '\0'; /* Remove \n */
-                       memcpy(ev.mmap.filename, execname, size);
+                       memcpy(event->mmap.filename, execname, size);
                        size = ALIGN(size, sizeof(u64));
-                       ev.mmap.len -= ev.mmap.start;
-                       ev.mmap.header.size = (sizeof(ev.mmap) -
-                                              (sizeof(ev.mmap.filename) - size));
-                       ev.mmap.pid = tgid;
-                       ev.mmap.tid = pid;
-
-                       process(&ev, session);
+                       event->mmap.len -= event->mmap.start;
+                       event->mmap.header.size = (sizeof(event->mmap) -
+                                               (sizeof(event->mmap.filename) - size));
+                       memset(event->mmap.filename + size, 0, session->id_hdr_size);
+                       event->mmap.header.size += session->id_hdr_size;
+                       event->mmap.pid = tgid;
+                       event->mmap.tid = pid;
+
+                       process(event, &synth_sample, session);
                }
        }
 
@@ -187,20 +205,27 @@ int event__synthesize_modules(event__handler_t process,
 {
        struct rb_node *nd;
        struct map_groups *kmaps = &machine->kmaps;
-       u16 misc;
+       event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for kernel modules\n");
+               return -1;
+       }
+
+       event->header.type = PERF_RECORD_MMAP;
 
        /*
         * kernel uses 0 for user space maps, see kernel/perf_event.c
         * __perf_event_mmap
         */
        if (machine__is_host(machine))
-               misc = PERF_RECORD_MISC_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
        else
-               misc = PERF_RECORD_MISC_GUEST_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
        for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
             nd; nd = rb_next(nd)) {
-               event_t ev;
                size_t size;
                struct map *pos = rb_entry(nd, struct map, rb_node);
 
@@ -208,39 +233,78 @@ int event__synthesize_modules(event__handler_t process,
                        continue;
 
                size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
-               memset(&ev, 0, sizeof(ev));
-               ev.mmap.header.misc = misc;
-               ev.mmap.header.type = PERF_RECORD_MMAP;
-               ev.mmap.header.size = (sizeof(ev.mmap) -
-                                       (sizeof(ev.mmap.filename) - size));
-               ev.mmap.start = pos->start;
-               ev.mmap.len   = pos->end - pos->start;
-               ev.mmap.pid   = machine->pid;
-
-               memcpy(ev.mmap.filename, pos->dso->long_name,
+               event->mmap.header.type = PERF_RECORD_MMAP;
+               event->mmap.header.size = (sizeof(event->mmap) -
+                                       (sizeof(event->mmap.filename) - size));
+               memset(event->mmap.filename + size, 0, session->id_hdr_size);
+               event->mmap.header.size += session->id_hdr_size;
+               event->mmap.start = pos->start;
+               event->mmap.len   = pos->end - pos->start;
+               event->mmap.pid   = machine->pid;
+
+               memcpy(event->mmap.filename, pos->dso->long_name,
                       pos->dso->long_name_len + 1);
-               process(&ev, session);
+               process(event, &synth_sample, session);
        }
 
+       free(event);
        return 0;
 }
 
-int event__synthesize_thread(pid_t pid, event__handler_t process,
-                            struct perf_session *session)
+static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
+                                     pid_t pid, event__handler_t process,
+                                     struct perf_session *session)
 {
-       pid_t tgid = event__synthesize_comm(pid, 1, process, session);
+       pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
+                                           session);
        if (tgid == -1)
                return -1;
-       return event__synthesize_mmap_events(pid, tgid, process, session);
+       return event__synthesize_mmap_events(mmap_event, pid, tgid,
+                                            process, session);
+}
+
+int event__synthesize_thread(pid_t pid, event__handler_t process,
+                            struct perf_session *session)
+{
+       event_t *comm_event, *mmap_event;
+       int err = -1;
+
+       comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
+
+       err = __event__synthesize_thread(comm_event, mmap_event, pid,
+                                        process, session);
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
 }
 
-void event__synthesize_threads(event__handler_t process,
-                              struct perf_session *session)
+int event__synthesize_threads(event__handler_t process,
+                             struct perf_session *session)
 {
        DIR *proc;
        struct dirent dirent, *next;
+       event_t *comm_event, *mmap_event;
+       int err = -1;
+
+       comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+       if (comm_event == NULL)
+               goto out;
+
+       mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+       if (mmap_event == NULL)
+               goto out_free_comm;
 
        proc = opendir("/proc");
+       if (proc == NULL)
+               goto out_free_mmap;
 
        while (!readdir_r(proc, &dirent, &next) && next) {
                char *end;
@@ -249,10 +313,18 @@ void event__synthesize_threads(event__handler_t process,
                if (*end) /* only interested in proper numerical dirents */
                        continue;
 
-               event__synthesize_thread(pid, process, session);
+               __event__synthesize_thread(comm_event, mmap_event, pid,
+                                          process, session);
        }
 
        closedir(proc);
+       err = 0;
+out_free_mmap:
+       free(mmap_event);
+out_free_comm:
+       free(comm_event);
+out:
+       return err;
 }
 
 struct process_symbol_args {
@@ -260,7 +332,8 @@ struct process_symbol_args {
        u64        start;
 };
 
-static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+static int find_symbol_cb(void *arg, const char *name, char type,
+                         u64 start, u64 end __used)
 {
        struct process_symbol_args *args = arg;
 
@@ -286,18 +359,20 @@ int event__synthesize_kernel_mmap(event__handler_t process,
        char path[PATH_MAX];
        char name_buff[PATH_MAX];
        struct map *map;
-
-       event_t ev = {
-               .header = {
-                       .type = PERF_RECORD_MMAP,
-               },
-       };
+       int err;
        /*
         * We should get this from /sys/kernel/sections/.text, but till that is
         * available use this, and after it is use this as a fallback for older
         * kernels.
         */
        struct process_symbol_args args = { .name = symbol_name, };
+       event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
+
+       if (event == NULL) {
+               pr_debug("Not enough memory synthesizing mmap event "
+                        "for the kernel\n");
+               return -1;
+       }
 
        mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
        if (machine__is_host(machine)) {
@@ -305,10 +380,10 @@ int event__synthesize_kernel_mmap(event__handler_t process,
                 * kernel uses PERF_RECORD_MISC_USER for user space maps,
                 * see kernel/perf_event.c __perf_event_mmap
                 */
-               ev.header.misc = PERF_RECORD_MISC_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_KERNEL;
                filename = "/proc/kallsyms";
        } else {
-               ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+               event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
                if (machine__is_default_guest(machine))
                        filename = (char *) symbol_conf.default_guest_kallsyms;
                else {
@@ -321,17 +396,21 @@ int event__synthesize_kernel_mmap(event__handler_t process,
                return -ENOENT;
 
        map = machine->vmlinux_maps[MAP__FUNCTION];
-       size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+       size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
                        "%s%s", mmap_name, symbol_name) + 1;
        size = ALIGN(size, sizeof(u64));
-       ev.mmap.header.size = (sizeof(ev.mmap) -
-                       (sizeof(ev.mmap.filename) - size));
-       ev.mmap.pgoff = args.start;
-       ev.mmap.start = map->start;
-       ev.mmap.len   = map->end - ev.mmap.start;
-       ev.mmap.pid   = machine->pid;
-
-       return process(&ev, session);
+       event->mmap.header.type = PERF_RECORD_MMAP;
+       event->mmap.header.size = (sizeof(event->mmap) -
+                       (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
+       event->mmap.pgoff = args.start;
+       event->mmap.start = map->start;
+       event->mmap.len   = map->end - event->mmap.start;
+       event->mmap.pid   = machine->pid;
+
+       err = process(event, &synth_sample, session);
+       free(event);
+
+       return err;
 }
 
 static void thread__comm_adjust(struct thread *self, struct hists *hists)
@@ -361,7 +440,8 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
        return 0;
 }
 
-int event__process_comm(event_t *self, struct perf_session *session)
+int event__process_comm(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->comm.tid);
 
@@ -376,7 +456,8 @@ int event__process_comm(event_t *self, struct perf_session *session)
        return 0;
 }
 
-int event__process_lost(event_t *self, struct perf_session *session)
+int event__process_lost(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost);
        session->hists.stats.total_lost += self->lost.lost;
@@ -392,7 +473,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
         * a zero sized synthesized MMAP event for the kernel.
         */
        if (maps[MAP__FUNCTION]->end == 0)
-               maps[MAP__FUNCTION]->end = ~0UL;
+               maps[MAP__FUNCTION]->end = ~0ULL;
 }
 
 static int event__process_kernel_mmap(event_t *self,
@@ -485,7 +566,8 @@ out_problem:
        return -1;
 }
 
-int event__process_mmap(event_t *self, struct perf_session *session)
+int event__process_mmap(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct machine *machine;
        struct thread *thread;
@@ -526,7 +608,8 @@ out_problem:
        return 0;
 }
 
-int event__process_task(event_t *self, struct perf_session *session)
+int event__process_task(event_t *self, struct sample_data *sample __used,
+                       struct perf_session *session)
 {
        struct thread *thread = perf_session__findnew(session, self->fork.tid);
        struct thread *parent = perf_session__findnew(session, self->fork.ptid);
@@ -548,18 +631,19 @@ int event__process_task(event_t *self, struct perf_session *session)
        return 0;
 }
 
-int event__process(event_t *event, struct perf_session *session)
+int event__process(event_t *event, struct sample_data *sample,
+                  struct perf_session *session)
 {
        switch (event->header.type) {
        case PERF_RECORD_COMM:
-               event__process_comm(event, session);
+               event__process_comm(event, sample, session);
                break;
        case PERF_RECORD_MMAP:
-               event__process_mmap(event, session);
+               event__process_mmap(event, sample, session);
                break;
        case PERF_RECORD_FORK:
        case PERF_RECORD_EXIT:
-               event__process_task(event, session);
+               event__process_task(event, sample, session);
                break;
        default:
                break;
@@ -674,32 +758,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
                             symbol_filter_t filter)
 {
        u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-       struct thread *thread;
-
-       event__parse_sample(self, session->sample_type, data);
-
-       dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
-                   self->header.misc, data->pid, data->tid, data->ip,
-                   data->period, data->cpu);
-
-       if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
-               unsigned int i;
-
-               dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
+       struct thread *thread = perf_session__findnew(session, self->ip.pid);
 
-               if (!ip_callchain__valid(data->callchain, self)) {
-                       pr_debug("call-chain problem with event, "
-                                "skipping it.\n");
-                       goto out_filtered;
-               }
-
-               if (dump_trace) {
-                       for (i = 0; i < data->callchain->nr; i++)
-                               dump_printf("..... %2d: %016Lx\n",
-                                           i, data->callchain->ips[i]);
-               }
-       }
-       thread = perf_session__findnew(session, self->ip.pid);
        if (thread == NULL)
                return -1;
 
@@ -766,9 +826,65 @@ out_filtered:
        return 0;
 }
 
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
+static int event__parse_id_sample(const event_t *event,
+                                 struct perf_session *session,
+                                 struct sample_data *sample)
 {
-       const u64 *array = event->sample.array;
+       const u64 *array;
+       u64 type;
+
+       sample->cpu = sample->pid = sample->tid = -1;
+       sample->stream_id = sample->id = sample->time = -1ULL;
+
+       if (!session->sample_id_all)
+               return 0;
+
+       array = event->sample.array;
+       array += ((event->header.size -
+                  sizeof(event->header)) / sizeof(u64)) - 1;
+       type = session->sample_type;
+
+       if (type & PERF_SAMPLE_CPU) {
+               u32 *p = (u32 *)array;
+               sample->cpu = *p;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_STREAM_ID) {
+               sample->stream_id = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_ID) {
+               sample->id = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_TIME) {
+               sample->time = *array;
+               array--;
+       }
+
+       if (type & PERF_SAMPLE_TID) {
+               u32 *p = (u32 *)array;
+               sample->pid = p[0];
+               sample->tid = p[1];
+       }
+
+       return 0;
+}
+
+int event__parse_sample(const event_t *event, struct perf_session *session,
+                       struct sample_data *data)
+{
+       const u64 *array;
+       u64 type;
+
+       if (event->header.type != PERF_RECORD_SAMPLE)
+               return event__parse_id_sample(event, session, data);
+
+       array = event->sample.array;
+       type = session->sample_type;
 
        if (type & PERF_SAMPLE_IP) {
                data->ip = event->ip.ip;
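
The new event__parse_id_sample() recovers the sample_id_all trailer that the
kernel appends to non-sample events: the fields are emitted in ascending
PERF_SAMPLE_* bit order, so pointing at the last u64 and walking backwards
decodes them without knowing the event-specific payload size. A self-contained
sketch of that backwards walk (flag values copied from linux/perf_event.h; the
little-endian pid/tid packing is assumed here, as perf handles byte-swapping
separately):

#include <stdint.h>
#include <stdio.h>

/* bit values as in linux/perf_event.h */
#define PERF_SAMPLE_TID		(1U << 1)
#define PERF_SAMPLE_TIME	(1U << 2)
#define PERF_SAMPLE_ID		(1U << 6)
#define PERF_SAMPLE_CPU		(1U << 7)
#define PERF_SAMPLE_STREAM_ID	(1U << 9)

struct sample_id {
	uint32_t pid, tid, cpu;
	uint64_t time, id, stream_id;
};

/* 'array' points at the LAST u64 of the trailer; the kernel appended the
 * fields in ascending flag order, so they peel off in descending order */
static void parse_id_trailer(const uint64_t *array, uint64_t type,
			     struct sample_id *s)
{
	if (type & PERF_SAMPLE_CPU) {
		s->cpu = *(const uint32_t *)array;
		array--;
	}
	if (type & PERF_SAMPLE_STREAM_ID) {
		s->stream_id = *array;
		array--;
	}
	if (type & PERF_SAMPLE_ID) {
		s->id = *array;
		array--;
	}
	if (type & PERF_SAMPLE_TIME) {
		s->time = *array;
		array--;
	}
	if (type & PERF_SAMPLE_TID) {
		const uint32_t *p = (const uint32_t *)array;
		s->pid = p[0];
		s->tid = p[1];
	}
}

int main(void)
{
	/* trailer for TID|TIME|ID: { tid<<32 | pid, time, id } */
	uint64_t trailer[] = { (42ULL << 32) | 41ULL, 123456789ULL, 7ULL };
	struct sample_id s = { 0 };

	parse_id_trailer(&trailer[2],
			 PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID,
			 &s);
	printf("pid %u tid %u time %llu id %llu\n", s.pid, s.tid,
	       (unsigned long long)s.time, (unsigned long long)s.id);
	return 0;
}

Only these five fields can appear in the trailer, which is why the walk stops
at the TID pair.
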
index 8e790dae702625aa564594bee1dd52618b94f8d8..2b7e91902f105d5962a68254737ce21cfeef935c 100644 (file)
@@ -85,6 +85,7 @@ struct build_id_event {
 };
 
 enum perf_user_event_type { /* above any possible kernel type */
+       PERF_RECORD_USER_TYPE_START             = 64,
        PERF_RECORD_HEADER_ATTR                 = 64,
        PERF_RECORD_HEADER_EVENT_TYPE           = 65,
        PERF_RECORD_HEADER_TRACING_DATA         = 66,
@@ -135,12 +136,15 @@ void event__print_totals(void);
 
 struct perf_session;
 
-typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
+typedef int (*event__handler_synth_t)(event_t *event,
+                                     struct perf_session *session);
+typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
+                               struct perf_session *session);
 
 int event__synthesize_thread(pid_t pid, event__handler_t process,
                             struct perf_session *session);
-void event__synthesize_threads(event__handler_t process,
-                              struct perf_session *session);
+int event__synthesize_threads(event__handler_t process,
+                             struct perf_session *session);
 int event__synthesize_kernel_mmap(event__handler_t process,
                                struct perf_session *session,
                                struct machine *machine,
@@ -150,18 +154,24 @@ int event__synthesize_modules(event__handler_t process,
                              struct perf_session *session,
                              struct machine *machine);
 
-int event__process_comm(event_t *self, struct perf_session *session);
-int event__process_lost(event_t *self, struct perf_session *session);
-int event__process_mmap(event_t *self, struct perf_session *session);
-int event__process_task(event_t *self, struct perf_session *session);
-int event__process(event_t *event, struct perf_session *session);
+int event__process_comm(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_lost(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_mmap(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process_task(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+int event__process(event_t *event, struct sample_data *sample,
+                  struct perf_session *session);
 
 struct addr_location;
 int event__preprocess_sample(const event_t *self, struct perf_session *session,
                             struct addr_location *al, struct sample_data *data,
                             symbol_filter_t filter);
-int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
+int event__parse_sample(const event_t *event, struct perf_session *session,
+                       struct sample_data *sample);
 
-extern const char *event__name[];
+const char *event__get_event_name(unsigned int id);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
new file mode 100644 (file)
index 0000000..c95267e
--- /dev/null
@@ -0,0 +1,186 @@
+#include "evsel.h"
+#include "../perf.h"
+#include "util.h"
+#include "cpumap.h"
+#include "thread.h"
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
+{
+       struct perf_evsel *evsel = zalloc(sizeof(*evsel));
+
+       if (evsel != NULL) {
+               evsel->idx         = idx;
+               evsel->attr.type   = type;
+               evsel->attr.config = config;
+               INIT_LIST_HEAD(&evsel->node);
+       }
+
+       return evsel;
+}
+
+int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
+       return evsel->fd != NULL ? 0 : -ENOMEM;
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+       evsel->counts = zalloc((sizeof(*evsel->counts) +
+                               (ncpus * sizeof(struct perf_counts_values))));
+       return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_fd(struct perf_evsel *evsel)
+{
+       xyarray__delete(evsel->fd);
+       evsel->fd = NULL;
+}
+
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       int cpu, thread;
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               for (thread = 0; thread < nthreads; ++thread) {
+                       close(FD(evsel, cpu, thread));
+                       FD(evsel, cpu, thread) = -1;
+               }
+}
+
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+       assert(list_empty(&evsel->node));
+       xyarray__delete(evsel->fd);
+       free(evsel);
+}
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale)
+{
+       struct perf_counts_values count;
+       size_t nv = scale ? 3 : 1;
+
+       if (FD(evsel, cpu, thread) < 0)
+               return -EINVAL;
+
+       if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
+               return -ENOMEM;
+
+       if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
+               return -errno;
+
+       if (scale) {
+               if (count.run == 0)
+                       count.val = 0;
+               else if (count.run < count.ena)
+                       count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
+       } else
+               count.ena = count.run = 0;
+
+       evsel->counts->cpu[cpu] = count;
+       return 0;
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel,
+                      int ncpus, int nthreads, bool scale)
+{
+       size_t nv = scale ? 3 : 1;
+       int cpu, thread;
+       struct perf_counts_values *aggr = &evsel->counts->aggr, count;
+
+       aggr->val = 0;
+
+       for (cpu = 0; cpu < ncpus; cpu++) {
+               for (thread = 0; thread < nthreads; thread++) {
+                       if (FD(evsel, cpu, thread) < 0)
+                               continue;
+
+                       if (readn(FD(evsel, cpu, thread),
+                                 &count, nv * sizeof(u64)) < 0)
+                               return -errno;
+
+                       aggr->val += count.val;
+                       if (scale) {
+                               aggr->ena += count.ena;
+                               aggr->run += count.run;
+                       }
+               }
+       }
+
+       evsel->counts->scaled = 0;
+       if (scale) {
+               if (aggr->run == 0) {
+                       evsel->counts->scaled = -1;
+                       aggr->val = 0;
+                       return 0;
+               }
+
+               if (aggr->run < aggr->ena) {
+                       evsel->counts->scaled = 1;
+                       aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
+               }
+       } else
+               aggr->ena = aggr->run = 0;
+
+       return 0;
+}
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+{
+       int cpu;
+
+       if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
+               return -1;
+
+       for (cpu = 0; cpu < cpus->nr; cpu++) {
+               FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
+                                                       cpus->map[cpu], -1, 0);
+               if (FD(evsel, cpu, 0) < 0)
+                       goto out_close;
+       }
+
+       return 0;
+
+out_close:
+       while (--cpu >= 0) {
+               close(FD(evsel, cpu, 0));
+               FD(evsel, cpu, 0) = -1;
+       }
+       return -1;
+}
+
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+{
+       int thread;
+
+       if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
+               return -1;
+
+       for (thread = 0; thread < threads->nr; thread++) {
+               FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
+                                                          threads->map[thread], -1, -1, 0);
+               if (FD(evsel, 0, thread) < 0)
+                       goto out_close;
+       }
+
+       return 0;
+
+out_close:
+       while (--thread >= 0) {
+               close(FD(evsel, 0, thread));
+               FD(evsel, 0, thread) = -1;
+       }
+       return -1;
+}
+
+int perf_evsel__open(struct perf_evsel *evsel,
+                    struct cpu_map *cpus, struct thread_map *threads)
+{
+       if (threads == NULL)
+               return perf_evsel__open_per_cpu(evsel, cpus);
+
+       return perf_evsel__open_per_thread(evsel, threads);
+}
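
__perf_evsel__read_on_cpu() and __perf_evsel__read() above share the classic
multiplexing correction: when a counter was scheduled for only part of the
time it was enabled, the raw value is extrapolated by ena/run with rounding
to nearest. The formula in isolation, as a standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* a multiplexed counter only ran for 'run' of the 'ena' nanoseconds it
 * was enabled; extrapolate the observed value, rounding to nearest */
static uint64_t scale_count(uint64_t val, uint64_t ena, uint64_t run)
{
	if (run == 0)
		return 0;
	if (run < ena)
		return (uint64_t)((double)val * ena / run + 0.5);
	return val;
}

int main(void)
{
	/* 1000 events observed while scheduled for half the enabled time */
	printf("%llu\n",
	       (unsigned long long)scale_count(1000, 2000000, 1000000));
	return 0;
}

The counts->scaled field is set to -1, 1 or 0 above so callers can tell
"never ran", "extrapolated" and "exact" apart.
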
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
new file mode 100644 (file)
index 0000000..a0ccd69
--- /dev/null
@@ -0,0 +1,115 @@
+#ifndef __PERF_EVSEL_H
+#define __PERF_EVSEL_H 1
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include "../../../include/linux/perf_event.h"
+#include "types.h"
+#include "xyarray.h"
+struct perf_counts_values {
+       union {
+               struct {
+                       u64 val;
+                       u64 ena;
+                       u64 run;
+               };
+               u64 values[3];
+       };
+};
+
+struct perf_counts {
+       s8                        scaled;
+       struct perf_counts_values aggr;
+       struct perf_counts_values cpu[];
+};
+
+struct perf_evsel {
+       struct list_head        node;
+       struct perf_event_attr  attr;
+       char                    *filter;
+       struct xyarray          *fd;
+       struct perf_counts      *counts;
+       int                     idx;
+       void                    *priv;
+};
+
+struct cpu_map;
+struct thread_map;
+
+struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
+void perf_evsel__delete(struct perf_evsel *evsel);
+
+int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
+void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads);
+int perf_evsel__open(struct perf_evsel *evsel,
+                    struct cpu_map *cpus, struct thread_map *threads);
+
+#define perf_evsel__match(evsel, t, c)         \
+       (evsel->attr.type == PERF_TYPE_##t &&   \
+        evsel->attr.config == PERF_COUNT_##c)
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                                         int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+                                                int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
+                      bool scale);
+
+/**
+ * perf_evsel__read - Read the aggregate results on all CPUs
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read(struct perf_evsel *evsel,
+                                   int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, false);
+}
+
+/**
+ * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
+                                         int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, true);
+}
+
+#endif /* __PERF_EVSEL_H */
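
The file descriptors that perf_evsel stores in its xyarray come from the
perf_event_open() syscall, for which glibc provides no wrapper. A minimal
standalone counter read, Linux-only and independent of the perf tool sources,
to show what the open and read helpers above ultimately drive:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* glibc offers no wrapper for this syscall */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.exclude_kernel = 1;	/* friendlier to perf_event_paranoid */

	/* pid == 0, cpu == -1: this thread on any CPU, the same shape of
	 * call made per thread by perf_evsel__open_per_thread() above */
	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %llu\n", count);
	close(fd);
	return 0;
}

perf_evsel__alloc_fd() sizes the xyarray as cpus x threads of exactly such
descriptors.
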
index 64a85bafde63a1c6f05451a32b31eec521c799d9..989fa2dee2fd2ae441d98921b9e69a56c4fe50e2 100644 (file)
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat)
        set_bit(feat, self->adds_features);
 }
 
+void perf_header__clear_feat(struct perf_header *self, int feat)
+{
+       clear_bit(feat, self->adds_features);
+}
+
 bool perf_header__has_feat(const struct perf_header *self, int feat)
 {
        return test_bit(feat, self->adds_features);
@@ -265,15 +270,16 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
                          const char *name, bool is_kallsyms)
 {
        const size_t size = PATH_MAX;
-       char *filename = malloc(size),
+       char *realname = realpath(name, NULL),
+            *filename = malloc(size),
             *linkname = malloc(size), *targetname;
        int len, err = -1;
 
-       if (filename == NULL || linkname == NULL)
+       if (realname == NULL || filename == NULL || linkname == NULL)
                goto out_free;
 
        len = snprintf(filename, size, "%s%s%s",
-                      debugdir, is_kallsyms ? "/" : "", name);
+                      debugdir, is_kallsyms ? "/" : "", realname);
        if (mkdir_p(filename, 0755))
                goto out_free;
 
@@ -283,7 +289,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
                if (is_kallsyms) {
                         if (copyfile("/proc/kallsyms", filename))
                                goto out_free;
-               } else if (link(name, filename) && copyfile(name, filename))
+               } else if (link(realname, filename) && copyfile(name, filename))
                        goto out_free;
        }
 
@@ -300,6 +306,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
        if (symlink(targetname, linkname) == 0)
                err = 0;
 out_free:
+       free(realname);
        free(filename);
        free(linkname);
        return err;
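
build_id_cache__add_s() now canonicalizes the DSO path with realpath(3) before
linking it into the cache, so a symlinked name no longer produces a cache
entry keyed to the link rather than the underlying file. The free(realname)
added above pairs with realpath's allocating mode; that mode in isolation:

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	/* NULL buffer: realpath allocates the result (POSIX.1-2008) */
	char *real = realpath(argc > 1 ? argv[1] : "/proc/self/exe", NULL);

	if (real == NULL) {
		perror("realpath");
		return 1;
	}
	printf("canonical path: %s\n", real);
	free(real);	/* the caller owns the allocated buffer */
	return 0;
}
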
@@ -431,8 +438,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
        int idx = 0, err;
 
        session = container_of(self, struct perf_session, header);
-       if (perf_session__read_build_ids(session, true))
-               perf_header__set_feat(self, HEADER_BUILD_ID);
+
+       if (perf_header__has_feat(self, HEADER_BUILD_ID) &&
+           !perf_session__read_build_ids(session, true))
+               perf_header__clear_feat(self, HEADER_BUILD_ID);
 
        nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
        if (!nr_sections)
@@ -454,7 +463,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 
                /* Write trace info */
                trace_sec->offset = lseek(fd, 0, SEEK_CUR);
-               read_tracing_data(fd, attrs, nr_counters);
+               read_tracing_data(fd, &evsel_list);
                trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
        }
 
@@ -597,7 +606,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
 static int perf_header__getbuffer64(struct perf_header *self,
                                    int fd, void *buf, size_t size)
 {
-       if (do_read(fd, buf, size) <= 0)
+       if (readn(fd, buf, size) <= 0)
                return -1;
 
        if (self->needs_swap)
@@ -653,7 +662,7 @@ int perf_file_header__read(struct perf_file_header *self,
 {
        lseek(fd, 0, SEEK_SET);
 
-       if (do_read(fd, self, sizeof(*self)) <= 0 ||
+       if (readn(fd, self, sizeof(*self)) <= 0 ||
            memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
                return -1;
 
@@ -814,7 +823,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
                                       struct perf_header *ph, int fd,
                                       bool repipe)
 {
-       if (do_read(fd, self, sizeof(*self)) <= 0 ||
+       if (readn(fd, self, sizeof(*self)) <= 0 ||
            memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
                return -1;
 
@@ -939,6 +948,24 @@ u64 perf_header__sample_type(struct perf_header *header)
        return type;
 }
 
+bool perf_header__sample_id_all(const struct perf_header *header)
+{
+       bool value = false, first = true;
+       int i;
+
+       for (i = 0; i < header->attrs; i++) {
+               struct perf_header_attr *attr = header->attr[i];
+
+               if (first) {
+                       value = attr->attr.sample_id_all;
+                       first = false;
+               } else if (value != attr->attr.sample_id_all)
+                       die("non matching sample_id_all");
+       }
+
+       return value;
+}
+
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header)
 {
@@ -985,21 +1012,23 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
 
        ev = malloc(size);
 
+       if (ev == NULL)
+               return -ENOMEM;
+
        ev->attr.attr = *attr;
        memcpy(ev->attr.id, id, ids * sizeof(u64));
 
        ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
        ev->attr.header.size = size;
 
-       err = process(ev, session);
+       err = process(ev, NULL, session);
 
        free(ev);
 
        return err;
 }
 
-int event__synthesize_attrs(struct perf_header *self,
-                           event__handler_t process,
+int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
                            struct perf_session *session)
 {
        struct perf_header_attr *attr;
@@ -1069,7 +1098,7 @@ int event__synthesize_event_type(u64 event_id, char *name,
        ev.event_type.header.size = sizeof(ev.event_type) -
                (sizeof(ev.event_type.event_type.name) - size);
 
-       err = process(&ev, session);
+       err = process(&ev, NULL, session);
 
        return err;
 }
@@ -1104,8 +1133,7 @@ int event__process_event_type(event_t *self,
        return 0;
 }
 
-int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
-                                  int nb_events,
+int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
                                   event__handler_t process,
                                   struct perf_session *session __unused)
 {
@@ -1116,7 +1144,7 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
        memset(&ev, 0, sizeof(ev));
 
        ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
-       size = read_tracing_data_size(fd, pattrs, nb_events);
+       size = read_tracing_data_size(fd, pattrs);
        if (size <= 0)
                return size;
        aligned_size = ALIGN(size, sizeof(u64));
@@ -1124,9 +1152,9 @@ int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
        ev.tracing_data.header.size = sizeof(ev.tracing_data);
        ev.tracing_data.size = aligned_size;
 
-       process(&ev, session);
+       process(&ev, NULL, session);
 
-       err = read_tracing_data(fd, pattrs, nb_events);
+       err = read_tracing_data(fd, pattrs);
        write_padded(fd, NULL, 0, padding);
 
        return aligned_size;
@@ -1184,7 +1212,7 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
        ev.build_id.header.size = sizeof(ev.build_id) + len;
        memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
 
-       err = process(&ev, session);
+       err = process(&ev, NULL, session);
 
        return err;
 }
index 402ac2454cf8bcc664c3daf193119f470c77d7c3..33f16be7b72fdad5a3757acbfbceb52b3966c593 100644 (file)
@@ -81,9 +81,11 @@ void perf_header_attr__delete(struct perf_header_attr *self);
 int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
 
 u64 perf_header__sample_type(struct perf_header *header);
+bool perf_header__sample_id_all(const struct perf_header *header);
 struct perf_event_attr *
 perf_header__find_attr(u64 id, struct perf_header *header);
 void perf_header__set_feat(struct perf_header *self, int feat);
+void perf_header__clear_feat(struct perf_header *self, int feat);
 bool perf_header__has_feat(const struct perf_header *self, int feat);
 
 int perf_header__process_sections(struct perf_header *self, int fd,
@@ -111,8 +113,7 @@ int event__synthesize_event_types(event__handler_t process,
 int event__process_event_type(event_t *self,
                              struct perf_session *session);
 
-int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
-                                  int nb_events,
+int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
                                   event__handler_t process,
                                   struct perf_session *session);
 int event__process_tracing_data(event_t *self,
index 2022e87409942ca4b0d133c3f889e41178a663d1..c749ba6136a0ac33d7cdae8bf5b6604f1ee524af 100644
@@ -356,7 +356,7 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
 
 static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
                                     int depth, int depth_mask, int period,
-                                    u64 total_samples, int hits,
+                                    u64 total_samples, u64 hits,
                                     int left_margin)
 {
        int i;
@@ -1092,6 +1092,12 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
        FILE *file;
        int err = 0;
        u64 len;
+       char symfs_filename[PATH_MAX];
+
+       if (filename) {
+               snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+                        symbol_conf.symfs, filename);
+       }
 
        if (filename == NULL) {
                if (dso->has_build_id) {
@@ -1100,9 +1106,9 @@ int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
                        return -ENOMEM;
                }
                goto fallback;
-       } else if (readlink(filename, command, sizeof(command)) < 0 ||
+       } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
                   strstr(command, "[kernel.kallsyms]") ||
-                  access(filename, R_OK)) {
+                  access(symfs_filename, R_OK)) {
                free(filename);
 fallback:
                /*
@@ -1111,6 +1117,8 @@ fallback:
                 * DSO is the same as when 'perf record' ran.
                 */
                filename = dso->long_name;
+               snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+                        symbol_conf.symfs, filename);
                free_filename = false;
        }
 
@@ -1137,7 +1145,7 @@ fallback:
                 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
                 map__rip_2objdump(map, sym->start),
                 map__rip_2objdump(map, sym->end),
-                filename, filename);
+                symfs_filename, filename);
 
        pr_debug("Executing: %s\n", command);
 
@@ -1168,10 +1176,13 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
        size_t ret = 0;
 
        for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
-               if (!event__name[i])
+               const char *name = event__get_event_name(i);
+
+               if (!strcmp(name, "UNKNOWN"))
                        continue;
-               ret += fprintf(fp, "%10s events: %10d\n",
-                              event__name[i], self->stats.nr_events[i]);
+
+               ret += fprintf(fp, "%16s events: %10d\n", name,
+                              self->stats.nr_events[i]);
        }
 
        return ret;
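
The annotation code above now routes every filesystem access through a
symfs_filename built by prepending symbol_conf.symfs, so binaries can be
resolved under an alternate root (a copied target filesystem, for instance).
A small self-contained sketch of the same prefixing idiom; symfs_access() is
a hypothetical helper, and an empty prefix leaves the path untouched:

        #include <limits.h>
        #include <stdio.h>
        #include <unistd.h>

        /* Hypothetical helper: test readability of "path" under "symfs". */
        static int symfs_access(const char *symfs, const char *path)
        {
                char buf[PATH_MAX];

                snprintf(buf, sizeof(buf), "%s%s", symfs ? symfs : "", path);
                return access(buf, R_OK);
        }

        /* symfs_access("/mnt/target-rootfs", "/usr/lib/libfoo.so") then
         * checks /mnt/target-rootfs/usr/lib/libfoo.so. */
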
index 587d375d34300daa09948c1a0d95407b4f8439fa..ee789856a8c94644e189f0dc8a7be7933469a6cb 100644
@@ -52,8 +52,10 @@ struct sym_priv {
 struct events_stats {
        u64 total_period;
        u64 total_lost;
+       u64 total_invalid_chains;
        u32 nr_events[PERF_RECORD_HEADER_MAX];
        u32 nr_unknown_events;
+       u32 nr_invalid_chains;
 };
 
 enum hist_column {
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 0000000..acffd5e
--- /dev/null
@@ -0,0 +1,9 @@
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 0000000..bb4198e
--- /dev/null
@@ -0,0 +1,11 @@
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+
+#endif /* PERF_DWARF2_H */
+
index bb4ac2e053859482f98933b278a8d0adda71aa5a..8be0b968ca0bcfa44c95248e14e3878481887b21 100644
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr)
        addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
 }
 
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+       addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
+}
+
 static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
 {
        return ((1UL << (nr % BITS_PER_LONG)) &
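
With clear_bit() added, the perf copy of bitops covers set, clear and test,
which is presumably what perf_header__set_feat()/perf_header__clear_feat()
above sit on top of. A quick sanity check of the word/bit arithmetic,
assuming a 64-bit unsigned long:

        #include <assert.h>
        /* set_bit(), clear_bit(), test_bit() as defined above */

        int main(void)
        {
                unsigned long bits[2] = { 0, 0 };   /* 128 bits on LP64 */

                set_bit(70, bits);          /* bits[1] |= 1UL << 6 */
                assert(test_bit(70, bits));
                clear_bit(70, bits);        /* bits[1] &= ~(1UL << 6) */
                assert(!test_bit(70, bits));
                return 0;
        }
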
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 0000000..06387cf
--- /dev/null
@@ -0,0 +1,13 @@
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+#define ENTRY(name)                            \
+       .globl name;                            \
+       name:
+
+#define ENDPROC(name)
+
+#endif /* PERF_LINUX_LINKAGE_H_ */
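
These stubs strip the kernel's linkage annotations down to what a userspace
build needs: ENTRY() keeps the global symbol, ENDPROC() disappears. For
example, ENTRY(memcpy) in arch/x86/lib/memcpy_64.S expands to nothing more
than:

        .globl memcpy;
        memcpy:

so the kernel's assembly source assembles unchanged inside perf.
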
index 4af5bd59cfd14b475d0f2fa60e15f1b4b4e908de..649083f27e08bfbfa952b10bb68fc8639f4d1195 100644
@@ -1,6 +1,7 @@
 #include "../../../include/linux/hw_breakpoint.h"
 #include "util.h"
 #include "../perf.h"
+#include "evsel.h"
 #include "parse-options.h"
 #include "parse-events.h"
 #include "exec_cmd.h"
@@ -12,8 +13,7 @@
 
 int                            nr_counters;
 
-struct perf_event_attr         attrs[MAX_COUNTERS];
-char                           *filters[MAX_COUNTERS];
+LIST_HEAD(evsel_list);
 
 struct event_symbol {
        u8              type;
@@ -266,10 +266,10 @@ static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
        return name;
 }
 
-const char *event_name(int counter)
+const char *event_name(struct perf_evsel *evsel)
 {
-       u64 config = attrs[counter].config;
-       int type = attrs[counter].type;
+       u64 config = evsel->attr.config;
+       int type = evsel->attr.type;
 
        return __event_name(type, config);
 }
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name,
        id = atoll(id_buf);
        attr->config = id;
        attr->type = PERF_TYPE_TRACEPOINT;
-       *strp = evt_name + evt_length;
+       *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
 
        attr->sample_type |= PERF_SAMPLE_RAW;
        attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
                                    struct perf_event_attr *attr)
 {
        const char *evt_name;
-       char *flags;
+       char *flags = NULL, *comma_loc;
        char sys_name[MAX_EVENT_LENGTH];
        unsigned int sys_length, evt_length;
 
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp,
        sys_name[sys_length] = '\0';
        evt_name = evt_name + 1;
 
+       comma_loc = strchr(evt_name, ',');
+       if (comma_loc) {
+               /* take the event name up to the comma */
+               evt_name = strndup(evt_name, comma_loc - evt_name);
+       }
        flags = strchr(evt_name, ':');
        if (flags) {
                /* split it out: */
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
        evt_length = strlen(evt_name);
        if (evt_length >= MAX_EVENT_LENGTH)
                return EVT_FAILED;
-
        if (strpbrk(evt_name, "*?")) {
-               *strp = evt_name + evt_length;
+               *strp += strlen(sys_name) + evt_length;
                return parse_multiple_tracepoint_event(sys_name, evt_name,
                                                       flags);
        } else
@@ -810,9 +814,6 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
                        return -1;
 
        for (;;) {
-               if (nr_counters == MAX_COUNTERS)
-                       return -1;
-
                memset(&attr, 0, sizeof(attr));
                ret = parse_event_symbols(&str, &attr);
                if (ret == EVT_FAILED)
@@ -822,8 +823,13 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
                        return -1;
 
                if (ret != EVT_HANDLED_ALL) {
-                       attrs[nr_counters] = attr;
-                       nr_counters++;
+                       struct perf_evsel *evsel;
+                       evsel = perf_evsel__new(attr.type, attr.config,
+                                               nr_counters);
+                       if (evsel == NULL)
+                               return -1;
+                       list_add_tail(&evsel->node, &evsel_list);
+                       ++nr_counters;
                }
 
                if (*str == 0)
@@ -840,21 +846,22 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
 int parse_filter(const struct option *opt __used, const char *str,
                 int unset __used)
 {
-       int i = nr_counters - 1;
-       int len = strlen(str);
+       struct perf_evsel *last = NULL;
 
-       if (i < 0 || attrs[i].type != PERF_TYPE_TRACEPOINT) {
+       if (!list_empty(&evsel_list))
+               last = list_entry(evsel_list.prev, struct perf_evsel, node);
+
+       if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
                fprintf(stderr,
                        "-F option should follow a -e tracepoint option\n");
                return -1;
        }
 
-       filters[i] = malloc(len + 1);
-       if (!filters[i]) {
+       last->filter = strdup(str);
+       if (last->filter == NULL) {
                fprintf(stderr, "not enough memory to hold filter string\n");
                return -1;
        }
-       strcpy(filters[i], str);
 
        return 0;
 }
@@ -905,6 +912,47 @@ static void print_tracepoint_events(void)
        closedir(sys_dir);
 }
 
+/*
+ * Check whether event is in <debugfs_mount_point>/tracing/events
+ */
+
+int is_valid_tracepoint(const char *event_string)
+{
+       DIR *sys_dir, *evt_dir;
+       struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
+       char evt_path[MAXPATHLEN];
+       char dir_path[MAXPATHLEN];
+
+       if (debugfs_valid_mountpoint(debugfs_path))
+               return 0;
+
+       sys_dir = opendir(debugfs_path);
+       if (!sys_dir)
+               return 0;
+
+       for_each_subsystem(sys_dir, sys_dirent, sys_next) {
+
+               snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
+                        sys_dirent.d_name);
+               evt_dir = opendir(dir_path);
+               if (!evt_dir)
+                       continue;
+
+               for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
+                       snprintf(evt_path, MAXPATHLEN, "%s:%s",
+                                sys_dirent.d_name, evt_dirent.d_name);
+                       if (!strcmp(evt_path, event_string)) {
+                               closedir(evt_dir);
+                               closedir(sys_dir);
+                               return 1;
+                       }
+               }
+               closedir(evt_dir);
+       }
+       closedir(sys_dir);
+       return 0;
+}
+
 /*
  * Print the help text for the event symbols:
  */
@@ -963,3 +1011,26 @@ void print_events(void)
 
        exit(129);
 }
+
+int perf_evsel_list__create_default(void)
+{
+       struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
+                                                  PERF_COUNT_HW_CPU_CYCLES, 0);
+       if (evsel == NULL)
+               return -ENOMEM;
+
+       list_add(&evsel->node, &evsel_list);
+       ++nr_counters;
+       return 0;
+}
+
+void perf_evsel_list__delete(void)
+{
+       struct perf_evsel *pos, *n;
+
+       list_for_each_entry_safe(pos, n, &evsel_list, node) {
+               list_del_init(&pos->node);
+               perf_evsel__delete(pos);
+       }
+       nr_counters = 0;
+}
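
With the fixed-size attrs[]/filters[] arrays gone, consumers walk evsel_list
instead, using the kernel-style list API. A sketch of the replacement
pattern (the printing is illustrative):

        struct perf_evsel *pos;

        list_for_each_entry(pos, &evsel_list, node)
                printf("%s (filter: %s)\n", event_name(pos),
                       pos->filter ? pos->filter : "none");

Note how parse_filter() above attaches the filter string to the last evsel
on this same list, so per-counter state now travels with the evsel rather
than in a parallel array.
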
index fc4ab3fe877a22d191450a6bc4c75118cf71901d..b82cafb8377202462d16ef7ed36b8e546201c76b 100644
@@ -4,6 +4,16 @@
  * Parse symbolic events/counts passed in as options:
  */
 
+#include "../../../include/linux/perf_event.h"
+
+struct list_head;
+struct perf_evsel;
+
+extern struct list_head evsel_list;
+
+int perf_evsel_list__create_default(void);
+void perf_evsel_list__delete(void);
+
 struct option;
 
 struct tracepoint_path {
@@ -13,14 +23,11 @@ struct tracepoint_path {
 };
 
 extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
-extern bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events);
+extern bool have_tracepoints(struct list_head *evsel_list);
 
 extern int                     nr_counters;
 
-extern struct perf_event_attr attrs[MAX_COUNTERS];
-extern char *filters[MAX_COUNTERS];
-
-extern const char *event_name(int ctr);
+const char *event_name(struct perf_evsel *event);
 extern const char *__event_name(int type, u64 config);
 
 extern int parse_events(const struct option *opt, const char *str, int unset);
@@ -29,9 +36,9 @@ extern int parse_filter(const struct option *opt, const char *str, int unset);
 #define EVENTS_HELP_MAX (128*1024)
 
 extern void print_events(void);
+extern int is_valid_tracepoint(const char *event_string);
 
 extern char debugfs_path[];
 extern int valid_debugfs_mount(const char *debugfs);
 
-
 #endif /* __PERF_PARSE_EVENTS_H */
index c7d72dce54b2cf7c3f46042bd6ce6a68c941b4d6..abc31a1dac1a738c5791512032c5a9337af84b9a 100644
@@ -119,6 +119,10 @@ struct option {
        { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
 #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
        { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
+#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
+       { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
+       .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
+       .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
 
 /* parse_options() will filter out the processed options and leave the
  * non-option arguments in argv[].
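
The new variant combines PARSE_OPT_LASTARG_DEFAULT with PARSE_OPT_NOARG: the
option accepts no argument on the command line and the callback is invoked
with defval instead. A hedged usage sketch (the option name, variable and
callback are illustrative, not from this patch):

        static const struct option options[] = {
                OPT_CALLBACK_DEFAULT_NOOPT('g', "call-graph", &callgraph_opts,
                                           NULL, "enable call-graph recording",
                                           parse_callgraph_opt, "fp"),
                OPT_END()
        };
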
index 3b6a5297bf16cd5a318273bc0a9bf198734a0cfe..128aaab0aedad86403a0c722211e4d57cb982d8d 100644
@@ -95,7 +95,7 @@ static int init_vmlinux(void)
                goto out;
 
        if (machine__create_kernel_maps(&machine) < 0) {
-               pr_debug("machine__create_kernel_maps ");
+               pr_debug("machine__create_kernel_maps() failed.\n");
                goto out;
        }
 out:
@@ -114,6 +114,8 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
 const char *kernel_get_module_path(const char *module)
 {
        struct dso *dso;
+       struct map *map;
+       const char *vmlinux_name;
 
        if (module) {
                list_for_each_entry(dso, &machine.kernel_dsos, node) {
@@ -123,10 +125,17 @@ const char *kernel_get_module_path(const char *module)
                }
                pr_debug("Failed to find module %s.\n", module);
                return NULL;
+       }
+
+       map = machine.vmlinux_maps[MAP__FUNCTION];
+       dso = map->dso;
+
+       vmlinux_name = symbol_conf.vmlinux_name;
+       if (vmlinux_name) {
+               if (dso__load_vmlinux(dso, map, vmlinux_name, NULL) <= 0)
+                       return NULL;
        } else {
-               dso = machine.vmlinux_maps[MAP__FUNCTION]->dso;
-               if (dso__load_vmlinux_path(dso,
-                        machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
+               if (dso__load_vmlinux_path(dso, map, NULL) <= 0) {
                        pr_debug("Failed to load kernel map.\n");
                        return NULL;
                }
@@ -140,7 +149,8 @@ static int open_vmlinux(const char *module)
 {
        const char *path = kernel_get_module_path(module);
        if (!path) {
-               pr_err("Failed to find path of %s module", module ?: "kernel");
+               pr_err("Failed to find path of %s module.\n",
+                      module ?: "kernel");
                return -ENOENT;
        }
        pr_debug("Try to open %s\n", path);
@@ -217,7 +227,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
                pr_warning("Warning: No dwarf info found in the vmlinux - "
                        "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
                if (!need_dwarf) {
-                       pr_debug("Trying to use symbols.\nn");
+                       pr_debug("Trying to use symbols.\n");
                        return 0;
                }
        }
@@ -286,42 +296,49 @@ static int get_real_path(const char *raw_path, const char *comp_dir,
 #define LINEBUF_SIZE 256
 #define NR_ADDITIONAL_LINES 2
 
-static int show_one_line(FILE *fp, int l, bool skip, bool show_num)
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
 {
        char buf[LINEBUF_SIZE];
-       const char *color = PERF_COLOR_BLUE;
-
-       if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
-               goto error;
-       if (!skip) {
-               if (show_num)
-                       fprintf(stdout, "%7d  %s", l, buf);
-               else
-                       color_fprintf(stdout, color, "         %s", buf);
-       }
+       const char *color = show_num ? "" : PERF_COLOR_BLUE;
+       const char *prefix = NULL;
 
-       while (strlen(buf) == LINEBUF_SIZE - 1 &&
-              buf[LINEBUF_SIZE - 2] != '\n') {
+       do {
                if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
                        goto error;
-               if (!skip) {
-                       if (show_num)
-                               fprintf(stdout, "%s", buf);
-                       else
-                               color_fprintf(stdout, color, "%s", buf);
+               if (skip)
+                       continue;
+               if (!prefix) {
+                       prefix = show_num ? "%7d  " : "         ";
+                       color_fprintf(stdout, color, prefix, l);
                }
-       }
+               color_fprintf(stdout, color, "%s", buf);
 
-       return 0;
+       } while (strchr(buf, '\n') == NULL);
+
+       return 1;
 error:
-       if (feof(fp))
-               pr_warning("Source file is shorter than expected.\n");
-       else
+       if (ferror(fp)) {
                pr_warning("File read error: %s\n", strerror(errno));
+               return -1;
+       }
+       return 0;
+}
 
-       return -1;
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+       int rv = __show_one_line(fp, l, skip, show_num);
+       if (rv == 0) {
+               pr_warning("Source file is shorter than expected.\n");
+               rv = -1;
+       }
+       return rv;
 }
 
+#define show_one_line_with_num(f,l)    _show_one_line(f,l,false,true)
+#define show_one_line(f,l)             _show_one_line(f,l,false,false)
+#define skip_one_line(f,l)             _show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l)      __show_one_line(f,l,false,false)
+
 /*
  * Show line-range always requires debuginfo to find source file and
  * line number.
@@ -370,7 +387,7 @@ int show_line_range(struct line_range *lr, const char *module)
                fprintf(stdout, "<%s:%d>\n", lr->function,
                        lr->start - lr->offset);
        else
-               fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+               fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
 
        fp = fopen(lr->path, "r");
        if (fp == NULL) {
@@ -379,26 +396,30 @@ int show_line_range(struct line_range *lr, const char *module)
                return -errno;
        }
        /* Skip to starting line number */
-       while (l < lr->start && ret >= 0)
-               ret = show_one_line(fp, l++, true, false);
-       if (ret < 0)
-               goto end;
+       while (l < lr->start) {
+               ret = skip_one_line(fp, l++);
+               if (ret < 0)
+                       goto end;
+       }
 
        list_for_each_entry(ln, &lr->line_list, list) {
-               while (ln->line > l && ret >= 0)
-                       ret = show_one_line(fp, (l++) - lr->offset,
-                                           false, false);
-               if (ret >= 0)
-                       ret = show_one_line(fp, (l++) - lr->offset,
-                                           false, true);
+               for (; ln->line > l; l++) {
+                       ret = show_one_line(fp, l - lr->offset);
+                       if (ret < 0)
+                               goto end;
+               }
+               ret = show_one_line_with_num(fp, l++ - lr->offset);
                if (ret < 0)
                        goto end;
        }
 
        if (lr->end == INT_MAX)
                lr->end = l + NR_ADDITIONAL_LINES;
-       while (l <= lr->end && !feof(fp) && ret >= 0)
-               ret = show_one_line(fp, (l++) - lr->offset, false, false);
+       while (l <= lr->end) {
+               ret = show_one_line_or_eof(fp, l++ - lr->offset);
+               if (ret <= 0)
+                       break;
+       }
 end:
        fclose(fp);
        return ret;
@@ -457,7 +478,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 
        fd = open_vmlinux(module);
        if (fd < 0) {
-               pr_warning("Failed to open debuginfo file.\n");
+               pr_warning("Failed to open debug information file.\n");
                return fd;
        }
 
@@ -517,56 +538,87 @@ int show_available_vars(struct perf_probe_event *pevs __unused,
 }
 #endif
 
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+       const char *start = *ptr;
+
+       errno = 0;
+       *val = strtol(*ptr, ptr, 0);
+       if (errno || *ptr == start) {
+               semantic_error("'%s' is not a valid number.\n", what);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by:
+ *
+ *         SRC[:SLN[+NUM|-ELN]]
+ *         FNC[:SLN[+NUM|-ELN]]
+ */
 int parse_line_range_desc(const char *arg, struct line_range *lr)
 {
-       const char *ptr;
-       char *tmp;
-       /*
-        * <Syntax>
-        * SRC:SLN[+NUM|-ELN]
-        * FUNC[:SLN[+NUM|-ELN]]
-        */
-       ptr = strchr(arg, ':');
-       if (ptr) {
-               lr->start = (int)strtoul(ptr + 1, &tmp, 0);
-               if (*tmp == '+') {
-                       lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0);
-                       lr->end--;      /*
-                                        * Adjust the number of lines here.
-                                        * If the number of lines == 1, the
-                                        * the end of line should be equal to
-                                        * the start of line.
-                                        */
-               } else if (*tmp == '-')
-                       lr->end = (int)strtoul(tmp + 1, &tmp, 0);
-               else
-                       lr->end = INT_MAX;
+       char *range, *name = strdup(arg);
+       int err;
+
+       if (!name)
+               return -ENOMEM;
+
+       lr->start = 0;
+       lr->end = INT_MAX;
+
+       range = strchr(name, ':');
+       if (range) {
+               *range++ = '\0';
+
+               err = parse_line_num(&range, &lr->start, "start line");
+               if (err)
+                       goto err;
+
+               if (*range == '+' || *range == '-') {
+                       const char c = *range++;
+
+                       err = parse_line_num(&range, &lr->end, "end line");
+                       if (err)
+                               goto err;
+
+                       if (c == '+') {
+                               lr->end += lr->start;
+                               /*
+                                * Adjust the number of lines here.
+                                * If the number of lines == 1, the
+                                * end line should be equal to the
+                                * start line.
+                                */
+                               lr->end--;
+                       }
+               }
+
                pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+               err = -EINVAL;
                if (lr->start > lr->end) {
                        semantic_error("Start line must be smaller"
                                       " than end line.\n");
-                       return -EINVAL;
+                       goto err;
                }
-               if (*tmp != '\0') {
-                       semantic_error("Tailing with invalid character '%d'.\n",
-                                      *tmp);
-                       return -EINVAL;
+               if (*range != '\0') {
+                       semantic_error("Trailing with invalid string '%s'.\n", range);
+                       goto err;
                }
-               tmp = strndup(arg, (ptr - arg));
-       } else {
-               tmp = strdup(arg);
-               lr->end = INT_MAX;
        }
 
-       if (tmp == NULL)
-               return -ENOMEM;
-
-       if (strchr(tmp, '.'))
-               lr->file = tmp;
+       if (strchr(name, '.'))
+               lr->file = name;
        else
-               lr->function = tmp;
+               lr->function = name;
 
        return 0;
+err:
+       free(name);
+       return err;
 }
 
 /* Check the name is good for event/group */
@@ -690,39 +742,40 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 
        /* Exclusion check */
        if (pp->lazy_line && pp->line) {
-               semantic_error("Lazy pattern can't be used with line number.");
+               semantic_error("Lazy pattern can't be used with"
+                              " line number.\n");
                return -EINVAL;
        }
 
        if (pp->lazy_line && pp->offset) {
-               semantic_error("Lazy pattern can't be used with offset.");
+               semantic_error("Lazy pattern can't be used with offset.\n");
                return -EINVAL;
        }
 
        if (pp->line && pp->offset) {
-               semantic_error("Offset can't be used with line number.");
+               semantic_error("Offset can't be used with line number.\n");
                return -EINVAL;
        }
 
        if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
                semantic_error("File always requires line number or "
-                              "lazy pattern.");
+                              "lazy pattern.\n");
                return -EINVAL;
        }
 
        if (pp->offset && !pp->function) {
-               semantic_error("Offset requires an entry function.");
+               semantic_error("Offset requires an entry function.\n");
                return -EINVAL;
        }
 
        if (pp->retprobe && !pp->function) {
-               semantic_error("Return probe requires an entry function.");
+               semantic_error("Return probe requires an entry function.\n");
                return -EINVAL;
        }
 
        if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
                semantic_error("Offset/Line/Lazy pattern can't be used with "
-                              "return probe.");
+                              "return probe.\n");
                return -EINVAL;
        }
 
@@ -996,7 +1049,7 @@ int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
 
        return tmp - buf;
 error:
-       pr_debug("Failed to synthesize perf probe argument: %s",
+       pr_debug("Failed to synthesize perf probe argument: %s\n",
                 strerror(-ret));
        return ret;
 }
@@ -1024,13 +1077,13 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
                        goto error;
        }
        if (pp->file) {
-               len = strlen(pp->file) - 31;
-               if (len < 0)
-                       len = 0;
-               tmp = strchr(pp->file + len, '/');
-               if (!tmp)
-                       tmp = pp->file + len;
-               ret = e_snprintf(file, 32, "@%s", tmp + 1);
+               tmp = pp->file;
+               len = strlen(tmp);
+               if (len > 30) {
+                       tmp = strchr(pp->file + len - 30, '/');
+                       tmp = tmp ? tmp + 1 : pp->file + len - 30;
+               }
+               ret = e_snprintf(file, 32, "@%s", tmp);
                if (ret <= 0)
                        goto error;
        }
@@ -1046,7 +1099,7 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
 
        return buf;
 error:
-       pr_debug("Failed to synthesize perf probe point: %s",
+       pr_debug("Failed to synthesize perf probe point: %s\n",
                 strerror(-ret));
        if (buf)
                free(buf);
@@ -1787,7 +1840,7 @@ static int del_trace_probe_event(int fd, const char *group,
 
        ret = e_snprintf(buf, 128, "%s:%s", group, event);
        if (ret < 0) {
-               pr_err("Failed to copy event.");
+               pr_err("Failed to copy event.\n");
                return ret;
        }
 
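The rewritten parse_line_range_desc() accepts SRC[:SLN[+NUM|-ELN]] and
FNC[:SLN[+NUM|-ELN]]. Worked examples of how the fields come out on success
(the argument strings are illustrative):

        struct line_range lr;

        memset(&lr, 0, sizeof(lr));
        parse_line_range_desc("sched.c:100+10", &lr);
        /* lr.file = "sched.c", lr.start = 100, lr.end = 109:
         * +NUM is a line count, hence the lr->end-- adjustment above */

        memset(&lr, 0, sizeof(lr));
        parse_line_range_desc("schedule:12-20", &lr);
        /* lr.function = "schedule", lr.start = 12, lr.end = 20 */

        memset(&lr, 0, sizeof(lr));
        parse_line_range_desc("schedule", &lr);
        /* lr.function = "schedule", lr.start = 0, lr.end = INT_MAX */
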
index 3991d73d1cff9164cb23fb4aae3169af97abfe77..ab83b6ac5d657c80af1e67790c1ace15d7592578 100644
@@ -117,28 +117,6 @@ static void line_list__free(struct list_head *head)
 }
 
 /* Dwarf FL wrappers */
-
-static int __linux_kernel_find_elf(Dwfl_Module *mod,
-                                  void **userdata,
-                                  const char *module_name,
-                                  Dwarf_Addr base,
-                                  char **file_name, Elf **elfp)
-{
-       int fd;
-       const char *path = kernel_get_module_path(module_name);
-
-       if (path) {
-               fd = open(path, O_RDONLY);
-               if (fd >= 0) {
-                       *file_name = strdup(path);
-                       return fd;
-               }
-       }
-       /* If failed, try to call standard method */
-       return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
-                                         file_name, elfp);
-}
-
 static char *debuginfo_path;   /* Currently dummy */
 
 static const Dwfl_Callbacks offline_callbacks = {
@@ -151,14 +129,6 @@ static const Dwfl_Callbacks offline_callbacks = {
        .find_elf = dwfl_build_id_find_elf,
 };
 
-static const Dwfl_Callbacks kernel_callbacks = {
-       .find_debuginfo = dwfl_standard_find_debuginfo,
-       .debuginfo_path = &debuginfo_path,
-
-       .find_elf = __linux_kernel_find_elf,
-       .section_address = dwfl_linux_kernel_module_section_address,
-};
-
 /* Get a Dwarf from offline image */
 static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
 {
@@ -185,6 +155,38 @@ error:
        return dbg;
 }
 
+#if _ELFUTILS_PREREQ(0, 148)
+/* This method is buggy if elfutils is older than 0.148 */
+static int __linux_kernel_find_elf(Dwfl_Module *mod,
+                                  void **userdata,
+                                  const char *module_name,
+                                  Dwarf_Addr base,
+                                  char **file_name, Elf **elfp)
+{
+       int fd;
+       const char *path = kernel_get_module_path(module_name);
+
+       pr_debug2("Use file %s for %s\n", path, module_name);
+       if (path) {
+               fd = open(path, O_RDONLY);
+               if (fd >= 0) {
+                       *file_name = strdup(path);
+                       return fd;
+               }
+       }
+       /* If failed, try to call standard method */
+       return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
+                                         file_name, elfp);
+}
+
+static const Dwfl_Callbacks kernel_callbacks = {
+       .find_debuginfo = dwfl_standard_find_debuginfo,
+       .debuginfo_path = &debuginfo_path,
+
+       .find_elf = __linux_kernel_find_elf,
+       .section_address = dwfl_linux_kernel_module_section_address,
+};
+
 /* Get a Dwarf from live kernel image */
 static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
                                          Dwarf_Addr *bias)
@@ -205,11 +207,34 @@ static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
        dbg = dwfl_addrdwarf(*dwflp, addr, bias);
        /* Here, check whether we could get a real dwarf */
        if (!dbg) {
+               pr_debug("Failed to find kernel dwarf at %lx\n",
+                        (unsigned long)addr);
                dwfl_end(*dwflp);
                *dwflp = NULL;
        }
        return dbg;
 }
+#else
+/* With older elfutils, this just supports the kernel module... */
+static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr __used, Dwfl **dwflp,
+                                         Dwarf_Addr *bias)
+{
+       int fd;
+       const char *path = kernel_get_module_path("kernel");
+
+       if (!path) {
+               pr_err("Failed to find vmlinux path\n");
+               return NULL;
+       }
+
+       pr_debug2("Use file %s for debuginfo\n", path);
+       fd = open(path, O_RDONLY);
+       if (fd < 0)
+               return NULL;
+
+       return dwfl_init_offline_dwarf(fd, dwflp, bias);
+}
+#endif
 
 /* Dwarf wrappers */
 
@@ -627,8 +652,8 @@ static_var:
        regs = get_arch_regstr(regn);
        if (!regs) {
                /* This should be a bug in DWARF or this tool */
-               pr_warning("Mapping for DWARF register number %u "
-                          "missing on this architecture.", regn);
+               pr_warning("Mapping for register number %u is "
+                          "missing on this architecture.\n", regn);
                return -ERANGE;
        }
 
@@ -674,13 +699,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                if (ret != DW_TAG_pointer_type &&
                    ret != DW_TAG_array_type) {
                        pr_warning("Failed to cast into string: "
-                                  "%s(%s) is not a pointer nor array.",
+                                  "%s(%s) is neither a pointer nor an array.\n",
                                   dwarf_diename(vr_die), dwarf_diename(&type));
                        return -EINVAL;
                }
                if (ret == DW_TAG_pointer_type) {
                        if (die_get_real_type(&type, &type) == NULL) {
-                               pr_warning("Failed to get a type information.");
+                               pr_warning("Failed to get type"
+                                          " information.\n");
                                return -ENOENT;
                        }
                        while (*ref_ptr)
@@ -695,7 +721,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                if (!die_compare_name(&type, "char") &&
                    !die_compare_name(&type, "unsigned char")) {
                        pr_warning("Failed to cast into string: "
-                                  "%s is not (unsigned) char *.",
+                                  "%s is not (unsigned) char *.\n",
                                   dwarf_diename(vr_die));
                        return -EINVAL;
                }
@@ -805,8 +831,8 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                        return -EINVAL;
                }
                if (field->name[0] == '[') {
-                       pr_err("Semantic error: %s is not a pointor nor array.",
-                              varname);
+                       pr_err("Semantic error: %s is neither a pointer"
+                              " nor an array.\n", varname);
                        return -EINVAL;
                }
                if (field->ref) {
@@ -953,7 +979,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
        name = dwarf_diename(sp_die);
        if (name) {
                if (dwarf_entrypc(sp_die, &eaddr) != 0) {
-                       pr_warning("Failed to get entry pc of %s\n",
+                       pr_warning("Failed to get entry address of %s\n",
                                   dwarf_diename(sp_die));
                        return -ENOENT;
                }
@@ -969,7 +995,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
        if (retprobe) {
                if (eaddr != paddr) {
                        pr_warning("Return probe must be on the head of"
-                                  " a real function\n");
+                                  " a real function.\n");
                        return -EINVAL;
                }
                tp->retprobe = true;
@@ -1008,7 +1034,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
                Dwarf_Frame *frame;
                if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
                    dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
-                       pr_warning("Failed to get CFA on 0x%jx\n",
+                       pr_warning("Failed to get call frame at 0x%jx\n",
                                   (uintmax_t)pf->addr);
                        return -ENOENT;
                }
@@ -1035,7 +1061,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
        int ret = 0;
 
        if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1137,7 +1163,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
        }
 
        if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1195,7 +1221,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
        else {
                /* Get probe address */
                if (dwarf_entrypc(in_die, &addr) != 0) {
-                       pr_warning("Failed to get entry pc of %s.\n",
+                       pr_warning("Failed to get entry address of %s.\n",
                                   dwarf_diename(in_die));
                        param->retval = -ENOENT;
                        return DWARF_CB_ABORT;
@@ -1236,8 +1262,8 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
                        param->retval = find_probe_point_lazy(sp_die, pf);
                else {
                        if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
-                               pr_warning("Failed to get entry pc of %s.\n",
-                                          dwarf_diename(sp_die));
+                               pr_warning("Failed to get entry address of "
+                                          "%s.\n", dwarf_diename(sp_die));
                                param->retval = -ENOENT;
                                return DWARF_CB_ABORT;
                        }
@@ -1279,7 +1305,7 @@ static int find_probes(int fd, struct probe_finder *pf)
 
        dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                return -EBADF;
        }
@@ -1524,7 +1550,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
        /* Open the live linux kernel */
        dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                ret = -EINVAL;
                goto end;
@@ -1534,7 +1560,8 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
        addr += bias;
        /* Find cu die */
        if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
-               pr_warning("No CU DIE is found at %lx\n", addr);
+               pr_warning("Failed to find debug information for address %lx\n",
+                          addr);
                ret = -EINVAL;
                goto end;
        }
@@ -1659,7 +1686,7 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
 
        line_list__init(&lf->lr->line_list);
        if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
-               pr_warning("No source lines found in this CU.\n");
+               pr_warning("No source lines found.\n");
                return -ENOENT;
        }
 
@@ -1784,7 +1811,7 @@ int find_line_range(int fd, struct line_range *lr)
 
        dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
        if (!dbg) {
-               pr_warning("No dwarf info found in the vmlinux - "
+               pr_warning("No debug information found in the vmlinux - "
                        "please rebuild with CONFIG_DEBUG_INFO=y.\n");
                return -EBADF;
        }
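
The new #if guard keys compile-time behaviour off elfutils' own version
macro, now reachable through the elfutils/ include prefix (see the header
change below). A minimal sketch of the same pattern:

        #include <stdio.h>
        #include <elfutils/version.h>

        int main(void)
        {
        #if _ELFUTILS_PREREQ(0, 148)
                puts("live-kernel dwfl callbacks usable");
        #else
                puts("falling back to offline vmlinux handling");
        #endif
                return 0;
        }
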
index bba69d4556999e5081b018857acd230de6740eea..beaefc3c1223df16423cbd084f700b6ee5a5fbf2 100644
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
                                  bool externs);
 
 #include <dwarf.h>
-#include <libdw.h>
-#include <libdwfl.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
 
 struct probe_finder {
        struct perf_probe_event *pev;           /* Target probe event */
index b059dc50cc2db9021b75435e9aac132174c6dbec..93680818e244ca8a2e58f49e59af0ca729eeb50e 100644
@@ -1,5 +1,5 @@
 /*
- * trace-event-perl.  Feed perf trace events to an embedded Perl interpreter.
+ * trace-event-perl.  Feed perf script events to an embedded Perl interpreter.
  *
  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
  *
@@ -411,8 +411,8 @@ static int perl_generate_script(const char *outfile)
                return -1;
        }
 
-       fprintf(ofp, "# perf trace event handlers, "
-               "generated by perf trace -g perl\n");
+       fprintf(ofp, "# perf script event handlers, "
+               "generated by perf script -g perl\n");
 
        fprintf(ofp, "# Licensed under the terms of the GNU GPL"
                " License version 2\n\n");
index 33a632523743deff80cb9e3636bd9144b84b2f76..c6d99334bdfa836c1adba2d613b658fadf20797b 100644
@@ -442,8 +442,8 @@ static int python_generate_script(const char *outfile)
                fprintf(stderr, "couldn't open %s\n", fname);
                return -1;
        }
-       fprintf(ofp, "# perf trace event handlers, "
-               "generated by perf trace -g python\n");
+       fprintf(ofp, "# perf script event handlers, "
+               "generated by perf script -g python\n");
 
        fprintf(ofp, "# Licensed under the terms of the GNU GPL"
                " License version 2\n\n");
index fa9d652c2dc3c07182028d4a196293333be75d78..6fb4694d05fa1e2c89e5ac91207c025214031f38 100644
@@ -65,9 +65,49 @@ out_close:
        return -1;
 }
 
+static void perf_session__id_header_size(struct perf_session *session)
+{
+       struct sample_data *data;
+       u64 sample_type = session->sample_type;
+       u16 size = 0;
+
+       if (!session->sample_id_all)
+               goto out;
+
+       if (sample_type & PERF_SAMPLE_TID)
+               size += sizeof(data->tid) * 2;
+
+       if (sample_type & PERF_SAMPLE_TIME)
+               size += sizeof(data->time);
+
+       if (sample_type & PERF_SAMPLE_ID)
+               size += sizeof(data->id);
+
+       if (sample_type & PERF_SAMPLE_STREAM_ID)
+               size += sizeof(data->stream_id);
+
+       if (sample_type & PERF_SAMPLE_CPU)
+               size += sizeof(data->cpu) * 2;
+out:
+       session->id_hdr_size = size;
+}
+
+void perf_session__set_sample_id_all(struct perf_session *session, bool value)
+{
+       session->sample_id_all = value;
+       perf_session__id_header_size(session);
+}
+
+void perf_session__set_sample_type(struct perf_session *session, u64 type)
+{
+       session->sample_type = type;
+}
+
 void perf_session__update_sample_type(struct perf_session *self)
 {
        self->sample_type = perf_header__sample_type(&self->header);
+       self->sample_id_all = perf_header__sample_id_all(&self->header);
+       perf_session__id_header_size(self);
 }
 
 int perf_session__create_kernel_maps(struct perf_session *self)
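
perf_session__id_header_size() simply sums the widths of the optional
sample-id fields appended to non-sample records when sample_id_all is set.
Assuming the usual field sizes (u32 pid/tid, u64 time/id/stream_id, u32 cpu
plus a u32 pad), a session with all five bits selected gets:

        u16 size = 0;

        size += sizeof(u32) * 2;    /* PERF_SAMPLE_TID: pid + tid  ->  8 */
        size += sizeof(u64);        /* PERF_SAMPLE_TIME            ->  8 */
        size += sizeof(u64);        /* PERF_SAMPLE_ID              ->  8 */
        size += sizeof(u64);        /* PERF_SAMPLE_STREAM_ID       ->  8 */
        size += sizeof(u32) * 2;    /* PERF_SAMPLE_CPU: cpu + pad  ->  8 */
        /* size == 40 extra bytes on every non-sample record */
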
@@ -85,7 +125,9 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self)
        machines__destroy_guest_kernel_maps(&self->machines);
 }
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe)
+struct perf_session *perf_session__new(const char *filename, int mode,
+                                      bool force, bool repipe,
+                                      struct perf_event_ops *ops)
 {
        size_t len = filename ? strlen(filename) + 1 : 0;
        struct perf_session *self = zalloc(sizeof(*self) + len);
@@ -101,10 +143,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
        INIT_LIST_HEAD(&self->dead_threads);
        self->hists_tree = RB_ROOT;
        self->last_match = NULL;
-       self->mmap_window = 32;
+       /*
+        * On 64bit we can mmap the data file in one go. No need for tiny mmap
+        * slices. On 32bit we use 32MB.
+        */
+#if BITS_PER_LONG == 64
+       self->mmap_window = ULLONG_MAX;
+#else
+       self->mmap_window = 32 * 1024 * 1024ULL;
+#endif
        self->machines = RB_ROOT;
        self->repipe = repipe;
-       INIT_LIST_HEAD(&self->ordered_samples.samples_head);
+       INIT_LIST_HEAD(&self->ordered_samples.samples);
+       INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
+       INIT_LIST_HEAD(&self->ordered_samples.to_free);
        machine__init(&self->host_machine, "", HOST_KERNEL_ID);
 
        if (mode == O_RDONLY) {
@@ -120,6 +172,13 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
        }
 
        perf_session__update_sample_type(self);
+
+       if (ops && ops->ordering_requires_timestamps &&
+           ops->ordered_samples && !self->sample_id_all) {
+               dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+               ops->ordered_samples = false;
+       }
+
 out:
        return self;
 out_free:
@@ -230,7 +289,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
        return syms;
 }
 
+static int process_event_synth_stub(event_t *event __used,
+                                   struct perf_session *session __used)
+{
+       dump_printf(": unhandled!\n");
+       return 0;
+}
+
 static int process_event_stub(event_t *event __used,
+                             struct sample_data *sample __used,
                              struct perf_session *session __used)
 {
        dump_printf(": unhandled!\n");
@@ -262,7 +329,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
        if (handler->exit == NULL)
                handler->exit = process_event_stub;
        if (handler->lost == NULL)
-               handler->lost = process_event_stub;
+               handler->lost = event__process_lost;
        if (handler->read == NULL)
                handler->read = process_event_stub;
        if (handler->throttle == NULL)
@@ -270,13 +337,13 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
        if (handler->unthrottle == NULL)
                handler->unthrottle = process_event_stub;
        if (handler->attr == NULL)
-               handler->attr = process_event_stub;
+               handler->attr = process_event_synth_stub;
        if (handler->event_type == NULL)
-               handler->event_type = process_event_stub;
+               handler->event_type = process_event_synth_stub;
        if (handler->tracing_data == NULL)
-               handler->tracing_data = process_event_stub;
+               handler->tracing_data = process_event_synth_stub;
        if (handler->build_id == NULL)
-               handler->build_id = process_event_stub;
+               handler->build_id = process_event_synth_stub;
        if (handler->finished_round == NULL) {
                if (handler->ordered_samples)
                        handler->finished_round = process_finished_round;
@@ -386,33 +453,61 @@ static event__swap_op event__swap_ops[] = {
 
 struct sample_queue {
        u64                     timestamp;
-       struct sample_event     *event;
+       u64                     file_offset;
+       event_t                 *event;
        struct list_head        list;
 };
 
+static void perf_session_free_sample_buffers(struct perf_session *session)
+{
+       struct ordered_samples *os = &session->ordered_samples;
+
+       while (!list_empty(&os->to_free)) {
+               struct sample_queue *sq;
+
+               sq = list_entry(os->to_free.next, struct sample_queue, list);
+               list_del(&sq->list);
+               free(sq);
+       }
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+                                     event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_event_ops *ops,
+                                     u64 file_offset);
+
 static void flush_sample_queue(struct perf_session *s,
                               struct perf_event_ops *ops)
 {
-       struct list_head *head = &s->ordered_samples.samples_head;
-       u64 limit = s->ordered_samples.next_flush;
+       struct ordered_samples *os = &s->ordered_samples;
+       struct list_head *head = &os->samples;
        struct sample_queue *tmp, *iter;
+       struct sample_data sample;
+       u64 limit = os->next_flush;
+       u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
 
        if (!ops->ordered_samples || !limit)
                return;
 
        list_for_each_entry_safe(iter, tmp, head, list) {
                if (iter->timestamp > limit)
-                       return;
+                       break;
 
-               if (iter == s->ordered_samples.last_inserted)
-                       s->ordered_samples.last_inserted = NULL;
+               event__parse_sample(iter->event, s, &sample);
+               perf_session_deliver_event(s, iter->event, &sample, ops,
+                                          iter->file_offset);
 
-               ops->sample((event_t *)iter->event, s);
-
-               s->ordered_samples.last_flush = iter->timestamp;
+               os->last_flush = iter->timestamp;
                list_del(&iter->list);
-               free(iter->event);
-               free(iter);
+               list_add(&iter->list, &os->sample_cache);
+       }
+
+       if (list_empty(head)) {
+               os->last_sample = NULL;
+       } else if (last_ts <= limit) {
+               os->last_sample =
+                       list_entry(head->prev, struct sample_queue, list);
        }
 }
 
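The hunk below replaces the three __queue_sample_* helpers with a single
__queue_event() that starts from the last queued entry and walks forward or
backward from there, on the theory that mmap'd events arrive nearly sorted,
so the neighbour of the previous insertion is almost always the right spot.
A condensed restatement of that insertion logic (hint is the previously
inserted node, or NULL for an empty list; the max_timestamp bookkeeping of
the real code is omitted):

        static void queue_ordered(struct sample_queue *new,
                                  struct sample_queue *hint,
                                  struct list_head *head)
        {
                u64 ts = new->timestamp;
                struct list_head *p;

                if (!hint) {
                        list_add(&new->list, head);
                        return;
                }
                if (hint->timestamp <= ts) {    /* walk towards the tail */
                        while (hint->timestamp <= ts) {
                                p = hint->list.next;
                                if (p == head) {        /* new is the max */
                                        list_add_tail(&new->list, head);
                                        return;
                                }
                                hint = list_entry(p, struct sample_queue, list);
                        }
                        /* first entry with a larger timestamp: insert before */
                        list_add_tail(&new->list, &hint->list);
                } else {                        /* walk towards the head */
                        while (hint->timestamp > ts) {
                                p = hint->list.prev;
                                if (p == head) {        /* new is the min */
                                        list_add(&new->list, head);
                                        return;
                                }
                                hint = list_entry(p, struct sample_queue, list);
                        }
                        /* last entry with a smaller timestamp: insert after */
                        list_add(&new->list, &hint->list);
                }
        }
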
@@ -465,178 +560,265 @@ static int process_finished_round(event_t *event __used,
        return 0;
 }
 
-static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
-{
-       struct sample_queue *iter;
-
-       list_for_each_entry_reverse(iter, head, list) {
-               if (iter->timestamp < new->timestamp) {
-                       list_add(&new->list, &iter->list);
-                       return;
-               }
-       }
-
-       list_add(&new->list, head);
-}
-
-static void __queue_sample_before(struct sample_queue *new,
-                                 struct sample_queue *iter,
-                                 struct list_head *head)
-{
-       list_for_each_entry_continue_reverse(iter, head, list) {
-               if (iter->timestamp < new->timestamp) {
-                       list_add(&new->list, &iter->list);
-                       return;
-               }
-       }
-
-       list_add(&new->list, head);
-}
-
-static void __queue_sample_after(struct sample_queue *new,
-                                struct sample_queue *iter,
-                                struct list_head *head)
-{
-       list_for_each_entry_continue(iter, head, list) {
-               if (iter->timestamp > new->timestamp) {
-                       list_add_tail(&new->list, &iter->list);
-                       return;
-               }
-       }
-       list_add_tail(&new->list, head);
-}
-
 /* The queue is ordered by time */
-static void __queue_sample_event(struct sample_queue *new,
-                                struct perf_session *s)
+static void __queue_event(struct sample_queue *new, struct perf_session *s)
 {
-       struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
-       struct list_head *head = &s->ordered_samples.samples_head;
+       struct ordered_samples *os = &s->ordered_samples;
+       struct sample_queue *sample = os->last_sample;
+       u64 timestamp = new->timestamp;
+       struct list_head *p;
 
+       os->last_sample = new;
 
-       if (!last_inserted) {
-               __queue_sample_end(new, head);
+       if (!sample) {
+               list_add(&new->list, &os->samples);
+               os->max_timestamp = timestamp;
                return;
        }
 
        /*
-        * Most of the time the current event has a timestamp
-        * very close to the last event inserted, unless we just switched
-        * to another event buffer. Having a sorting based on a list and
-        * on the last inserted event that is close to the current one is
-        * probably more efficient than an rbtree based sorting.
+        * last_sample might point to some random place in the list as it's
+        * the last queued event. We expect that the new event is close to
+        * this.
         */
-       if (last_inserted->timestamp >= new->timestamp)
-               __queue_sample_before(new, last_inserted, head);
-       else
-               __queue_sample_after(new, last_inserted, head);
+       if (sample->timestamp <= timestamp) {
+               while (sample->timestamp <= timestamp) {
+                       p = sample->list.next;
+                       if (p == &os->samples) {
+                               list_add_tail(&new->list, &os->samples);
+                               os->max_timestamp = timestamp;
+                               return;
+                       }
+                       sample = list_entry(p, struct sample_queue, list);
+               }
+               list_add_tail(&new->list, &sample->list);
+       } else {
+               while (sample->timestamp > timestamp) {
+                       p = sample->list.prev;
+                       if (p == &os->samples) {
+                               list_add(&new->list, &os->samples);
+                               return;
+                       }
+                       sample = list_entry(p, struct sample_queue, list);
+               }
+               list_add(&new->list, &sample->list);
+       }
 }
 
-static int queue_sample_event(event_t *event, struct sample_data *data,
-                             struct perf_session *s)
+#define MAX_SAMPLE_BUFFER      (64 * 1024 / sizeof(struct sample_queue))
+
+static int perf_session_queue_event(struct perf_session *s, event_t *event,
+                                   struct sample_data *data, u64 file_offset)
 {
+       struct ordered_samples *os = &s->ordered_samples;
+       struct list_head *sc = &os->sample_cache;
        u64 timestamp = data->time;
        struct sample_queue *new;
 
+       if (!timestamp || timestamp == ~0ULL)
+               return -ETIME;
 
        if (timestamp < s->ordered_samples.last_flush) {
                printf("Warning: Timestamp below last timeslice flush\n");
                return -EINVAL;
        }
 
-       new = malloc(sizeof(*new));
-       if (!new)
-               return -ENOMEM;
+       if (!list_empty(sc)) {
+               new = list_entry(sc->next, struct sample_queue, list);
+               list_del(&new->list);
+       } else if (os->sample_buffer) {
+               new = os->sample_buffer + os->sample_buffer_idx;
+               if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
+                       os->sample_buffer = NULL;
+       } else {
+               os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
+               if (!os->sample_buffer)
+                       return -ENOMEM;
+               list_add(&os->sample_buffer->list, &os->to_free);
+               os->sample_buffer_idx = 2;
+               new = os->sample_buffer + 1;
+       }
 
        new->timestamp = timestamp;
+       new->file_offset = file_offset;
+       new->event = event;
 
-       new->event = malloc(event->header.size);
-       if (!new->event) {
-               free(new);
-               return -ENOMEM;
-       }
+       __queue_event(new, s);
 
-       memcpy(new->event, event, event->header.size);
+       return 0;
+}
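
The queueing path above also drops the per-event malloc: entries come from 64 KiB blocks, the first slot of each block doubles as the link that chains the block onto to_free for teardown (which is why sample_buffer_idx starts at 2 and the first entry handed out is sample_buffer + 1), and flushed entries are recycled through sample_cache. A sketch of this three-tier allocator with hypothetical names:

	#include <stdlib.h>

	struct entry {
		struct entry *next;	/* free-list / block-list linkage */
		/* ... payload ... */
	};

	#define ENTRIES_PER_BLOCK (64 * 1024 / sizeof(struct entry))

	struct pool {
		struct entry *cache;	/* recycled entries, handed out first */
		struct entry *blocks;	/* slot 0 of every block, for teardown */
		struct entry *cur;	/* current block, NULL once exhausted */
		size_t idx;		/* next unused slot in cur */
	};

	static struct entry *pool_get(struct pool *p)
	{
		struct entry *e;

		if (p->cache) {			/* 1. reuse a freed entry */
			e = p->cache;
			p->cache = e->next;
			return e;
		}
		if (p->cur) {			/* 2. bump-allocate from block */
			e = p->cur + p->idx;
			if (++p->idx == ENTRIES_PER_BLOCK)
				p->cur = NULL;
			return e;
		}
		p->cur = malloc(ENTRIES_PER_BLOCK * sizeof(*e));  /* 3. new block */
		if (!p->cur)
			return NULL;
		p->cur->next = p->blocks;	/* slot 0 chains the block itself */
		p->blocks = p->cur;
		p->idx = 2;		/* slot 0 is the link, slot 1 goes out now */
		return p->cur + 1;
	}

	static void pool_put(struct pool *p, struct entry *e)
	{
		e->next = p->cache;	/* no free(): recycle via the cache */
		p->cache = e;
	}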
 
-       __queue_sample_event(new, s);
-       s->ordered_samples.last_inserted = new;
+static void callchain__printf(struct sample_data *sample)
+{
+       unsigned int i;
 
-       if (new->timestamp > s->ordered_samples.max_timestamp)
-               s->ordered_samples.max_timestamp = new->timestamp;
+       printf("... chain: nr:%Lu\n", sample->callchain->nr);
 
-       return 0;
+       for (i = 0; i < sample->callchain->nr; i++)
+               printf("..... %2d: %016Lx\n", i, sample->callchain->ips[i]);
 }
 
-static int perf_session__process_sample(event_t *event, struct perf_session *s,
-                                       struct perf_event_ops *ops)
+static void perf_session__print_tstamp(struct perf_session *session,
+                                      event_t *event,
+                                      struct sample_data *sample)
 {
-       struct sample_data data;
+       if (event->header.type != PERF_RECORD_SAMPLE &&
+           !session->sample_id_all) {
+               fputs("-1 -1 ", stdout);
+               return;
+       }
 
-       if (!ops->ordered_samples)
-               return ops->sample(event, s);
+       if ((session->sample_type & PERF_SAMPLE_CPU))
+               printf("%u ", sample->cpu);
 
-       bzero(&data, sizeof(struct sample_data));
-       event__parse_sample(event, s->sample_type, &data);
+       if (session->sample_type & PERF_SAMPLE_TIME)
+               printf("%Lu ", sample->time);
+}
 
-       queue_sample_event(event, &data, s);
+static void dump_event(struct perf_session *session, event_t *event,
+                      u64 file_offset, struct sample_data *sample)
+{
+       if (!dump_trace)
+               return;
 
-       return 0;
+       printf("\n%#Lx [%#x]: event: %d\n", file_offset, event->header.size,
+              event->header.type);
+
+       trace_event(event);
+
+       if (sample)
+               perf_session__print_tstamp(session, event, sample);
+
+       printf("%#Lx [%#x]: PERF_RECORD_%s", file_offset, event->header.size,
+              event__get_event_name(event->header.type));
 }
 
-static int perf_session__process_event(struct perf_session *self,
-                                      event_t *event,
-                                      struct perf_event_ops *ops,
-                                      u64 offset, u64 head)
+static void dump_sample(struct perf_session *session, event_t *event,
+                       struct sample_data *sample)
 {
-       trace_event(event);
+       if (!dump_trace)
+               return;
 
-       if (event->header.type < PERF_RECORD_HEADER_MAX) {
-               dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
-                           offset + head, event->header.size,
-                           event__name[event->header.type]);
-               hists__inc_nr_events(&self->hists, event->header.type);
-       }
+       printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+              sample->pid, sample->tid, sample->ip, sample->period);
 
-       if (self->header.needs_swap && event__swap_ops[event->header.type])
-               event__swap_ops[event->header.type](event);
+       if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
+               callchain__printf(sample);
+}
+
+static int perf_session_deliver_event(struct perf_session *session,
+                                     event_t *event,
+                                     struct sample_data *sample,
+                                     struct perf_event_ops *ops,
+                                     u64 file_offset)
+{
+       dump_event(session, event, file_offset, sample);
 
        switch (event->header.type) {
        case PERF_RECORD_SAMPLE:
-               return perf_session__process_sample(event, self, ops);
+               dump_sample(session, event, sample);
+               return ops->sample(event, sample, session);
        case PERF_RECORD_MMAP:
-               return ops->mmap(event, self);
+               return ops->mmap(event, sample, session);
        case PERF_RECORD_COMM:
-               return ops->comm(event, self);
+               return ops->comm(event, sample, session);
        case PERF_RECORD_FORK:
-               return ops->fork(event, self);
+               return ops->fork(event, sample, session);
        case PERF_RECORD_EXIT:
-               return ops->exit(event, self);
+               return ops->exit(event, sample, session);
        case PERF_RECORD_LOST:
-               return ops->lost(event, self);
+               return ops->lost(event, sample, session);
        case PERF_RECORD_READ:
-               return ops->read(event, self);
+               return ops->read(event, sample, session);
        case PERF_RECORD_THROTTLE:
-               return ops->throttle(event, self);
+               return ops->throttle(event, sample, session);
        case PERF_RECORD_UNTHROTTLE:
-               return ops->unthrottle(event, self);
+               return ops->unthrottle(event, sample, session);
+       default:
+               ++session->hists.stats.nr_unknown_events;
+               return -1;
+       }
+}
+
+static int perf_session__preprocess_sample(struct perf_session *session,
+                                          event_t *event, struct sample_data *sample)
+{
+       if (event->header.type != PERF_RECORD_SAMPLE ||
+           !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
+               return 0;
+
+       if (!ip_callchain__valid(sample->callchain, event)) {
+               pr_debug("call-chain problem with event, skipping it.\n");
+               ++session->hists.stats.nr_invalid_chains;
+               session->hists.stats.total_invalid_chains += sample->period;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int perf_session__process_user_event(struct perf_session *session, event_t *event,
+                                           struct perf_event_ops *ops, u64 file_offset)
+{
+       dump_event(session, event, file_offset, NULL);
+
+       /* These events are processed right away */
+       switch (event->header.type) {
        case PERF_RECORD_HEADER_ATTR:
-               return ops->attr(event, self);
+               return ops->attr(event, session);
        case PERF_RECORD_HEADER_EVENT_TYPE:
-               return ops->event_type(event, self);
+               return ops->event_type(event, session);
        case PERF_RECORD_HEADER_TRACING_DATA:
                /* setup for reading amidst mmap */
-               lseek(self->fd, offset + head, SEEK_SET);
-               return ops->tracing_data(event, self);
+               lseek(session->fd, file_offset, SEEK_SET);
+               return ops->tracing_data(event, session);
        case PERF_RECORD_HEADER_BUILD_ID:
-               return ops->build_id(event, self);
+               return ops->build_id(event, session);
        case PERF_RECORD_FINISHED_ROUND:
-               return ops->finished_round(event, self, ops);
+               return ops->finished_round(event, session, ops);
        default:
-               ++self->hists.stats.nr_unknown_events;
-               return -1;
+               return -EINVAL;
        }
 }
 
+static int perf_session__process_event(struct perf_session *session,
+                                      event_t *event,
+                                      struct perf_event_ops *ops,
+                                      u64 file_offset)
+{
+       struct sample_data sample;
+       int ret;
+
+       if (session->header.needs_swap && event__swap_ops[event->header.type])
+               event__swap_ops[event->header.type](event);
+
+       if (event->header.type >= PERF_RECORD_HEADER_MAX)
+               return -EINVAL;
+
+       hists__inc_nr_events(&session->hists, event->header.type);
+
+       if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+               return perf_session__process_user_event(session, event, ops, file_offset);
+
+       /*
+        * For all kernel events we get the sample data
+        */
+       event__parse_sample(event, session, &sample);
+
+       /* Preprocess sample records - precheck callchains */
+       if (perf_session__preprocess_sample(session, event, &sample))
+               return 0;
+
+       if (ops->ordered_samples) {
+               ret = perf_session_queue_event(session, event, &sample,
+                                              file_offset);
+               if (ret != -ETIME)
+                       return ret;
+       }
+
+       return perf_session_deliver_event(session, event, &sample, ops,
+                                         file_offset);
+}
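
The rewritten top-level handler separates synthetic user events (processed immediately, with no sample data) from kernel events, which are parsed, prechecked, and then either queued for timestamp ordering or, when they carry no usable timestamp, delivered straight away via the -ETIME fallback. A compact sketch of that fallback shape, with stub functions standing in for the real queue and delivery paths:

	#include <errno.h>

	struct ev { unsigned long long time; };	/* 0 / ~0 mean "no timestamp" */

	static int deliver(struct ev *e)
	{
		(void)e;		/* hand the event to its handler now */
		return 0;
	}

	static int queue_ev(struct ev *e)
	{
		if (!e->time || e->time == ~0ULL)
			return -ETIME;	/* cannot participate in ordering */
		/* ... insert into the timestamp-ordered queue here ... */
		return 0;
	}

	static int handle(struct ev *e, int ordered)
	{
		if (ordered) {
			int ret = queue_ev(e);

			if (ret != -ETIME)
				return ret;	/* queued (0) or a hard error */
		}
		/* no usable timestamp: deliver immediately, in file order */
		return deliver(e);
	}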
+
 void perf_event_header__bswap(struct perf_event_header *self)
 {
        self->type = bswap_32(self->type);
@@ -656,21 +838,33 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
        return thread;
 }
 
-int do_read(int fd, void *buf, size_t size)
+static void perf_session__warn_about_errors(const struct perf_session *session,
+                                           const struct perf_event_ops *ops)
 {
-       void *buf_start = buf;
-
-       while (size) {
-               int ret = read(fd, buf, size);
-
-               if (ret <= 0)
-                       return ret;
+       if (ops->lost == event__process_lost &&
+           session->hists.stats.total_lost != 0) {
+               ui__warning("Processed %Lu events and LOST %Lu!\n\n"
+                           "Check IO/CPU overload!\n\n",
+                           session->hists.stats.total_period,
+                           session->hists.stats.total_lost);
+       }
 
-               size -= ret;
-               buf += ret;
+       if (session->hists.stats.nr_unknown_events != 0) {
+               ui__warning("Found %u unknown events!\n\n"
+                           "Is this an older tool processing a perf.data "
+                           "file generated by a more recent tool?\n\n"
+                           "If that is not the case, consider "
+                           "reporting to linux-kernel@vger.kernel.org.\n\n",
+                           session->hists.stats.nr_unknown_events);
        }
 
-       return buf - buf_start;
+       if (session->hists.stats.nr_invalid_chains != 0) {
+               ui__warning("Found invalid callchains!\n\n"
+                           "%u out of %u events were discarded for this reason.\n\n"
+                           "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+                           session->hists.stats.nr_invalid_chains,
+                           session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
+       }
 }
 
 #define session_done() (*(volatile int *)(&session_done))
@@ -690,7 +884,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
 
        head = 0;
 more:
-       err = do_read(self->fd, &event, sizeof(struct perf_event_header));
+       err = readn(self->fd, &event, sizeof(struct perf_event_header));
        if (err <= 0) {
                if (err == 0)
                        goto done;
@@ -710,8 +904,7 @@ more:
        p += sizeof(struct perf_event_header);
 
        if (size - sizeof(struct perf_event_header)) {
-               err = do_read(self->fd, p,
-                             size - sizeof(struct perf_event_header));
+               err = readn(self->fd, p, size - sizeof(struct perf_event_header));
                if (err <= 0) {
                        if (err == 0) {
                                pr_err("unexpected end of event stream\n");
@@ -724,8 +917,7 @@ more:
        }
 
        if (size == 0 ||
-           (skip = perf_session__process_event(self, &event, ops,
-                                               0, head)) < 0) {
+           (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
                            head, event.header.size, event.header.type);
                /*
@@ -740,9 +932,6 @@ more:
 
        head += size;
 
-       dump_printf("\n%#Lx [%#x]: event: %d\n",
-                   head, event.header.size, event.header.type);
-
        if (skip > 0)
                head += skip;
 
@@ -751,82 +940,91 @@ more:
 done:
        err = 0;
 out_err:
+       perf_session__warn_about_errors(self, ops);
+       perf_session_free_sample_buffers(self);
        return err;
 }
 
-int __perf_session__process_events(struct perf_session *self,
+int __perf_session__process_events(struct perf_session *session,
                                   u64 data_offset, u64 data_size,
                                   u64 file_size, struct perf_event_ops *ops)
 {
-       int err, mmap_prot, mmap_flags;
-       u64 head, shift;
-       u64 offset = 0;
-       size_t  page_size;
+       u64 head, page_offset, file_offset, file_pos, progress_next;
+       int err, mmap_prot, mmap_flags, map_idx = 0;
+       struct ui_progress *progress;
+       size_t  page_size, mmap_size;
+       char *buf, *mmaps[8];
        event_t *event;
        uint32_t size;
-       char *buf;
-       struct ui_progress *progress = ui_progress__new("Processing events...",
-                                                       self->size);
-       if (progress == NULL)
-               return -1;
 
        perf_event_ops__fill_defaults(ops);
 
        page_size = sysconf(_SC_PAGESIZE);
 
-       head = data_offset;
-       shift = page_size * (head / page_size);
-       offset += shift;
-       head -= shift;
+       page_offset = page_size * (data_offset / page_size);
+       file_offset = page_offset;
+       head = data_offset - page_offset;
+
+       if (data_offset + data_size < file_size)
+               file_size = data_offset + data_size;
+
+       progress_next = file_size / 16;
+       progress = ui_progress__new("Processing events...", file_size);
+       if (progress == NULL)
+               return -1;
+
+       mmap_size = session->mmap_window;
+       if (mmap_size > file_size)
+               mmap_size = file_size;
+
+       memset(mmaps, 0, sizeof(mmaps));
 
        mmap_prot  = PROT_READ;
        mmap_flags = MAP_SHARED;
 
-       if (self->header.needs_swap) {
+       if (session->header.needs_swap) {
                mmap_prot  |= PROT_WRITE;
                mmap_flags = MAP_PRIVATE;
        }
 remap:
-       buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
-                  mmap_flags, self->fd, offset);
+       buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
+                  file_offset);
        if (buf == MAP_FAILED) {
                pr_err("failed to mmap file\n");
                err = -errno;
                goto out_err;
        }
+       mmaps[map_idx] = buf;
+       map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
+       file_pos = file_offset + head;
 
 more:
        event = (event_t *)(buf + head);
-       ui_progress__update(progress, offset);
 
-       if (self->header.needs_swap)
+       if (session->header.needs_swap)
                perf_event_header__bswap(&event->header);
        size = event->header.size;
        if (size == 0)
                size = 8;
 
-       if (head + event->header.size >= page_size * self->mmap_window) {
-               int munmap_ret;
-
-               shift = page_size * (head / page_size);
-
-               munmap_ret = munmap(buf, page_size * self->mmap_window);
-               assert(munmap_ret == 0);
+       if (head + event->header.size >= mmap_size) {
+               if (mmaps[map_idx]) {
+                       munmap(mmaps[map_idx], mmap_size);
+                       mmaps[map_idx] = NULL;
+               }
 
-               offset += shift;
-               head -= shift;
+               page_offset = page_size * (head / page_size);
+               file_offset += page_offset;
+               head -= page_offset;
                goto remap;
        }
 
        size = event->header.size;
 
-       dump_printf("\n%#Lx [%#x]: event: %d\n",
-                   offset + head, event->header.size, event->header.type);
-
        if (size == 0 ||
-           perf_session__process_event(self, event, ops, offset, head) < 0) {
+           perf_session__process_event(session, event, ops, file_pos) < 0) {
                dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
-                           offset + head, event->header.size,
+                           file_offset + head, event->header.size,
                            event->header.type);
                /*
                 * assume we lost track of the stream, check alignment, and
@@ -839,19 +1037,24 @@ more:
        }
 
        head += size;
+       file_pos += size;
 
-       if (offset + head >= data_offset + data_size)
-               goto done;
+       if (file_pos >= progress_next) {
+               progress_next += file_size / 16;
+               ui_progress__update(progress, file_pos);
+       }
 
-       if (offset + head < file_size)
+       if (file_pos < file_size)
                goto more;
-done:
+
        err = 0;
        /* do the final flush for ordered samples */
-       self->ordered_samples.next_flush = ULLONG_MAX;
-       flush_sample_queue(self, ops);
+       session->ordered_samples.next_flush = ULLONG_MAX;
+       flush_sample_queue(session, ops);
 out_err:
        ui_progress__delete(progress);
+       perf_session__warn_about_errors(session, ops);
+       perf_session_free_sample_buffers(session);
        return err;
 }
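
The event loop now keeps a ring of up to eight live mappings instead of exactly one. That matters because queued samples hold pointers into the mapped file: keeping recent windows mapped lets ordered processing dereference them after the reader has moved on, while the round-robin munmap bounds total address-space use. A self-contained sketch of the same windowed reader follows, assuming the window is page-aligned and comfortably larger than any single record; names are illustrative.

	#include <stddef.h>
	#include <stdint.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define NR_MAPS 8	/* power of two: the ring index wraps with a mask */

	struct rec { uint32_t size; /* record length, header included */ };

	static int process_file(int fd, size_t win, size_t file_size,
				int (*process)(struct rec *r))
	{
		char *maps[NR_MAPS] = { 0 };
		size_t page = (size_t)sysconf(_SC_PAGESIZE);
		size_t file_off = 0, head = 0, pos = 0;
		int idx = 0, i, err = 0;
		char *buf;

	remap:
		if (maps[idx])			/* recycle the oldest mapping */
			munmap(maps[idx], win);
		buf = mmap(NULL, win, PROT_READ, MAP_SHARED, fd, (off_t)file_off);
		if (buf == MAP_FAILED) {
			err = -1;
			goto out;
		}
		maps[idx] = buf;
		idx = (idx + 1) & (NR_MAPS - 1);

		while (pos < file_size) {
			struct rec *r = (struct rec *)(buf + head);

			/* short-circuit: r->size is read only if the header fits */
			if (head + sizeof(*r) > win || head + r->size > win) {
				size_t shift = page * (head / page);

				file_off += shift;	/* stays page-aligned */
				head -= shift;
				goto remap;		/* re-read from new window */
			}
			if ((err = process(r)))
				goto out;
			head += r->size;
			pos += r->size;
		}
	out:
		for (i = 0; i < NR_MAPS; i++)
			if (maps[i])
				munmap(maps[i], win);
		return err;
	}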
 
index 9fa0fc2a863f1259caf8caf7494f867d0737a297..decd83f274fd3642fecf039740e75a015431df4d 100644 (file)
@@ -17,8 +17,12 @@ struct ordered_samples {
        u64                     last_flush;
        u64                     next_flush;
        u64                     max_timestamp;
-       struct list_head        samples_head;
-       struct sample_queue     *last_inserted;
+       struct list_head        samples;
+       struct list_head        sample_cache;
+       struct list_head        to_free;
+       struct sample_queue     *sample_buffer;
+       struct sample_queue     *last_sample;
+       int                     sample_buffer_idx;
 };
 
 struct perf_session {
@@ -42,6 +46,8 @@ struct perf_session {
        int                     fd;
        bool                    fd_pipe;
        bool                    repipe;
+       bool                    sample_id_all;
+       u16                     id_hdr_size;
        int                     cwdlen;
        char                    *cwd;
        struct ordered_samples  ordered_samples;
@@ -50,7 +56,9 @@ struct perf_session {
 
 struct perf_event_ops;
 
-typedef int (*event_op)(event_t *self, struct perf_session *session);
+typedef int (*event_op)(event_t *self, struct sample_data *sample,
+                       struct perf_session *session);
+typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
 typedef int (*event_op2)(event_t *self, struct perf_session *session,
                         struct perf_event_ops *ops);
 
@@ -63,16 +71,19 @@ struct perf_event_ops {
                        lost,
                        read,
                        throttle,
-                       unthrottle,
-                       attr,
+                       unthrottle;
+       event_synth_op  attr,
                        event_type,
                        tracing_data,
                        build_id;
        event_op2       finished_round;
        bool            ordered_samples;
+       bool            ordering_requires_timestamps;
 };
 
-struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe);
+struct perf_session *perf_session__new(const char *filename, int mode,
+                                      bool force, bool repipe,
+                                      struct perf_event_ops *ops);
 void perf_session__delete(struct perf_session *self);
 
 void perf_event_header__bswap(struct perf_event_header *self);
@@ -98,8 +109,9 @@ void mem_bswap_64(void *src, int byte_size);
 
 int perf_session__create_kernel_maps(struct perf_session *self);
 
-int do_read(int fd, void *buf, size_t size);
 void perf_session__update_sample_type(struct perf_session *self);
+void perf_session__set_sample_id_all(struct perf_session *session, bool value);
+void perf_session__set_sample_type(struct perf_session *session, u64 type);
 void perf_session__remove_thread(struct perf_session *self, struct thread *th);
 
 static inline
index b62a553cc67d969c104638b9bf33922f351f06bb..f44fa541d56e67c6bb6c976e78123e99657ffbee 100644 (file)
@@ -170,7 +170,7 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
                return repsep_snprintf(bf, size, "%-*s", width, dso_name);
        }
 
-       return repsep_snprintf(bf, size, "%*Lx", width, self->ip);
+       return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
 }
 
 /* --sort symbol */
@@ -196,7 +196,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
 
        if (verbose) {
                char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
-               ret += repsep_snprintf(bf, size, "%*Lx %c ",
+               ret += repsep_snprintf(bf, size, "%-#*llx %c ",
                                       BITS_PER_LONG / 4, self->ip, o);
        }
 
@@ -205,7 +205,7 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
                ret += repsep_snprintf(bf + ret, size - ret, "%s",
                                       self->ms.sym->name);
        else
-               ret += repsep_snprintf(bf + ret, size - ret, "%*Lx",
+               ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx",
                                       BITS_PER_LONG / 4, self->ip);
 
        return ret;
index 0409fc7c0058809f01da646c4d5ee98c21f69b1d..8fc0bd3a3a4a6bd3f816a6619cc1de0793a98caa 100644 (file)
@@ -259,7 +259,7 @@ static bool __match_glob(const char *str, const char *pat, bool ignore_space)
                if (!*pat)      /* Tail wild card matches all */
                        return true;
                while (*str)
-                       if (strglobmatch(str++, pat))
+                       if (__match_glob(str++, pat, ignore_space))
                                return true;
        }
        return !*str && !*pat;
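
The one-line change above fixes a real bug: the tail-wildcard recursion used to go through strglobmatch(), which re-enters the matcher with ignore_space reset, so space-lazy patterns stopped ignoring whitespace after the first '*'. A reduced matcher showing why the flag must ride along on the recursion (hypothetical code, '*' only):

	#include <ctype.h>
	#include <stdbool.h>

	static bool match(const char *s, const char *p, bool ignore_space)
	{
		for (;;) {
			if (ignore_space) {
				while (isspace((unsigned char)*s))
					s++;
				while (isspace((unsigned char)*p))
					p++;
			}
			if (*p == '*' || !*s || !*p)
				break;
			if (*p != *s)
				return false;
			s++;
			p++;
		}
		if (*p == '*') {
			while (*p == '*')
				p++;
			if (!*p)		/* tail wildcard matches all */
				return true;
			while (*s)		/* recurse WITH the flag */
				if (match(s++, p, ignore_space))
					return true;
		}
		return !*s && !*p;
	}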
index d628c8d1cf5ec11111ab3b750da0d14f6d8fff7f..15ccfba8cdf805111d56b1e7f3bf71431ef2c1e4 100644 (file)
 #include <limits.h>
 #include <sys/utsname.h>
 
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 128
+#endif
+
 #ifndef NT_GNU_BUILD_ID
 #define NT_GNU_BUILD_ID 3
 #endif
@@ -41,6 +45,7 @@ struct symbol_conf symbol_conf = {
        .exclude_other    = true,
        .use_modules      = true,
        .try_vmlinux_path = true,
+       .symfs            = "",
 };
 
 int dso__name_len(const struct dso *self)
@@ -92,7 +97,7 @@ static void symbols__fixup_end(struct rb_root *self)
                prev = curr;
                curr = rb_entry(nd, struct symbol, rb_node);
 
-               if (prev->end == prev->start)
+               if (prev->end == prev->start && prev->end != curr->start)
                        prev->end = curr->start - 1;
        }
 
@@ -121,7 +126,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
         * We still don't have the actual symbols, so guess the
         * last map's final address.
         */
-       curr->end = ~0UL;
+       curr->end = ~0ULL;
 }
 
 static void map_groups__fixup_end(struct map_groups *self)
@@ -425,16 +430,25 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
 
 int kallsyms__parse(const char *filename, void *arg,
                    int (*process_symbol)(void *arg, const char *name,
-                                                    char type, u64 start))
+                                         char type, u64 start, u64 end))
 {
        char *line = NULL;
        size_t n;
-       int err = 0;
+       int err = -1;
+       u64 prev_start = 0;
+       char prev_symbol_type = 0;
+       char *prev_symbol_name;
        FILE *file = fopen(filename, "r");
 
        if (file == NULL)
                goto out_failure;
 
+       prev_symbol_name = malloc(KSYM_NAME_LEN);
+       if (prev_symbol_name == NULL)
+               goto out_close;
+
+       err = 0;
+
        while (!feof(file)) {
                u64 start;
                int line_len, len;
@@ -454,14 +468,33 @@ int kallsyms__parse(const char *filename, void *arg,
                        continue;
 
                symbol_type = toupper(line[len]);
-               symbol_name = line + len + 2;
+               len += 2;
+               symbol_name = line + len;
+               len = line_len - len;
 
-               err = process_symbol(arg, symbol_name, symbol_type, start);
-               if (err)
+               if (len >= KSYM_NAME_LEN) {
+                       err = -1;
                        break;
+               }
+
+               if (prev_symbol_type) {
+                       u64 end = start;
+                       if (end != prev_start)
+                               --end;
+                       err = process_symbol(arg, prev_symbol_name,
+                                            prev_symbol_type, prev_start, end);
+                       if (err)
+                               break;
+               }
+
+               memcpy(prev_symbol_name, symbol_name, len + 1);
+               prev_symbol_type = symbol_type;
+               prev_start = start;
        }
 
+       free(prev_symbol_name);
        free(line);
+out_close:
        fclose(file);
        return err;
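
kallsyms__parse now reports a [start, end] range per symbol by buffering one record of look-ahead: each symbol is emitted only once the next line's start address is known, with end = next_start - 1 (or next_start itself when two symbols share an address). That is what lets dso__load_kallsyms drop its symbols__fixup_end() pass further down. A minimal sketch of the look-ahead pattern, simplified to address-plus-name lines:

	#include <stdint.h>
	#include <stdio.h>

	typedef int (*emit_fn)(const char *name, uint64_t start, uint64_t end);

	static int parse_starts(FILE *f, emit_fn emit)
	{
		char name[128], prev_name[128];
		unsigned long long start;
		uint64_t prev_start = 0;
		int have_prev = 0;

		while (fscanf(f, "%llx %127s", &start, name) == 2) {
			if (have_prev) {
				uint64_t end = start;

				if (end != prev_start)	/* distinct addresses */
					--end;		/* close the gap */
				if (emit(prev_name, prev_start, end))
					return -1;
			}
			snprintf(prev_name, sizeof(prev_name), "%s", name);
			prev_start = start;
			have_prev = 1;
		}
		/* like the loop above, the final symbol never gets an end
		 * record; a real caller must decide how to close it out */
		return 0;
	}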
 
@@ -483,7 +516,7 @@ static u8 kallsyms2elf_type(char type)
 }
 
 static int map__process_kallsym_symbol(void *arg, const char *name,
-                                      char type, u64 start)
+                                      char type, u64 start, u64 end)
 {
        struct symbol *sym;
        struct process_kallsyms_args *a = arg;
@@ -492,11 +525,8 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
        if (!symbol_type__is_a(type, a->map->type))
                return 0;
 
-       /*
-        * Will fix up the end later, when we have all symbols sorted.
-        */
-       sym = symbol__new(start, 0, kallsyms2elf_type(type), name);
-
+       sym = symbol__new(start, end - start + 1,
+                         kallsyms2elf_type(type), name);
        if (sym == NULL)
                return -ENOMEM;
        /*
@@ -649,7 +679,6 @@ int dso__load_kallsyms(struct dso *self, const char *filename,
        if (dso__load_all_kallsyms(self, filename, map) < 0)
                return -1;
 
-       symbols__fixup_end(&self->symbols[map->type]);
        if (self->kernel == DSO_TYPE_GUEST_KERNEL)
                self->origin = DSO__ORIG_GUEST_KERNEL;
        else
@@ -839,8 +868,11 @@ static int dso__synthesize_plt_symbols(struct  dso *self, struct map *map,
        char sympltname[1024];
        Elf *elf;
        int nr = 0, symidx, fd, err = 0;
+       char name[PATH_MAX];
 
-       fd = open(self->long_name, O_RDONLY);
+       snprintf(name, sizeof(name), "%s%s",
+                symbol_conf.symfs, self->long_name);
+       fd = open(name, O_RDONLY);
        if (fd < 0)
                goto out;
 
@@ -1452,16 +1484,19 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
             self->origin++) {
                switch (self->origin) {
                case DSO__ORIG_BUILD_ID_CACHE:
-                       if (dso__build_id_filename(self, name, size) == NULL)
+                       /* skip the locally configured cache if a symfs is given */
+                       if (symbol_conf.symfs[0] ||
+                           (dso__build_id_filename(self, name, size) == NULL)) {
                                continue;
+                       }
                        break;
                case DSO__ORIG_FEDORA:
-                       snprintf(name, size, "/usr/lib/debug%s.debug",
-                                self->long_name);
+                       snprintf(name, size, "%s/usr/lib/debug%s.debug",
+                                symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_UBUNTU:
-                       snprintf(name, size, "/usr/lib/debug%s",
-                                self->long_name);
+                       snprintf(name, size, "%s/usr/lib/debug%s",
+                                symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_BUILDID: {
                        char build_id_hex[BUILD_ID_SIZE * 2 + 1];
@@ -1473,19 +1508,26 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
                                          sizeof(self->build_id),
                                          build_id_hex);
                        snprintf(name, size,
-                                "/usr/lib/debug/.build-id/%.2s/%s.debug",
-                                build_id_hex, build_id_hex + 2);
+                                "%s/usr/lib/debug/.build-id/%.2s/%s.debug",
+                                symbol_conf.symfs, build_id_hex, build_id_hex + 2);
                        }
                        break;
                case DSO__ORIG_DSO:
-                       snprintf(name, size, "%s", self->long_name);
+                       snprintf(name, size, "%s%s",
+                            symbol_conf.symfs, self->long_name);
                        break;
                case DSO__ORIG_GUEST_KMODULE:
                        if (map->groups && map->groups->machine)
                                root_dir = map->groups->machine->root_dir;
                        else
                                root_dir = "";
-                       snprintf(name, size, "%s%s", root_dir, self->long_name);
+                       snprintf(name, size, "%s%s%s", symbol_conf.symfs,
+                                root_dir, self->long_name);
+                       break;
+
+               case DSO__ORIG_KMODULE:
+                       snprintf(name, size, "%s%s", symbol_conf.symfs,
+                                self->long_name);
                        break;
 
                default:
@@ -1780,21 +1822,24 @@ out_failure:
        return -1;
 }
 
-static int dso__load_vmlinux(struct dso *self, struct map *map,
-                            const char *vmlinux, symbol_filter_t filter)
+int dso__load_vmlinux(struct dso *self, struct map *map,
+                     const char *vmlinux, symbol_filter_t filter)
 {
        int err = -1, fd;
+       char symfs_vmlinux[PATH_MAX];
 
-       fd = open(vmlinux, O_RDONLY);
+       snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s",
+                symbol_conf.symfs, vmlinux);
+       fd = open(symfs_vmlinux, O_RDONLY);
        if (fd < 0)
                return -1;
 
        dso__set_loaded(self, map->type);
-       err = dso__load_sym(self, map, vmlinux, fd, filter, 0, 0);
+       err = dso__load_sym(self, map, symfs_vmlinux, fd, filter, 0, 0);
        close(fd);
 
        if (err > 0)
-               pr_debug("Using %s for symbols\n", vmlinux);
+               pr_debug("Using %s for symbols\n", symfs_vmlinux);
 
        return err;
 }
@@ -1836,8 +1881,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
        const char *kallsyms_filename = NULL;
        char *kallsyms_allocated_filename = NULL;
        /*
-        * Step 1: if the user specified a vmlinux filename, use it and only
-        * it, reporting errors to the user if it cannot be used.
+        * Step 1: if the user specified a kallsyms or vmlinux filename, use
+        * it and only it, reporting errors to the user if it cannot be used.
         *
         * For instance, try to analyse an ARM perf.data file _without_ a
         * build-id, or if the user specifies the wrong path to the right
@@ -1850,6 +1895,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
         * validation in dso__load_vmlinux and will bail out if they don't
         * match.
         */
+       if (symbol_conf.kallsyms_name != NULL) {
+               kallsyms_filename = symbol_conf.kallsyms_name;
+               goto do_kallsyms;
+       }
+
        if (symbol_conf.vmlinux_name != NULL) {
                err = dso__load_vmlinux(self, map,
                                        symbol_conf.vmlinux_name, filter);
@@ -1867,6 +1917,10 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
                        goto out_fixup;
        }
 
+       /* do not try local files if a symfs was given */
+       if (symbol_conf.symfs[0] != 0)
+               return -1;
+
        /*
         * Say the kernel DSO was created when processing the build-id header table,
         * we have a build-id, so check if it is the same as the running kernel,
@@ -2136,7 +2190,7 @@ struct process_args {
 };
 
 static int symbol__in_kernel(void *arg, const char *name,
-                            char type __used, u64 start)
+                            char type __used, u64 start, u64 end __used)
 {
        struct process_args *args = arg;
 
@@ -2257,9 +2311,6 @@ static int vmlinux_path__init(void)
        struct utsname uts;
        char bf[PATH_MAX];
 
-       if (uname(&uts) < 0)
-               return -1;
-
        vmlinux_path = malloc(sizeof(char *) * 5);
        if (vmlinux_path == NULL)
                return -1;
@@ -2272,6 +2323,14 @@ static int vmlinux_path__init(void)
        if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
                goto out_fail;
        ++vmlinux_path__nr_entries;
+
+       /* only try running kernel version if no symfs was given */
+       if (symbol_conf.symfs[0] != 0)
+               return 0;
+
+       if (uname(&uts) < 0)
+               return -1;
+
        snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
        vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
        if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
@@ -2331,6 +2390,8 @@ static int setup_list(struct strlist **list, const char *list_str,
 
 int symbol__init(void)
 {
+       const char *symfs;
+
        if (symbol_conf.initialized)
                return 0;
 
@@ -2359,6 +2420,18 @@ int symbol__init(void)
                       symbol_conf.sym_list_str, "symbol") < 0)
                goto out_free_comm_list;
 
+       /*
+        * A symfs path of "/" is identical to "",
+        * so reset it to "" here for simplicity.
+        */
+       symfs = realpath(symbol_conf.symfs, NULL);
+       if (symfs == NULL)
+               symfs = symbol_conf.symfs;
+       if (strcmp(symfs, "/") == 0)
+               symbol_conf.symfs = "";
+       if (symfs != symbol_conf.symfs)
+               free((void *)symfs);
+
        symbol_conf.initialized = true;
        return 0;
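
symbol__init() canonicalises the new symfs prefix so that passing "/" behaves exactly like the default empty prefix (every later snprintf simply prepends symbol_conf.symfs). Note that realpath(path, NULL) returns a freshly malloc()ed canonical path per POSIX.1-2008, hence the free() of the temporary. The normalisation in isolation, with a hypothetical helper name:

	#include <stdlib.h>
	#include <string.h>

	/* Returns the prefix to prepend to symbol paths; "" means none. */
	static const char *normalize_symfs(const char *symfs)
	{
		char *real = realpath(symfs, NULL);	/* malloc()ed */
		const char *ret = symfs;

		if (real) {
			if (strcmp(real, "/") == 0)
				ret = "";	/* "/" as a prefix is a no-op */
			free(real);
		}
		return ret;
	}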
 
index 038f2201ee09579ca3f460d9f59576770ea477d2..670cd1c88f54dc932d18b7c79d609d61e7560295 100644 (file)
@@ -72,6 +72,7 @@ struct symbol_conf {
                        show_cpu_utilization,
                        initialized;
        const char      *vmlinux_name,
+                       *kallsyms_name,
                        *source_prefix,
                        *field_sep;
        const char      *default_guest_vmlinux_name,
@@ -85,6 +86,7 @@ struct symbol_conf {
        struct strlist  *dso_list,
                        *comm_list,
                        *sym_list;
+       const char      *symfs;
 };
 
 extern struct symbol_conf symbol_conf;
@@ -166,6 +168,8 @@ void dso__sort_by_name(struct dso *self, enum map_type type);
 struct dso *__dsos__findnew(struct list_head *head, const char *name);
 
 int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
+int dso__load_vmlinux(struct dso *self, struct map *map,
+                     const char *vmlinux, symbol_filter_t filter);
 int dso__load_vmlinux_path(struct dso *self, struct map *map,
                           symbol_filter_t filter);
 int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
@@ -213,7 +217,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 int build_id__sprintf(const u8 *self, int len, char *bf);
 int kallsyms__parse(const char *filename, void *arg,
                    int (*process_symbol)(void *arg, const char *name,
-                                         char type, u64 start));
+                                         char type, u64 start, u64 end));
 
 void machine__destroy_kernel_maps(struct machine *self);
 int __machine__create_kernel_maps(struct machine *self, struct dso *kernel);
index 8c72d888e449989dcdf1c349fdc4a97ead6b694a..00f4eade2e3e9c1fdd2e9c92c83222a10c1555fd 100644 (file)
@@ -16,35 +16,50 @@ static int filter(const struct dirent *dir)
                return 1;
 }
 
-int find_all_tid(int pid, pid_t ** all_tid)
+struct thread_map *thread_map__new_by_pid(pid_t pid)
 {
+       struct thread_map *threads;
        char name[256];
        int items;
        struct dirent **namelist = NULL;
-       int ret = 0;
        int i;
 
        sprintf(name, "/proc/%d/task", pid);
        items = scandir(name, &namelist, filter, NULL);
        if (items <= 0)
-                return -ENOENT;
-       *all_tid = malloc(sizeof(pid_t) * items);
-       if (!*all_tid) {
-               ret = -ENOMEM;
-               goto failure;
-       }
-
-       for (i = 0; i < items; i++)
-               (*all_tid)[i] = atoi(namelist[i]->d_name);
+                return NULL;
 
-       ret = items;
+       threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+       if (threads != NULL) {
+               for (i = 0; i < items; i++)
+                       threads->map[i] = atoi(namelist[i]->d_name);
+               threads->nr = items;
+       }
 
-failure:
        for (i=0; i<items; i++)
                free(namelist[i]);
        free(namelist);
 
-       return ret;
+       return threads;
+}
+
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+       struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+
+       if (threads != NULL) {
+               threads->map[0] = tid;
+               threads->nr     = 1;
+       }
+
+       return threads;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+{
+       if (pid != -1)
+               return thread_map__new_by_pid(pid);
+       return thread_map__new_by_tid(tid);
 }
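
thread_map replaces the old pid_t-array out-parameter with a C99 flexible array member, so the header and the variable-length tid table live in one allocation and thread_map__delete() is a single free(). A sketch of the idiom with an illustrative type:

	#include <stdlib.h>

	struct int_map {
		int nr;
		int map[];		/* C99 flexible array member */
	};

	static struct int_map *int_map__new(const int *vals, int nr)
	{
		/* header and payload in one block: one free() tears it down */
		struct int_map *m = malloc(sizeof(*m) + sizeof(int) * nr);
		int i;

		if (m != NULL) {
			for (i = 0; i < nr; i++)
				m->map[i] = vals[i];
			m->nr = nr;
		}
		return m;
	}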
 
 static struct thread *thread__new(pid_t pid)
index 688500ff826f008a4c48cd5bf782ad60c1363325..d7574101054a8ae9b6ded40fd0a813c3cacf9965 100644 (file)
@@ -18,11 +18,24 @@ struct thread {
        int                     comm_len;
 };
 
+struct thread_map {
+       int nr;
+       int map[];
+};
+
 struct perf_session;
 
 void thread__delete(struct thread *self);
 
-int find_all_tid(int pid, pid_t ** all_tid);
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+
+static inline void thread_map__delete(struct thread_map *threads)
+{
+       free(threads);
+}
+
 int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
 struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
index b1572601286cad7020c323d175177d0445eb806f..35729f4c40cb7a98e891a2013d4b6e812a24237e 100644 (file)
 #include <ctype.h>
 #include <errno.h>
 #include <stdbool.h>
+#include <linux/list.h>
 #include <linux/kernel.h>
 
 #include "../perf.h"
 #include "trace-event.h"
 #include "debugfs.h"
+#include "evsel.h"
 
 #define VERSION "0.5"
 
@@ -469,16 +471,17 @@ out:
 }
 
 static struct tracepoint_path *
-get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
+get_tracepoints_path(struct list_head *pattrs)
 {
        struct tracepoint_path path, *ppath = &path;
-       int i, nr_tracepoints = 0;
+       struct perf_evsel *pos;
+       int nr_tracepoints = 0;
 
-       for (i = 0; i < nb_events; i++) {
-               if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
+       list_for_each_entry(pos, pattrs, node) {
+               if (pos->attr.type != PERF_TYPE_TRACEPOINT)
                        continue;
                ++nr_tracepoints;
-               ppath->next = tracepoint_id_to_path(pattrs[i].config);
+               ppath->next = tracepoint_id_to_path(pos->attr.config);
                if (!ppath->next)
                        die("%s\n", "No memory to alloc tracepoints list");
                ppath = ppath->next;
@@ -487,21 +490,21 @@ get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
        return nr_tracepoints > 0 ? path.next : NULL;
 }
 
-bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events)
+bool have_tracepoints(struct list_head *pattrs)
 {
-       int i;
+       struct perf_evsel *pos;
 
-       for (i = 0; i < nb_events; i++)
-               if (pattrs[i].type == PERF_TYPE_TRACEPOINT)
+       list_for_each_entry(pos, pattrs, node)
+               if (pos->attr.type == PERF_TYPE_TRACEPOINT)
                        return true;
 
        return false;
 }
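
The tracing-data helpers now walk a linked list of evsels instead of a perf_event_attr array. list_for_each_entry() iterates over list_head nodes embedded in each element and hands back the containing structure, so elements are never moved or copied when the set changes. A sketch of the pattern, assuming the userspace <linux/list.h> copy that tools/perf ships:

	#include <linux/list.h>	/* tools/perf's userspace list.h */

	struct item {
		struct list_head node;	/* linkage lives in the element */
		int type;
	};

	static int count_of_type(struct list_head *items, int type)
	{
		struct item *pos;
		int nr = 0;

		list_for_each_entry(pos, items, node)
			if (pos->type == type)
				nr++;
		return nr;
	}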
 
-int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
+int read_tracing_data(int fd, struct list_head *pattrs)
 {
        char buf[BUFSIZ];
-       struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events);
+       struct tracepoint_path *tps = get_tracepoints_path(pattrs);
 
        /*
         * What? No tracepoints? No sense writing anything here, bail out.
@@ -545,14 +548,13 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
        return 0;
 }
 
-ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
-                              int nb_events)
+ssize_t read_tracing_data_size(int fd, struct list_head *pattrs)
 {
        ssize_t size;
        int err = 0;
 
        calc_data_size = 1;
-       err = read_tracing_data(fd, pattrs, nb_events);
+       err = read_tracing_data(fd, pattrs);
        size = calc_data_size - 1;
        calc_data_size = 0;
 
index b3e86b1e44443909e518573ab0a1d49f8167f151..b5f12ca24d99f877877b701ae6f8b0d7f6b3acca 100644 (file)
@@ -262,9 +262,8 @@ raw_field_value(struct event *event, const char *name, void *data);
 void *raw_field_ptr(struct event *event, const char *name, void *data);
 unsigned long long eval_flag(const char *flag);
 
-int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
-ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
-                              int nb_events);
+int read_tracing_data(int fd, struct list_head *pattrs);
+ssize_t read_tracing_data_size(int fd, struct list_head *pattrs);
 
 /* taken from kernel/trace/trace.h */
 enum trace_flag_type {
index 056c69521a38098a8053d18f3e615d3a35390fd4..7b5a8926624e49be67d802fc91afc56451bead23 100644 (file)
@@ -104,10 +104,24 @@ out_destroy_form:
        return rc;
 }
 
-static const char yes[] = "Yes", no[] = "No";
+static const char yes[] = "Yes", no[] = "No",
+                 warning_str[] = "Warning!", ok[] = "Ok";
 
 bool ui__dialog_yesno(const char *msg)
 {
        /* newtWinChoice should really be accepting const char pointers... */
        return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
 }
+
+void ui__warning(const char *format, ...)
+{
+       va_list args;
+
+       va_start(args, format);
+       if (use_browser > 0)
+               newtWinMessagev((char *)warning_str, (char *)ok,
+                               (char *)format, args);
+       else
+               vfprintf(stderr, format, args);
+       va_end(args);
+}
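
ui__warning() is the usual varargs-forwarding shape: a function taking ... cannot pass its arguments on directly, so it converts them to a va_list and calls a v-variant (vfprintf here, newtWinMessagev under the TUI). The pattern in isolation, with illustrative names:

	#include <stdarg.h>
	#include <stdio.h>

	static void warn_to(FILE *out, const char *fmt, ...)
	{
		va_list args;

		va_start(args, fmt);
		vfprintf(out, fmt, args);   /* the v-variant takes the va_list */
		va_end(args);
	}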
index 214265674ddda0a59488cc0871a0943b20fc6bd4..5b3ea49aa63ea1ff35b7d949b49a661187aa16e6 100644 (file)
@@ -114,3 +114,20 @@ unsigned long convert_unit(unsigned long value, char *unit)
 
        return value;
 }
+
+int readn(int fd, void *buf, size_t n)
+{
+       void *buf_start = buf;
+
+       while (n) {
+               int ret = read(fd, buf, n);
+
+               if (ret <= 0)
+                       return ret;
+
+               n -= ret;
+               buf += ret;
+       }
+
+       return buf - buf_start;
+}
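
do_read() moves to util.c as the generic readn(): read(2) may legally return fewer bytes than requested, so the loop keeps issuing reads until the buffer is full or read() reports error or EOF. One caveat visible in the code: an EOF in the middle of a record comes back as 0, not as a short byte count. A small usage sketch with a hypothetical header type:

	#include <stdint.h>
	#include "util.h"	/* for readn() */

	struct trace_hdr { uint32_t type, size; };	/* hypothetical */

	static int read_header(int fd, struct trace_hdr *h)
	{
		int ret = readn(fd, h, sizeof(*h));

		if (ret < 0)
			return -1;	/* read error */
		if (ret == 0)
			return 0;	/* EOF (even mid-header, see above) */
		return 1;		/* all sizeof(*h) bytes arrived */
	}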
index 7562707ddd1c491755dc8ea5121637918ba1b844..e833f26f3bfc7d702cd58ba55c7ae85657fe3801 100644 (file)
@@ -265,6 +265,7 @@ void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
 unsigned long convert_unit(unsigned long value, char *unit);
+int readn(int fd, void *buf, size_t size);
 
 #define _STR(x) #x
 #define STR(x) _STR(x)
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
new file mode 100644 (file)
index 0000000..22afbf6
--- /dev/null
@@ -0,0 +1,20 @@
+#include "xyarray.h"
+#include "util.h"
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
+{
+       size_t row_size = ylen * entry_size;
+       struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
+
+       if (xy != NULL) {
+               xy->entry_size = entry_size;
+               xy->row_size   = row_size;
+       }
+
+       return xy;
+}
+
+void xyarray__delete(struct xyarray *xy)
+{
+       free(xy);
+}
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
new file mode 100644 (file)
index 0000000..c488a07
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _PERF_XYARRAY_H_
+#define _PERF_XYARRAY_H_ 1
+
+#include <sys/types.h>
+
+struct xyarray {
+       size_t row_size;
+       size_t entry_size;
+       char contents[];
+};
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
+void xyarray__delete(struct xyarray *xy);
+
+static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+{
+       return &xy->contents[x * xy->row_size + y * xy->entry_size];
+}
+
+#endif /* _PERF_XYARRAY_H_ */
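
xyarray is a flat two-dimensional array: one allocation, row-major layout, with xyarray__entry() doing the index arithmetic. Since xyarray__new() goes through zalloc(), entries start zero-filled. A short usage sketch:

	#include <stdint.h>
	#include <stdio.h>
	#include "xyarray.h"

	int main(void)
	{
		/* 2 rows x 3 columns of u64 counters, zeroed by zalloc() */
		struct xyarray *xy = xyarray__new(2, 3, sizeof(uint64_t));
		uint64_t *cell;

		if (xy == NULL)
			return 1;
		cell = xyarray__entry(xy, 1, 2);  /* x = row 1, y = column 2 */
		*cell += 1;
		printf("cell(1,2) = %llu\n", (unsigned long long)*cell);
		xyarray__delete(xy);
		return 0;
	}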