git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Mar 2013 04:44:23 +0000 (20:44 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Mar 2013 04:44:23 +0000 (20:44 -0800)
Pull one kvm bugfix from Gleb Natapov.

* git://git.kernel.org/pub/scm/virt/kvm/kvm:
  x86/kvm: Fix pvclock vsyscall fixmap

333 files changed:
Documentation/block/cfq-iosched.txt
Documentation/cgroups/blkio-controller.txt
Documentation/devicetree/bindings/arm/armadeus.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arm/fsl.txt
Documentation/devicetree/bindings/clock/imx5-clock.txt
Documentation/devicetree/bindings/clock/imx6q-clock.txt
Documentation/devicetree/bindings/thermal/dove-thermal.txt [new file with mode: 0644]
Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt [new file with mode: 0644]
Documentation/devicetree/bindings/thermal/rcar-thermal.txt [new file with mode: 0644]
Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt [moved from Documentation/devicetree/bindings/arm/armada-370-xp-timer.txt with 55% similarity]
Documentation/devicetree/bindings/w1/fsl-imx-owire.txt [new file with mode: 0644]
Documentation/dma-buf-sharing.txt
Documentation/thermal/exynos_thermal_emulation [new file with mode: 0644]
Documentation/thermal/intel_powerclamp.txt [new file with mode: 0644]
Documentation/thermal/sysfs-api.txt
MAINTAINERS
arch/arm/Kconfig
arch/arm/boot/dts/Makefile
arch/arm/boot/dts/am33xx.dtsi
arch/arm/boot/dts/armada-370-db.dts
arch/arm/boot/dts/armada-370-mirabox.dts
arch/arm/boot/dts/armada-370-rd.dts [new file with mode: 0644]
arch/arm/boot/dts/armada-370-xp.dtsi
arch/arm/boot/dts/armada-370.dtsi
arch/arm/boot/dts/armada-xp-db.dts
arch/arm/boot/dts/armada-xp-gp.dts [new file with mode: 0644]
arch/arm/boot/dts/armada-xp-mv78230.dtsi
arch/arm/boot/dts/armada-xp-mv78260.dtsi
arch/arm/boot/dts/armada-xp-mv78460.dtsi
arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
arch/arm/boot/dts/armada-xp.dtsi
arch/arm/boot/dts/dove-cubox.dts
arch/arm/boot/dts/dove.dtsi
arch/arm/boot/dts/imx25-karo-tx25.dts
arch/arm/boot/dts/imx25-pdk.dts [new file with mode: 0644]
arch/arm/boot/dts/imx25.dtsi
arch/arm/boot/dts/imx27-apf27.dts
arch/arm/boot/dts/imx27-pdk.dts [moved from arch/arm/boot/dts/imx27-3ds.dts with 59% similarity]
arch/arm/boot/dts/imx31-bug.dts
arch/arm/boot/dts/imx51-apf51.dts [new file with mode: 0644]
arch/arm/boot/dts/imx51-babbage.dts
arch/arm/boot/dts/imx51.dtsi
arch/arm/boot/dts/imx53-ard.dts
arch/arm/boot/dts/imx53-evk.dts
arch/arm/boot/dts/imx53-mba53.dts [new file with mode: 0644]
arch/arm/boot/dts/imx53-qsb.dts
arch/arm/boot/dts/imx53-smd.dts
arch/arm/boot/dts/imx53-tqma53.dtsi [new file with mode: 0644]
arch/arm/boot/dts/imx53.dtsi
arch/arm/boot/dts/imx6dl.dtsi [new file with mode: 0644]
arch/arm/boot/dts/imx6q-arm2.dts
arch/arm/boot/dts/imx6q-sabreauto.dts
arch/arm/boot/dts/imx6q-sabrelite.dts
arch/arm/boot/dts/imx6q-sabresd.dts
arch/arm/boot/dts/imx6q.dtsi
arch/arm/boot/dts/imx6qdl.dtsi [new file with mode: 0644]
arch/arm/boot/dts/kirkwood-6282.dtsi
arch/arm/boot/dts/kirkwood-dreamplug.dts
arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts [new file with mode: 0644]
arch/arm/boot/dts/kirkwood-mplcec4.dts
arch/arm/boot/dts/kirkwood-ns2-common.dtsi
arch/arm/boot/dts/kirkwood-nsa310.dts
arch/arm/boot/dts/kirkwood-openblocks_a6.dts
arch/arm/boot/dts/kirkwood-topkick.dts
arch/arm/boot/dts/kirkwood.dtsi
arch/arm/configs/dove_defconfig
arch/arm/configs/mvebu_defconfig
arch/arm/mach-dove/Kconfig
arch/arm/mach-dove/Makefile
arch/arm/mach-dove/board-dt.c [new file with mode: 0644]
arch/arm/mach-dove/common.c
arch/arm/mach-imx/clk-imx51-imx53.c
arch/arm/mach-imx/clk-imx6q.c
arch/arm/mach-imx/mach-imx6q.c
arch/arm/mach-kirkwood/Kconfig
arch/arm/mach-kirkwood/Makefile
arch/arm/mach-kirkwood/board-dreamplug.c
arch/arm/mach-kirkwood/board-dt.c
arch/arm/mach-kirkwood/board-guruplug.c [new file with mode: 0644]
arch/arm/mach-kirkwood/board-mplcec4.c
arch/arm/mach-kirkwood/board-ns2.c
arch/arm/mach-kirkwood/board-nsa310.c
arch/arm/mach-kirkwood/board-openblocks_a6.c
arch/arm/mach-kirkwood/board-usi_topkick.c
arch/arm/mach-kirkwood/common.h
arch/arm/mach-mvebu/irq-armada-370-xp.c
arch/arm/mach-omap2/Makefile
arch/arm/mach-omap2/am33xx-restart.c [new file with mode: 0644]
arch/arm/mach-omap2/am35xx-emac.c
arch/arm/mach-omap2/board-generic.c
arch/arm/mach-omap2/cclock33xx_data.c
arch/arm/mach-omap2/cclock3xxx_data.c
arch/arm/mach-omap2/cclock44xx_data.c
arch/arm/mach-omap2/clock.h
arch/arm/mach-omap2/cm33xx.c
arch/arm/mach-omap2/cm33xx.h
arch/arm/mach-omap2/common.h
arch/arm/mach-omap2/devices.c
arch/arm/mach-omap2/dpll3xxx.c
arch/arm/mach-omap2/id.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mach-omap2/omap_hwmod.h
arch/arm/mach-omap2/omap_hwmod_33xx_data.c
arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
arch/arm/mach-omap2/omap_hwmod_44xx_data.c
arch/arm/mach-omap2/omap_hwmod_reset.c [new file with mode: 0644]
arch/arm/mach-omap2/pm.c
arch/arm/mach-omap2/pm24xx.c
arch/arm/mach-omap2/pm44xx.c
arch/arm/mach-omap2/prm33xx.c
arch/arm/mach-omap2/prm33xx.h
arch/arm/mach-omap2/sleep24xx.S
arch/arm/mach-omap2/soc.h
arch/arm/mach-omap2/sr_device.c
arch/arm/plat-omap/Kconfig
arch/arm/plat-omap/include/plat/timex.h
arch/x86/kernel/nmi.c
block/Kconfig
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-core.c
block/blk-exec.c
block/blk-flush.c
block/blk-lib.c
block/blk-sysfs.c
block/blk.h
block/cfq-iosched.c
block/elevator.c
drivers/acpi/apei/ghes.c
drivers/base/dma-buf.c
drivers/block/DAC960.c
drivers/block/Kconfig
drivers/block/Makefile
drivers/block/loop.c
drivers/block/mtip32xx/Kconfig
drivers/block/mtip32xx/mtip32xx.c
drivers/block/mtip32xx/mtip32xx.h
drivers/block/rbd.c
drivers/block/rsxx/Makefile [new file with mode: 0644]
drivers/block/rsxx/config.c [new file with mode: 0644]
drivers/block/rsxx/core.c [new file with mode: 0644]
drivers/block/rsxx/cregs.c [new file with mode: 0644]
drivers/block/rsxx/dev.c [new file with mode: 0644]
drivers/block/rsxx/dma.c [new file with mode: 0644]
drivers/block/rsxx/rsxx.h [new file with mode: 0644]
drivers/block/rsxx/rsxx_cfg.h [new file with mode: 0644]
drivers/block/rsxx/rsxx_priv.h [new file with mode: 0644]
drivers/block/swim3.c
drivers/block/xd.c [deleted file]
drivers/block/xd.h [deleted file]
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkback/xenbus.c
drivers/block/xen-blkfront.c
drivers/clocksource/time-armada-370-xp.c
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/edac_core.h
drivers/edac/edac_mc.c
drivers/edac/edac_mc_sysfs.c
drivers/edac/edac_module.c
drivers/edac/edac_pci_sysfs.c
drivers/edac/ghes_edac.c [new file with mode: 0644]
drivers/edac/i3200_edac.c
drivers/edac/i5100_edac.c
drivers/edac/i7core_edac.c
drivers/edac/sb_edac.c
drivers/md/dm.c
drivers/md/raid5.c
drivers/media/platform/omap3isp/isp.c
drivers/media/platform/omap3isp/isp.h
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/comminit.c
drivers/scsi/bfa/bfad.c
drivers/scsi/bnx2fc/bnx2fc.h
drivers/scsi/bnx2fc/bnx2fc_fcoe.c
drivers/scsi/bnx2fc/bnx2fc_hwi.c
drivers/scsi/bnx2fc/bnx2fc_io.c
drivers/scsi/bnx2fc/bnx2fc_tgt.c
drivers/scsi/bnx2i/bnx2i_hwi.c
drivers/scsi/csiostor/csio_hw.c
drivers/scsi/csiostor/csio_init.c
drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
drivers/scsi/fnic/fnic_fcs.c
drivers/scsi/gdth.c
drivers/scsi/ipr.c
drivers/scsi/ipr.h
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_bsg.c
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_ct.c
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hw.h
drivers/scsi/lpfc/lpfc_hw4.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nportdisc.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli4.h
drivers/scsi/lpfc/lpfc_version.h
drivers/scsi/mpt2sas/mpt2sas_base.c
drivers/scsi/mpt2sas/mpt2sas_base.h
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_config.c
drivers/scsi/mpt3sas/mpt3sas_ctl.c
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
drivers/scsi/qla4xxx/ql4_83xx.c
drivers/scsi/qla4xxx/ql4_attr.c
drivers/scsi/qla4xxx/ql4_def.h
drivers/scsi/qla4xxx/ql4_fw.h
drivers/scsi/qla4xxx/ql4_glbl.h
drivers/scsi/qla4xxx/ql4_init.c
drivers/scsi/qla4xxx/ql4_iocb.c
drivers/scsi/qla4xxx/ql4_isr.c
drivers/scsi/qla4xxx/ql4_mbx.c
drivers/scsi/qla4xxx/ql4_nx.c
drivers/scsi/qla4xxx/ql4_os.c
drivers/scsi/qla4xxx/ql4_version.h
drivers/scsi/scsi_transport_iscsi.c
drivers/thermal/Kconfig
drivers/thermal/Makefile
drivers/thermal/cpu_cooling.c
drivers/thermal/db8500_cpufreq_cooling.c
drivers/thermal/db8500_thermal.c
drivers/thermal/dove_thermal.c [new file with mode: 0644]
drivers/thermal/exynos_thermal.c
drivers/thermal/intel_powerclamp.c [new file with mode: 0644]
drivers/thermal/kirkwood_thermal.c [new file with mode: 0644]
drivers/thermal/rcar_thermal.c
drivers/thermal/spear_thermal.c
drivers/thermal/step_wise.c
drivers/thermal/thermal_sys.c
drivers/w1/masters/mxc_w1.c
fs/bio.c
fs/block_dev.c
fs/btrfs/extent-tree.c
fs/buffer.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/file.c
fs/ceph/ioctl.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/mdsmap.c
fs/ceph/strings.c
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/ext4/inode.c
fs/fs-writeback.c
fs/lockd/clntlock.c
fs/lockd/clntproc.c
fs/lockd/host.c
fs/lockd/mon.c
fs/lockd/svcsubs.c
fs/nfs/cache_lib.c
fs/nfs/cache_lib.h
fs/nfs/dns_resolve.c
fs/nfs/nfs4client.c
fs/nfs/nfs4filelayoutdev.c
fs/nfs/nfs4namespace.c
fs/nfs/super.c
fs/nfsd/cache.h
fs/nfsd/export.c
fs/nfsd/fault_inject.c
fs/nfsd/nfs4idmap.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfscache.c
fs/nfsd/nfsctl.c
fs/nfsd/nfssvc.c
fs/nfsd/xdr4.h
include/acpi/ghes.h [new file with mode: 0644]
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/buffer_head.h
include/linux/ceph/ceph_features.h
include/linux/ceph/ceph_fs.h
include/linux/ceph/decode.h
include/linux/ceph/libceph.h
include/linux/ceph/mdsmap.h
include/linux/ceph/messenger.h
include/linux/ceph/osd_client.h
include/linux/ceph/osdmap.h
include/linux/ceph/rados.h
include/linux/completion.h
include/linux/crush/crush.h
include/linux/dma-buf.h
include/linux/edac.h
include/linux/elevator.h
include/linux/llist.h
include/linux/lockd/lockd.h
include/linux/pci_ids.h
include/linux/platform_data/exynos_thermal.h
include/linux/sunrpc/addr.h [new file with mode: 0644]
include/linux/sunrpc/cache.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/xdr.h
include/linux/thermal.h
include/linux/writeback.h
include/ras/ras_event.h
include/sound/aess.h [new file with mode: 0644]
include/trace/events/block.h
include/trace/events/writeback.h
kernel/sched/core.c
kernel/time/tick-sched.c
kernel/trace/blktrace.c
mm/page-writeback.c
net/ceph/ceph_common.c
net/ceph/ceph_strings.c
net/ceph/crush/mapper.c
net/ceph/crypto.c
net/ceph/debugfs.c
net/ceph/messenger.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/ceph/osdmap.c
net/ceph/pagevec.c
net/sunrpc/addr.c
net/sunrpc/auth_gss/gss_krb5_wrap.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/cache.c
net/sunrpc/clnt.c
net/sunrpc/rpcb_clnt.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcauth_unix.c
net/sunrpc/xdr.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtsock.c

index d89b4fe724d75393a003b33c22bf531252c29672..a5eb7d19a65d241650e26a2b4e5a303437b46878 100644 (file)
@@ -102,6 +102,64 @@ processing of request. Therefore, increasing the value can improve the
 performance although this can cause the latency of some I/O to increase due
 to a larger number of requests.
 
+CFQ Group scheduling
+====================
+
+CFQ supports blkio cgroup and has "blkio." prefixed files in each
+blkio cgroup directory. It is weight-based and there are four knobs
+for configuration - weight[_device] and leaf_weight[_device].
+Internal cgroup nodes (the ones with children) can also have tasks in
+them, so the former two configure what proportion the cgroup as a
+whole is entitled to at its parent's level, while the latter two
+configure what proportion the tasks in the cgroup get compared to
+its direct child cgroups.
+
+Another way to think about it is to assume that each internal node has
+an implicit leaf child node which hosts all of its tasks and whose
+weight is configured by leaf_weight[_device]. Let's assume a blkio hierarchy
+composed of five cgroups - root, A, B, AA and AB - with the following
+weights where the names represent the hierarchy.
+
+        weight leaf_weight
+ root :  125    125
+ A    :  500    750
+ B    :  250    500
+ AA   :  500    500
+ AB   : 1000    500
+
+root never has a parent, so its weight is meaningless. For backward
+compatibility, weight is always kept in sync with leaf_weight. B, AA
+and AB have no children and thus their tasks have no child cgroups to
+compete with. They always get 100% of what the cgroup won at the
+parent level. Considering only the weights which matter, the hierarchy
+looks like the following.
+
+          root
+       /    |   \
+      A     B    leaf
+     500   250   125
+   /  |  \
+  AA  AB  leaf
+ 500 1000 750
+
+If all cgroups have active IOs and are competing with each other, disk
+time will be distributed like the following.
+
+Distribution below root. The total active weight at this level is
+A:500 + B:250 + root-leaf:125 = 875.
+
+ root-leaf :   125 /  875      =~ 14%
+ A         :   500 /  875      =~ 57%
+ B(-leaf)  :   250 /  875      =~ 28%
+
+A has children and further distributes its 57% among the children and
+the implicit leaf node. The total active weight at this level is
+AA:500 + AB:1000 + A-leaf:750 = 2250.
+
+ A-leaf    : ( 750 / 2250) * A =~ 19%
+ AA(-leaf) : ( 500 / 2250) * A =~ 12%
+ AB(-leaf) : (1000 / 2250) * A =~ 25%
+
 CFQ IOPS Mode for group scheduling
 ===================================
 Basic CFQ design is to provide priority based time slices. Higher priority
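
The share arithmetic described in the new CFQ text above can be checked
mechanically. Below is a minimal, stand-alone C sketch that recomputes the
per-group disk-time shares for the documented example (root, A, B, AA, AB);
the weights come from the documentation, and the program is only an
illustration of the calculation, not kernel code.

#include <stdio.h>

int main(void)
{
	/* Weights from the example hierarchy in cfq-iosched.txt. */
	double root_leaf = 125, A = 500, B = 250;
	double top = root_leaf + A + B;              /* 875  */
	double AA = 500, AB = 1000, A_leaf = 750;
	double sub = AA + AB + A_leaf;               /* 2250 */

	/* Distribution below root. */
	printf("root-leaf : %4.1f%%\n", 100 * root_leaf / top);
	printf("A         : %4.1f%%\n", 100 * A / top);
	printf("B(-leaf)  : %4.1f%%\n", 100 * B / top);

	/* A redistributes its share among AA, AB and its implicit leaf. */
	printf("A-leaf    : %4.1f%%\n", 100 * (A / top) * A_leaf / sub);
	printf("AA(-leaf) : %4.1f%%\n", 100 * (A / top) * AA / sub);
	printf("AB(-leaf) : %4.1f%%\n", 100 * (A / top) * AB / sub);
	return 0;
}
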
index a794ce91a2d596f2ef11e91706cfdf5932d75022..da272c8f44e7db9e8aa41780b29bc6a95401c34b 100644 (file)
@@ -94,13 +94,11 @@ Throttling/Upper Limit policy
 
 Hierarchical Cgroups
 ====================
-- Currently none of the IO control policy supports hierarchical groups. But
-  cgroup interface does allow creation of hierarchical cgroups and internally
-  IO policies treat them as flat hierarchy.
+- Currently only CFQ supports hierarchical groups. For throttling, the
+  cgroup interface does allow creation of hierarchical cgroups, but
+  internally it treats them as a flat hierarchy.
 
-  So this patch will allow creation of cgroup hierarchcy but at the backend
-  everything will be treated as flat. So if somebody created a hierarchy like
-  as follows.
+  If somebody creates a hierarchy like the following.
 
                        root
                        /  \
@@ -108,16 +106,20 @@ Hierarchical Cgroups
                        |
                     test3
 
-  CFQ and throttling will practically treat all groups at same level.
+  CFQ will handle the hierarchy correctly, but throttling will
+  practically treat all groups as if at the same level. For details on CFQ
+  hierarchy support, refer to Documentation/block/cfq-iosched.txt.
+  Throttling will treat the hierarchy as if it looks like the
+  following.
 
                                pivot
                             /  /   \  \
                        root  test1 test2  test3
 
-  Down the line we can implement hierarchical accounting/control support
-  and also introduce a new cgroup file "use_hierarchy" which will control
-  whether cgroup hierarchy is viewed as flat or hierarchical by the policy..
-  This is how memory controller also has implemented the things.
+  Nesting cgroups, while allowed, isn't officially supported and blkio
+  generates a warning when cgroups nest. Once throttling implements
+  hierarchy support, nesting will be fully supported and the warning
+  will be removed.
 
 Various user visible config options
 ===================================
@@ -172,6 +174,12 @@ Proportional weight policy files
          dev     weight
          8:16    300
 
+- blkio.leaf_weight[_device]
+       - Equivalents of blkio.weight[_device] for the purpose of
+          deciding how much weight the tasks in the given cgroup have while
+          competing with the cgroup's child cgroups. For details,
+          please refer to Documentation/block/cfq-iosched.txt.
+
 - blkio.time
        - disk time allocated to cgroup per device in milliseconds. First
          two fields specify the major and minor number of the device and
@@ -279,6 +287,11 @@ Proportional weight policy files
          and minor number of the device and third field specifies the number
          of times a group was dequeued from a particular device.
 
+- blkio.*_recursive
+       - Recursive version of various stats. These files show the
+          same information as their non-recursive counterparts but
+          include stats from all the descendant cgroups.
+
 Throttling/Upper limit policy files
 -----------------------------------
 - blkio.throttle.read_bps_device
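
As a usage illustration of the leaf_weight interface documented above, the
following userspace C sketch creates a parent blkio cgroup with one child and
sets blkio.weight and blkio.leaf_weight on the parent. The mount point
/sys/fs/cgroup/blkio and the group names grp and grp/sub are assumptions made
for the example; adapt them to the local cgroup setup.

#include <stdio.h>
#include <sys/stat.h>

/* Write a string to a cgroupfs file; returns 0 on success. */
static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s", val);
	return fclose(f);
}

int main(void)
{
	/* Assumed mount point and hypothetical group names. */
	mkdir("/sys/fs/cgroup/blkio/grp", 0755);
	mkdir("/sys/fs/cgroup/blkio/grp/sub", 0755);

	/* grp competes with its siblings with weight 500 ... */
	write_str("/sys/fs/cgroup/blkio/grp/blkio.weight", "500");
	/* ... while tasks inside grp compete against grp/sub with 750. */
	write_str("/sys/fs/cgroup/blkio/grp/blkio.leaf_weight", "750");
	write_str("/sys/fs/cgroup/blkio/grp/sub/blkio.weight", "1000");
	return 0;
}
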
diff --git a/Documentation/devicetree/bindings/arm/armadeus.txt b/Documentation/devicetree/bindings/arm/armadeus.txt
new file mode 100644 (file)
index 0000000..9821283
--- /dev/null
@@ -0,0 +1,6 @@
+Armadeus i.MX Platforms Device Tree Bindings
+-----------------------------------------------
+
+APF51: i.MX51 based module.
+Required root node properties:
+    - compatible = "armadeus,imx51-apf51", "fsl,imx51";
index f79818711e83c8b3905a997f4d9c0a36e55b883a..e935d7d4ac432fa0f20665d45758bb50500054b2 100644 (file)
@@ -5,6 +5,14 @@ i.MX23 Evaluation Kit
 Required root node properties:
     - compatible = "fsl,imx23-evk", "fsl,imx23";
 
+i.MX25 Product Development Kit
+Required root node properties:
+    - compatible = "fsl,imx25-pdk", "fsl,imx25";
+
+i.MX27 Product Development Kit
+Required root node properties:
+    - compatible = "fsl,imx27-pdk", "fsl,imx27";
+
 i.MX28 Evaluation Kit
 Required root node properties:
     - compatible = "fsl,imx28-evk", "fsl,imx28";
index 04ad47876be0579fb89cc6155051edc7e13e96fa..2a0c904c46aee13ac4ff6667c943f09cde483fcb 100644 (file)
@@ -171,6 +171,7 @@ clocks and IDs.
        can_sel                 156
        can1_serial_gate        157
        can1_ipg_gate           158
+       owire_gate              159
 
 Examples (for mx53):
 
index f73fdf595568e30d9e79f0c0ee60d2799b8a5c0d..969b38e06ad3157c0621e2deabfecf0157a7913e 100644 (file)
@@ -203,6 +203,8 @@ clocks and IDs.
        pcie_ref                188
        pcie_ref_125m           189
        enet_ref                190
+       usbphy1_gate            191
+       usbphy2_gate            192
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/thermal/dove-thermal.txt b/Documentation/devicetree/bindings/thermal/dove-thermal.txt
new file mode 100644 (file)
index 0000000..6f47467
--- /dev/null
@@ -0,0 +1,18 @@
+* Dove Thermal
+
+This driver is for Dove SoCs which contain a thermal sensor.
+
+Required properties:
+- compatible : "marvell,dove-thermal"
+- reg : Address range of the thermal registers
+
+The reg property should contain two ranges. The first is for the
+three Thermal Manager registers, while the second range contains the
+Thermal Diode Control Registers.
+
+Example:
+
+       thermal@10078 {
+               compatible = "marvell,dove-thermal";
+               reg = <0xd001c 0x0c>, <0xd005c 0x08>;
+       };
diff --git a/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt b/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt
new file mode 100644 (file)
index 0000000..8c0f5eb
--- /dev/null
@@ -0,0 +1,15 @@
+* Kirkwood Thermal
+
+This version is for Kirkwood 88F8262 & 88F6283 SoCs. Other Kirkwood
+SoCs don't contain a thermal sensor.
+
+Required properties:
+- compatible : "marvell,kirkwood-thermal"
+- reg : Address range of the thermal registers
+
+Example:
+
+       thermal@10078 {
+               compatible = "marvell,kirkwood-thermal";
+               reg = <0x10078 0x4>;
+       };
diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
new file mode 100644 (file)
index 0000000..28ef498
--- /dev/null
@@ -0,0 +1,29 @@
+* Renesas R-Car Thermal
+
+Required properties:
+- compatible           : "renesas,rcar-thermal"
+- reg                  : Address range of the thermal registers.
+                         The 1st reg will be recognized as common register
+                         if it has "interrupts".
+
+Optional properties:
+
+- interrupts           : use interrupt
+
+Example (non interrupt support):
+
+thermal@e61f0100 {
+       compatible = "renesas,rcar-thermal";
+       reg = <0xe61f0100 0x38>;
+};
+
+Example (interrupt support):
+
+thermal@e61f0000 {
+       compatible = "renesas,rcar-thermal";
+       reg = <0xe61f0000 0x14
+               0xe61f0100 0x38
+               0xe61f0200 0x38
+               0xe61f0300 0x38>;
+       interrupts = <0 69 4>;
+};
similarity index 55%
rename from Documentation/devicetree/bindings/arm/armada-370-xp-timer.txt
rename to Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt
index 64830118b013fccea7e7ed878931e61baebc476f..36381129d141c47556fc5605670494fcfc4feb18 100644 (file)
@@ -1,10 +1,13 @@
-Marvell Armada 370 and Armada XP Global Timers
-----------------------------------------------
+Marvell Armada 370 and Armada XP Timers
+---------------------------------------
 
 Required properties:
 - compatible: Should be "marvell,armada-370-xp-timer"
-- interrupts: Should contain the list of Global Timer interrupts
-- reg: Should contain the base address of the Global Timer registers
+- interrupts: Should contain the list of Global Timer interrupts and
+  then local timer interrupts
+- reg: Should contain the location and length of the timer registers. The
+  first pair is for the Global Timer registers, the second pair for the
+  local/private timers.
 - clocks: clock driving the timer hardware
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt b/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt
new file mode 100644 (file)
index 0000000..ecf42c0
--- /dev/null
@@ -0,0 +1,19 @@
+* Freescale i.MX One wire bus master controller
+
+Required properties:
+- compatible : should be "fsl,imx21-owire"
+- reg : Address and length of the register set for the device
+
+Optional properties:
+- clocks : phandle of clock that supplies the module (required if platform
+               clock bindings use device tree)
+
+Example:
+
+- From imx53.dtsi:
+owire: owire@63fa4000 {
+       compatible = "fsl,imx53-owire", "fsl,imx21-owire";
+       reg = <0x63fa4000 0x4000>;
+       clocks = <&clks 159>;
+       status = "disabled";
+};
index 0188903bc9e1ede2c5668d5d78c632d2c4f923b3..4966b1be42ac09d17ecfa0ded944c13cf808ea37 100644 (file)
@@ -302,7 +302,11 @@ Access to a dma_buf from the kernel context involves three steps:
       void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 
    The vmap call can fail if there is no vmap support in the exporter, or if it
-   runs out of vmalloc space. Fallback to kmap should be implemented.
+   runs out of vmalloc space. Fallback to kmap should be implemented. Note that
+   the dma-buf layer keeps a reference count for all vmap access and calls down
+   into the exporter's vmap function only when no vmapping exists, and only
+   unmaps it once. Protection against concurrent vmap/vunmap calls is provided
+   by taking the dma_buf->lock mutex.
 
 3. Finish access
 
diff --git a/Documentation/thermal/exynos_thermal_emulation b/Documentation/thermal/exynos_thermal_emulation
new file mode 100644 (file)
index 0000000..b73bbfb
--- /dev/null
@@ -0,0 +1,53 @@
+EXYNOS EMULATION MODE
+========================
+
+Copyright (C) 2012 Samsung Electronics
+
+Written by Jonghwa Lee <jonghwa3.lee@samsung.com>
+
+Description
+-----------
+
+Exynos 4x12 (4212, 4412) and 5 series SoCs provide an emulation mode for the thermal
+management unit (TMU). Thermal emulation mode supports software debugging of the TMU's
+operation: the user can set the temperature manually in software, and the TMU will read
+the current temperature from that user-supplied value instead of from the sensor.
+
+Enabling the CONFIG_EXYNOS_THERMAL_EMUL option makes this support available.
+When it is enabled, a sysfs node named 'emulation' will be created under
+/sys/bus/platform/devices/'exynos device name'/.
+
+The sysfs node 'emulation' contains the value 0 in the initial state. When you write any
+temperature you want to the sysfs node, it automatically enables emulation mode and the
+current temperature will be changed to that value.
+(Exynos also supports a user-changeable delay time which would be used to delay the
+ temperature change. However, this node only uses the same delay as the real sensing
+ time, 938us.)
+
+Exynos emulation mode requires value changes and enabling to be synchronous. That means
+that when you want to update the delay or the next temperature, you have to enable
+emulation mode at the same time (or keep the mode enabled). If you don't, the value
+fails to change to the updated one and the last successful value is used repeatedly.
+That is why this node only gives users the ability to change the temperature: a single
+interface makes it simpler to use.
+
+Disabling emulation mode only requires writing the value 0 to the sysfs node.
+
+
+TEMP   120 |
+           |
+       100 |
+           |
+        80 |
+           |                            +-----------
+        60 |                            |          |
+           |              +-------------|          |
+        40 |              |             |          |
+           |              |             |          |
+        20 |              |             |          +----------
+           |              |             |          |          |
+         0 |______________|_____________|__________|__________|_________
+                  A             A          A                  A     TIME
+                  |<----->|     |<----->|  |<----->|          |
+                  | 938us |     |       |  |       |          |
+emulation    :  0  50     |     70      |  20      |          0
+current temp :   sensor   50            70         20        sensor
diff --git a/Documentation/thermal/intel_powerclamp.txt b/Documentation/thermal/intel_powerclamp.txt
new file mode 100644 (file)
index 0000000..332de4a
--- /dev/null
@@ -0,0 +1,307 @@
+                        =======================
+                        INTEL POWERCLAMP DRIVER
+                        =======================
+By: Arjan van de Ven <arjan@linux.intel.com>
+    Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+Contents:
+       (*) Introduction
+           - Goals and Objectives
+
+       (*) Theory of Operation
+           - Idle Injection
+           - Calibration
+
+       (*) Performance Analysis
+           - Effectiveness and Limitations
+           - Power vs Performance
+           - Scalability
+           - Calibration
+           - Comparison with Alternative Techniques
+
+       (*) Usage and Interfaces
+           - Generic Thermal Layer (sysfs)
+           - Kernel APIs (TBD)
+
+============
+INTRODUCTION
+============
+
+Consider the situation where a system’s power consumption must be
+reduced at runtime, due to power budget, thermal constraint, or noise
+level, and where active cooling is not preferred. Software managed
+passive power reduction must be performed to prevent the hardware
+actions that are designed for catastrophic scenarios.
+
+Currently, P-states, T-states (clock modulation), and CPU offlining
+are used for CPU throttling.
+
+On Intel CPUs, C-states provide effective power reduction, but so far
+they’re only used opportunistically, based on workload. With the
+development of the intel_powerclamp driver, a method of synchronizing
+idle injection across all online CPU threads was introduced. The goal
+is to achieve forced and controllable C-state residency.
+
+Tests and analysis have been made in the areas of power, performance,
+scalability, and user experience. In many cases, a clear advantage is
+shown over taking the CPU offline or modulating the CPU clock.
+
+
+===================
+THEORY OF OPERATION
+===================
+
+Idle Injection
+--------------
+
+On modern Intel processors (Nehalem or later), package level C-state
+residency is available in MSRs, thus also available to the kernel.
+
+These MSRs are:
+      #define MSR_PKG_C2_RESIDENCY     0x60D
+      #define MSR_PKG_C3_RESIDENCY     0x3F8
+      #define MSR_PKG_C6_RESIDENCY     0x3F9
+      #define MSR_PKG_C7_RESIDENCY     0x3FA
+
+If the kernel can also inject idle time to the system, then a
+closed-loop control system can be established that manages package
+level C-state. The intel_powerclamp driver is conceived as such a
+control system, where the target set point is a user-selected idle
+ratio (based on power reduction), and the error is the difference
+between the actual package level C-state residency ratio and the target idle
+ratio.
+
+Injection is controlled by high priority kernel threads, spawned for
+each online CPU.
+
+These kernel threads, with SCHED_FIFO class, are created to perform
+clamping actions of controlled duty ratio and duration. Each per-CPU
+thread synchronizes its idle time and duration, based on the rounding
+of jiffies, so accumulated errors can be prevented and a jittery
+effect avoided. Threads are also bound to the CPU such that they cannot be
+migrated, unless the CPU is taken offline. In this case, threads
+belonging to the offlined CPUs will be terminated immediately.
+
+Running as SCHED_FIFO and at relatively high priority also allows such
+a scheme to work for both preemptible and non-preemptible kernels.
+Alignment of idle time around jiffies ensures scalability for different HZ
+values. This effect can be better visualized using a Perf timechart.
+The following diagram shows the behavior of kernel thread
+kidle_inject/cpu. During idle injection, it runs monitor/mwait idle
+for a given "duration", then relinquishes the CPU to other tasks,
+until the next time interval.
+
+The NOHZ schedule tick is disabled during idle time, but interrupts
+are not masked. Tests show that the extra wakeups from scheduler tick
+have a dramatic impact on the effectiveness of the powerclamp driver
+on large scale systems (Westmere system with 80 processors).
+
+CPU0
+                 ____________          ____________
+kidle_inject/0   |   sleep    |  mwait |  sleep     |
+       _________|            |________|            |_______
+                              duration
+CPU1
+                 ____________          ____________
+kidle_inject/1   |   sleep    |  mwait |  sleep     |
+       _________|            |________|            |_______
+                             ^
+                             |
+                             |
+                             roundup(jiffies, interval)
+
+Only one CPU is allowed to collect statistics and update global
+control parameters. This CPU is referred to as the controlling CPU in
+this document. The controlling CPU is elected at runtime, with a
+policy that favors BSP, taking into account the possibility of a CPU
+hot-plug.
+
+In terms of dynamics of the idle control system, package level idle
+time is considered largely as a non-causal system where its behavior
+cannot be based on the past or current input. Therefore, the
+intel_powerclamp driver attempts to enforce the desired idle time
+instantly as given input (target idle ratio). After injection,
+powerclamp monitors the actual idle time for a given window and adjusts
+the next injection accordingly to avoid over/under correction.
+
+When used in a causal control system, such as a temperature control,
+it is up to the user of this driver to implement algorithms where
+past samples and outputs are included in the feedback. For example, a
+PID-based thermal controller can use the powerclamp driver to
+maintain a desired target temperature, based on integral and
+derivative gains of the past samples.
+
+
+
+Calibration
+-----------
+During scalability testing, it is observed that synchronized actions
+among CPUs become challenging as the number of cores grows. This is
+also true for the ability of a system to enter package level C-states.
+
+To make sure the intel_powerclamp driver scales well, online
+calibration is implemented. The goals for doing such a calibration
+are:
+
+a) determine the effective range of idle injection ratio
+b) determine the amount of compensation needed at each target ratio
+
+Compensation to each target ratio consists of two parts:
+
+        a) steady state error compensation
+       This is to offset the error occurring when the system can
+       enter idle without extra wakeups (such as external interrupts).
+
+       b) dynamic error compensation
+       When an excessive amount of wakeups occurs during idle, an
+       additional idle ratio can be added to quiet interrupts, by
+       slowing down CPU activities.
+
+A debugfs file is provided for the user to examine compensation
+progress and results, as in the following example from a Westmere system:
+[jacob@nex01 ~]$ cat
+/sys/kernel/debug/intel_powerclamp/powerclamp_calib
+controlling cpu: 0
+pct confidence steady dynamic (compensation)
+0      0       0       0
+1      1       0       0
+2      1       1       0
+3      3       1       0
+4      3       1       0
+5      3       1       0
+6      3       1       0
+7      3       1       0
+8      3       1       0
+...
+30     3       2       0
+31     3       2       0
+32     3       1       0
+33     3       2       0
+34     3       1       0
+35     3       2       0
+36     3       1       0
+37     3       2       0
+38     3       1       0
+39     3       2       0
+40     3       3       0
+41     3       1       0
+42     3       2       0
+43     3       1       0
+44     3       1       0
+45     3       2       0
+46     3       3       0
+47     3       0       0
+48     3       2       0
+49     3       3       0
+
+Calibration occurs during runtime. No offline method is available.
+Steady state compensation is used only when confidence levels of all
+adjacent ratios have reached satisfactory level. A confidence level
+is accumulated based on clean data collected at runtime. Data
+collected during a period without extra interrupts is considered
+clean.
+
+To compensate for excessive amounts of wakeup during idle, additional
+idle time is injected when such a condition is detected. Currently,
+we have a simple algorithm to double the injection ratio. A possible
+enhancement might be to throttle the offending IRQ, such as delaying
+EOI for level triggered interrupts. But it is a challenge to be
+non-intrusive to the scheduler or the IRQ core code.
+
+
+CPU Online/Offline
+------------------
+Per-CPU kernel threads are started/stopped upon receiving
+notifications of CPU hotplug activities. The intel_powerclamp driver
+keeps track of clamping kernel threads, even after they are migrated
+to other CPUs, after a CPU offline event.
+
+
+=====================
+Performance Analysis
+=====================
+This section describes the general performance data collected on
+multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P).
+
+Effectiveness and Limitations
+-----------------------------
+The maximum idle injection ratio allowed is capped at 50
+percent. As mentioned earlier, since interrupts are allowed during
+forced idle time, excessive interrupts could result in less
+effectiveness. The extreme case would be doing a ping -f to generate a
+flood of network interrupts without much CPU acknowledgement. In this
+case, little can be done from the idle injection threads. In most
+normal cases, such as scp a large file, applications can be throttled
+by the powerclamp driver, since slowing down the CPU also slows down
+network protocol processing, which in turn reduces interrupts.
+
+When control parameters are changed at runtime by the controlling CPU,
+it may take an additional period for the rest of the CPUs to catch up
+with the changes. During this time, idle injection is out of sync and
+thus not able to enter package C-states at the expected ratio. But
+this effect is minor, since in most cases the target ratio is updated
+much less frequently than the idle injection frequency.
+
+Scalability
+-----------
+Tests also show a minor, but measurable, difference between the 4P/8P
+Ivy Bridge system and the 80P Westmere server under 50% idle ratio.
+More compensation is needed on Westmere for the same amount of
+target idle ratio. The compensation also increases as the idle ratio
+gets larger. These observations motivate the need for the
+calibration code.
+
+On the IVB 8P system, compared to taking a CPU offline, powerclamp can
+achieve up to 40% better performance per watt (measured by a spin
+counter summed over per-CPU counting threads spawned for all running
+CPUs).
+
+====================
+Usage and Interfaces
+====================
+The powerclamp driver is registered to the generic thermal layer as a
+cooling device. Currently, it’s not bound to any thermal zones.
+
+jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . *
+cur_state:0
+max_state:50
+type:intel_powerclamp
+
+Example usage:
+- To inject 25% idle time
+$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state"
+
+If the system is not busy and already has more than 25% idle time,
+then the powerclamp driver will not start idle injection. Running top
+will not show the idle injection kernel threads.
+
+If the system is busy (spin test below) and has less than 25% natural
+idle time, the powerclamp kernel threads will do idle injection and
+appear to the scheduler as running tasks. But the overall system idle
+is still reflected; in this example, 24.1% idle is shown. This helps
+the system admin or user determine the cause of a slowdown when the
+powerclamp driver is in action.
+
+
+Tasks: 197 total,   1 running, 196 sleeping,   0 stopped,   0 zombie
+Cpu(s): 71.2%us,  4.7%sy,  0.0%ni, 24.1%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
+Mem:   3943228k total,  1689632k used,  2253596k free,    74960k buffers
+Swap:  4087804k total,        0k used,  4087804k free,   945336k cached
+
+  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
+ 3352 jacob     20   0  262m  644  428 S  286  0.0   0:17.16 spin
+ 3341 root     -51   0     0    0    0 D   25  0.0   0:01.62 kidle_inject/0
+ 3344 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/3
+ 3342 root     -51   0     0    0    0 D   25  0.0   0:01.61 kidle_inject/1
+ 3343 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/2
+ 2935 jacob     20   0  696m 125m  35m S    5  3.3   0:31.11 firefox
+ 1546 root      20   0  158m  20m 6640 S    3  0.5   0:26.97 Xorg
+ 2100 jacob     20   0 1223m  88m  30m S    3  2.3   0:23.68 compiz
+
+Tests have shown that by using the powerclamp driver as a cooling
+device, a PID-based userspace thermal controller can control CPU
+temperature effectively when no other thermal influence is added. For
+example, an Ultrabook user can compile the kernel while staying below
+a certain temperature (below most active trip points).
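
A small userspace sketch of the usage model described above: request 25% idle
injection from the powerclamp cooling device and observe the package C6
residency MSR before and after. The cooling device index (14) and the use of
the /dev/cpu/N/msr interface (msr module loaded, run as root) are assumptions
about the target system; the MSR address comes from the Idle Injection list
above.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_PKG_C6_RESIDENCY 0x3F9

/* Read one MSR of one CPU through the msr character device. */
static uint64_t read_msr(int cpu, uint32_t msr)
{
	char path[64];
	uint64_t val = 0;
	int fd;

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
	fd = open(path, O_RDONLY);
	if (fd >= 0) {
		pread(fd, &val, sizeof(val), msr);
		close(fd);
	}
	return val;
}

int main(void)
{
	uint64_t before, after;
	FILE *f;

	before = read_msr(0, MSR_PKG_C6_RESIDENCY);

	/* Request 25% forced idle from the powerclamp cooling device. */
	f = fopen("/sys/class/thermal/cooling_device14/cur_state", "w");
	if (f) {
		fprintf(f, "25");
		fclose(f);
	}

	sleep(10);
	after = read_msr(0, MSR_PKG_C6_RESIDENCY);
	printf("PKG C6 residency delta: %llu\n",
	       (unsigned long long)(after - before));
	return 0;
}
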
index 88c02334e35681b005ac20a1ad0dfe4a894bc1f7..6859661c9d31be090a78e5a2c35a9a5fdad7ca80 100644 (file)
@@ -55,6 +55,8 @@ temperature) and throttle appropriate devices.
        .get_trip_type: get the type of certain trip point.
        .get_trip_temp: get the temperature above which the certain trip point
                        will be fired.
+       .set_emul_temp: set the emulation temperature which helps in debugging
+                       different threshold temperature points.
 
 1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 
@@ -153,6 +155,7 @@ Thermal zone device sys I/F, created once it's registered:
     |---trip_point_[0-*]_temp: Trip point temperature
     |---trip_point_[0-*]_type: Trip point type
     |---trip_point_[0-*]_hyst: Hysteresis value for this trip point
+    |---emul_temp:             Emulated temperature set node
 
 Thermal cooling device sys I/F, created once it's registered:
 /sys/class/thermal/cooling_device[0-*]:
@@ -252,6 +255,16 @@ passive
        Valid values: 0 (disabled) or greater than 1000
        RW, Optional
 
+emul_temp
+       Interface to set the emulated temperature of a thermal zone
+       (sensor). After setting this temperature, the thermal zone may pass
+       it to the platform emulation function, if one is registered, or
+       cache it locally. This is useful for debugging different temperature
+       thresholds and their associated cooling actions. This is a write-only
+       node and writing 0 to it disables emulation.
+       Unit: millidegree Celsius
+       WO, Optional
+
 *****************************
 * Cooling device attributes *
 *****************************
@@ -329,8 +342,9 @@ The framework includes a simple notification mechanism, in the form of a
 netlink event. Netlink socket initialization is done during the _init_
 of the framework. Drivers which intend to use the notification mechanism
 just need to call thermal_generate_netlink_event() with two arguments viz
-(originator, event). Typically the originator will be an integer assigned
-to a thermal_zone_device when it registers itself with the framework. The
+(originator, event). The originator is a pointer to the struct thermal_zone_device
+from which the event originated. An integer which represents the
+thermal zone device will be used in the message to identify the zone. The
 event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
 THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
 crosses any of the configured thresholds.
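
The new emul_temp node can be exercised from userspace as in the sketch
below; thermal_zone0 is an assumed zone name (pick one whose driver provides
.set_emul_temp), and values are in millidegrees Celsius as documented above.

#include <stdio.h>

/* Write a millidegree value to a thermal zone's emul_temp node. */
static int write_emul_temp(const char *zone, long millicelsius)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/thermal/%s/emul_temp", zone);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%ld", millicelsius);
	return fclose(f);
}

int main(void)
{
	write_emul_temp("thermal_zone0", 85000); /* pretend we are at 85 C */
	/* ...observe the configured cooling action, then stop emulating. */
	write_emul_temp("thermal_zone0", 0);     /* 0 disables emulation   */
	return 0;
}
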
index 0b4bb157a482a2b38619682fcb57a5b78534a622..6db1c6bdf0150fbeba1606f4090f1c3d7afcd2dd 100644 (file)
@@ -1800,7 +1800,8 @@ F:        drivers/bcma/
 F:     include/linux/bcma/
 
 BROCADE BFA FC SCSI DRIVER
-M:     Krishna C Gudipati <kgudipat@brocade.com>
+M:     Anil Gurumurthy <agurumur@brocade.com>
+M:     Vijaya Mohan Guvva <vmohan@brocade.com>
 L:     linux-scsi@vger.kernel.org
 S:     Supported
 F:     drivers/scsi/bfa/
@@ -2074,8 +2075,8 @@ S:        Maintained
 F:     include/linux/clk.h
 
 CISCO FCOE HBA DRIVER
-M:     Abhijeet Joglekar <abjoglek@cisco.com>
-M:     Venkata Siva Vijayendra Bhamidipati <vbhamidi@cisco.com>
+M:     Hiral Patel <hiralpat@cisco.com>
+M:     Suma Ramars <sramars@cisco.com>
 M:     Brian Uchino <buchino@cisco.com>
 L:     linux-scsi@vger.kernel.org
 S:     Supported
@@ -2903,6 +2904,13 @@ W:       bluesmoke.sourceforge.net
 S:     Maintained
 F:     drivers/edac/e7xxx_edac.c
 
+EDAC-GHES
+M:     Mauro Carvalho Chehab <mchehab@redhat.com>
+L:     linux-edac@vger.kernel.org
+W:     bluesmoke.sourceforge.net
+S:     Maintained
+F:     drivers/edac/ghes_edac.c
+
 EDAC-I82443BXGX
 M:     Tim Small <tim@buttersideup.com>
 L:     linux-edac@vger.kernel.org
@@ -6514,6 +6522,12 @@ S:       Maintained
 F:     Documentation/blockdev/ramdisk.txt
 F:     drivers/block/brd.c
 
+RAMSAN DRIVER (IBM RamSan 70/80 PCI SSD Flash Card)
+M:     Joshua Morris <josh.h.morris@us.ibm.com>
+M:     Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+S:     Maintained
+F:     drivers/block/rsxx/
+
 RANDOM NUMBER DRIVER
 M:     Theodore Ts'o <tytso@mit.edu>
 S:     Maintained
index 0e16cca1d01168ad6fb78112879aa87835b5c0f4..5b714695b01bb9db0455ad2f5c959a714aa00064 100644 (file)
@@ -1676,7 +1676,6 @@ config HZ
        int
        default 200 if ARCH_EBSA110 || ARCH_S3C24XX || ARCH_S5P64X0 || \
                ARCH_S5PV210 || ARCH_EXYNOS4
-       default OMAP_32K_TIMER_HZ if ARCH_OMAP && OMAP_32K_TIMER
        default AT91_TIMER_HZ if ARCH_AT91
        default SHMOBILE_TIMER_HZ if ARCH_SHMOBILE
        default 100
index 411ab1614a0eceb07b917f37b29c1780e855057a..9c6255884cbbd51f7efd83c5c4b6327156fc5c6c 100644 (file)
@@ -56,6 +56,7 @@ dtb-$(CONFIG_ARCH_KIRKWOOD) += kirkwood-dns320.dtb \
        kirkwood-dockstar.dtb \
        kirkwood-dreamplug.dtb \
        kirkwood-goflexnet.dtb \
+       kirkwood-guruplug-server-plus.dtb \
        kirkwood-ib62x0.dtb \
        kirkwood-iconnect.dtb \
        kirkwood-iomega_ix2_200.dtb \
@@ -78,11 +79,21 @@ dtb-$(CONFIG_ARCH_MSM) += msm8660-surf.dtb \
        msm8960-cdp.dtb
 dtb-$(CONFIG_ARCH_MVEBU) += armada-370-db.dtb \
        armada-370-mirabox.dtb \
+       armada-370-rd.dtb \
        armada-xp-db.dtb \
+       armada-xp-gp.dtb \
        armada-xp-openblocks-ax3-4.dtb
-dtb-$(CONFIG_ARCH_MXC) += imx51-babbage.dtb \
+dtb-$(CONFIG_ARCH_MXC) += \
+       imx25-karo-tx25.dtb \
+       imx25-pdk.dtb \
+       imx27-apf27.dtb \
+       imx27-pdk.dtb \
+       imx31-bug.dtb \
+       imx51-apf51.dtb \
+       imx51-babbage.dtb \
        imx53-ard.dtb \
        imx53-evk.dtb \
+       imx53-mba53.dtb \
        imx53-qsb.dtb \
        imx53-smd.dtb \
        imx6q-arm2.dtb \
index c2f14e875eb6274b18fc159e8e773395aca6df87..0957645b73afaca716348485193aa5060f06659c 100644 (file)
                                mac-address = [ 00 00 00 00 00 00 ];
                        };
                };
+
+               ocmcram: ocmcram@40300000 {
+                       compatible = "ti,am3352-ocmcram";
+                       reg = <0x40300000 0x10000>;
+                       ti,hwmods = "ocmcram";
+                       ti,no_idle_on_suspend;
+               };
+
+               wkup_m3: wkup_m3@44d00000 {
+                       compatible = "ti,am3353-wkup-m3";
+                       reg = <0x44d00000 0x4000        /* M3 UMEM */
+                              0x44d80000 0x2000>;      /* M3 DMEM */
+                       ti,hwmods = "wkup_m3";
+               };
        };
 };
index 9b82facb2561cfdb1cbc9cda25f5021668752e4a..e34b280ce6ec44c3abfc17284ef53d103a15f045 100644 (file)
                        phy = <&phy1>;
                        phy-mode = "rgmii-id";
                };
+
+               mvsdio@d00d4000 {
+                       pinctrl-0 = <&sdio_pins1>;
+                       pinctrl-names = "default";
+                       /*
+                        * This device is disabled by default, because
+                        * using the SD card connector requires
+                        * changing the default CON40 connector
+                        * "DB-88F6710_MPP_2xRGMII_DEVICE_Jumper" to a
+                        * different connector
+                        * "DB-88F6710_MPP_RGMII_SD_Jumper".
+                        */
+                       status = "disabled";
+                       /* No CD or WP GPIOs */
+               };
+
+               usb@d0050000 {
+                       status = "okay";
+               };
+
+               usb@d0051000 {
+                       status = "okay";
+               };
+
+               spi0: spi@d0010600 {
+                       status = "okay";
+
+                       spi-flash@0 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "mx25l25635e";
+                               reg = <0>; /* Chip select 0 */
+                               spi-max-frequency = <50000000>;
+                       };
+               };
        };
 };
index 3b40713365997a70cf93b61bb244d3e541651d79..dd0c57dd9f3096ae40b35e2b4e43fc3c983f3ea1 100644 (file)
                        phy = <&phy1>;
                        phy-mode = "rgmii-id";
                };
+
+               mvsdio@d00d4000 {
+                       pinctrl-0 = <&sdio_pins2>;
+                       pinctrl-names = "default";
+                       status = "okay";
+                       /*
+                        * No CD or WP GPIOs: SDIO interface used for
+                        * Wifi/Bluetooth chip
+                        */
+               };
+
+               usb@d0050000 {
+                       status = "okay";
+               };
+
+               usb@d0051000 {
+                       status = "okay";
+               };
        };
 };
diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts
new file mode 100644 (file)
index 0000000..f8e4855
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Device Tree file for Marvell Armada 370 Reference Design board
+ * (RD-88F6710-A1)
+ *
+ *  Copied from arch/arm/boot/dts/armada-370-db.dts
+ *
+ *  Copyright (C) 2013 Florian Fainelli <florian@openwrt.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+/include/ "armada-370.dtsi"
+
+/ {
+       model = "Marvell Armada 370 Reference Design";
+       compatible = "marvell,a370-rd", "marvell,armada370", "marvell,armada-370-xp";
+
+       chosen {
+               bootargs = "console=ttyS0,115200 earlyprintk";
+       };
+
+       memory {
+               device_type = "memory";
+               reg = <0x00000000 0x20000000>; /* 512 MB */
+       };
+
+       soc {
+               serial@d0012000 {
+                       clock-frequency = <200000000>;
+                       status = "okay";
+               };
+               sata@d00a0000 {
+                       nr-ports = <2>;
+                       status = "okay";
+               };
+
+               mdio {
+                       phy0: ethernet-phy@0 {
+                               reg = <0>;
+                       };
+
+                       phy1: ethernet-phy@1 {
+                               reg = <1>;
+                       };
+               };
+
+               ethernet@d0070000 {
+                       status = "okay";
+                       phy = <&phy0>;
+                       phy-mode = "sgmii";
+               };
+               ethernet@d0074000 {
+                       status = "okay";
+                       phy = <&phy1>;
+                       phy-mode = "rgmii-id";
+               };
+
+               mvsdio@d00d4000 {
+                       pinctrl-0 = <&sdio_pins1>;
+                       pinctrl-names = "default";
+                       status = "okay";
+                       /* No CD or WP GPIOs */
+               };
+       };
+};
index 5b2922599f0e85c0b8e959993460acb4a41810d4..6f1acc75e1559ca109066c7ad68c6e11f50087a0 100644 (file)
@@ -68,8 +68,9 @@
 
                timer@d0020300 {
                               compatible = "marvell,armada-370-xp-timer";
-                              reg = <0xd0020300 0x30>;
-                              interrupts = <37>, <38>, <39>, <40>;
+                              reg = <0xd0020300 0x30>,
+                              <0xd0021040 0x30>;
+                              interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
                               clocks = <&coreclk 2>;
                };
 
                        reg = <0xd0010300 0x20>;
                        interrupts = <50>;
                };
+
+               mvsdio@d00d4000 {
+                       compatible = "marvell,orion-sdio";
+                       reg = <0xd00d4000 0x200>;
+                       interrupts = <54>;
+                       clocks = <&gateclk 17>;
+                       status = "disabled";
+               };
+
+               usb@d0050000 {
+                       compatible = "marvell,orion-ehci";
+                       reg = <0xd0050000 0x500>;
+                       interrupts = <45>;
+                       status = "disabled";
+               };
+
+               usb@d0051000 {
+                       compatible = "marvell,orion-ehci";
+                       reg = <0xd0051000 0x500>;
+                       interrupts = <46>;
+                       status = "disabled";
+               };
+
+               spi0: spi@d0010600 {
+                       compatible = "marvell,orion-spi";
+                       reg = <0xd0010600 0x28>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       cell-index = <0>;
+                       interrupts = <30>;
+                       clocks = <&coreclk 0>;
+                       status = "disabled";
+               };
+
+               spi1: spi@d0010680 {
+                       compatible = "marvell,orion-spi";
+                       reg = <0xd0010680 0x28>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       cell-index = <1>;
+                       interrupts = <92>;
+                       clocks = <&coreclk 0>;
+                       status = "disabled";
+               };
        };
 };
 
index 636cf7d4009ecf343d3b9a8d596bf092360a59b0..8188d138020edc57c88db0cb749b91bdf016a5cf 100644 (file)
                pinctrl {
                        compatible = "marvell,mv88f6710-pinctrl";
                        reg = <0xd0018000 0x38>;
+
+                       sdio_pins1: sdio-pins1 {
+                             marvell,pins = "mpp9",  "mpp11", "mpp12",
+                                            "mpp13", "mpp14", "mpp15";
+                             marvell,function = "sd0";
+                       };
+
+                       sdio_pins2: sdio-pins2 {
+                             marvell,pins = "mpp47", "mpp48", "mpp49",
+                                            "mpp50", "mpp51", "mpp52";
+                             marvell,function = "sd0";
+                       };
                };
 
                gpio0: gpio@d0018100 {
                                dmacap,memset;
                        };
                };
+
+               usb@d0050000 {
+                       clocks = <&coreclk 0>;
+               };
+
+               usb@d0051000 {
+                       clocks = <&coreclk 0>;
+               };
+
        };
 };
index 8e53b25b55084ed3b8712156f21d357510be207c..e83505e4c236c242eeb13c1094961b622a7646a8 100644 (file)
                        phy = <&phy3>;
                        phy-mode = "sgmii";
                };
+
+               mvsdio@d00d4000 {
+                       pinctrl-0 = <&sdio_pins>;
+                       pinctrl-names = "default";
+                       status = "okay";
+                       /* No CD or WP GPIOs */
+               };
+
+               usb@d0050000 {
+                       status = "okay";
+               };
+
+               usb@d0051000 {
+                       status = "okay";
+               };
+
+               usb@d0052000 {
+                       status = "okay";
+               };
+
+               spi0: spi@d0010600 {
+                       status = "okay";
+
+                       spi-flash@0 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "m25p64";
+                               reg = <0>; /* Chip select 0 */
+                               spi-max-frequency = <20000000>;
+                       };
+               };
        };
 };
diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts
new file mode 100644 (file)
index 0000000..1c8afe2
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Device Tree file for Marvell Armada XP development board
+ * (DB-MV784MP-GP)
+ *
+ * Copyright (C) 2013 Marvell
+ *
+ * Lior Amsalem <alior@marvell.com>
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/dts-v1/;
+/include/ "armada-xp-mv78460.dtsi"
+
+/ {
+       model = "Marvell Armada XP Development Board DB-MV784MP-GP";
+       compatible = "marvell,axp-gp", "marvell,armadaxp-mv78460", "marvell,armadaxp", "marvell,armada-370-xp";
+
+       chosen {
+               bootargs = "console=ttyS0,115200 earlyprintk";
+       };
+
+       memory {
+               device_type = "memory";
+
+               /*
+                * 4 GB of plug-in RAM modules by default but only 3GB
+                * are visible. The amount of memory available can be
+                * changed by the bootloader according to the size of
+                * the module actually plugged in.
+                */
+               reg = <0x00000000 0xC0000000>;
+       };
+
+       soc {
+               serial@d0012000 {
+                       clock-frequency = <250000000>;
+                       status = "okay";
+               };
+               serial@d0012100 {
+                       clock-frequency = <250000000>;
+                       status = "okay";
+               };
+               serial@d0012200 {
+                       clock-frequency = <250000000>;
+                       status = "okay";
+               };
+               serial@d0012300 {
+                       clock-frequency = <250000000>;
+                       status = "okay";
+               };
+
+               sata@d00a0000 {
+                       nr-ports = <2>;
+                       status = "okay";
+               };
+
+               mdio {
+                       phy0: ethernet-phy@0 {
+                               reg = <16>;
+                       };
+
+                       phy1: ethernet-phy@1 {
+                               reg = <17>;
+                       };
+
+                       phy2: ethernet-phy@2 {
+                               reg = <18>;
+                       };
+
+                       phy3: ethernet-phy@3 {
+                               reg = <19>;
+                       };
+               };
+
+               ethernet@d0070000 {
+                       status = "okay";
+                       phy = <&phy0>;
+                       phy-mode = "rgmii-id";
+               };
+               ethernet@d0074000 {
+                       status = "okay";
+                       phy = <&phy1>;
+                       phy-mode = "rgmii-id";
+               };
+               ethernet@d0030000 {
+                       status = "okay";
+                       phy = <&phy2>;
+                       phy-mode = "rgmii-id";
+               };
+               ethernet@d0034000 {
+                       status = "okay";
+                       phy = <&phy3>;
+                       phy-mode = "rgmii-id";
+               };
+
+               spi0: spi@d0010600 {
+                       status = "okay";
+
+                       spi-flash@0 {
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               compatible = "n25q128a13";
+                               reg = <0>; /* Chip select 0 */
+                               spi-max-frequency = <108000000>;
+                       };
+               };
+       };
+};
index e041f42ed711b7e31fe93f2601fc153b2db6c2c0..f56c40599f5b975beb1f711369fecc1c5cf74a7d 100644 (file)
                pinctrl {
                        compatible = "marvell,mv78230-pinctrl";
                        reg = <0xd0018000 0x38>;
+
+                       sdio_pins: sdio-pins {
+                               marvell,pins = "mpp30", "mpp31", "mpp32",
+                                              "mpp33", "mpp34", "mpp35";
+                               marvell,function = "sd0";
+                       };
                };
 
                gpio0: gpio@d0018100 {
index 9e23bd8c9536d9b3d02c14a275efad5111d03f4a..f8f2b787d2b0e759cfe8578aab6bb1e63e11dd83 100644 (file)
                pinctrl {
                        compatible = "marvell,mv78260-pinctrl";
                        reg = <0xd0018000 0x38>;
+
+                       sdio_pins: sdio-pins {
+                               marvell,pins = "mpp30", "mpp31", "mpp32",
+                                              "mpp33", "mpp34", "mpp35";
+                               marvell,function = "sd0";
+                       };
                };
 
                gpio0: gpio@d0018100 {
index 965966110e3850a46eb247ca115c28ef827aa984..936c25dc32b0e7dc2a0f9b6b2a481940ba6f6d21 100644 (file)
                pinctrl {
                        compatible = "marvell,mv78460-pinctrl";
                        reg = <0xd0018000 0x38>;
+
+                       sdio_pins: sdio-pins {
+                               marvell,pins = "mpp30", "mpp31", "mpp32",
+                                              "mpp33", "mpp34", "mpp35";
+                               marvell,function = "sd0";
+                       };
                };
 
                gpio0: gpio@d0018100 {
index b42652fd3d8c373f5ef14bcd3695e38799bcd1cc..3818a82176a227996051a389bd783bbc4cc1fcb6 100644 (file)
                        };
                };
 
+               gpio_keys {
+                       compatible = "gpio-keys";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+
+                       button@1 {
+                               label = "Init Button";
+                               linux,code = <116>;
+                               gpios = <&gpio1 28 0>;
+                       };
+               };
+
                mdio {
                        phy0: ethernet-phy@0 {
                                reg = <0>;
                        nr-ports = <2>;
                        status = "okay";
                };
+               usb@d0050000 {
+                       status = "okay";
+               };
+               usb@d0051000 {
+                       status = "okay";
+               };
        };
 };
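For reference, the gpio-keys button added above uses a raw keycode: 116 is KEY_POWER in include/uapi/linux/input.h. The following is only an annotated restatement of the node from this patch, with that mapping spelled out in a comment:

		button@1 {
			label = "Init Button";
			linux,code = <116>;	/* KEY_POWER */
			gpios = <&gpio1 28 0>;
		};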
index 2e37ef101c9035d65efefa83854ba79c742a8d94..1443949c165ea8fa787fe798f687a63a116f2626 100644 (file)
@@ -30,7 +30,7 @@
        };
 
        mpic: interrupt-controller@d0020000 {
-             reg = <0xd0020a00 0x1d0>,
+             reg = <0xd0020a00 0x2d0>,
                    <0xd0021070 0x58>;
        };
 
                                dmacap,memset;
                        };
                };
+
+               usb@d0050000 {
+                       clocks = <&gateclk 18>;
+               };
+
+               usb@d0051000 {
+                       clocks = <&gateclk 19>;
+               };
+
+               usb@d0052000 {
+                       compatible = "marvell,orion-ehci";
+                       reg = <0xd0052000 0x500>;
+                       interrupts = <47>;
+                       clocks = <&gateclk 20>;
+                       status = "disabled";
+               };
+
        };
 };
index cdee96fca6e25fec9a0e24208344c03621298721..7e3065abd7512eaccc203feefd60345ac931e7f0 100644 (file)
 
        leds {
                compatible = "gpio-leds";
+               pinctrl-0 = <&pmx_gpio_18>;
+               pinctrl-names = "default";
+
                power {
                        label = "Power";
                        gpios = <&gpio0 18 1>;
                        linux,default-trigger = "default-on";
                };
        };
+
+       regulators {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               usb_power: regulator@1 {
+                       compatible = "regulator-fixed";
+                       reg = <1>;
+                       regulator-name = "USB Power";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       enable-active-high;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&gpio0 1 0>;
+               };
+       };
 };
 
 &uart0 { status = "okay"; };
 };
 
 &pinctrl {
-       pinctrl-0 = <&pmx_gpio_12 &pmx_gpio_18>;
+       pinctrl-0 = <&pmx_gpio_1 &pmx_gpio_12>;
        pinctrl-names = "default";
 
+       pmx_gpio_1: pmx-gpio-1 {
+               marvell,pins = "mpp1";
+               marvell,function = "gpio";
+       };
+
        pmx_gpio_12: pmx-gpio-12 {
                marvell,pins = "mpp12";
                marvell,function = "gpio";
index 740630f9cd6584544dbc1d6ef2f26a6f6befe365..67dbe20868a2746638ef2e20c380633a1e6fbcfa 100644 (file)
@@ -55,7 +55,7 @@
                        reg = <0x12000 0x100>;
                        reg-shift = <2>;
                        interrupts = <7>;
-                       clock-frequency = <166666667>;
+                       clocks = <&core_clk 0>;
                        status = "disabled";
                };
 
@@ -64,7 +64,7 @@
                        reg = <0x12100 0x100>;
                        reg-shift = <2>;
                        interrupts = <8>;
-                       clock-frequency = <166666667>;
+                       clocks = <&core_clk 0>;
                        status = "disabled";
                };
 
@@ -73,7 +73,7 @@
                        reg = <0x12000 0x100>;
                        reg-shift = <2>;
                        interrupts = <9>;
-                       clock-frequency = <166666667>;
+                       clocks = <&core_clk 0>;
                        status = "disabled";
                };
 
@@ -82,7 +82,7 @@
                        reg = <0x12100 0x100>;
                        reg-shift = <2>;
                        interrupts = <10>;
-                       clock-frequency = <166666667>;
+                       clocks = <&core_clk 0>;
                        status = "disabled";
                };
 
                        status = "disabled";
                };
 
+               ehci0: usb-host@50000 {
+                       compatible = "marvell,orion-ehci";
+                       reg = <0x50000 0x1000>;
+                       interrupts = <24>;
+                       clocks = <&gate_clk 0>;
+                       status = "okay";
+               };
+
+               ehci1: usb-host@51000 {
+                       compatible = "marvell,orion-ehci";
+                       reg = <0x51000 0x1000>;
+                       interrupts = <25>;
+                       clocks = <&gate_clk 1>;
+                       status = "okay";
+               };
+
                sdio0: sdio@92000 {
                        compatible = "marvell,dove-sdhci";
                        reg = <0x92000 0x100>;
index d81f8a0b97949a470d44f9b673ea994a83c74475..1a9d0491cdced7ca0f5878560a2a4cc8de96e396 100644 (file)
        memory {
                reg = <0x80000000 0x02000000 0x90000000 0x02000000>;
        };
+};
 
-       soc {
-               aips@43f00000 {
-                       uart1: serial@43f90000 {
-                               status = "okay";
-                       };
-               };
+&uart1 {
+       status = "okay";
+};
 
-               spba@50000000 {
-                       fec: ethernet@50038000 {
-                               status = "okay";
-                               phy-mode = "rmii";
-                       };
-               };
+&fec {
+       phy-mode = "rmii";
+       status = "okay";
+};
 
-               emi@80000000 {
-                       nand@bb000000 {
-                               nand-on-flash-bbt;
-                               status = "okay";
-                       };
-               };
-       };
+&nfc {
+       nand-on-flash-bbt;
+       status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts
new file mode 100644 (file)
index 0000000..a02a860
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/dts-v1/;
+/include/ "imx25.dtsi"
+
+/ {
+       model = "Freescale i.MX25 Product Development Kit";
+       compatible = "fsl,imx25-pdk", "fsl,imx25";
+
+       memory {
+               reg = <0x80000000 0x4000000>;
+       };
+};
+
+&uart1 {
+       status = "okay";
+};
+
+&fec {
+       phy-mode = "rmii";
+       status = "okay";
+};
+
+&nfc {
+       nand-on-flash-bbt;
+       status = "okay";
+};
index e1b13ebc96d6c3e34c4c15fecbe2738ed47fffa9..94f33059158a190a5dbff74d57cfb70a47830e13 100644 (file)
                        reg = <0x80000000 0x3b002000>;
                        ranges;
 
-                       nand@bb000000 {
+                       nfc: nand@bb000000 {
                                #address-cells = <1>;
                                #size-cells = <1>;
 
index c0327c054de25b68852a82229fa2b871ee944102..b464c807d8d92bfdd6b8d034ae24c75f98b2972a 100644 (file)
                        clock-frequency = <0>;
                };
        };
+};
 
-       soc {
-               aipi@10000000 {
-                       serial@1000a000 {
-                               status = "okay";
-                       };
+&uart1 {
+       status = "okay";
+};
 
-                       ethernet@1002b000 {
-                               status = "okay";
-                       };
-               };
+&fec {
+       status = "okay";
+};
 
-               nand@d8000000 {
-                       status = "okay";
-                       nand-bus-width = <16>;
-                       nand-ecc-mode = "hw";
-                       nand-on-flash-bbt;
+&nfc {
+       status = "okay";
+       nand-bus-width = <16>;
+       nand-ecc-mode = "hw";
+       nand-on-flash-bbt;
 
-                       partition@0 {
-                               label = "u-boot";
-                               reg = <0x0 0x100000>;
-                       };
+       partition@0 {
+               label = "u-boot";
+               reg = <0x0 0x100000>;
+       };
 
-                       partition@100000 {
-                               label = "env";
-                               reg = <0x100000 0x80000>;
-                       };
+       partition@100000 {
+               label = "env";
+               reg = <0x100000 0x80000>;
+       };
 
-                       partition@180000 {
-                               label = "env2";
-                               reg = <0x180000 0x80000>;
-                       };
+       partition@180000 {
+               label = "env2";
+               reg = <0x180000 0x80000>;
+       };
 
-                       partition@200000 {
-                               label = "firmware";
-                               reg = <0x200000 0x80000>;
-                       };
+       partition@200000 {
+               label = "firmware";
+               reg = <0x200000 0x80000>;
+       };
 
-                       partition@280000 {
-                               label = "dtb";
-                               reg = <0x280000 0x80000>;
-                       };
+       partition@280000 {
+               label = "dtb";
+               reg = <0x280000 0x80000>;
+       };
 
-                       partition@300000 {
-                               label = "kernel";
-                               reg = <0x300000 0x500000>;
-                       };
+       partition@300000 {
+               label = "kernel";
+               reg = <0x300000 0x500000>;
+       };
 
-                       partition@800000 {
-                               label = "rootfs";
-                               reg = <0x800000 0xf800000>;
-                       };
-               };
+       partition@800000 {
+               label = "rootfs";
+               reg = <0x800000 0xf800000>;
        };
 };
similarity index 59%
rename from arch/arm/boot/dts/imx27-3ds.dts
rename to arch/arm/boot/dts/imx27-pdk.dts
index fa04c7b18bcb8350f47917e969c97d02028b09f0..41cd1105608e47d3729d00e9b3b227b869ae0e26 100644 (file)
 /include/ "imx27.dtsi"
 
 / {
-       model = "mx27_3ds";
-       compatible = "freescale,imx27-3ds", "fsl,imx27";
+       model = "Freescale i.MX27 Product Development Kit";
+       compatible = "fsl,imx27-pdk", "fsl,imx27";
 
        memory {
                reg = <0x0 0x0>;
        };
+};
 
-       soc {
-               aipi@10000000 { /* aipi1 */
-                       uart1: serial@1000a000 {
-                               fsl,uart-has-rtscts;
-                               status = "okay";
-                       };
-               };
+&uart1 {
+       fsl,uart-has-rtscts;
+       status = "okay";
+};
 
-               aipi@10020000 { /* aipi2 */
-                       ethernet@1002b000 {
-                               status = "okay";
-                       };
-               };
-       };
+&fec {
+       status = "okay";
 };
index 7f67402328d377aa213a579860ff934db4e2483f..9ac6f6ba1d64c2d8cff1a6decf30a66667419b32 100644 (file)
        memory {
                reg = <0x80000000 0x8000000>; /* 128M */
        };
+};
 
-       soc {
-               aips@43f00000 { /* AIPS1 */
-                       uart5: serial@43fb4000 {
-                               fsl,uart-has-rtscts;
-                               status = "okay";
-                       };
-               };
-       };
+&uart5 {
+       fsl,uart-has-rtscts;
+       status = "okay";
 };
diff --git a/arch/arm/boot/dts/imx51-apf51.dts b/arch/arm/boot/dts/imx51-apf51.dts
new file mode 100644 (file)
index 0000000..92d3a66
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2012 Armadeus Systems - <support@armadeus.com>
+ * Copyright 2012 Laurent Cans <laurent.cans@gmail.com>
+ *
+ * Based on mx51-babbage.dts
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011 Linaro Ltd.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/dts-v1/;
+/include/ "imx51.dtsi"
+
+/ {
+       model = "Armadeus Systems APF51 module";
+       compatible = "armadeus,imx51-apf51", "fsl,imx51";
+
+       memory {
+               reg = <0x90000000 0x20000000>;
+       };
+
+       clocks {
+               ckih1 {
+                       clock-frequency = <0>;
+               };
+
+               osc {
+                       clock-frequency = <33554432>;
+               };
+       };
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_fec_2>;
+       phy-mode = "mii";
+       phy-reset-gpios = <&gpio3 0 0>;
+       phy-reset-duration = <1>;
+       status = "okay";
+};
+
+&uart3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart3_2>;
+       status = "okay";
+};
index 567e7ee72f9122396a05a98add4dc104dba699d0..aab6e43219af50dbd10e6bcdd2000963d43528a1 100644 (file)
                reg = <0x90000000 0x20000000>;
        };
 
-       soc {
-               display@di0 {
-                       compatible = "fsl,imx-parallel-display";
-                       crtcs = <&ipu 0>;
-                       interface-pix-fmt = "rgb24";
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&pinctrl_ipu_disp1_1>;
-               };
-
-               display@di1 {
-                       compatible = "fsl,imx-parallel-display";
-                       crtcs = <&ipu 1>;
-                       interface-pix-fmt = "rgb565";
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&pinctrl_ipu_disp2_1>;
-               };
-
-               aips@70000000 { /* aips-1 */
-                       spba@70000000 {
-                               esdhc@70004000 { /* ESDHC1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc1_1>;
-                                       fsl,cd-controller;
-                                       fsl,wp-controller;
-                                       status = "okay";
-                               };
-
-                               esdhc@70008000 { /* ESDHC2 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc2_1>;
-                                       cd-gpios = <&gpio1 6 0>;
-                                       wp-gpios = <&gpio1 5 0>;
-                                       status = "okay";
-                               };
-
-                               uart3: serial@7000c000 {
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_uart3_1>;
-                                       fsl,uart-has-rtscts;
-                                       status = "okay";
-                               };
-
-                               ecspi@70010000 { /* ECSPI1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_ecspi1_1>;
-                                       fsl,spi-num-chipselects = <2>;
-                                       cs-gpios = <&gpio4 24 0>, <&gpio4 25 0>;
-                                       status = "okay";
-
-                                       pmic: mc13892@0 {
-                                               #address-cells = <1>;
-                                               #size-cells = <0>;
-                                               compatible = "fsl,mc13892";
-                                               spi-max-frequency = <6000000>;
-                                               reg = <0>;
-                                               interrupt-parent = <&gpio1>;
-                                               interrupts = <8 0x4>;
-
-                                               regulators {
-                                                       sw1_reg: sw1 {
-                                                               regulator-min-microvolt = <600000>;
-                                                               regulator-max-microvolt = <1375000>;
-                                                               regulator-boot-on;
-                                                               regulator-always-on;
-                                                       };
-
-                                                       sw2_reg: sw2 {
-                                                               regulator-min-microvolt = <900000>;
-                                                               regulator-max-microvolt = <1850000>;
-                                                               regulator-boot-on;
-                                                               regulator-always-on;
-                                                       };
-
-                                                       sw3_reg: sw3 {
-                                                               regulator-min-microvolt = <1100000>;
-                                                               regulator-max-microvolt = <1850000>;
-                                                               regulator-boot-on;
-                                                               regulator-always-on;
-                                                       };
-
-                                                       sw4_reg: sw4 {
-                                                               regulator-min-microvolt = <1100000>;
-                                                               regulator-max-microvolt = <1850000>;
-                                                               regulator-boot-on;
-                                                               regulator-always-on;
-                                                       };
-
-                                                       vpll_reg: vpll {
-                                                               regulator-min-microvolt = <1050000>;
-                                                               regulator-max-microvolt = <1800000>;
-                                                               regulator-boot-on;
-                                                               regulator-always-on;
-                                                       };
-
-                                                       vdig_reg: vdig {
-                                                               regulator-min-microvolt = <1650000>;
-                                                               regulator-max-microvolt = <1650000>;
-                                                               regulator-boot-on;
-                                                       };
-
-                                                       vsd_reg: vsd {
-                                                               regulator-min-microvolt = <1800000>;
-                                                               regulator-max-microvolt = <3150000>;
-                                                       };
-
-                                                       vusb2_reg: vusb2 {
-                                                               regulator-min-microvolt = <2400000>;
-                                                               regulator-max-microvolt = <2775000>;
-                                                               regulator-boot-on;
-                                                               regulator-always-on;
-                                                       };
-
-                                                       vvideo_reg: vvideo {
-                                                               regulator-min-microvolt = <2775000>;
-                                                               regulator-max-microvolt = <2775000>;
-                                                       };
-
-                                                       vaudio_reg: vaudio {
-                                                               regulator-min-microvolt = <2300000>;
-                                                               regulator-max-microvolt = <3000000>;
-                                                       };
-
-                                                       vcam_reg: vcam {
-                                                               regulator-min-microvolt = <2500000>;
-                                                               regulator-max-microvolt = <3000000>;
-                                                       };
-
-                                                       vgen1_reg: vgen1 {
-                                                               regulator-min-microvolt = <1200000>;
-                                                               regulator-max-microvolt = <1200000>;
-                                                       };
-
-                                                       vgen2_reg: vgen2 {
-                                                               regulator-min-microvolt = <1200000>;
-                                                               regulator-max-microvolt = <3150000>;
-                                                               regulator-always-on;
-                                                       };
-
-                                                       vgen3_reg: vgen3 {
-                                                               regulator-min-microvolt = <1800000>;
-                                                               regulator-max-microvolt = <2900000>;
-                                                               regulator-always-on;
-                                                       };
-                                               };
-                                       };
-
-                                       flash: at45db321d@1 {
-                                               #address-cells = <1>;
-                                               #size-cells = <1>;
-                                               compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash";
-                                               spi-max-frequency = <25000000>;
-                                               reg = <1>;
-
-                                               partition@0 {
-                                                       label = "U-Boot";
-                                                       reg = <0x0 0x40000>;
-                                                       read-only;
-                                               };
-
-                                               partition@40000 {
-                                                       label = "Kernel";
-                                                       reg = <0x40000 0x3c0000>;
-                                               };
-                                       };
-                               };
-
-                               ssi2: ssi@70014000 {
-                                       fsl,mode = "i2s-slave";
-                                       status = "okay";
-                               };
-                       };
-
-                       iomuxc@73fa8000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       694  0x20d5     /* MX51_PAD_GPIO1_0__SD1_CD */
-                                                       697  0x20d5     /* MX51_PAD_GPIO1_1__SD1_WP */
-                                                       737  0x100      /* MX51_PAD_GPIO1_5__GPIO1_5 */
-                                                       740  0x100      /* MX51_PAD_GPIO1_6__GPIO1_6 */
-                                                       121  0x5        /* MX51_PAD_EIM_A27__GPIO2_21 */
-                                                       402  0x85       /* MX51_PAD_CSPI1_SS0__GPIO4_24 */
-                                                       405  0x85       /* MX51_PAD_CSPI1_SS1__GPIO4_25 */
-                                               >;
-                                       };
-                               };
-                       };
-
-                       uart1: serial@73fbc000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart1_1>;
-                               fsl,uart-has-rtscts;
-                               status = "okay";
-                       };
-
-                       uart2: serial@73fc0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart2_1>;
-                               status = "okay";
-                       };
-               };
-
-               aips@80000000 { /* aips-2 */
-                       i2c@83fc4000 { /* I2C2 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_i2c2_1>;
-                               status = "okay";
-
-                               sgtl5000: codec@0a {
-                                       compatible = "fsl,sgtl5000";
-                                       reg = <0x0a>;
-                                       clock-frequency = <26000000>;
-                                       VDDA-supply = <&vdig_reg>;
-                                       VDDIO-supply = <&vvideo_reg>;
-                               };
-                       };
-
-                       audmux@83fd0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_audmux_1>;
-                               status = "okay";
-                       };
+       display@di0 {
+               compatible = "fsl,imx-parallel-display";
+               crtcs = <&ipu 0>;
+               interface-pix-fmt = "rgb24";
+               pinctrl-names = "default";
+               pinctrl-0 = <&pinctrl_ipu_disp1_1>;
+       };
 
-                       ethernet@83fec000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_fec_1>;
-                               phy-mode = "mii";
-                               status = "okay";
-                       };
-               };
+       display@di1 {
+               compatible = "fsl,imx-parallel-display";
+               crtcs = <&ipu 1>;
+               interface-pix-fmt = "rgb565";
+               pinctrl-names = "default";
+               pinctrl-0 = <&pinctrl_ipu_disp2_1>;
        };
 
        gpio-keys {
                mux-ext-port = <3>;
        };
 };
+
+&esdhc1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc1_1>;
+       fsl,cd-controller;
+       fsl,wp-controller;
+       status = "okay";
+};
+
+&esdhc2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc2_1>;
+       cd-gpios = <&gpio1 6 0>;
+       wp-gpios = <&gpio1 5 0>;
+       status = "okay";
+};
+
+&uart3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart3_1>;
+       fsl,uart-has-rtscts;
+       status = "okay";
+};
+
+&ecspi1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_ecspi1_1>;
+       fsl,spi-num-chipselects = <2>;
+       cs-gpios = <&gpio4 24 0>, <&gpio4 25 0>;
+       status = "okay";
+
+       pmic: mc13892@0 {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               compatible = "fsl,mc13892";
+               spi-max-frequency = <6000000>;
+               reg = <0>;
+               interrupt-parent = <&gpio1>;
+               interrupts = <8 0x4>;
+
+               regulators {
+                       sw1_reg: sw1 {
+                               regulator-min-microvolt = <600000>;
+                               regulator-max-microvolt = <1375000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+                       };
+
+                       sw2_reg: sw2 {
+                               regulator-min-microvolt = <900000>;
+                               regulator-max-microvolt = <1850000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+                       };
+
+                       sw3_reg: sw3 {
+                               regulator-min-microvolt = <1100000>;
+                               regulator-max-microvolt = <1850000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+                       };
+
+                       sw4_reg: sw4 {
+                               regulator-min-microvolt = <1100000>;
+                               regulator-max-microvolt = <1850000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+                       };
+
+                       vpll_reg: vpll {
+                               regulator-min-microvolt = <1050000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+                       };
+
+                       vdig_reg: vdig {
+                               regulator-min-microvolt = <1650000>;
+                               regulator-max-microvolt = <1650000>;
+                               regulator-boot-on;
+                       };
+
+                       vsd_reg: vsd {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <3150000>;
+                       };
+
+                       vusb2_reg: vusb2 {
+                               regulator-min-microvolt = <2400000>;
+                               regulator-max-microvolt = <2775000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+                       };
+
+                       vvideo_reg: vvideo {
+                               regulator-min-microvolt = <2775000>;
+                               regulator-max-microvolt = <2775000>;
+                       };
+
+                       vaudio_reg: vaudio {
+                               regulator-min-microvolt = <2300000>;
+                               regulator-max-microvolt = <3000000>;
+                       };
+
+                       vcam_reg: vcam {
+                               regulator-min-microvolt = <2500000>;
+                               regulator-max-microvolt = <3000000>;
+                       };
+
+                       vgen1_reg: vgen1 {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <1200000>;
+                       };
+
+                       vgen2_reg: vgen2 {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <3150000>;
+                               regulator-always-on;
+                       };
+
+                       vgen3_reg: vgen3 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <2900000>;
+                               regulator-always-on;
+                       };
+               };
+       };
+
+       flash: at45db321d@1 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash";
+               spi-max-frequency = <25000000>;
+               reg = <1>;
+
+               partition@0 {
+                       label = "U-Boot";
+                       reg = <0x0 0x40000>;
+                       read-only;
+               };
+
+               partition@40000 {
+                       label = "Kernel";
+                       reg = <0x40000 0x3c0000>;
+               };
+       };
+};
+
+&ssi2 {
+       fsl,mode = "i2s-slave";
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               694  0x20d5     /* MX51_PAD_GPIO1_0__SD1_CD */
+                               697  0x20d5     /* MX51_PAD_GPIO1_1__SD1_WP */
+                               737  0x100      /* MX51_PAD_GPIO1_5__GPIO1_5 */
+                               740  0x100      /* MX51_PAD_GPIO1_6__GPIO1_6 */
+                               121  0x5        /* MX51_PAD_EIM_A27__GPIO2_21 */
+                               402  0x85       /* MX51_PAD_CSPI1_SS0__GPIO4_24 */
+                               405  0x85       /* MX51_PAD_CSPI1_SS1__GPIO4_25 */
+                       >;
+               };
+       };
+};
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1_1>;
+       fsl,uart-has-rtscts;
+       status = "okay";
+};
+
+&uart2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart2_1>;
+       status = "okay";
+};
+
+&i2c2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c2_1>;
+       status = "okay";
+
+       sgtl5000: codec@0a {
+               compatible = "fsl,sgtl5000";
+               reg = <0x0a>;
+               clock-frequency = <26000000>;
+               VDDA-supply = <&vdig_reg>;
+               VDDIO-supply = <&vvideo_reg>;
+       };
+};
+
+&audmux {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_audmux_1>;
+       status = "okay";
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_fec_1>;
+       phy-mode = "mii";
+       status = "okay";
+};
+
+&kpp {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_kpp_1>;
+       linux,keymap = <0x00000067      /* KEY_UP */
+                       0x0001006c      /* KEY_DOWN */
+                       0x00020072      /* KEY_VOLUMEDOWN */
+                       0x00030066      /* KEY_HOME */
+                       0x0100006a      /* KEY_RIGHT */
+                       0x01010069      /* KEY_LEFT */
+                       0x0102001c      /* KEY_ENTER */
+                       0x01030073      /* KEY_VOLUMEUP */
+                       0x02000040      /* KEY_F6 */
+                       0x02010042      /* KEY_F8 */
+                       0x02020043      /* KEY_F9 */
+                       0x02030044      /* KEY_F10 */
+                       0x0300003b      /* KEY_F1 */
+                       0x0301003c      /* KEY_F2 */
+                       0x0302003d      /* KEY_F3 */
+                       0x03030074>;    /* KEY_POWER */
+       status = "okay";
+};
index 1f5d45eff45e25be4ec0ff027c044b1526a5d65e..fcf035bf7c5af45a453c6ad2d8c78811329b4e4f 100644 (file)
                                #interrupt-cells = <2>;
                        };
 
+                       kpp: kpp@73f94000 {
+                               compatible = "fsl,imx51-kpp", "fsl,imx21-kpp";
+                               reg = <0x73f94000 0x4000>;
+                               interrupts = <60>;
+                               clocks = <&clks 0>;
+                               status = "disabled";
+                       };
+
                        wdog1: wdog@73f98000 {
                                compatible = "fsl,imx51-wdt", "fsl,imx21-wdt";
                                reg = <0x73f98000 0x4000>;
                                                        260 0x80000000  /* MX51_PAD_NANDF_RDY_INT__FEC_TX_CLK */
                                                >;
                                        };
+
+                                       pinctrl_fec_2: fecgrp-2 {
+                                               fsl,pins = <
+                                                       589 0x80000000 /* MX51_PAD_DI_GP3__FEC_TX_ER */
+                                                       592 0x80000000 /* MX51_PAD_DI2_PIN4__FEC_CRS */
+                                                       594 0x80000000 /* MX51_PAD_DI2_PIN2__FEC_MDC */
+                                                       596 0x80000000 /* MX51_PAD_DI2_PIN3__FEC_MDIO */
+                                                       598 0x80000000 /* MX51_PAD_DI2_DISP_CLK__FEC_RDATA1 */
+                                                       602 0x80000000 /* MX51_PAD_DI_GP4__FEC_RDATA2 */
+                                                       604 0x80000000 /* MX51_PAD_DISP2_DAT0__FEC_RDATA3 */
+                                                       609 0x80000000 /* MX51_PAD_DISP2_DAT1__FEC_RX_ER */
+                                                       618 0x80000000 /* MX51_PAD_DISP2_DAT6__FEC_TDATA1 */
+                                                       623 0x80000000 /* MX51_PAD_DISP2_DAT7__FEC_TDATA2 */
+                                                       628 0x80000000 /* MX51_PAD_DISP2_DAT8__FEC_TDATA3 */
+                                                       634 0x80000000 /* MX51_PAD_DISP2_DAT9__FEC_TX_EN */
+                                                       639 0x80000000 /* MX51_PAD_DISP2_DAT10__FEC_COL */
+                                                       644 0x80000000 /* MX51_PAD_DISP2_DAT11__FEC_RX_CLK */
+                                                       649 0x80000000 /* MX51_PAD_DISP2_DAT12__FEC_RX_DV */
+                                                       653 0x80000000 /* MX51_PAD_DISP2_DAT13__FEC_TX_CLK */
+                                                       657 0x80000000 /* MX51_PAD_DISP2_DAT14__FEC_RDATA0 */
+                                                       662 0x80000000 /* MX51_PAD_DISP2_DAT15__FEC_TDATA0 */
+                                               >;
+                                       };
                                };
 
                                ecspi1 {
                                                        49 0x1c5        /* MX51_PAD_EIM_D24__UART3_CTS */
                                                >;
                                        };
+
+                                       pinctrl_uart3_2: uart3grp-2 {
+                                               fsl,pins = <
+                                                       434 0x1c5       /* MX51_PAD_UART3_RXD__UART3_RXD */
+                                                       430 0x1c5       /* MX51_PAD_UART3_TXD__UART3_TXD */
+                                               >;
+                                       };
+                               };
+
+                               kpp {
+                                       pinctrl_kpp_1: kppgrp-1 {
+                                               fsl,pins = <
+                                                       438 0xe0        /* MX51_PAD_KEY_ROW0__KEY_ROW0 */
+                                                       439 0xe0        /* MX51_PAD_KEY_ROW1__KEY_ROW1 */
+                                                       440 0xe0        /* MX51_PAD_KEY_ROW2__KEY_ROW2 */
+                                                       441 0xe0        /* MX51_PAD_KEY_ROW3__KEY_ROW3 */
+                                                       442 0xe8        /* MX51_PAD_KEY_COL0__KEY_COL0 */
+                                                       444 0xe8        /* MX51_PAD_KEY_COL1__KEY_COL1 */
+                                                       446 0xe8        /* MX51_PAD_KEY_COL2__KEY_COL2 */
+                                                       448 0xe8        /* MX51_PAD_KEY_COL3__KEY_COL3 */
+                                               >;
+                                       };
                                };
                        };
 
index 4be76f223526c648035f737e15f7c1ed2048285b..e049fd0319e83c941da1c3a0691e75bb2346de53 100644 (file)
                reg = <0x70000000 0x40000000>;
        };
 
-       soc {
-               aips@50000000 { /* AIPS1 */
-                       spba@50000000 {
-                               esdhc@50004000 { /* ESDHC1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc1_2>;
-                                       cd-gpios = <&gpio1 1 0>;
-                                       wp-gpios = <&gpio1 9 0>;
-                                       status = "okay";
-                               };
-                       };
-
-                       iomuxc@53fa8000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       1077 0x80000000 /* MX53_PAD_GPIO_1__GPIO1_1 */
-                                                       1085 0x80000000 /* MX53_PAD_GPIO_9__GPIO1_9 */
-                                                       486  0x80000000 /* MX53_PAD_EIM_EB3__GPIO2_31 */
-                                                       739  0x80000000 /* MX53_PAD_GPIO_10__GPIO4_0 */
-                                                       218  0x80000000 /* MX53_PAD_DISP0_DAT16__GPIO5_10 */
-                                                       226  0x80000000 /* MX53_PAD_DISP0_DAT17__GPIO5_11 */
-                                                       233  0x80000000 /* MX53_PAD_DISP0_DAT18__GPIO5_12 */
-                                                       241  0x80000000 /* MX53_PAD_DISP0_DAT19__GPIO5_13 */
-                                                       429  0x80000000 /* MX53_PAD_EIM_D16__EMI_WEIM_D_16 */
-                                                       435  0x80000000 /* MX53_PAD_EIM_D17__EMI_WEIM_D_17 */
-                                                       441  0x80000000 /* MX53_PAD_EIM_D18__EMI_WEIM_D_18 */
-                                                       448  0x80000000 /* MX53_PAD_EIM_D19__EMI_WEIM_D_19 */
-                                                       456  0x80000000 /* MX53_PAD_EIM_D20__EMI_WEIM_D_20 */
-                                                       464  0x80000000 /* MX53_PAD_EIM_D21__EMI_WEIM_D_21 */
-                                                       471  0x80000000 /* MX53_PAD_EIM_D22__EMI_WEIM_D_22 */
-                                                       477  0x80000000 /* MX53_PAD_EIM_D23__EMI_WEIM_D_23 */
-                                                       492  0x80000000 /* MX53_PAD_EIM_D24__EMI_WEIM_D_24 */
-                                                       500  0x80000000 /* MX53_PAD_EIM_D25__EMI_WEIM_D_25 */
-                                                       508  0x80000000 /* MX53_PAD_EIM_D26__EMI_WEIM_D_26 */
-                                                       516  0x80000000 /* MX53_PAD_EIM_D27__EMI_WEIM_D_27 */
-                                                       524  0x80000000 /* MX53_PAD_EIM_D28__EMI_WEIM_D_28 */
-                                                       532  0x80000000 /* MX53_PAD_EIM_D29__EMI_WEIM_D_29 */
-                                                       540  0x80000000 /* MX53_PAD_EIM_D30__EMI_WEIM_D_30 */
-                                                       548  0x80000000 /* MX53_PAD_EIM_D31__EMI_WEIM_D_31 */
-                                                       637  0x80000000 /* MX53_PAD_EIM_DA0__EMI_NAND_WEIM_DA_0 */
-                                                       642  0x80000000 /* MX53_PAD_EIM_DA1__EMI_NAND_WEIM_DA_1 */
-                                                       647  0x80000000 /* MX53_PAD_EIM_DA2__EMI_NAND_WEIM_DA_2 */
-                                                       652  0x80000000 /* MX53_PAD_EIM_DA3__EMI_NAND_WEIM_DA_3 */
-                                                       657  0x80000000 /* MX53_PAD_EIM_DA4__EMI_NAND_WEIM_DA_4 */
-                                                       662  0x80000000 /* MX53_PAD_EIM_DA5__EMI_NAND_WEIM_DA_5 */
-                                                       667  0x80000000 /* MX53_PAD_EIM_DA6__EMI_NAND_WEIM_DA_6 */
-                                                       611  0x80000000 /* MX53_PAD_EIM_OE__EMI_WEIM_OE */
-                                                       616  0x80000000 /* MX53_PAD_EIM_RW__EMI_WEIM_RW */
-                                                       607  0x80000000 /* MX53_PAD_EIM_CS1__EMI_WEIM_CS_1 */
-                                               >;
-                                       };
-                               };
-                       };
-
-                       uart1: serial@53fbc000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart1_2>;
-                               status = "okay";
-                       };
-               };
-       };
-
        eim-cs1@f4000000 {
                #address-cells = <1>;
                #size-cells = <1>;
                };
        };
 };
+
+&esdhc1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc1_2>;
+       cd-gpios = <&gpio1 1 0>;
+       wp-gpios = <&gpio1 9 0>;
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               1077 0x80000000 /* MX53_PAD_GPIO_1__GPIO1_1 */
+                               1085 0x80000000 /* MX53_PAD_GPIO_9__GPIO1_9 */
+                               486  0x80000000 /* MX53_PAD_EIM_EB3__GPIO2_31 */
+                               739  0x80000000 /* MX53_PAD_GPIO_10__GPIO4_0 */
+                               218  0x80000000 /* MX53_PAD_DISP0_DAT16__GPIO5_10 */
+                               226  0x80000000 /* MX53_PAD_DISP0_DAT17__GPIO5_11 */
+                               233  0x80000000 /* MX53_PAD_DISP0_DAT18__GPIO5_12 */
+                               241  0x80000000 /* MX53_PAD_DISP0_DAT19__GPIO5_13 */
+                               429  0x80000000 /* MX53_PAD_EIM_D16__EMI_WEIM_D_16 */
+                               435  0x80000000 /* MX53_PAD_EIM_D17__EMI_WEIM_D_17 */
+                               441  0x80000000 /* MX53_PAD_EIM_D18__EMI_WEIM_D_18 */
+                               448  0x80000000 /* MX53_PAD_EIM_D19__EMI_WEIM_D_19 */
+                               456  0x80000000 /* MX53_PAD_EIM_D20__EMI_WEIM_D_20 */
+                               464  0x80000000 /* MX53_PAD_EIM_D21__EMI_WEIM_D_21 */
+                               471  0x80000000 /* MX53_PAD_EIM_D22__EMI_WEIM_D_22 */
+                               477  0x80000000 /* MX53_PAD_EIM_D23__EMI_WEIM_D_23 */
+                               492  0x80000000 /* MX53_PAD_EIM_D24__EMI_WEIM_D_24 */
+                               500  0x80000000 /* MX53_PAD_EIM_D25__EMI_WEIM_D_25 */
+                               508  0x80000000 /* MX53_PAD_EIM_D26__EMI_WEIM_D_26 */
+                               516  0x80000000 /* MX53_PAD_EIM_D27__EMI_WEIM_D_27 */
+                               524  0x80000000 /* MX53_PAD_EIM_D28__EMI_WEIM_D_28 */
+                               532  0x80000000 /* MX53_PAD_EIM_D29__EMI_WEIM_D_29 */
+                               540  0x80000000 /* MX53_PAD_EIM_D30__EMI_WEIM_D_30 */
+                               548  0x80000000 /* MX53_PAD_EIM_D31__EMI_WEIM_D_31 */
+                               637  0x80000000 /* MX53_PAD_EIM_DA0__EMI_NAND_WEIM_DA_0 */
+                               642  0x80000000 /* MX53_PAD_EIM_DA1__EMI_NAND_WEIM_DA_1 */
+                               647  0x80000000 /* MX53_PAD_EIM_DA2__EMI_NAND_WEIM_DA_2 */
+                               652  0x80000000 /* MX53_PAD_EIM_DA3__EMI_NAND_WEIM_DA_3 */
+                               657  0x80000000 /* MX53_PAD_EIM_DA4__EMI_NAND_WEIM_DA_4 */
+                               662  0x80000000 /* MX53_PAD_EIM_DA5__EMI_NAND_WEIM_DA_5 */
+                               667  0x80000000 /* MX53_PAD_EIM_DA6__EMI_NAND_WEIM_DA_6 */
+                               611  0x80000000 /* MX53_PAD_EIM_OE__EMI_WEIM_OE */
+                               616  0x80000000 /* MX53_PAD_EIM_RW__EMI_WEIM_RW */
+                               607  0x80000000 /* MX53_PAD_EIM_CS1__EMI_WEIM_CS_1 */
+                       >;
+               };
+       };
+};
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1_2>;
+       status = "okay";
+};
index a124d1e25258784645fd596be19558afd19ce176..85a89b52f9b89e87b078478b5701e729b1b4f605 100644 (file)
                reg = <0x70000000 0x80000000>;
        };
 
-       soc {
-               aips@50000000 { /* AIPS1 */
-                       spba@50000000 {
-                               esdhc@50004000 { /* ESDHC1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc1_1>;
-                                       cd-gpios = <&gpio3 13 0>;
-                                       wp-gpios = <&gpio3 14 0>;
-                                       status = "okay";
-                               };
-
-                               ecspi@50010000 { /* ECSPI1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_ecspi1_1>;
-                                       fsl,spi-num-chipselects = <2>;
-                                       cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>;
-                                       status = "okay";
-
-                                       flash: at45db321d@1 {
-                                               #address-cells = <1>;
-                                               #size-cells = <1>;
-                                               compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash";
-                                               spi-max-frequency = <25000000>;
-                                               reg = <1>;
-
-                                               partition@0 {
-                                                       label = "U-Boot";
-                                                       reg = <0x0 0x40000>;
-                                                       read-only;
-                                               };
-
-                                               partition@40000 {
-                                                       label = "Kernel";
-                                                       reg = <0x40000 0x3c0000>;
-                                               };
-                                       };
-                               };
-
-                               esdhc@50020000 { /* ESDHC3 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc3_1>;
-                                       cd-gpios = <&gpio3 11 0>;
-                                       wp-gpios = <&gpio3 12 0>;
-                                       status = "okay";
-                               };
-                       };
-
-                       iomuxc@53fa8000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       424  0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */
-                                                       449  0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */
-                                                       693  0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */
-                                                       697  0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */
-                                                       701  0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */
-                                                       705  0x80000000 /* MX53_PAD_EIM_DA14__GPIO3_14 */
-                                                       868  0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */
-                                                       873  0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */
-                                               >;
-                                       };
-                               };
-                       };
-
-                       uart1: serial@53fbc000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart1_1>;
-                               status = "okay";
-                       };
-               };
-
-               aips@60000000 { /* AIPS2 */
-                       i2c@63fc4000 { /* I2C2 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_i2c2_1>;
-                               status = "okay";
-
-                               pmic: mc13892@08 {
-                                       compatible = "fsl,mc13892", "fsl,mc13xxx";
-                                       reg = <0x08>;
-                               };
-
-                               codec: sgtl5000@0a {
-                                       compatible = "fsl,sgtl5000";
-                                       reg = <0x0a>;
-                               };
-                       };
-
-                       ethernet@63fec000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_fec_1>;
-                               phy-mode = "rmii";
-                               phy-reset-gpios = <&gpio7 6 0>;
-                               status = "okay";
-                       };
-               };
-       };
-
        leds {
                compatible = "gpio-leds";
 
                };
        };
 };
+
+&esdhc1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc1_1>;
+       cd-gpios = <&gpio3 13 0>;
+       wp-gpios = <&gpio3 14 0>;
+       status = "okay";
+};
+
+&ecspi1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_ecspi1_1>;
+       fsl,spi-num-chipselects = <2>;
+       cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>;
+       status = "okay";
+
+       flash: at45db321d@1 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash";
+               spi-max-frequency = <25000000>;
+               reg = <1>;
+
+               partition@0 {
+                       label = "U-Boot";
+                       reg = <0x0 0x40000>;
+                       read-only;
+               };
+
+               partition@40000 {
+                       label = "Kernel";
+                       reg = <0x40000 0x3c0000>;
+               };
+       };
+};
+
+&esdhc3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc3_1>;
+       cd-gpios = <&gpio3 11 0>;
+       wp-gpios = <&gpio3 12 0>;
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               424  0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */
+                               449  0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */
+                               693  0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */
+                               697  0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */
+                               701  0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */
+                               705  0x80000000 /* MX53_PAD_EIM_DA14__GPIO3_14 */
+                               868  0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */
+                               873  0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */
+                       >;
+               };
+       };
+};
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1_1>;
+       status = "okay";
+};
+
+&i2c2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c2_1>;
+       status = "okay";
+
+       pmic: mc13892@08 {
+               compatible = "fsl,mc13892", "fsl,mc13xxx";
+               reg = <0x08>;
+       };
+
+       codec: sgtl5000@0a {
+               compatible = "fsl,sgtl5000";
+               reg = <0x0a>;
+       };
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_fec_1>;
+       phy-mode = "rmii";
+       phy-reset-gpios = <&gpio7 6 0>;
+       status = "okay";
+};
diff --git a/arch/arm/boot/dts/imx53-mba53.dts b/arch/arm/boot/dts/imx53-mba53.dts
new file mode 100644 (file)
index 0000000..e54fffd
--- /dev/null
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2012 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
+ * Copyright 2012 Steffen Trumtrar <s.trumtrar@pengutronix.de>, Pengutronix
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/dts-v1/;
+/include/ "imx53-tqma53.dtsi"
+
+/ {
+       model = "TQ MBa53 starter kit";
+       compatible = "tq,mba53", "tq,tqma53", "fsl,imx53";
+};
+
+&iomuxc {
+       lvds1 {
+               pinctrl_lvds1_1: lvds1-grp1 {
+                       fsl,pins = <730 0x10000         /* LVDS0_TX3 */
+                                   732 0x10000         /* LVDS0_CLK */
+                                   734 0x10000         /* LVDS0_TX2 */
+                                   736 0x10000         /* LVDS0_TX1 */
+                                   738 0x10000>;       /* LVDS0_TX0 */
+               };
+
+               pinctrl_lvds1_2: lvds1-grp2 {
+                       fsl,pins = <720 0x10000         /* LVDS1_TX3 */
+                                   722 0x10000         /* LVDS1_TX2 */
+                                   724 0x10000         /* LVDS1_CLK */
+                                   726 0x10000         /* LVDS1_TX1 */
+                                   728 0x10000>;       /* LVDS1_TX0 */
+               };
+       };
+
+       disp1 {
+               pinctrl_disp1_1: disp1-grp1 {
+                       fsl,pins = <689 0x10000         /* DISP1_DRDY   */
+                                   482 0x10000         /* DISP1_HSYNC  */
+                                   489 0x10000         /* DISP1_VSYNC  */
+                                   684 0x10000         /* DISP1_DAT_0  */
+                                   515 0x10000         /* DISP1_DAT_22 */
+                                   523 0x10000         /* DISP1_DAT_23 */
+                                   543 0x10000         /* DISP1_DAT_21 */
+                                   553 0x10000         /* DISP1_DAT_20 */
+                                   558 0x10000         /* DISP1_DAT_19 */
+                                   564 0x10000         /* DISP1_DAT_18 */
+                                   570 0x10000         /* DISP1_DAT_17 */
+                                   575 0x10000         /* DISP1_DAT_16 */
+                                   580 0x10000         /* DISP1_DAT_15 */
+                                   585 0x10000         /* DISP1_DAT_14 */
+                                   590 0x10000         /* DISP1_DAT_13 */
+                                   595 0x10000         /* DISP1_DAT_12 */
+                                   628 0x10000         /* DISP1_DAT_11 */
+                                   634 0x10000         /* DISP1_DAT_10 */
+                                   639 0x10000         /* DISP1_DAT_9  */
+                                   644 0x10000         /* DISP1_DAT_8  */
+                                   649 0x10000         /* DISP1_DAT_7  */
+                                   654 0x10000         /* DISP1_DAT_6  */
+                                   659 0x10000         /* DISP1_DAT_5  */
+                                   664 0x10000         /* DISP1_DAT_4  */
+                                   669 0x10000         /* DISP1_DAT_3  */
+                                   674 0x10000         /* DISP1_DAT_2  */
+                                   679 0x10000         /* DISP1_DAT_1  */
+                                   684 0x10000>;       /* DISP1_DAT_0  */
+               };
+       };
+};
+
+&cspi {
+       status = "okay";
+};
+
+&i2c2 {
+       codec: sgtl5000@a {
+               compatible = "fsl,sgtl5000";
+               reg = <0x0a>;
+       };
+
+       expander: pca9554@20 {
+               compatible = "pca9554";
+               reg = <0x20>;
+               interrupts = <109>;
+       };
+
+       sensor2: lm75@49 {
+               compatible = "lm75";
+               reg = <0x49>;
+       };
+};
+
+&fec {
+       status = "okay";
+};
+
+&esdhc2 {
+       status = "okay";
+};
+
+&uart3 {
+       status = "okay";
+};
+
+&ecspi1 {
+       status = "okay";
+};
+
+&uart1 {
+       status = "okay";
+};
+
+&uart2 {
+       status = "okay";
+};
+
+&can1 {
+       status = "okay";
+};
+
+&can2 {
+       status = "okay";
+};
+
+&i2c3 {
+       status = "okay";
+};
index b0075537195bda29f5a19ef232c4d5d9b2c3a2f6..05cc5620436b1f4103a5b59cc3890669e2ba739a 100644 (file)
                reg = <0x70000000 0x40000000>;
        };
 
-       soc {
-               aips@50000000 { /* AIPS1 */
-                       spba@50000000 {
-                               esdhc@50004000 { /* ESDHC1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc1_1>;
-                                       cd-gpios = <&gpio3 13 0>;
-                                       status = "okay";
-                               };
-
-                               ssi2: ssi@50014000 {
-                                       fsl,mode = "i2s-slave";
-                                       status = "okay";
-                               };
-
-                               esdhc@50020000 { /* ESDHC3 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc3_1>;
-                                       cd-gpios = <&gpio3 11 0>;
-                                       wp-gpios = <&gpio3 12 0>;
-                                       status = "okay";
-                               };
-                       };
-
-                       iomuxc@53fa8000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       1071 0x80000000 /* MX53_PAD_GPIO_0__CCM_SSI_EXT1_CLK */
-                                                       1141 0x80000000 /* MX53_PAD_GPIO_8__GPIO1_8 */
-                                                       982  0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */
-                                                       989  0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */
-                                                       693  0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */
-                                                       697  0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */
-                                                       701  0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */
-                                                       868  0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */
-                                                       1149 0x80000000 /* MX53_PAD_GPIO_16__GPIO7_11 */
-                                               >;
-                                       };
-
-                                       led_pin_gpio7_7: led_gpio7_7@0 {
-                                               fsl,pins = <
-                                                       873  0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */
-                                               >;
-                                       };
-                               };
-
-                       };
-
-                       uart1: serial@53fbc000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart1_1>;
-                               status = "okay";
-                       };
-               };
-
-               aips@60000000 { /* AIPS2 */
-                       i2c@63fc4000 { /* I2C2 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_i2c2_1>;
-                               status = "okay";
-
-                               sgtl5000: codec@0a {
-                                       compatible = "fsl,sgtl5000";
-                                       reg = <0x0a>;
-                                       VDDA-supply = <&reg_3p2v>;
-                                       VDDIO-supply = <&reg_3p2v>;
-                               };
-                       };
-
-                       i2c@63fc8000 { /* I2C1 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_i2c1_1>;
-                               status = "okay";
-
-                               accelerometer: mma8450@1c {
-                                       compatible = "fsl,mma8450";
-                                       reg = <0x1c>;
-                               };
-
-                               pmic: dialog@48 {
-                                       compatible = "dlg,da9053-aa", "dlg,da9052";
-                                       reg = <0x48>;
-                                       interrupt-parent = <&gpio7>;
-                                       interrupts = <11 0x8>; /* low-level active IRQ at GPIO7_11 */
-
-                                       regulators {
-                                               buck1_reg: buck1 {
-                                                       regulator-min-microvolt = <500000>;
-                                                       regulator-max-microvolt = <2075000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               buck2_reg: buck2 {
-                                                       regulator-min-microvolt = <500000>;
-                                                       regulator-max-microvolt = <2075000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               buck3_reg: buck3 {
-                                                       regulator-min-microvolt = <925000>;
-                                                       regulator-max-microvolt = <2500000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               buck4_reg: buck4 {
-                                                       regulator-min-microvolt = <925000>;
-                                                       regulator-max-microvolt = <2500000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo1_reg: ldo1 {
-                                                       regulator-min-microvolt = <600000>;
-                                                       regulator-max-microvolt = <1800000>;
-                                                       regulator-boot-on;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo2_reg: ldo2 {
-                                                       regulator-min-microvolt = <600000>;
-                                                       regulator-max-microvolt = <1800000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo3_reg: ldo3 {
-                                                       regulator-min-microvolt = <600000>;
-                                                       regulator-max-microvolt = <1800000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo4_reg: ldo4 {
-                                                       regulator-min-microvolt = <1725000>;
-                                                       regulator-max-microvolt = <3300000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo5_reg: ldo5 {
-                                                       regulator-min-microvolt = <1725000>;
-                                                       regulator-max-microvolt = <3300000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo6_reg: ldo6 {
-                                                       regulator-min-microvolt = <1200000>;
-                                                       regulator-max-microvolt = <3600000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo7_reg: ldo7 {
-                                                       regulator-min-microvolt = <1200000>;
-                                                       regulator-max-microvolt = <3600000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo8_reg: ldo8 {
-                                                       regulator-min-microvolt = <1200000>;
-                                                       regulator-max-microvolt = <3600000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo9_reg: ldo9 {
-                                                       regulator-min-microvolt = <1200000>;
-                                                       regulator-max-microvolt = <3600000>;
-                                                       regulator-always-on;
-                                               };
-
-                                               ldo10_reg: ldo10 {
-                                                       regulator-min-microvolt = <1250000>;
-                                                       regulator-max-microvolt = <3650000>;
-                                                       regulator-always-on;
-                                               };
-                                       };
-                               };
-                       };
-
-                       audmux@63fd0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_audmux_1>;
-                               status = "okay";
-                       };
-
-                       ethernet@63fec000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_fec_1>;
-                               phy-mode = "rmii";
-                               phy-reset-gpios = <&gpio7 6 0>;
-                               status = "okay";
-                       };
-               };
-       };
-
        gpio-keys {
                compatible = "gpio-keys";
 
                mux-ext-port = <5>;
        };
 };
+
+&esdhc1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc1_1>;
+       cd-gpios = <&gpio3 13 0>;
+       status = "okay";
+};
+
+&ssi2 {
+       fsl,mode = "i2s-slave";
+       status = "okay";
+};
+
+&esdhc3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc3_1>;
+       cd-gpios = <&gpio3 11 0>;
+       wp-gpios = <&gpio3 12 0>;
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               1071 0x80000000 /* MX53_PAD_GPIO_0__CCM_SSI_EXT1_CLK */
+                               1141 0x80000000 /* MX53_PAD_GPIO_8__GPIO1_8 */
+                               982  0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */
+                               989  0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */
+                               693  0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */
+                               697  0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */
+                               701  0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */
+                               868  0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */
+                               1149 0x80000000 /* MX53_PAD_GPIO_16__GPIO7_11 */
+                       >;
+               };
+
+               led_pin_gpio7_7: led_gpio7_7@0 {
+                       fsl,pins = <
+                               873  0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */
+                       >;
+               };
+       };
+};
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1_1>;
+       status = "okay";
+};
+
+&i2c2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c2_1>;
+       status = "okay";
+
+       sgtl5000: codec@0a {
+               compatible = "fsl,sgtl5000";
+               reg = <0x0a>;
+               VDDA-supply = <&reg_3p2v>;
+               VDDIO-supply = <&reg_3p2v>;
+       };
+};
+
+&i2c1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c1_1>;
+       status = "okay";
+
+       accelerometer: mma8450@1c {
+               compatible = "fsl,mma8450";
+               reg = <0x1c>;
+       };
+
+       pmic: dialog@48 {
+               compatible = "dlg,da9053-aa", "dlg,da9052";
+               reg = <0x48>;
+               interrupt-parent = <&gpio7>;
+               interrupts = <11 0x8>; /* low-level active IRQ at GPIO7_11 */
+
+               regulators {
+                       buck1_reg: buck1 {
+                               regulator-min-microvolt = <500000>;
+                               regulator-max-microvolt = <2075000>;
+                               regulator-always-on;
+                       };
+
+                       buck2_reg: buck2 {
+                               regulator-min-microvolt = <500000>;
+                               regulator-max-microvolt = <2075000>;
+                               regulator-always-on;
+                       };
+
+                       buck3_reg: buck3 {
+                               regulator-min-microvolt = <925000>;
+                               regulator-max-microvolt = <2500000>;
+                               regulator-always-on;
+                       };
+
+                       buck4_reg: buck4 {
+                               regulator-min-microvolt = <925000>;
+                               regulator-max-microvolt = <2500000>;
+                               regulator-always-on;
+                       };
+
+                       ldo1_reg: ldo1 {
+                               regulator-min-microvolt = <600000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-boot-on;
+                               regulator-always-on;
+                       };
+
+                       ldo2_reg: ldo2 {
+                               regulator-min-microvolt = <600000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-always-on;
+                       };
+
+                       ldo3_reg: ldo3 {
+                               regulator-min-microvolt = <600000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-always-on;
+                       };
+
+                       ldo4_reg: ldo4 {
+                               regulator-min-microvolt = <1725000>;
+                               regulator-max-microvolt = <3300000>;
+                               regulator-always-on;
+                       };
+
+                       ldo5_reg: ldo5 {
+                               regulator-min-microvolt = <1725000>;
+                               regulator-max-microvolt = <3300000>;
+                               regulator-always-on;
+                       };
+
+                       ldo6_reg: ldo6 {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <3600000>;
+                               regulator-always-on;
+                       };
+
+                       ldo7_reg: ldo7 {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <3600000>;
+                               regulator-always-on;
+                       };
+
+                       ldo8_reg: ldo8 {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <3600000>;
+                               regulator-always-on;
+                       };
+
+                       ldo9_reg: ldo9 {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <3600000>;
+                               regulator-always-on;
+                       };
+
+                       ldo10_reg: ldo10 {
+                               regulator-min-microvolt = <1250000>;
+                               regulator-max-microvolt = <3650000>;
+                               regulator-always-on;
+                       };
+               };
+       };
+};
+
+&audmux {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_audmux_1>;
+       status = "okay";
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_fec_1>;
+       phy-mode = "rmii";
+       phy-reset-gpios = <&gpio7 6 0>;
+       status = "okay";
+};
index 06c68580c842586f4846860c2875854dd347ee82..995554c324b84ec6ba747f945e64db10c7bd7a5c 100644 (file)
                reg = <0x70000000 0x40000000>;
        };
 
-       soc {
-               aips@50000000 { /* AIPS1 */
-                       spba@50000000 {
-                               esdhc@50004000 { /* ESDHC1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc1_1>;
-                                       cd-gpios = <&gpio3 13 0>;
-                                       wp-gpios = <&gpio4 11 0>;
-                                       status = "okay";
-                               };
-
-                               esdhc@50008000 { /* ESDHC2 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc2_1>;
-                                       non-removable;
-                                       status = "okay";
-                               };
-
-                               uart3: serial@5000c000 {
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_uart3_1>;
-                                       fsl,uart-has-rtscts;
-                                       status = "okay";
-                               };
-
-                               ecspi@50010000 { /* ECSPI1 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_ecspi1_1>;
-                                       fsl,spi-num-chipselects = <2>;
-                                       cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>;
-                                       status = "okay";
-
-                                       zigbee: mc1323@0 {
-                                               compatible = "fsl,mc1323";
-                                               spi-max-frequency = <8000000>;
-                                               reg = <0>;
-                                       };
-
-                                       flash: m25p32@1 {
-                                               #address-cells = <1>;
-                                               #size-cells = <1>;
-                                               compatible = "st,m25p32", "st,m25p";
-                                               spi-max-frequency = <20000000>;
-                                               reg = <1>;
-
-                                               partition@0 {
-                                                       label = "U-Boot";
-                                                       reg = <0x0 0x40000>;
-                                                       read-only;
-                                               };
-
-                                               partition@40000 {
-                                                       label = "Kernel";
-                                                       reg = <0x40000 0x3c0000>;
-                                               };
-                                       };
-                               };
-
-                               esdhc@50020000 { /* ESDHC3 */
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_esdhc3_1>;
-                                       non-removable;
-                                       status = "okay";
-                               };
-                       };
-
-                       iomuxc@53fa8000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       982  0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */
-                                                       989  0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */
-                                                       424  0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */
-                                                       701  0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */
-                                                       449  0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */
-                                                       43   0x80000000 /* MX53_PAD_KEY_ROW2__GPIO4_11 */
-                                                       868  0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */
-                                               >;
-                                       };
-                               };
-                       };
-
-                       uart1: serial@53fbc000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart1_1>;
-                               status = "okay";
-                       };
-
-                       uart2: serial@53fc0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart2_1>;
-                               status = "okay";
-                       };
-               };
-
-               aips@60000000 { /* AIPS2 */
-                       i2c@63fc4000 { /* I2C2 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_i2c2_1>;
-                               status = "okay";
-
-                               codec: sgtl5000@0a {
-                                       compatible = "fsl,sgtl5000";
-                                       reg = <0x0a>;
-                               };
-
-                               magnetometer: mag3110@0e {
-                                       compatible = "fsl,mag3110";
-                                       reg = <0x0e>;
-                               };
-
-                               touchkey: mpr121@5a {
-                                       compatible = "fsl,mpr121";
-                                       reg = <0x5a>;
-                               };
-                       };
-
-                       i2c@63fc8000 { /* I2C1 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_i2c1_1>;
-                               status = "okay";
-
-                               accelerometer: mma8450@1c {
-                                       compatible = "fsl,mma8450";
-                                       reg = <0x1c>;
-                               };
-
-                               camera: ov5642@3c {
-                                       compatible = "ovti,ov5642";
-                                       reg = <0x3c>;
-                               };
-
-                               pmic: dialog@48 {
-                                       compatible = "dialog,da9053", "dialog,da9052";
-                                       reg = <0x48>;
-                               };
-                       };
-
-                       ethernet@63fec000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_fec_1>;
-                               phy-mode = "rmii";
-                               phy-reset-gpios = <&gpio7 6 0>;
-                               status = "okay";
-                       };
-               };
-       };
-
        gpio-keys {
                compatible = "gpio-keys";
 
                };
        };
 };
+
+&esdhc1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc1_1>;
+       cd-gpios = <&gpio3 13 0>;
+       wp-gpios = <&gpio4 11 0>;
+       status = "okay";
+};
+
+&esdhc2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc2_1>;
+       non-removable;
+       status = "okay";
+};
+
+&uart3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart3_1>;
+       fsl,uart-has-rtscts;
+       status = "okay";
+};
+
+&ecspi1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_ecspi1_1>;
+       fsl,spi-num-chipselects = <2>;
+       cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>;
+       status = "okay";
+
+       zigbee: mc1323@0 {
+               compatible = "fsl,mc1323";
+               spi-max-frequency = <8000000>;
+               reg = <0>;
+       };
+
+       flash: m25p32@1 {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "st,m25p32", "st,m25p";
+               spi-max-frequency = <20000000>;
+               reg = <1>;
+
+               partition@0 {
+                       label = "U-Boot";
+                       reg = <0x0 0x40000>;
+                       read-only;
+               };
+
+               partition@40000 {
+                       label = "Kernel";
+                       reg = <0x40000 0x3c0000>;
+               };
+       };
+};
+
+&esdhc3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc3_1>;
+       non-removable;
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               982  0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */
+                               989  0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */
+                               424  0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */
+                               701  0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */
+                               449  0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */
+                               43   0x80000000 /* MX53_PAD_KEY_ROW2__GPIO4_11 */
+                               868  0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */
+                       >;
+               };
+       };
+};
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1_1>;
+       status = "okay";
+};
+
+&uart2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart2_1>;
+       status = "okay";
+};
+
+&i2c2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c2_1>;
+       status = "okay";
+
+       codec: sgtl5000@0a {
+               compatible = "fsl,sgtl5000";
+               reg = <0x0a>;
+       };
+
+       magnetometer: mag3110@0e {
+               compatible = "fsl,mag3110";
+               reg = <0x0e>;
+       };
+
+       touchkey: mpr121@5a {
+               compatible = "fsl,mpr121";
+               reg = <0x5a>;
+       };
+};
+
+&i2c1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c1_1>;
+       status = "okay";
+
+       accelerometer: mma8450@1c {
+               compatible = "fsl,mma8450";
+               reg = <0x1c>;
+       };
+
+       camera: ov5642@3c {
+               compatible = "ovti,ov5642";
+               reg = <0x3c>;
+       };
+
+       pmic: dialog@48 {
+               compatible = "dialog,da9053", "dialog,da9052";
+               reg = <0x48>;
+       };
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_fec_1>;
+       phy-mode = "rmii";
+       phy-reset-gpios = <&gpio7 6 0>;
+       status = "okay";
+};
diff --git a/arch/arm/boot/dts/imx53-tqma53.dtsi b/arch/arm/boot/dts/imx53-tqma53.dtsi
new file mode 100644 (file)
index 0000000..8278ec5
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2012 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
+ * Copyright 2012 Steffen Trumtrar <s.trumtrar@pengutronix.de>, Pengutronix
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/include/ "imx53.dtsi"
+
+/ {
+       model = "TQ TQMa53";
+       compatible = "tq,tqma53", "fsl,imx53";
+
+       memory {
+               reg = <0x70000000 0x40000000>; /* Up to 1GiB */
+       };
+
+       regulators {
+               compatible = "simple-bus";
+
+               reg_3p3v: 3p3v {
+                       compatible = "regulator-fixed";
+                       regulator-name = "3P3V";
+                       regulator-min-microvolt = <3300000>;
+                       regulator-max-microvolt = <3300000>;
+                       regulator-always-on;
+               };
+       };
+};
+
+&esdhc2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc2_1>;
+       wp-gpios = <&gpio1 2 0>;
+       cd-gpios = <&gpio1 4 0>;
+       status = "disabled";
+};
+
+&uart3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart3_2>;
+       status = "disabled";
+};
+
+&ecspi1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_ecspi1_1>;
+       fsl,spi-num-chipselects = <4>;
+       cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>,
+                  <&gpio3 24 0>, <&gpio3 25 0>;
+       status = "disabled";
+};
+
+&esdhc3 { /* EMMC */
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_esdhc3_1>;
+       vmmc-supply = <&reg_3p3v>;
+       non-removable;
+       bus-width = <8>;
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       i2s {
+               pinctrl_i2s_1: i2s-grp1 {
+                       fsl,pins = <
+                                1   0x10000    /* I2S_MCLK */
+                                10  0x10000    /* I2S_SCLK */
+                                17  0x10000    /* I2S_DOUT */
+                                23  0x10000    /* I2S_LRCLK*/
+                                30  0x10000    /* I2S_DIN  */
+                       >;
+               };
+       };
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                                610  0x10000   /* MX53_PAD_EIM_CS1__IPU_DI1_PIN6 (VSYNC)*/
+                                711  0x10000   /* MX53_PAD_EIM_DA15__IPU_DI1_PIN4 (HSYNC)*/
+                                873  0x10000   /* MX53_PAD_PATA_DA_1__GPIO7_7 (LCD_BLT_EN)*/
+                                878  0x10000   /* MX53_PAD_PATA_DA_2__GPIO7_8 (LCD_RESET)*/
+                                922  0x10000   /* MX53_PAD_PATA_DATA5__GPIO2_5 (LCD_POWER)*/
+                                928  0x10000   /* MX53_PAD_PATA_DATA6__GPIO2_6 (PMIC_INT)*/
+                                982  0x10000   /* MX53_PAD_PATA_DATA14__GPIO2_14 (CSI_RST)*/
+                                989  0x10000   /* MX53_PAD_PATA_DATA15__GPIO2_15 (CSI_PWDN)*/
+                                1069 0x10000   /* MX53_PAD_GPIO_0__GPIO1_0 (SYSTEM_DOWN)*/
+                                1093 0x10000   /* MX53_PAD_GPIO_3__GPIO1_3 */
+                       >;
+               };
+       };
+};
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1_2>;
+       fsl,uart-has-rtscts;
+       status = "disabled";
+};
+
+&uart2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart2_1>;
+       status = "disabled";
+};
+
+&can1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_can1_2>;
+       status = "disabled";
+};
+
+&can2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_can2_1>;
+       status = "disabled";
+};
+
+&i2c3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c3_1>;
+       status = "disabled";
+};
+
+&cspi {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_cspi_1>;
+       fsl,spi-num-chipselects = <3>;
+       cs-gpios = <&gpio1 18 0>, <&gpio1 19 0>,
+                  <&gpio1 21 0>;
+       status = "disabled";
+};
+
+&i2c2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c2_1>;
+       status = "okay";
+
+       pmic: mc34708@8 {
+               compatible = "fsl,mc34708";
+               reg = <0x8>;
+               fsl,mc13xxx-uses-rtc;
+               interrupt-parent = <&gpio2>;
+               interrupts = <6 8>; /* PATA_DATA6, low active */
+       };
+
+       sensor1: lm75@48 {
+               compatible = "lm75";
+               reg = <0x48>;
+       };
+
+       eeprom: 24c64@50 {
+               compatible = "at,24c64";
+               pagesize = <32>;
+               reg = <0x50>;
+       };
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_fec_1>;
+       phy-mode = "rmii";
+       status = "disabled";
+};
index edc3f1eb6699238b776a1affc9f647103dfdc4ca..d05aa215c7f93591b20f2687c8bac109905acaa1 100644 (file)
                                        };
                                };
 
+                               csi {
+                                       pinctrl_csi_1: csigrp-1 {
+                                               fsl,pins = <
+                                                       286 0x1d5       /* MX53_PAD_CSI0_DATA_EN__IPU_CSI0_DATA_EN */
+                                                       291 0x1d5       /* MX53_PAD_CSI0_VSYNC__IPU_CSI0_VSYNC */
+                                                       280 0x1d5       /* MX53_PAD_CSI0_MCLK__IPU_CSI0_HSYNC */
+                                                       276 0x1d5       /* MX53_PAD_CSI0_PIXCLK__IPU_CSI0_PIXCLK */
+                                                       409 0x1d5       /* MX53_PAD_CSI0_DAT19__IPU_CSI0_D_19 */
+                                                       402 0x1d5       /* MX53_PAD_CSI0_DAT18__IPU_CSI0_D_18 */
+                                                       395 0x1d5       /* MX53_PAD_CSI0_DAT17__IPU_CSI0_D_17 */
+                                                       388 0x1d5       /* MX53_PAD_CSI0_DAT16__IPU_CSI0_D_16 */
+                                                       381 0x1d5       /* MX53_PAD_CSI0_DAT15__IPU_CSI0_D_15 */
+                                                       374 0x1d5       /* MX53_PAD_CSI0_DAT14__IPU_CSI0_D_14 */
+                                                       367 0x1d5       /* MX53_PAD_CSI0_DAT13__IPU_CSI0_D_13 */
+                                                       360 0x1d5       /* MX53_PAD_CSI0_DAT12__IPU_CSI0_D_12 */
+                                                       352 0x1d5       /* MX53_PAD_CSI0_DAT11__IPU_CSI0_D_11 */
+                                                       344 0x1d5       /* MX53_PAD_CSI0_DAT10__IPU_CSI0_D_10 */
+                                                       336 0x1d5       /* MX53_PAD_CSI0_DAT9__IPU_CSI0_D_9 */
+                                                       328 0x1d5       /* MX53_PAD_CSI0_DAT8__IPU_CSI0_D_8 */
+                                                       320 0x1d5       /* MX53_PAD_CSI0_DAT7__IPU_CSI0_D_7 */
+                                                       312 0x1d5       /* MX53_PAD_CSI0_DAT6__IPU_CSI0_D_6 */
+                                                       304 0x1d5       /* MX53_PAD_CSI0_DAT5__IPU_CSI0_D_5 */
+                                                       296 0x1d5       /* MX53_PAD_CSI0_DAT4__IPU_CSI0_D_4 */
+                                                       276 0x1d5       /* MX53_PAD_CSI0_PIXCLK__IPU_CSI0_PIXCLK */
+                                               >;
+                                       };
+                               };
+
+                               cspi {
+                                       pinctrl_cspi_1: cspigrp-1 {
+                                               fsl,pins = <
+                                                       998  0x1d5      /* MX53_PAD_SD1_DATA0__CSPI_MISO */
+                                                       1008 0x1d5      /* MX53_PAD_SD1_CMD__CSPI_MOSI */
+                                                       1022 0x1d5      /* MX53_PAD_SD1_CLK__CSPI_SCLK */
+                                               >;
+                                       };
+                               };
+
                                ecspi1 {
                                        pinctrl_ecspi1_1: ecspi1grp-1 {
                                                fsl,pins = <
                                                        853 0x80000000  /* MX53_PAD_PATA_DIOR__CAN1_RXCAN */
                                                >;
                                        };
+
+                                       pinctrl_can1_2: can1grp-2 {
+                                               fsl,pins = <
+                                                       37  0x80000000  /* MX53_PAD_KEY_COL2__CAN1_TXCAN */
+                                                       44  0x80000000  /* MX53_PAD_KEY_ROW2__CAN1_RXCAN */
+                                               >;
+                                       };
                                };
 
                                can2 {
                                        };
                                };
 
+                               owire {
+                                       pinctrl_owire_1: owiregrp-1 {
+                                               fsl,pins = <
+                                                               1166 0x80000000 /* MX53_PAD_GPIO_18__OWIRE_LINE */
+                                               >;
+                                       };
+                               };
+
                                uart1 {
                                        pinctrl_uart1_1: uart1grp-1 {
                                                fsl,pins = <
                                                        880 0x1c5       /* MX53_PAD_PATA_DA_2__UART3_RTS */
                                                >;
                                        };
+
+                                       pinctrl_uart3_2: uart3grp-2 {
+                                               fsl,pins = <
+                                                       884 0x1c5       /* MX53_PAD_PATA_CS_0__UART3_TXD_MUX */
+                                                       888 0x1c5       /* MX53_PAD_PATA_CS_1__UART3_RXD_MUX */
+                                               >;
+                                       };
+
                                };
 
                                uart4 {
                                status = "disabled";
                        };
 
+                       owire: owire@63fa4000 {
+                               compatible = "fsl,imx53-owire", "fsl,imx21-owire";
+                               reg = <0x63fa4000 0x4000>;
+                               clocks = <&clks 159>;
+                               status = "disabled";
+                       };
+
                        ecspi2: ecspi@63fac000 {
                                #address-cells = <1>;
                                #size-cells = <0>;
diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi
new file mode 100644 (file)
index 0000000..63fafe2
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/include/ "imx6qdl.dtsi"
+
+/ {
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       compatible = "arm,cortex-a9";
+                       reg = <0>;
+                       next-level-cache = <&L2>;
+               };
+
+               cpu@1 {
+                       compatible = "arm,cortex-a9";
+                       reg = <1>;
+                       next-level-cache = <&L2>;
+               };
+       };
+
+       soc {
+               aips1: aips-bus@02000000 {
+                       pxp: pxp@020f0000 {
+                               reg = <0x020f0000 0x4000>;
+                               interrupts = <0 98 0x04>;
+                       };
+
+                       epdc: epdc@020f4000 {
+                               reg = <0x020f4000 0x4000>;
+                               interrupts = <0 97 0x04>;
+                       };
+
+                       lcdif: lcdif@020f8000 {
+                               reg = <0x020f8000 0x4000>;
+                               interrupts = <0 39 0x04>;
+                       };
+               };
+
+               aips2: aips-bus@02100000 {
+                       i2c4: i2c@021f8000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "fsl,imx1-i2c";
+                               reg = <0x021f8000 0x4000>;
+                               interrupts = <0 35 0x04>;
+                               status = "disabled";
+                       };
+               };
+       };
+};
index 5bfa02a3f85cdd027d25d488513272ec4082579c..53eb241fa5adac61cb1885cee8999e04510a3a50 100644 (file)
                reg = <0x10000000 0x80000000>;
        };
 
-       soc {
-               gpmi-nand@00112000 {
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&pinctrl_gpmi_nand_1>;
-                       status = "disabled"; /* gpmi nand conflicts with SD */
-               };
-
-               aips-bus@02000000 { /* AIPS1 */
-                       iomuxc@020e0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       176  0x80000000 /* MX6Q_PAD_EIM_D25__GPIO_3_25 */
-                                               >;
-                                       };
-                               };
-
-                               arm2 {
-                                       pinctrl_usdhc3_arm2: usdhc3grp-arm2 {
-                                               fsl,pins = <
-                                                       1363 0x80000000 /* MX6Q_PAD_NANDF_CS0__GPIO_6_11 */
-                                                       1369 0x80000000 /* MX6Q_PAD_NANDF_CS1__GPIO_6_14 */
-                                               >;
-                                       };
-                               };
-                       };
-               };
-
-               aips-bus@02100000 { /* AIPS2 */
-                       ethernet@02188000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_enet_2>;
-                               phy-mode = "rgmii";
-                               status = "okay";
-                       };
-
-                       usdhc@02198000 { /* uSDHC3 */
-                               cd-gpios = <&gpio6 11 0>;
-                               wp-gpios = <&gpio6 14 0>;
-                               vmmc-supply = <&reg_3p3v>;
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usdhc3_1
-                                            &pinctrl_usdhc3_arm2>;
-                               status = "okay";
-                       };
-
-                       usdhc@0219c000 { /* uSDHC4 */
-                               non-removable;
-                               vmmc-supply = <&reg_3p3v>;
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usdhc4_1>;
-                               status = "okay";
-                       };
-
-                       uart4: serial@021f0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart4_1>;
-                               status = "okay";
-                       };
-               };
-       };
-
        regulators {
                compatible = "simple-bus";
 
                };
        };
 };
+
+&gpmi {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_gpmi_nand_1>;
+       status = "disabled"; /* gpmi nand conflicts with SD */
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               176  0x80000000 /* MX6Q_PAD_EIM_D25__GPIO_3_25 */
+                       >;
+               };
+       };
+
+       arm2 {
+               pinctrl_usdhc3_arm2: usdhc3grp-arm2 {
+                       fsl,pins = <
+                               1363 0x80000000 /* MX6Q_PAD_NANDF_CS0__GPIO_6_11 */
+                               1369 0x80000000 /* MX6Q_PAD_NANDF_CS1__GPIO_6_14 */
+                       >;
+               };
+       };
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_enet_2>;
+       phy-mode = "rgmii";
+       status = "okay";
+};
+
+&usdhc3 {
+       cd-gpios = <&gpio6 11 0>;
+       wp-gpios = <&gpio6 14 0>;
+       vmmc-supply = <&reg_3p3v>;
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usdhc3_1
+                    &pinctrl_usdhc3_arm2>;
+       status = "okay";
+};
+
+&usdhc4 {
+       non-removable;
+       vmmc-supply = <&reg_3p3v>;
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usdhc4_1>;
+       status = "okay";
+};
+
+&uart4 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart4_1>;
+       status = "okay";
+};
index 826e4ad1477ee9ca7c6ac69944f665a29886ec1b..656d489122fe6681d12d274b3eb662a85e02eceb 100644 (file)
        memory {
                reg = <0x10000000 0x80000000>;
        };
+};
 
-       soc {
-               aips-bus@02000000 { /* AIPS1 */
-                       iomuxc@020e0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
 
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       1376 0x80000000 /* MX6Q_PAD_NANDF_CS2__GPIO_6_15 */
-                                                       13   0x80000000 /* MX6Q_PAD_SD2_DAT2__GPIO_1_13 */
-                                               >;
-                                       };
-                               };
-                       };
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               1376 0x80000000 /* MX6Q_PAD_NANDF_CS2__GPIO_6_15 */
+                               13   0x80000000 /* MX6Q_PAD_SD2_DAT2__GPIO_1_13 */
+                       >;
                };
+       };
+};
 
-               aips-bus@02100000 { /* AIPS2 */
-                       uart4: serial@021f0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart4_1>;
-                               status = "okay";
-                       };
+&uart4 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart4_1>;
+       status = "okay";
+};
 
-                       ethernet@02188000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_enet_2>;
-                               phy-mode = "rgmii";
-                               status = "okay";
-                       };
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_enet_2>;
+       phy-mode = "rgmii";
+       status = "okay";
+};
 
-                       usdhc@02198000 { /* uSDHC3 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usdhc3_1>;
-                               cd-gpios = <&gpio6 15 0>;
-                               wp-gpios = <&gpio1 13 0>;
-                               status = "okay";
-                       };
-               };
-       };
+&usdhc3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usdhc3_1>;
+       cd-gpios = <&gpio6 15 0>;
+       wp-gpios = <&gpio1 13 0>;
+       status = "okay";
 };
index d152328285a1387b32b9353cf84f579b20a2a813..2ce355cd05e573e81536599668a6b8f7e0e66ea7 100644 (file)
                reg = <0x10000000 0x40000000>;
        };
 
-       soc {
-               aips-bus@02000000 { /* AIPS1 */
-                       spba-bus@02000000 {
-                               ecspi@02008000 { /* eCSPI1 */
-                                       fsl,spi-num-chipselects = <1>;
-                                       cs-gpios = <&gpio3 19 0>;
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_ecspi1_1>;
-                                       status = "okay";
-
-                                       flash: m25p80@0 {
-                                               compatible = "sst,sst25vf016b";
-                                               spi-max-frequency = <20000000>;
-                                               reg = <0>;
-                                       };
-                               };
-
-                               ssi1: ssi@02028000 {
-                                       fsl,mode = "i2s-slave";
-                                       status = "okay";
-                               };
-                       };
-
-                       iomuxc@020e0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       1450 0x80000000 /* MX6Q_PAD_NANDF_D6__GPIO_2_6 */
-                                                       1458 0x80000000 /* MX6Q_PAD_NANDF_D7__GPIO_2_7 */
-                                                       121  0x80000000 /* MX6Q_PAD_EIM_D19__GPIO_3_19 */
-                                                       144  0x80000000 /* MX6Q_PAD_EIM_D22__GPIO_3_22 */
-                                                       152  0x80000000 /* MX6Q_PAD_EIM_D23__GPIO_3_23 */
-                                                       1262 0x80000000 /* MX6Q_PAD_SD3_DAT5__GPIO_7_0 */
-                                                       1270 0x1f0b0    /* MX6Q_PAD_SD3_DAT4__GPIO_7_1 */
-                                                       953  0x80000000 /* MX6Q_PAD_GPIO_0__CCM_CLKO */
-                                               >;
-                                       };
-                               };
-                       };
-               };
-
-               aips-bus@02100000 { /* AIPS2 */
-                       usb@02184000 { /* USB OTG */
-                               vbus-supply = <&reg_usb_otg_vbus>;
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usbotg_1>;
-                               disable-over-current;
-                               status = "okay";
-                       };
-
-                       usb@02184200 { /* USB1 */
-                               status = "okay";
-                       };
-
-                       ethernet@02188000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_enet_1>;
-                               phy-mode = "rgmii";
-                               phy-reset-gpios = <&gpio3 23 0>;
-                               status = "okay";
-                       };
-
-                       usdhc@02198000 { /* uSDHC3 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usdhc3_2>;
-                               cd-gpios = <&gpio7 0 0>;
-                               wp-gpios = <&gpio7 1 0>;
-                               vmmc-supply = <&reg_3p3v>;
-                               status = "okay";
-                       };
-
-                       usdhc@0219c000 { /* uSDHC4 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usdhc4_2>;
-                               cd-gpios = <&gpio2 6 0>;
-                               wp-gpios = <&gpio2 7 0>;
-                               vmmc-supply = <&reg_3p3v>;
-                               status = "okay";
-                       };
-
-                       audmux@021d8000 {
-                               status = "okay";
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_audmux_1>;
-                       };
-
-                       uart2: serial@021e8000 {
-                               status = "okay";
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_uart2_1>;
-                       };
-
-                       i2c@021a0000 { /* I2C1 */
-                               status = "okay";
-                               clock-frequency = <100000>;
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_i2c1_1>;
-
-                               codec: sgtl5000@0a {
-                                       compatible = "fsl,sgtl5000";
-                                       reg = <0x0a>;
-                                       clocks = <&clks 169>;
-                                       VDDA-supply = <&reg_2p5v>;
-                                       VDDIO-supply = <&reg_3p3v>;
-                               };
-                       };
-               };
-       };
-
        regulators {
                compatible = "simple-bus";
 
                mux-ext-port = <4>;
        };
 };
+
+&ecspi1 {
+       fsl,spi-num-chipselects = <1>;
+       cs-gpios = <&gpio3 19 0>;
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_ecspi1_1>;
+       status = "okay";
+
+       flash: m25p80@0 {
+               compatible = "sst,sst25vf016b";
+               spi-max-frequency = <20000000>;
+               reg = <0>;
+       };
+};
+
+&ssi1 {
+       fsl,mode = "i2s-slave";
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               1450 0x80000000 /* MX6Q_PAD_NANDF_D6__GPIO_2_6 */
+                               1458 0x80000000 /* MX6Q_PAD_NANDF_D7__GPIO_2_7 */
+                               121  0x80000000 /* MX6Q_PAD_EIM_D19__GPIO_3_19 */
+                               144  0x80000000 /* MX6Q_PAD_EIM_D22__GPIO_3_22 */
+                               152  0x80000000 /* MX6Q_PAD_EIM_D23__GPIO_3_23 */
+                               1262 0x80000000 /* MX6Q_PAD_SD3_DAT5__GPIO_7_0 */
+                               1270 0x1f0b0    /* MX6Q_PAD_SD3_DAT4__GPIO_7_1 */
+                               953  0x80000000 /* MX6Q_PAD_GPIO_0__CCM_CLKO */
+                       >;
+               };
+       };
+};
+
+&usbotg {
+       vbus-supply = <&reg_usb_otg_vbus>;
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usbotg_1>;
+       disable-over-current;
+       status = "okay";
+};
+
+&usbh1 {
+       status = "okay";
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_enet_1>;
+       phy-mode = "rgmii";
+       phy-reset-gpios = <&gpio3 23 0>;
+       status = "okay";
+};
+
+&usdhc3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usdhc3_2>;
+       cd-gpios = <&gpio7 0 0>;
+       wp-gpios = <&gpio7 1 0>;
+       vmmc-supply = <&reg_3p3v>;
+       status = "okay";
+};
+
+&usdhc4 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usdhc4_2>;
+       cd-gpios = <&gpio2 6 0>;
+       wp-gpios = <&gpio2 7 0>;
+       vmmc-supply = <&reg_3p3v>;
+       status = "okay";
+};
+
+&audmux {
+       status = "okay";
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_audmux_1>;
+};
+
+&uart2 {
+       status = "okay";
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart2_1>;
+};
+
+&i2c1 {
+       status = "okay";
+       clock-frequency = <100000>;
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_i2c1_1>;
+
+       codec: sgtl5000@0a {
+               compatible = "fsl,sgtl5000";
+               reg = <0x0a>;
+               clocks = <&clks 169>;
+               VDDA-supply = <&reg_2p5v>;
+               VDDIO-supply = <&reg_3p3v>;
+       };
+};
index a42402562b7b55a4b0f67dab681faf812ac4b1c5..2dea304a7980816167b61ea09cde5b9a6642ef1a 100644 (file)
                reg = <0x10000000 0x40000000>;
        };
 
-       soc {
-               aips-bus@02000000 { /* AIPS1 */
-                       spba-bus@02000000 {
-                               uart1: serial@02020000 {
-                                       pinctrl-names = "default";
-                                       pinctrl-0 = <&pinctrl_uart1_1>;
-                                       status = "okay";
-                               };
-                       };
-
-                       iomuxc@020e0000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_hog>;
-
-                               hog {
-                                       pinctrl_hog: hoggrp {
-                                               fsl,pins = <
-                                                       1004 0x80000000 /* MX6Q_PAD_GPIO_4__GPIO_1_4 */
-                                                       1012 0x80000000 /* MX6Q_PAD_GPIO_5__GPIO_1_5 */
-                                                       1402 0x80000000 /* MX6Q_PAD_NANDF_D0__GPIO_2_0 */
-                                                       1410 0x80000000 /* MX6Q_PAD_NANDF_D1__GPIO_2_1 */
-                                                       1418 0x80000000 /* MX6Q_PAD_NANDF_D2__GPIO_2_2 */
-                                                       1426 0x80000000 /* MX6Q_PAD_NANDF_D3__GPIO_2_3 */
-                                               >;
-                                       };
-                               };
-                       };
-               };
-
-               aips-bus@02100000 { /* AIPS2 */
-                       ethernet@02188000 {
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_enet_1>;
-                               phy-mode = "rgmii";
-                               status = "okay";
-                       };
-
-                       usdhc@02194000 { /* uSDHC2 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usdhc2_1>;
-                               cd-gpios = <&gpio2 2 0>;
-                               wp-gpios = <&gpio2 3 0>;
-                               status = "okay";
-                       };
-
-                       usdhc@02198000 { /* uSDHC3 */
-                               pinctrl-names = "default";
-                               pinctrl-0 = <&pinctrl_usdhc3_1>;
-                               cd-gpios = <&gpio2 0 0>;
-                               wp-gpios = <&gpio2 1 0>;
-                               status = "okay";
-                       };
-               };
-       };
-
        gpio-keys {
                compatible = "gpio-keys";
 
                };
        };
 };
+
+&uart1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_uart1_1>;
+       status = "okay";
+};
+
+&iomuxc {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_hog>;
+
+       hog {
+               pinctrl_hog: hoggrp {
+                       fsl,pins = <
+                               1004 0x80000000 /* MX6Q_PAD_GPIO_4__GPIO_1_4 */
+                               1012 0x80000000 /* MX6Q_PAD_GPIO_5__GPIO_1_5 */
+                               1402 0x80000000 /* MX6Q_PAD_NANDF_D0__GPIO_2_0 */
+                               1410 0x80000000 /* MX6Q_PAD_NANDF_D1__GPIO_2_1 */
+                               1418 0x80000000 /* MX6Q_PAD_NANDF_D2__GPIO_2_2 */
+                               1426 0x80000000 /* MX6Q_PAD_NANDF_D3__GPIO_2_3 */
+                       >;
+               };
+       };
+};
+
+&fec {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_enet_1>;
+       phy-mode = "rgmii";
+       status = "okay";
+};
+
+&usdhc2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usdhc2_1>;
+       cd-gpios = <&gpio2 2 0>;
+       wp-gpios = <&gpio2 3 0>;
+       status = "okay";
+};
+
+&usdhc3 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_usdhc3_1>;
+       cd-gpios = <&gpio2 0 0>;
+       wp-gpios = <&gpio2 1 0>;
+       status = "okay";
+};
index ff1205ea57190385fefb75fbc320851c669fae1f..cba021eb035e7b62c7dc166d291e8e02196b3b1a 100644 (file)
@@ -1,33 +1,16 @@
+
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
- * Copyright 2011 Linaro Ltd.
+ * Copyright 2013 Freescale Semiconductor, Inc.
  *
- * The code contained herein is licensed under the GNU General Public
- * License. You may obtain a copy of the GNU General Public License
- * Version 2 or later at the following locations:
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
  *
- * http://www.opensource.org/licenses/gpl-license.html
- * http://www.gnu.org/copyleft/gpl.html
  */
 
-/include/ "skeleton.dtsi"
+/include/ "imx6qdl.dtsi"
 
 / {
-       aliases {
-               serial0 = &uart1;
-               serial1 = &uart2;
-               serial2 = &uart3;
-               serial3 = &uart4;
-               serial4 = &uart5;
-               gpio0 = &gpio1;
-               gpio1 = &gpio2;
-               gpio2 = &gpio3;
-               gpio3 = &gpio4;
-               gpio4 = &gpio5;
-               gpio5 = &gpio6;
-               gpio6 = &gpio7;
-       };
-
        cpus {
                #address-cells = <1>;
                #size-cells = <0>;
                        next-level-cache = <&L2>;
                        operating-points = <
                                /* kHz    uV */
-                               792000  1100000
+                               1200000 1275000
+                               996000  1250000
+                               792000  1150000
                                396000  950000
-                               198000  850000
                        >;
                        clock-latency = <61036>; /* two CLK32 periods */
-                       cpu0-supply = <&reg_cpu>;
+                       clocks = <&clks 104>, <&clks 6>, <&clks 16>,
+                                <&clks 17>, <&clks 170>;
+                       clock-names = "arm", "pll2_pfd2_396m", "step",
+                                     "pll1_sw", "pll1_sys";
+                       arm-supply = <&reg_arm>;
+                       pu-supply = <&reg_pu>;
+                       soc-supply = <&reg_soc>;
                };
 
                cpu@1 {
                };
        };
 
-       intc: interrupt-controller@00a01000 {
-               compatible = "arm,cortex-a9-gic";
-               #interrupt-cells = <3>;
-               #address-cells = <1>;
-               #size-cells = <1>;
-               interrupt-controller;
-               reg = <0x00a01000 0x1000>,
-                     <0x00a00100 0x100>;
-       };
-
-       clocks {
-               #address-cells = <1>;
-               #size-cells = <0>;
-
-               ckil {
-                       compatible = "fsl,imx-ckil", "fixed-clock";
-                       clock-frequency = <32768>;
-               };
-
-               ckih1 {
-                       compatible = "fsl,imx-ckih1", "fixed-clock";
-                       clock-frequency = <0>;
-               };
-
-               osc {
-                       compatible = "fsl,imx-osc", "fixed-clock";
-                       clock-frequency = <24000000>;
-               };
-       };
-
        soc {
-               #address-cells = <1>;
-               #size-cells = <1>;
-               compatible = "simple-bus";
-               interrupt-parent = <&intc>;
-               ranges;
-
-               dma-apbh@00110000 {
-                       compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
-                       reg = <0x00110000 0x2000>;
-                       clocks = <&clks 106>;
-               };
-
-               nfc: gpmi-nand@00112000 {
-                       compatible = "fsl,imx6q-gpmi-nand";
-                       #address-cells = <1>;
-                       #size-cells = <1>;
-                       reg = <0x00112000 0x2000>, <0x00114000 0x2000>;
-                       reg-names = "gpmi-nand", "bch";
-                       interrupts = <0 13 0x04>, <0 15 0x04>;
-                       interrupt-names = "gpmi-dma", "bch";
-                       clocks = <&clks 152>, <&clks 153>, <&clks 151>,
-                                <&clks 150>, <&clks 149>;
-                       clock-names = "gpmi_io", "gpmi_apb", "gpmi_bch",
-                                     "gpmi_bch_apb", "per1_bch";
-                       fsl,gpmi-dma-channel = <0>;
-                       status = "disabled";
-               };
-
-               timer@00a00600 {
-                       compatible = "arm,cortex-a9-twd-timer";
-                       reg = <0x00a00600 0x20>;
-                       interrupts = <1 13 0xf01>;
-               };
-
-               L2: l2-cache@00a02000 {
-                       compatible = "arm,pl310-cache";
-                       reg = <0x00a02000 0x1000>;
-                       interrupts = <0 92 0x04>;
-                       cache-unified;
-                       cache-level = <2>;
-               };
-
                aips-bus@02000000 { /* AIPS1 */
-                       compatible = "fsl,aips-bus", "simple-bus";
-                       #address-cells = <1>;
-                       #size-cells = <1>;
-                       reg = <0x02000000 0x100000>;
-                       ranges;
-
                        spba-bus@02000000 {
-                               compatible = "fsl,spba-bus", "simple-bus";
-                               #address-cells = <1>;
-                               #size-cells = <1>;
-                               reg = <0x02000000 0x40000>;
-                               ranges;
-
-                               spdif: spdif@02004000 {
-                                       reg = <0x02004000 0x4000>;
-                                       interrupts = <0 52 0x04>;
-                               };
-
-                               ecspi1: ecspi@02008000 {
-                                       #address-cells = <1>;
-                                       #size-cells = <0>;
-                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
-                                       reg = <0x02008000 0x4000>;
-                                       interrupts = <0 31 0x04>;
-                                       clocks = <&clks 112>, <&clks 112>;
-                                       clock-names = "ipg", "per";
-                                       status = "disabled";
-                               };
-
-                               ecspi2: ecspi@0200c000 {
-                                       #address-cells = <1>;
-                                       #size-cells = <0>;
-                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
-                                       reg = <0x0200c000 0x4000>;
-                                       interrupts = <0 32 0x04>;
-                                       clocks = <&clks 113>, <&clks 113>;
-                                       clock-names = "ipg", "per";
-                                       status = "disabled";
-                               };
-
-                               ecspi3: ecspi@02010000 {
-                                       #address-cells = <1>;
-                                       #size-cells = <0>;
-                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
-                                       reg = <0x02010000 0x4000>;
-                                       interrupts = <0 33 0x04>;
-                                       clocks = <&clks 114>, <&clks 114>;
-                                       clock-names = "ipg", "per";
-                                       status = "disabled";
-                               };
-
-                               ecspi4: ecspi@02014000 {
-                                       #address-cells = <1>;
-                                       #size-cells = <0>;
-                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
-                                       reg = <0x02014000 0x4000>;
-                                       interrupts = <0 34 0x04>;
-                                       clocks = <&clks 115>, <&clks 115>;
-                                       clock-names = "ipg", "per";
-                                       status = "disabled";
-                               };
-
                                ecspi5: ecspi@02018000 {
                                        #address-cells = <1>;
                                        #size-cells = <0>;
                                        clock-names = "ipg", "per";
                                        status = "disabled";
                                };
-
-                               uart1: serial@02020000 {
-                                       compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
-                                       reg = <0x02020000 0x4000>;
-                                       interrupts = <0 26 0x04>;
-                                       clocks = <&clks 160>, <&clks 161>;
-                                       clock-names = "ipg", "per";
-                                       status = "disabled";
-                               };
-
-                               esai: esai@02024000 {
-                                       reg = <0x02024000 0x4000>;
-                                       interrupts = <0 51 0x04>;
-                               };
-
-                               ssi1: ssi@02028000 {
-                                       compatible = "fsl,imx6q-ssi","fsl,imx21-ssi";
-                                       reg = <0x02028000 0x4000>;
-                                       interrupts = <0 46 0x04>;
-                                       clocks = <&clks 178>;
-                                       fsl,fifo-depth = <15>;
-                                       fsl,ssi-dma-events = <38 37>;
-                                       status = "disabled";
-                               };
-
-                               ssi2: ssi@0202c000 {
-                                       compatible = "fsl,imx6q-ssi","fsl,imx21-ssi";
-                                       reg = <0x0202c000 0x4000>;
-                                       interrupts = <0 47 0x04>;
-                                       clocks = <&clks 179>;
-                                       fsl,fifo-depth = <15>;
-                                       fsl,ssi-dma-events = <42 41>;
-                                       status = "disabled";
-                               };
-
-                               ssi3: ssi@02030000 {
-                                       compatible = "fsl,imx6q-ssi","fsl,imx21-ssi";
-                                       reg = <0x02030000 0x4000>;
-                                       interrupts = <0 48 0x04>;
-                                       clocks = <&clks 180>;
-                                       fsl,fifo-depth = <15>;
-                                       fsl,ssi-dma-events = <46 45>;
-                                       status = "disabled";
-                               };
-
-                               asrc: asrc@02034000 {
-                                       reg = <0x02034000 0x4000>;
-                                       interrupts = <0 50 0x04>;
-                               };
-
-                               spba@0203c000 {
-                                       reg = <0x0203c000 0x4000>;
-                               };
-                       };
-
-                       vpu: vpu@02040000 {
-                               reg = <0x02040000 0x3c000>;
-                               interrupts = <0 3 0x04 0 12 0x04>;
-                       };
-
-                       aipstz@0207c000 { /* AIPSTZ1 */
-                               reg = <0x0207c000 0x4000>;
-                       };
-
-                       pwm1: pwm@02080000 {
-                               #pwm-cells = <2>;
-                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
-                               reg = <0x02080000 0x4000>;
-                               interrupts = <0 83 0x04>;
-                               clocks = <&clks 62>, <&clks 145>;
-                               clock-names = "ipg", "per";
-                       };
-
-                       pwm2: pwm@02084000 {
-                               #pwm-cells = <2>;
-                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
-                               reg = <0x02084000 0x4000>;
-                               interrupts = <0 84 0x04>;
-                               clocks = <&clks 62>, <&clks 146>;
-                               clock-names = "ipg", "per";
-                       };
-
-                       pwm3: pwm@02088000 {
-                               #pwm-cells = <2>;
-                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
-                               reg = <0x02088000 0x4000>;
-                               interrupts = <0 85 0x04>;
-                               clocks = <&clks 62>, <&clks 147>;
-                               clock-names = "ipg", "per";
-                       };
-
-                       pwm4: pwm@0208c000 {
-                               #pwm-cells = <2>;
-                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
-                               reg = <0x0208c000 0x4000>;
-                               interrupts = <0 86 0x04>;
-                               clocks = <&clks 62>, <&clks 148>;
-                               clock-names = "ipg", "per";
-                       };
-
-                       can1: flexcan@02090000 {
-                               reg = <0x02090000 0x4000>;
-                               interrupts = <0 110 0x04>;
-                       };
-
-                       can2: flexcan@02094000 {
-                               reg = <0x02094000 0x4000>;
-                               interrupts = <0 111 0x04>;
-                       };
-
-                       gpt: gpt@02098000 {
-                               compatible = "fsl,imx6q-gpt";
-                               reg = <0x02098000 0x4000>;
-                               interrupts = <0 55 0x04>;
-                       };
-
-                       gpio1: gpio@0209c000 {
-                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
-                               reg = <0x0209c000 0x4000>;
-                               interrupts = <0 66 0x04 0 67 0x04>;
-                               gpio-controller;
-                               #gpio-cells = <2>;
-                               interrupt-controller;
-                               #interrupt-cells = <2>;
-                       };
-
-                       gpio2: gpio@020a0000 {
-                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
-                               reg = <0x020a0000 0x4000>;
-                               interrupts = <0 68 0x04 0 69 0x04>;
-                               gpio-controller;
-                               #gpio-cells = <2>;
-                               interrupt-controller;
-                               #interrupt-cells = <2>;
-                       };
-
-                       gpio3: gpio@020a4000 {
-                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
-                               reg = <0x020a4000 0x4000>;
-                               interrupts = <0 70 0x04 0 71 0x04>;
-                               gpio-controller;
-                               #gpio-cells = <2>;
-                               interrupt-controller;
-                               #interrupt-cells = <2>;
-                       };
-
-                       gpio4: gpio@020a8000 {
-                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
-                               reg = <0x020a8000 0x4000>;
-                               interrupts = <0 72 0x04 0 73 0x04>;
-                               gpio-controller;
-                               #gpio-cells = <2>;
-                               interrupt-controller;
-                               #interrupt-cells = <2>;
-                       };
-
-                       gpio5: gpio@020ac000 {
-                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
-                               reg = <0x020ac000 0x4000>;
-                               interrupts = <0 74 0x04 0 75 0x04>;
-                               gpio-controller;
-                               #gpio-cells = <2>;
-                               interrupt-controller;
-                               #interrupt-cells = <2>;
-                       };
-
-                       gpio6: gpio@020b0000 {
-                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
-                               reg = <0x020b0000 0x4000>;
-                               interrupts = <0 76 0x04 0 77 0x04>;
-                               gpio-controller;
-                               #gpio-cells = <2>;
-                               interrupt-controller;
-                               #interrupt-cells = <2>;
-                       };
-
-                       gpio7: gpio@020b4000 {
-                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
-                               reg = <0x020b4000 0x4000>;
-                               interrupts = <0 78 0x04 0 79 0x04>;
-                               gpio-controller;
-                               #gpio-cells = <2>;
-                               interrupt-controller;
-                               #interrupt-cells = <2>;
-                       };
-
-                       kpp: kpp@020b8000 {
-                               reg = <0x020b8000 0x4000>;
-                               interrupts = <0 82 0x04>;
-                       };
-
-                       wdog1: wdog@020bc000 {
-                               compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt";
-                               reg = <0x020bc000 0x4000>;
-                               interrupts = <0 80 0x04>;
-                               clocks = <&clks 0>;
-                       };
-
-                       wdog2: wdog@020c0000 {
-                               compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt";
-                               reg = <0x020c0000 0x4000>;
-                               interrupts = <0 81 0x04>;
-                               clocks = <&clks 0>;
-                               status = "disabled";
-                       };
-
-                       clks: ccm@020c4000 {
-                               compatible = "fsl,imx6q-ccm";
-                               reg = <0x020c4000 0x4000>;
-                               interrupts = <0 87 0x04 0 88 0x04>;
-                               #clock-cells = <1>;
-                       };
-
-                       anatop: anatop@020c8000 {
-                               compatible = "fsl,imx6q-anatop", "syscon", "simple-bus";
-                               reg = <0x020c8000 0x1000>;
-                               interrupts = <0 49 0x04 0 54 0x04 0 127 0x04>;
-
-                               regulator-1p1@110 {
-                                       compatible = "fsl,anatop-regulator";
-                                       regulator-name = "vdd1p1";
-                                       regulator-min-microvolt = <800000>;
-                                       regulator-max-microvolt = <1375000>;
-                                       regulator-always-on;
-                                       anatop-reg-offset = <0x110>;
-                                       anatop-vol-bit-shift = <8>;
-                                       anatop-vol-bit-width = <5>;
-                                       anatop-min-bit-val = <4>;
-                                       anatop-min-voltage = <800000>;
-                                       anatop-max-voltage = <1375000>;
-                               };
-
-                               regulator-3p0@120 {
-                                       compatible = "fsl,anatop-regulator";
-                                       regulator-name = "vdd3p0";
-                                       regulator-min-microvolt = <2800000>;
-                                       regulator-max-microvolt = <3150000>;
-                                       regulator-always-on;
-                                       anatop-reg-offset = <0x120>;
-                                       anatop-vol-bit-shift = <8>;
-                                       anatop-vol-bit-width = <5>;
-                                       anatop-min-bit-val = <0>;
-                                       anatop-min-voltage = <2625000>;
-                                       anatop-max-voltage = <3400000>;
-                               };
-
-                               regulator-2p5@130 {
-                                       compatible = "fsl,anatop-regulator";
-                                       regulator-name = "vdd2p5";
-                                       regulator-min-microvolt = <2000000>;
-                                       regulator-max-microvolt = <2750000>;
-                                       regulator-always-on;
-                                       anatop-reg-offset = <0x130>;
-                                       anatop-vol-bit-shift = <8>;
-                                       anatop-vol-bit-width = <5>;
-                                       anatop-min-bit-val = <0>;
-                                       anatop-min-voltage = <2000000>;
-                                       anatop-max-voltage = <2750000>;
-                               };
-
-                               reg_cpu: regulator-vddcore@140 {
-                                       compatible = "fsl,anatop-regulator";
-                                       regulator-name = "cpu";
-                                       regulator-min-microvolt = <725000>;
-                                       regulator-max-microvolt = <1450000>;
-                                       regulator-always-on;
-                                       anatop-reg-offset = <0x140>;
-                                       anatop-vol-bit-shift = <0>;
-                                       anatop-vol-bit-width = <5>;
-                                       anatop-min-bit-val = <1>;
-                                       anatop-min-voltage = <725000>;
-                                       anatop-max-voltage = <1450000>;
-                               };
-
-                               regulator-vddpu@140 {
-                                       compatible = "fsl,anatop-regulator";
-                                       regulator-name = "vddpu";
-                                       regulator-min-microvolt = <725000>;
-                                       regulator-max-microvolt = <1450000>;
-                                       regulator-always-on;
-                                       anatop-reg-offset = <0x140>;
-                                       anatop-vol-bit-shift = <9>;
-                                       anatop-vol-bit-width = <5>;
-                                       anatop-min-bit-val = <1>;
-                                       anatop-min-voltage = <725000>;
-                                       anatop-max-voltage = <1450000>;
-                               };
-
-                               regulator-vddsoc@140 {
-                                       compatible = "fsl,anatop-regulator";
-                                       regulator-name = "vddsoc";
-                                       regulator-min-microvolt = <725000>;
-                                       regulator-max-microvolt = <1450000>;
-                                       regulator-always-on;
-                                       anatop-reg-offset = <0x140>;
-                                       anatop-vol-bit-shift = <18>;
-                                       anatop-vol-bit-width = <5>;
-                                       anatop-min-bit-val = <1>;
-                                       anatop-min-voltage = <725000>;
-                                       anatop-max-voltage = <1450000>;
-                               };
-                       };
-
-                       usbphy1: usbphy@020c9000 {
-                               compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy";
-                               reg = <0x020c9000 0x1000>;
-                               interrupts = <0 44 0x04>;
-                               clocks = <&clks 182>;
-                       };
-
-                       usbphy2: usbphy@020ca000 {
-                               compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy";
-                               reg = <0x020ca000 0x1000>;
-                               interrupts = <0 45 0x04>;
-                               clocks = <&clks 183>;
-                       };
-
-                       snvs@020cc000 {
-                               compatible = "fsl,sec-v4.0-mon", "simple-bus";
-                               #address-cells = <1>;
-                               #size-cells = <1>;
-                               ranges = <0 0x020cc000 0x4000>;
-
-                               snvs-rtc-lp@34 {
-                                       compatible = "fsl,sec-v4.0-mon-rtc-lp";
-                                       reg = <0x34 0x58>;
-                                       interrupts = <0 19 0x04 0 20 0x04>;
-                               };
-                       };
-
-                       epit1: epit@020d0000 { /* EPIT1 */
-                               reg = <0x020d0000 0x4000>;
-                               interrupts = <0 56 0x04>;
-                       };
-
-                       epit2: epit@020d4000 { /* EPIT2 */
-                               reg = <0x020d4000 0x4000>;
-                               interrupts = <0 57 0x04>;
-                       };
-
-                       src: src@020d8000 {
-                               compatible = "fsl,imx6q-src";
-                               reg = <0x020d8000 0x4000>;
-                               interrupts = <0 91 0x04 0 96 0x04>;
-                       };
-
-                       gpc: gpc@020dc000 {
-                               compatible = "fsl,imx6q-gpc";
-                               reg = <0x020dc000 0x4000>;
-                               interrupts = <0 89 0x04 0 90 0x04>;
-                       };
-
-                       gpr: iomuxc-gpr@020e0000 {
-                               compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
-                               reg = <0x020e0000 0x38>;
                        };
 
                        iomuxc: iomuxc@020e0000 {
                                        };
                                };
                        };
-
-                       dcic1: dcic@020e4000 {
-                               reg = <0x020e4000 0x4000>;
-                               interrupts = <0 124 0x04>;
-                       };
-
-                       dcic2: dcic@020e8000 {
-                               reg = <0x020e8000 0x4000>;
-                               interrupts = <0 125 0x04>;
-                       };
-
-                       sdma: sdma@020ec000 {
-                               compatible = "fsl,imx6q-sdma", "fsl,imx35-sdma";
-                               reg = <0x020ec000 0x4000>;
-                               interrupts = <0 2 0x04>;
-                               clocks = <&clks 155>, <&clks 155>;
-                               clock-names = "ipg", "ahb";
-                               fsl,sdma-ram-script-name = "imx/sdma/sdma-imx6q-to1.bin";
-                       };
-               };
-
-               aips-bus@02100000 { /* AIPS2 */
-                       compatible = "fsl,aips-bus", "simple-bus";
-                       #address-cells = <1>;
-                       #size-cells = <1>;
-                       reg = <0x02100000 0x100000>;
-                       ranges;
-
-                       caam@02100000 {
-                               reg = <0x02100000 0x40000>;
-                               interrupts = <0 105 0x04 0 106 0x04>;
-                       };
-
-                       aipstz@0217c000 { /* AIPSTZ2 */
-                               reg = <0x0217c000 0x4000>;
-                       };
-
-                       usbotg: usb@02184000 {
-                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
-                               reg = <0x02184000 0x200>;
-                               interrupts = <0 43 0x04>;
-                               clocks = <&clks 162>;
-                               fsl,usbphy = <&usbphy1>;
-                               fsl,usbmisc = <&usbmisc 0>;
-                               status = "disabled";
-                       };
-
-                       usbh1: usb@02184200 {
-                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
-                               reg = <0x02184200 0x200>;
-                               interrupts = <0 40 0x04>;
-                               clocks = <&clks 162>;
-                               fsl,usbphy = <&usbphy2>;
-                               fsl,usbmisc = <&usbmisc 1>;
-                               status = "disabled";
-                       };
-
-                       usbh2: usb@02184400 {
-                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
-                               reg = <0x02184400 0x200>;
-                               interrupts = <0 41 0x04>;
-                               clocks = <&clks 162>;
-                               fsl,usbmisc = <&usbmisc 2>;
-                               status = "disabled";
-                       };
-
-                       usbh3: usb@02184600 {
-                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
-                               reg = <0x02184600 0x200>;
-                               interrupts = <0 42 0x04>;
-                               clocks = <&clks 162>;
-                               fsl,usbmisc = <&usbmisc 3>;
-                               status = "disabled";
-                       };
-
-                       usbmisc: usbmisc@02184800 {
-                               #index-cells = <1>;
-                               compatible = "fsl,imx6q-usbmisc";
-                               reg = <0x02184800 0x200>;
-                               clocks = <&clks 162>;
-                       };
-
-                       fec: ethernet@02188000 {
-                               compatible = "fsl,imx6q-fec";
-                               reg = <0x02188000 0x4000>;
-                               interrupts = <0 118 0x04 0 119 0x04>;
-                               clocks = <&clks 117>, <&clks 117>, <&clks 190>;
-                               clock-names = "ipg", "ahb", "ptp";
-                               status = "disabled";
-                       };
-
-                       mlb@0218c000 {
-                               reg = <0x0218c000 0x4000>;
-                               interrupts = <0 53 0x04 0 117 0x04 0 126 0x04>;
-                       };
-
-                       usdhc1: usdhc@02190000 {
-                               compatible = "fsl,imx6q-usdhc";
-                               reg = <0x02190000 0x4000>;
-                               interrupts = <0 22 0x04>;
-                               clocks = <&clks 163>, <&clks 163>, <&clks 163>;
-                               clock-names = "ipg", "ahb", "per";
-                               bus-width = <4>;
-                               status = "disabled";
-                       };
-
-                       usdhc2: usdhc@02194000 {
-                               compatible = "fsl,imx6q-usdhc";
-                               reg = <0x02194000 0x4000>;
-                               interrupts = <0 23 0x04>;
-                               clocks = <&clks 164>, <&clks 164>, <&clks 164>;
-                               clock-names = "ipg", "ahb", "per";
-                               bus-width = <4>;
-                               status = "disabled";
-                       };
-
-                       usdhc3: usdhc@02198000 {
-                               compatible = "fsl,imx6q-usdhc";
-                               reg = <0x02198000 0x4000>;
-                               interrupts = <0 24 0x04>;
-                               clocks = <&clks 165>, <&clks 165>, <&clks 165>;
-                               clock-names = "ipg", "ahb", "per";
-                               bus-width = <4>;
-                               status = "disabled";
-                       };
-
-                       usdhc4: usdhc@0219c000 {
-                               compatible = "fsl,imx6q-usdhc";
-                               reg = <0x0219c000 0x4000>;
-                               interrupts = <0 25 0x04>;
-                               clocks = <&clks 166>, <&clks 166>, <&clks 166>;
-                               clock-names = "ipg", "ahb", "per";
-                               bus-width = <4>;
-                               status = "disabled";
-                       };
-
-                       i2c1: i2c@021a0000 {
-                               #address-cells = <1>;
-                               #size-cells = <0>;
-                               compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c";
-                               reg = <0x021a0000 0x4000>;
-                               interrupts = <0 36 0x04>;
-                               clocks = <&clks 125>;
-                               status = "disabled";
-                       };
-
-                       i2c2: i2c@021a4000 {
-                               #address-cells = <1>;
-                               #size-cells = <0>;
-                               compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c";
-                               reg = <0x021a4000 0x4000>;
-                               interrupts = <0 37 0x04>;
-                               clocks = <&clks 126>;
-                               status = "disabled";
-                       };
-
-                       i2c3: i2c@021a8000 {
-                               #address-cells = <1>;
-                               #size-cells = <0>;
-                               compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c";
-                               reg = <0x021a8000 0x4000>;
-                               interrupts = <0 38 0x04>;
-                               clocks = <&clks 127>;
-                               status = "disabled";
-                       };
-
-                       romcp@021ac000 {
-                               reg = <0x021ac000 0x4000>;
-                       };
-
-                       mmdc0: mmdc@021b0000 { /* MMDC0 */
-                               compatible = "fsl,imx6q-mmdc";
-                               reg = <0x021b0000 0x4000>;
-                       };
-
-                       mmdc1: mmdc@021b4000 { /* MMDC1 */
-                               reg = <0x021b4000 0x4000>;
-                       };
-
-                       weim@021b8000 {
-                               reg = <0x021b8000 0x4000>;
-                               interrupts = <0 14 0x04>;
-                       };
-
-                       ocotp@021bc000 {
-                               reg = <0x021bc000 0x4000>;
-                       };
-
-                       ocotp@021c0000 {
-                               reg = <0x021c0000 0x4000>;
-                               interrupts = <0 21 0x04>;
-                       };
-
-                       tzasc@021d0000 { /* TZASC1 */
-                               reg = <0x021d0000 0x4000>;
-                               interrupts = <0 108 0x04>;
-                       };
-
-                       tzasc@021d4000 { /* TZASC2 */
-                               reg = <0x021d4000 0x4000>;
-                               interrupts = <0 109 0x04>;
-                       };
-
-                       audmux: audmux@021d8000 {
-                               compatible = "fsl,imx6q-audmux", "fsl,imx31-audmux";
-                               reg = <0x021d8000 0x4000>;
-                               status = "disabled";
-                       };
-
-                       mipi@021dc000 { /* MIPI-CSI */
-                               reg = <0x021dc000 0x4000>;
-                       };
-
-                       mipi@021e0000 { /* MIPI-DSI */
-                               reg = <0x021e0000 0x4000>;
-                       };
-
-                       vdoa@021e4000 {
-                               reg = <0x021e4000 0x4000>;
-                               interrupts = <0 18 0x04>;
-                       };
-
-                       uart2: serial@021e8000 {
-                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
-                               reg = <0x021e8000 0x4000>;
-                               interrupts = <0 27 0x04>;
-                               clocks = <&clks 160>, <&clks 161>;
-                               clock-names = "ipg", "per";
-                               status = "disabled";
-                       };
-
-                       uart3: serial@021ec000 {
-                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
-                               reg = <0x021ec000 0x4000>;
-                               interrupts = <0 28 0x04>;
-                               clocks = <&clks 160>, <&clks 161>;
-                               clock-names = "ipg", "per";
-                               status = "disabled";
-                       };
-
-                       uart4: serial@021f0000 {
-                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
-                               reg = <0x021f0000 0x4000>;
-                               interrupts = <0 29 0x04>;
-                               clocks = <&clks 160>, <&clks 161>;
-                               clock-names = "ipg", "per";
-                               status = "disabled";
-                       };
-
-                       uart5: serial@021f4000 {
-                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
-                               reg = <0x021f4000 0x4000>;
-                               interrupts = <0 30 0x04>;
-                               clocks = <&clks 160>, <&clks 161>;
-                               clock-names = "ipg", "per";
-                               status = "disabled";
-                       };
-               };
-
-               ipu1: ipu@02400000 {
-                       #crtc-cells = <1>;
-                       compatible = "fsl,imx6q-ipu";
-                       reg = <0x02400000 0x400000>;
-                       interrupts = <0 6 0x4 0 5 0x4>;
-                       clocks = <&clks 130>, <&clks 131>, <&clks 132>;
-                       clock-names = "bus", "di0", "di1";
                };
 
                ipu2: ipu@02800000 {
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
new file mode 100644 (file)
index 0000000..06ec460
--- /dev/null
@@ -0,0 +1,800 @@
+/*
+ * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011 Linaro Ltd.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/include/ "skeleton.dtsi"
+
+/ {
+       aliases {
+               serial0 = &uart1;
+               serial1 = &uart2;
+               serial2 = &uart3;
+               serial3 = &uart4;
+               serial4 = &uart5;
+               gpio0 = &gpio1;
+               gpio1 = &gpio2;
+               gpio2 = &gpio3;
+               gpio3 = &gpio4;
+               gpio4 = &gpio5;
+               gpio5 = &gpio6;
+               gpio6 = &gpio7;
+       };
+
+       intc: interrupt-controller@00a01000 {
+               compatible = "arm,cortex-a9-gic";
+               #interrupt-cells = <3>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               interrupt-controller;
+               reg = <0x00a01000 0x1000>,
+                     <0x00a00100 0x100>;
+       };
+
+       clocks {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ckil {
+                       compatible = "fsl,imx-ckil", "fixed-clock";
+                       clock-frequency = <32768>;
+               };
+
+               ckih1 {
+                       compatible = "fsl,imx-ckih1", "fixed-clock";
+                       clock-frequency = <0>;
+               };
+
+               osc {
+                       compatible = "fsl,imx-osc", "fixed-clock";
+                       clock-frequency = <24000000>;
+               };
+       };
+
+       soc {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "simple-bus";
+               interrupt-parent = <&intc>;
+               ranges;
+
+               dma-apbh@00110000 {
+                       compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh";
+                       reg = <0x00110000 0x2000>;
+                       clocks = <&clks 106>;
+               };
+
+               gpmi: gpmi-nand@00112000 {
+                       compatible = "fsl,imx6q-gpmi-nand";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       reg = <0x00112000 0x2000>, <0x00114000 0x2000>;
+                       reg-names = "gpmi-nand", "bch";
+                       interrupts = <0 13 0x04>, <0 15 0x04>;
+                       interrupt-names = "gpmi-dma", "bch";
+                       clocks = <&clks 152>, <&clks 153>, <&clks 151>,
+                                <&clks 150>, <&clks 149>;
+                       clock-names = "gpmi_io", "gpmi_apb", "gpmi_bch",
+                                     "gpmi_bch_apb", "per1_bch";
+                       fsl,gpmi-dma-channel = <0>;
+                       status = "disabled";
+               };
+
+               timer@00a00600 {
+                       compatible = "arm,cortex-a9-twd-timer";
+                       reg = <0x00a00600 0x20>;
+                       interrupts = <1 13 0xf01>;
+               };
+
+               L2: l2-cache@00a02000 {
+                       compatible = "arm,pl310-cache";
+                       reg = <0x00a02000 0x1000>;
+                       interrupts = <0 92 0x04>;
+                       cache-unified;
+                       cache-level = <2>;
+               };
+
+               aips-bus@02000000 { /* AIPS1 */
+                       compatible = "fsl,aips-bus", "simple-bus";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       reg = <0x02000000 0x100000>;
+                       ranges;
+
+                       spba-bus@02000000 {
+                               compatible = "fsl,spba-bus", "simple-bus";
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               reg = <0x02000000 0x40000>;
+                               ranges;
+
+                               spdif: spdif@02004000 {
+                                       reg = <0x02004000 0x4000>;
+                                       interrupts = <0 52 0x04>;
+                               };
+
+                               ecspi1: ecspi@02008000 {
+                                       #address-cells = <1>;
+                                       #size-cells = <0>;
+                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
+                                       reg = <0x02008000 0x4000>;
+                                       interrupts = <0 31 0x04>;
+                                       clocks = <&clks 112>, <&clks 112>;
+                                       clock-names = "ipg", "per";
+                                       status = "disabled";
+                               };
+
+                               ecspi2: ecspi@0200c000 {
+                                       #address-cells = <1>;
+                                       #size-cells = <0>;
+                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
+                                       reg = <0x0200c000 0x4000>;
+                                       interrupts = <0 32 0x04>;
+                                       clocks = <&clks 113>, <&clks 113>;
+                                       clock-names = "ipg", "per";
+                                       status = "disabled";
+                               };
+
+                               ecspi3: ecspi@02010000 {
+                                       #address-cells = <1>;
+                                       #size-cells = <0>;
+                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
+                                       reg = <0x02010000 0x4000>;
+                                       interrupts = <0 33 0x04>;
+                                       clocks = <&clks 114>, <&clks 114>;
+                                       clock-names = "ipg", "per";
+                                       status = "disabled";
+                               };
+
+                               ecspi4: ecspi@02014000 {
+                                       #address-cells = <1>;
+                                       #size-cells = <0>;
+                                       compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi";
+                                       reg = <0x02014000 0x4000>;
+                                       interrupts = <0 34 0x04>;
+                                       clocks = <&clks 115>, <&clks 115>;
+                                       clock-names = "ipg", "per";
+                                       status = "disabled";
+                               };
+
+                               uart1: serial@02020000 {
+                                       compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
+                                       reg = <0x02020000 0x4000>;
+                                       interrupts = <0 26 0x04>;
+                                       clocks = <&clks 160>, <&clks 161>;
+                                       clock-names = "ipg", "per";
+                                       status = "disabled";
+                               };
+
+                               esai: esai@02024000 {
+                                       reg = <0x02024000 0x4000>;
+                                       interrupts = <0 51 0x04>;
+                               };
+
+                               ssi1: ssi@02028000 {
+                                       compatible = "fsl,imx6q-ssi","fsl,imx21-ssi";
+                                       reg = <0x02028000 0x4000>;
+                                       interrupts = <0 46 0x04>;
+                                       clocks = <&clks 178>;
+                                       fsl,fifo-depth = <15>;
+                                       fsl,ssi-dma-events = <38 37>;
+                                       status = "disabled";
+                               };
+
+                               ssi2: ssi@0202c000 {
+                                       compatible = "fsl,imx6q-ssi","fsl,imx21-ssi";
+                                       reg = <0x0202c000 0x4000>;
+                                       interrupts = <0 47 0x04>;
+                                       clocks = <&clks 179>;
+                                       fsl,fifo-depth = <15>;
+                                       fsl,ssi-dma-events = <42 41>;
+                                       status = "disabled";
+                               };
+
+                               ssi3: ssi@02030000 {
+                                       compatible = "fsl,imx6q-ssi","fsl,imx21-ssi";
+                                       reg = <0x02030000 0x4000>;
+                                       interrupts = <0 48 0x04>;
+                                       clocks = <&clks 180>;
+                                       fsl,fifo-depth = <15>;
+                                       fsl,ssi-dma-events = <46 45>;
+                                       status = "disabled";
+                               };
+
+                               asrc: asrc@02034000 {
+                                       reg = <0x02034000 0x4000>;
+                                       interrupts = <0 50 0x04>;
+                               };
+
+                               spba@0203c000 {
+                                       reg = <0x0203c000 0x4000>;
+                               };
+                       };
+
+                       vpu: vpu@02040000 {
+                               reg = <0x02040000 0x3c000>;
+                               interrupts = <0 3 0x04 0 12 0x04>;
+                       };
+
+                       aipstz@0207c000 { /* AIPSTZ1 */
+                               reg = <0x0207c000 0x4000>;
+                       };
+
+                       pwm1: pwm@02080000 {
+                               #pwm-cells = <2>;
+                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
+                               reg = <0x02080000 0x4000>;
+                               interrupts = <0 83 0x04>;
+                               clocks = <&clks 62>, <&clks 145>;
+                               clock-names = "ipg", "per";
+                       };
+
+                       pwm2: pwm@02084000 {
+                               #pwm-cells = <2>;
+                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
+                               reg = <0x02084000 0x4000>;
+                               interrupts = <0 84 0x04>;
+                               clocks = <&clks 62>, <&clks 146>;
+                               clock-names = "ipg", "per";
+                       };
+
+                       pwm3: pwm@02088000 {
+                               #pwm-cells = <2>;
+                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
+                               reg = <0x02088000 0x4000>;
+                               interrupts = <0 85 0x04>;
+                               clocks = <&clks 62>, <&clks 147>;
+                               clock-names = "ipg", "per";
+                       };
+
+                       pwm4: pwm@0208c000 {
+                               #pwm-cells = <2>;
+                               compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm";
+                               reg = <0x0208c000 0x4000>;
+                               interrupts = <0 86 0x04>;
+                               clocks = <&clks 62>, <&clks 148>;
+                               clock-names = "ipg", "per";
+                       };
+
+                       can1: flexcan@02090000 {
+                               reg = <0x02090000 0x4000>;
+                               interrupts = <0 110 0x04>;
+                       };
+
+                       can2: flexcan@02094000 {
+                               reg = <0x02094000 0x4000>;
+                               interrupts = <0 111 0x04>;
+                       };
+
+                       gpt: gpt@02098000 {
+                               compatible = "fsl,imx6q-gpt";
+                               reg = <0x02098000 0x4000>;
+                               interrupts = <0 55 0x04>;
+                       };
+
+                       gpio1: gpio@0209c000 {
+                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
+                               reg = <0x0209c000 0x4000>;
+                               interrupts = <0 66 0x04 0 67 0x04>;
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                       };
+
+                       gpio2: gpio@020a0000 {
+                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
+                               reg = <0x020a0000 0x4000>;
+                               interrupts = <0 68 0x04 0 69 0x04>;
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                       };
+
+                       gpio3: gpio@020a4000 {
+                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
+                               reg = <0x020a4000 0x4000>;
+                               interrupts = <0 70 0x04 0 71 0x04>;
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                       };
+
+                       gpio4: gpio@020a8000 {
+                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
+                               reg = <0x020a8000 0x4000>;
+                               interrupts = <0 72 0x04 0 73 0x04>;
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                       };
+
+                       gpio5: gpio@020ac000 {
+                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
+                               reg = <0x020ac000 0x4000>;
+                               interrupts = <0 74 0x04 0 75 0x04>;
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                       };
+
+                       gpio6: gpio@020b0000 {
+                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
+                               reg = <0x020b0000 0x4000>;
+                               interrupts = <0 76 0x04 0 77 0x04>;
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                       };
+
+                       gpio7: gpio@020b4000 {
+                               compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio";
+                               reg = <0x020b4000 0x4000>;
+                               interrupts = <0 78 0x04 0 79 0x04>;
+                               gpio-controller;
+                               #gpio-cells = <2>;
+                               interrupt-controller;
+                               #interrupt-cells = <2>;
+                       };
+
+                       kpp: kpp@020b8000 {
+                               reg = <0x020b8000 0x4000>;
+                               interrupts = <0 82 0x04>;
+                       };
+
+                       wdog1: wdog@020bc000 {
+                               compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt";
+                               reg = <0x020bc000 0x4000>;
+                               interrupts = <0 80 0x04>;
+                               clocks = <&clks 0>;
+                       };
+
+                       wdog2: wdog@020c0000 {
+                               compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt";
+                               reg = <0x020c0000 0x4000>;
+                               interrupts = <0 81 0x04>;
+                               clocks = <&clks 0>;
+                               status = "disabled";
+                       };
+
+                       clks: ccm@020c4000 {
+                               compatible = "fsl,imx6q-ccm";
+                               reg = <0x020c4000 0x4000>;
+                               interrupts = <0 87 0x04 0 88 0x04>;
+                               #clock-cells = <1>;
+                       };
+
+                       anatop: anatop@020c8000 {
+                               compatible = "fsl,imx6q-anatop", "syscon", "simple-bus";
+                               reg = <0x020c8000 0x1000>;
+                               interrupts = <0 49 0x04 0 54 0x04 0 127 0x04>;
+
+                               regulator-1p1@110 {
+                                       compatible = "fsl,anatop-regulator";
+                                       regulator-name = "vdd1p1";
+                                       regulator-min-microvolt = <800000>;
+                                       regulator-max-microvolt = <1375000>;
+                                       regulator-always-on;
+                                       anatop-reg-offset = <0x110>;
+                                       anatop-vol-bit-shift = <8>;
+                                       anatop-vol-bit-width = <5>;
+                                       anatop-min-bit-val = <4>;
+                                       anatop-min-voltage = <800000>;
+                                       anatop-max-voltage = <1375000>;
+                               };
+
+                               regulator-3p0@120 {
+                                       compatible = "fsl,anatop-regulator";
+                                       regulator-name = "vdd3p0";
+                                       regulator-min-microvolt = <2800000>;
+                                       regulator-max-microvolt = <3150000>;
+                                       regulator-always-on;
+                                       anatop-reg-offset = <0x120>;
+                                       anatop-vol-bit-shift = <8>;
+                                       anatop-vol-bit-width = <5>;
+                                       anatop-min-bit-val = <0>;
+                                       anatop-min-voltage = <2625000>;
+                                       anatop-max-voltage = <3400000>;
+                               };
+
+                               regulator-2p5@130 {
+                                       compatible = "fsl,anatop-regulator";
+                                       regulator-name = "vdd2p5";
+                                       regulator-min-microvolt = <2000000>;
+                                       regulator-max-microvolt = <2750000>;
+                                       regulator-always-on;
+                                       anatop-reg-offset = <0x130>;
+                                       anatop-vol-bit-shift = <8>;
+                                       anatop-vol-bit-width = <5>;
+                                       anatop-min-bit-val = <0>;
+                                       anatop-min-voltage = <2000000>;
+                                       anatop-max-voltage = <2750000>;
+                               };
+
+                               reg_arm: regulator-vddcore@140 {
+                                       compatible = "fsl,anatop-regulator";
+                                       regulator-name = "cpu";
+                                       regulator-min-microvolt = <725000>;
+                                       regulator-max-microvolt = <1450000>;
+                                       regulator-always-on;
+                                       anatop-reg-offset = <0x140>;
+                                       anatop-vol-bit-shift = <0>;
+                                       anatop-vol-bit-width = <5>;
+                                       anatop-delay-reg-offset = <0x170>;
+                                       anatop-delay-bit-shift = <24>;
+                                       anatop-delay-bit-width = <2>;
+                                       anatop-min-bit-val = <1>;
+                                       anatop-min-voltage = <725000>;
+                                       anatop-max-voltage = <1450000>;
+                               };
+
+                               reg_pu: regulator-vddpu@140 {
+                                       compatible = "fsl,anatop-regulator";
+                                       regulator-name = "vddpu";
+                                       regulator-min-microvolt = <725000>;
+                                       regulator-max-microvolt = <1450000>;
+                                       regulator-always-on;
+                                       anatop-reg-offset = <0x140>;
+                                       anatop-vol-bit-shift = <9>;
+                                       anatop-vol-bit-width = <5>;
+                                       anatop-delay-reg-offset = <0x170>;
+                                       anatop-delay-bit-shift = <26>;
+                                       anatop-delay-bit-width = <2>;
+                                       anatop-min-bit-val = <1>;
+                                       anatop-min-voltage = <725000>;
+                                       anatop-max-voltage = <1450000>;
+                               };
+
+                               reg_soc: regulator-vddsoc@140 {
+                                       compatible = "fsl,anatop-regulator";
+                                       regulator-name = "vddsoc";
+                                       regulator-min-microvolt = <725000>;
+                                       regulator-max-microvolt = <1450000>;
+                                       regulator-always-on;
+                                       anatop-reg-offset = <0x140>;
+                                       anatop-vol-bit-shift = <18>;
+                                       anatop-vol-bit-width = <5>;
+                                       anatop-delay-reg-offset = <0x170>;
+                                       anatop-delay-bit-shift = <28>;
+                                       anatop-delay-bit-width = <2>;
+                                       anatop-min-bit-val = <1>;
+                                       anatop-min-voltage = <725000>;
+                                       anatop-max-voltage = <1450000>;
+                               };
+                       };
+
+                       usbphy1: usbphy@020c9000 {
+                               compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy";
+                               reg = <0x020c9000 0x1000>;
+                               interrupts = <0 44 0x04>;
+                               clocks = <&clks 182>;
+                       };
+
+                       usbphy2: usbphy@020ca000 {
+                               compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy";
+                               reg = <0x020ca000 0x1000>;
+                               interrupts = <0 45 0x04>;
+                               clocks = <&clks 183>;
+                       };
+
+                       snvs@020cc000 {
+                               compatible = "fsl,sec-v4.0-mon", "simple-bus";
+                               #address-cells = <1>;
+                               #size-cells = <1>;
+                               ranges = <0 0x020cc000 0x4000>;
+
+                               snvs-rtc-lp@34 {
+                                       compatible = "fsl,sec-v4.0-mon-rtc-lp";
+                                       reg = <0x34 0x58>;
+                                       interrupts = <0 19 0x04 0 20 0x04>;
+                               };
+                       };
+
+                       epit1: epit@020d0000 { /* EPIT1 */
+                               reg = <0x020d0000 0x4000>;
+                               interrupts = <0 56 0x04>;
+                       };
+
+                       epit2: epit@020d4000 { /* EPIT2 */
+                               reg = <0x020d4000 0x4000>;
+                               interrupts = <0 57 0x04>;
+                       };
+
+                       src: src@020d8000 {
+                               compatible = "fsl,imx6q-src";
+                               reg = <0x020d8000 0x4000>;
+                               interrupts = <0 91 0x04 0 96 0x04>;
+                       };
+
+                       gpc: gpc@020dc000 {
+                               compatible = "fsl,imx6q-gpc";
+                               reg = <0x020dc000 0x4000>;
+                               interrupts = <0 89 0x04 0 90 0x04>;
+                       };
+
+                       gpr: iomuxc-gpr@020e0000 {
+                               compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
+                               reg = <0x020e0000 0x38>;
+                       };
+
+                       dcic1: dcic@020e4000 {
+                               reg = <0x020e4000 0x4000>;
+                               interrupts = <0 124 0x04>;
+                       };
+
+                       dcic2: dcic@020e8000 {
+                               reg = <0x020e8000 0x4000>;
+                               interrupts = <0 125 0x04>;
+                       };
+
+                       sdma: sdma@020ec000 {
+                               compatible = "fsl,imx6q-sdma", "fsl,imx35-sdma";
+                               reg = <0x020ec000 0x4000>;
+                               interrupts = <0 2 0x04>;
+                               clocks = <&clks 155>, <&clks 155>;
+                               clock-names = "ipg", "ahb";
+                               fsl,sdma-ram-script-name = "imx/sdma/sdma-imx6q.bin";
+                       };
+               };
+
+               aips-bus@02100000 { /* AIPS2 */
+                       compatible = "fsl,aips-bus", "simple-bus";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       reg = <0x02100000 0x100000>;
+                       ranges;
+
+                       caam@02100000 {
+                               reg = <0x02100000 0x40000>;
+                               interrupts = <0 105 0x04 0 106 0x04>;
+                       };
+
+                       aipstz@0217c000 { /* AIPSTZ2 */
+                               reg = <0x0217c000 0x4000>;
+                       };
+
+                       usbotg: usb@02184000 {
+                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+                               reg = <0x02184000 0x200>;
+                               interrupts = <0 43 0x04>;
+                               clocks = <&clks 162>;
+                               fsl,usbphy = <&usbphy1>;
+                               fsl,usbmisc = <&usbmisc 0>;
+                               status = "disabled";
+                       };
+
+                       usbh1: usb@02184200 {
+                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+                               reg = <0x02184200 0x200>;
+                               interrupts = <0 40 0x04>;
+                               clocks = <&clks 162>;
+                               fsl,usbphy = <&usbphy2>;
+                               fsl,usbmisc = <&usbmisc 1>;
+                               status = "disabled";
+                       };
+
+                       usbh2: usb@02184400 {
+                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+                               reg = <0x02184400 0x200>;
+                               interrupts = <0 41 0x04>;
+                               clocks = <&clks 162>;
+                               fsl,usbmisc = <&usbmisc 2>;
+                               status = "disabled";
+                       };
+
+                       usbh3: usb@02184600 {
+                               compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+                               reg = <0x02184600 0x200>;
+                               interrupts = <0 42 0x04>;
+                               clocks = <&clks 162>;
+                               fsl,usbmisc = <&usbmisc 3>;
+                               status = "disabled";
+                       };
+
+                       usbmisc: usbmisc@02184800 {
+                               #index-cells = <1>;
+                               compatible = "fsl,imx6q-usbmisc";
+                               reg = <0x02184800 0x200>;
+                               clocks = <&clks 162>;
+                       };
+
+                       fec: ethernet@02188000 {
+                               compatible = "fsl,imx6q-fec";
+                               reg = <0x02188000 0x4000>;
+                               interrupts = <0 118 0x04 0 119 0x04>;
+                               clocks = <&clks 117>, <&clks 117>, <&clks 190>;
+                               clock-names = "ipg", "ahb", "ptp";
+                               status = "disabled";
+                       };
+
+                       mlb@0218c000 {
+                               reg = <0x0218c000 0x4000>;
+                               interrupts = <0 53 0x04 0 117 0x04 0 126 0x04>;
+                       };
+
+                       usdhc1: usdhc@02190000 {
+                               compatible = "fsl,imx6q-usdhc";
+                               reg = <0x02190000 0x4000>;
+                               interrupts = <0 22 0x04>;
+                               clocks = <&clks 163>, <&clks 163>, <&clks 163>;
+                               clock-names = "ipg", "ahb", "per";
+                               bus-width = <4>;
+                               status = "disabled";
+                       };
+
+                       usdhc2: usdhc@02194000 {
+                               compatible = "fsl,imx6q-usdhc";
+                               reg = <0x02194000 0x4000>;
+                               interrupts = <0 23 0x04>;
+                               clocks = <&clks 164>, <&clks 164>, <&clks 164>;
+                               clock-names = "ipg", "ahb", "per";
+                               bus-width = <4>;
+                               status = "disabled";
+                       };
+
+                       usdhc3: usdhc@02198000 {
+                               compatible = "fsl,imx6q-usdhc";
+                               reg = <0x02198000 0x4000>;
+                               interrupts = <0 24 0x04>;
+                               clocks = <&clks 165>, <&clks 165>, <&clks 165>;
+                               clock-names = "ipg", "ahb", "per";
+                               bus-width = <4>;
+                               status = "disabled";
+                       };
+
+                       usdhc4: usdhc@0219c000 {
+                               compatible = "fsl,imx6q-usdhc";
+                               reg = <0x0219c000 0x4000>;
+                               interrupts = <0 25 0x04>;
+                               clocks = <&clks 166>, <&clks 166>, <&clks 166>;
+                               clock-names = "ipg", "ahb", "per";
+                               bus-width = <4>;
+                               status = "disabled";
+                       };
+
+                       i2c1: i2c@021a0000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c";
+                               reg = <0x021a0000 0x4000>;
+                               interrupts = <0 36 0x04>;
+                               clocks = <&clks 125>;
+                               status = "disabled";
+                       };
+
+                       i2c2: i2c@021a4000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c";
+                               reg = <0x021a4000 0x4000>;
+                               interrupts = <0 37 0x04>;
+                               clocks = <&clks 126>;
+                               status = "disabled";
+                       };
+
+                       i2c3: i2c@021a8000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c";
+                               reg = <0x021a8000 0x4000>;
+                               interrupts = <0 38 0x04>;
+                               clocks = <&clks 127>;
+                               status = "disabled";
+                       };
+
+                       romcp@021ac000 {
+                               reg = <0x021ac000 0x4000>;
+                       };
+
+                       mmdc0: mmdc@021b0000 { /* MMDC0 */
+                               compatible = "fsl,imx6q-mmdc";
+                               reg = <0x021b0000 0x4000>;
+                       };
+
+                       mmdc1: mmdc@021b4000 { /* MMDC1 */
+                               reg = <0x021b4000 0x4000>;
+                       };
+
+                       weim@021b8000 {
+                               reg = <0x021b8000 0x4000>;
+                               interrupts = <0 14 0x04>;
+                       };
+
+                       ocotp@021bc000 {
+                               compatible = "fsl,imx6q-ocotp";
+                               reg = <0x021bc000 0x4000>;
+                       };
+
+                       ocotp@021c0000 {
+                               reg = <0x021c0000 0x4000>;
+                               interrupts = <0 21 0x04>;
+                       };
+
+                       tzasc@021d0000 { /* TZASC1 */
+                               reg = <0x021d0000 0x4000>;
+                               interrupts = <0 108 0x04>;
+                       };
+
+                       tzasc@021d4000 { /* TZASC2 */
+                               reg = <0x021d4000 0x4000>;
+                               interrupts = <0 109 0x04>;
+                       };
+
+                       audmux: audmux@021d8000 {
+                               compatible = "fsl,imx6q-audmux", "fsl,imx31-audmux";
+                               reg = <0x021d8000 0x4000>;
+                               status = "disabled";
+                       };
+
+                       mipi@021dc000 { /* MIPI-CSI */
+                               reg = <0x021dc000 0x4000>;
+                       };
+
+                       mipi@021e0000 { /* MIPI-DSI */
+                               reg = <0x021e0000 0x4000>;
+                       };
+
+                       vdoa@021e4000 {
+                               reg = <0x021e4000 0x4000>;
+                               interrupts = <0 18 0x04>;
+                       };
+
+                       uart2: serial@021e8000 {
+                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
+                               reg = <0x021e8000 0x4000>;
+                               interrupts = <0 27 0x04>;
+                               clocks = <&clks 160>, <&clks 161>;
+                               clock-names = "ipg", "per";
+                               status = "disabled";
+                       };
+
+                       uart3: serial@021ec000 {
+                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
+                               reg = <0x021ec000 0x4000>;
+                               interrupts = <0 28 0x04>;
+                               clocks = <&clks 160>, <&clks 161>;
+                               clock-names = "ipg", "per";
+                               status = "disabled";
+                       };
+
+                       uart4: serial@021f0000 {
+                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
+                               reg = <0x021f0000 0x4000>;
+                               interrupts = <0 29 0x04>;
+                               clocks = <&clks 160>, <&clks 161>;
+                               clock-names = "ipg", "per";
+                               status = "disabled";
+                       };
+
+                       uart5: serial@021f4000 {
+                               compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
+                               reg = <0x021f4000 0x4000>;
+                               interrupts = <0 30 0x04>;
+                               clocks = <&clks 160>, <&clks 161>;
+                               clock-names = "ipg", "per";
+                               status = "disabled";
+                       };
+               };
+
+               ipu1: ipu@02400000 {
+                       #crtc-cells = <1>;
+                       compatible = "fsl,imx6q-ipu";
+                       reg = <0x02400000 0x400000>;
+                       interrupts = <0 6 0x4 0 5 0x4>;
+                       clocks = <&clks 130>, <&clks 131>, <&clks 132>;
+                       clock-names = "bus", "di0", "di1";
+               };
+       };
+};
index 4ccea2130a6cd2b42029522ac2c8c527fba2728f..192cf76fbf93ca92914a76df62b9fd45bf892249 100644 (file)
@@ -5,6 +5,12 @@
                        compatible = "marvell,88f6282-pinctrl";
                        reg = <0x10000 0x20>;
 
+                       pmx_nand: pmx-nand {
+                               marvell,pins = "mpp0", "mpp1", "mpp2", "mpp3",
+                                                       "mpp4", "mpp5", "mpp18", "mpp19";
+                               marvell,function = "nand";
+                       };
+
                        pmx_sata0: pmx-sata0 {
                                marvell,pins = "mpp5", "mpp21", "mpp23";
                                marvell,function = "sata0";
                                marvell,pins = "mpp8", "mpp9";
                                marvell,function = "twsi0";
                        };
+
+                       pmx_twsi1: pmx-twsi1 {
+                               marvell,pins = "mpp36", "mpp37";
+                               marvell,function = "twsi1";
+                       };
+
                        pmx_uart0: pmx-uart0 {
                                marvell,pins = "mpp10", "mpp11";
                                marvell,function = "uart0";
                                marvell,pins = "mpp13", "mpp14";
                                marvell,function = "uart1";
                        };
+                       pmx_sdio: pmx-sdio {
+                               marvell,pins = "mpp12", "mpp13", "mpp14",
+                                              "mpp15", "mpp16", "mpp17";
+                               marvell,function = "sdio";
+                       };
                };
 
                i2c@11100 {
index f2d386c95b070ba097f6ec7e716a5c9c441a6938..ef2d8c7057093b8ef609488fe56ae45d9991c7b3 100644 (file)
                        status = "okay";
                        nr-ports = <1>;
                };
+
+               mvsdio@90000 {
+                       pinctrl-0 = <&pmx_sdio>;
+                       pinctrl-names = "default";
+                       status = "okay";
+                       /* No CD or WP GPIOs */
+               };
        };
 
        gpio-leds {
diff --git a/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts b/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts
new file mode 100644 (file)
index 0000000..9555a86
--- /dev/null
@@ -0,0 +1,94 @@
+/dts-v1/;
+
+/include/ "kirkwood.dtsi"
+/include/ "kirkwood-6281.dtsi"
+
+/ {
+       model = "Globalscale Technologies Guruplug Server Plus";
+       compatible = "globalscale,guruplug-server-plus", "globalscale,guruplug", "marvell,kirkwood-88f6281", "marvell,kirkwood";
+
+       memory {
+               device_type = "memory";
+               reg = <0x00000000 0x20000000>;
+       };
+
+       chosen {
+               bootargs = "console=ttyS0,115200n8 earlyprintk";
+       };
+
+       ocp@f1000000 {
+               pinctrl: pinctrl@10000 {
+
+                       pinctrl-0 = < &pmx_led_health_r &pmx_led_health_g
+                                     &pmx_led_wmode_r &pmx_led_wmode_g >;
+                       pinctrl-names = "default";
+
+                       pmx_led_health_r: pmx-led-health-r {
+                               marvell,pins = "mpp46";
+                               marvell,function = "gpio";
+                       };
+                       pmx_led_health_g: pmx-led-health-g {
+                               marvell,pins = "mpp47";
+                               marvell,function = "gpio";
+                       };
+                       pmx_led_wmode_r: pmx-led-wmode-r {
+                               marvell,pins = "mpp48";
+                               marvell,function = "gpio";
+                       };
+                       pmx_led_wmode_g: pmx-led-wmode-g {
+                               marvell,pins = "mpp49";
+                               marvell,function = "gpio";
+                       };
+               };
+               serial@12000 {
+                       clock-frequency = <200000000>;
+                       status = "ok";
+               };
+
+               nand@3000000 {
+                       status = "okay";
+
+                       partition@0 {
+                               label = "u-boot";
+                               reg = <0x00000000 0x00100000>;
+                               read-only;
+                       };
+
+                       partition@100000 {
+                               label = "uImage";
+                               reg = <0x00100000 0x00400000>;
+                       };
+
+                       partition@500000 {
+                               label = "data";
+                               reg = <0x00500000 0x1fb00000>;
+                       };
+               };
+
+               sata@80000 {
+                       status = "okay";
+                       nr-ports = <1>;
+               };
+       };
+
+       gpio-leds {
+               compatible = "gpio-leds";
+
+               health-r {
+                       label = "guruplug:red:health";
+                       gpios = <&gpio1 14 1>;
+               };
+               health-g {
+                       label = "guruplug:green:health";
+                       gpios = <&gpio1 15 1>;
+               };
+               wmode-r {
+                       label = "guruplug:red:wmode";
+                       gpios = <&gpio1 16 1>;
+               };
+               wmode-g {
+                       label = "guruplug:green:wmode";
+                       gpios = <&gpio1 17 1>;
+               };
+       };
+};
index 262c654037605cee7c32310b28d481b5fab3d1cf..662dfd81b1cee275358db30328259529928d99f1 100644 (file)
                pinctrl: pinctrl@10000 {
 
                        pinctrl-0 = < &pmx_nand &pmx_uart0
-                                     &pmx_led_health &pmx_sdio
+                                     &pmx_led_health
                                      &pmx_sata0 &pmx_sata1
                                      &pmx_led_user1o
                                      &pmx_led_user1g &pmx_led_user0o
                                      &pmx_led_user0g &pmx_led_misc
-                                     &pmx_sdio_cd
                                    >;
                        pinctrl-names = "default";
 
                        status = "okay";
 
                };
+
+               mvsdio@90000 {
+                       pinctrl-0 = <&pmx_sdio &pmx_sdio_cd>;
+                       pinctrl-names = "default";
+                       status = "okay";
+                       cd-gpios = <&gpio1 15 0>;
+                       /* No WP GPIO */
+               };
        };
 
        gpio-leds {
index 77d21abfcdf73a3430f529be076cb4f17cd9e13d..e8e7ecef1650e3bb9ad6b73d43382a0946a4abff 100644 (file)
                        gpios = <&gpio0 12 0>;
                };
        };
+
+       gpio_poweroff {
+               compatible = "gpio-poweroff";
+               gpios = <&gpio0 31 0>;
+       };
+
 };
index 5509f96595466552cb84c20be6eb0f81f5bd9832..3a178cf708d729885c78d81926349e9c15fc4f56 100644 (file)
        };
 
        ocp@f1000000 {
+               pinctrl: pinctrl@10000 {
+                       pinctrl-0 = < &pmx_led_esata_green
+                                     &pmx_led_esata_red
+                                     &pmx_led_usb_green
+                                     &pmx_led_usb_red
+                                     &pmx_usb_power_off
+                                     &pmx_led_sys_green
+                                     &pmx_led_sys_red
+                                     &pmx_btn_reset
+                                     &pmx_btn_copy
+                                     &pmx_led_copy_green
+                                     &pmx_led_copy_red
+                                     &pmx_led_hdd_green
+                                     &pmx_led_hdd_red
+                                     &pmx_unknown
+                                     &pmx_btn_power
+                                     &pmx_pwr_off >;
+                       pinctrl-names = "default";
+
+                       pmx_led_esata_green: pmx-led-esata-green {
+                               marvell,pins = "mpp12";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_esata_red: pmx-led-esata-red {
+                               marvell,pins = "mpp13";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_usb_green: pmx-led-usb-green {
+                               marvell,pins = "mpp15";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_usb_red: pmx-led-usb-red {
+                               marvell,pins = "mpp16";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_usb_power_off: pmx-usb-power-off {
+                               marvell,pins = "mpp21";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_sys_green: pmx-led-sys-green {
+                               marvell,pins = "mpp28";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_sys_red: pmx-led-sys-red {
+                               marvell,pins = "mpp29";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_btn_reset: pmx-btn-reset {
+                               marvell,pins = "mpp36";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_btn_copy: pmx-btn-copy {
+                               marvell,pins = "mpp37";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_copy_green: pmx-led-copy-green {
+                               marvell,pins = "mpp39";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_copy_red: pmx-led-copy-red {
+                               marvell,pins = "mpp40";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_hdd_green: pmx-led-hdd-green {
+                               marvell,pins = "mpp41";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_hdd_red: pmx-led-hdd-red {
+                               marvell,pins = "mpp42";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_unknown: pmx-unknown {
+                               marvell,pins = "mpp44";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_btn_power: pmx-btn-power {
+                               marvell,pins = "mpp46";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_pwr_off: pmx-pwr-off {
+                               marvell,pins = "mpp48";
+                               marvell,function = "gpio";
+                       };
+               };
 
                serial@12000 {
                        clock-frequency = <200000000>;
 
                i2c@11000 {
                        status = "okay";
+
+                       adt7476: adt7476a@2e {
+                               compatible = "adt7476";
+                               reg = <0x2e>;
+                       };
                };
 
                nand@3000000 {
                        gpios = <&gpio1 8 0>;
                };
        };
+
+       gpio_poweroff {
+               compatible = "gpio-poweroff";
+               gpios = <&gpio1 16 0>;
+       };
+
+       regulators {
+               compatible = "simple-bus";
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               usb0_power_off: regulator@1 {
+                       compatible = "regulator-fixed";
+                       reg = <1>;
+                       regulator-name = "USB Power Off";
+                       regulator-min-microvolt = <5000000>;
+                       regulator-max-microvolt = <5000000>;
+                       regulator-always-on;
+                       regulator-boot-on;
+                       gpio = <&gpio0 21 0>;
+               };
+       };
 };
index 49d3d74d4d3827534e08830e3d1a84d2b4bf12d3..ede7fe0d7a87d872f9a108f25c479d6fc0c1349d 100644 (file)
                                reg = <0x30>;
                        };
                };
+
+               pinctrl: pinctrl@10000 {
+                       pinctrl-0 = < &pmx_nand &pmx_uart0
+                               &pmx_uart1 &pmx_twsi1
+                               &pmx_dip_sw0 &pmx_dip_sw1
+                               &pmx_dip_sw2 &pmx_dip_sw3
+                               &pmx_gpio_0 &pmx_gpio_1
+                               &pmx_gpio_2 &pmx_gpio_3
+                               &pmx_gpio_4 &pmx_gpio_5
+                               &pmx_gpio_6 &pmx_gpio_7
+                               &pmx_led_red &pmx_led_green
+                               &pmx_led_yellow >;
+                       pinctrl-names = "default";
+
+                       pmx_uart0: pmx-uart0 {
+                               marvell,pins = "mpp10", "mpp11", "mpp15",
+                                       "mpp16";
+                               marvell,function = "uart0";
+                       };
+
+                       pmx_uart1: pmx-uart1 {
+                               marvell,pins = "mpp13", "mpp14", "mpp8",
+                                       "mpp9";
+                               marvell,function = "uart1";
+                       };
+
+                       pmx_sysrst: pmx-sysrst {
+                               marvell,pins = "mpp6";
+                               marvell,function = "sysrst";
+                       };
+
+                       pmx_dip_sw0: pmx-dip-sw0 {
+                               marvell,pins = "mpp20";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_dip_sw1: pmx-dip-sw1 {
+                               marvell,pins = "mpp21";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_dip_sw2: pmx-dip-sw2 {
+                               marvell,pins = "mpp22";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_dip_sw3: pmx-dip-sw3 {
+                               marvell,pins = "mpp23";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_0: pmx-gpio-0 {
+                               marvell,pins = "mpp24";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_1: pmx-gpio-1 {
+                               marvell,pins = "mpp25";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_2: pmx-gpio-2 {
+                               marvell,pins = "mpp26";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_3: pmx-gpio-3 {
+                               marvell,pins = "mpp27";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_4: pmx-gpio-4 {
+                               marvell,pins = "mpp28";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_5: pmx-gpio-5 {
+                               marvell,pins = "mpp29";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_6: pmx-gpio-6 {
+                               marvell,pins = "mpp30";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_7: pmx-gpio-7 {
+                               marvell,pins = "mpp31";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_gpio_init: pmx-init {
+                               marvell,pins = "mpp38";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_usb_oc: pmx-usb-oc {
+                               marvell,pins = "mpp39";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_red: pmx-led-red {
+                               marvell,pins = "mpp41";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_green: pmx-led-green {
+                               marvell,pins = "mpp42";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_yellow: pmx-led-yellow {
+                               marvell,pins = "mpp43";
+                               marvell,function = "gpio";
+                       };
+               };
        };
 
        gpio-leds {
index cd15452a52a62024f9518a5abdb677ab8292fd9c..842ff95d60df1f392347abb9b5a1d44981a309be 100644 (file)
@@ -1,6 +1,7 @@
 /dts-v1/;
 
 /include/ "kirkwood.dtsi"
+/include/ "kirkwood-6282.dtsi"
 
 / {
        model = "Univeral Scientific Industrial Co. Topkick-1281P2";
        };
 
        ocp@f1000000 {
+               pinctrl: pinctrl@10000 {
+                       /*
+                        * GPIO LED layout
+                        *
+                        *       /-SYS_LED(2)
+                        *       |
+                        *       |   /-DISK_LED
+                        *       |   |
+                        *       |   |   /-WLAN_LED(2)
+                        *       |   |   |
+                        * [SW] [*] [*] [*]
+                        */
+
+                       /*
+                        * Switch positions
+                        *
+                        *     /-SW_LEFT(2)
+                        *     |
+                        *     |   /-SW_IDLE
+                        *     |   |
+                        *     |   |   /-SW_RIGHT
+                        *     |   |   |
+                        * PS [L] [I] [R] LEDS
+                        */
+                       pinctrl-0 = < &pmx_led_disk_yellow
+                                     &pmx_sata0_pwr_enable
+                                     &pmx_led_sys_red
+                                     &pmx_led_sys_blue
+                                     &pmx_led_wifi_green
+                                     &pmx_sw_left
+                                     &pmx_sw_right
+                                     &pmx_sw_idle
+                                     &pmx_sw_left2
+                                     &pmx_led_wifi_yellow
+                                     &pmx_uart0
+                                     &pmx_nand
+                                     &pmx_twsi0 >;
+                       pinctrl-names = "default";
+
+                       pmx_led_disk_yellow: pmx-led-disk-yellow {
+                               marvell,pins = "mpp21";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_sata0_pwr_enable: pmx-sata0-pwr-enable {
+                               marvell,pins = "mpp36";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_sys_red: pmx-led-sys-red {
+                               marvell,pins = "mpp37";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_sys_blue: pmx-led-sys-blue {
+                               marvell,pins = "mpp38";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_wifi_green: pmx-led-wifi-green {
+                               marvell,pins = "mpp39";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_sw_left: pmx-sw-left {
+                               marvell,pins = "mpp43";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_sw_right: pmx-sw-right {
+                               marvell,pins = "mpp44";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_sw_idle: pmx-sw-idle {
+                               marvell,pins = "mpp45";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_sw_left2: pmx-sw-left2 {
+                               marvell,pins = "mpp46";
+                               marvell,function = "gpio";
+                       };
+
+                       pmx_led_wifi_yellow: pmx-led-wifi-yellow {
+                               marvell,pins = "mpp48";
+                               marvell,function = "gpio";
+                       };
+               };
+
                serial@12000 {
                        clock-frequency = <200000000>;
                        status = "ok";
                        status = "okay";
                        nr-ports = <1>;
                };
+
+               i2c@11000 {
+                       status = "ok";
+               };
+
+               mvsdio@90000 {
+                       pinctrl-0 = <&pmx_sdio>;
+                       pinctrl-names = "default";
+                       status = "okay";
+                       /* No CD or WP GPIOs */
+               };
        };
 
        gpio-leds {
index d6ab442b7011e8777efa036b551aa8e8ef438e49..2c738d9dc82a17143b1578c3867dbf87e304cef0 100644 (file)
                        clocks = <&gate_clk 17>;
                        status = "okay";
                };
+
+               mvsdio@90000 {
+                       compatible = "marvell,orion-sdio";
+                       reg = <0x90000 0x200>;
+                       interrupts = <28>;
+                       clocks = <&gate_clk 4>;
+                       status = "disabled";
+               };
        };
 };
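
The mvsdio node above is added as "disabled" at the SoC level and flipped to "okay" per board (see the Topkick change earlier in this diff). Below is a minimal sketch of a platform driver binding to that compatible string; the driver name and probe body are illustrative assumptions, not code from this series.

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

/* Illustrative only: probe runs for "marvell,orion-sdio" nodes whose
 * status has been set to "okay" by the board dts. */
static int example_sdio_probe(struct platform_device *pdev)
{
	dev_info(&pdev->dev, "bound to %s\n", pdev->dev.of_node->full_name);
	return 0;
}

static const struct of_device_id example_sdio_of_match[] = {
	{ .compatible = "marvell,orion-sdio" },
	{ /* sentinel */ },
};

static struct platform_driver example_sdio_driver = {
	.probe	= example_sdio_probe,
	.driver	= {
		.name		= "example-sdio",
		.owner		= THIS_MODULE,
		.of_match_table	= example_sdio_of_match,
	},
};
module_platform_driver(example_sdio_driver);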
index 0b7ee92c5713b3566141ca45da0155c235949d9c..3fe8dae8d32df9df58296701c9de71b86d2a3222 100644 (file)
@@ -1,26 +1,24 @@
 CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_EXPERT=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARCH_DOVE=y
 CONFIG_MACH_DOVE_DB=y
 CONFIG_MACH_CM_A510=y
 CONFIG_MACH_DOVE_DT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_AEABI=y
+CONFIG_HIGHMEM=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_HIGHMEM=y
-CONFIG_USE_OF=y
-CONFIG_ATAGS=y
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
-CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER=y
 CONFIG_VFP=y
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -32,8 +30,9 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 # CONFIG_IPV6 is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
@@ -57,7 +56,6 @@ CONFIG_ATA=y
 CONFIG_SATA_MV=y
 CONFIG_NETDEVICES=y
 CONFIG_MV643XX_ETH=y
-# CONFIG_NETDEV_10000 is not set
 CONFIG_INPUT_POLLDEV=y
 # CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_EVDEV=y
@@ -68,10 +66,7 @@ CONFIG_LEGACY_PTY_COUNT=16
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
 CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_I2C=y
@@ -81,13 +76,11 @@ CONFIG_SPI=y
 CONFIG_SPI_ORION=y
 # CONFIG_HWMON is not set
 CONFIG_USB=y
-CONFIG_USB_DEVICEFS=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
-CONFIG_MMC_SDHCI_IO_ACCESSORS=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_DOVE=y
 CONFIG_NEW_LEDS=y
@@ -104,6 +97,7 @@ CONFIG_MV_XOR=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_FS_XATTR is not set
+CONFIG_EXT4_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_UDF_FS=m
@@ -112,24 +106,20 @@ CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
 CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_850=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_ISO8859_2=y
 CONFIG_NLS_UTF8=y
+CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_TIMER_STATS=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_DEBUG_USER=y
-CONFIG_DEBUG_ERRORS=y
 CONFIG_CRYPTO_NULL=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
@@ -138,7 +128,6 @@ CONFIG_CRYPTO_MD4=y
 CONFIG_CRYPTO_SHA1=y
 CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
-CONFIG_CRYPTO_AES=y
 CONFIG_CRYPTO_BLOWFISH=y
 CONFIG_CRYPTO_TEA=y
 CONFIG_CRYPTO_TWOFISH=y
@@ -147,5 +136,4 @@ CONFIG_CRYPTO_LZO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_MV_CESA=y
 CONFIG_CRC_CCITT=y
-CONFIG_CRC16=y
 CONFIG_LIBCRC32C=y
index cbd91bce1ca9f13ebbb0da7a4eb92ec2f42724b8..2ec8119cff731db06d8b8a922226109724d0f8d4 100644 (file)
@@ -14,16 +14,20 @@ CONFIG_MACH_ARMADA_XP=y
 # CONFIG_CACHE_L2X0 is not set
 # CONFIG_SWP_EMULATE is not set
 CONFIG_SMP=y
-# CONFIG_LOCAL_TIMERS is not set
 CONFIG_AEABI=y
 CONFIG_HIGHMEM=y
 # CONFIG_COMPACTION is not set
 CONFIG_ZBOOT_ROM_TEXT=0x0
 CONFIG_ZBOOT_ROM_BSS=0x0
 CONFIG_ARM_APPENDED_DTB=y
+CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_VFP=y
 CONFIG_NET=y
 CONFIG_INET=y
+CONFIG_BT=y
+CONFIG_BT_MRVL=y
+CONFIG_BT_MRVL_SDIO=y
+CONFIG_CFG80211=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
@@ -31,16 +35,34 @@ CONFIG_SATA_MV=y
 CONFIG_NETDEVICES=y
 CONFIG_MVNETA=y
 CONFIG_MARVELL_PHY=y
+CONFIG_MWIFIEX=y
+CONFIG_MWIFIEX_SDIO=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_I2C=y
+CONFIG_SPI=y
+CONFIG_SPI_ORION=y
 CONFIG_I2C_MV64XXX=y
+CONFIG_MTD=y
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_M25P80=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
-# CONFIG_USB_SUPPORT is not set
+CONFIG_USB_SUPPORT=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+CONFIG_MMC=y
+CONFIG_MMC_MVSDIO=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_S35390A=y
+CONFIG_RTC_DRV_MV=y
 CONFIG_DMADEVICES=y
 CONFIG_MV_XOR=y
 # CONFIG_IOMMU_SUPPORT is not set
index 603c5fd99e8a30df2ecb63af8011c23a28ef90f2..36469d813951dc65e76f2e94acb1960706d6d600 100644 (file)
@@ -2,8 +2,12 @@ if ARCH_DOVE
 
 menu "Marvell Dove Implementations"
 
+config DOVE_LEGACY
+       bool
+
 config MACH_DOVE_DB
        bool "Marvell DB-MV88AP510 Development Board"
+       select DOVE_LEGACY
        select I2C_BOARDINFO
        help
          Say 'Y' here if you want your kernel to support the
@@ -11,6 +15,7 @@ config MACH_DOVE_DB
 
 config MACH_CM_A510
        bool "CompuLab CM-A510 Board"
+       select DOVE_LEGACY
        help
          Say 'Y' here if you want your kernel to support the
          CompuLab CM-A510 Board.
@@ -19,6 +24,8 @@ config MACH_DOVE_DT
        bool "Marvell Dove Flattened Device Tree"
        select MVEBU_CLK_CORE
        select MVEBU_CLK_GATING
+       select REGULATOR
+       select REGULATOR_FIXED_VOLTAGE
        select USE_OF
        help
          Say 'Y' here if you want your kernel to support the
index 5e683baf96cffc53e0ac8410be6e7505d4fb8557..3f0a858fb59759702b018b47b1c9a78269b28adf 100644 (file)
@@ -1,4 +1,6 @@
-obj-y                          += common.o addr-map.o irq.o mpp.o
+obj-y                          += common.o addr-map.o irq.o
+obj-$(CONFIG_DOVE_LEGACY)      += mpp.o
 obj-$(CONFIG_PCI)              += pcie.o
 obj-$(CONFIG_MACH_DOVE_DB)     += dove-db-setup.o
+obj-$(CONFIG_MACH_DOVE_DT)     += board-dt.o
 obj-$(CONFIG_MACH_CM_A510)     += cm-a510.o
diff --git a/arch/arm/mach-dove/board-dt.c b/arch/arm/mach-dove/board-dt.c
new file mode 100644 (file)
index 0000000..fbde1dd
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * arch/arm/mach-dove/board-dt.c
+ *
+ * Marvell Dove 88AP510 System On Chip FDT Board
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/clk-provider.h>
+#include <linux/clk/mvebu.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_data/usb-ehci-orion.h>
+#include <asm/hardware/cache-tauros2.h>
+#include <asm/mach/arch.h>
+#include <mach/pm.h>
+#include <plat/common.h>
+#include <plat/irq.h>
+#include "common.h"
+
+/*
+ * There are still devices that don't know anything about DT;
+ * get the clock gates here and add a clock lookup for them.
+ */
+static void __init dove_legacy_clk_init(void)
+{
+       struct device_node *np = of_find_compatible_node(NULL, NULL,
+                                        "marvell,dove-gating-clock");
+       struct of_phandle_args clkspec;
+
+       clkspec.np = np;
+       clkspec.args_count = 1;
+
+       clkspec.args[0] = CLOCK_GATING_BIT_GBE;
+       orion_clkdev_add(NULL, "mv643xx_eth_port.0",
+                        of_clk_get_from_provider(&clkspec));
+
+       clkspec.args[0] = CLOCK_GATING_BIT_PCIE0;
+       orion_clkdev_add("0", "pcie",
+                        of_clk_get_from_provider(&clkspec));
+
+       clkspec.args[0] = CLOCK_GATING_BIT_PCIE1;
+       orion_clkdev_add("1", "pcie",
+                        of_clk_get_from_provider(&clkspec));
+}
+
+static void __init dove_of_clk_init(void)
+{
+       mvebu_clocks_init();
+       dove_legacy_clk_init();
+}
+
+static struct mv643xx_eth_platform_data dove_dt_ge00_data = {
+       .phy_addr = MV643XX_ETH_PHY_ADDR_DEFAULT,
+};
+
+static void __init dove_dt_init(void)
+{
+       pr_info("Dove 88AP510 SoC\n");
+
+#ifdef CONFIG_CACHE_TAUROS2
+       tauros2_init(0);
+#endif
+       dove_setup_cpu_mbus();
+
+       /* Setup root of clk tree */
+       dove_of_clk_init();
+
+       /* Internal devices not ported to DT yet */
+       dove_ge00_init(&dove_dt_ge00_data);
+       dove_pcie_init(1, 1);
+
+       of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+}
+
+static const char * const dove_dt_board_compat[] = {
+       "marvell,dove",
+       NULL
+};
+
+DT_MACHINE_START(DOVE_DT, "Marvell Dove (Flattened Device Tree)")
+       .map_io         = dove_map_io,
+       .init_early     = dove_init_early,
+       .init_irq       = orion_dt_init_irq,
+       .init_time      = dove_timer_init,
+       .init_machine   = dove_dt_init,
+       .restart        = dove_restart,
+       .dt_compat      = dove_dt_board_compat,
+MACHINE_END
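
The comment in dove_legacy_clk_init() above notes that some drivers know nothing about DT, so clock lookups are registered by hand with orion_clkdev_add(). A hedged sketch of what such a legacy consumer then does (the function below is illustrative, not from this patch):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

/* Illustrative only: a non-DT driver picking up the lookup registered as
 * orion_clkdev_add(NULL, "mv643xx_eth_port.0", ...).  The clkdev match is
 * keyed on the device name, so the driver itself needs no DT awareness. */
static int legacy_consumer_enable_clk(struct device *dev)
{
	struct clk *clk = clk_get(dev, NULL); /* dev named "mv643xx_eth_port.0" */

	if (IS_ERR(clk))
		return PTR_ERR(clk);

	return clk_prepare_enable(clk);
}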
index ea84c535a110100a2e77650cf5d3f94403363ad6..c6b3b2bb50e76984e6f1ea5195e8c4b8f707332e 100644 (file)
@@ -360,88 +360,3 @@ void dove_restart(char mode, const char *cmd)
        while (1)
                ;
 }
-
-#if defined(CONFIG_MACH_DOVE_DT)
-/*
- * There are still devices that doesn't even know about DT,
- * get clock gates here and add a clock lookup.
- */
-static void __init dove_legacy_clk_init(void)
-{
-       struct device_node *np = of_find_compatible_node(NULL, NULL,
-                                        "marvell,dove-gating-clock");
-       struct of_phandle_args clkspec;
-
-       clkspec.np = np;
-       clkspec.args_count = 1;
-
-       clkspec.args[0] = CLOCK_GATING_BIT_USB0;
-       orion_clkdev_add(NULL, "orion-ehci.0",
-                        of_clk_get_from_provider(&clkspec));
-
-       clkspec.args[0] = CLOCK_GATING_BIT_USB1;
-       orion_clkdev_add(NULL, "orion-ehci.1",
-                        of_clk_get_from_provider(&clkspec));
-
-       clkspec.args[0] = CLOCK_GATING_BIT_GBE;
-       orion_clkdev_add(NULL, "mv643xx_eth_port.0",
-                        of_clk_get_from_provider(&clkspec));
-
-       clkspec.args[0] = CLOCK_GATING_BIT_PCIE0;
-       orion_clkdev_add("0", "pcie",
-                        of_clk_get_from_provider(&clkspec));
-
-       clkspec.args[0] = CLOCK_GATING_BIT_PCIE1;
-       orion_clkdev_add("1", "pcie",
-                        of_clk_get_from_provider(&clkspec));
-}
-
-static void __init dove_of_clk_init(void)
-{
-       mvebu_clocks_init();
-       dove_legacy_clk_init();
-}
-
-static struct mv643xx_eth_platform_data dove_dt_ge00_data = {
-       .phy_addr = MV643XX_ETH_PHY_ADDR_DEFAULT,
-};
-
-static void __init dove_dt_init(void)
-{
-       pr_info("Dove 88AP510 SoC, TCLK = %d MHz.\n",
-               (dove_tclk + 499999) / 1000000);
-
-#ifdef CONFIG_CACHE_TAUROS2
-       tauros2_init(0);
-#endif
-       dove_setup_cpu_mbus();
-
-       /* Setup root of clk tree */
-       dove_of_clk_init();
-
-       /* Internal devices not ported to DT yet */
-       dove_rtc_init();
-
-       dove_ge00_init(&dove_dt_ge00_data);
-       dove_ehci0_init();
-       dove_ehci1_init();
-       dove_pcie_init(1, 1);
-
-       of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-
-static const char * const dove_dt_board_compat[] = {
-       "marvell,dove",
-       NULL
-};
-
-DT_MACHINE_START(DOVE_DT, "Marvell Dove (Flattened Device Tree)")
-       .map_io         = dove_map_io,
-       .init_early     = dove_init_early,
-       .init_irq       = orion_dt_init_irq,
-       .init_time      = dove_timer_init,
-       .init_machine   = dove_dt_init,
-       .restart        = dove_restart,
-       .dt_compat      = dove_dt_board_compat,
-MACHINE_END
-#endif
index fb7cb841b64c70aa8a411fd061cbf34ff6d7b480..0f39f8c93b947bc0d8b8257612d391a93dd67302 100644 (file)
@@ -83,6 +83,7 @@ enum imx5_clks {
        ssi2_root_gate, ssi3_root_gate, ssi_ext1_gate, ssi_ext2_gate,
        epit1_ipg_gate, epit1_hf_gate, epit2_ipg_gate, epit2_hf_gate,
        can_sel, can1_serial_gate, can1_ipg_gate,
+       owire_gate,
        clk_max
 };
 
@@ -233,12 +234,13 @@ static void __init mx5_clocks_common_init(unsigned long rate_ckil,
        clk[epit1_hf_gate] = imx_clk_gate2("epit1_hf_gate", "per_root", MXC_CCM_CCGR2, 4);
        clk[epit2_ipg_gate] = imx_clk_gate2("epit2_ipg_gate", "ipg", MXC_CCM_CCGR2, 6);
        clk[epit2_hf_gate] = imx_clk_gate2("epit2_hf_gate", "per_root", MXC_CCM_CCGR2, 8);
+       clk[owire_gate] = imx_clk_gate2("owire_gate", "per_root", MXC_CCM_CCGR2, 22);
 
        for (i = 0; i < ARRAY_SIZE(clk); i++)
                if (IS_ERR(clk[i]))
                        pr_err("i.MX5 clk %d: register failed with %ld\n",
                                i, PTR_ERR(clk[i]));
-       
+
        clk_register_clkdev(clk[gpt_hf_gate], "per", "imx-gpt.0");
        clk_register_clkdev(clk[gpt_ipg_gate], "ipg", "imx-gpt.0");
        clk_register_clkdev(clk[uart1_per_gate], "per", "imx21-uart.0");
index 540138c4606c41e0bf70c175f2ceea188c63936a..7b025ee528a517bdb7d584e90cebefc40fb56a48 100644 (file)
@@ -164,8 +164,8 @@ enum mx6q_clks {
        usdhc4, vdo_axi, vpu_axi, cko1, pll1_sys, pll2_bus, pll3_usb_otg,
        pll4_audio, pll5_video, pll8_mlb, pll7_usb_host, pll6_enet, ssi1_ipg,
        ssi2_ipg, ssi3_ipg, rom, usbphy1, usbphy2, ldb_di0_div_3_5, ldb_di1_div_3_5,
-       sata_ref, sata_ref_100m, pcie_ref, pcie_ref_125m, enet_ref,
-       clk_max
+       sata_ref, sata_ref_100m, pcie_ref, pcie_ref_125m, enet_ref, usbphy1_gate,
+       usbphy2_gate, clk_max
 };
 
 static struct clk *clk[clk_max];
@@ -218,8 +218,21 @@ int __init mx6q_clocks_init(void)
        clk[pll7_usb_host] = imx_clk_pllv3(IMX_PLLV3_USB,       "pll7_usb_host","osc", base + 0x20, 0x3);
        clk[pll8_mlb]      = imx_clk_pllv3(IMX_PLLV3_MLB,       "pll8_mlb",     "osc", base + 0xd0, 0x0);
 
-       clk[usbphy1] = imx_clk_gate("usbphy1", "pll3_usb_otg", base + 0x10, 6);
-       clk[usbphy2] = imx_clk_gate("usbphy2", "pll7_usb_host", base + 0x20, 6);
+       /*
+        * Bit 20 is a reserved, read-only bit.  Gating on it is done only so
+        * that:
+        * - usbphy clk_enable/disable touches no real hardware gate
+        * - the refcount is still kept across usbphy clk_enable/disable, so
+        *   the clk framework can enable/disable the usbphy's parent as needed
+        */
+       clk[usbphy1] = imx_clk_gate("usbphy1", "pll3_usb_otg", base + 0x10, 20);
+       clk[usbphy2] = imx_clk_gate("usbphy2", "pll7_usb_host", base + 0x20, 20);
+
+       /*
+        * usbphy*_gate needs to be on once the system has booted, and
+        * software never needs to control it after that.
+        */
+       clk[usbphy1_gate] = imx_clk_gate("usbphy1_gate", "dummy", base + 0x10, 6);
+       clk[usbphy2_gate] = imx_clk_gate("usbphy2_gate", "dummy", base + 0x20, 6);
 
        clk[sata_ref] = imx_clk_fixed_factor("sata_ref", "pll6_enet", 1, 5);
        clk[pcie_ref] = imx_clk_fixed_factor("pcie_ref", "pll6_enet", 1, 4);
@@ -446,6 +459,11 @@ int __init mx6q_clocks_init(void)
        for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
                clk_prepare_enable(clk[clks_init_on[i]]);
 
+       if (IS_ENABLED(CONFIG_USB_MXS_PHY)) {
+               clk_prepare_enable(clk[usbphy1_gate]);
+               clk_prepare_enable(clk[usbphy2_gate]);
+       }
+
        /* Set initial power mode */
        imx6q_set_lpm(WAIT_CLOCKED);
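
The two comments above describe the intent: the usbphy clocks now gate on a reserved bit so that enable/disable only maintains refcounts and the parent PLL, while the real gates (bit 6) stay on through usbphy*_gate. A sketch of the consumer-side effect follows; the "usbphy1" lookup name is an assumption, not taken from this patch.

	/* Illustrative only: enabling usbphy1 flips no real hardware gate
	 * (bit 20 is reserved/read-only) but the clk framework still takes a
	 * reference and enables the parent pll3_usb_otg if needed. */
	struct clk *phy = clk_get_sys(NULL, "usbphy1");	/* lookup name assumed */

	if (!IS_ERR(phy))
		clk_prepare_enable(phy);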
 
index 1786b2d1257eb0f77b92564f93f07f17b8fbdf65..9ffd103b27e4da660f229cca79359a36fbeb1d38 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/clk.h>
 #include <linux/clkdev.h>
+#include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/init.h>
@@ -22,6 +23,7 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/opp.h>
 #include <linux/phy.h>
 #include <linux/regmap.h>
 #include <linux/micrel_phy.h>
@@ -200,6 +202,64 @@ static void __init imx6q_init_machine(void)
        imx6q_1588_init();
 }
 
+#define OCOTP_CFG3                     0x440
+#define OCOTP_CFG3_SPEED_SHIFT         16
+#define OCOTP_CFG3_SPEED_1P2GHZ                0x3
+
+static void __init imx6q_opp_check_1p2ghz(struct device *cpu_dev)
+{
+       struct device_node *np;
+       void __iomem *base;
+       u32 val;
+
+       np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp");
+       if (!np) {
+               pr_warn("failed to find ocotp node\n");
+               return;
+       }
+
+       base = of_iomap(np, 0);
+       if (!base) {
+               pr_warn("failed to map ocotp\n");
+               goto put_node;
+       }
+
+       val = readl_relaxed(base + OCOTP_CFG3);
+       val >>= OCOTP_CFG3_SPEED_SHIFT;
+       if ((val & 0x3) != OCOTP_CFG3_SPEED_1P2GHZ)
+               if (opp_disable(cpu_dev, 1200000000))
+                       pr_warn("failed to disable 1.2 GHz OPP\n");
+
+put_node:
+       of_node_put(np);
+}
+
+static void __init imx6q_opp_init(struct device *cpu_dev)
+{
+       struct device_node *np;
+
+       np = of_find_node_by_path("/cpus/cpu@0");
+       if (!np) {
+               pr_warn("failed to find cpu0 node\n");
+               return;
+       }
+
+       cpu_dev->of_node = np;
+       if (of_init_opp_table(cpu_dev)) {
+               pr_warn("failed to init OPP table\n");
+               goto put_node;
+       }
+
+       imx6q_opp_check_1p2ghz(cpu_dev);
+
+put_node:
+       of_node_put(np);
+}
+
+struct platform_device imx6q_cpufreq_pdev = {
+       .name = "imx6q-cpufreq",
+};
+
 static void __init imx6q_init_late(void)
 {
        /*
@@ -208,6 +268,11 @@ static void __init imx6q_init_late(void)
         */
        if (imx6q_revision() > IMX_CHIP_REVISION_1_1)
                imx6q_cpuidle_init();
+
+       if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) {
+               imx6q_opp_init(&imx6q_cpufreq_pdev.dev);
+               platform_device_register(&imx6q_cpufreq_pdev);
+       }
 }
 
 static void __init imx6q_map_io(void)
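
imx6q_opp_init() above builds the OPP table for cpu0, and imx6q_opp_check_1p2ghz() drops the 1.2 GHz entry when the OCOTP speed fuse does not allow it. As a rough sketch, a consumer such as the imx6q-cpufreq driver registered in imx6q_init_late() could then query the highest remaining OPP with the opp_* API used in this patch; the function below is illustrative, not part of the series.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/opp.h>
#include <linux/rcupdate.h>

static void example_report_max_opp(struct device *cpu_dev)
{
	unsigned long freq = ULONG_MAX;
	struct opp *opp;

	/* OPP lookups must run under rcu_read_lock() with this API. */
	rcu_read_lock();
	opp = opp_find_freq_floor(cpu_dev, &freq);
	if (!IS_ERR(opp))
		dev_info(cpu_dev, "highest available OPP: %lu Hz\n", freq);
	rcu_read_unlock();
}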
index f91cdff5a3e469a6ddce7f072a0e74084af07fa4..7b6a64bc5f40b0cf1dc4d18d0f9eeca858f6ea35 100644 (file)
@@ -58,6 +58,13 @@ config ARCH_KIRKWOOD_DT
          Say 'Y' here if you want your kernel to support the
          Marvell Kirkwood using flattened device tree.
 
+config MACH_GURUPLUG_DT
+       bool "Marvell GuruPlug Reference Board (Flattened Device Tree)"
+       select ARCH_KIRKWOOD_DT
+       help
+         Say 'Y' here if you want your kernel to support the
+         Marvell GuruPlug Reference Board (Flattened Device Tree).
+
 config MACH_DREAMPLUG_DT
        bool "Marvell DreamPlug (Flattened Device Tree)"
        select ARCH_KIRKWOOD_DT
index d6653095a1eb5f2c3acb6d40d93242b63e27ac39..4cc4bee4d0cf67b963d2e9aa8237e294f24c3f56 100644 (file)
@@ -21,6 +21,7 @@ obj-$(CONFIG_MACH_T5325)              += t5325-setup.o
 
 obj-$(CONFIG_ARCH_KIRKWOOD_DT)         += board-dt.o
 obj-$(CONFIG_MACH_DREAMPLUG_DT)                += board-dreamplug.o
+obj-$(CONFIG_MACH_GURUPLUG_DT)         += board-guruplug.o
 obj-$(CONFIG_MACH_ICONNECT_DT)         += board-iconnect.o
 obj-$(CONFIG_MACH_DLINK_KIRKWOOD_DT)   += board-dnskw.o
 obj-$(CONFIG_MACH_IB62X0_DT)           += board-ib62x0.o
index 08248e24ffcdba8179413c15dd25b60a55b40d96..0903242c00dc9a21d2f4f8868360cc92a63569a1 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/init.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/gpio.h>
-#include <linux/platform_data/mmc-mvsdio.h>
 #include "common.h"
 
 static struct mv643xx_eth_platform_data dreamplug_ge00_data = {
@@ -26,10 +25,6 @@ static struct mv643xx_eth_platform_data dreamplug_ge01_data = {
        .phy_addr       = MV643XX_ETH_PHY_ADDR(1),
 };
 
-static struct mvsdio_platform_data dreamplug_mvsdio_data = {
-       /* unfortunately the CD signal has not been connected */
-};
-
 void __init dreamplug_init(void)
 {
        /*
@@ -37,5 +32,4 @@ void __init dreamplug_init(void)
         */
        kirkwood_ge00_init(&dreamplug_ge00_data);
        kirkwood_ge01_init(&dreamplug_ge01_data);
-       kirkwood_sdio_init(&dreamplug_mvsdio_data);
 }
index 95cc04d14b659ac6836e0279058a28ef14a3f33d..2e73e9d53f705775246a057bb7cfbdf8219f44a2 100644 (file)
@@ -55,10 +55,6 @@ static void __init kirkwood_legacy_clk_init(void)
        orion_clkdev_add("0", "pcie",
                         of_clk_get_from_provider(&clkspec));
 
-       clkspec.args[0] = CGC_BIT_USB0;
-       orion_clkdev_add(NULL, "orion-ehci.0",
-                        of_clk_get_from_provider(&clkspec));
-
        clkspec.args[0] = CGC_BIT_PEX1;
        orion_clkdev_add("1", "pcie",
                         of_clk_get_from_provider(&clkspec));
@@ -66,11 +62,6 @@ static void __init kirkwood_legacy_clk_init(void)
        clkspec.args[0] = CGC_BIT_GE1;
        orion_clkdev_add(NULL, "mv643xx_eth_port.1",
                         of_clk_get_from_provider(&clkspec));
-
-       clkspec.args[0] = CGC_BIT_SDIO;
-       orion_clkdev_add(NULL, "mvsdio",
-                        of_clk_get_from_provider(&clkspec));
-
 }
 
 static void __init kirkwood_of_clk_init(void)
@@ -107,6 +98,9 @@ static void __init kirkwood_dt_init(void)
        if (of_machine_is_compatible("globalscale,dreamplug"))
                dreamplug_init();
 
+       if (of_machine_is_compatible("globalscale,guruplug"))
+               guruplug_dt_init();
+
        if (of_machine_is_compatible("dlink,dns-kirkwood"))
                dnskw_init();
 
@@ -150,14 +144,12 @@ static void __init kirkwood_dt_init(void)
        if (of_machine_is_compatible("usi,topkick"))
                usi_topkick_init();
 
-       if (of_machine_is_compatible("zyxel,nsa310"))
-               nsa310_init();
-
        of_platform_populate(NULL, kirkwood_dt_match_table, NULL, NULL);
 }
 
 static const char * const kirkwood_dt_board_compat[] = {
        "globalscale,dreamplug",
+       "globalscale,guruplug",
        "dlink,dns-320",
        "dlink,dns-325",
        "iom,iconnect",
diff --git a/arch/arm/mach-kirkwood/board-guruplug.c b/arch/arm/mach-kirkwood/board-guruplug.c
new file mode 100644 (file)
index 0000000..0a0df45
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * arch/arm/mach-kirkwood/board-guruplug.c
+ *
+ * Marvell Guruplug Reference Board Init for drivers not converted to
+ * flattened device tree yet.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mv643xx_eth.h>
+#include <linux/gpio.h>
+#include <linux/platform_data/mmc-mvsdio.h>
+#include "common.h"
+
+static struct mv643xx_eth_platform_data guruplug_ge00_data = {
+       .phy_addr       = MV643XX_ETH_PHY_ADDR(0),
+};
+
+static struct mv643xx_eth_platform_data guruplug_ge01_data = {
+       .phy_addr       = MV643XX_ETH_PHY_ADDR(1),
+};
+
+static struct mvsdio_platform_data guruplug_mvsdio_data = {
+       /* unfortunately the CD signal has not been connected */
+};
+
+void __init guruplug_dt_init(void)
+{
+       /*
+        * Basic setup. Needs to be called early.
+        */
+       kirkwood_ge00_init(&guruplug_ge00_data);
+       kirkwood_ge01_init(&guruplug_ge01_data);
+       kirkwood_sdio_init(&guruplug_mvsdio_data);
+}
index 3264925b8318df5c4d4bea002ada90e92aa11443..7d6dc669e17fb4dfc6338a5c7e1a129868ae5a0a 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/mv643xx_eth.h>
-#include <linux/platform_data/mmc-mvsdio.h>
 #include "common.h"
 
 static struct mv643xx_eth_platform_data mplcec4_ge00_data = {
@@ -23,11 +22,6 @@ static struct mv643xx_eth_platform_data mplcec4_ge01_data = {
        .phy_addr       = MV643XX_ETH_PHY_ADDR(2),
 };
 
-static struct mvsdio_platform_data mplcec4_mvsdio_data = {
-       .gpio_card_detect = 47, /* MPP47 used as SD card detect */
-};
-
-
 void __init mplcec4_init(void)
 {
        /*
@@ -35,7 +29,6 @@ void __init mplcec4_init(void)
         */
        kirkwood_ge00_init(&mplcec4_ge00_data);
        kirkwood_ge01_init(&mplcec4_ge01_data);
-       kirkwood_sdio_init(&mplcec4_mvsdio_data);
        kirkwood_pcie_init(KW_PCIE0);
 }
 
index f4632a809f6895a0481ea1876e3e0fe25da36fe8..f2ea3b7ad72692d7e5e5cd38f0e7ffe235396c55 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/mv643xx_eth.h>
-#include <linux/gpio.h>
 #include <linux/of.h>
 #include "common.h"
 
@@ -23,13 +22,6 @@ static struct mv643xx_eth_platform_data ns2_ge00_data = {
        .phy_addr       = MV643XX_ETH_PHY_ADDR(8),
 };
 
-#define NS2_GPIO_POWER_OFF     31
-
-static void ns2_power_off(void)
-{
-       gpio_set_value(NS2_GPIO_POWER_OFF, 1);
-}
-
 void __init ns2_init(void)
 {
        /*
@@ -39,10 +31,4 @@ void __init ns2_init(void)
            of_machine_is_compatible("lacie,netspace_mini_v2"))
                ns2_ge00_data.phy_addr = MV643XX_ETH_PHY_ADDR(0);
        kirkwood_ge00_init(&ns2_ge00_data);
-
-       if (gpio_request(NS2_GPIO_POWER_OFF, "power-off") == 0 &&
-           gpio_direction_output(NS2_GPIO_POWER_OFF, 0) == 0)
-               pm_power_off = ns2_power_off;
-       else
-               pr_err("ns2: failed to configure power-off GPIO\n");
 }
index 970174ad4a70d4ea55f0a9807232ec000fbdcd69..55ade93b93bf62ac68a23c1c0e72c5a2e1e9ee41 100644 (file)
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/gpio.h>
-#include <linux/i2c.h>
 #include <mach/kirkwood.h>
 #include <linux/of.h>
 #include "common.h"
-#include "mpp.h"
-
-#define NSA310_GPIO_USB_POWER_OFF      21
-#define NSA310_GPIO_POWER_OFF          48
-
-static unsigned int nsa310_mpp_config[] __initdata = {
-       MPP12_GPIO, /* led esata green */
-       MPP13_GPIO, /* led esata red */
-       MPP15_GPIO, /* led usb green */
-       MPP16_GPIO, /* led usb red */
-       MPP21_GPIO, /* control usb power off */
-       MPP28_GPIO, /* led sys green */
-       MPP29_GPIO, /* led sys red */
-       MPP36_GPIO, /* key reset */
-       MPP37_GPIO, /* key copy */
-       MPP39_GPIO, /* led copy green */
-       MPP40_GPIO, /* led copy red */
-       MPP41_GPIO, /* led hdd green */
-       MPP42_GPIO, /* led hdd red */
-       MPP44_GPIO, /* ?? */
-       MPP46_GPIO, /* key power */
-       MPP48_GPIO, /* control power off */
-       0
-};
-
-static struct i2c_board_info __initdata nsa310_i2c_info[] = {
-       { I2C_BOARD_INFO("adt7476", 0x2e) },
-};
-
-static void nsa310_power_off(void)
-{
-       gpio_set_value(NSA310_GPIO_POWER_OFF, 1);
-}
-
-static int __init nsa310_gpio_request(unsigned int gpio, unsigned long flags,
-                                      const char *label)
-{
-       int err;
-
-       err = gpio_request_one(gpio, flags, label);
-       if (err)
-               pr_err("NSA-310: can't setup GPIO%u (%s), err=%d\n",
-                       gpio, label, err);
-
-       return err;
-}
-
-static void __init nsa310_gpio_init(void)
-{
-       int err;
-
-       err = nsa310_gpio_request(NSA310_GPIO_POWER_OFF, GPIOF_OUT_INIT_LOW,
-                                 "Power Off");
-       if (!err)
-               pm_power_off = nsa310_power_off;
-
-       nsa310_gpio_request(NSA310_GPIO_USB_POWER_OFF, GPIOF_OUT_INIT_LOW,
-                           "USB Power Off");
-}
-
-void __init nsa310_init(void)
-{
-       kirkwood_mpp_conf(nsa310_mpp_config);
-
-       nsa310_gpio_init();
-
-       i2c_register_board_info(0, ARRAY_AND_SIZE(nsa310_i2c_info));
-}
 
 static int __init nsa310_pci_init(void)
 {
index 815fc6451d526bdf3db94fa082879c1711093e36..b11d8fdeca9351bfd69438b78a7083eadbf45d54 100644 (file)
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/mv643xx_eth.h>
-#include <linux/clk.h>
-#include <linux/clk-private.h>
 #include "common.h"
-#include "mpp.h"
 
 static struct mv643xx_eth_platform_data openblocks_ge00_data = {
        .phy_addr       = MV643XX_ETH_PHY_ADDR(0),
 };
 
-static unsigned int openblocks_a6_mpp_config[] __initdata = {
-       MPP0_NF_IO2,
-       MPP1_NF_IO3,
-       MPP2_NF_IO4,
-       MPP3_NF_IO5,
-       MPP4_NF_IO6,
-       MPP5_NF_IO7,
-       MPP6_SYSRST_OUTn,
-       MPP8_UART1_RTS,
-       MPP9_UART1_CTS,
-       MPP10_UART0_TXD,
-       MPP11_UART0_RXD,
-       MPP13_UART1_TXD,
-       MPP14_UART1_RXD,
-       MPP15_UART0_RTS,
-       MPP16_UART0_CTS,
-       MPP18_NF_IO0,
-       MPP19_NF_IO1,
-       MPP20_GPIO, /* DIP SW0 */
-       MPP21_GPIO, /* DIP SW1 */
-       MPP22_GPIO, /* DIP SW2 */
-       MPP23_GPIO, /* DIP SW3 */
-       MPP24_GPIO, /* GPIO 0 */
-       MPP25_GPIO, /* GPIO 1 */
-       MPP26_GPIO, /* GPIO 2 */
-       MPP27_GPIO, /* GPIO 3 */
-       MPP28_GPIO, /* GPIO 4 */
-       MPP29_GPIO, /* GPIO 5 */
-       MPP30_GPIO, /* GPIO 6 */
-       MPP31_GPIO, /* GPIO 7 */
-       MPP36_TW1_SDA,
-       MPP37_TW1_SCK,
-       MPP38_GPIO, /* INIT */
-       MPP39_GPIO, /* USB OC */
-       MPP41_GPIO, /* LED: Red */
-       MPP42_GPIO, /* LED: Green */
-       MPP43_GPIO, /* LED: Yellow */
-       0,
-};
-
 void __init openblocks_a6_init(void)
 {
        /*
         * Basic setup. Needs to be called early.
         */
-       kirkwood_mpp_conf(openblocks_a6_mpp_config);
        kirkwood_ge00_init(&openblocks_ge00_data);
 }
index 23d2dd1b1b1ed4079d61d20fd60ec898d9901de9..1cc04ec33f0b77d2363694e62fa8f81a153716fe 100644 (file)
 #include <linux/init.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/gpio.h>
-#include <linux/platform_data/mmc-mvsdio.h>
 #include "common.h"
-#include "mpp.h"
 
 static struct mv643xx_eth_platform_data topkick_ge00_data = {
        .phy_addr       = MV643XX_ETH_PHY_ADDR(0),
 };
 
-static struct mvsdio_platform_data topkick_mvsdio_data = {
-       /* unfortunately the CD signal has not been connected */
-};
-
-/*
- * GPIO LED layout
- *
- *       /-SYS_LED(2)
- *       |
- *       |   /-DISK_LED
- *       |   |
- *       |   |   /-WLAN_LED(2)
- *       |   |   |
- * [SW] [*] [*] [*]
- */
-
-/*
- * Switch positions
- *
- *     /-SW_LEFT
- *     |
- *     |   /-SW_IDLE
- *     |   |
- *     |   |   /-SW_RIGHT
- *     |   |   |
- * PS [L] [I] [R] LEDS
- */
-
-static unsigned int topkick_mpp_config[] __initdata = {
-       MPP21_GPIO,     /* DISK_LED           (low active) - yellow */
-       MPP36_GPIO,     /* SATA0 power enable (high active) */
-       MPP37_GPIO,     /* SYS_LED2           (low active) - red */
-       MPP38_GPIO,     /* SYS_LED            (low active) - blue */
-       MPP39_GPIO,     /* WLAN_LED           (low active) - green */
-       MPP43_GPIO,     /* SW_LEFT            (low active) */
-       MPP44_GPIO,     /* SW_RIGHT           (low active) */
-       MPP45_GPIO,     /* SW_IDLE            (low active) */
-       MPP46_GPIO,     /* SW_LEFT            (low active) */
-       MPP48_GPIO,     /* WLAN_LED2          (low active) - yellow */
-       0
-};
-
 void __init usi_topkick_init(void)
 {
        /*
         * Basic setup. Needs to be called early.
         */
-       kirkwood_mpp_conf(topkick_mpp_config);
-
-
        kirkwood_ge00_init(&topkick_ge00_data);
-       kirkwood_sdio_init(&topkick_mvsdio_data);
 }
index e956d0277dd186a7145e6c8badc108ba1f504f44..5ed70565c8435d2efd9e529bb0bab39bdb519727 100644 (file)
@@ -60,6 +60,11 @@ void dreamplug_init(void);
 #else
 static inline void dreamplug_init(void) {};
 #endif
+#ifdef CONFIG_MACH_GURUPLUG_DT
+void guruplug_dt_init(void);
+#else
+static inline void guruplug_dt_init(void) {};
+#endif
 #ifdef CONFIG_MACH_TS219_DT
 void qnap_dt_ts219_init(void);
 #else
@@ -130,12 +135,6 @@ void ns2_init(void);
 static inline void ns2_init(void) {};
 #endif
 
-#ifdef CONFIG_MACH_NSA310_DT
-void nsa310_init(void);
-#else
-static inline void nsa310_init(void) {};
-#endif
-
 #ifdef CONFIG_MACH_OPENBLOCKS_A6_DT
 void openblocks_a6_init(void);
 #else
index 8e3fb082c3c6e350232d3e91ffc8c4dc9c805643..274ff58271de149f168a1a901ccc435ea131f690 100644 (file)
@@ -34,6 +34,7 @@
 #define ARMADA_370_XP_INT_CONTROL              (0x00)
 #define ARMADA_370_XP_INT_SET_ENABLE_OFFS      (0x30)
 #define ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS    (0x34)
+#define ARMADA_370_XP_INT_SOURCE_CTL(irq)      (0x100 + (irq) * 4)
 
 #define ARMADA_370_XP_CPU_INTACK_OFFS          (0x44)
 
 #define ARMADA_370_XP_IN_DRBEL_MSK_OFFS          (0xc)
 #define ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS        (0x8)
 
+#define ARMADA_370_XP_MAX_PER_CPU_IRQS         (28)
+
 #define ACTIVE_DOORBELLS                       (8)
 
+static DEFINE_RAW_SPINLOCK(irq_controller_lock);
+
 static void __iomem *per_cpu_int_base;
 static void __iomem *main_int_base;
 static struct irq_domain *armada_370_xp_mpic_domain;
 
+/*
+ * In SMP mode:
+ * For shared global interrupts, mask/unmask the global enable bit.
+ * For per-CPU interrupts, mask/unmask the calling CPU's bit.
+ */
 static void armada_370_xp_irq_mask(struct irq_data *d)
 {
+#ifdef CONFIG_SMP
+       irq_hw_number_t hwirq = irqd_to_hwirq(d);
+
+       if (hwirq > ARMADA_370_XP_MAX_PER_CPU_IRQS)
+               writel(hwirq, main_int_base +
+                               ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS);
+       else
+               writel(hwirq, per_cpu_int_base +
+                               ARMADA_370_XP_INT_SET_MASK_OFFS);
+#else
        writel(irqd_to_hwirq(d),
               per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS);
+#endif
 }
 
 static void armada_370_xp_irq_unmask(struct irq_data *d)
 {
+#ifdef CONFIG_SMP
+       irq_hw_number_t hwirq = irqd_to_hwirq(d);
+
+       if (hwirq > ARMADA_370_XP_MAX_PER_CPU_IRQS)
+               writel(hwirq, main_int_base +
+                               ARMADA_370_XP_INT_SET_ENABLE_OFFS);
+       else
+               writel(hwirq, per_cpu_int_base +
+                               ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+#else
        writel(irqd_to_hwirq(d),
               per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
+#endif
 }
 
 #ifdef CONFIG_SMP
 static int armada_xp_set_affinity(struct irq_data *d,
                                  const struct cpumask *mask_val, bool force)
 {
+       unsigned long reg;
+       unsigned long new_mask = 0;
+       unsigned long online_mask = 0;
+       unsigned long count = 0;
+       irq_hw_number_t hwirq = irqd_to_hwirq(d);
+       int cpu;
+
+       for_each_cpu(cpu, mask_val) {
+               new_mask |= 1 << cpu_logical_map(cpu);
+               count++;
+       }
+
+       /*
+        * Forbid multicore interrupt affinity.
+        * This is required since the MPIC HW doesn't prevent
+        * several CPUs from acknowledging the same interrupt.
+        */
+       if (count > 1)
+               return -EINVAL;
+
+       for_each_cpu(cpu, cpu_online_mask)
+               online_mask |= 1 << cpu_logical_map(cpu);
+
+       raw_spin_lock(&irq_controller_lock);
+
+       reg = readl(main_int_base + ARMADA_370_XP_INT_SOURCE_CTL(hwirq));
+       reg = (reg & (~online_mask)) | new_mask;
+       writel(reg, main_int_base + ARMADA_370_XP_INT_SOURCE_CTL(hwirq));
+
+       raw_spin_unlock(&irq_controller_lock);
+
        return 0;
 }
 #endif
@@ -82,10 +145,17 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h,
 {
        armada_370_xp_irq_mask(irq_get_irq_data(virq));
        writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS);
-
-       irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip,
-                                handle_level_irq);
        irq_set_status_flags(virq, IRQ_LEVEL);
+
+       if (hw < ARMADA_370_XP_MAX_PER_CPU_IRQS) {
+               irq_set_percpu_devid(virq);
+               irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip,
+                                       handle_percpu_devid_irq);
+
+       } else {
+               irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip,
+                                       handle_level_irq);
+       }
        set_irq_flags(virq, IRQF_VALID | IRQF_PROBE);
 
        return 0;
@@ -155,6 +225,15 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 
 #ifdef CONFIG_SMP
        armada_xp_mpic_smp_cpu_init();
+
+       /*
+        * Set the default affinity from all CPUs to the boot CPU.
+        * This is required since the MPIC doesn't prevent several CPUs
+        * from acknowledging the same interrupt.
+        */
+       cpumask_clear(irq_default_affinity);
+       cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+
 #endif
 
        return 0;
@@ -173,7 +252,7 @@ asmlinkage void __exception_irq_entry armada_370_xp_handle_irq(struct pt_regs
                if (irqnr > 1022)
                        break;
 
-               if (irqnr >= 8) {
+               if (irqnr > 0) {
                        irqnr = irq_find_mapping(armada_370_xp_mpic_domain,
                                        irqnr);
                        handle_IRQ(irqnr, regs);
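
With the mapping change above, hwirqs below ARMADA_370_XP_MAX_PER_CPU_IRQS become per-CPU interrupts (handle_percpu_devid_irq) while the others remain level interrupts routed to a single CPU. A hedged sketch of how a driver would request each kind; the handler and names are illustrative only.

#include <linux/interrupt.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(int, example_dev_id);

static irqreturn_t example_handler(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static int example_request_irqs(int percpu_virq, int global_virq)
{
	int err;

	/* Per-CPU interrupt (hwirq < 28): one dev_id instance per CPU. */
	err = request_percpu_irq(percpu_virq, example_handler,
				 "example-percpu", &example_dev_id);
	if (err)
		return err;

	/* Shared global interrupt: plain request_irq(); it is delivered to
	 * one CPU only, since the MPIC cannot stop several CPUs acking it. */
	return request_irq(global_virq, example_handler, 0,
			   "example-global", NULL);
}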
index ff528df70119312586e3b21a5afb48a5df3a891c..b068b7fe99ef3239c98938de5cc0eab820bc371d 100644 (file)
@@ -11,7 +11,7 @@ obj-y := id.o io.o control.o mux.o devices.o fb.o serial.o gpmc.o timer.o pm.o \
         omap_device.o sram.o
 
 omap-2-3-common                                = irq.o
-hwmod-common                           = omap_hwmod.o \
+hwmod-common                           = omap_hwmod.o omap_hwmod_reset.o \
                                          omap_hwmod_common_data.o
 clock-common                           = clock.o clock_common_data.o \
                                          clkt_dpll.o clkt_clksel.o
@@ -56,6 +56,7 @@ AFLAGS_sram34xx.o                     :=-Wa,-march=armv7-a
 # Restart code (OMAP4/5 currently in omap4-common.c)
 obj-$(CONFIG_SOC_OMAP2420)             += omap2-restart.o
 obj-$(CONFIG_SOC_OMAP2430)             += omap2-restart.o
+obj-$(CONFIG_SOC_AM33XX)               += am33xx-restart.o
 obj-$(CONFIG_ARCH_OMAP3)               += omap3-restart.o
 
 # Pin multiplexing
diff --git a/arch/arm/mach-omap2/am33xx-restart.c b/arch/arm/mach-omap2/am33xx-restart.c
new file mode 100644 (file)
index 0000000..88e4fa8
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * am33xx-restart.c - Code common to all AM33xx machines.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+
+#include "common.h"
+#include "prm-regbits-33xx.h"
+#include "prm33xx.h"
+
+/**
+ * am33xx_restart - trigger a software restart of the SoC
+ * @mode: the "reboot mode", see arch/arm/kernel/{setup,process}.c
+ * @cmd: passed from the userspace program rebooting the system (if provided)
+ *
+ * Resets the SoC.  For @cmd, see the 'reboot' syscall in
+ * kernel/sys.c.  No return value.
+ */
+void am33xx_restart(char mode, const char *cmd)
+{
+       /* TODO: Handle mode and cmd if necessary */
+
+       am33xx_prm_rmw_reg_bits(AM33XX_GLOBAL_WARM_SW_RST_MASK,
+                               AM33XX_GLOBAL_WARM_SW_RST_MASK,
+                               AM33XX_PRM_DEVICE_MOD,
+                               AM33XX_PRM_RSTCTRL_OFFSET);
+
+       /* OCP barrier */
+       (void)am33xx_prm_read_reg(AM33XX_PRM_DEVICE_MOD,
+                                 AM33XX_PRM_RSTCTRL_OFFSET);
+}
index a00d39107a21ab9c295328fd0765e319999be597..25b79a29736519884306a5c98c742650b35b15c0 100644 (file)
@@ -62,8 +62,7 @@ static int __init omap_davinci_emac_dev_init(struct omap_hwmod *oh,
 {
        struct platform_device *pdev;
 
-       pdev = omap_device_build(oh->class->name, 0, oh, pdata, pdata_len,
-                                false);
+       pdev = omap_device_build(oh->class->name, 0, oh, pdata, pdata_len);
        if (IS_ERR(pdev)) {
                WARN(1, "Can't build omap_device for %s:%s.\n",
                     oh->class->name, oh->name);
index 2590463e4b57d5729cc7e22ab9198c4c60464426..0274ff7a2a2b1c33dcdcf5e6aaac541f15a86c84 100644 (file)
@@ -140,6 +140,7 @@ DT_MACHINE_START(AM33XX_DT, "Generic AM33XX (Flattened Device Tree)")
        .init_machine   = omap_generic_init,
        .init_time      = omap3_am33xx_gptimer_timer_init,
        .dt_compat      = am33xx_boards_compat,
+       .restart        = am33xx_restart,
 MACHINE_END
 #endif
 
index ea64ad60675999e98927574432dc816db82b3392..476b82066cb6b27368e18155998ad85cff910240 100644 (file)
@@ -284,9 +284,10 @@ DEFINE_STRUCT_CLK(dpll_disp_ck, dpll_core_ck_parents, dpll_ddr_ck_ops);
  * TODO: Add clksel here (sys_clkin, CORE_CLKOUTM6, PER_CLKOUTM2
  * and ALT_CLK1/2)
  */
-DEFINE_CLK_DIVIDER(dpll_disp_m2_ck, "dpll_disp_ck", &dpll_disp_ck, 0x0,
-                  AM33XX_CM_DIV_M2_DPLL_DISP, AM33XX_DPLL_CLKOUT_DIV_SHIFT,
-                  AM33XX_DPLL_CLKOUT_DIV_WIDTH, CLK_DIVIDER_ONE_BASED, NULL);
+DEFINE_CLK_DIVIDER(dpll_disp_m2_ck, "dpll_disp_ck", &dpll_disp_ck,
+                  CLK_SET_RATE_PARENT, AM33XX_CM_DIV_M2_DPLL_DISP,
+                  AM33XX_DPLL_CLKOUT_DIV_SHIFT, AM33XX_DPLL_CLKOUT_DIV_WIDTH,
+                  CLK_DIVIDER_ONE_BASED, NULL);
 
 /* DPLL_PER */
 static struct dpll_data dpll_per_dd = {
@@ -723,7 +724,8 @@ static struct clk_hw_omap lcd_gclk_hw = {
        .clksel_mask    = AM33XX_CLKSEL_0_1_MASK,
 };
 
-DEFINE_STRUCT_CLK(lcd_gclk, lcd_ck_parents, gpio_fck_ops);
+DEFINE_STRUCT_CLK_FLAGS(lcd_gclk, lcd_ck_parents,
+                       gpio_fck_ops, CLK_SET_RATE_PARENT);
 
 DEFINE_CLK_FIXED_FACTOR(mmc_clk, "dpll_per_m2_ck", &dpll_per_m2_ck, 0x0, 1, 2);
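
Adding CLK_SET_RATE_PARENT to dpll_disp_m2_ck and lcd_gclk lets a rate request on the LCD clock propagate up to DPLL_DISP instead of stopping at the local divider. A small sketch of the consumer-side effect; the lookup name and target rate below are assumptions, not from this patch.

#include <linux/clk.h>
#include <linux/err.h>

static int example_set_pixel_clock(struct device *dev)
{
	struct clk *lcd = clk_get(dev, "lcd_gclk");	/* lookup name assumed */

	if (IS_ERR(lcd))
		return PTR_ERR(lcd);

	/* With CLK_SET_RATE_PARENT on both the divider and lcd_gclk, this
	 * request may now retune dpll_disp_ck to hit the rate exactly. */
	return clk_set_rate(lcd, 74250000);
}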
 
index 6ef87580c33f6772a1fed5445b1bb2344fb96a6d..4579c3c5338fac99e759e97368ab7ab08d9d8c4b 100644 (file)
@@ -426,6 +426,7 @@ static struct clk dpll4_m5x2_ck_3630 = {
        .parent_names   = dpll4_m5x2_ck_parent_names,
        .num_parents    = ARRAY_SIZE(dpll4_m5x2_ck_parent_names),
        .ops            = &dpll4_m5x2_ck_3630_ops,
+       .flags          = CLK_SET_RATE_PARENT,
 };
 
 static struct clk cam_mclk;
@@ -443,7 +444,14 @@ static struct clk_hw_omap cam_mclk_hw = {
        .clkdm_name     = "cam_clkdm",
 };
 
-DEFINE_STRUCT_CLK(cam_mclk, cam_mclk_parent_names, aes2_ick_ops);
+static struct clk cam_mclk = {
+       .name           = "cam_mclk",
+       .hw             = &cam_mclk_hw.hw,
+       .parent_names   = cam_mclk_parent_names,
+       .num_parents    = ARRAY_SIZE(cam_mclk_parent_names),
+       .ops            = &aes2_ick_ops,
+       .flags          = CLK_SET_RATE_PARENT,
+};
 
 static const struct clksel_rate clkout2_src_core_rates[] = {
        { .div = 1, .val = 0, .flags = RATE_IN_3XXX },
index cebe2b31943ec98e5f4a86521659898058dc1745..3d58f335f173fe4c3a9542590d43a98dbebedefb 100644 (file)
@@ -605,15 +605,26 @@ static const char *dpll_usb_ck_parents[] = {
 
 static struct clk dpll_usb_ck;
 
+static const struct clk_ops dpll_usb_ck_ops = {
+       .enable         = &omap3_noncore_dpll_enable,
+       .disable        = &omap3_noncore_dpll_disable,
+       .recalc_rate    = &omap3_dpll_recalc,
+       .round_rate     = &omap2_dpll_round_rate,
+       .set_rate       = &omap3_noncore_dpll_set_rate,
+       .get_parent     = &omap2_init_dpll_parent,
+       .init           = &omap2_init_clk_clkdm,
+};
+
 static struct clk_hw_omap dpll_usb_ck_hw = {
        .hw = {
                .clk = &dpll_usb_ck,
        },
        .dpll_data      = &dpll_usb_dd,
+       .clkdm_name     = "l3_init_clkdm",
        .ops            = &clkhwops_omap3_dpll,
 };
 
-DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_usb_ck_parents, dpll_ck_ops);
+DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_usb_ck_parents, dpll_usb_ck_ops);
 
 static const char *dpll_usb_clkdcoldo_ck_parents[] = {
        "dpll_usb_ck",
index b40204837bd7e22564e81874b25b6fe35712e638..60ddd8612b4d68654b3c2c3790a2559773c1b317 100644 (file)
@@ -65,6 +65,17 @@ struct clockdomain;
                .ops = &_clkops_name,                           \
        };
 
+#define DEFINE_STRUCT_CLK_FLAGS(_name, _parent_array_name,     \
+                               _clkops_name, _flags)           \
+       static struct clk _name = {                             \
+               .name = #_name,                                 \
+               .hw = &_name##_hw.hw,                           \
+               .parent_names = _parent_array_name,             \
+               .num_parents = ARRAY_SIZE(_parent_array_name),  \
+               .ops = &_clkops_name,                           \
+               .flags = _flags,                                \
+       };
+
 #define DEFINE_STRUCT_CLK_HW_OMAP(_name, _clkdm_name)          \
        static struct clk_hw_omap _name##_hw = {                \
                .hw = {                                         \
index 058ce3c0873ecb7aa0f4356d4d596c152aea5f03..325a515765766c1757c5fd858e9bd8b3c226f44e 100644 (file)
@@ -241,9 +241,6 @@ int am33xx_cm_wait_module_ready(u16 inst, s16 cdoffs, u16 clkctrl_offs)
 {
        int i = 0;
 
-       if (!clkctrl_offs)
-               return 0;
-
        omap_test_timeout(_is_module_ready(inst, cdoffs, clkctrl_offs),
                          MAX_MODULE_READY_TIME, i);
 
index 5fa0b62e1a797954d7b8b4cb92eec25b88d0dde5..64f4bafe7bd9bb25a0054bf27139196ed2ce6256 100644 (file)
 #ifndef __ARCH_ARM_MACH_OMAP2_CM_33XX_H
 #define __ARCH_ARM_MACH_OMAP2_CM_33XX_H
 
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/io.h>
-
 #include "common.h"
 
 #include "cm.h"
 #include "cm-regbits-33xx.h"
-#include "cm33xx.h"
+#include "iomap.h"
 
 /* CM base address */
 #define AM33XX_CM_BASE         0x44e00000
 #define AM33XX_CM_CEFUSE_CEFUSE_CLKCTRL                        AM33XX_CM_REGADDR(AM33XX_CM_CEFUSE_MOD, 0x0020)
 
 
+#ifndef __ASSEMBLER__
 extern bool am33xx_cm_is_clkdm_in_hwsup(s16 inst, u16 cdoffs);
 extern void am33xx_cm_clkdm_enable_hwsup(s16 inst, u16 cdoffs);
 extern void am33xx_cm_clkdm_disable_hwsup(s16 inst, u16 cdoffs);
@@ -417,4 +413,5 @@ static inline int am33xx_cm_wait_module_ready(u16 inst, s16 cdoffs,
 }
 #endif
 
+#endif /* ASSEMBLER */
 #endif
index b4350274361bb3fca31721301ada1e03dcffb036..0a6b9c7a63da494f6596f702cc3763fe1a25a3d2 100644 (file)
@@ -119,6 +119,14 @@ static inline void omap2xxx_restart(char mode, const char *cmd)
 }
 #endif
 
+#ifdef CONFIG_SOC_AM33XX
+void am33xx_restart(char mode, const char *cmd);
+#else
+static inline void am33xx_restart(char mode, const char *cmd)
+{
+}
+#endif
+
 #ifdef CONFIG_ARCH_OMAP3
 void omap3xxx_restart(char mode, const char *cmd);
 #else
index 142d9c616f1bfa8445ad8979baf37ead6af2fb42..1ec7f05977102759383f05351469baeecbd3e856 100644 (file)
@@ -426,7 +426,7 @@ static void __init omap_init_hdmi_audio(void)
                return;
        }
 
-       pdev = omap_device_build("omap-hdmi-audio-dai", -1, oh, NULL, 0, 0);
+       pdev = omap_device_build("omap-hdmi-audio-dai", -1, oh, NULL, 0);
        WARN(IS_ERR(pdev),
             "Can't build omap_device for omap-hdmi-audio-dai.\n");
 
index 0a02aab5df677db9bc5577093f5f6091d4c86ff8..3aed4b0b95632dbdf244b7e6a8d6349840872d1e 100644 (file)
@@ -500,8 +500,9 @@ int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate,
                if (dd->last_rounded_rate == 0)
                        return -EINVAL;
 
-               /* No freqsel on OMAP4 and OMAP3630 */
-               if (!cpu_is_omap44xx() && !cpu_is_omap3630()) {
+               /* No freqsel on AM335x, OMAP4 and OMAP3630 */
+               if (!soc_is_am33xx() && !cpu_is_omap44xx() &&
+                   !cpu_is_omap3630()) {
                        freqsel = _omap3_dpll_compute_freqsel(clk,
                                                dd->last_rounded_n);
                        WARN_ON(!freqsel);
index 45cc7ed4dd5875e4a9ceb290ca14a85b931b33db..8a68f1ec66b9ac07dd9e20cf7c6239e794972407 100644 (file)
@@ -399,8 +399,18 @@ void __init omap3xxx_check_revision(void)
                }
                break;
        case 0xb944:
-               omap_revision = AM335X_REV_ES1_0;
-               cpu_rev = "1.0";
+               switch (rev) {
+               case 0:
+                       omap_revision = AM335X_REV_ES1_0;
+                       cpu_rev = "1.0";
+                       break;
+               case 1:
+               /* FALLTHROUGH */
+               default:
+                       omap_revision = AM335X_REV_ES2_0;
+                       cpu_rev = "2.0";
+                       break;
+               }
                break;
        case 0xb8f2:
                switch (rev) {
index a8984989dec842098442ecb2a6e42833224ecf0f..c2c798c08c2b9f63a55bc11be5f654f80006431c 100644 (file)
@@ -2054,6 +2054,23 @@ static int _omap4_get_context_lost(struct omap_hwmod *oh)
        return oh->prcm.omap4.context_lost_counter;
 }
 
+/**
+ * _enable_preprogram - Pre-program an IP block during the _enable() process
+ * @oh: struct omap_hwmod *
+ *
+ * Some IP blocks (such as AESS) require some additional programming
+ * after enable before they can enter idle.  If a function pointer to
+ * do so is present in the hwmod data, then call it and pass along the
+ * return value; otherwise, return 0.
+ */
+static int __init _enable_preprogram(struct omap_hwmod *oh)
+{
+       if (!oh->class->enable_preprogram)
+               return 0;
+
+       return oh->class->enable_preprogram(oh);
+}
+
 /**
  * _enable - enable an omap_hwmod
  * @oh: struct omap_hwmod *
@@ -2160,6 +2177,7 @@ static int _enable(struct omap_hwmod *oh)
                                _update_sysc_cache(oh);
                        _enable_sysc(oh);
                }
+               r = _enable_preprogram(oh);
        } else {
                if (soc_ops.disable_module)
                        soc_ops.disable_module(oh);
@@ -3049,11 +3067,8 @@ static int _am33xx_assert_hardreset(struct omap_hwmod *oh,
 static int _am33xx_deassert_hardreset(struct omap_hwmod *oh,
                                     struct omap_hwmod_rst_info *ohri)
 {
-       if (ohri->st_shift)
-               pr_err("omap_hwmod: %s: %s: hwmod data error: OMAP4 does not support st_shift\n",
-                      oh->name, ohri->name);
-
        return am33xx_prm_deassert_hardreset(ohri->rst_shift,
+                               ohri->st_shift,
                                oh->clkdm->pwrdm.ptr->prcm_offs,
                                oh->prcm.omap4.rstctrl_offs,
                                oh->prcm.omap4.rstst_offs);
index 80c00e706d69254c18d0a9ca1af789e78ec7299b..d43d9b608edab3c1b7e69c96e711ec1eefc2d7c8 100644 (file)
@@ -510,6 +510,7 @@ struct omap_hwmod_omap4_prcm {
  * @rev: revision of the IP class
  * @pre_shutdown: ptr to fn to be executed immediately prior to device shutdown
  * @reset: ptr to fn to be executed in place of the standard hwmod reset fn
+ * @enable_preprogram:  ptr to fn to be executed during device enable
  *
  * Represent the class of a OMAP hardware "modules" (e.g. timer,
  * smartreflex, gpio, uart...)
@@ -533,6 +534,7 @@ struct omap_hwmod_class {
        u32                                     rev;
        int                                     (*pre_shutdown)(struct omap_hwmod *oh);
        int                                     (*reset)(struct omap_hwmod *oh);
+       int                                     (*enable_preprogram)(struct omap_hwmod *oh);
 };
 
 /**
@@ -679,6 +681,12 @@ extern void __init omap_hwmod_init(void);
 
 const char *omap_hwmod_get_main_clk(struct omap_hwmod *oh);
 
+/*
+ * AESS-specific hook, wired into omap_hwmod_class.enable_preprogram
+ */
+
+extern int omap_hwmod_aess_preprogram(struct omap_hwmod *oh);
+
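
The helper declared above is meant to be called through the new enable_preprogram hook added to struct omap_hwmod_class and invoked from _enable() after _enable_sysc() (see the hunks above). A minimal sketch of hwmod data wiring it up; the class name and placement are illustrative, not data from this patch.

/* Illustrative hwmod data only: wiring the exported helper into the new
 * hook so that _enable() runs it right after _enable_sysc(). */
static struct omap_hwmod_class example_aess_hwmod_class = {
	.name			= "aess",
	.enable_preprogram	= omap_hwmod_aess_preprogram,
};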
 /*
  * Chip variant-specific hwmod init routines - XXX should be converted
  * to use initcalls once the initial boot ordering is straightened out
index 646c14d9fdb9dfbe656437b45c6757c8db4d1b15..26eee4a556ad13a82f27a09d1820eb48f6d3ebac 100644 (file)
@@ -262,13 +262,15 @@ static struct omap_hwmod am33xx_wkup_m3_hwmod = {
        .name           = "wkup_m3",
        .class          = &am33xx_wkup_m3_hwmod_class,
        .clkdm_name     = "l4_wkup_aon_clkdm",
-       .flags          = HWMOD_INIT_NO_RESET,  /* Keep hardreset asserted */
+       /* Keep hardreset asserted */
+       .flags          = HWMOD_INIT_NO_RESET | HWMOD_NO_IDLEST,
        .mpu_irqs       = am33xx_wkup_m3_irqs,
        .main_clk       = "dpll_core_m4_div2_ck",
        .prcm           = {
                .omap4  = {
                        .clkctrl_offs   = AM33XX_CM_WKUP_WKUP_M3_CLKCTRL_OFFSET,
                        .rstctrl_offs   = AM33XX_RM_WKUP_RSTCTRL_OFFSET,
+                       .rstst_offs     = AM33XX_RM_WKUP_RSTST_OFFSET,
                        .modulemode     = MODULEMODE_SWCTRL,
                },
        },
@@ -414,7 +416,6 @@ static struct omap_hwmod am33xx_adc_tsc_hwmod = {
  *    - cEFUSE (doesn't fall under any ocp_if)
  *    - clkdiv32k
  *    - debugss
- *    - ocmc ram
  *    - ocp watch point
  *    - aes0
  *    - sha0
@@ -481,25 +482,6 @@ static struct omap_hwmod am33xx_debugss_hwmod = {
        },
 };
 
-/* ocmcram */
-static struct omap_hwmod_class am33xx_ocmcram_hwmod_class = {
-       .name = "ocmcram",
-};
-
-static struct omap_hwmod am33xx_ocmcram_hwmod = {
-       .name           = "ocmcram",
-       .class          = &am33xx_ocmcram_hwmod_class,
-       .clkdm_name     = "l3_clkdm",
-       .flags          = (HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET),
-       .main_clk       = "l3_gclk",
-       .prcm           = {
-               .omap4  = {
-                       .clkctrl_offs   = AM33XX_CM_PER_OCMCRAM_CLKCTRL_OFFSET,
-                       .modulemode     = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
 /* ocpwp */
 static struct omap_hwmod_class am33xx_ocpwp_hwmod_class = {
        .name           = "ocpwp",
@@ -570,6 +552,25 @@ static struct omap_hwmod am33xx_sha0_hwmod = {
 
 #endif
 
+/* ocmcram */
+static struct omap_hwmod_class am33xx_ocmcram_hwmod_class = {
+       .name = "ocmcram",
+};
+
+static struct omap_hwmod am33xx_ocmcram_hwmod = {
+       .name           = "ocmcram",
+       .class          = &am33xx_ocmcram_hwmod_class,
+       .clkdm_name     = "l3_clkdm",
+       .flags          = (HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET),
+       .main_clk       = "l3_gclk",
+       .prcm           = {
+               .omap4  = {
+                       .clkctrl_offs   = AM33XX_CM_PER_OCMCRAM_CLKCTRL_OFFSET,
+                       .modulemode     = MODULEMODE_SWCTRL,
+               },
+       },
+};
+
 /* 'smartreflex' class */
 static struct omap_hwmod_class am33xx_smartreflex_hwmod_class = {
        .name           = "smartreflex",
@@ -783,9 +784,7 @@ static struct omap_hwmod am33xx_elm_hwmod = {
        },
 };
 
-/*
- * 'epwmss' class: ecap0,1,2,  ehrpwm0,1,2
- */
+/* pwmss  */
 static struct omap_hwmod_class_sysconfig am33xx_epwmss_sysc = {
        .rev_offs       = 0x0,
        .sysc_offs      = 0x4,
@@ -801,18 +800,23 @@ static struct omap_hwmod_class am33xx_epwmss_hwmod_class = {
        .sysc           = &am33xx_epwmss_sysc,
 };
 
-/* ehrpwm0 */
-static struct omap_hwmod_irq_info am33xx_ehrpwm0_irqs[] = {
-       { .name = "int", .irq = 86 + OMAP_INTC_START, },
-       { .name = "tzint", .irq = 58 + OMAP_INTC_START, },
-       { .irq = -1 },
+static struct omap_hwmod_class am33xx_ecap_hwmod_class = {
+       .name           = "ecap",
 };
 
-static struct omap_hwmod am33xx_ehrpwm0_hwmod = {
-       .name           = "ehrpwm0",
+static struct omap_hwmod_class am33xx_eqep_hwmod_class = {
+       .name           = "eqep",
+};
+
+static struct omap_hwmod_class am33xx_ehrpwm_hwmod_class = {
+       .name           = "ehrpwm",
+};
+
+/* epwmss0 */
+static struct omap_hwmod am33xx_epwmss0_hwmod = {
+       .name           = "epwmss0",
        .class          = &am33xx_epwmss_hwmod_class,
        .clkdm_name     = "l4ls_clkdm",
-       .mpu_irqs       = am33xx_ehrpwm0_irqs,
        .main_clk       = "l4ls_gclk",
        .prcm           = {
                .omap4  = {
@@ -822,63 +826,58 @@ static struct omap_hwmod am33xx_ehrpwm0_hwmod = {
        },
 };
 
-/* ehrpwm1 */
-static struct omap_hwmod_irq_info am33xx_ehrpwm1_irqs[] = {
-       { .name = "int", .irq = 87 + OMAP_INTC_START, },
-       { .name = "tzint", .irq = 59 + OMAP_INTC_START, },
+/* ecap0 */
+static struct omap_hwmod_irq_info am33xx_ecap0_irqs[] = {
+       { .irq = 31 + OMAP_INTC_START, },
        { .irq = -1 },
 };
 
-static struct omap_hwmod am33xx_ehrpwm1_hwmod = {
-       .name           = "ehrpwm1",
-       .class          = &am33xx_epwmss_hwmod_class,
+static struct omap_hwmod am33xx_ecap0_hwmod = {
+       .name           = "ecap0",
+       .class          = &am33xx_ecap_hwmod_class,
        .clkdm_name     = "l4ls_clkdm",
-       .mpu_irqs       = am33xx_ehrpwm1_irqs,
+       .mpu_irqs       = am33xx_ecap0_irqs,
        .main_clk       = "l4ls_gclk",
-       .prcm           = {
-               .omap4  = {
-                       .clkctrl_offs   = AM33XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET,
-                       .modulemode     = MODULEMODE_SWCTRL,
-               },
-       },
 };
 
-/* ehrpwm2 */
-static struct omap_hwmod_irq_info am33xx_ehrpwm2_irqs[] = {
-       { .name = "int", .irq = 39 + OMAP_INTC_START, },
-       { .name = "tzint", .irq = 60 + OMAP_INTC_START, },
+/* eqep0 */
+static struct omap_hwmod_irq_info am33xx_eqep0_irqs[] = {
+       { .irq = 79 + OMAP_INTC_START, },
        { .irq = -1 },
 };
 
-static struct omap_hwmod am33xx_ehrpwm2_hwmod = {
-       .name           = "ehrpwm2",
-       .class          = &am33xx_epwmss_hwmod_class,
+static struct omap_hwmod am33xx_eqep0_hwmod = {
+       .name           = "eqep0",
+       .class          = &am33xx_eqep_hwmod_class,
        .clkdm_name     = "l4ls_clkdm",
-       .mpu_irqs       = am33xx_ehrpwm2_irqs,
+       .mpu_irqs       = am33xx_eqep0_irqs,
        .main_clk       = "l4ls_gclk",
-       .prcm           = {
-               .omap4  = {
-                       .clkctrl_offs   = AM33XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET,
-                       .modulemode     = MODULEMODE_SWCTRL,
-               },
-       },
 };
 
-/* ecap0 */
-static struct omap_hwmod_irq_info am33xx_ecap0_irqs[] = {
-       { .irq = 31 + OMAP_INTC_START, },
+/* ehrpwm0 */
+static struct omap_hwmod_irq_info am33xx_ehrpwm0_irqs[] = {
+       { .name = "int", .irq = 86 + OMAP_INTC_START, },
+       { .name = "tzint", .irq = 58 + OMAP_INTC_START, },
        { .irq = -1 },
 };
 
-static struct omap_hwmod am33xx_ecap0_hwmod = {
-       .name           = "ecap0",
+static struct omap_hwmod am33xx_ehrpwm0_hwmod = {
+       .name           = "ehrpwm0",
+       .class          = &am33xx_ehrpwm_hwmod_class,
+       .clkdm_name     = "l4ls_clkdm",
+       .mpu_irqs       = am33xx_ehrpwm0_irqs,
+       .main_clk       = "l4ls_gclk",
+};
+
+/* epwmss1 */
+static struct omap_hwmod am33xx_epwmss1_hwmod = {
+       .name           = "epwmss1",
        .class          = &am33xx_epwmss_hwmod_class,
        .clkdm_name     = "l4ls_clkdm",
-       .mpu_irqs       = am33xx_ecap0_irqs,
        .main_clk       = "l4ls_gclk",
        .prcm           = {
                .omap4  = {
-                       .clkctrl_offs   = AM33XX_CM_PER_EPWMSS0_CLKCTRL_OFFSET,
+                       .clkctrl_offs   = AM33XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET,
                        .modulemode     = MODULEMODE_SWCTRL,
                },
        },
@@ -892,13 +891,50 @@ static struct omap_hwmod_irq_info am33xx_ecap1_irqs[] = {
 
 static struct omap_hwmod am33xx_ecap1_hwmod = {
        .name           = "ecap1",
-       .class          = &am33xx_epwmss_hwmod_class,
+       .class          = &am33xx_ecap_hwmod_class,
        .clkdm_name     = "l4ls_clkdm",
        .mpu_irqs       = am33xx_ecap1_irqs,
        .main_clk       = "l4ls_gclk",
+};
+
+/* eqep1 */
+static struct omap_hwmod_irq_info am33xx_eqep1_irqs[] = {
+       { .irq = 88 + OMAP_INTC_START, },
+       { .irq = -1 },
+};
+
+static struct omap_hwmod am33xx_eqep1_hwmod = {
+       .name           = "eqep1",
+       .class          = &am33xx_eqep_hwmod_class,
+       .clkdm_name     = "l4ls_clkdm",
+       .mpu_irqs       = am33xx_eqep1_irqs,
+       .main_clk       = "l4ls_gclk",
+};
+
+/* ehrpwm1 */
+static struct omap_hwmod_irq_info am33xx_ehrpwm1_irqs[] = {
+       { .name = "int", .irq = 87 + OMAP_INTC_START, },
+       { .name = "tzint", .irq = 59 + OMAP_INTC_START, },
+       { .irq = -1 },
+};
+
+static struct omap_hwmod am33xx_ehrpwm1_hwmod = {
+       .name           = "ehrpwm1",
+       .class          = &am33xx_ehrpwm_hwmod_class,
+       .clkdm_name     = "l4ls_clkdm",
+       .mpu_irqs       = am33xx_ehrpwm1_irqs,
+       .main_clk       = "l4ls_gclk",
+};
+
+/* epwmss2 */
+static struct omap_hwmod am33xx_epwmss2_hwmod = {
+       .name           = "epwmss2",
+       .class          = &am33xx_epwmss_hwmod_class,
+       .clkdm_name     = "l4ls_clkdm",
+       .main_clk       = "l4ls_gclk",
        .prcm           = {
                .omap4  = {
-                       .clkctrl_offs   = AM33XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET,
+                       .clkctrl_offs   = AM33XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET,
                        .modulemode     = MODULEMODE_SWCTRL,
                },
        },
@@ -912,16 +948,39 @@ static struct omap_hwmod_irq_info am33xx_ecap2_irqs[] = {
 
 static struct omap_hwmod am33xx_ecap2_hwmod = {
        .name           = "ecap2",
+       .class          = &am33xx_ecap_hwmod_class,
+       .clkdm_name     = "l4ls_clkdm",
        .mpu_irqs       = am33xx_ecap2_irqs,
-       .class          = &am33xx_epwmss_hwmod_class,
+       .main_clk       = "l4ls_gclk",
+};
+
+/* eqep2 */
+static struct omap_hwmod_irq_info am33xx_eqep2_irqs[] = {
+       { .irq = 89 + OMAP_INTC_START, },
+       { .irq = -1 },
+};
+
+static struct omap_hwmod am33xx_eqep2_hwmod = {
+       .name           = "eqep2",
+       .class          = &am33xx_eqep_hwmod_class,
        .clkdm_name     = "l4ls_clkdm",
+       .mpu_irqs       = am33xx_eqep2_irqs,
+       .main_clk       = "l4ls_gclk",
+};
+
+/* ehrpwm2 */
+static struct omap_hwmod_irq_info am33xx_ehrpwm2_irqs[] = {
+       { .name = "int", .irq = 39 + OMAP_INTC_START, },
+       { .name = "tzint", .irq = 60 + OMAP_INTC_START, },
+       { .irq = -1 },
+};
+
+static struct omap_hwmod am33xx_ehrpwm2_hwmod = {
+       .name           = "ehrpwm2",
+       .class          = &am33xx_ehrpwm_hwmod_class,
+       .clkdm_name     = "l4ls_clkdm",
+       .mpu_irqs       = am33xx_ehrpwm2_irqs,
        .main_clk       = "l4ls_gclk",
-       .prcm           = {
-               .omap4  = {
-                       .clkctrl_offs   = AM33XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET,
-                       .modulemode     = MODULEMODE_SWCTRL,
-               },
-       },
 };
 
 /*
@@ -1824,6 +1883,7 @@ static struct omap_hwmod am33xx_tptc0_hwmod = {
        .class          = &am33xx_tptc_hwmod_class,
        .clkdm_name     = "l3_clkdm",
        .mpu_irqs       = am33xx_tptc0_irqs,
+       .flags          = HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY,
        .main_clk       = "l3_gclk",
        .prcm           = {
                .omap4  = {
@@ -2496,7 +2556,6 @@ static struct omap_hwmod_addr_space am33xx_cpgmac0_addr_space[] = {
        {
                .pa_start       = 0x4a100000,
                .pa_end         = 0x4a100000 + SZ_2K - 1,
-               .flags          = ADDR_TYPE_RT,
        },
        /* cpsw wr */
        {
@@ -2547,162 +2606,202 @@ static struct omap_hwmod_ocp_if am33xx_l4_ls__elm = {
        .user           = OCP_USER_MPU,
 };
 
-/*
- * Splitting the resources to handle access of PWMSS config space
- * and module specific part independently
- */
-static struct omap_hwmod_addr_space am33xx_ehrpwm0_addr_space[] = {
+static struct omap_hwmod_addr_space am33xx_epwmss0_addr_space[] = {
        {
                .pa_start       = 0x48300000,
                .pa_end         = 0x48300000 + SZ_16 - 1,
                .flags          = ADDR_TYPE_RT
        },
-       {
-               .pa_start       = 0x48300200,
-               .pa_end         = 0x48300200 + SZ_256 - 1,
-               .flags          = ADDR_TYPE_RT
-       },
        { }
 };
 
-static struct omap_hwmod_ocp_if am33xx_l4_ls__ehrpwm0 = {
+static struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss0 = {
        .master         = &am33xx_l4_ls_hwmod,
-       .slave          = &am33xx_ehrpwm0_hwmod,
+       .slave          = &am33xx_epwmss0_hwmod,
        .clk            = "l4ls_gclk",
-       .addr           = am33xx_ehrpwm0_addr_space,
+       .addr           = am33xx_epwmss0_addr_space,
        .user           = OCP_USER_MPU,
 };
 
-/*
- * Splitting the resources to handle access of PWMSS config space
- * and module specific part independently
- */
-static struct omap_hwmod_addr_space am33xx_ehrpwm1_addr_space[] = {
-       {
-               .pa_start       = 0x48302000,
-               .pa_end         = 0x48302000 + SZ_16 - 1,
-               .flags          = ADDR_TYPE_RT
-       },
+static struct omap_hwmod_addr_space am33xx_ecap0_addr_space[] = {
        {
-               .pa_start       = 0x48302200,
-               .pa_end         = 0x48302200 + SZ_256 - 1,
-               .flags          = ADDR_TYPE_RT
+               .pa_start       = 0x48300100,
+               .pa_end         = 0x48300100 + SZ_128 - 1,
        },
        { }
 };
 
-static struct omap_hwmod_ocp_if am33xx_l4_ls__ehrpwm1 = {
-       .master         = &am33xx_l4_ls_hwmod,
-       .slave          = &am33xx_ehrpwm1_hwmod,
+static struct omap_hwmod_ocp_if am33xx_epwmss0__ecap0 = {
+       .master         = &am33xx_epwmss0_hwmod,
+       .slave          = &am33xx_ecap0_hwmod,
        .clk            = "l4ls_gclk",
-       .addr           = am33xx_ehrpwm1_addr_space,
+       .addr           = am33xx_ecap0_addr_space,
        .user           = OCP_USER_MPU,
 };
 
-/*
- * Splitting the resources to handle access of PWMSS config space
- * and module specific part independently
- */
-static struct omap_hwmod_addr_space am33xx_ehrpwm2_addr_space[] = {
+static struct omap_hwmod_addr_space am33xx_eqep0_addr_space[] = {
        {
-               .pa_start       = 0x48304000,
-               .pa_end         = 0x48304000 + SZ_16 - 1,
-               .flags          = ADDR_TYPE_RT
-       },
-       {
-               .pa_start       = 0x48304200,
-               .pa_end         = 0x48304200 + SZ_256 - 1,
-               .flags          = ADDR_TYPE_RT
+               .pa_start       = 0x48300180,
+               .pa_end         = 0x48300180 + SZ_128 - 1,
        },
        { }
 };
 
-static struct omap_hwmod_ocp_if am33xx_l4_ls__ehrpwm2 = {
-       .master         = &am33xx_l4_ls_hwmod,
-       .slave          = &am33xx_ehrpwm2_hwmod,
+static struct omap_hwmod_ocp_if am33xx_epwmss0__eqep0 = {
+       .master         = &am33xx_epwmss0_hwmod,
+       .slave          = &am33xx_eqep0_hwmod,
        .clk            = "l4ls_gclk",
-       .addr           = am33xx_ehrpwm2_addr_space,
+       .addr           = am33xx_eqep0_addr_space,
        .user           = OCP_USER_MPU,
 };
 
-/*
- * Splitting the resources to handle access of PWMSS config space
- * and module specific part independently
- */
-static struct omap_hwmod_addr_space am33xx_ecap0_addr_space[] = {
-       {
-               .pa_start       = 0x48300000,
-               .pa_end         = 0x48300000 + SZ_16 - 1,
-               .flags          = ADDR_TYPE_RT
-       },
+static struct omap_hwmod_addr_space am33xx_ehrpwm0_addr_space[] = {
        {
-               .pa_start       = 0x48300100,
-               .pa_end         = 0x48300100 + SZ_256 - 1,
-               .flags          = ADDR_TYPE_RT
+               .pa_start       = 0x48300200,
+               .pa_end         = 0x48300200 + SZ_128 - 1,
        },
        { }
 };
 
-static struct omap_hwmod_ocp_if am33xx_l4_ls__ecap0 = {
-       .master         = &am33xx_l4_ls_hwmod,
-       .slave          = &am33xx_ecap0_hwmod,
+static struct omap_hwmod_ocp_if am33xx_epwmss0__ehrpwm0 = {
+       .master         = &am33xx_epwmss0_hwmod,
+       .slave          = &am33xx_ehrpwm0_hwmod,
        .clk            = "l4ls_gclk",
-       .addr           = am33xx_ecap0_addr_space,
+       .addr           = am33xx_ehrpwm0_addr_space,
        .user           = OCP_USER_MPU,
 };
 
-/*
- * Splitting the resources to handle access of PWMSS config space
- * and module specific part independently
- */
-static struct omap_hwmod_addr_space am33xx_ecap1_addr_space[] = {
+
+static struct omap_hwmod_addr_space am33xx_epwmss1_addr_space[] = {
        {
                .pa_start       = 0x48302000,
                .pa_end         = 0x48302000 + SZ_16 - 1,
                .flags          = ADDR_TYPE_RT
        },
+       { }
+};
+
+static struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss1 = {
+       .master         = &am33xx_l4_ls_hwmod,
+       .slave          = &am33xx_epwmss1_hwmod,
+       .clk            = "l4ls_gclk",
+       .addr           = am33xx_epwmss1_addr_space,
+       .user           = OCP_USER_MPU,
+};
+
+static struct omap_hwmod_addr_space am33xx_ecap1_addr_space[] = {
        {
                .pa_start       = 0x48302100,
-               .pa_end         = 0x48302100 + SZ_256 - 1,
-               .flags          = ADDR_TYPE_RT
+               .pa_end         = 0x48302100 + SZ_128 - 1,
        },
        { }
 };
 
-static struct omap_hwmod_ocp_if am33xx_l4_ls__ecap1 = {
-       .master         = &am33xx_l4_ls_hwmod,
+static struct omap_hwmod_ocp_if am33xx_epwmss1__ecap1 = {
+       .master         = &am33xx_epwmss1_hwmod,
        .slave          = &am33xx_ecap1_hwmod,
        .clk            = "l4ls_gclk",
        .addr           = am33xx_ecap1_addr_space,
        .user           = OCP_USER_MPU,
 };
 
-/*
- * Splitting the resources to handle access of PWMSS config space
- * and module specific part independently
- */
-static struct omap_hwmod_addr_space am33xx_ecap2_addr_space[] = {
+static struct omap_hwmod_addr_space am33xx_eqep1_addr_space[] = {
+       {
+               .pa_start       = 0x48302180,
+               .pa_end         = 0x48302180 + SZ_128 - 1,
+       },
+       { }
+};
+
+static struct omap_hwmod_ocp_if am33xx_epwmss1__eqep1 = {
+       .master         = &am33xx_epwmss1_hwmod,
+       .slave          = &am33xx_eqep1_hwmod,
+       .clk            = "l4ls_gclk",
+       .addr           = am33xx_eqep1_addr_space,
+       .user           = OCP_USER_MPU,
+};
+
+static struct omap_hwmod_addr_space am33xx_ehrpwm1_addr_space[] = {
+       {
+               .pa_start       = 0x48302200,
+               .pa_end         = 0x48302200 + SZ_128 - 1,
+       },
+       { }
+};
+
+static struct omap_hwmod_ocp_if am33xx_epwmss1__ehrpwm1 = {
+       .master         = &am33xx_epwmss1_hwmod,
+       .slave          = &am33xx_ehrpwm1_hwmod,
+       .clk            = "l4ls_gclk",
+       .addr           = am33xx_ehrpwm1_addr_space,
+       .user           = OCP_USER_MPU,
+};
+
+static struct omap_hwmod_addr_space am33xx_epwmss2_addr_space[] = {
        {
                .pa_start       = 0x48304000,
                .pa_end         = 0x48304000 + SZ_16 - 1,
                .flags          = ADDR_TYPE_RT
        },
+       { }
+};
+
+static struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss2 = {
+       .master         = &am33xx_l4_ls_hwmod,
+       .slave          = &am33xx_epwmss2_hwmod,
+       .clk            = "l4ls_gclk",
+       .addr           = am33xx_epwmss2_addr_space,
+       .user           = OCP_USER_MPU,
+};
+
+static struct omap_hwmod_addr_space am33xx_ecap2_addr_space[] = {
        {
                .pa_start       = 0x48304100,
-               .pa_end         = 0x48304100 + SZ_256 - 1,
-               .flags          = ADDR_TYPE_RT
+               .pa_end         = 0x48304100 + SZ_128 - 1,
        },
        { }
 };
 
-static struct omap_hwmod_ocp_if am33xx_l4_ls__ecap2 = {
-       .master         = &am33xx_l4_ls_hwmod,
+static struct omap_hwmod_ocp_if am33xx_epwmss2__ecap2 = {
+       .master         = &am33xx_epwmss2_hwmod,
        .slave          = &am33xx_ecap2_hwmod,
        .clk            = "l4ls_gclk",
        .addr           = am33xx_ecap2_addr_space,
        .user           = OCP_USER_MPU,
 };
 
+static struct omap_hwmod_addr_space am33xx_eqep2_addr_space[] = {
+       {
+               .pa_start       = 0x48304180,
+               .pa_end         = 0x48304180 + SZ_128 - 1,
+       },
+       { }
+};
+
+static struct omap_hwmod_ocp_if am33xx_epwmss2__eqep2 = {
+       .master         = &am33xx_epwmss2_hwmod,
+       .slave          = &am33xx_eqep2_hwmod,
+       .clk            = "l4ls_gclk",
+       .addr           = am33xx_eqep2_addr_space,
+       .user           = OCP_USER_MPU,
+};
+
+static struct omap_hwmod_addr_space am33xx_ehrpwm2_addr_space[] = {
+       {
+               .pa_start       = 0x48304200,
+               .pa_end         = 0x48304200 + SZ_128 - 1,
+       },
+       { }
+};
+
+static struct omap_hwmod_ocp_if am33xx_epwmss2__ehrpwm2 = {
+       .master         = &am33xx_epwmss2_hwmod,
+       .slave          = &am33xx_ehrpwm2_hwmod,
+       .clk            = "l4ls_gclk",
+       .addr           = am33xx_ehrpwm2_addr_space,
+       .user           = OCP_USER_MPU,
+};
+
 /* l3s cfg -> gpmc */
 static struct omap_hwmod_addr_space am33xx_gpmc_addr_space[] = {
        {
@@ -3328,6 +3427,13 @@ static struct omap_hwmod_ocp_if am33xx_l3_s__usbss = {
        .flags          = OCPIF_SWSUP_IDLE,
 };
 
+/* l3 main -> ocmc */
+static struct omap_hwmod_ocp_if am33xx_l3_main__ocmc = {
+       .master         = &am33xx_l3_main_hwmod,
+       .slave          = &am33xx_ocmcram_hwmod,
+       .user           = OCP_USER_MPU | OCP_USER_SDMA,
+};
+
 static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = {
        &am33xx_l4_fw__emif_fw,
        &am33xx_l3_main__emif,
@@ -3385,12 +3491,18 @@ static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = {
        &am33xx_l4_ls__uart6,
        &am33xx_l4_ls__spinlock,
        &am33xx_l4_ls__elm,
-       &am33xx_l4_ls__ehrpwm0,
-       &am33xx_l4_ls__ehrpwm1,
-       &am33xx_l4_ls__ehrpwm2,
-       &am33xx_l4_ls__ecap0,
-       &am33xx_l4_ls__ecap1,
-       &am33xx_l4_ls__ecap2,
+       &am33xx_l4_ls__epwmss0,
+       &am33xx_epwmss0__ecap0,
+       &am33xx_epwmss0__eqep0,
+       &am33xx_epwmss0__ehrpwm0,
+       &am33xx_l4_ls__epwmss1,
+       &am33xx_epwmss1__ecap1,
+       &am33xx_epwmss1__eqep1,
+       &am33xx_epwmss1__ehrpwm1,
+       &am33xx_l4_ls__epwmss2,
+       &am33xx_epwmss2__ecap2,
+       &am33xx_epwmss2__eqep2,
+       &am33xx_epwmss2__ehrpwm2,
        &am33xx_l3_s__gpmc,
        &am33xx_l3_main__lcdc,
        &am33xx_l4_ls__mcspi0,
@@ -3398,6 +3510,7 @@ static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = {
        &am33xx_l3_main__tptc0,
        &am33xx_l3_main__tptc1,
        &am33xx_l3_main__tptc2,
+       &am33xx_l3_main__ocmc,
        &am33xx_l3_s__usbss,
        &am33xx_l4_hs__cpgmac0,
        &am33xx_cpgmac0__mdio,
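
For reference, the address regions split out above give every PWMSS instance the same internal layout: the config space stays with the epwmssN parent and each submodule is reached through its own interface. Offsets below are relative to the epwmssN base (0x48300000, 0x48302000, 0x48304000); the macro names are illustrative only:

#define PWMSS_CFG_OFFSET	0x000	/* epwmssN config space, SZ_16, ADDR_TYPE_RT */
#define PWMSS_ECAP_OFFSET	0x100	/* ecapN,   SZ_128 */
#define PWMSS_EQEP_OFFSET	0x180	/* eqepN,   SZ_128 */
#define PWMSS_EHRPWM_OFFSET	0x200	/* ehrpwmN, SZ_128 */
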
index 8bb2628df34ed26522b30560f5eafc2c6a8c459c..ac7e03ec952f22f7c856084c15e2de3e767e4273 100644 (file)
@@ -3493,7 +3493,12 @@ static struct omap_hwmod am35xx_emac_hwmod = {
        .name           = "davinci_emac",
        .mpu_irqs       = am35xx_emac_mpu_irqs,
        .class          = &am35xx_emac_class,
-       .flags          = HWMOD_NO_IDLEST,
+       /*
+        * According to Mark Greer, the MPU will not return from WFI
+        * when the EMAC signals an interrupt.
+        * http://www.spinics.net/lists/arm-kernel/msg174734.html
+        */
+       .flags          = (HWMOD_NO_IDLEST | HWMOD_BLOCK_WFI),
 };
 
 /* l3_core -> davinci emac interface */
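
HWMOD_BLOCK_WFI keeps the MPU out of WFI while the EMAC is active, since (per the comment above) an EMAC interrupt cannot bring it back out. The helper below is a purely hypothetical sketch -- the function and the way the flag reaches it are invented -- and only illustrates the intended effect:

static void example_omap_idle(bool wfi_blocked)
{
	if (wfi_blocked) {
		cpu_relax();	/* stay out of WFI while a blocking module is up */
		return;
	}

	asm volatile("wfi" : : : "memory");	/* normal wait-for-interrupt */
}
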
index 7ec1083ff604840e3e4198a3d0ecb2904a4f99e2..0e47d2e1687c76004ea7449b8e496340617b09d1 100644 (file)
@@ -322,6 +322,7 @@ static struct omap_hwmod_class_sysconfig omap44xx_aess_sysc = {
 static struct omap_hwmod_class omap44xx_aess_hwmod_class = {
        .name   = "aess",
        .sysc   = &omap44xx_aess_sysc,
+       .enable_preprogram = omap_hwmod_aess_preprogram,
 };
 
 /* aess */
@@ -348,7 +349,7 @@ static struct omap_hwmod omap44xx_aess_hwmod = {
        .clkdm_name     = "abe_clkdm",
        .mpu_irqs       = omap44xx_aess_irqs,
        .sdma_reqs      = omap44xx_aess_sdma_reqs,
-       .main_clk       = "aess_fck",
+       .main_clk       = "aess_fclk",
        .prcm = {
                .omap4 = {
                        .clkctrl_offs = OMAP4_CM1_ABE_AESS_CLKCTRL_OFFSET,
@@ -4241,6 +4242,27 @@ static struct omap_hwmod_ocp_if omap44xx_l4_cfg__ocp_wp_noc = {
 
 static struct omap_hwmod_addr_space omap44xx_aess_addrs[] = {
        {
+               .name           = "dmem",
+               .pa_start       = 0x40180000,
+               .pa_end         = 0x4018ffff
+       },
+       {
+               .name           = "cmem",
+               .pa_start       = 0x401a0000,
+               .pa_end         = 0x401a1fff
+       },
+       {
+               .name           = "smem",
+               .pa_start       = 0x401c0000,
+               .pa_end         = 0x401c5fff
+       },
+       {
+               .name           = "pmem",
+               .pa_start       = 0x401e0000,
+               .pa_end         = 0x401e1fff
+       },
+       {
+               .name           = "mpu",
                .pa_start       = 0x401f1000,
                .pa_end         = 0x401f13ff,
                .flags          = ADDR_TYPE_RT
@@ -4259,6 +4281,27 @@ static struct omap_hwmod_ocp_if __maybe_unused omap44xx_l4_abe__aess = {
 
 static struct omap_hwmod_addr_space omap44xx_aess_dma_addrs[] = {
        {
+               .name           = "dmem_dma",
+               .pa_start       = 0x49080000,
+               .pa_end         = 0x4908ffff
+       },
+       {
+               .name           = "cmem_dma",
+               .pa_start       = 0x490a0000,
+               .pa_end         = 0x490a1fff
+       },
+       {
+               .name           = "smem_dma",
+               .pa_start       = 0x490c0000,
+               .pa_end         = 0x490c5fff
+       },
+       {
+               .name           = "pmem_dma",
+               .pa_start       = 0x490e0000,
+               .pa_end         = 0x490e1fff
+       },
+       {
+               .name           = "dma",
                .pa_start       = 0x490f1000,
                .pa_end         = 0x490f13ff,
                .flags          = ADDR_TYPE_RT
@@ -6268,7 +6311,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
        &omap44xx_l3_main_1__l3_main_3,
        &omap44xx_l3_main_2__l3_main_3,
        &omap44xx_l4_cfg__l3_main_3,
-       /* &omap44xx_aess__l4_abe, */
+       &omap44xx_aess__l4_abe,
        &omap44xx_dsp__l4_abe,
        &omap44xx_l3_main_1__l4_abe,
        &omap44xx_mpu__l4_abe,
@@ -6277,8 +6320,8 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = {
        &omap44xx_l4_cfg__l4_wkup,
        &omap44xx_mpu__mpu_private,
        &omap44xx_l4_cfg__ocp_wp_noc,
-       /* &omap44xx_l4_abe__aess, */
-       /* &omap44xx_l4_abe__aess_dma, */
+       &omap44xx_l4_abe__aess,
+       &omap44xx_l4_abe__aess_dma,
        &omap44xx_l3_main_2__c2c,
        &omap44xx_l4_wkup__counter_32k,
        &omap44xx_l4_cfg__ctrl_module_core,
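
Driver-side view, sketched under the assumption that the named regions above surface as named memory resources on the AESS platform device (the function and its usage are illustrative, not taken from the AESS driver):

static void __iomem *example_map_aess_dmem(struct platform_device *pdev)
{
	struct resource *res;

	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dmem");
	if (!res)
		return NULL;

	return ioremap(res->start, resource_size(res));
}
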
diff --git a/arch/arm/mach-omap2/omap_hwmod_reset.c b/arch/arm/mach-omap2/omap_hwmod_reset.c
new file mode 100644 (file)
index 0000000..65e186c
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * OMAP IP block custom reset and preprogramming stubs
+ *
+ * Copyright (C) 2012 Texas Instruments, Inc.
+ * Paul Walmsley
+ *
+ * A small number of IP blocks need custom reset and preprogramming
+ * functions.  The stubs in this file provide a standard way for the
+ * hwmod code to call these functions, which are to be located under
+ * drivers/.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include <sound/aess.h>
+
+#include "omap_hwmod.h"
+
+/**
+ * omap_hwmod_aess_preprogram - enable AESS internal autogating
+ * @oh: struct omap_hwmod *
+ *
+ * The AESS will not IdleAck to the PRCM until its internal autogating
+ * is enabled.  Since internal autogating is disabled by default after
+ * AESS reset, we must enable autogating after the hwmod code resets
+ * the AESS.  Returns 0.
+ */
+int omap_hwmod_aess_preprogram(struct omap_hwmod *oh)
+{
+       void __iomem *va;
+
+       va = omap_hwmod_get_mpu_rt_va(oh);
+       if (!va)
+               return -EINVAL;
+
+       aess_enable_autogating(va);
+
+       return 0;
+}
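
A rough sketch of where this stub sits in the enable sequence; the surrounding function is simplified and assumed, not the hwmod core's actual code:

static int example_enable(struct omap_hwmod *oh)
{
	/* ...clocks enabled, hardresets deasserted, OCP softreset done... */
	if (oh->class->enable_preprogram)
		return oh->class->enable_preprogram(oh);

	return 0;
}
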
index cd6682df5625effe2e72e63e107e8bdc45ab512b..673a4c1d1d7627df0c390a371d84db8d2c7a3c32 100644 (file)
@@ -282,19 +282,19 @@ int __init omap2_common_pm_late_init(void)
         * a completely different mechanism.
         * Disable this part if a DT blob is available.
         */
-       if (of_have_populated_dt())
-               return 0;
+       if (!of_have_populated_dt()) {
 
-       /* Init the voltage layer */
-       omap_pmic_late_init();
-       omap_voltage_late_init();
+               /* Init the voltage layer */
+               omap_pmic_late_init();
+               omap_voltage_late_init();
 
-       /* Initialize the voltages */
-       omap3_init_voltages();
-       omap4_init_voltages();
+               /* Initialize the voltages */
+               omap3_init_voltages();
+               omap4_init_voltages();
 
-       /* Smartreflex device init */
-       omap_devinit_smartreflex();
+               /* Smartreflex device init */
+               omap_devinit_smartreflex();
+       }
 
 #ifdef CONFIG_SUSPEND
        suspend_set_ops(&omap_pm_ops);
index b2a4df623545a6065eea40652276f7fbf37757ff..b59d93908341bc91f10e9fae9692e7d3c322198c 100644 (file)
@@ -54,7 +54,6 @@
 #include "powerdomain.h"
 #include "clockdomain.h"
 
-static void (*omap2_sram_idle)(void);
 static void (*omap2_sram_suspend)(u32 dllctrl, void __iomem *sdrc_dlla_ctrl,
                                  void __iomem *sdrc_power);
 
@@ -163,6 +162,8 @@ static int omap2_allow_mpu_retention(void)
 
 static void omap2_enter_mpu_retention(void)
 {
+       const int zero = 0;
+
        /* The peripherals seem not to be able to wake up the MPU when
         * it is in retention mode. */
        if (omap2_allow_mpu_retention()) {
@@ -179,7 +180,8 @@ static void omap2_enter_mpu_retention(void)
                pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
        }
 
-       omap2_sram_idle();
+       /* WFI */
+       asm("mcr p15, 0, %0, c7, c0, 4" : : "r" (zero) : "memory", "cc");
 
        pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
 }
@@ -333,11 +335,9 @@ int __init omap2_pm_init(void)
        /*
         * We copy the assembler sleep/wakeup routines to SRAM.
         * These routines need to be in SRAM as that's the only
-        * memory the MPU can see when it wakes up.
+        * memory the MPU can see when it wakes up after the entire
+        * chip enters idle.
         */
-       omap2_sram_idle = omap_sram_push(omap24xx_idle_loop_suspend,
-                                        omap24xx_idle_loop_suspend_sz);
-
        omap2_sram_suspend = omap_sram_push(omap24xx_cpu_suspend,
                                            omap24xx_cpu_suspend_sz);
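
The MCR that replaces the SRAM-resident idle loop is the ARMv6 CP15 c7, c0, 4 wait-for-interrupt operation; pulled out as a standalone helper it would look roughly like this (sketch only):

static inline void omap24xx_wfi_example(void)
{
	const int zero = 0;

	/* ARMv6 wait for interrupt via CP15 */
	asm volatile("mcr p15, 0, %0, c7, c0, 4"
		     : : "r" (zero) : "memory", "cc");
}
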
 
index aa6fd98f606e30aa96c164b2ce3dd40719777e0c..ea62e75ef21d39678632c13808c3d8847cd0b422 100644 (file)
@@ -77,10 +77,20 @@ static int omap4_pm_suspend(void)
                omap_set_pwrdm_state(pwrst->pwrdm, pwrst->saved_state);
                pwrdm_set_logic_retst(pwrst->pwrdm, pwrst->saved_logic_state);
        }
-       if (ret)
+       if (ret) {
                pr_crit("Could not enter target state in pm_suspend\n");
-       else
+               /*
+                * OMAP4 chip PM currently works only with certain (newer)
+                * versions of bootloaders. This is due to missing code in the
+                * kernel to properly reset and initialize some devices.
+                * Warn the user about the bootloader version being one of the
+                * possible causes.
+                * http://www.spinics.net/lists/arm-kernel/msg218641.html
+                */
+               pr_warn("A possible cause could be an old bootloader - try u-boot >= v2012.07\n");
+       } else {
                pr_info("Successfully put all powerdomains to target state\n");
+       }
 
        return 0;
 }
@@ -146,6 +156,13 @@ int __init omap4_pm_init(void)
        }
 
        pr_err("Power Management for TI OMAP4.\n");
+       /*
+        * OMAP4 chip PM currently works only with certain (newer)
+        * versions of bootloaders. This is due to missing code in the
+        * kernel to properly reset and initialize some devices.
+        * http://www.spinics.net/lists/arm-kernel/msg218641.html
+        */
+       pr_warn("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n");
 
        ret = pwrdm_for_each(pwrdms_setup, NULL);
        if (ret) {
index 1ac73883f8913c5894aff5f5267222c9f35343ee..44c0d7216aa72e1f5f578e0c7e4d8da1ada8871a 100644 (file)
@@ -110,11 +110,11 @@ int am33xx_prm_assert_hardreset(u8 shift, s16 inst, u16 rstctrl_offs)
  * -EINVAL upon an argument error, -EEXIST if the submodule was already out
  * of reset, or -EBUSY if the submodule did not exit reset promptly.
  */
-int am33xx_prm_deassert_hardreset(u8 shift, s16 inst,
+int am33xx_prm_deassert_hardreset(u8 shift, u8 st_shift, s16 inst,
                u16 rstctrl_offs, u16 rstst_offs)
 {
        int c;
-       u32 mask = 1 << shift;
+       u32 mask = 1 << st_shift;
 
        /* Check the current status to avoid  de-asserting the line twice */
        if (am33xx_prm_is_hardreset_asserted(shift, inst, rstctrl_offs) == 0)
@@ -122,11 +122,14 @@ int am33xx_prm_deassert_hardreset(u8 shift, s16 inst,
 
        /* Clear the reset status by writing 1 to the status bit */
        am33xx_prm_rmw_reg_bits(0xffffffff, mask, inst, rstst_offs);
+
        /* de-assert the reset control line */
+       mask = 1 << shift;
+
        am33xx_prm_rmw_reg_bits(mask, 0, inst, rstctrl_offs);
-       /* wait the status to be set */
 
-       omap_test_timeout(am33xx_prm_is_hardreset_asserted(shift, inst,
+       /* wait for the status to be set */
+       omap_test_timeout(am33xx_prm_is_hardreset_asserted(st_shift, inst,
                                                           rstst_offs),
                          MAX_MODULE_HARDRESET_WAIT, c);
 
index 3f25c563a82185bf07c423787a0df41b71ca9798..9b9918dfb119967e91c3c1ee791bb48e63219b41 100644 (file)
 #define AM33XX_PM_CEFUSE_PWRSTST_OFFSET                0x0004
 #define AM33XX_PM_CEFUSE_PWRSTST               AM33XX_PRM_REGADDR(AM33XX_PRM_CEFUSE_MOD, 0x0004)
 
+#ifndef __ASSEMBLER__
 extern u32 am33xx_prm_read_reg(s16 inst, u16 idx);
 extern void am33xx_prm_write_reg(u32 val, s16 inst, u16 idx);
 extern u32 am33xx_prm_rmw_reg_bits(u32 mask, u32 bits, s16 inst, s16 idx);
@@ -124,6 +125,7 @@ extern void am33xx_prm_global_warm_sw_reset(void);
 extern int am33xx_prm_is_hardreset_asserted(u8 shift, s16 inst,
                u16 rstctrl_offs);
 extern int am33xx_prm_assert_hardreset(u8 shift, s16 inst, u16 rstctrl_offs);
-extern int am33xx_prm_deassert_hardreset(u8 shift, s16 inst,
+extern int am33xx_prm_deassert_hardreset(u8 shift, u8 st_shift, s16 inst,
                u16 rstctrl_offs, u16 rstst_offs);
+#endif /* __ASSEMBLER__ */
 #endif
index ce0ccd26efbd4dfa992e507fda3385a35ea6503c..1d3cb25c9629e8ae8f28f7e615a582f16e4bfeaa 100644 (file)
 
        .text
 
-/*
- * Forces OMAP into idle state
- *
- * omap24xx_idle_loop_suspend() - This bit of code just executes the WFI
- * for normal idles.
- *
- * Note: This code get's copied to internal SRAM at boot. When the OMAP
- *      wakes up it continues execution at the point it went to sleep.
- */
-       .align  3
-ENTRY(omap24xx_idle_loop_suspend)
-       stmfd   sp!, {r0, lr}           @ save registers on stack
-       mov     r0, #0                  @ clear for mcr setup
-       mcr     p15, 0, r0, c7, c0, 4   @ wait for interrupt
-       ldmfd   sp!, {r0, pc}           @ restore regs and return
-
-ENTRY(omap24xx_idle_loop_suspend_sz)
-       .word   . - omap24xx_idle_loop_suspend
-
 /*
  * omap24xx_cpu_suspend() - Forces OMAP into deep sleep state by completing
  * SDRC shutdown then ARM shutdown.  Upon wake MPU is back on so just restore
index 092aedd7ed13325ed34224e197fefc818e534608..c62116bbc760d3a1245f722ecc3b19be5ba09955 100644 (file)
@@ -395,6 +395,7 @@ IS_OMAP_TYPE(3430, 0x3430)
 
 #define AM335X_CLASS           0x33500033
 #define AM335X_REV_ES1_0       AM335X_CLASS
+#define AM335X_REV_ES2_0       (AM335X_CLASS | (0x1 << 8))
 
 #define OMAP443X_CLASS         0x44300044
 #define OMAP4430_REV_ES1_0     (OMAP443X_CLASS | (0x10 << 8))
index bb829e065400a540766d1d2bab977ad11c3368e7..d7bc33f153440c8fe4ee313aeff889d67002ad7b 100644 (file)
@@ -152,7 +152,7 @@ static int __init sr_dev_init(struct omap_hwmod *oh, void *user)
 
        sr_data->enable_on_init = sr_enable_on_init;
 
-       pdev = omap_device_build(name, i, oh, sr_data, sizeof(*sr_data), 0);
+       pdev = omap_device_build(name, i, oh, sr_data, sizeof(*sr_data));
        if (IS_ERR(pdev))
                pr_warning("%s: Could not build omap_device for %s: %s.\n\n",
                        __func__, name, oh->name);
index 67c859cf16bcb3d7306ca0e348ceaa55017422a4..ce66eb9be481f118159c6c2305079795a933f1df 100644 (file)
@@ -147,15 +147,6 @@ config OMAP3_L2_AUX_SECURE_SERVICE_SET_ID
        help
          PPA routine service ID for setting L2 auxiliary control register.
 
-config OMAP_32K_TIMER_HZ
-       int "Kernel internal timer frequency for 32KHz timer"
-       range 32 1024
-       depends on OMAP_32K_TIMER
-       default "128"
-       help
-         Kernel internal timer frequency should be a divisor of 32768,
-         such as 64 or 128.
-
 config OMAP_DM_TIMER
        bool "Use dual-mode timer"
        depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS
index 6d35767bc48f0f1ebb1e682ab46ead9f9d697e5c..e27d2daa779065f1d6d2ff0cb24a963caf9b74d6 100644 (file)
 #if !defined(__ASM_ARCH_OMAP_TIMEX_H)
 #define __ASM_ARCH_OMAP_TIMEX_H
 
-/*
- * OMAP 32KHz timer updates time one jiffie at a time from a secondary timer,
- * and that's why the CLOCK_TICK_RATE is not 32768.
- */
-#ifdef CONFIG_OMAP_32K_TIMER
-#define CLOCK_TICK_RATE                (CONFIG_OMAP_32K_TIMER_HZ)
-#else
 #define CLOCK_TICK_RATE                (HZ * 100000UL)
-#endif
 
 #endif /* __ASM_ARCH_OMAP_TIMEX_H */
index f84f5c57de35b0bb5693067c3799de8093ad237e..60308053fdb2aeb2aaaa35112096e3b23a8dc2f8 100644 (file)
@@ -509,3 +509,4 @@ void local_touch_nmi(void)
 {
        __this_cpu_write(last_nmi_rip, 0);
 }
+EXPORT_SYMBOL_GPL(local_touch_nmi);
index 4a85ccf8d4cfca02daab96709e35cc14a92a35b5..a7e40a7c821427cd27f6c7019411030ea00e33e8 100644 (file)
@@ -4,7 +4,6 @@
 menuconfig BLOCK
        bool "Enable the block layer" if EXPERT
        default y
-       select PERCPU_RWSEM
        help
         Provide block layer support for the kernel.
 
index 8bdebb6781e1445ff629fb4dd06ffbcf97257f38..b2b9837f9dd3475be841d41ed52dd7715e936d4b 100644 (file)
 
 static DEFINE_MUTEX(blkcg_pol_mutex);
 
-struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
+struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
+                           .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
 EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
+static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+                                     struct request_queue *q, bool update_hint);
+
+/**
+ * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_cgrp: cgroup cursor used internally by the iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
+ * read locked.  If called under either blkcg or queue lock, the iteration
+ * is guaranteed to include all and only online blkgs.  The caller may
+ * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
+ * subtree.
+ */
+#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)         \
+       cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
+               if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
+                                             (p_blkg)->q, false)))
+
 static bool blkcg_policy_enabled(struct request_queue *q,
                                 const struct blkcg_policy *pol)
 {
@@ -112,9 +133,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 
                blkg->pd[i] = pd;
                pd->blkg = blkg;
+               pd->plid = i;
 
                /* invoke per-policy init */
-               if (blkcg_policy_enabled(blkg->q, pol))
+               if (pol->pd_init_fn)
                        pol->pd_init_fn(blkg);
        }
 
@@ -125,8 +147,19 @@ err_free:
        return NULL;
 }
 
+/**
+ * __blkg_lookup - internal version of blkg_lookup()
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ * @update_hint: whether to update lookup hint with the result or not
+ *
+ * This is internal version and shouldn't be used by policy
+ * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
+ * @q's bypass state.  If @update_hint is %true, the caller should be
+ * holding @q->queue_lock and lookup hint is updated on success.
+ */
 static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-                                     struct request_queue *q)
+                                     struct request_queue *q, bool update_hint)
 {
        struct blkcg_gq *blkg;
 
@@ -135,14 +168,19 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
                return blkg;
 
        /*
-        * Hint didn't match.  Look up from the radix tree.  Note that we
-        * may not be holding queue_lock and thus are not sure whether
-        * @blkg from blkg_tree has already been removed or not, so we
-        * can't update hint to the lookup result.  Leave it to the caller.
+        * Hint didn't match.  Look up from the radix tree.  Note that the
+        * hint can only be updated under queue_lock as otherwise @blkg
+        * could have already been removed from blkg_tree.  The caller is
+        * responsible for grabbing queue_lock if @update_hint.
         */
        blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
-       if (blkg && blkg->q == q)
+       if (blkg && blkg->q == q) {
+               if (update_hint) {
+                       lockdep_assert_held(q->queue_lock);
+                       rcu_assign_pointer(blkcg->blkg_hint, blkg);
+               }
                return blkg;
+       }
 
        return NULL;
 }
@@ -162,7 +200,7 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
 
        if (unlikely(blk_queue_bypass(q)))
                return NULL;
-       return __blkg_lookup(blkcg, q);
+       return __blkg_lookup(blkcg, q, false);
 }
 EXPORT_SYMBOL_GPL(blkg_lookup);
 
@@ -170,75 +208,129 @@ EXPORT_SYMBOL_GPL(blkg_lookup);
  * If @new_blkg is %NULL, this function tries to allocate a new one as
  * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
  */
-static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-                                            struct request_queue *q,
-                                            struct blkcg_gq *new_blkg)
+static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
+                                   struct request_queue *q,
+                                   struct blkcg_gq *new_blkg)
 {
        struct blkcg_gq *blkg;
-       int ret;
+       int i, ret;
 
        WARN_ON_ONCE(!rcu_read_lock_held());
        lockdep_assert_held(q->queue_lock);
 
-       /* lookup and update hint on success, see __blkg_lookup() for details */
-       blkg = __blkg_lookup(blkcg, q);
-       if (blkg) {
-               rcu_assign_pointer(blkcg->blkg_hint, blkg);
-               goto out_free;
-       }
-
        /* blkg holds a reference to blkcg */
        if (!css_tryget(&blkcg->css)) {
-               blkg = ERR_PTR(-EINVAL);
-               goto out_free;
+               ret = -EINVAL;
+               goto err_free_blkg;
        }
 
        /* allocate */
        if (!new_blkg) {
                new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
                if (unlikely(!new_blkg)) {
-                       blkg = ERR_PTR(-ENOMEM);
-                       goto out_put;
+                       ret = -ENOMEM;
+                       goto err_put_css;
                }
        }
        blkg = new_blkg;
 
-       /* insert */
+       /* link parent and insert */
+       if (blkcg_parent(blkcg)) {
+               blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
+               if (WARN_ON_ONCE(!blkg->parent)) {
+                       blkg = ERR_PTR(-EINVAL);
+                       goto err_put_css;
+               }
+               blkg_get(blkg->parent);
+       }
+
        spin_lock(&blkcg->lock);
        ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
        if (likely(!ret)) {
                hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
                list_add(&blkg->q_node, &q->blkg_list);
+
+               for (i = 0; i < BLKCG_MAX_POLS; i++) {
+                       struct blkcg_policy *pol = blkcg_policy[i];
+
+                       if (blkg->pd[i] && pol->pd_online_fn)
+                               pol->pd_online_fn(blkg);
+               }
        }
+       blkg->online = true;
        spin_unlock(&blkcg->lock);
 
        if (!ret)
                return blkg;
 
-       blkg = ERR_PTR(ret);
-out_put:
+       /* @blkg failed to be fully initialized, use the usual release path */
+       blkg_put(blkg);
+       return ERR_PTR(ret);
+
+err_put_css:
        css_put(&blkcg->css);
-out_free:
+err_free_blkg:
        blkg_free(new_blkg);
-       return blkg;
+       return ERR_PTR(ret);
 }
 
+/**
+ * blkg_lookup_create - lookup blkg, try to create one if not there
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
+ * create one.  blkg creation is performed recursively from blkcg_root such
+ * that all non-root blkg's have access to the parent blkg.  This function
+ * should be called under RCU read lock and @q->queue_lock.
+ *
+ * Returns pointer to the looked up or created blkg on success, ERR_PTR()
+ * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
+ * dead and bypassing, returns ERR_PTR(-EBUSY).
+ */
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
                                    struct request_queue *q)
 {
+       struct blkcg_gq *blkg;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+       lockdep_assert_held(q->queue_lock);
+
        /*
         * This could be the first entry point of blkcg implementation and
         * we shouldn't allow anything to go through for a bypassing queue.
         */
        if (unlikely(blk_queue_bypass(q)))
                return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
-       return __blkg_lookup_create(blkcg, q, NULL);
+
+       blkg = __blkg_lookup(blkcg, q, true);
+       if (blkg)
+               return blkg;
+
+       /*
+        * Create blkgs walking down from blkcg_root to @blkcg, so that all
+        * non-root blkgs have access to their parents.
+        */
+       while (true) {
+               struct blkcg *pos = blkcg;
+               struct blkcg *parent = blkcg_parent(blkcg);
+
+               while (parent && !__blkg_lookup(parent, q, false)) {
+                       pos = parent;
+                       parent = blkcg_parent(parent);
+               }
+
+               blkg = blkg_create(pos, q, NULL);
+               if (pos == blkcg || IS_ERR(blkg))
+                       return blkg;
+       }
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
        struct blkcg *blkcg = blkg->blkcg;
+       int i;
 
        lockdep_assert_held(blkg->q->queue_lock);
        lockdep_assert_held(&blkcg->lock);
@@ -247,6 +339,14 @@ static void blkg_destroy(struct blkcg_gq *blkg)
        WARN_ON_ONCE(list_empty(&blkg->q_node));
        WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
 
+       for (i = 0; i < BLKCG_MAX_POLS; i++) {
+               struct blkcg_policy *pol = blkcg_policy[i];
+
+               if (blkg->pd[i] && pol->pd_offline_fn)
+                       pol->pd_offline_fn(blkg);
+       }
+       blkg->online = false;
+
        radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
        list_del_init(&blkg->q_node);
        hlist_del_init_rcu(&blkg->blkcg_node);
@@ -301,8 +401,10 @@ static void blkg_rcu_free(struct rcu_head *rcu_head)
 
 void __blkg_release(struct blkcg_gq *blkg)
 {
-       /* release the extra blkcg reference this blkg has been holding */
+       /* release the blkcg and parent blkg refs this blkg has been holding */
        css_put(&blkg->blkcg->css);
+       if (blkg->parent)
+               blkg_put(blkg->parent);
 
        /*
         * A group is freed in rcu manner. But having an rcu lock does not
@@ -401,8 +503,9 @@ static const char *blkg_dev_name(struct blkcg_gq *blkg)
  *
  * This function invokes @prfill on each blkg of @blkcg if pd for the
  * policy specified by @pol exists.  @prfill is invoked with @sf, the
- * policy data and @data.  If @show_total is %true, the sum of the return
- * values from @prfill is printed with "Total" label at the end.
+ * policy data and @data and the matching queue lock held.  If @show_total
+ * is %true, the sum of the return values from @prfill is printed with
+ * "Total" label at the end.
  *
  * This is to be used to construct print functions for
  * cftype->read_seq_string method.
@@ -416,11 +519,14 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
        struct blkcg_gq *blkg;
        u64 total = 0;
 
-       spin_lock_irq(&blkcg->lock);
-       hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node)
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+               spin_lock_irq(blkg->q->queue_lock);
                if (blkcg_policy_enabled(blkg->q, pol))
                        total += prfill(sf, blkg->pd[pol->plid], data);
-       spin_unlock_irq(&blkcg->lock);
+               spin_unlock_irq(blkg->q->queue_lock);
+       }
+       rcu_read_unlock();
 
        if (show_total)
                seq_printf(sf, "Total %llu\n", (unsigned long long)total);
@@ -479,6 +585,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
        seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
        return v;
 }
+EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
 
 /**
  * blkg_prfill_stat - prfill callback for blkg_stat
@@ -511,6 +618,82 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 }
 EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
 
+/**
+ * blkg_stat_recursive_sum - collect hierarchical blkg_stat
+ * @pd: policy private data of interest
+ * @off: offset to the blkg_stat in @pd
+ *
+ * Collect the blkg_stat specified by @off from @pd and all its online
+ * descendants and return the sum.  The caller must be holding the queue
+ * lock for online tests.
+ */
+u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
+{
+       struct blkcg_policy *pol = blkcg_policy[pd->plid];
+       struct blkcg_gq *pos_blkg;
+       struct cgroup *pos_cgrp;
+       u64 sum;
+
+       lockdep_assert_held(pd->blkg->q->queue_lock);
+
+       sum = blkg_stat_read((void *)pd + off);
+
+       rcu_read_lock();
+       blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+               struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
+               struct blkg_stat *stat = (void *)pos_pd + off;
+
+               if (pos_blkg->online)
+                       sum += blkg_stat_read(stat);
+       }
+       rcu_read_unlock();
+
+       return sum;
+}
+EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
+
+/**
+ * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
+ * @pd: policy private data of interest
+ * @off: offset to the blkg_rwstat in @pd
+ *
+ * Collect the blkg_rwstat specified by @off from @pd and all its online
+ * descendants and return the sum.  The caller must be holding the queue
+ * lock for online tests.
+ */
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
+                                            int off)
+{
+       struct blkcg_policy *pol = blkcg_policy[pd->plid];
+       struct blkcg_gq *pos_blkg;
+       struct cgroup *pos_cgrp;
+       struct blkg_rwstat sum;
+       int i;
+
+       lockdep_assert_held(pd->blkg->q->queue_lock);
+
+       sum = blkg_rwstat_read((void *)pd + off);
+
+       rcu_read_lock();
+       blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+               struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
+               struct blkg_rwstat *rwstat = (void *)pos_pd + off;
+               struct blkg_rwstat tmp;
+
+               if (!pos_blkg->online)
+                       continue;
+
+               tmp = blkg_rwstat_read(rwstat);
+
+               for (i = 0; i < BLKG_RWSTAT_NR; i++)
+                       sum.cnt[i] += tmp.cnt[i];
+       }
+       rcu_read_unlock();
+
+       return sum;
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
+
 /**
  * blkg_conf_prep - parse and prepare for per-blkg config update
  * @blkcg: target block cgroup
@@ -656,6 +839,7 @@ static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
                return ERR_PTR(-ENOMEM);
 
        blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
+       blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
        blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
 done:
        spin_lock_init(&blkcg->lock);
@@ -775,7 +959,7 @@ int blkcg_activate_policy(struct request_queue *q,
                          const struct blkcg_policy *pol)
 {
        LIST_HEAD(pds);
-       struct blkcg_gq *blkg;
+       struct blkcg_gq *blkg, *new_blkg;
        struct blkg_policy_data *pd, *n;
        int cnt = 0, ret;
        bool preloaded;
@@ -784,19 +968,27 @@ int blkcg_activate_policy(struct request_queue *q,
                return 0;
 
        /* preallocations for root blkg */
-       blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
-       if (!blkg)
+       new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+       if (!new_blkg)
                return -ENOMEM;
 
        preloaded = !radix_tree_preload(GFP_KERNEL);
 
        blk_queue_bypass_start(q);
 
-       /* make sure the root blkg exists and count the existing blkgs */
+       /*
+        * Make sure the root blkg exists and count the existing blkgs.  As
+        * @q is bypassing at this point, blkg_lookup_create() can't be
+        * used.  Open code it.
+        */
        spin_lock_irq(q->queue_lock);
 
        rcu_read_lock();
-       blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
+       blkg = __blkg_lookup(&blkcg_root, q, false);
+       if (blkg)
+               blkg_free(new_blkg);
+       else
+               blkg = blkg_create(&blkcg_root, q, new_blkg);
        rcu_read_unlock();
 
        if (preloaded)
@@ -844,6 +1036,7 @@ int blkcg_activate_policy(struct request_queue *q,
 
                blkg->pd[pol->plid] = pd;
                pd->blkg = blkg;
+               pd->plid = pol->plid;
                pol->pd_init_fn(blkg);
 
                spin_unlock(&blkg->blkcg->lock);
@@ -890,6 +1083,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
                /* grab blkcg lock too while removing @pd from @blkg */
                spin_lock(&blkg->blkcg->lock);
 
+               if (pol->pd_offline_fn)
+                       pol->pd_offline_fn(blkg);
                if (pol->pd_exit_fn)
                        pol->pd_exit_fn(blkg);
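
A policy would typically consume the new recursive helpers from a prfill callback; minimal sketch (the callback name is illustrative, __blkg_prfill_u64() is the existing single-value printer):

static u64 example_prfill_stat_recursive(struct seq_file *sf,
					 struct blkg_policy_data *pd, int off)
{
	u64 sum = blkg_stat_recursive_sum(pd, off);

	return __blkg_prfill_u64(sf, pd, sum);
}
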
 
index 24597309e23d38700a6ca2ae80cd9aab71aa3474..f2b292925ccda3a2e8301eec4924e5fac989a354 100644 (file)
@@ -54,6 +54,7 @@ struct blkcg {
 
        /* TODO: per-policy storage in blkcg */
        unsigned int                    cfq_weight;     /* belongs to cfq */
+       unsigned int                    cfq_leaf_weight;
 };
 
 struct blkg_stat {
@@ -80,8 +81,9 @@ struct blkg_rwstat {
  * beginning and pd_size can't be smaller than pd.
  */
 struct blkg_policy_data {
-       /* the blkg this per-policy data belongs to */
+       /* the blkg and policy id this per-policy data belongs to */
        struct blkcg_gq                 *blkg;
+       int                             plid;
 
        /* used during policy activation */
        struct list_head                alloc_node;
@@ -94,17 +96,27 @@ struct blkcg_gq {
        struct list_head                q_node;
        struct hlist_node               blkcg_node;
        struct blkcg                    *blkcg;
+
+       /* all non-root blkcg_gq's are guaranteed to have access to parent */
+       struct blkcg_gq                 *parent;
+
        /* request allocation list for this blkcg-q pair */
        struct request_list             rl;
+
        /* reference count */
        int                             refcnt;
 
+       /* is this blkg online? protected by both blkcg and q locks */
+       bool                            online;
+
        struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
 
        struct rcu_head                 rcu_head;
 };
 
 typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
+typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
+typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
 typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg);
 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg);
 
@@ -117,6 +129,8 @@ struct blkcg_policy {
 
        /* operations */
        blkcg_pol_init_pd_fn            *pd_init_fn;
+       blkcg_pol_online_pd_fn          *pd_online_fn;
+       blkcg_pol_offline_pd_fn         *pd_offline_fn;
        blkcg_pol_exit_pd_fn            *pd_exit_fn;
        blkcg_pol_reset_pd_stats_fn     *pd_reset_stats_fn;
 };
@@ -150,6 +164,10 @@ u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
                       int off);
 
+u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off);
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
+                                            int off);
+
 struct blkg_conf_ctx {
        struct gendisk                  *disk;
        struct blkcg_gq                 *blkg;
@@ -180,6 +198,19 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
        return task_blkcg(current);
 }
 
+/**
+ * blkcg_parent - get the parent of a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * Return the parent blkcg of @blkcg.  Can be called anytime.
+ */
+static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
+{
+       struct cgroup *pcg = blkcg->css.cgroup->parent;
+
+       return pcg ? cgroup_to_blkcg(pcg) : NULL;
+}
+
 /**
  * blkg_to_pdata - get policy private data
  * @blkg: blkg of interest
@@ -386,6 +417,18 @@ static inline void blkg_stat_reset(struct blkg_stat *stat)
        stat->cnt = 0;
 }
 
+/**
+ * blkg_stat_merge - merge a blkg_stat into another
+ * @to: the destination blkg_stat
+ * @from: the source
+ *
+ * Add @from's count to @to.
+ */
+static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from)
+{
+       blkg_stat_add(to, blkg_stat_read(from));
+}
+
 /**
  * blkg_rwstat_add - add a value to a blkg_rwstat
  * @rwstat: target blkg_rwstat
@@ -434,14 +477,14 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
 }
 
 /**
- * blkg_rwstat_sum - read the total count of a blkg_rwstat
+ * blkg_rwstat_total - read the total count of a blkg_rwstat
  * @rwstat: blkg_rwstat to read
  *
  * Return the total count of @rwstat regardless of the IO direction.  This
  * function can be called without synchronization and takes care of u64
  * atomicity.
  */
-static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
+static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
 {
        struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
 
@@ -457,6 +500,25 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
        memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
 }
 
+/**
+ * blkg_rwstat_merge - merge a blkg_rwstat into another
+ * @to: the destination blkg_rwstat
+ * @from: the source
+ *
+ * Add @from's counts to @to.
+ */
+static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
+                                    struct blkg_rwstat *from)
+{
+       struct blkg_rwstat v = blkg_rwstat_read(from);
+       int i;
+
+       u64_stats_update_begin(&to->syncp);
+       for (i = 0; i < BLKG_RWSTAT_NR; i++)
+               to->cnt[i] += v.cnt[i];
+       u64_stats_update_end(&to->syncp);
+}
+
 #else  /* CONFIG_BLK_CGROUP */
 
 struct cgroup;
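
blkg_stat_recursive_sum() and blkg_rwstat_recursive_sum(), declared above, conceptually walk a blkg and its descendants and add up one counter. A rough userspace sketch of the idea (the tree layout and names are invented, and the real helpers also skip offline groups):

#include <stdio.h>

struct node {
	unsigned long stat;		/* one per-group counter */
	struct node *child;		/* first child */
	struct node *sibling;		/* next sibling */
};

static unsigned long recursive_sum(struct node *n)
{
	unsigned long sum = n->stat;

	for (struct node *c = n->child; c; c = c->sibling)
		sum += recursive_sum(c);
	return sum;
}

int main(void)
{
	struct node leaf2 = { .stat = 7 };
	struct node leaf1 = { .stat = 5, .sibling = &leaf2 };
	struct node mid   = { .stat = 3, .child = &leaf1 };
	struct node root  = { .stat = 1, .child = &mid };

	printf("%lu\n", recursive_sum(&root));	/* 1 + 3 + 5 + 7 = 16 */
	return 0;
}
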
index 277134cb5d324955e36ad11f10987677325049dd..074b758efc42cf116d61f4755f0107f491890980 100644 (file)
@@ -39,7 +39,6 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
-EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
 
 DEFINE_IDA(blk_queue_ida);
@@ -1348,7 +1347,7 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
        if (!ll_back_merge_fn(q, req, bio))
                return false;
 
-       trace_block_bio_backmerge(q, bio);
+       trace_block_bio_backmerge(q, req, bio);
 
        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);
@@ -1370,7 +1369,7 @@ static bool bio_attempt_front_merge(struct request_queue *q,
        if (!ll_front_merge_fn(q, req, bio))
                return false;
 
-       trace_block_bio_frontmerge(q, bio);
+       trace_block_bio_frontmerge(q, req, bio);
 
        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);
@@ -1553,13 +1552,6 @@ get_rq:
                if (list_empty(&plug->list))
                        trace_block_plug(q);
                else {
-                       if (!plug->should_sort) {
-                               struct request *__rq;
-
-                               __rq = list_entry_rq(plug->list.prev);
-                               if (__rq->q != q)
-                                       plug->should_sort = 1;
-                       }
                        if (request_count >= BLK_MAX_REQUEST_COUNT) {
                                blk_flush_plug_list(plug, false);
                                trace_block_plug(q);
@@ -2890,7 +2882,6 @@ void blk_start_plug(struct blk_plug *plug)
        plug->magic = PLUG_MAGIC;
        INIT_LIST_HEAD(&plug->list);
        INIT_LIST_HEAD(&plug->cb_list);
-       plug->should_sort = 0;
 
        /*
         * If this is a nested plug, don't actually assign it. It will be
@@ -2992,10 +2983,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
        list_splice_init(&plug->list, &list);
 
-       if (plug->should_sort) {
-               list_sort(NULL, &list, plug_rq_cmp);
-               plug->should_sort = 0;
-       }
+       list_sort(NULL, &list, plug_rq_cmp);
 
        q = NULL;
        depth = 0;
index c88202f973d944f7e9c847b13d48c8ce24c87230..e7062139612914b95917405fc3da52e4498f066d 100644 (file)
@@ -121,9 +121,9 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
        /* Prevent hang_check timer from firing at us during very long I/O */
        hang_check = sysctl_hung_task_timeout_secs;
        if (hang_check)
-               while (!wait_for_completion_timeout(&wait, hang_check * (HZ/2)));
+               while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
        else
-               wait_for_completion(&wait);
+               wait_for_completion_io(&wait);
 
        if (rq->errors)
                err = -EIO;
index 720ad607ff91c31de69a888001399132f5eaad01..db8f1b5078570fe98afb8dd748636060511ba683 100644 (file)
@@ -436,7 +436,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 
        bio_get(bio);
        submit_bio(WRITE_FLUSH, bio);
-       wait_for_completion(&wait);
+       wait_for_completion_io(&wait);
 
        /*
         * The driver must store the error location in ->bi_sector, if
index b3a1f2b70b3166e011e4ef0f617ecd786b7be82b..d6f50d572565ad41a730ec531ea4849a6ef042bb 100644 (file)
@@ -126,7 +126,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
        /* Wait for bios in-flight */
        if (!atomic_dec_and_test(&bb.done))
-               wait_for_completion(&wait);
+               wait_for_completion_io(&wait);
 
        if (!test_bit(BIO_UPTODATE, &bb.flags))
                ret = -EIO;
@@ -200,7 +200,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 
        /* Wait for bios in-flight */
        if (!atomic_dec_and_test(&bb.done))
-               wait_for_completion(&wait);
+               wait_for_completion_io(&wait);
 
        if (!test_bit(BIO_UPTODATE, &bb.flags))
                ret = -ENOTSUPP;
@@ -262,7 +262,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 
        /* Wait for bios in-flight */
        if (!atomic_dec_and_test(&bb.done))
-               wait_for_completion(&wait);
+               wait_for_completion_io(&wait);
 
        if (!test_bit(BIO_UPTODATE, &bb.flags))
                /* One of bios in the batch was completed with error.*/
index 788147797a798b0041e8d3fa3bf70955016b0915..6206a934eb8c85a2f21636759f1b64afe8e3fb70 100644 (file)
@@ -497,6 +497,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
        return res;
 }
 
+static void blk_free_queue_rcu(struct rcu_head *rcu_head)
+{
+       struct request_queue *q = container_of(rcu_head, struct request_queue,
+                                              rcu_head);
+       kmem_cache_free(blk_requestq_cachep, q);
+}
+
 /**
  * blk_release_queue: - release a &struct request_queue when it is no longer needed
  * @kobj:    the kobj belonging to the request queue to be released
@@ -538,7 +545,7 @@ static void blk_release_queue(struct kobject *kobj)
        bdi_destroy(&q->backing_dev_info);
 
        ida_simple_remove(&blk_queue_ida, q->id);
-       kmem_cache_free(blk_requestq_cachep, q);
+       call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
 
 static const struct sysfs_ops queue_sysfs_ops = {
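
The change above defers freeing the request_queue with call_rcu() so that lockless readers still holding a pointer survive until a grace period has passed. A minimal sketch of the same pattern for a hypothetical object, assuming kernel context:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu_head;
};

static void foo_free_rcu(struct rcu_head *head)
{
	struct foo *f = container_of(head, struct foo, rcu_head);

	kfree(f);
}

static void foo_release(struct foo *f)
{
	/* RCU readers may still hold a pointer to *f; free it only after a
	 * grace period, mirroring blk_free_queue_rcu() above */
	call_rcu(&f->rcu_head, foo_free_rcu);
}
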
index 47fdfdd41520611f59caca02bfeb0623951fa9d5..e837b8f619b7d646825d43ea5ddeb9beacbea3d2 100644 (file)
@@ -61,7 +61,7 @@ static inline void blk_clear_rq_complete(struct request *rq)
 /*
  * Internal elevator interface
  */
-#define ELV_ON_HASH(rq)                (!hlist_unhashed(&(rq)->hash))
+#define ELV_ON_HASH(rq) hash_hashed(&(rq)->hash)
 
 void blk_insert_flush(struct request *rq);
 void blk_abort_flushes(struct request_queue *q);
index ec52807cdd0949ec842319303d4dc3e8aa27c273..4f0ade74cfd04a1c48f22218a6c4369517efa88b 100644 (file)
@@ -85,7 +85,6 @@ struct cfq_rb_root {
        struct rb_root rb;
        struct rb_node *left;
        unsigned count;
-       unsigned total_weight;
        u64 min_vdisktime;
        struct cfq_ttime ttime;
 };
@@ -155,7 +154,7 @@ struct cfq_queue {
  * First index in the service_trees.
  * IDLE is handled separately, so it has negative index
  */
-enum wl_prio_t {
+enum wl_class_t {
        BE_WORKLOAD = 0,
        RT_WORKLOAD = 1,
        IDLE_WORKLOAD = 2,
@@ -223,10 +222,45 @@ struct cfq_group {
 
        /* group service_tree key */
        u64 vdisktime;
+
+       /*
+        * The number of active cfqgs and sum of their weights under this
+        * cfqg.  This covers this cfqg's leaf_weight and all children's
+        * weights, but does not cover weights of further descendants.
+        *
+        * If a cfqg is on the service tree, it's active.  An active cfqg
+        * also activates its parent and contributes to the children_weight
+        * of the parent.
+        */
+       int nr_active;
+       unsigned int children_weight;
+
+       /*
+        * vfraction is the fraction of vdisktime that the tasks in this
+        * cfqg are entitled to.  This is determined by compounding the
+        * ratios walking up from this cfqg to the root.
+        *
+        * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all
+        * vfractions on a service tree is approximately 1.  The sum may
+        * deviate a bit due to rounding errors and fluctuations caused by
+        * cfqgs entering and leaving the service tree.
+        */
+       unsigned int vfraction;
+
+       /*
+        * There are two weights - (internal) weight is the weight of this
+        * cfqg against the sibling cfqgs.  leaf_weight is the weight of
+        * this cfqg against the child cfqgs.  For the root cfqg, both
+        * weights are kept in sync for backward compatibility.
+        */
        unsigned int weight;
        unsigned int new_weight;
        unsigned int dev_weight;
 
+       unsigned int leaf_weight;
+       unsigned int new_leaf_weight;
+       unsigned int dev_leaf_weight;
+
        /* number of cfqq currently on this group */
        int nr_cfqq;
 
@@ -248,14 +282,15 @@ struct cfq_group {
        struct cfq_rb_root service_trees[2][3];
        struct cfq_rb_root service_tree_idle;
 
-       unsigned long saved_workload_slice;
-       enum wl_type_t saved_workload;
-       enum wl_prio_t saved_serving_prio;
+       unsigned long saved_wl_slice;
+       enum wl_type_t saved_wl_type;
+       enum wl_class_t saved_wl_class;
 
        /* number of requests that are on the dispatch list or inside driver */
        int dispatched;
        struct cfq_ttime ttime;
-       struct cfqg_stats stats;
+       struct cfqg_stats stats;        /* stats for this cfqg */
+       struct cfqg_stats dead_stats;   /* stats pushed from dead children */
 };
 
 struct cfq_io_cq {
@@ -280,8 +315,8 @@ struct cfq_data {
        /*
         * The priority currently being served
         */
-       enum wl_prio_t serving_prio;
-       enum wl_type_t serving_type;
+       enum wl_class_t serving_wl_class;
+       enum wl_type_t serving_wl_type;
        unsigned long workload_expires;
        struct cfq_group *serving_group;
 
@@ -353,17 +388,17 @@ struct cfq_data {
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
 
-static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
-                                           enum wl_prio_t prio,
+static struct cfq_rb_root *st_for(struct cfq_group *cfqg,
+                                           enum wl_class_t class,
                                            enum wl_type_t type)
 {
        if (!cfqg)
                return NULL;
 
-       if (prio == IDLE_WORKLOAD)
+       if (class == IDLE_WORKLOAD)
                return &cfqg->service_tree_idle;
 
-       return &cfqg->service_trees[prio][type];
+       return &cfqg->service_trees[class][type];
 }
 
 enum cfqq_state_flags {
@@ -502,7 +537,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg)
 {
        struct cfqg_stats *stats = &cfqg->stats;
 
-       if (blkg_rwstat_sum(&stats->queued))
+       if (blkg_rwstat_total(&stats->queued))
                return;
 
        /*
@@ -546,7 +581,7 @@ static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg)
        struct cfqg_stats *stats = &cfqg->stats;
 
        blkg_stat_add(&stats->avg_queue_size_sum,
-                     blkg_rwstat_sum(&stats->queued));
+                     blkg_rwstat_total(&stats->queued));
        blkg_stat_add(&stats->avg_queue_size_samples, 1);
        cfqg_stats_update_group_wait_time(stats);
 }
@@ -572,6 +607,13 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
        return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
 }
 
+static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
+{
+       struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent;
+
+       return pblkg ? blkg_to_cfqg(pblkg) : NULL;
+}
+
 static inline void cfqg_get(struct cfq_group *cfqg)
 {
        return blkg_get(cfqg_to_blkg(cfqg));
@@ -586,8 +628,9 @@ static inline void cfqg_put(struct cfq_group *cfqg)
        char __pbuf[128];                                               \
                                                                        \
        blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf));  \
-       blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
-                         cfq_cfqq_sync((cfqq)) ? 'S' : 'A',            \
+       blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c %s " fmt, (cfqq)->pid, \
+                       cfq_cfqq_sync((cfqq)) ? 'S' : 'A',              \
+                       cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\
                          __pbuf, ##args);                              \
 } while (0)
 
@@ -646,11 +689,9 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
                                io_start_time - start_time);
 }
 
-static void cfq_pd_reset_stats(struct blkcg_gq *blkg)
+/* @stats = 0 */
+static void cfqg_stats_reset(struct cfqg_stats *stats)
 {
-       struct cfq_group *cfqg = blkg_to_cfqg(blkg);
-       struct cfqg_stats *stats = &cfqg->stats;
-
        /* queued stats shouldn't be cleared */
        blkg_rwstat_reset(&stats->service_bytes);
        blkg_rwstat_reset(&stats->serviced);
@@ -669,13 +710,58 @@ static void cfq_pd_reset_stats(struct blkcg_gq *blkg)
 #endif
 }
 
+/* @to += @from */
+static void cfqg_stats_merge(struct cfqg_stats *to, struct cfqg_stats *from)
+{
+       /* queued stats shouldn't be cleared */
+       blkg_rwstat_merge(&to->service_bytes, &from->service_bytes);
+       blkg_rwstat_merge(&to->serviced, &from->serviced);
+       blkg_rwstat_merge(&to->merged, &from->merged);
+       blkg_rwstat_merge(&to->service_time, &from->service_time);
+       blkg_rwstat_merge(&to->wait_time, &from->wait_time);
+       blkg_stat_merge(&to->time, &from->time);
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+       blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time);
+       blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+       blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
+       blkg_stat_merge(&to->dequeue, &from->dequeue);
+       blkg_stat_merge(&to->group_wait_time, &from->group_wait_time);
+       blkg_stat_merge(&to->idle_time, &from->idle_time);
+       blkg_stat_merge(&to->empty_time, &from->empty_time);
+#endif
+}
+
+/*
+ * Transfer @cfqg's stats to its parent's dead_stats so that the ancestors'
+ * recursive stats can still account for the amount used by this cfqg after
+ * it's gone.
+ */
+static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
+{
+       struct cfq_group *parent = cfqg_parent(cfqg);
+
+       lockdep_assert_held(cfqg_to_blkg(cfqg)->q->queue_lock);
+
+       if (unlikely(!parent))
+               return;
+
+       cfqg_stats_merge(&parent->dead_stats, &cfqg->stats);
+       cfqg_stats_merge(&parent->dead_stats, &cfqg->dead_stats);
+       cfqg_stats_reset(&cfqg->stats);
+       cfqg_stats_reset(&cfqg->dead_stats);
+}
+
 #else  /* CONFIG_CFQ_GROUP_IOSCHED */
 
+static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
 static inline void cfqg_get(struct cfq_group *cfqg) { }
 static inline void cfqg_put(struct cfq_group *cfqg) { }
 
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
-       blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
+       blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c " fmt, (cfqq)->pid, \
+                       cfq_cfqq_sync((cfqq)) ? 'S' : 'A',              \
+                       cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\
+                               ##args)
 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)         do {} while (0)
 
 static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
@@ -732,7 +818,7 @@ static inline bool iops_mode(struct cfq_data *cfqd)
                return false;
 }
 
-static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
+static inline enum wl_class_t cfqq_class(struct cfq_queue *cfqq)
 {
        if (cfq_class_idle(cfqq))
                return IDLE_WORKLOAD;
@@ -751,23 +837,23 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
        return SYNC_WORKLOAD;
 }
 
-static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
+static inline int cfq_group_busy_queues_wl(enum wl_class_t wl_class,
                                        struct cfq_data *cfqd,
                                        struct cfq_group *cfqg)
 {
-       if (wl == IDLE_WORKLOAD)
+       if (wl_class == IDLE_WORKLOAD)
                return cfqg->service_tree_idle.count;
 
-       return cfqg->service_trees[wl][ASYNC_WORKLOAD].count
-               + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
-               + cfqg->service_trees[wl][SYNC_WORKLOAD].count;
+       return cfqg->service_trees[wl_class][ASYNC_WORKLOAD].count +
+               cfqg->service_trees[wl_class][SYNC_NOIDLE_WORKLOAD].count +
+               cfqg->service_trees[wl_class][SYNC_WORKLOAD].count;
 }
 
 static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
                                        struct cfq_group *cfqg)
 {
-       return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count
-               + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
+       return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count +
+               cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
 }
 
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
@@ -847,13 +933,27 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
        return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
 }
 
-static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
+/**
+ * cfqg_scale_charge - scale disk time charge according to cfqg weight
+ * @charge: disk time being charged
+ * @vfraction: vfraction of the cfqg, fixed point w/ CFQ_SERVICE_SHIFT
+ *
+ * Scale @charge according to @vfraction, which is in range (0, 1].  The
+ * scaling is inversely proportional.
+ *
+ * scaled = charge / vfraction
+ *
+ * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
+ */
+static inline u64 cfqg_scale_charge(unsigned long charge,
+                                   unsigned int vfraction)
 {
-       u64 d = delta << CFQ_SERVICE_SHIFT;
+       u64 c = charge << CFQ_SERVICE_SHIFT;    /* make it fixed point */
 
-       d = d * CFQ_WEIGHT_DEFAULT;
-       do_div(d, cfqg->weight);
-       return d;
+       /* charge / vfraction */
+       c <<= CFQ_SERVICE_SHIFT;
+       do_div(c, vfraction);
+       return c;
 }
 
 static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
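
The scaling documented above is plain fixed-point division. A small self-contained illustration with invented numbers (do_div() replaced by ordinary division for userspace):

#include <stdio.h>
#include <stdint.h>

#define SERVICE_SHIFT 12	/* mirrors CFQ_SERVICE_SHIFT */

static uint64_t scale_charge(unsigned long charge, unsigned int vfraction)
{
	uint64_t c = (uint64_t)charge << SERVICE_SHIFT;	/* make it fixed point */

	/* pre-shift so dividing by a fixed-point vfraction keeps fixed point */
	c <<= SERVICE_SHIFT;
	return c / vfraction;	/* the kernel uses do_div() here */
}

int main(void)
{
	/* a group entitled to 3/8 of the disk (vfraction = 0.375) is charged
	 * 8 units of disk time, so its vdisktime advances by 8 / 0.375 */
	unsigned int vfr = 3 * (1u << SERVICE_SHIFT) / 8;
	uint64_t scaled = scale_charge(8, vfr);

	printf("%llu (~%.1f whole units)\n",
	       (unsigned long long)scaled, scaled / (double)(1u << SERVICE_SHIFT));
	return 0;
}
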
@@ -909,9 +1009,7 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
 static inline unsigned
 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
-       struct cfq_rb_root *st = &cfqd->grp_service_tree;
-
-       return cfqd->cfq_target_latency * cfqg->weight / st->total_weight;
+       return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
 }
 
 static inline unsigned
@@ -1178,20 +1276,61 @@ static void
 cfq_update_group_weight(struct cfq_group *cfqg)
 {
        BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
+
        if (cfqg->new_weight) {
                cfqg->weight = cfqg->new_weight;
                cfqg->new_weight = 0;
        }
+
+       if (cfqg->new_leaf_weight) {
+               cfqg->leaf_weight = cfqg->new_leaf_weight;
+               cfqg->new_leaf_weight = 0;
+       }
 }
 
 static void
 cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
 {
+       unsigned int vfr = 1 << CFQ_SERVICE_SHIFT;      /* start with 1 */
+       struct cfq_group *pos = cfqg;
+       struct cfq_group *parent;
+       bool propagate;
+
+       /* add to the service tree */
        BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
 
        cfq_update_group_weight(cfqg);
        __cfq_group_service_tree_add(st, cfqg);
-       st->total_weight += cfqg->weight;
+
+       /*
+        * Activate @cfqg and calculate the portion of vfraction @cfqg is
+        * entitled to.  vfraction is calculated by walking the tree
+        * towards the root calculating the fraction it has at each level.
+        * The compounded ratio is how much vfraction @cfqg owns.
+        *
+        * Start with the proportion the tasks in this cfqg have against its
+        * active children cfqgs - its leaf_weight against children_weight.
+        */
+       propagate = !pos->nr_active++;
+       pos->children_weight += pos->leaf_weight;
+       vfr = vfr * pos->leaf_weight / pos->children_weight;
+
+       /*
+        * Compound ->weight walking up the tree.  Both activation and
+        * vfraction calculation are done in the same loop.  Propagation
+        * stops once an already activated node is met.  vfraction
+        * calculation should always continue to the root.
+        */
+       while ((parent = cfqg_parent(pos))) {
+               if (propagate) {
+                       propagate = !parent->nr_active++;
+                       parent->children_weight += pos->weight;
+               }
+               vfr = vfr * pos->weight / parent->children_weight;
+               pos = parent;
+       }
+
+       cfqg->vfraction = max_t(unsigned, vfr, 1);
 }
 
 static void
@@ -1222,7 +1361,32 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 static void
 cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
 {
-       st->total_weight -= cfqg->weight;
+       struct cfq_group *pos = cfqg;
+       bool propagate;
+
+       /*
+        * Undo activation from cfq_group_service_tree_add().  Deactivate
+        * @cfqg and propagate deactivation upwards.
+        */
+       propagate = !--pos->nr_active;
+       pos->children_weight -= pos->leaf_weight;
+
+       while (propagate) {
+               struct cfq_group *parent = cfqg_parent(pos);
+
+               /* @pos has 0 nr_active at this point */
+               WARN_ON_ONCE(pos->children_weight);
+               pos->vfraction = 0;
+
+               if (!parent)
+                       break;
+
+               propagate = !--parent->nr_active;
+               parent->children_weight -= pos->weight;
+               pos = parent;
+       }
+
+       /* remove from the service tree */
        if (!RB_EMPTY_NODE(&cfqg->rb_node))
                cfq_rb_erase(&cfqg->rb_node, st);
 }
@@ -1241,7 +1405,7 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
        cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
        cfq_group_service_tree_del(st, cfqg);
-       cfqg->saved_workload_slice = 0;
+       cfqg->saved_wl_slice = 0;
        cfqg_stats_update_dequeue(cfqg);
 }
 
@@ -1284,6 +1448,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
        unsigned int used_sl, charge, unaccounted_sl = 0;
        int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
                        - cfqg->service_tree_idle.count;
+       unsigned int vfr;
 
        BUG_ON(nr_sync < 0);
        used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
@@ -1293,20 +1458,25 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
        else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
                charge = cfqq->allocated_slice;
 
-       /* Can't update vdisktime while group is on service tree */
+       /*
+        * Can't update vdisktime while on service tree and cfqg->vfraction
+        * is valid only while on it.  Cache vfr, leave the service tree,
+        * update vdisktime and go back on.  The re-addition to the tree
+        * will also update the weights as necessary.
+        */
+       vfr = cfqg->vfraction;
        cfq_group_service_tree_del(st, cfqg);
-       cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
-       /* If a new weight was requested, update now, off tree */
+       cfqg->vdisktime += cfqg_scale_charge(charge, vfr);
        cfq_group_service_tree_add(st, cfqg);
 
        /* This group is being expired. Save the context */
        if (time_after(cfqd->workload_expires, jiffies)) {
-               cfqg->saved_workload_slice = cfqd->workload_expires
+               cfqg->saved_wl_slice = cfqd->workload_expires
                                                - jiffies;
-               cfqg->saved_workload = cfqd->serving_type;
-               cfqg->saved_serving_prio = cfqd->serving_prio;
+               cfqg->saved_wl_type = cfqd->serving_wl_type;
+               cfqg->saved_wl_class = cfqd->serving_wl_class;
        } else
-               cfqg->saved_workload_slice = 0;
+               cfqg->saved_wl_slice = 0;
 
        cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                        st->min_vdisktime);
@@ -1344,6 +1514,52 @@ static void cfq_pd_init(struct blkcg_gq *blkg)
 
        cfq_init_cfqg_base(cfqg);
        cfqg->weight = blkg->blkcg->cfq_weight;
+       cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight;
+}
+
+static void cfq_pd_offline(struct blkcg_gq *blkg)
+{
+       /*
+        * @blkg is going offline and will be ignored by
+        * blkg_[rw]stat_recursive_sum().  Transfer stats to the parent so
+        * that they don't get lost.  If IOs complete after this point, the
+        * stats for them will be lost.  Oh well...
+        */
+       cfqg_stats_xfer_dead(blkg_to_cfqg(blkg));
+}
+
+/* offset delta from cfqg->stats to cfqg->dead_stats */
+static const int dead_stats_off_delta = offsetof(struct cfq_group, dead_stats) -
+                                       offsetof(struct cfq_group, stats);
+
+/* to be used by recursive prfill, sums live and dead stats recursively */
+static u64 cfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+{
+       u64 sum = 0;
+
+       sum += blkg_stat_recursive_sum(pd, off);
+       sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta);
+       return sum;
+}
+
+/* to be used by recursive prfill, sums live and dead rwstats recursively */
+static struct blkg_rwstat cfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
+                                                      int off)
+{
+       struct blkg_rwstat a, b;
+
+       a = blkg_rwstat_recursive_sum(pd, off);
+       b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta);
+       blkg_rwstat_merge(&a, &b);
+       return a;
+}
+
+static void cfq_pd_reset_stats(struct blkcg_gq *blkg)
+{
+       struct cfq_group *cfqg = blkg_to_cfqg(blkg);
+
+       cfqg_stats_reset(&cfqg->stats);
+       cfqg_stats_reset(&cfqg->dead_stats);
 }
 
 /*
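
dead_stats_off_delta above lets one recursive-sum helper read either the live stats or the dead_stats copy simply by shifting the byte offset it is given. A userspace sketch of that offset trick, with illustrative field names:

#include <stddef.h>
#include <stdio.h>

struct stats { unsigned long time; unsigned long sectors; };

struct group {
	struct stats stats;		/* live stats */
	struct stats dead_stats;	/* stats inherited from dead children */
};

static const int dead_delta = offsetof(struct group, dead_stats) -
			      offsetof(struct group, stats);

/* read one counter given its byte offset from the start of the group */
static unsigned long read_stat(struct group *grp, int off)
{
	return *(unsigned long *)((char *)grp + off);
}

int main(void)
{
	struct group g = { .stats = { .time = 70 }, .dead_stats = { .time = 30 } };
	int off = offsetof(struct group, stats.time);

	/* live + dead, the way cfqg_stat_pd_recursive_sum() combines them */
	printf("%lu\n", read_stat(&g, off) + read_stat(&g, off + dead_delta));
	return 0;
}
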
@@ -1400,6 +1616,26 @@ static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft,
        return 0;
 }
 
+static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
+                                         struct blkg_policy_data *pd, int off)
+{
+       struct cfq_group *cfqg = pd_to_cfqg(pd);
+
+       if (!cfqg->dev_leaf_weight)
+               return 0;
+       return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
+}
+
+static int cfqg_print_leaf_weight_device(struct cgroup *cgrp,
+                                        struct cftype *cft,
+                                        struct seq_file *sf)
+{
+       blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp),
+                         cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0,
+                         false);
+       return 0;
+}
+
 static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft,
                            struct seq_file *sf)
 {
@@ -1407,8 +1643,16 @@ static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft,
        return 0;
 }
 
-static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
-                                 const char *buf)
+static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft,
+                                struct seq_file *sf)
+{
+       seq_printf(sf, "%u\n",
+                  cgroup_to_blkcg(cgrp)->cfq_leaf_weight);
+       return 0;
+}
+
+static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
+                                   const char *buf, bool is_leaf_weight)
 {
        struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
        struct blkg_conf_ctx ctx;
@@ -1422,8 +1666,13 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
        ret = -EINVAL;
        cfqg = blkg_to_cfqg(ctx.blkg);
        if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) {
-               cfqg->dev_weight = ctx.v;
-               cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight;
+               if (!is_leaf_weight) {
+                       cfqg->dev_weight = ctx.v;
+                       cfqg->new_weight = ctx.v ?: blkcg->cfq_weight;
+               } else {
+                       cfqg->dev_leaf_weight = ctx.v;
+                       cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight;
+               }
                ret = 0;
        }
 
@@ -1431,7 +1680,20 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
        return ret;
 }
 
-static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
+                                 const char *buf)
+{
+       return __cfqg_set_weight_device(cgrp, cft, buf, false);
+}
+
+static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft,
+                                      const char *buf)
+{
+       return __cfqg_set_weight_device(cgrp, cft, buf, true);
+}
+
+static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val,
+                           bool is_leaf_weight)
 {
        struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
        struct blkcg_gq *blkg;
@@ -1440,19 +1702,41 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
                return -EINVAL;
 
        spin_lock_irq(&blkcg->lock);
-       blkcg->cfq_weight = (unsigned int)val;
+
+       if (!is_leaf_weight)
+               blkcg->cfq_weight = val;
+       else
+               blkcg->cfq_leaf_weight = val;
 
        hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
                struct cfq_group *cfqg = blkg_to_cfqg(blkg);
 
-               if (cfqg && !cfqg->dev_weight)
-                       cfqg->new_weight = blkcg->cfq_weight;
+               if (!cfqg)
+                       continue;
+
+               if (!is_leaf_weight) {
+                       if (!cfqg->dev_weight)
+                               cfqg->new_weight = blkcg->cfq_weight;
+               } else {
+                       if (!cfqg->dev_leaf_weight)
+                               cfqg->new_leaf_weight = blkcg->cfq_leaf_weight;
+               }
        }
 
        spin_unlock_irq(&blkcg->lock);
        return 0;
 }
 
+static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+       return __cfq_set_weight(cgrp, cft, val, false);
+}
+
+static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+       return __cfq_set_weight(cgrp, cft, val, true);
+}
+
 static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft,
                           struct seq_file *sf)
 {
@@ -1473,6 +1757,42 @@ static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
        return 0;
 }
 
+static u64 cfqg_prfill_stat_recursive(struct seq_file *sf,
+                                     struct blkg_policy_data *pd, int off)
+{
+       u64 sum = cfqg_stat_pd_recursive_sum(pd, off);
+
+       return __blkg_prfill_u64(sf, pd, sum);
+}
+
+static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
+                                       struct blkg_policy_data *pd, int off)
+{
+       struct blkg_rwstat sum = cfqg_rwstat_pd_recursive_sum(pd, off);
+
+       return __blkg_prfill_rwstat(sf, pd, &sum);
+}
+
+static int cfqg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft,
+                                    struct seq_file *sf)
+{
+       struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+
+       blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive,
+                         &blkcg_policy_cfq, cft->private, false);
+       return 0;
+}
+
+static int cfqg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft,
+                                      struct seq_file *sf)
+{
+       struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
+
+       blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive,
+                         &blkcg_policy_cfq, cft->private, true);
+       return 0;
+}
+
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
                                      struct blkg_policy_data *pd, int off)
@@ -1502,17 +1822,49 @@ static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft,
 #endif /* CONFIG_DEBUG_BLK_CGROUP */
 
 static struct cftype cfq_blkcg_files[] = {
+       /* on root, weight is mapped to leaf_weight */
+       {
+               .name = "weight_device",
+               .flags = CFTYPE_ONLY_ON_ROOT,
+               .read_seq_string = cfqg_print_leaf_weight_device,
+               .write_string = cfqg_set_leaf_weight_device,
+               .max_write_len = 256,
+       },
+       {
+               .name = "weight",
+               .flags = CFTYPE_ONLY_ON_ROOT,
+               .read_seq_string = cfq_print_leaf_weight,
+               .write_u64 = cfq_set_leaf_weight,
+       },
+
+       /* no such mapping necessary for !roots */
        {
                .name = "weight_device",
+               .flags = CFTYPE_NOT_ON_ROOT,
                .read_seq_string = cfqg_print_weight_device,
                .write_string = cfqg_set_weight_device,
                .max_write_len = 256,
        },
        {
                .name = "weight",
+               .flags = CFTYPE_NOT_ON_ROOT,
                .read_seq_string = cfq_print_weight,
                .write_u64 = cfq_set_weight,
        },
+
+       {
+               .name = "leaf_weight_device",
+               .read_seq_string = cfqg_print_leaf_weight_device,
+               .write_string = cfqg_set_leaf_weight_device,
+               .max_write_len = 256,
+       },
+       {
+               .name = "leaf_weight",
+               .read_seq_string = cfq_print_leaf_weight,
+               .write_u64 = cfq_set_leaf_weight,
+       },
+
+       /* statistics, covers only the tasks in the cfqg */
        {
                .name = "time",
                .private = offsetof(struct cfq_group, stats.time),
@@ -1553,6 +1905,48 @@ static struct cftype cfq_blkcg_files[] = {
                .private = offsetof(struct cfq_group, stats.queued),
                .read_seq_string = cfqg_print_rwstat,
        },
+
+       /* the same statistics which cover the cfqg and its descendants */
+       {
+               .name = "time_recursive",
+               .private = offsetof(struct cfq_group, stats.time),
+               .read_seq_string = cfqg_print_stat_recursive,
+       },
+       {
+               .name = "sectors_recursive",
+               .private = offsetof(struct cfq_group, stats.sectors),
+               .read_seq_string = cfqg_print_stat_recursive,
+       },
+       {
+               .name = "io_service_bytes_recursive",
+               .private = offsetof(struct cfq_group, stats.service_bytes),
+               .read_seq_string = cfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "io_serviced_recursive",
+               .private = offsetof(struct cfq_group, stats.serviced),
+               .read_seq_string = cfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "io_service_time_recursive",
+               .private = offsetof(struct cfq_group, stats.service_time),
+               .read_seq_string = cfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "io_wait_time_recursive",
+               .private = offsetof(struct cfq_group, stats.wait_time),
+               .read_seq_string = cfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "io_merged_recursive",
+               .private = offsetof(struct cfq_group, stats.merged),
+               .read_seq_string = cfqg_print_rwstat_recursive,
+       },
+       {
+               .name = "io_queued_recursive",
+               .private = offsetof(struct cfq_group, stats.queued),
+               .read_seq_string = cfqg_print_rwstat_recursive,
+       },
 #ifdef CONFIG_DEBUG_BLK_CGROUP
        {
                .name = "avg_queue_size",
@@ -1611,15 +2005,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        struct rb_node **p, *parent;
        struct cfq_queue *__cfqq;
        unsigned long rb_key;
-       struct cfq_rb_root *service_tree;
+       struct cfq_rb_root *st;
        int left;
        int new_cfqq = 1;
 
-       service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
-                                               cfqq_type(cfqq));
+       st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
        if (cfq_class_idle(cfqq)) {
                rb_key = CFQ_IDLE_DELAY;
-               parent = rb_last(&service_tree->rb);
+               parent = rb_last(&st->rb);
                if (parent && parent != &cfqq->rb_node) {
                        __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
                        rb_key += __cfqq->rb_key;
@@ -1637,7 +2030,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                cfqq->slice_resid = 0;
        } else {
                rb_key = -HZ;
-               __cfqq = cfq_rb_first(service_tree);
+               __cfqq = cfq_rb_first(st);
                rb_key += __cfqq ? __cfqq->rb_key : jiffies;
        }
 
@@ -1646,8 +2039,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                /*
                 * same position, nothing more to do
                 */
-               if (rb_key == cfqq->rb_key &&
-                   cfqq->service_tree == service_tree)
+               if (rb_key == cfqq->rb_key && cfqq->service_tree == st)
                        return;
 
                cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
@@ -1656,11 +2048,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 
        left = 1;
        parent = NULL;
-       cfqq->service_tree = service_tree;
-       p = &service_tree->rb.rb_node;
+       cfqq->service_tree = st;
+       p = &st->rb.rb_node;
        while (*p) {
-               struct rb_node **n;
-
                parent = *p;
                __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
 
@@ -1668,22 +2058,20 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                 * sort by key, that represents service time.
                 */
                if (time_before(rb_key, __cfqq->rb_key))
-                       n = &(*p)->rb_left;
+                       p = &parent->rb_left;
                else {
-                       n = &(*p)->rb_right;
+                       p = &parent->rb_right;
                        left = 0;
                }
-
-               p = n;
        }
 
        if (left)
-               service_tree->left = &cfqq->rb_node;
+               st->left = &cfqq->rb_node;
 
        cfqq->rb_key = rb_key;
        rb_link_node(&cfqq->rb_node, parent, p);
-       rb_insert_color(&cfqq->rb_node, &service_tree->rb);
-       service_tree->count++;
+       rb_insert_color(&cfqq->rb_node, &st->rb);
+       st->count++;
        if (add_front || !new_cfqq)
                return;
        cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
@@ -2029,8 +2417,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
                                   struct cfq_queue *cfqq)
 {
        if (cfqq) {
-               cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
-                               cfqd->serving_prio, cfqd->serving_type);
+               cfq_log_cfqq(cfqd, cfqq, "set_active wl_class:%d wl_type:%d",
+                               cfqd->serving_wl_class, cfqd->serving_wl_type);
                cfqg_stats_update_avg_queue_size(cfqq->cfqg);
                cfqq->slice_start = 0;
                cfqq->dispatch_start = jiffies;
@@ -2116,19 +2504,18 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
  */
 static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 {
-       struct cfq_rb_root *service_tree =
-               service_tree_for(cfqd->serving_group, cfqd->serving_prio,
-                                       cfqd->serving_type);
+       struct cfq_rb_root *st = st_for(cfqd->serving_group,
+                       cfqd->serving_wl_class, cfqd->serving_wl_type);
 
        if (!cfqd->rq_queued)
                return NULL;
 
        /* There is nothing to dispatch */
-       if (!service_tree)
+       if (!st)
                return NULL;
-       if (RB_EMPTY_ROOT(&service_tree->rb))
+       if (RB_EMPTY_ROOT(&st->rb))
                return NULL;
-       return cfq_rb_first(service_tree);
+       return cfq_rb_first(st);
 }
 
 static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
@@ -2284,17 +2671,17 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
 
 static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-       enum wl_prio_t prio = cfqq_prio(cfqq);
-       struct cfq_rb_root *service_tree = cfqq->service_tree;
+       enum wl_class_t wl_class = cfqq_class(cfqq);
+       struct cfq_rb_root *st = cfqq->service_tree;
 
-       BUG_ON(!service_tree);
-       BUG_ON(!service_tree->count);
+       BUG_ON(!st);
+       BUG_ON(!st->count);
 
        if (!cfqd->cfq_slice_idle)
                return false;
 
        /* We never do for idle class queues. */
-       if (prio == IDLE_WORKLOAD)
+       if (wl_class == IDLE_WORKLOAD)
                return false;
 
        /* We do for queues that were marked with idle window flag. */
@@ -2306,11 +2693,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * Otherwise, we do only if they are the last ones
         * in their service tree.
         */
-       if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
-          !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
+       if (st->count == 1 && cfq_cfqq_sync(cfqq) &&
+          !cfq_io_thinktime_big(cfqd, &st->ttime, false))
                return true;
-       cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
-                       service_tree->count);
+       cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", st->count);
        return false;
 }
 
@@ -2493,8 +2879,8 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
        }
 }
 
-static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
-                               struct cfq_group *cfqg, enum wl_prio_t prio)
+static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
+                       struct cfq_group *cfqg, enum wl_class_t wl_class)
 {
        struct cfq_queue *queue;
        int i;
@@ -2504,7 +2890,7 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
 
        for (i = 0; i <= SYNC_WORKLOAD; ++i) {
                /* select the one with lowest rb_key */
-               queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
+               queue = cfq_rb_first(st_for(cfqg, wl_class, i));
                if (queue &&
                    (!key_valid || time_before(queue->rb_key, lowest_key))) {
                        lowest_key = queue->rb_key;
@@ -2516,26 +2902,27 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
        return cur_best;
 }
 
-static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
+static void
+choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
        unsigned slice;
        unsigned count;
        struct cfq_rb_root *st;
        unsigned group_slice;
-       enum wl_prio_t original_prio = cfqd->serving_prio;
+       enum wl_class_t original_class = cfqd->serving_wl_class;
 
        /* Choose next priority. RT > BE > IDLE */
        if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
-               cfqd->serving_prio = RT_WORKLOAD;
+               cfqd->serving_wl_class = RT_WORKLOAD;
        else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
-               cfqd->serving_prio = BE_WORKLOAD;
+               cfqd->serving_wl_class = BE_WORKLOAD;
        else {
-               cfqd->serving_prio = IDLE_WORKLOAD;
+               cfqd->serving_wl_class = IDLE_WORKLOAD;
                cfqd->workload_expires = jiffies + 1;
                return;
        }
 
-       if (original_prio != cfqd->serving_prio)
+       if (original_class != cfqd->serving_wl_class)
                goto new_workload;
 
        /*
@@ -2543,7 +2930,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
         * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload
         * expiration time
         */
-       st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
+       st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
        count = st->count;
 
        /*
@@ -2554,9 +2941,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
 new_workload:
        /* otherwise select new workload type */
-       cfqd->serving_type =
-               cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
-       st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
+       cfqd->serving_wl_type = cfq_choose_wl_type(cfqd, cfqg,
+                                       cfqd->serving_wl_class);
+       st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type);
        count = st->count;
 
        /*
@@ -2567,10 +2954,11 @@ new_workload:
        group_slice = cfq_group_slice(cfqd, cfqg);
 
        slice = group_slice * count /
-               max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
-                     cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
+               max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
+                     cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
+                                       cfqg));
 
-       if (cfqd->serving_type == ASYNC_WORKLOAD) {
+       if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
                unsigned int tmp;
 
                /*
@@ -2616,14 +3004,14 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
        cfqd->serving_group = cfqg;
 
        /* Restore the workload type data */
-       if (cfqg->saved_workload_slice) {
-               cfqd->workload_expires = jiffies + cfqg->saved_workload_slice;
-               cfqd->serving_type = cfqg->saved_workload;
-               cfqd->serving_prio = cfqg->saved_serving_prio;
+       if (cfqg->saved_wl_slice) {
+               cfqd->workload_expires = jiffies + cfqg->saved_wl_slice;
+               cfqd->serving_wl_type = cfqg->saved_wl_type;
+               cfqd->serving_wl_class = cfqg->saved_wl_class;
        } else
                cfqd->workload_expires = jiffies - 1;
 
-       choose_service_tree(cfqd, cfqg);
+       choose_wl_class_and_type(cfqd, cfqg);
 }
 
 /*
@@ -3205,6 +3593,8 @@ retry:
                        spin_lock_irq(cfqd->queue->queue_lock);
                        if (new_cfqq)
                                goto retry;
+                       else
+                               return &cfqd->oom_cfqq;
                } else {
                        cfqq = kmem_cache_alloc_node(cfq_pool,
                                        gfp_mask | __GFP_ZERO,
@@ -3402,7 +3792,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
                return true;
 
        /* Allow preemption only if we are idling on sync-noidle tree */
-       if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
+       if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
            cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
            new_cfqq->service_tree->count == 2 &&
            RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -3454,7 +3844,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * doesn't happen
         */
        if (old_type != cfqq_type(cfqq))
-               cfqq->cfqg->saved_workload_slice = 0;
+               cfqq->cfqg->saved_wl_slice = 0;
 
        /*
         * Put the new queue at the front of the current list,
@@ -3636,16 +4026,17 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
        if (sync) {
-               struct cfq_rb_root *service_tree;
+               struct cfq_rb_root *st;
 
                RQ_CIC(rq)->ttime.last_end_request = now;
 
                if (cfq_cfqq_on_rr(cfqq))
-                       service_tree = cfqq->service_tree;
+                       st = cfqq->service_tree;
                else
-                       service_tree = service_tree_for(cfqq->cfqg,
-                               cfqq_prio(cfqq), cfqq_type(cfqq));
-               service_tree->ttime.last_end_request = now;
+                       st = st_for(cfqq->cfqg, cfqq_class(cfqq),
+                                       cfqq_type(cfqq));
+
+               st->ttime.last_end_request = now;
                if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
                        cfqd->last_delayed_sync = now;
        }
@@ -3992,6 +4383,7 @@ static int cfq_init_queue(struct request_queue *q)
        cfq_init_cfqg_base(cfqd->root_group);
 #endif
        cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT;
+       cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT;
 
        /*
         * Not strictly needed (since RB_ROOT just clears the node and we
@@ -4176,6 +4568,7 @@ static struct blkcg_policy blkcg_policy_cfq = {
        .cftypes                = cfq_blkcg_files,
 
        .pd_init_fn             = cfq_pd_init,
+       .pd_offline_fn          = cfq_pd_offline,
        .pd_reset_stats_fn      = cfq_pd_reset_stats,
 };
 #endif
index d0acb31cc083fca5340b9f719e0ec640607ce1eb..a0ffdd943c98aa5e0f39f102b98f1bc4cf9777f5 100644 (file)
@@ -46,11 +46,6 @@ static LIST_HEAD(elv_list);
 /*
  * Merge hash stuff.
  */
-static const int elv_hash_shift = 6;
-#define ELV_HASH_BLOCK(sec)    ((sec) >> 3)
-#define ELV_HASH_FN(sec)       \
-               (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
-#define ELV_HASH_ENTRIES       (1 << elv_hash_shift)
 #define rq_hash_key(rq)                (blk_rq_pos(rq) + blk_rq_sectors(rq))
 
 /*
@@ -158,7 +153,6 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q,
                                  struct elevator_type *e)
 {
        struct elevator_queue *eq;
-       int i;
 
        eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
        if (unlikely(!eq))
@@ -167,14 +161,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q,
        eq->type = e;
        kobject_init(&eq->kobj, &elv_ktype);
        mutex_init(&eq->sysfs_lock);
-
-       eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
-                                       GFP_KERNEL, q->node);
-       if (!eq->hash)
-               goto err;
-
-       for (i = 0; i < ELV_HASH_ENTRIES; i++)
-               INIT_HLIST_HEAD(&eq->hash[i]);
+       hash_init(eq->hash);
 
        return eq;
 err:
@@ -189,7 +176,6 @@ static void elevator_release(struct kobject *kobj)
 
        e = container_of(kobj, struct elevator_queue, kobj);
        elevator_put(e->type);
-       kfree(e->hash);
        kfree(e);
 }
 
@@ -261,7 +247,7 @@ EXPORT_SYMBOL(elevator_exit);
 
 static inline void __elv_rqhash_del(struct request *rq)
 {
-       hlist_del_init(&rq->hash);
+       hash_del(&rq->hash);
 }
 
 static void elv_rqhash_del(struct request_queue *q, struct request *rq)
@@ -275,7 +261,7 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq)
        struct elevator_queue *e = q->elevator;
 
        BUG_ON(ELV_ON_HASH(rq));
-       hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
+       hash_add(e->hash, &rq->hash, rq_hash_key(rq));
 }
 
 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
@@ -287,11 +273,10 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
        struct elevator_queue *e = q->elevator;
-       struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
        struct hlist_node *next;
        struct request *rq;
 
-       hlist_for_each_entry_safe(rq, next, hash_list, hash) {
+       hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
                BUG_ON(!ELV_ON_HASH(rq));
 
                if (unlikely(!rq_mergeable(rq))) {
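
The elevator.c changes above drop the open-coded 64-bucket hash in favor of the generic <linux/hashtable.h> helpers. A minimal sketch of that API for a hypothetical object, assuming kernel context (struct item and its functions are illustrative, not elevator code):

#include <linux/hashtable.h>

struct item {
	unsigned long key;
	struct hlist_node node;
};

static DEFINE_HASHTABLE(items, 6);	/* 2^6 buckets, like the old ELV hash */

static void item_add(struct item *it)
{
	hash_add(items, &it->node, it->key);
}

static struct item *item_find(unsigned long key)
{
	struct item *it;

	hash_for_each_possible(items, it, node, key)
		if (it->key == key)
			return it;
	return NULL;
}

static void item_del(struct item *it)
{
	hash_del(&it->node);
}
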
index 7ae2750bb4571b593f6a92104beb6c2300f85095..d668a8ae602bb4533b793911408cf138c047a4ce 100644 (file)
@@ -48,8 +48,8 @@
 #include <linux/genalloc.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
-#include <acpi/apei.h>
-#include <acpi/hed.h>
+
+#include <acpi/ghes.h>
 #include <asm/mce.h>
 #include <asm/tlbflush.h>
 #include <asm/nmi.h>
        ((struct acpi_hest_generic_status *)                            \
         ((struct ghes_estatus_node *)(estatus_node) + 1))
 
-/*
- * One struct ghes is created for each generic hardware error source.
- * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
- * handler.
- *
- * estatus: memory buffer for error status block, allocated during
- * HEST parsing.
- */
-#define GHES_TO_CLEAR          0x0001
-#define GHES_EXITING           0x0002
-
-struct ghes {
-       struct acpi_hest_generic *generic;
-       struct acpi_hest_generic_status *estatus;
-       u64 buffer_paddr;
-       unsigned long flags;
-       union {
-               struct list_head list;
-               struct timer_list timer;
-               unsigned int irq;
-       };
-};
-
-struct ghes_estatus_node {
-       struct llist_node llnode;
-       struct acpi_hest_generic *generic;
-};
-
-struct ghes_estatus_cache {
-       u32 estatus_len;
-       atomic_t count;
-       struct acpi_hest_generic *generic;
-       unsigned long long time_in;
-       struct rcu_head rcu;
-};
-
 bool ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);
 
@@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes)
        apei_unmap_generic_address(&ghes->generic->error_status_address);
 }
 
-enum {
-       GHES_SEV_NO = 0x0,
-       GHES_SEV_CORRECTED = 0x1,
-       GHES_SEV_RECOVERABLE = 0x2,
-       GHES_SEV_PANIC = 0x3,
-};
-
 static inline int ghes_severity(int severity)
 {
        switch (severity) {
@@ -452,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
        ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
+static void ghes_do_proc(struct ghes *ghes,
+                        const struct acpi_hest_generic_status *estatus)
 {
        int sev, sec_sev;
        struct acpi_hest_generic_data *gdata;
@@ -464,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
                                 CPER_SEC_PLATFORM_MEM)) {
                        struct cper_sec_mem_err *mem_err;
                        mem_err = (struct cper_sec_mem_err *)(gdata+1);
+                       ghes_edac_report_mem_error(ghes, sev, mem_err);
+
 #ifdef CONFIG_X86_MCE
                        apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
                                                  mem_err);
@@ -682,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
                if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
                        ghes_estatus_cache_add(ghes->generic, ghes->estatus);
        }
-       ghes_do_proc(ghes->estatus);
+       ghes_do_proc(ghes, ghes->estatus);
 out:
        ghes_clear_estatus(ghes);
        return 0;
@@ -775,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
                estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
                len = apei_estatus_len(estatus);
                node_len = GHES_ESTATUS_NODE_LEN(len);
-               ghes_do_proc(estatus);
+               ghes_do_proc(estatus_node->ghes, estatus);
                if (!ghes_estatus_cached(estatus)) {
                        generic = estatus_node->generic;
                        if (ghes_print_estatus(NULL, generic, estatus))
@@ -864,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
                estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
                                                      node_len);
                if (estatus_node) {
+                       estatus_node->ghes = ghes;
                        estatus_node->generic = ghes->generic;
                        estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
                        memcpy(estatus, ghes->estatus, len);
@@ -942,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
                ghes = NULL;
                goto err;
        }
+
+       rc = ghes_edac_register(ghes, &ghes_dev->dev);
+       if (rc < 0)
+               goto err;
+
        switch (generic->notify.type) {
        case ACPI_HEST_NOTIFY_POLLED:
                ghes->timer.function = ghes_poll_func;
@@ -954,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
                if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
                        pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
                               generic->header.source_id);
-                       goto err;
+                       goto err_edac_unreg;
                }
                if (request_irq(ghes->irq, ghes_irq_func,
                                0, "GHES IRQ", ghes)) {
                        pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
                               generic->header.source_id);
-                       goto err;
+                       goto err_edac_unreg;
                }
                break;
        case ACPI_HEST_NOTIFY_SCI:
@@ -986,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev)
        platform_set_drvdata(ghes_dev, ghes);
 
        return 0;
+err_edac_unreg:
+       ghes_edac_unregister(ghes);
 err:
        if (ghes) {
                ghes_fini(ghes);
@@ -1038,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
        }
 
        ghes_fini(ghes);
+
+       ghes_edac_unregister(ghes);
+
        kfree(ghes);
 
        platform_set_drvdata(ghes_dev, NULL);
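
The ghes.c changes wire the APEI error path into EDAC: each estatus node now remembers which ghes instance produced it, ghes_do_proc() forwards memory errors via ghes_edac_report_mem_error(), and probe/remove gain a matching ghes_edac_register()/ghes_edac_unregister() pair. The unwind ordering that the new err_edac_unreg label enforces can be sketched generically as follows (all helper names here are stand-ins, not the driver's real functions):

/* Stand-ins: in the driver these are ghes_edac_register(), the
 * IRQ/SCI/NMI notifier hookup, ghes_edac_unregister() and ghes_fini(). */
static int register_edac(void)		{ return 0; }
static void unregister_edac(void)	{ }
static int setup_notifier(void)		{ return 0; }
static void teardown_core(void)		{ }

static int example_probe(void)
{
	int rc;

	rc = register_edac();		/* done before the notifier hookup */
	if (rc < 0)
		goto err;

	rc = setup_notifier();
	if (rc < 0)
		goto err_edac_unreg;	/* must undo the EDAC registration */

	return 0;

err_edac_unreg:
	unregister_edac();
err:
	teardown_core();
	return rc;
}
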
index ff5b745c470525c5e1a19560cb81be188a76100c..2a7cb0df176bb9494b5f660c30a0a3962738868b 100644 (file)
@@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file)
 
        dmabuf = file->private_data;
 
+       BUG_ON(dmabuf->vmapping_counter);
+
        dmabuf->ops->release(dmabuf);
        kfree(dmabuf);
        return 0;
@@ -445,6 +447,9 @@ EXPORT_SYMBOL_GPL(dma_buf_kunmap);
 int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
                 unsigned long pgoff)
 {
+       struct file *oldfile;
+       int ret;
+
        if (WARN_ON(!dmabuf || !vma))
                return -EINVAL;
 
@@ -458,14 +463,22 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
                return -EINVAL;
 
        /* readjust the vma */
-       if (vma->vm_file)
-               fput(vma->vm_file);
-
-       vma->vm_file = get_file(dmabuf->file);
-
+       get_file(dmabuf->file);
+       oldfile = vma->vm_file;
+       vma->vm_file = dmabuf->file;
        vma->vm_pgoff = pgoff;
 
-       return dmabuf->ops->mmap(dmabuf, vma);
+       ret = dmabuf->ops->mmap(dmabuf, vma);
+       if (ret) {
+               /* restore old parameters on failure */
+               vma->vm_file = oldfile;
+               fput(dmabuf->file);
+       } else {
+               if (oldfile)
+                       fput(oldfile);
+       }
+       return ret;
+
 }
 EXPORT_SYMBOL_GPL(dma_buf_mmap);
 
@@ -481,12 +494,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap);
  */
 void *dma_buf_vmap(struct dma_buf *dmabuf)
 {
+       void *ptr;
+
        if (WARN_ON(!dmabuf))
                return NULL;
 
-       if (dmabuf->ops->vmap)
-               return dmabuf->ops->vmap(dmabuf);
-       return NULL;
+       if (!dmabuf->ops->vmap)
+               return NULL;
+
+       mutex_lock(&dmabuf->lock);
+       if (dmabuf->vmapping_counter) {
+               dmabuf->vmapping_counter++;
+               BUG_ON(!dmabuf->vmap_ptr);
+               ptr = dmabuf->vmap_ptr;
+               goto out_unlock;
+       }
+
+       BUG_ON(dmabuf->vmap_ptr);
+
+       ptr = dmabuf->ops->vmap(dmabuf);
+       if (IS_ERR_OR_NULL(ptr))
+               goto out_unlock;
+
+       dmabuf->vmap_ptr = ptr;
+       dmabuf->vmapping_counter = 1;
+
+out_unlock:
+       mutex_unlock(&dmabuf->lock);
+       return ptr;
 }
 EXPORT_SYMBOL_GPL(dma_buf_vmap);
 
@@ -500,7 +535,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
        if (WARN_ON(!dmabuf))
                return;
 
-       if (dmabuf->ops->vunmap)
-               dmabuf->ops->vunmap(dmabuf, vaddr);
+       BUG_ON(!dmabuf->vmap_ptr);
+       BUG_ON(dmabuf->vmapping_counter == 0);
+       BUG_ON(dmabuf->vmap_ptr != vaddr);
+
+       mutex_lock(&dmabuf->lock);
+       if (--dmabuf->vmapping_counter == 0) {
+               if (dmabuf->ops->vunmap)
+                       dmabuf->ops->vunmap(dmabuf, vaddr);
+               dmabuf->vmap_ptr = NULL;
+       }
+       mutex_unlock(&dmabuf->lock);
 }
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);
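
The dma-buf hunks do two things: dma_buf_mmap() now restores vma->vm_file and drops its own reference if the exporter's mmap fails, and dma_buf_vmap()/dma_buf_vunmap() cache the kernel mapping behind a counter so nested callers share a single map. The counting part reduces to the pattern below (the struct and the do_map/do_unmap callbacks are illustrative stand-ins for dmabuf->ops->vmap()/vunmap()):

#include <linux/err.h>
#include <linux/mutex.h>

struct vmap_cache {
	struct mutex lock;
	void *ptr;		/* cached mapping, valid while count > 0 */
	unsigned int count;
};

static void *cache_vmap(struct vmap_cache *c, void *(*do_map)(void))
{
	void *ptr;

	mutex_lock(&c->lock);
	if (c->count) {			/* already mapped: just take a reference */
		c->count++;
		ptr = c->ptr;
		goto out;
	}
	ptr = do_map();			/* first caller performs the real map */
	if (!IS_ERR_OR_NULL(ptr)) {
		c->ptr = ptr;
		c->count = 1;
	}
out:
	mutex_unlock(&c->lock);
	return ptr;
}

static void cache_vunmap(struct vmap_cache *c, void (*do_unmap)(void *))
{
	mutex_lock(&c->lock);
	if (--c->count == 0) {		/* last user tears the mapping down */
		do_unmap(c->ptr);
		c->ptr = NULL;
	}
	mutex_unlock(&c->lock);
}
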
index 8f12dc78a848c8fe9446f1477d6ea2a9f34049f3..5b5ee79ff236b77e48a2afac8b668d59d4f279f2 100644 (file)
@@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
        else
                ErrorCode =  0;
       }
+      break;
       default:
        ErrorCode = -ENOTTY;
     }
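
The DAC960 change is a one-line fallthrough fix: without the added break, the successful ioctl case drops into the default branch and its result is overwritten with -ENOTTY. In miniature (illustrative only):

#include <linux/errno.h>

static int demo_ioctl(unsigned int request)
{
	int rc;

	switch (request) {
	case 1:
		rc = 0;
		break;			/* the added break: keep the success code */
	default:
		rc = -ENOTTY;		/* would clobber rc on fallthrough */
	}
	return rc;
}
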
index 824e09c4d0d7d1bcd83a00b87e37e1ca0115d5fb..5dc0daed8fac523f5d631c3cd4a022f6fb4c5aca 100644 (file)
@@ -63,19 +63,6 @@ config AMIGA_Z2RAM
          To compile this driver as a module, choose M here: the
          module will be called z2ram.
 
-config BLK_DEV_XD
-       tristate "XT hard disk support"
-       depends on ISA && ISA_DMA_API
-       select CHECK_SIGNATURE
-       help
-         Very old 8 bit hard disk controllers used in the IBM XT computer
-         will be supported if you say Y here.
-
-         To compile this driver as a module, choose M here: the
-         module will be called xd.
-
-         It's pretty unlikely that you have one of these: say N.
-
 config GDROM
        tristate "SEGA Dreamcast GD-ROM drive"
        depends on SH_DREAMCAST
@@ -544,4 +531,14 @@ config BLK_DEV_RBD
 
          If unsure, say N.
 
+config BLK_DEV_RSXX
+       tristate "RamSam PCIe Flash SSD Device Driver"
+       depends on PCI
+       help
+         Device driver for IBM's high speed PCIe SSD
+         storage devices: RamSan-70 and RamSan-80.
+
+         To compile this driver as a module, choose M here: the
+         module will be called rsxx.
+
 endif # BLK_DEV
index 17e82df3df74f1bae7c48a6106ab3b8cdff1ec7b..a3b40232c6aba612c27ebbb9c19d815193c6464e 100644 (file)
@@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY)    += ataflop.o
 obj-$(CONFIG_AMIGA_Z2RAM)      += z2ram.o
 obj-$(CONFIG_BLK_DEV_RAM)      += brd.o
 obj-$(CONFIG_BLK_DEV_LOOP)     += loop.o
-obj-$(CONFIG_BLK_DEV_XD)       += xd.o
 obj-$(CONFIG_BLK_CPQ_DA)       += cpqarray.o
 obj-$(CONFIG_BLK_CPQ_CISS_DA)  += cciss.o
 obj-$(CONFIG_BLK_DEV_DAC960)   += DAC960.o
@@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD)     += drbd/
 obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o
 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
 
+obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
+
 swim_mod-y     := swim.o swim_asm.o
index f47dccbda1d44ad799fc68d283c576684335d37d..747bb2af69dcc55fec530466a9f55d06914ba8f7 100644 (file)
@@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
 
 static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file)
 {
-       loff_t size, loopsize;
+       loff_t loopsize;
 
        /* Compute loopsize in bytes */
-       size = i_size_read(file->f_mapping->host);
-       loopsize = size - offset;
-       /* offset is beyond i_size, wierd but possible */
+       loopsize = i_size_read(file->f_mapping->host);
+       if (offset > 0)
+               loopsize -= offset;
+       /* offset is beyond i_size, weird but possible */
        if (loopsize < 0)
                return 0;
 
@@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
 {
        loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
        sector_t x = (sector_t)size;
+       struct block_device *bdev = lo->lo_device;
 
        if (unlikely((loff_t)x != size))
                return -EFBIG;
@@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
        if (lo->lo_sizelimit != sizelimit)
                lo->lo_sizelimit = sizelimit;
        set_capacity(lo->lo_disk, x);
+       bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
+       /* let user-space know about the new size */
+       kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
        return 0;
 }
 
@@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
                return err;
 
        if (lo->lo_offset != info->lo_offset ||
-           lo->lo_sizelimit != info->lo_sizelimit) {
+           lo->lo_sizelimit != info->lo_sizelimit)
                if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit))
                        return -EFBIG;
-       }
+
        loop_config_discard(lo);
 
        memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
@@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
 
 static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
 {
-       int err;
-       sector_t sec;
-       loff_t sz;
-
-       err = -ENXIO;
        if (unlikely(lo->lo_state != Lo_bound))
-               goto out;
-       err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
-       if (unlikely(err))
-               goto out;
-       sec = get_capacity(lo->lo_disk);
-       /* the width of sector_t may be narrow for bit-shift */
-       sz = sec;
-       sz <<= 9;
-       mutex_lock(&bdev->bd_mutex);
-       bd_set_size(bdev, sz);
-       /* let user-space know about the new size */
-       kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
-       mutex_unlock(&bdev->bd_mutex);
+               return -ENXIO;
 
- out:
-       return err;
+       return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
 }
 
 static int lo_ioctl(struct block_device *bdev, fmode_t mode,
@@ -1845,11 +1832,15 @@ static int __init loop_init(void)
                max_part = (1UL << part_shift) - 1;
        }
 
-       if ((1UL << part_shift) > DISK_MAX_PARTS)
-               return -EINVAL;
+       if ((1UL << part_shift) > DISK_MAX_PARTS) {
+               err = -EINVAL;
+               goto misc_out;
+       }
 
-       if (max_loop > 1UL << (MINORBITS - part_shift))
-               return -EINVAL;
+       if (max_loop > 1UL << (MINORBITS - part_shift)) {
+               err = -EINVAL;
+               goto misc_out;
+       }
 
        /*
         * If max_loop is specified, create that many devices upfront.
@@ -1867,8 +1858,10 @@ static int __init loop_init(void)
                range = 1UL << MINORBITS;
        }
 
-       if (register_blkdev(LOOP_MAJOR, "loop"))
-               return -EIO;
+       if (register_blkdev(LOOP_MAJOR, "loop")) {
+               err = -EIO;
+               goto misc_out;
+       }
 
        blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
                                  THIS_MODULE, loop_probe, NULL, NULL);
@@ -1881,6 +1874,10 @@ static int __init loop_init(void)
 
        printk(KERN_INFO "loop: module loaded\n");
        return 0;
+
+misc_out:
+       misc_deregister(&loop_misc);
+       return err;
 }
 
 static int loop_exit_cb(int id, void *ptr, void *data)
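
In loop.c the size handling is consolidated: get_size() subtracts the offset only when it is positive, figure_loop_size() now also resizes the block device and emits the KOBJ_CHANGE uevent itself, and loop_set_capacity() shrinks to a single call. Roughly, the sector arithmetic comes down to this (hypothetical helper, values illustrative):

#include <linux/types.h>

/* Backing-file bytes, minus the offset, clamped to sizelimit, in
 * 512-byte sectors: what get_size()/figure_loop_size() compute. */
static loff_t demo_loop_sectors(loff_t backing_bytes, loff_t offset,
				loff_t sizelimit)
{
	loff_t loopsize = backing_bytes;

	if (offset > 0)
		loopsize -= offset;
	if (loopsize < 0)		/* offset beyond i_size: empty device */
		return 0;
	if (sizelimit > 0 && sizelimit < loopsize)
		loopsize = sizelimit;
	return loopsize >> 9;
}

For example, a 1 MiB backing file with a 4096-byte offset maps to (1048576 - 4096) >> 9 = 2040 sectors.
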
index 0ba837fc62a874511a910dd077feea2f26dfbd37..1fca1f996b45e478f781da6b4414b57e90181d3e 100644 (file)
@@ -4,6 +4,6 @@
 
 config BLK_DEV_PCIESSD_MTIP32XX
        tristate "Block Device Driver for Micron PCIe SSDs"
-       depends on PCI
+       depends on PCI && GENERIC_HARDIRQS
        help
           This enables the block driver for Micron PCIe SSDs.
index 3fd100990453b76f2297301e662e808819019ff2..11cc9522cdd439ff3db29825037c4f866bc28abb 100644 (file)
@@ -88,6 +88,8 @@ static int instance;
 static int mtip_major;
 static struct dentry *dfs_parent;
 
+static u32 cpu_use[NR_CPUS];
+
 static DEFINE_SPINLOCK(rssd_index_lock);
 static DEFINE_IDA(rssd_index_ida);
 
@@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd)
  */
 static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
 {
-       atomic_set(&port->commands[tag].active, 1);
+       int group = tag >> 5;
 
-       spin_lock(&port->cmd_issue_lock);
+       atomic_set(&port->commands[tag].active, 1);
 
+       /* guard SACT and CI registers */
+       spin_lock(&port->cmd_issue_lock[group]);
        writel((1 << MTIP_TAG_BIT(tag)),
                        port->s_active[MTIP_TAG_INDEX(tag)]);
        writel((1 << MTIP_TAG_BIT(tag)),
                        port->cmd_issue[MTIP_TAG_INDEX(tag)]);
-
-       spin_unlock(&port->cmd_issue_lock);
+       spin_unlock(&port->cmd_issue_lock[group]);
 
        /* Set the command's timeout value.*/
        port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
@@ -964,56 +967,56 @@ handle_tfe_exit:
 /*
  * Handle a set device bits interrupt
  */
-static inline void mtip_process_sdbf(struct driver_data *dd)
+static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
+                                                       u32 completed)
 {
-       struct mtip_port  *port = dd->port;
-       int group, tag, bit;
-       u32 completed;
+       struct driver_data *dd = port->dd;
+       int tag, bit;
        struct mtip_cmd *command;
 
-       /* walk all bits in all slot groups */
-       for (group = 0; group < dd->slot_groups; group++) {
-               completed = readl(port->completed[group]);
-               if (!completed)
-                       continue;
+       if (!completed) {
+               WARN_ON_ONCE(!completed);
+               return;
+       }
+       /* clear completed status register in the hardware.*/
+       writel(completed, port->completed[group]);
 
-               /* clear completed status register in the hardware.*/
-               writel(completed, port->completed[group]);
+       /* Process completed commands. */
+       for (bit = 0; (bit < 32) && completed; bit++) {
+               if (completed & 0x01) {
+                       tag = (group << 5) | bit;
 
-               /* Process completed commands. */
-               for (bit = 0;
-                    (bit < 32) && completed;
-                    bit++, completed >>= 1) {
-                       if (completed & 0x01) {
-                               tag = (group << 5) | bit;
+                       /* skip internal command slot. */
+                       if (unlikely(tag == MTIP_TAG_INTERNAL))
+                               continue;
 
-                               /* skip internal command slot. */
-                               if (unlikely(tag == MTIP_TAG_INTERNAL))
-                                       continue;
+                       command = &port->commands[tag];
+                       /* make internal callback */
+                       if (likely(command->comp_func)) {
+                               command->comp_func(
+                                       port,
+                                       tag,
+                                       command->comp_data,
+                                       0);
+                       } else {
+                               dev_warn(&dd->pdev->dev,
+                                       "Null completion "
+                                       "for tag %d",
+                                       tag);
 
-                               command = &port->commands[tag];
-                               /* make internal callback */
-                               if (likely(command->comp_func)) {
-                                       command->comp_func(
-                                               port,
-                                               tag,
-                                               command->comp_data,
-                                               0);
-                               } else {
-                                       dev_warn(&dd->pdev->dev,
-                                               "Null completion "
-                                               "for tag %d",
-                                               tag);
-
-                                       if (mtip_check_surprise_removal(
-                                               dd->pdev)) {
-                                               mtip_command_cleanup(dd);
-                                               return;
-                                       }
+                               if (mtip_check_surprise_removal(
+                                       dd->pdev)) {
+                                       mtip_command_cleanup(dd);
+                                       return;
                                }
                        }
                }
+               completed >>= 1;
        }
+
+       /* If last, re-enable interrupts */
+       if (atomic_dec_return(&dd->irq_workers_active) == 0)
+               writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
 }
 
 /*
@@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
        struct mtip_port *port = dd->port;
        u32 hba_stat, port_stat;
        int rv = IRQ_NONE;
+       int do_irq_enable = 1, i, workers;
+       struct mtip_work *twork;
 
        hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
        if (hba_stat) {
@@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
                writel(port_stat, port->mmio + PORT_IRQ_STAT);
 
                /* Demux port status */
-               if (likely(port_stat & PORT_IRQ_SDB_FIS))
-                       mtip_process_sdbf(dd);
+               if (likely(port_stat & PORT_IRQ_SDB_FIS)) {
+                       do_irq_enable = 0;
+                       WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0);
+
+                       /* Start at 1: group zero is always local? */
+                       for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS;
+                                                                       i++) {
+                               twork = &dd->work[i];
+                               twork->completed = readl(port->completed[i]);
+                               if (twork->completed)
+                                       workers++;
+                       }
+
+                       atomic_set(&dd->irq_workers_active, workers);
+                       if (workers) {
+                               for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) {
+                                       twork = &dd->work[i];
+                                       if (twork->completed)
+                                               queue_work_on(
+                                                       twork->cpu_binding,
+                                                       dd->isr_workq,
+                                                       &twork->work);
+                               }
+
+                               if (likely(dd->work[0].completed))
+                                       mtip_workq_sdbfx(port, 0,
+                                                       dd->work[0].completed);
+
+                       } else {
+                               /*
+                                * Chip quirk: SDB interrupt but nothing
+                                * to complete
+                                */
+                               do_irq_enable = 1;
+                       }
+               }
 
                if (unlikely(port_stat & PORT_IRQ_ERR)) {
                        if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
@@ -1103,20 +1142,12 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
        }
 
        /* acknowledge interrupt */
-       writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
+       if (unlikely(do_irq_enable))
+               writel(hba_stat, dd->mmio + HOST_IRQ_STAT);
 
        return rv;
 }
 
-/*
- * Wrapper for mtip_handle_irq
- * (ignores return code)
- */
-static void mtip_tasklet(unsigned long data)
-{
-       mtip_handle_irq((struct driver_data *) data);
-}
-
 /*
  * HBA interrupt subroutine.
  *
@@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data)
 static irqreturn_t mtip_irq_handler(int irq, void *instance)
 {
        struct driver_data *dd = instance;
-       tasklet_schedule(&dd->tasklet);
-       return IRQ_HANDLED;
+
+       return mtip_handle_irq(dd);
 }
 
 static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
@@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
        }
 #endif
 
+       /* Demux ID.DRAT & ID.RZAT to determine trim support */
+       if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
+               port->dd->trim_supp = true;
+       else
+               port->dd->trim_supp = false;
+
        /* Set the identify buffer as valid. */
        port->identify_valid = 1;
 
@@ -1675,6 +1712,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
        return rv;
 }
 
+/*
+ * Trim unused sectors
+ *
+ * @dd         pointer to driver_data structure
+ * @lba                starting lba
+ * @len                # of 512b sectors to trim
+ *
+ * return value
+ *      -ENOMEM                Out of dma memory
+ *      -EINVAL                Invalid parameters passed in, trim not supported
+ *      -EIO           Error submitting trim request to hw
+ */
+static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len)
+{
+       int i, rv = 0;
+       u64 tlba, tlen, sect_left;
+       struct mtip_trim_entry *buf;
+       dma_addr_t dma_addr;
+       struct host_to_dev_fis fis;
+
+       if (!len || dd->trim_supp == false)
+               return -EINVAL;
+
+       /* Trim request too big */
+       WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
+
+       /* Trim request not aligned on 4k boundary */
+       WARN_ON(len % 8 != 0);
+
+       /* Warn if vu_trim structure is too big */
+       WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE);
+
+       /* Allocate a DMA buffer for the trim structure */
+       buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
+                                                               GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+       memset(buf, 0, ATA_SECT_SIZE);
+
+       for (i = 0, sect_left = len, tlba = lba;
+                       i < MTIP_MAX_TRIM_ENTRIES && sect_left;
+                       i++) {
+               tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
+                                       MTIP_MAX_TRIM_ENTRY_LEN :
+                                       sect_left);
+               buf[i].lba = __force_bit2int cpu_to_le32(tlba);
+               buf[i].range = __force_bit2int cpu_to_le16(tlen);
+               tlba += tlen;
+               sect_left -= tlen;
+       }
+       WARN_ON(sect_left != 0);
+
+       /* Build the fis */
+       memset(&fis, 0, sizeof(struct host_to_dev_fis));
+       fis.type       = 0x27;
+       fis.opts       = 1 << 7;
+       fis.command    = 0xfb;
+       fis.features   = 0x60;
+       fis.sect_count = 1;
+       fis.device     = ATA_DEVICE_OBS;
+
+       if (mtip_exec_internal_command(dd->port,
+                                       &fis,
+                                       5,
+                                       dma_addr,
+                                       ATA_SECT_SIZE,
+                                       0,
+                                       GFP_KERNEL,
+                                       MTIP_TRIM_TIMEOUT_MS) < 0)
+               rv = -EIO;
+
+       dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
+       return rv;
+}
+
 /*
  * Get the drive capacity.
  *
@@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd)
 
        hba_setup(dd);
 
-       tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd);
-
-       dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL);
+       dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
+                               dd->numa_node);
        if (!dd->port) {
                dev_err(&dd->pdev->dev,
                        "Memory allocation: port structure\n");
                return -ENOMEM;
        }
 
+       /* Continue workqueue setup */
+       for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
+               dd->work[i].port = dd->port;
+
        /* Counting semaphore to track command slot usage */
        sema_init(&dd->port->cmd_slot, num_command_slots - 1);
 
        /* Spinlock to prevent concurrent issue */
-       spin_lock_init(&dd->port->cmd_issue_lock);
+       for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
+               spin_lock_init(&dd->port->cmd_issue_lock[i]);
 
        /* Set the port mmio base address. */
        dd->port->mmio  = dd->mmio + PORT_OFFSET;
@@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd)
                        "Unable to allocate IRQ %d\n", dd->pdev->irq);
                goto out2;
        }
+       irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding));
 
        /* Enable interrupts on the HBA. */
        writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
@@ -3241,7 +3358,8 @@ out3:
        writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
                        dd->mmio + HOST_CTL);
 
-       /*Release the IRQ. */
+       /* Release the IRQ. */
+       irq_set_affinity_hint(dd->pdev->irq, NULL);
        devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
 
 out2:
@@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd)
        del_timer_sync(&dd->port->cmd_timer);
 
        /* Release the IRQ. */
+       irq_set_affinity_hint(dd->pdev->irq, NULL);
        devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
 
-       /* Stop the bottom half tasklet. */
-       tasklet_kill(&dd->tasklet);
-
        /* Free the command/command header memory. */
        dmam_free_coherent(&dd->pdev->dev,
                        HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
@@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
                }
        }
 
+       if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+               bio_endio(bio, mtip_send_trim(dd, bio->bi_sector,
+                                               bio_sectors(bio)));
+               return;
+       }
+
        if (unlikely(!bio_has_data(bio))) {
                blk_queue_flush(queue, 0);
                bio_endio(bio, 0);
@@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd)
                goto protocol_init_error;
        }
 
-       dd->disk = alloc_disk(MTIP_MAX_MINORS);
+       dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node);
        if (dd->disk  == NULL) {
                dev_err(&dd->pdev->dev,
                        "Unable to allocate gendisk structure\n");
@@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd)
 
 skip_create_disk:
        /* Allocate the request queue. */
-       dd->queue = blk_alloc_queue(GFP_KERNEL);
+       dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node);
        if (dd->queue == NULL) {
                dev_err(&dd->pdev->dev,
                        "Unable to allocate request queue\n");
@@ -3783,6 +3905,15 @@ skip_create_disk:
         */
        blk_queue_flush(dd->queue, 0);
 
+       /* Signal trim support */
+       if (dd->trim_supp == true) {
+               set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
+               dd->queue->limits.discard_granularity = 4096;
+               blk_queue_max_discard_sectors(dd->queue,
+                       MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
+               dd->queue->limits.discard_zeroes_data = 0;
+       }
+
        /* Set the capacity of the device in 512 byte sectors. */
        if (!(mtip_hw_get_capacity(dd, &capacity))) {
                dev_warn(&dd->pdev->dev,
@@ -3813,9 +3944,8 @@ skip_create_disk:
 
 start_service_thread:
        sprintf(thd_name, "mtip_svc_thd_%02d", index);
-
-       dd->mtip_svc_handler = kthread_run(mtip_service_thread,
-                                               dd, thd_name);
+       dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
+                                               dd, dd->numa_node, thd_name);
 
        if (IS_ERR(dd->mtip_svc_handler)) {
                dev_err(&dd->pdev->dev, "service thread failed to start\n");
@@ -3823,7 +3953,7 @@ start_service_thread:
                rv = -EFAULT;
                goto kthread_run_error;
        }
-
+       wake_up_process(dd->mtip_svc_handler);
        if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
                rv = wait_for_rebuild;
 
@@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd)
        return 0;
 }
 
+static void drop_cpu(int cpu)
+{
+       cpu_use[cpu]--;
+}
+
+static int get_least_used_cpu_on_node(int node)
+{
+       int cpu, least_used_cpu, least_cnt;
+       const struct cpumask *node_mask;
+
+       node_mask = cpumask_of_node(node);
+       least_used_cpu = cpumask_first(node_mask);
+       least_cnt = cpu_use[least_used_cpu];
+       cpu = least_used_cpu;
+
+       for_each_cpu(cpu, node_mask) {
+               if (cpu_use[cpu] < least_cnt) {
+                       least_used_cpu = cpu;
+                       least_cnt = cpu_use[cpu];
+               }
+       }
+       cpu_use[least_used_cpu]++;
+       return least_used_cpu;
+}
+
+/* Helper for selecting a node in round robin mode */
+static inline int mtip_get_next_rr_node(void)
+{
+       static int next_node = -1;
+
+       if (next_node == -1) {
+               next_node = first_online_node;
+               return next_node;
+       }
+
+       next_node = next_online_node(next_node);
+       if (next_node == MAX_NUMNODES)
+               next_node = first_online_node;
+       return next_node;
+}
+
+static DEFINE_HANDLER(0);
+static DEFINE_HANDLER(1);
+static DEFINE_HANDLER(2);
+static DEFINE_HANDLER(3);
+static DEFINE_HANDLER(4);
+static DEFINE_HANDLER(5);
+static DEFINE_HANDLER(6);
+static DEFINE_HANDLER(7);
+
 /*
  * Called for each supported PCI device detected.
  *
@@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 {
        int rv = 0;
        struct driver_data *dd = NULL;
+       char cpu_list[256];
+       const struct cpumask *node_mask;
+       int cpu, i = 0, j = 0;
+       int my_node = NUMA_NO_NODE;
 
        /* Allocate memory for this devices private data. */
-       dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL);
+       my_node = pcibus_to_node(pdev->bus);
+       if (my_node != NUMA_NO_NODE) {
+               if (!node_online(my_node))
+                       my_node = mtip_get_next_rr_node();
+       } else {
+               dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n");
+               my_node = mtip_get_next_rr_node();
+       }
+       dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n",
+               my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev),
+               cpu_to_node(smp_processor_id()), smp_processor_id());
+
+       dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
        if (dd == NULL) {
                dev_err(&pdev->dev,
                        "Unable to allocate memory for driver data\n");
@@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev,
                }
        }
 
-       pci_set_master(pdev);
+       /* Copy the info we may need later into the private data structure. */
+       dd->major       = mtip_major;
+       dd->instance    = instance;
+       dd->pdev        = pdev;
+       dd->numa_node   = my_node;
 
+       memset(dd->workq_name, 0, 32);
+       snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);
+
+       dd->isr_workq = create_workqueue(dd->workq_name);
+       if (!dd->isr_workq) {
+               dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
+               goto block_initialize_err;
+       }
+
+       memset(cpu_list, 0, sizeof(cpu_list));
+
+       node_mask = cpumask_of_node(dd->numa_node);
+       if (!cpumask_empty(node_mask)) {
+               for_each_cpu(cpu, node_mask)
+               {
+                       snprintf(&cpu_list[j], 256 - j, "%d ", cpu);
+                       j = strlen(cpu_list);
+               }
+
+               dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n",
+                       dd->numa_node,
+                       topology_physical_package_id(cpumask_first(node_mask)),
+                       nr_cpus_node(dd->numa_node),
+                       cpu_list);
+       } else
+               dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n");
+
+       dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node);
+       dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n",
+               cpu_to_node(dd->isr_binding), dd->isr_binding);
+
+       /* first worker context always runs in ISR */
+       dd->work[0].cpu_binding = dd->isr_binding;
+       dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
+       dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
+       dd->work[3].cpu_binding = dd->work[0].cpu_binding;
+       dd->work[4].cpu_binding = dd->work[1].cpu_binding;
+       dd->work[5].cpu_binding = dd->work[2].cpu_binding;
+       dd->work[6].cpu_binding = dd->work[2].cpu_binding;
+       dd->work[7].cpu_binding = dd->work[1].cpu_binding;
+
+       /* Log the bindings */
+       for_each_present_cpu(cpu) {
+               memset(cpu_list, 0, sizeof(cpu_list));
+               for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) {
+                       if (dd->work[i].cpu_binding == cpu) {
+                               snprintf(&cpu_list[j], 256 - j, "%d ", i);
+                               j = strlen(cpu_list);
+                       }
+               }
+               if (j)
+                       dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list);
+       }
+
+       INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0);
+       INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1);
+       INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2);
+       INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3);
+       INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4);
+       INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5);
+       INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6);
+       INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);
+
+       pci_set_master(pdev);
        if (pci_enable_msi(pdev)) {
                dev_warn(&pdev->dev,
                        "Unable to enable MSI interrupt.\n");
                goto block_initialize_err;
        }
 
-       /* Copy the info we may need later into the private data structure. */
-       dd->major       = mtip_major;
-       dd->instance    = instance;
-       dd->pdev        = pdev;
-
        /* Initialize the block layer. */
        rv = mtip_block_initialize(dd);
        if (rv < 0) {
@@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev,
 
 block_initialize_err:
        pci_disable_msi(pdev);
-
+       if (dd->isr_workq) {
+               flush_workqueue(dd->isr_workq);
+               destroy_workqueue(dd->isr_workq);
+               drop_cpu(dd->work[0].cpu_binding);
+               drop_cpu(dd->work[1].cpu_binding);
+               drop_cpu(dd->work[2].cpu_binding);
+       }
 setmask_err:
        pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
 
@@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev)
        /* Clean up the block layer. */
        mtip_block_remove(dd);
 
+       if (dd->isr_workq) {
+               flush_workqueue(dd->isr_workq);
+               destroy_workqueue(dd->isr_workq);
+               drop_cpu(dd->work[0].cpu_binding);
+               drop_cpu(dd->work[1].cpu_binding);
+               drop_cpu(dd->work[2].cpu_binding);
+       }
+
        pci_disable_msi(pdev);
 
        kfree(dd);
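
The bulk of the mtip32xx.c diff replaces the single bottom-half tasklet with per-slot-group work items bound to CPUs on the device's NUMA node: the ISR snapshots each group's completed bits, queues one worker per non-empty group (group 0 is handled inline in the ISR), and the last worker to finish re-enables HOST_IRQ_STAT. Stripped of the hardware details, the handoff looks roughly like this (types and the reenable hook are illustrative, not the driver's):

#include <linux/atomic.h>
#include <linux/types.h>
#include <linux/workqueue.h>

#define NGROUPS 8

struct group_work {
	struct work_struct work;
	u32 completed;		/* snapshot of this group's completion bits */
	int cpu_binding;	/* CPU this group's worker is pinned to */
};

struct demo_ctx {
	struct workqueue_struct *wq;
	struct group_work grp[NGROUPS];
	atomic_t workers_active;
	void (*reenable_irq)(struct demo_ctx *);	/* illustrative hook */
};

/* ISR side: count the busy groups first, then queue one bound worker each. */
static void demo_isr_demux(struct demo_ctx *ctx, const u32 completed[NGROUPS])
{
	int i, workers = 0;

	for (i = 0; i < NGROUPS; i++) {
		ctx->grp[i].completed = completed[i];
		if (completed[i])
			workers++;
	}
	atomic_set(&ctx->workers_active, workers);
	for (i = 0; i < NGROUPS; i++)
		if (ctx->grp[i].completed)
			queue_work_on(ctx->grp[i].cpu_binding, ctx->wq,
				      &ctx->grp[i].work);
}

/* Worker side: the last group to finish turns the interrupt back on. */
static void demo_group_done(struct demo_ctx *ctx)
{
	if (atomic_dec_return(&ctx->workers_active) == 0)
		ctx->reenable_irq(ctx);
}
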
index b1742640556a782cee77701c78c4ddf7e961591a..3bffff5f670cc66f55f01ca5953f9d47702468ac 100644 (file)
@@ -164,6 +164,35 @@ struct smart_attr {
        u8 res[3];
 } __packed;
 
+struct mtip_work {
+       struct work_struct work;
+       void *port;
+       int cpu_binding;
+       u32 completed;
+} ____cacheline_aligned_in_smp;
+
+#define DEFINE_HANDLER(group)                                  \
+       void mtip_workq_sdbf##group(struct work_struct *work)       \
+       {                                                      \
+               struct mtip_work *w = (struct mtip_work *) work;         \
+               mtip_workq_sdbfx(w->port, group, w->completed);     \
+       }
+
+#define MTIP_TRIM_TIMEOUT_MS           240000
+#define MTIP_MAX_TRIM_ENTRIES          8
+#define MTIP_MAX_TRIM_ENTRY_LEN        0xfff8
+
+struct mtip_trim_entry {
+       u32 lba;   /* starting lba of region */
+       u16 rsvd;  /* unused */
+       u16 range; /* # of 512b blocks to trim */
+} __packed;
+
+struct mtip_trim {
+       /* Array of regions to trim */
+       struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES];
+} __packed;
+
 /* Register Frame Information Structure (FIS), host to device. */
 struct host_to_dev_fis {
        /*
@@ -424,7 +453,7 @@ struct mtip_port {
         */
        struct semaphore cmd_slot;
        /* Spinlock for working around command-issue bug. */
-       spinlock_t cmd_issue_lock;
+       spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS];
 };
 
 /*
@@ -447,9 +476,6 @@ struct driver_data {
 
        struct mtip_port *port; /* Pointer to the port data structure. */
 
-       /* Tasklet used to process the bottom half of the ISR. */
-       struct tasklet_struct tasklet;
-
        unsigned product_type; /* magic value declaring the product type */
 
        unsigned slot_groups; /* number of slot groups the product supports */
@@ -461,6 +487,20 @@ struct driver_data {
        struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
 
        struct dentry *dfs_node;
+
+       bool trim_supp; /* flag indicating trim support */
+
+       int numa_node; /* NUMA support */
+
+       char workq_name[32];
+
+       struct workqueue_struct *isr_workq;
+
+       struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
+
+       atomic_t irq_workers_active;
+
+       int isr_binding;
 };
 
 #endif
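
The new mtip_trim_entry/mtip_trim structures cap a single TRIM command at MTIP_MAX_TRIM_ENTRIES (8) ranges of at most MTIP_MAX_TRIM_ENTRY_LEN (0xfff8) sectors each, which is also what mtip32xx.c advertises through blk_queue_max_discard_sectors(). The splitting that mtip_send_trim() performs reduces to the sketch below (the helper is illustrative; the real driver additionally converts the fields to little-endian):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

struct demo_trim_entry {
	u32 lba;	/* starting LBA of the range */
	u16 rsvd;
	u16 range;	/* number of 512-byte sectors */
};

/* Split 'len' sectors starting at 'lba' into at most 8 ranges of at most
 * 0xfff8 sectors; returns the entry count, or -EINVAL if it doesn't fit. */
static int demo_fill_trim(struct demo_trim_entry *buf, u64 lba, u64 len)
{
	int i;

	for (i = 0; i < 8 && len; i++) {
		u64 tlen = min_t(u64, len, 0xfff8);

		buf[i].lba = (u32)lba;
		buf[i].range = (u16)tlen;
		lba += tlen;
		len -= tlen;
	}
	return len ? -EINVAL : i;
}

A maximal request of 8 * 0xfff8 = 524224 sectors (just under 256 MiB) fills all eight entries, matching the discard limit set on the queue.
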
index 89576a0b3f2ed5b099ecf7ce675104d282b0dd48..6c81a4c040b987e75d4a8b0bb5e4ff65e2108dba 100644 (file)
 #define        SECTOR_SHIFT    9
 #define        SECTOR_SIZE     (1ULL << SECTOR_SHIFT)
 
-/* It might be useful to have this defined elsewhere too */
+/* It might be useful to have these defined elsewhere */
 
-#define        U64_MAX ((u64) (~0ULL))
+#define        U8_MAX  ((u8)   (~0U))
+#define        U16_MAX ((u16)  (~0U))
+#define        U32_MAX ((u32)  (~0U))
+#define        U64_MAX ((u64)  (~0ULL))
 
 #define RBD_DRV_NAME "rbd"
 #define RBD_DRV_NAME_LONG "rbd (rados block device)"
@@ -66,7 +69,6 @@
                        (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
 
 #define RBD_MAX_SNAP_COUNT     510     /* allows max snapc to fit in 4KB */
-#define RBD_MAX_OPT_LEN                1024
 
 #define RBD_SNAP_HEAD_NAME     "-"
 
@@ -93,8 +95,6 @@
 #define DEV_NAME_LEN           32
 #define MAX_INT_FORMAT_WIDTH   ((5 * sizeof (int)) / 2 + 1)
 
-#define RBD_READ_ONLY_DEFAULT          false
-
 /*
  * block device image metadata (in-memory version)
  */
@@ -119,16 +119,33 @@ struct rbd_image_header {
  * An rbd image specification.
  *
  * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
- * identify an image.
+ * identify an image.  Each rbd_dev structure includes a pointer to
+ * an rbd_spec structure that encapsulates this identity.
+ *
+ * Each of the id's in an rbd_spec has an associated name.  For a
+ * user-mapped image, the names are supplied and the id's associated
+ * with them are looked up.  For a layered image, a parent image is
+ * defined by the tuple, and the names are looked up.
+ *
+ * An rbd_dev structure contains a parent_spec pointer which is
+ * non-null if the image it represents is a child in a layered
+ * image.  This pointer will refer to the rbd_spec structure used
+ * by the parent rbd_dev for its own identity (i.e., the structure
+ * is shared between the parent and child).
+ *
+ * Since these structures are populated once, during the discovery
+ * phase of image construction, they are effectively immutable so
+ * we make no effort to synchronize access to them.
+ *
+ * Note that code herein does not assume the image name is known (it
+ * could be a null pointer).
  */
 struct rbd_spec {
        u64             pool_id;
        char            *pool_name;
 
        char            *image_id;
-       size_t          image_id_len;
        char            *image_name;
-       size_t          image_name_len;
 
        u64             snap_id;
        char            *snap_name;
@@ -136,10 +153,6 @@ struct rbd_spec {
        struct kref     kref;
 };
 
-struct rbd_options {
-       bool    read_only;
-};
-
 /*
  * an instance of the client.  multiple devices may share an rbd client.
  */
@@ -149,37 +162,76 @@ struct rbd_client {
        struct list_head        node;
 };
 
-/*
- * a request completion status
- */
-struct rbd_req_status {
-       int done;
-       int rc;
-       u64 bytes;
+struct rbd_img_request;
+typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
+
+#define        BAD_WHICH       U32_MAX         /* Good which or bad which, which? */
+
+struct rbd_obj_request;
+typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
+
+enum obj_request_type {
+       OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
 };
 
-/*
- * a collection of requests
- */
-struct rbd_req_coll {
-       int                     total;
-       int                     num_done;
+struct rbd_obj_request {
+       const char              *object_name;
+       u64                     offset;         /* object start byte */
+       u64                     length;         /* bytes from offset */
+
+       struct rbd_img_request  *img_request;
+       struct list_head        links;          /* img_request->obj_requests */
+       u32                     which;          /* posn image request list */
+
+       enum obj_request_type   type;
+       union {
+               struct bio      *bio_list;
+               struct {
+                       struct page     **pages;
+                       u32             page_count;
+               };
+       };
+
+       struct ceph_osd_request *osd_req;
+
+       u64                     xferred;        /* bytes transferred */
+       u64                     version;
+       int                     result;
+       atomic_t                done;
+
+       rbd_obj_callback_t      callback;
+       struct completion       completion;
+
        struct kref             kref;
-       struct rbd_req_status   status[0];
 };
 
-/*
- * a single io request
- */
-struct rbd_request {
-       struct request          *rq;            /* blk layer request */
-       struct bio              *bio;           /* cloned bio */
-       struct page             **pages;        /* list of used pages */
-       u64                     len;
-       int                     coll_index;
-       struct rbd_req_coll     *coll;
+struct rbd_img_request {
+       struct request          *rq;
+       struct rbd_device       *rbd_dev;
+       u64                     offset; /* starting image byte offset */
+       u64                     length; /* byte count from offset */
+       bool                    write_request;  /* false for read */
+       union {
+               struct ceph_snap_context *snapc;        /* for writes */
+               u64             snap_id;                /* for reads */
+       };
+       spinlock_t              completion_lock;/* protects next_completion */
+       u32                     next_completion;
+       rbd_img_callback_t      callback;
+
+       u32                     obj_request_count;
+       struct list_head        obj_requests;   /* rbd_obj_request structs */
+
+       struct kref             kref;
 };
 
+#define for_each_obj_request(ireq, oreq) \
+       list_for_each_entry(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_from(ireq, oreq) \
+       list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
+#define for_each_obj_request_safe(ireq, oreq, n) \
+       list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
+
 struct rbd_snap {
        struct  device          dev;
        const char              *name;
@@ -209,16 +261,18 @@ struct rbd_device {
 
        char                    name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
 
-       spinlock_t              lock;           /* queue lock */
+       spinlock_t              lock;           /* queue, flags, open_count */
 
        struct rbd_image_header header;
-       bool                    exists;
+       unsigned long           flags;          /* possibly lock protected */
        struct rbd_spec         *spec;
 
        char                    *header_name;
 
+       struct ceph_file_layout layout;
+
        struct ceph_osd_event   *watch_event;
-       struct ceph_osd_request *watch_request;
+       struct rbd_obj_request  *watch_request;
 
        struct rbd_spec         *parent_spec;
        u64                     parent_overlap;
@@ -235,7 +289,19 @@ struct rbd_device {
 
        /* sysfs related */
        struct device           dev;
-       unsigned long           open_count;
+       unsigned long           open_count;     /* protected by lock */
+};
+
+/*
+ * Flag bits for rbd_dev->flags.  If atomicity is required,
+ * rbd_dev->lock is used to protect access.
+ *
+ * Currently, only the "removing" flag (which is coupled with the
+ * "open_count" field) requires atomic access.
+ */
+enum rbd_dev_flags {
+       RBD_DEV_FLAG_EXISTS,    /* mapped snapshot has not been deleted */
+       RBD_DEV_FLAG_REMOVING,  /* this mapping is being removed */
 };
 
 static DEFINE_MUTEX(ctl_mutex);          /* Serialize open/close/setup/teardown */
@@ -277,6 +343,33 @@ static struct device rbd_root_dev = {
        .release =      rbd_root_dev_release,
 };
 
+static __printf(2, 3)
+void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
+{
+       struct va_format vaf;
+       va_list args;
+
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       if (!rbd_dev)
+               printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf);
+       else if (rbd_dev->disk)
+               printk(KERN_WARNING "%s: %s: %pV\n",
+                       RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
+       else if (rbd_dev->spec && rbd_dev->spec->image_name)
+               printk(KERN_WARNING "%s: image %s: %pV\n",
+                       RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
+       else if (rbd_dev->spec && rbd_dev->spec->image_id)
+               printk(KERN_WARNING "%s: id %s: %pV\n",
+                       RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
+       else    /* punt */
+               printk(KERN_WARNING "%s: rbd_dev %p: %pV\n",
+                       RBD_DRV_NAME, rbd_dev, &vaf);
+       va_end(args);
+}
+
 #ifdef RBD_DEBUG
 #define rbd_assert(expr)                                               \
                if (unlikely(!(expr))) {                                \
@@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
 static int rbd_open(struct block_device *bdev, fmode_t mode)
 {
        struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
+       bool removing = false;
 
        if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
                return -EROFS;
 
+       spin_lock_irq(&rbd_dev->lock);
+       if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
+               removing = true;
+       else
+               rbd_dev->open_count++;
+       spin_unlock_irq(&rbd_dev->lock);
+       if (removing)
+               return -ENOENT;
+
        mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
        (void) get_device(&rbd_dev->dev);
        set_device_ro(bdev, rbd_dev->mapping.read_only);
-       rbd_dev->open_count++;
        mutex_unlock(&ctl_mutex);
 
        return 0;
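
This rbd_open() hunk and the rbd_release() hunk that follows move open_count under rbd_dev->lock and pair it with a REMOVING flag, so an open racing with device removal fails with -ENOENT instead of taking a reference on a disappearing device. The core of that pattern is sketched below (the struct and names are illustrative, not rbd's):

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_dev {
	spinlock_t lock;
	unsigned long open_count;	/* protected by lock */
	bool removing;			/* set once teardown has started */
};

static int demo_open(struct demo_dev *d)
{
	bool removing = false;

	spin_lock_irq(&d->lock);
	if (d->removing)
		removing = true;	/* refuse new opens during removal */
	else
		d->open_count++;
	spin_unlock_irq(&d->lock);

	return removing ? -ENOENT : 0;
}

static void demo_release(struct demo_dev *d)
{
	spin_lock_irq(&d->lock);
	d->open_count--;
	spin_unlock_irq(&d->lock);
}
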
@@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
 static int rbd_release(struct gendisk *disk, fmode_t mode)
 {
        struct rbd_device *rbd_dev = disk->private_data;
+       unsigned long open_count_before;
+
+       spin_lock_irq(&rbd_dev->lock);
+       open_count_before = rbd_dev->open_count--;
+       spin_unlock_irq(&rbd_dev->lock);
+       rbd_assert(open_count_before > 0);
 
        mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
-       rbd_assert(rbd_dev->open_count > 0);
-       rbd_dev->open_count--;
        put_device(&rbd_dev->dev);
        mutex_unlock(&ctl_mutex);
 
@@ -337,7 +443,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
        struct rbd_client *rbdc;
        int ret = -ENOMEM;
 
-       dout("rbd_client_create\n");
+       dout("%s:\n", __func__);
        rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
        if (!rbdc)
                goto out_opt;
@@ -361,8 +467,8 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
        spin_unlock(&rbd_client_list_lock);
 
        mutex_unlock(&ctl_mutex);
+       dout("%s: rbdc %p\n", __func__, rbdc);
 
-       dout("rbd_client_create created %p\n", rbdc);
        return rbdc;
 
 out_err:
@@ -373,6 +479,8 @@ out_mutex:
 out_opt:
        if (ceph_opts)
                ceph_destroy_options(ceph_opts);
+       dout("%s: error %d\n", __func__, ret);
+
        return ERR_PTR(ret);
 }
 
@@ -426,6 +534,12 @@ static match_table_t rbd_opts_tokens = {
        {-1, NULL}
 };
 
+struct rbd_options {
+       bool    read_only;
+};
+
+#define RBD_READ_ONLY_DEFAULT  false
+
 static int parse_rbd_opts_token(char *c, void *private)
 {
        struct rbd_options *rbd_opts = private;
@@ -493,7 +607,7 @@ static void rbd_client_release(struct kref *kref)
 {
        struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
 
-       dout("rbd_release_client %p\n", rbdc);
+       dout("%s: rbdc %p\n", __func__, rbdc);
        spin_lock(&rbd_client_list_lock);
        list_del(&rbdc->node);
        spin_unlock(&rbd_client_list_lock);
@@ -512,18 +626,6 @@ static void rbd_put_client(struct rbd_client *rbdc)
                kref_put(&rbdc->kref, rbd_client_release);
 }
 
-/*
- * Destroy requests collection
- */
-static void rbd_coll_release(struct kref *kref)
-{
-       struct rbd_req_coll *coll =
-               container_of(kref, struct rbd_req_coll, kref);
-
-       dout("rbd_coll_release %p\n", coll);
-       kfree(coll);
-}
-
 static bool rbd_image_format_valid(u32 image_format)
 {
        return image_format == 1 || image_format == 2;
@@ -707,7 +809,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev)
                        goto done;
                rbd_dev->mapping.read_only = true;
        }
-       rbd_dev->exists = true;
+       set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
+
 done:
        return ret;
 }
@@ -724,7 +827,7 @@ static void rbd_header_free(struct rbd_image_header *header)
        header->snapc = NULL;
 }
 
-static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
+static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
 {
        char *name;
        u64 segment;
@@ -767,23 +870,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev,
        return length;
 }
 
-static int rbd_get_num_segments(struct rbd_image_header *header,
-                               u64 ofs, u64 len)
-{
-       u64 start_seg;
-       u64 end_seg;
-
-       if (!len)
-               return 0;
-       if (len - 1 > U64_MAX - ofs)
-               return -ERANGE;
-
-       start_seg = ofs >> header->obj_order;
-       end_seg = (ofs + len - 1) >> header->obj_order;
-
-       return end_seg - start_seg + 1;
-}
-
 /*
  * returns the size of an object in the image
  */
@@ -949,8 +1035,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src,
                unsigned int bi_size;
                struct bio *bio;
 
-               if (!bi)
+               if (!bi) {
+                       rbd_warn(NULL, "bio_chain exhausted with %u left", len);
                        goto out_err;   /* EINVAL; ran out of bio's */
+               }
                bi_size = min_t(unsigned int, bi->bi_size - off, len);
                bio = bio_clone_range(bi, off, bi_size, gfpmask);
                if (!bio)
@@ -976,399 +1064,721 @@ out_err:
        return NULL;
 }
 
-/*
- * helpers for osd request op vectors.
- */
-static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops,
-                                       int opcode, u32 payload_len)
+static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
 {
-       struct ceph_osd_req_op *ops;
+       dout("%s: obj %p (was %d)\n", __func__, obj_request,
+               atomic_read(&obj_request->kref.refcount));
+       kref_get(&obj_request->kref);
+}
+
+static void rbd_obj_request_destroy(struct kref *kref);
+static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
+{
+       rbd_assert(obj_request != NULL);
+       dout("%s: obj %p (was %d)\n", __func__, obj_request,
+               atomic_read(&obj_request->kref.refcount));
+       kref_put(&obj_request->kref, rbd_obj_request_destroy);
+}
+
+static void rbd_img_request_get(struct rbd_img_request *img_request)
+{
+       dout("%s: img %p (was %d)\n", __func__, img_request,
+               atomic_read(&img_request->kref.refcount));
+       kref_get(&img_request->kref);
+}
+
+static void rbd_img_request_destroy(struct kref *kref);
+static void rbd_img_request_put(struct rbd_img_request *img_request)
+{
+       rbd_assert(img_request != NULL);
+       dout("%s: img %p (was %d)\n", __func__, img_request,
+               atomic_read(&img_request->kref.refcount));
+       kref_put(&img_request->kref, rbd_img_request_destroy);
+}
+
+static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
+                                       struct rbd_obj_request *obj_request)
+{
+       rbd_assert(obj_request->img_request == NULL);
+
+       rbd_obj_request_get(obj_request);
+       obj_request->img_request = img_request;
+       obj_request->which = img_request->obj_request_count;
+       rbd_assert(obj_request->which != BAD_WHICH);
+       img_request->obj_request_count++;
+       list_add_tail(&obj_request->links, &img_request->obj_requests);
+       dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+               obj_request->which);
+}
 
-       ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO);
-       if (!ops)
+static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
+                                       struct rbd_obj_request *obj_request)
+{
+       rbd_assert(obj_request->which != BAD_WHICH);
+
+       dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
+               obj_request->which);
+       list_del(&obj_request->links);
+       rbd_assert(img_request->obj_request_count > 0);
+       img_request->obj_request_count--;
+       rbd_assert(obj_request->which == img_request->obj_request_count);
+       obj_request->which = BAD_WHICH;
+       rbd_assert(obj_request->img_request == img_request);
+       obj_request->img_request = NULL;
+       obj_request->callback = NULL;
+       rbd_obj_request_put(obj_request);
+}
+
+static bool obj_request_type_valid(enum obj_request_type type)
+{
+       switch (type) {
+       case OBJ_REQUEST_NODATA:
+       case OBJ_REQUEST_BIO:
+       case OBJ_REQUEST_PAGES:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...)
+{
+       struct ceph_osd_req_op *op;
+       va_list args;
+       size_t size;
+
+       op = kzalloc(sizeof (*op), GFP_NOIO);
+       if (!op)
                return NULL;
+       op->op = opcode;
+       va_start(args, opcode);
+       switch (opcode) {
+       case CEPH_OSD_OP_READ:
+       case CEPH_OSD_OP_WRITE:
+               /* rbd_osd_req_op_create(READ, offset, length) */
+               /* rbd_osd_req_op_create(WRITE, offset, length) */
+               op->extent.offset = va_arg(args, u64);
+               op->extent.length = va_arg(args, u64);
+               if (opcode == CEPH_OSD_OP_WRITE)
+                       op->payload_len = op->extent.length;
+               break;
+       case CEPH_OSD_OP_STAT:
+               break;
+       case CEPH_OSD_OP_CALL:
+               /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */
+               op->cls.class_name = va_arg(args, char *);
+               size = strlen(op->cls.class_name);
+               rbd_assert(size <= (size_t) U8_MAX);
+               op->cls.class_len = size;
+               op->payload_len = size;
+
+               op->cls.method_name = va_arg(args, char *);
+               size = strlen(op->cls.method_name);
+               rbd_assert(size <= (size_t) U8_MAX);
+               op->cls.method_len = size;
+               op->payload_len += size;
+
+               op->cls.argc = 0;
+               op->cls.indata = va_arg(args, void *);
+               size = va_arg(args, size_t);
+               rbd_assert(size <= (size_t) U32_MAX);
+               op->cls.indata_len = (u32) size;
+               op->payload_len += size;
+               break;
+       case CEPH_OSD_OP_NOTIFY_ACK:
+       case CEPH_OSD_OP_WATCH:
+               /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */
+               /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */
+               op->watch.cookie = va_arg(args, u64);
+               op->watch.ver = va_arg(args, u64);
+               op->watch.ver = cpu_to_le64(op->watch.ver);
+               if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int))
+                       op->watch.flag = (u8) 1;
+               break;
+       default:
+               rbd_warn(NULL, "unsupported opcode %hu\n", opcode);
+               kfree(op);
+               op = NULL;
+               break;
+       }
+       va_end(args);
 
-       ops[0].op = opcode;
+       return op;
+}
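
rbd_osd_req_op_create() varies its trailing arguments by opcode, as the inline comments above spell out: offset and length for READ/WRITE, class/method/data for CALL, cookie and version for the watch ops. A stripped-down user-space sketch of the same variadic convention, using made-up opcode and field names rather than the Ceph ones (callers must pass 64-bit arguments as such, hence the casts):

#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum op_kind { OP_READ, OP_WRITE, OP_STAT };

struct osd_op {
        enum op_kind kind;
        uint64_t offset;
        uint64_t length;
        uint64_t payload_len;
};

static struct osd_op *op_create(enum op_kind kind, ...)
{
        struct osd_op *op = calloc(1, sizeof(*op));
        va_list args;

        if (!op)
                return NULL;
        op->kind = kind;
        va_start(args, kind);
        switch (kind) {
        case OP_READ:
        case OP_WRITE:
                /* op_create(OP_READ, offset, length) */
                op->offset = va_arg(args, uint64_t);
                op->length = va_arg(args, uint64_t);
                if (kind == OP_WRITE)
                        op->payload_len = op->length;
                break;
        case OP_STAT:
                break;                  /* no extra arguments */
        }
        va_end(args);
        return op;
}

int main(void)
{
        struct osd_op *op = op_create(OP_WRITE, (uint64_t)4096, (uint64_t)512);

        printf("write %llu@%llu payload %llu\n",
               (unsigned long long)op->length,
               (unsigned long long)op->offset,
               (unsigned long long)op->payload_len);
        free(op);
        return 0;
}
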
 
-       /*
-        * op extent offset and length will be set later on
-        * in calc_raw_layout()
-        */
-       ops[0].payload_len = payload_len;
+static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op)
+{
+       kfree(op);
+}
+
+static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
+                               struct rbd_obj_request *obj_request)
+{
+       dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request);
 
-       return ops;
+       return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
 }
 
-static void rbd_destroy_ops(struct ceph_osd_req_op *ops)
+static void rbd_img_request_complete(struct rbd_img_request *img_request)
 {
-       kfree(ops);
+       dout("%s: img %p\n", __func__, img_request);
+       if (img_request->callback)
+               img_request->callback(img_request);
+       else
+               rbd_img_request_put(img_request);
 }
 
-static void rbd_coll_end_req_index(struct request *rq,
-                                  struct rbd_req_coll *coll,
-                                  int index,
-                                  int ret, u64 len)
+/* Caller is responsible for rbd_obj_request_destroy(obj_request) */
+
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
 {
-       struct request_queue *q;
-       int min, max, i;
+       dout("%s: obj %p\n", __func__, obj_request);
 
-       dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n",
-            coll, index, ret, (unsigned long long) len);
+       return wait_for_completion_interruptible(&obj_request->completion);
+}
 
-       if (!rq)
-               return;
+static void obj_request_done_init(struct rbd_obj_request *obj_request)
+{
+       atomic_set(&obj_request->done, 0);
+       smp_wmb();
+}
 
-       if (!coll) {
-               blk_end_request(rq, ret, len);
-               return;
+static void obj_request_done_set(struct rbd_obj_request *obj_request)
+{
+       int done;
+
+       done = atomic_inc_return(&obj_request->done);
+       if (done > 1) {
+               struct rbd_img_request *img_request = obj_request->img_request;
+               struct rbd_device *rbd_dev;
+
+               rbd_dev = img_request ? img_request->rbd_dev : NULL;
+               rbd_warn(rbd_dev, "obj_request %p was already done\n",
+                       obj_request);
        }
+}
 
-       q = rq->q;
-
-       spin_lock_irq(q->queue_lock);
-       coll->status[index].done = 1;
-       coll->status[index].rc = ret;
-       coll->status[index].bytes = len;
-       max = min = coll->num_done;
-       while (max < coll->total && coll->status[max].done)
-               max++;
-
-       for (i = min; i<max; i++) {
-               __blk_end_request(rq, coll->status[i].rc,
-                                 coll->status[i].bytes);
-               coll->num_done++;
-               kref_put(&coll->kref, rbd_coll_release);
+static bool obj_request_done_test(struct rbd_obj_request *obj_request)
+{
+       smp_mb();
+       return atomic_read(&obj_request->done) != 0;
+}
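
obj_request_done_set() relies on atomic_inc_return() reporting the new count, so a second completion of the same request is detected and warned about instead of being silently absorbed. A small sketch of that idea in user space, with C11 atomics standing in for the kernel's atomic_t and the smp_wmb()/smp_mb() barrier details omitted:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int done;                 /* zero-initialized, like done_init */

static void done_set(void)
{
        /* fetch_add returns the old value; old > 0 means already done */
        if (atomic_fetch_add(&done, 1) > 0)
                fprintf(stderr, "request was already done\n");
}

static bool done_test(void)
{
        return atomic_load(&done) != 0;
}

int main(void)
{
        done_set();
        done_set();                     /* triggers the "already done" warning */
        printf("done=%d\n", done_test());
        return 0;
}
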
+
+static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
+{
+       dout("%s: obj %p cb %p\n", __func__, obj_request,
+               obj_request->callback);
+       if (obj_request->callback)
+               obj_request->callback(obj_request);
+       else
+               complete_all(&obj_request->completion);
+}
+
+static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
+{
+       dout("%s: obj %p\n", __func__, obj_request);
+       obj_request_done_set(obj_request);
+}
+
+static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
+{
+       dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request,
+               obj_request->result, obj_request->xferred, obj_request->length);
+       /*
+        * ENOENT means a hole in the object.  We zero-fill the
+        * entire length of the request.  A short read also implies
+        * zero-fill to the end of the request.  Either way we
+        * update the xferred count to indicate the whole request
+        * was satisfied.
+        */
+       if (obj_request->result == -ENOENT) {
+               zero_bio_chain(obj_request->bio_list, 0);
+               obj_request->result = 0;
+               obj_request->xferred = obj_request->length;
+       } else if (obj_request->xferred < obj_request->length &&
+                       !obj_request->result) {
+               zero_bio_chain(obj_request->bio_list, obj_request->xferred);
+               obj_request->xferred = obj_request->length;
        }
-       spin_unlock_irq(q->queue_lock);
+       obj_request_done_set(obj_request);
 }
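
The read callback above turns two cases into a fully satisfied request: ENOENT (the backing object does not exist, i.e. a hole) zero-fills the whole extent, and a short read zero-fills from the transferred byte count to the end. A hedged user-space illustration of those fixups, operating on a flat buffer instead of a bio chain and using illustrative names:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void fixup_read(char *buf, uint64_t len, int *result, uint64_t *xferred)
{
        if (*result == -ENOENT) {                       /* hole: zero it all */
                memset(buf, 0, len);
                *result = 0;
                *xferred = len;
        } else if (*result == 0 && *xferred < len) {    /* short read */
                memset(buf + *xferred, 0, len - *xferred);
                *xferred = len;
        }
}

int main(void)
{
        char buf[8];
        int result = 0;
        uint64_t xferred = 3;                   /* pretend 3 of 8 bytes arrived */

        memset(buf, 'X', sizeof(buf));
        fixup_read(buf, sizeof(buf), &result, &xferred);
        printf("result=%d xferred=%llu last byte=%d\n",
               result, (unsigned long long)xferred, buf[7]);
        return 0;
}
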
 
-static void rbd_coll_end_req(struct rbd_request *req,
-                            int ret, u64 len)
+static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
 {
-       rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len);
+       dout("%s: obj %p result %d %llu\n", __func__, obj_request,
+               obj_request->result, obj_request->length);
+       /*
+        * There is no such thing as a successful short write.
+        * Our xferred value is the number of bytes transferred
+        * back.  Set it to our originally-requested length.
+        */
+       obj_request->xferred = obj_request->length;
+       obj_request_done_set(obj_request);
 }
 
 /*
- * Send ceph osd request
+ * For a simple stat call there's nothing to do.  We'll do more if
+ * this is part of a write sequence for a layered image.
  */
-static int rbd_do_request(struct request *rq,
-                         struct rbd_device *rbd_dev,
-                         struct ceph_snap_context *snapc,
-                         u64 snapid,
-                         const char *object_name, u64 ofs, u64 len,
-                         struct bio *bio,
-                         struct page **pages,
-                         int num_pages,
-                         int flags,
-                         struct ceph_osd_req_op *ops,
-                         struct rbd_req_coll *coll,
-                         int coll_index,
-                         void (*rbd_cb)(struct ceph_osd_request *req,
-                                        struct ceph_msg *msg),
-                         struct ceph_osd_request **linger_req,
-                         u64 *ver)
-{
-       struct ceph_osd_request *req;
-       struct ceph_file_layout *layout;
-       int ret;
-       u64 bno;
-       struct timespec mtime = CURRENT_TIME;
-       struct rbd_request *req_data;
-       struct ceph_osd_request_head *reqhead;
-       struct ceph_osd_client *osdc;
+static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request)
+{
+       dout("%s: obj %p\n", __func__, obj_request);
+       obj_request_done_set(obj_request);
+}
 
-       req_data = kzalloc(sizeof(*req_data), GFP_NOIO);
-       if (!req_data) {
-               if (coll)
-                       rbd_coll_end_req_index(rq, coll, coll_index,
-                                              -ENOMEM, len);
-               return -ENOMEM;
+static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
+                               struct ceph_msg *msg)
+{
+       struct rbd_obj_request *obj_request = osd_req->r_priv;
+       u16 opcode;
+
+       dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
+       rbd_assert(osd_req == obj_request->osd_req);
+       rbd_assert(!!obj_request->img_request ^
+                               (obj_request->which == BAD_WHICH));
+
+       if (osd_req->r_result < 0)
+               obj_request->result = osd_req->r_result;
+       obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version);
+
+       WARN_ON(osd_req->r_num_ops != 1);       /* For now */
+
+       /*
+        * We support a 64-bit length, but ultimately it has to be
+        * passed to blk_end_request(), which takes an unsigned int.
+        */
+       obj_request->xferred = osd_req->r_reply_op_len[0];
+       rbd_assert(obj_request->xferred < (u64) UINT_MAX);
+       opcode = osd_req->r_request_ops[0].op;
+       switch (opcode) {
+       case CEPH_OSD_OP_READ:
+               rbd_osd_read_callback(obj_request);
+               break;
+       case CEPH_OSD_OP_WRITE:
+               rbd_osd_write_callback(obj_request);
+               break;
+       case CEPH_OSD_OP_STAT:
+               rbd_osd_stat_callback(obj_request);
+               break;
+       case CEPH_OSD_OP_CALL:
+       case CEPH_OSD_OP_NOTIFY_ACK:
+       case CEPH_OSD_OP_WATCH:
+               rbd_osd_trivial_callback(obj_request);
+               break;
+       default:
+               rbd_warn(NULL, "%s: unsupported op %hu\n",
+                       obj_request->object_name, (unsigned short) opcode);
+               break;
        }
 
-       if (coll) {
-               req_data->coll = coll;
-               req_data->coll_index = coll_index;
+       if (obj_request_done_test(obj_request))
+               rbd_obj_request_complete(obj_request);
+}
+
+static struct ceph_osd_request *rbd_osd_req_create(
+                                       struct rbd_device *rbd_dev,
+                                       bool write_request,
+                                       struct rbd_obj_request *obj_request,
+                                       struct ceph_osd_req_op *op)
+{
+       struct rbd_img_request *img_request = obj_request->img_request;
+       struct ceph_snap_context *snapc = NULL;
+       struct ceph_osd_client *osdc;
+       struct ceph_osd_request *osd_req;
+       struct timespec now;
+       struct timespec *mtime;
+       u64 snap_id = CEPH_NOSNAP;
+       u64 offset = obj_request->offset;
+       u64 length = obj_request->length;
+
+       if (img_request) {
+               rbd_assert(img_request->write_request == write_request);
+               if (img_request->write_request)
+                       snapc = img_request->snapc;
+               else
+                       snap_id = img_request->snap_id;
        }
 
-       dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n",
-               object_name, (unsigned long long) ofs,
-               (unsigned long long) len, coll, coll_index);
+       /* Allocate and initialize the request, for the single op */
 
        osdc = &rbd_dev->rbd_client->client->osdc;
-       req = ceph_osdc_alloc_request(osdc, flags, snapc, ops,
-                                       false, GFP_NOIO, pages, bio);
-       if (!req) {
-               ret = -ENOMEM;
-               goto done_pages;
+       osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC);
+       if (!osd_req)
+               return NULL;    /* ENOMEM */
+
+       rbd_assert(obj_request_type_valid(obj_request->type));
+       switch (obj_request->type) {
+       case OBJ_REQUEST_NODATA:
+               break;          /* Nothing to do */
+       case OBJ_REQUEST_BIO:
+               rbd_assert(obj_request->bio_list != NULL);
+               osd_req->r_bio = obj_request->bio_list;
+               break;
+       case OBJ_REQUEST_PAGES:
+               osd_req->r_pages = obj_request->pages;
+               osd_req->r_num_pages = obj_request->page_count;
+               osd_req->r_page_alignment = offset & ~PAGE_MASK;
+               break;
        }
 
-       req->r_callback = rbd_cb;
+       if (write_request) {
+               osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+               now = CURRENT_TIME;
+               mtime = &now;
+       } else {
+               osd_req->r_flags = CEPH_OSD_FLAG_READ;
+               mtime = NULL;   /* not needed for reads */
+               offset = 0;     /* These are not used... */
+               length = 0;     /* ...for osd read requests */
+       }
 
-       req_data->rq = rq;
-       req_data->bio = bio;
-       req_data->pages = pages;
-       req_data->len = len;
+       osd_req->r_callback = rbd_osd_req_callback;
+       osd_req->r_priv = obj_request;
 
-       req->r_priv = req_data;
+       osd_req->r_oid_len = strlen(obj_request->object_name);
+       rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
+       memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
 
-       reqhead = req->r_request->front.iov_base;
-       reqhead->snapid = cpu_to_le64(CEPH_NOSNAP);
+       osd_req->r_file_layout = rbd_dev->layout;       /* struct */
 
-       strncpy(req->r_oid, object_name, sizeof(req->r_oid));
-       req->r_oid_len = strlen(req->r_oid);
+       /* osd_req will get its own reference to snapc (if non-null) */
 
-       layout = &req->r_file_layout;
-       memset(layout, 0, sizeof(*layout));
-       layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-       layout->fl_stripe_count = cpu_to_le32(1);
-       layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-       layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id);
-       ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
-                                  req, ops);
-       rbd_assert(ret == 0);
+       ceph_osdc_build_request(osd_req, offset, length, 1, op,
+                               snapc, snap_id, mtime);
 
-       ceph_osdc_build_request(req, ofs, &len,
-                               ops,
-                               snapc,
-                               &mtime,
-                               req->r_oid, req->r_oid_len);
+       return osd_req;
+}
 
-       if (linger_req) {
-               ceph_osdc_set_request_linger(osdc, req);
-               *linger_req = req;
-       }
+static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
+{
+       ceph_osdc_put_request(osd_req);
+}
 
-       ret = ceph_osdc_start_request(osdc, req, false);
-       if (ret < 0)
-               goto done_err;
-
-       if (!rbd_cb) {
-               ret = ceph_osdc_wait_request(osdc, req);
-               if (ver)
-                       *ver = le64_to_cpu(req->r_reassert_version.version);
-               dout("reassert_ver=%llu\n",
-                       (unsigned long long)
-                               le64_to_cpu(req->r_reassert_version.version));
-               ceph_osdc_put_request(req);
+/* object_name is assumed to be a non-null pointer and NUL-terminated */
+
+static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
+                                               u64 offset, u64 length,
+                                               enum obj_request_type type)
+{
+       struct rbd_obj_request *obj_request;
+       size_t size;
+       char *name;
+
+       rbd_assert(obj_request_type_valid(type));
+
+       size = strlen(object_name) + 1;
+       obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL);
+       if (!obj_request)
+               return NULL;
+
+       name = (char *)(obj_request + 1);
+       obj_request->object_name = memcpy(name, object_name, size);
+       obj_request->offset = offset;
+       obj_request->length = length;
+       obj_request->which = BAD_WHICH;
+       obj_request->type = type;
+       INIT_LIST_HEAD(&obj_request->links);
+       obj_request_done_init(obj_request);
+       init_completion(&obj_request->completion);
+       kref_init(&obj_request->kref);
+
+       dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name,
+               offset, length, (int)type, obj_request);
+
+       return obj_request;
+}
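
rbd_obj_request_create() allocates the request and a copy of the object name in a single kzalloc(), pointing object_name at the bytes that follow the struct, so one kfree() in rbd_obj_request_destroy() releases both. A user-space sketch of that allocation trick, with a hypothetical struct and calloc/free in place of kzalloc/kfree:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct obj_request {
        const char *object_name;
        /* name bytes live directly after the struct */
};

static struct obj_request *obj_request_create(const char *object_name)
{
        size_t size = strlen(object_name) + 1;
        struct obj_request *obj = calloc(1, sizeof(*obj) + size);

        if (!obj)
                return NULL;
        obj->object_name = memcpy((char *)(obj + 1), object_name, size);
        return obj;
}

int main(void)
{
        struct obj_request *obj = obj_request_create("rb.0.1234.000000000000");

        printf("%s\n", obj->object_name);
        free(obj);                      /* frees the name copy as well */
        return 0;
}
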
+
+static void rbd_obj_request_destroy(struct kref *kref)
+{
+       struct rbd_obj_request *obj_request;
+
+       obj_request = container_of(kref, struct rbd_obj_request, kref);
+
+       dout("%s: obj %p\n", __func__, obj_request);
+
+       rbd_assert(obj_request->img_request == NULL);
+       rbd_assert(obj_request->which == BAD_WHICH);
+
+       if (obj_request->osd_req)
+               rbd_osd_req_destroy(obj_request->osd_req);
+
+       rbd_assert(obj_request_type_valid(obj_request->type));
+       switch (obj_request->type) {
+       case OBJ_REQUEST_NODATA:
+               break;          /* Nothing to do */
+       case OBJ_REQUEST_BIO:
+               if (obj_request->bio_list)
+                       bio_chain_put(obj_request->bio_list);
+               break;
+       case OBJ_REQUEST_PAGES:
+               if (obj_request->pages)
+                       ceph_release_page_vector(obj_request->pages,
+                                               obj_request->page_count);
+               break;
        }
-       return ret;
 
-done_err:
-       bio_chain_put(req_data->bio);
-       ceph_osdc_put_request(req);
-done_pages:
-       rbd_coll_end_req(req_data, ret, len);
-       kfree(req_data);
-       return ret;
+       kfree(obj_request);
 }
 
 /*
- * Ceph osd op callback
+ * Caller is responsible for filling in the list of object requests
+ * that comprises the image request, and the Linux request pointer
+ * (if there is one).
  */
-static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
-{
-       struct rbd_request *req_data = req->r_priv;
-       struct ceph_osd_reply_head *replyhead;
-       struct ceph_osd_op *op;
-       __s32 rc;
-       u64 bytes;
-       int read_op;
-
-       /* parse reply */
-       replyhead = msg->front.iov_base;
-       WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-       op = (void *)(replyhead + 1);
-       rc = le32_to_cpu(replyhead->result);
-       bytes = le64_to_cpu(op->extent.length);
-       read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ);
-
-       dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n",
-               (unsigned long long) bytes, read_op, (int) rc);
-
-       if (rc == -ENOENT && read_op) {
-               zero_bio_chain(req_data->bio, 0);
-               rc = 0;
-       } else if (rc == 0 && read_op && bytes < req_data->len) {
-               zero_bio_chain(req_data->bio, bytes);
-               bytes = req_data->len;
-       }
+static struct rbd_img_request *rbd_img_request_create(
+                                       struct rbd_device *rbd_dev,
+                                       u64 offset, u64 length,
+                                       bool write_request)
+{
+       struct rbd_img_request *img_request;
+       struct ceph_snap_context *snapc = NULL;
 
-       rbd_coll_end_req(req_data, rc, bytes);
+       img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC);
+       if (!img_request)
+               return NULL;
 
-       if (req_data->bio)
-               bio_chain_put(req_data->bio);
+       if (write_request) {
+               down_read(&rbd_dev->header_rwsem);
+               snapc = ceph_get_snap_context(rbd_dev->header.snapc);
+               up_read(&rbd_dev->header_rwsem);
+               if (WARN_ON(!snapc)) {
+                       kfree(img_request);
+                       return NULL;    /* Shouldn't happen */
+               }
+       }
 
-       ceph_osdc_put_request(req);
-       kfree(req_data);
+       img_request->rq = NULL;
+       img_request->rbd_dev = rbd_dev;
+       img_request->offset = offset;
+       img_request->length = length;
+       img_request->write_request = write_request;
+       if (write_request)
+               img_request->snapc = snapc;
+       else
+               img_request->snap_id = rbd_dev->spec->snap_id;
+       spin_lock_init(&img_request->completion_lock);
+       img_request->next_completion = 0;
+       img_request->callback = NULL;
+       img_request->obj_request_count = 0;
+       INIT_LIST_HEAD(&img_request->obj_requests);
+       kref_init(&img_request->kref);
+
+       rbd_img_request_get(img_request);       /* Avoid a warning */
+       rbd_img_request_put(img_request);       /* TEMPORARY */
+
+       dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
+               write_request ? "write" : "read", offset, length,
+               img_request);
+
+       return img_request;
 }
 
-static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg)
+static void rbd_img_request_destroy(struct kref *kref)
 {
-       ceph_osdc_put_request(req);
+       struct rbd_img_request *img_request;
+       struct rbd_obj_request *obj_request;
+       struct rbd_obj_request *next_obj_request;
+
+       img_request = container_of(kref, struct rbd_img_request, kref);
+
+       dout("%s: img %p\n", __func__, img_request);
+
+       for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+               rbd_img_obj_request_del(img_request, obj_request);
+       rbd_assert(img_request->obj_request_count == 0);
+
+       if (img_request->write_request)
+               ceph_put_snap_context(img_request->snapc);
+
+       kfree(img_request);
 }
 
-/*
- * Do a synchronous ceph osd operation
- */
-static int rbd_req_sync_op(struct rbd_device *rbd_dev,
-                          struct ceph_snap_context *snapc,
-                          u64 snapid,
-                          int flags,
-                          struct ceph_osd_req_op *ops,
-                          const char *object_name,
-                          u64 ofs, u64 inbound_size,
-                          char *inbound,
-                          struct ceph_osd_request **linger_req,
-                          u64 *ver)
+static int rbd_img_request_fill_bio(struct rbd_img_request *img_request,
+                                       struct bio *bio_list)
 {
-       int ret;
-       struct page **pages;
-       int num_pages;
-
-       rbd_assert(ops != NULL);
+       struct rbd_device *rbd_dev = img_request->rbd_dev;
+       struct rbd_obj_request *obj_request = NULL;
+       struct rbd_obj_request *next_obj_request;
+       unsigned int bio_offset;
+       u64 image_offset;
+       u64 resid;
+       u16 opcode;
+
+       dout("%s: img %p bio %p\n", __func__, img_request, bio_list);
+
+       opcode = img_request->write_request ? CEPH_OSD_OP_WRITE
+                                             : CEPH_OSD_OP_READ;
+       bio_offset = 0;
+       image_offset = img_request->offset;
+       rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT);
+       resid = img_request->length;
+       rbd_assert(resid > 0);
+       while (resid) {
+               const char *object_name;
+               unsigned int clone_size;
+               struct ceph_osd_req_op *op;
+               u64 offset;
+               u64 length;
+
+               object_name = rbd_segment_name(rbd_dev, image_offset);
+               if (!object_name)
+                       goto out_unwind;
+               offset = rbd_segment_offset(rbd_dev, image_offset);
+               length = rbd_segment_length(rbd_dev, image_offset, resid);
+               obj_request = rbd_obj_request_create(object_name,
+                                               offset, length,
+                                               OBJ_REQUEST_BIO);
+               kfree(object_name);     /* object request has its own copy */
+               if (!obj_request)
+                       goto out_unwind;
+
+               rbd_assert(length <= (u64) UINT_MAX);
+               clone_size = (unsigned int) length;
+               obj_request->bio_list = bio_chain_clone_range(&bio_list,
+                                               &bio_offset, clone_size,
+                                               GFP_ATOMIC);
+               if (!obj_request->bio_list)
+                       goto out_partial;
 
-       num_pages = calc_pages_for(ofs, inbound_size);
-       pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
-       if (IS_ERR(pages))
-               return PTR_ERR(pages);
+               /*
+                * Build up the op to use in building the osd
+                * request.  Note that the contents of the op are
+                * copied by rbd_osd_req_create().
+                */
+               op = rbd_osd_req_op_create(opcode, offset, length);
+               if (!op)
+                       goto out_partial;
+               obj_request->osd_req = rbd_osd_req_create(rbd_dev,
+                                               img_request->write_request,
+                                               obj_request, op);
+               rbd_osd_req_op_destroy(op);
+               if (!obj_request->osd_req)
+                       goto out_partial;
+               /* status and version are initially zero-filled */
+
+               rbd_img_obj_request_add(img_request, obj_request);
+
+               image_offset += length;
+               resid -= length;
+       }
 
-       ret = rbd_do_request(NULL, rbd_dev, snapc, snapid,
-                         object_name, ofs, inbound_size, NULL,
-                         pages, num_pages,
-                         flags,
-                         ops,
-                         NULL, 0,
-                         NULL,
-                         linger_req, ver);
-       if (ret < 0)
-               goto done;
+       return 0;
 
-       if ((flags & CEPH_OSD_FLAG_READ) && inbound)
-               ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret);
+out_partial:
+       rbd_obj_request_put(obj_request);
+out_unwind:
+       for_each_obj_request_safe(img_request, obj_request, next_obj_request)
+               rbd_obj_request_put(obj_request);
 
-done:
-       ceph_release_page_vector(pages, num_pages);
-       return ret;
+       return -ENOMEM;
 }
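
rbd_img_request_fill_bio() walks the image extent and creates one object request per backing object, using the rbd_segment_name/offset/length helpers to chop the extent at object boundaries, where each object covers 1 << obj_order bytes of the image. A standalone sketch of that segmenting loop, assuming a 4 MiB object size (obj_order 22) and ignoring the bio cloning:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const unsigned int obj_order = 22;              /* assumed: 4 MiB objects */
        const uint64_t obj_size = 1ULL << obj_order;
        uint64_t image_offset = 6 * 1024 * 1024;        /* request starts at 6 MiB */
        uint64_t resid = 5 * 1024 * 1024;               /* 5 MiB request */

        while (resid) {
                uint64_t seg = image_offset >> obj_order;
                uint64_t offset = image_offset & (obj_size - 1);
                uint64_t length = obj_size - offset;    /* clip at object end */

                if (length > resid)
                        length = resid;
                printf("object %llu: %llu bytes at object offset %llu\n",
                       (unsigned long long)seg,
                       (unsigned long long)length,
                       (unsigned long long)offset);
                image_offset += length;
                resid -= length;
        }
        return 0;
}
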
 
-/*
- * Do an asynchronous ceph osd operation
- */
-static int rbd_do_op(struct request *rq,
-                    struct rbd_device *rbd_dev,
-                    struct ceph_snap_context *snapc,
-                    u64 ofs, u64 len,
-                    struct bio *bio,
-                    struct rbd_req_coll *coll,
-                    int coll_index)
-{
-       char *seg_name;
-       u64 seg_ofs;
-       u64 seg_len;
-       int ret;
-       struct ceph_osd_req_op *ops;
-       u32 payload_len;
-       int opcode;
-       int flags;
-       u64 snapid;
-
-       seg_name = rbd_segment_name(rbd_dev, ofs);
-       if (!seg_name)
-               return -ENOMEM;
-       seg_len = rbd_segment_length(rbd_dev, ofs, len);
-       seg_ofs = rbd_segment_offset(rbd_dev, ofs);
-
-       if (rq_data_dir(rq) == WRITE) {
-               opcode = CEPH_OSD_OP_WRITE;
-               flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK;
-               snapid = CEPH_NOSNAP;
-               payload_len = seg_len;
-       } else {
-               opcode = CEPH_OSD_OP_READ;
-               flags = CEPH_OSD_FLAG_READ;
-               snapc = NULL;
-               snapid = rbd_dev->spec->snap_id;
-               payload_len = 0;
+static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+{
+       struct rbd_img_request *img_request;
+       u32 which = obj_request->which;
+       bool more = true;
+
+       img_request = obj_request->img_request;
+
+       dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+       rbd_assert(img_request != NULL);
+       rbd_assert(img_request->rq != NULL);
+       rbd_assert(img_request->obj_request_count > 0);
+       rbd_assert(which != BAD_WHICH);
+       rbd_assert(which < img_request->obj_request_count);
+       rbd_assert(which >= img_request->next_completion);
+
+       spin_lock_irq(&img_request->completion_lock);
+       if (which != img_request->next_completion)
+               goto out;
+
+       for_each_obj_request_from(img_request, obj_request) {
+               unsigned int xferred;
+               int result;
+
+               rbd_assert(more);
+               rbd_assert(which < img_request->obj_request_count);
+
+               if (!obj_request_done_test(obj_request))
+                       break;
+
+               rbd_assert(obj_request->xferred <= (u64) UINT_MAX);
+               xferred = (unsigned int) obj_request->xferred;
+               result = (int) obj_request->result;
+               if (result)
+                       rbd_warn(NULL, "obj_request %s result %d xferred %u\n",
+                               img_request->write_request ? "write" : "read",
+                               result, xferred);
+
+               more = blk_end_request(img_request->rq, result, xferred);
+               which++;
        }
 
-       ret = -ENOMEM;
-       ops = rbd_create_rw_ops(1, opcode, payload_len);
-       if (!ops)
-               goto done;
+       rbd_assert(more ^ (which == img_request->obj_request_count));
+       img_request->next_completion = which;
+out:
+       spin_unlock_irq(&img_request->completion_lock);
 
-       /* we've taken care of segment sizes earlier when we
-          cloned the bios. We should never have a segment
-          truncated at this point */
-       rbd_assert(seg_len == len);
-
-       ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
-                            seg_name, seg_ofs, seg_len,
-                            bio,
-                            NULL, 0,
-                            flags,
-                            ops,
-                            coll, coll_index,
-                            rbd_req_cb, 0, NULL);
-
-       rbd_destroy_ops(ops);
-done:
-       kfree(seg_name);
-       return ret;
+       if (!more)
+               rbd_img_request_complete(img_request);
 }
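
rbd_img_obj_callback() completes the block request in order: object requests may finish in any order, but blk_end_request() is only called for the contiguous run of finished pieces starting at next_completion, under completion_lock. A simplified single-threaded model of that bookkeeping, with no locking, a fixed piece count, and hypothetical names:

#include <stdbool.h>
#include <stdio.h>

#define NUM_OBJS 4

static bool done[NUM_OBJS];
static unsigned int next_completion;

static void obj_complete(unsigned int which)
{
        done[which] = true;
        if (which != next_completion)
                return;                 /* an earlier piece is still pending */
        while (next_completion < NUM_OBJS && done[next_completion]) {
                /* this is where blk_end_request() would advance the request */
                printf("ending piece %u\n", next_completion);
                next_completion++;
        }
}

int main(void)
{
        obj_complete(2);                /* out of order: nothing can end yet */
        obj_complete(0);                /* ends piece 0; piece 1 still pending */
        obj_complete(1);                /* ends pieces 1 and 2 */
        obj_complete(3);                /* ends piece 3 */
        return 0;
}
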
 
-/*
- * Request sync osd read
- */
-static int rbd_req_sync_read(struct rbd_device *rbd_dev,
-                         u64 snapid,
-                         const char *object_name,
-                         u64 ofs, u64 len,
-                         char *buf,
-                         u64 *ver)
-{
-       struct ceph_osd_req_op *ops;
-       int ret;
+static int rbd_img_request_submit(struct rbd_img_request *img_request)
+{
+       struct rbd_device *rbd_dev = img_request->rbd_dev;
+       struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       struct rbd_obj_request *obj_request;
 
-       ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0);
-       if (!ops)
-               return -ENOMEM;
+       dout("%s: img %p\n", __func__, img_request);
+       for_each_obj_request(img_request, obj_request) {
+               int ret;
 
-       ret = rbd_req_sync_op(rbd_dev, NULL,
-                              snapid,
-                              CEPH_OSD_FLAG_READ,
-                              ops, object_name, ofs, len, buf, NULL, ver);
-       rbd_destroy_ops(ops);
+               obj_request->callback = rbd_img_obj_callback;
+               ret = rbd_obj_request_submit(osdc, obj_request);
+               if (ret)
+                       return ret;
+               /*
+                * The image request has its own reference to each
+                * of its object requests, so we can safely drop the
+                * initial one here.
+                */
+               rbd_obj_request_put(obj_request);
+       }
 
-       return ret;
+       return 0;
 }
 
-/*
- * Request sync osd watch
- */
-static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev,
-                                  u64 ver,
-                                  u64 notify_id)
+static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
+                                  u64 ver, u64 notify_id)
 {
-       struct ceph_osd_req_op *ops;
+       struct rbd_obj_request *obj_request;
+       struct ceph_osd_req_op *op;
+       struct ceph_osd_client *osdc;
        int ret;
 
-       ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0);
-       if (!ops)
+       obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+                                                       OBJ_REQUEST_NODATA);
+       if (!obj_request)
                return -ENOMEM;
 
-       ops[0].watch.ver = cpu_to_le64(ver);
-       ops[0].watch.cookie = notify_id;
-       ops[0].watch.flag = 0;
+       ret = -ENOMEM;
+       op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver);
+       if (!op)
+               goto out;
+       obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+                                               obj_request, op);
+       rbd_osd_req_op_destroy(op);
+       if (!obj_request->osd_req)
+               goto out;
 
-       ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP,
-                         rbd_dev->header_name, 0, 0, NULL,
-                         NULL, 0,
-                         CEPH_OSD_FLAG_READ,
-                         ops,
-                         NULL, 0,
-                         rbd_simple_req_cb, 0, NULL);
+       osdc = &rbd_dev->rbd_client->client->osdc;
+       obj_request->callback = rbd_obj_request_put;
+       ret = rbd_obj_request_submit(osdc, obj_request);
+out:
+       if (ret)
+               rbd_obj_request_put(obj_request);
 
-       rbd_destroy_ops(ops);
        return ret;
 }
 
@@ -1381,95 +1791,103 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
        if (!rbd_dev)
                return;
 
-       dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n",
+       dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
                rbd_dev->header_name, (unsigned long long) notify_id,
                (unsigned int) opcode);
        rc = rbd_dev_refresh(rbd_dev, &hver);
        if (rc)
-               pr_warning(RBD_DRV_NAME "%d got notification but failed to "
-                          " update snaps: %d\n", rbd_dev->major, rc);
+               rbd_warn(rbd_dev, "got notification but failed to "
+                          "update snaps: %d\n", rc);
 
-       rbd_req_sync_notify_ack(rbd_dev, hver, notify_id);
+       rbd_obj_notify_ack(rbd_dev, hver, notify_id);
 }
 
 /*
- * Request sync osd watch
+ * Request sync osd watch/unwatch.  The value of "start" determines
+ * whether a watch request is being initiated or torn down.
  */
-static int rbd_req_sync_watch(struct rbd_device *rbd_dev)
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
 {
-       struct ceph_osd_req_op *ops;
        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       struct rbd_obj_request *obj_request;
+       struct ceph_osd_req_op *op;
        int ret;
 
-       ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
-       if (!ops)
-               return -ENOMEM;
+       rbd_assert(start ^ !!rbd_dev->watch_event);
+       rbd_assert(start ^ !!rbd_dev->watch_request);
 
-       ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0,
-                                    (void *)rbd_dev, &rbd_dev->watch_event);
-       if (ret < 0)
-               goto fail;
+       if (start) {
+               ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+                                               &rbd_dev->watch_event);
+               if (ret < 0)
+                       return ret;
+               rbd_assert(rbd_dev->watch_event != NULL);
+       }
 
-       ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version);
-       ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
-       ops[0].watch.flag = 1;
+       ret = -ENOMEM;
+       obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
+                                                       OBJ_REQUEST_NODATA);
+       if (!obj_request)
+               goto out_cancel;
+
+       op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH,
+                               rbd_dev->watch_event->cookie,
+                               rbd_dev->header.obj_version, start);
+       if (!op)
+               goto out_cancel;
+       obj_request->osd_req = rbd_osd_req_create(rbd_dev, true,
+                                                       obj_request, op);
+       rbd_osd_req_op_destroy(op);
+       if (!obj_request->osd_req)
+               goto out_cancel;
+
+       if (start)
+               ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
+       else
+               ceph_osdc_unregister_linger_request(osdc,
+                                       rbd_dev->watch_request->osd_req);
+       ret = rbd_obj_request_submit(osdc, obj_request);
+       if (ret)
+               goto out_cancel;
+       ret = rbd_obj_request_wait(obj_request);
+       if (ret)
+               goto out_cancel;
+       ret = obj_request->result;
+       if (ret)
+               goto out_cancel;
 
-       ret = rbd_req_sync_op(rbd_dev, NULL,
-                             CEPH_NOSNAP,
-                             CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-                             ops,
-                             rbd_dev->header_name,
-                             0, 0, NULL,
-                             &rbd_dev->watch_request, NULL);
+       /*
+        * A watch request is set to linger, so the underlying osd
+        * request won't go away until we unregister it.  We retain
+        * a pointer to the object request during that time (in
+        * rbd_dev->watch_request), so we'll keep a reference to
+        * it.  We'll drop that reference (below) after we've
+        * unregistered it.
+        */
+       if (start) {
+               rbd_dev->watch_request = obj_request;
 
-       if (ret < 0)
-               goto fail_event;
+               return 0;
+       }
 
-       rbd_destroy_ops(ops);
-       return 0;
+       /* We have successfully torn down the watch request */
 
-fail_event:
+       rbd_obj_request_put(rbd_dev->watch_request);
+       rbd_dev->watch_request = NULL;
+out_cancel:
+       /* Cancel the event if we're tearing down, or on error */
        ceph_osdc_cancel_event(rbd_dev->watch_event);
        rbd_dev->watch_event = NULL;
-fail:
-       rbd_destroy_ops(ops);
-       return ret;
-}
+       if (obj_request)
+               rbd_obj_request_put(obj_request);
 
-/*
- * Request sync osd unwatch
- */
-static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev)
-{
-       struct ceph_osd_req_op *ops;
-       int ret;
-
-       ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0);
-       if (!ops)
-               return -ENOMEM;
-
-       ops[0].watch.ver = 0;
-       ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie);
-       ops[0].watch.flag = 0;
-
-       ret = rbd_req_sync_op(rbd_dev, NULL,
-                             CEPH_NOSNAP,
-                             CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
-                             ops,
-                             rbd_dev->header_name,
-                             0, 0, NULL, NULL, NULL);
-
-
-       rbd_destroy_ops(ops);
-       ceph_osdc_cancel_event(rbd_dev->watch_event);
-       rbd_dev->watch_event = NULL;
        return ret;
 }
 
 /*
  * Synchronous osd object method call
  */
-static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
+static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
                             const char *object_name,
                             const char *class_name,
                             const char *method_name,
@@ -1477,169 +1895,154 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
                             size_t outbound_size,
                             char *inbound,
                             size_t inbound_size,
-                            int flags,
-                            u64 *ver)
+                            u64 *version)
 {
-       struct ceph_osd_req_op *ops;
-       int class_name_len = strlen(class_name);
-       int method_name_len = strlen(method_name);
-       int payload_size;
+       struct rbd_obj_request *obj_request;
+       struct ceph_osd_client *osdc;
+       struct ceph_osd_req_op *op;
+       struct page **pages;
+       u32 page_count;
        int ret;
 
        /*
-        * Any input parameters required by the method we're calling
-        * will be sent along with the class and method names as
-        * part of the message payload.  That data and its size are
-        * supplied via the indata and indata_len fields (named from
-        * the perspective of the server side) in the OSD request
-        * operation.
+        * Method calls are ultimately read operations but they
+        * don't involve object data (so no offset or length).
+        * The result should be placed into the inbound buffer
+        * provided.  They also supply outbound data--parameters for
+        * the object method.  Currently if this is present it will
+        * be a snapshot id.
         */
-       payload_size = class_name_len + method_name_len + outbound_size;
-       ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size);
-       if (!ops)
-               return -ENOMEM;
+       page_count = (u32) calc_pages_for(0, inbound_size);
+       pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+       if (IS_ERR(pages))
+               return PTR_ERR(pages);
 
-       ops[0].cls.class_name = class_name;
-       ops[0].cls.class_len = (__u8) class_name_len;
-       ops[0].cls.method_name = method_name;
-       ops[0].cls.method_len = (__u8) method_name_len;
-       ops[0].cls.argc = 0;
-       ops[0].cls.indata = outbound;
-       ops[0].cls.indata_len = outbound_size;
+       ret = -ENOMEM;
+       obj_request = rbd_obj_request_create(object_name, 0, 0,
+                                                       OBJ_REQUEST_PAGES);
+       if (!obj_request)
+               goto out;
 
-       ret = rbd_req_sync_op(rbd_dev, NULL,
-                              CEPH_NOSNAP,
-                              flags, ops,
-                              object_name, 0, inbound_size, inbound,
-                              NULL, ver);
+       obj_request->pages = pages;
+       obj_request->page_count = page_count;
 
-       rbd_destroy_ops(ops);
+       op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name,
+                                       method_name, outbound, outbound_size);
+       if (!op)
+               goto out;
+       obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+                                               obj_request, op);
+       rbd_osd_req_op_destroy(op);
+       if (!obj_request->osd_req)
+               goto out;
 
-       dout("cls_exec returned %d\n", ret);
-       return ret;
-}
+       osdc = &rbd_dev->rbd_client->client->osdc;
+       ret = rbd_obj_request_submit(osdc, obj_request);
+       if (ret)
+               goto out;
+       ret = rbd_obj_request_wait(obj_request);
+       if (ret)
+               goto out;
 
-static struct rbd_req_coll *rbd_alloc_coll(int num_reqs)
-{
-       struct rbd_req_coll *coll =
-                       kzalloc(sizeof(struct rbd_req_coll) +
-                               sizeof(struct rbd_req_status) * num_reqs,
-                               GFP_ATOMIC);
+       ret = obj_request->result;
+       if (ret < 0)
+               goto out;
+       ret = 0;
+       ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred);
+       if (version)
+               *version = obj_request->version;
+out:
+       if (obj_request)
+               rbd_obj_request_put(obj_request);
+       else
+               ceph_release_page_vector(pages, page_count);
 
-       if (!coll)
-               return NULL;
-       coll->total = num_reqs;
-       kref_init(&coll->kref);
-       return coll;
+       return ret;
 }
 
-/*
- * block device queue callback
- */
-static void rbd_rq_fn(struct request_queue *q)
+static void rbd_request_fn(struct request_queue *q)
+               __releases(q->queue_lock) __acquires(q->queue_lock)
 {
        struct rbd_device *rbd_dev = q->queuedata;
+       bool read_only = rbd_dev->mapping.read_only;
        struct request *rq;
+       int result;
 
        while ((rq = blk_fetch_request(q))) {
-               struct bio *bio;
-               bool do_write;
-               unsigned int size;
-               u64 ofs;
-               int num_segs, cur_seg = 0;
-               struct rbd_req_coll *coll;
-               struct ceph_snap_context *snapc;
-               unsigned int bio_offset;
-
-               dout("fetched request\n");
-
-               /* filter out block requests we don't understand */
-               if ((rq->cmd_type != REQ_TYPE_FS)) {
-                       __blk_end_request_all(rq, 0);
-                       continue;
-               }
+               bool write_request = rq_data_dir(rq) == WRITE;
+               struct rbd_img_request *img_request;
+               u64 offset;
+               u64 length;
+
+               /* Ignore any non-FS requests that filter through. */
 
-               /* deduce our operation (read, write) */
-               do_write = (rq_data_dir(rq) == WRITE);
-               if (do_write && rbd_dev->mapping.read_only) {
-                       __blk_end_request_all(rq, -EROFS);
+               if (rq->cmd_type != REQ_TYPE_FS) {
+                       dout("%s: non-fs request type %d\n", __func__,
+                               (int) rq->cmd_type);
+                       __blk_end_request_all(rq, 0);
                        continue;
                }
 
-               spin_unlock_irq(q->queue_lock);
+               /* Ignore/skip any zero-length requests */
 
-               down_read(&rbd_dev->header_rwsem);
+               offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT;
+               length = (u64) blk_rq_bytes(rq);
 
-               if (!rbd_dev->exists) {
-                       rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
-                       up_read(&rbd_dev->header_rwsem);
-                       dout("request for non-existent snapshot");
-                       spin_lock_irq(q->queue_lock);
-                       __blk_end_request_all(rq, -ENXIO);
+               if (!length) {
+                       dout("%s: zero-length request\n", __func__);
+                       __blk_end_request_all(rq, 0);
                        continue;
                }
 
-               snapc = ceph_get_snap_context(rbd_dev->header.snapc);
-
-               up_read(&rbd_dev->header_rwsem);
-
-               size = blk_rq_bytes(rq);
-               ofs = blk_rq_pos(rq) * SECTOR_SIZE;
-               bio = rq->bio;
+               spin_unlock_irq(q->queue_lock);
 
-               dout("%s 0x%x bytes at 0x%llx\n",
-                    do_write ? "write" : "read",
-                    size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
+               /* Disallow writes to a read-only device */
 
-               num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
-               if (num_segs <= 0) {
-                       spin_lock_irq(q->queue_lock);
-                       __blk_end_request_all(rq, num_segs);
-                       ceph_put_snap_context(snapc);
-                       continue;
+               if (write_request) {
+                       result = -EROFS;
+                       if (read_only)
+                               goto end_request;
+                       rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
                }
-               coll = rbd_alloc_coll(num_segs);
-               if (!coll) {
-                       spin_lock_irq(q->queue_lock);
-                       __blk_end_request_all(rq, -ENOMEM);
-                       ceph_put_snap_context(snapc);
-                       continue;
-               }
-
-               bio_offset = 0;
-               do {
-                       u64 limit = rbd_segment_length(rbd_dev, ofs, size);
-                       unsigned int chain_size;
-                       struct bio *bio_chain;
-
-                       BUG_ON(limit > (u64) UINT_MAX);
-                       chain_size = (unsigned int) limit;
-                       dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
 
-                       kref_get(&coll->kref);
+               /*
+                * Quit early if the mapped snapshot no longer
+                * exists.  It's still possible the snapshot will
+                * have disappeared by the time our request arrives
+                * at the osd, but there's no sense in sending it if
+                * we already know.
+                */
+               if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
+                       dout("request for non-existent snapshot");
+                       rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
+                       result = -ENXIO;
+                       goto end_request;
+               }
 
-                       /* Pass a cloned bio chain via an osd request */
+               result = -EINVAL;
+               if (WARN_ON(offset && length > U64_MAX - offset + 1))
+                       goto end_request;       /* Shouldn't happen */
 
-                       bio_chain = bio_chain_clone_range(&bio,
-                                               &bio_offset, chain_size,
-                                               GFP_ATOMIC);
-                       if (bio_chain)
-                               (void) rbd_do_op(rq, rbd_dev, snapc,
-                                               ofs, chain_size,
-                                               bio_chain, coll, cur_seg);
-                       else
-                               rbd_coll_end_req_index(rq, coll, cur_seg,
-                                                      -ENOMEM, chain_size);
-                       size -= chain_size;
-                       ofs += chain_size;
+               result = -ENOMEM;
+               img_request = rbd_img_request_create(rbd_dev, offset, length,
+                                                       write_request);
+               if (!img_request)
+                       goto end_request;
 
-                       cur_seg++;
-               } while (size > 0);
-               kref_put(&coll->kref, rbd_coll_release);
+               img_request->rq = rq;
 
+               result = rbd_img_request_fill_bio(img_request, rq->bio);
+               if (!result)
+                       result = rbd_img_request_submit(img_request);
+               if (result)
+                       rbd_img_request_put(img_request);
+end_request:
                spin_lock_irq(q->queue_lock);
-
-               ceph_put_snap_context(snapc);
+               if (result < 0) {
+                       rbd_warn(rbd_dev, "obj_request %s result %d\n",
+                               write_request ? "write" : "read", result);
+                       __blk_end_request_all(rq, result);
+               }
        }
 }
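
Before building an image request, rbd_request_fn() rejects an (offset, length) pair whose end would wrap past U64_MAX, and it does so without computing offset + length directly. A short sketch of that check, with UINT64_MAX from <stdint.h> playing the role of the kernel's U64_MAX:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool range_overflows(uint64_t offset, uint64_t length)
{
        /*
         * Mathematically: offset + length - 1 > UINT64_MAX.  The
         * "offset &&" guard keeps UINT64_MAX - offset + 1 from
         * wrapping when offset is zero.
         */
        return offset && length > UINT64_MAX - offset + 1;
}

int main(void)
{
        printf("%d\n", range_overflows(UINT64_MAX, 2)); /* 1: wraps past the end */
        printf("%d\n", range_overflows(UINT64_MAX, 1)); /* 0: exactly the last byte */
        printf("%d\n", range_overflows(0, UINT64_MAX)); /* 0: fits from offset 0 */
        return 0;
}
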
 
@@ -1703,6 +2106,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
        put_disk(disk);
 }
 
+static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
+                               const char *object_name,
+                               u64 offset, u64 length,
+                               char *buf, u64 *version)
+
+{
+       struct ceph_osd_req_op *op;
+       struct rbd_obj_request *obj_request;
+       struct ceph_osd_client *osdc;
+       struct page **pages = NULL;
+       u32 page_count;
+       size_t size;
+       int ret;
+
+       page_count = (u32) calc_pages_for(offset, length);
+       pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+       if (IS_ERR(pages))
+               return PTR_ERR(pages);
+
+       ret = -ENOMEM;
+       obj_request = rbd_obj_request_create(object_name, offset, length,
+                                                       OBJ_REQUEST_PAGES);
+       if (!obj_request)
+               goto out;
+
+       obj_request->pages = pages;
+       obj_request->page_count = page_count;
+
+       op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length);
+       if (!op)
+               goto out;
+       obj_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+                                               obj_request, op);
+       rbd_osd_req_op_destroy(op);
+       if (!obj_request->osd_req)
+               goto out;
+
+       osdc = &rbd_dev->rbd_client->client->osdc;
+       ret = rbd_obj_request_submit(osdc, obj_request);
+       if (ret)
+               goto out;
+       ret = rbd_obj_request_wait(obj_request);
+       if (ret)
+               goto out;
+
+       ret = obj_request->result;
+       if (ret < 0)
+               goto out;
+
+       rbd_assert(obj_request->xferred <= (u64) SIZE_MAX);
+       size = (size_t) obj_request->xferred;
+       ceph_copy_from_page_vector(pages, buf, 0, size);
+       rbd_assert(size <= (size_t) INT_MAX);
+       ret = (int) size;
+       if (version)
+               *version = obj_request->version;
+out:
+       if (obj_request)
+               rbd_obj_request_put(obj_request);
+       else
+               ceph_release_page_vector(pages, page_count);
+
+       return ret;
+}
+
 /*
  * Read the complete header for the given rbd device.
  *
@@ -1741,24 +2209,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
                if (!ondisk)
                        return ERR_PTR(-ENOMEM);
 
-               ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP,
-                                      rbd_dev->header_name,
+               ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
                                       0, size,
                                       (char *) ondisk, version);
-
                if (ret < 0)
                        goto out_err;
                if (WARN_ON((size_t) ret < size)) {
                        ret = -ENXIO;
-                       pr_warning("short header read for image %s"
-                                       " (want %zd got %d)\n",
-                               rbd_dev->spec->image_name, size, ret);
+                       rbd_warn(rbd_dev, "short header read (want %zd got %d)",
+                               size, ret);
                        goto out_err;
                }
                if (!rbd_dev_ondisk_valid(ondisk)) {
                        ret = -ENXIO;
-                       pr_warning("invalid header for image %s\n",
-                               rbd_dev->spec->image_name);
+                       rbd_warn(rbd_dev, "invalid header");
                        goto out_err;
                }
 
@@ -1895,8 +2359,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        disk->fops = &rbd_bd_ops;
        disk->private_data = rbd_dev;
 
-       /* init rq */
-       q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
+       q = blk_init_queue(rbd_request_fn, &rbd_dev->lock);
        if (!q)
                goto out_disk;
 
@@ -2233,7 +2696,7 @@ static void rbd_spec_free(struct kref *kref)
        kfree(spec);
 }
 
-struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
+static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
                                struct rbd_spec *spec)
 {
        struct rbd_device *rbd_dev;
@@ -2243,6 +2706,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
                return NULL;
 
        spin_lock_init(&rbd_dev->lock);
+       rbd_dev->flags = 0;
        INIT_LIST_HEAD(&rbd_dev->node);
        INIT_LIST_HEAD(&rbd_dev->snaps);
        init_rwsem(&rbd_dev->header_rwsem);
@@ -2250,6 +2714,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
        rbd_dev->spec = spec;
        rbd_dev->rbd_client = rbdc;
 
+       /* Initialize the layout used for all rbd requests */
+
+       rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+       rbd_dev->layout.fl_stripe_count = cpu_to_le32(1);
+       rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
+       rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id);
+
        return rbd_dev;
 }
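
rbd_dev_create() above initializes a layout with a single stripe per object and a stripe unit equal to the object size (1 << RBD_MAX_OBJ_ORDER), and squeezes the pool id into the 32-bit fl_pg_pool field, hence the U32_MAX checks elsewhere in this patch. With such a layout an image byte offset maps to an object number and in-object offset by a shift and a mask. A hedged sketch with an assumed object order of 22 (4 MiB objects); the driver derives the real order from the image header:

        #include <stdint.h>
        #include <stdio.h>

        /* Assumed object order; not taken from the driver. */
        #define OBJ_ORDER 22                       /* 4 MiB objects */
        #define OBJ_SIZE  (1ULL << OBJ_ORDER)

        struct obj_pos {
                uint64_t obj_no;   /* which backing object */
                uint64_t obj_off;  /* offset within that object */
        };

        static struct obj_pos map_image_offset(uint64_t image_off)
        {
                struct obj_pos p = {
                        .obj_no  = image_off >> OBJ_ORDER,
                        .obj_off = image_off & (OBJ_SIZE - 1),
                };
                return p;
        }

        int main(void)
        {
                struct obj_pos p = map_image_offset(10ULL * 1024 * 1024);  /* 10 MiB in */

                printf("object %llu, offset %llu\n",
                       (unsigned long long)p.obj_no, (unsigned long long)p.obj_off);
                return 0;
        }
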
 
@@ -2360,12 +2831,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
                __le64 size;
        } __attribute__ ((packed)) size_buf = { 0 };
 
-       ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
                                "rbd", "get_size",
                                (char *) &snapid, sizeof (snapid),
-                               (char *) &size_buf, sizeof (size_buf),
-                               CEPH_OSD_FLAG_READ, NULL);
-       dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+                               (char *) &size_buf, sizeof (size_buf), NULL);
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                return ret;
 
@@ -2396,15 +2866,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
        if (!reply_buf)
                return -ENOMEM;
 
-       ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
                                "rbd", "get_object_prefix",
                                NULL, 0,
-                               reply_buf, RBD_OBJ_PREFIX_LEN_MAX,
-                               CEPH_OSD_FLAG_READ, NULL);
-       dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+                               reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL);
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
-       ret = 0;    /* rbd_req_sync_exec() can return positive */
 
        p = reply_buf;
        rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
@@ -2435,12 +2903,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
        u64 incompat;
        int ret;
 
-       ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
                                "rbd", "get_features",
                                (char *) &snapid, sizeof (snapid),
                                (char *) &features_buf, sizeof (features_buf),
-                               CEPH_OSD_FLAG_READ, NULL);
-       dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+                               NULL);
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                return ret;
 
@@ -2474,7 +2942,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        void *end;
        char *image_id;
        u64 overlap;
-       size_t len = 0;
        int ret;
 
        parent_spec = rbd_spec_alloc();
@@ -2492,12 +2959,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        }
 
        snapid = cpu_to_le64(CEPH_NOSNAP);
-       ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
                                "rbd", "get_parent",
                                (char *) &snapid, sizeof (snapid),
-                               (char *) reply_buf, size,
-                               CEPH_OSD_FLAG_READ, NULL);
-       dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+                               (char *) reply_buf, size, NULL);
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out_err;
 
@@ -2508,13 +2974,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        if (parent_spec->pool_id == CEPH_NOPOOL)
                goto out;       /* No parent?  No problem. */
 
-       image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
+       /* The ceph file layout needs to fit pool id in 32 bits */
+
+       ret = -EIO;
+       if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX))
+               goto out;
+
+       image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
        if (IS_ERR(image_id)) {
                ret = PTR_ERR(image_id);
                goto out_err;
        }
        parent_spec->image_id = image_id;
-       parent_spec->image_id_len = len;
        ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
        ceph_decode_64_safe(&p, end, overlap, out_err);
 
@@ -2544,26 +3015,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev)
 
        rbd_assert(!rbd_dev->spec->image_name);
 
-       image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len;
+       len = strlen(rbd_dev->spec->image_id);
+       image_id_size = sizeof (__le32) + len;
        image_id = kmalloc(image_id_size, GFP_KERNEL);
        if (!image_id)
                return NULL;
 
        p = image_id;
        end = (char *) image_id + image_id_size;
-       ceph_encode_string(&p, end, rbd_dev->spec->image_id,
-                               (u32) rbd_dev->spec->image_id_len);
+       ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len);
 
        size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
        reply_buf = kmalloc(size, GFP_KERNEL);
        if (!reply_buf)
                goto out;
 
-       ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY,
+       ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY,
                                "rbd", "dir_get_name",
                                image_id, image_id_size,
-                               (char *) reply_buf, size,
-                               CEPH_OSD_FLAG_READ, NULL);
+                               (char *) reply_buf, size, NULL);
        if (ret < 0)
                goto out;
        p = reply_buf;
@@ -2602,8 +3072,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
 
        osdc = &rbd_dev->rbd_client->client->osdc;
        name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id);
-       if (!name)
-               return -EIO;    /* pool id too large (>= 2^31) */
+       if (!name) {
+               rbd_warn(rbd_dev, "there is no pool with id %llu",
+                       rbd_dev->spec->pool_id);        /* Really a BUG() */
+               return -EIO;
+       }
 
        rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL);
        if (!rbd_dev->spec->pool_name)
@@ -2612,19 +3085,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev)
        /* Fetch the image name; tolerate failure here */
 
        name = rbd_dev_image_name(rbd_dev);
-       if (name) {
-               rbd_dev->spec->image_name_len = strlen(name);
+       if (name)
                rbd_dev->spec->image_name = (char *) name;
-       } else {
-               pr_warning(RBD_DRV_NAME "%d "
-                       "unable to get image name for image id %s\n",
-                       rbd_dev->major, rbd_dev->spec->image_id);
-       }
+       else
+               rbd_warn(rbd_dev, "unable to get image name");
 
        /* Look up the snapshot name. */
 
        name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id);
        if (!name) {
+               rbd_warn(rbd_dev, "no snapshot with id %llu",
+                       rbd_dev->spec->snap_id);        /* Really a BUG() */
                ret = -EIO;
                goto out_err;
        }
@@ -2665,12 +3136,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
        if (!reply_buf)
                return -ENOMEM;
 
-       ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
                                "rbd", "get_snapcontext",
                                NULL, 0,
-                               reply_buf, size,
-                               CEPH_OSD_FLAG_READ, ver);
-       dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+                               reply_buf, size, ver);
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
 
@@ -2735,12 +3205,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
                return ERR_PTR(-ENOMEM);
 
        snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]);
-       ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
+       ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
                                "rbd", "get_snapshot_name",
                                (char *) &snap_id, sizeof (snap_id),
-                               reply_buf, size,
-                               CEPH_OSD_FLAG_READ, NULL);
-       dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+                               reply_buf, size, NULL);
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
 
@@ -2766,7 +3235,7 @@ out:
 static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which,
                u64 *snap_size, u64 *snap_features)
 {
-       __le64 snap_id;
+       u64 snap_id;
        u8 order;
        int ret;
 
@@ -2865,10 +3334,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
                if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) {
                        struct list_head *next = links->next;
 
-                       /* Existing snapshot not in the new snap context */
-
+                       /*
+                        * A previously-existing snapshot is not in
+                        * the new snap context.
+                        *
+                        * If the now missing snapshot is the one the
+                        * image is mapped to, clear its exists flag
+                        * so we can avoid sending any more requests
+                        * to it.
+                        */
                        if (rbd_dev->spec->snap_id == snap->id)
-                               rbd_dev->exists = false;
+                               clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
                        rbd_remove_snap_dev(snap);
                        dout("%ssnap id %llu has been removed\n",
                                rbd_dev->spec->snap_id == snap->id ?
@@ -2942,7 +3418,7 @@ static int rbd_dev_snaps_register(struct rbd_device *rbd_dev)
        struct rbd_snap *snap;
        int ret = 0;
 
-       dout("%s called\n", __func__);
+       dout("%s:\n", __func__);
        if (WARN_ON(!device_is_registered(&rbd_dev->dev)))
                return -EIO;
 
@@ -2983,22 +3459,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
        device_unregister(&rbd_dev->dev);
 }
 
-static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
-{
-       int ret, rc;
-
-       do {
-               ret = rbd_req_sync_watch(rbd_dev);
-               if (ret == -ERANGE) {
-                       rc = rbd_dev_refresh(rbd_dev, NULL);
-                       if (rc < 0)
-                               return rc;
-               }
-       } while (ret == -ERANGE);
-
-       return ret;
-}
-
 static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0);
 
 /*
@@ -3138,11 +3598,9 @@ static inline char *dup_token(const char **buf, size_t *lenp)
        size_t len;
 
        len = next_token(buf);
-       dup = kmalloc(len + 1, GFP_KERNEL);
+       dup = kmemdup(*buf, len + 1, GFP_KERNEL);
        if (!dup)
                return NULL;
-
-       memcpy(dup, *buf, len);
        *(dup + len) = '\0';
        *buf += len;
 
@@ -3210,8 +3668,10 @@ static int rbd_add_parse_args(const char *buf,
        /* The first four tokens are required */
 
        len = next_token(&buf);
-       if (!len)
-               return -EINVAL; /* Missing monitor address(es) */
+       if (!len) {
+               rbd_warn(NULL, "no monitor address(es) provided");
+               return -EINVAL;
+       }
        mon_addrs = buf;
        mon_addrs_size = len + 1;
        buf += len;
@@ -3220,8 +3680,10 @@ static int rbd_add_parse_args(const char *buf,
        options = dup_token(&buf, NULL);
        if (!options)
                return -ENOMEM;
-       if (!*options)
-               goto out_err;   /* Missing options */
+       if (!*options) {
+               rbd_warn(NULL, "no options provided");
+               goto out_err;
+       }
 
        spec = rbd_spec_alloc();
        if (!spec)
@@ -3230,14 +3692,18 @@ static int rbd_add_parse_args(const char *buf,
        spec->pool_name = dup_token(&buf, NULL);
        if (!spec->pool_name)
                goto out_mem;
-       if (!*spec->pool_name)
-               goto out_err;   /* Missing pool name */
+       if (!*spec->pool_name) {
+               rbd_warn(NULL, "no pool name provided");
+               goto out_err;
+       }
 
-       spec->image_name = dup_token(&buf, &spec->image_name_len);
+       spec->image_name = dup_token(&buf, NULL);
        if (!spec->image_name)
                goto out_mem;
-       if (!*spec->image_name)
-               goto out_err;   /* Missing image name */
+       if (!*spec->image_name) {
+               rbd_warn(NULL, "no image name provided");
+               goto out_err;
+       }
 
        /*
         * Snapshot name is optional; default is to use "-"
@@ -3251,10 +3717,9 @@ static int rbd_add_parse_args(const char *buf,
                ret = -ENAMETOOLONG;
                goto out_err;
        }
-       spec->snap_name = kmalloc(len + 1, GFP_KERNEL);
+       spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
        if (!spec->snap_name)
                goto out_mem;
-       memcpy(spec->snap_name, buf, len);
        *(spec->snap_name + len) = '\0';
 
        /* Initialize all rbd options to the defaults */
@@ -3323,7 +3788,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
         * First, see if the format 2 image id file exists, and if
         * so, get the image's persistent id from it.
         */
-       size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len;
+       size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name);
        object_name = kmalloc(size, GFP_NOIO);
        if (!object_name)
                return -ENOMEM;
@@ -3339,21 +3804,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
                goto out;
        }
 
-       ret = rbd_req_sync_exec(rbd_dev, object_name,
+       ret = rbd_obj_method_sync(rbd_dev, object_name,
                                "rbd", "get_id",
                                NULL, 0,
-                               response, RBD_IMAGE_ID_LEN_MAX,
-                               CEPH_OSD_FLAG_READ, NULL);
-       dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
+                               response, RBD_IMAGE_ID_LEN_MAX, NULL);
+       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
        if (ret < 0)
                goto out;
-       ret = 0;    /* rbd_req_sync_exec() can return positive */
 
        p = response;
        rbd_dev->spec->image_id = ceph_extract_encoded_string(&p,
                                                p + RBD_IMAGE_ID_LEN_MAX,
-                                               &rbd_dev->spec->image_id_len,
-                                               GFP_NOIO);
+                                               NULL, GFP_NOIO);
        if (IS_ERR(rbd_dev->spec->image_id)) {
                ret = PTR_ERR(rbd_dev->spec->image_id);
                rbd_dev->spec->image_id = NULL;
@@ -3377,11 +3839,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
        rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL);
        if (!rbd_dev->spec->image_id)
                return -ENOMEM;
-       rbd_dev->spec->image_id_len = 0;
 
        /* Record the header object name for this rbd image. */
 
-       size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX);
+       size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX);
        rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
        if (!rbd_dev->header_name) {
                ret = -ENOMEM;
@@ -3427,7 +3888,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
         * Image id was filled in by the caller.  Record the header
         * object name for this rbd image.
         */
-       size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len;
+       size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id);
        rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
        if (!rbd_dev->header_name)
                return -ENOMEM;
@@ -3542,7 +4003,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev)
        if (ret)
                goto err_out_bus;
 
-       ret = rbd_init_watch_dev(rbd_dev);
+       ret = rbd_dev_header_watch_sync(rbd_dev, 1);
        if (ret)
                goto err_out_bus;
 
@@ -3638,6 +4099,13 @@ static ssize_t rbd_add(struct bus_type *bus,
                goto err_out_client;
        spec->pool_id = (u64) rc;
 
+       /* The ceph file layout needs to fit pool id in 32 bits */
+
+       if (WARN_ON(spec->pool_id > (u64) U32_MAX)) {
+               rc = -EIO;
+               goto err_out_client;
+       }
+
        rbd_dev = rbd_dev_create(rbdc, spec);
        if (!rbd_dev)
                goto err_out_client;
@@ -3691,15 +4159,8 @@ static void rbd_dev_release(struct device *dev)
 {
        struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
-       if (rbd_dev->watch_request) {
-               struct ceph_client *client = rbd_dev->rbd_client->client;
-
-               ceph_osdc_unregister_linger_request(&client->osdc,
-                                                   rbd_dev->watch_request);
-       }
        if (rbd_dev->watch_event)
-               rbd_req_sync_unwatch(rbd_dev);
-
+               rbd_dev_header_watch_sync(rbd_dev, 0);
 
        /* clean up and free blkdev */
        rbd_free_disk(rbd_dev);
@@ -3743,10 +4204,14 @@ static ssize_t rbd_remove(struct bus_type *bus,
                goto done;
        }
 
-       if (rbd_dev->open_count) {
+       spin_lock_irq(&rbd_dev->lock);
+       if (rbd_dev->open_count)
                ret = -EBUSY;
+       else
+               set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags);
+       spin_unlock_irq(&rbd_dev->lock);
+       if (ret < 0)
                goto done;
-       }
 
        rbd_remove_all_snaps(rbd_dev);
        rbd_bus_del_dev(rbd_dev);
@@ -3782,10 +4247,15 @@ static void rbd_sysfs_cleanup(void)
        device_unregister(&rbd_root_dev);
 }
 
-int __init rbd_init(void)
+static int __init rbd_init(void)
 {
        int rc;
 
+       if (!libceph_compatible(NULL)) {
+               rbd_warn(NULL, "libceph incompatibility (quitting)");
+
+               return -EINVAL;
+       }
        rc = rbd_sysfs_init();
        if (rc)
                return rc;
@@ -3793,7 +4263,7 @@ int __init rbd_init(void)
        return 0;
 }
 
-void __exit rbd_exit(void)
+static void __exit rbd_exit(void)
 {
        rbd_sysfs_cleanup();
 }
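
The rbd_remove() hunk above replaces a bare open_count test with a check done under rbd_dev->lock that either fails with -EBUSY or sets RBD_DEV_FLAG_REMOVING, so opens and removal cannot race. A minimal stand-alone sketch of that pattern; the names below are hypothetical, not the driver's:

        #include <pthread.h>
        #include <errno.h>
        #include <stdbool.h>

        struct dev {
                pthread_mutex_t lock;
                int open_count;
                bool removing;
        };

        /* Fail removal while the device is open; otherwise mark it removing. */
        int begin_remove(struct dev *d)
        {
                int ret = 0;

                pthread_mutex_lock(&d->lock);
                if (d->open_count)
                        ret = -EBUSY;
                else
                        d->removing = true;
                pthread_mutex_unlock(&d->lock);

                return ret;
        }

        /* The open path checks the flag under the same lock, closing the race. */
        int do_open(struct dev *d)
        {
                int ret = 0;

                pthread_mutex_lock(&d->lock);
                if (d->removing)
                        ret = -ENOENT;
                else
                        d->open_count++;
                pthread_mutex_unlock(&d->lock);

                return ret;
        }
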
diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile
new file mode 100644 (file)
index 0000000..f35cd0b
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o
+rsxx-y := config.o core.o cregs.o dev.o dma.o
diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c
new file mode 100644 (file)
index 0000000..a295e7e
--- /dev/null
@@ -0,0 +1,213 @@
+/*
+* Filename: config.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/types.h>
+#include <linux/crc32.h>
+#include <linux/swab.h>
+
+#include "rsxx_priv.h"
+#include "rsxx_cfg.h"
+
+static void initialize_config(void *config)
+{
+       struct rsxx_card_cfg *cfg = config;
+
+       cfg->hdr.version = RSXX_CFG_VERSION;
+
+       cfg->data.block_size        = RSXX_HW_BLK_SIZE;
+       cfg->data.stripe_size       = RSXX_HW_BLK_SIZE;
+       cfg->data.vendor_id         = RSXX_VENDOR_ID_TMS_IBM;
+       cfg->data.cache_order       = (-1);
+       cfg->data.intr_coal.mode    = RSXX_INTR_COAL_DISABLED;
+       cfg->data.intr_coal.count   = 0;
+       cfg->data.intr_coal.latency = 0;
+}
+
+static u32 config_data_crc32(struct rsxx_card_cfg *cfg)
+{
+       /*
+        * Return the complement of the CRC to ensure compatibility
+        * (i.e. this is how early rsxx drivers did it).
+        */
+
+       return ~crc32(~0, &cfg->data, sizeof(cfg->data));
+}
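
config_data_crc32() composes the kernel helper as ~crc32(~0, ...) so the stored value matches what earlier rsxx drivers wrote. The sketch below is a plain reference CRC-32 (reflected polynomial 0xEDB88320, seed ~0, final complement) assumed to follow the same convention; it is illustrative, not the kernel's crc32():

        #include <stdint.h>
        #include <stddef.h>

        /* Reference CRC-32: seed ~0, reflected polynomial, final complement. */
        static uint32_t crc32_ref(const void *buf, size_t len)
        {
                const uint8_t *p = buf;
                uint32_t crc = 0xffffffffu;          /* the ~0 seed */

                while (len--) {
                        crc ^= *p++;
                        for (int i = 0; i < 8; i++)
                                crc = (crc >> 1) ^ ((crc & 1u) ? 0xedb88320u : 0u);
                }
                return ~crc;                         /* the final complement */
        }
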
+
+
+/*----------------- Config Byte Swap Functions -------------------*/
+static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr)
+{
+       hdr->version = be32_to_cpu((__force __be32) hdr->version);
+       hdr->crc     = be32_to_cpu((__force __be32) hdr->crc);
+}
+
+static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr)
+{
+       hdr->version = (__force u32) cpu_to_be32(hdr->version);
+       hdr->crc     = (__force u32) cpu_to_be32(hdr->crc);
+}
+
+static void config_data_swab(struct rsxx_card_cfg *cfg)
+{
+       u32 *data = (u32 *) &cfg->data;
+       int i;
+
+       for (i = 0; i < (sizeof(cfg->data) / 4); i++)
+               data[i] = swab32(data[i]);
+}
+
+static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg)
+{
+       u32 *data = (u32 *) &cfg->data;
+       int i;
+
+       for (i = 0; i < (sizeof(cfg->data) / 4); i++)
+               data[i] = le32_to_cpu((__force __le32) data[i]);
+}
+
+static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg)
+{
+       u32 *data = (u32 *) &cfg->data;
+       int i;
+
+       for (i = 0; i < (sizeof(cfg->data) / 4); i++)
+               data[i] = (__force u32) cpu_to_le32(data[i]);
+}
+
+
+/*----------------- Config Operations ------------------*/
+static int rsxx_save_config(struct rsxx_cardinfo *card)
+{
+       struct rsxx_card_cfg cfg;
+       int st;
+
+       memcpy(&cfg, &card->config, sizeof(cfg));
+
+       if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) {
+               dev_err(CARD_TO_DEV(card),
+                       "Cannot save config with invalid version %d\n",
+                       cfg.hdr.version);
+               return -EINVAL;
+       }
+
+       /* Convert data to little endian for the CRC calculation. */
+       config_data_cpu_to_le(&cfg);
+
+       cfg.hdr.crc = config_data_crc32(&cfg);
+
+       /*
+        * Swap the data from little endian to big endian so it can be
+        * stored.
+        */
+       config_data_swab(&cfg);
+       config_hdr_cpu_to_be(&cfg.hdr);
+
+       st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1);
+       if (st)
+               return st;
+
+       return 0;
+}
+
+int rsxx_load_config(struct rsxx_cardinfo *card)
+{
+       int st;
+       u32 crc;
+
+       st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config),
+                               &card->config, 1);
+       if (st) {
+               dev_err(CARD_TO_DEV(card),
+                       "Failed reading card config.\n");
+               return st;
+       }
+
+       config_hdr_be_to_cpu(&card->config.hdr);
+
+       if (card->config.hdr.version == RSXX_CFG_VERSION) {
+               /*
+                * We calculate the CRC with the data in little endian, because
+                * early drivers did not take big endian CPUs into account.
+                * The data is always stored in big endian, so we need to byte
+                * swap it before calculating the CRC.
+                */
+
+               config_data_swab(&card->config);
+
+               /* Check the CRC */
+               crc = config_data_crc32(&card->config);
+               if (crc != card->config.hdr.crc) {
+                       dev_err(CARD_TO_DEV(card),
+                               "Config corruption detected!\n");
+                       dev_info(CARD_TO_DEV(card),
+                               "CRC mismatch (should be x%08x, is x%08x)\n",
+                               card->config.hdr.crc, crc);
+                       return -EIO;
+               }
+
+               /* Convert the data to CPU byteorder */
+               config_data_le_to_cpu(&card->config);
+
+       } else if (card->config.hdr.version != 0) {
+               dev_err(CARD_TO_DEV(card),
+                       "Invalid config version %d.\n",
+                       card->config.hdr.version);
+               /*
+                * Config version changes require special handling from the
+                * user
+                */
+               return -EINVAL;
+       } else {
+               dev_info(CARD_TO_DEV(card),
+                       "Initializing card configuration.\n");
+               initialize_config(card);
+               st = rsxx_save_config(card);
+               if (st)
+                       return st;
+       }
+
+       card->config_valid = 1;
+
+       dev_dbg(CARD_TO_DEV(card), "version:     x%08x\n",
+               card->config.hdr.version);
+       dev_dbg(CARD_TO_DEV(card), "crc:         x%08x\n",
+               card->config.hdr.crc);
+       dev_dbg(CARD_TO_DEV(card), "block_size:  x%08x\n",
+               card->config.data.block_size);
+       dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n",
+               card->config.data.stripe_size);
+       dev_dbg(CARD_TO_DEV(card), "vendor_id:   x%08x\n",
+               card->config.data.vendor_id);
+       dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n",
+               card->config.data.cache_order);
+       dev_dbg(CARD_TO_DEV(card), "mode:        x%08x\n",
+               card->config.data.intr_coal.mode);
+       dev_dbg(CARD_TO_DEV(card), "count:       x%08x\n",
+               card->config.data.intr_coal.count);
+       dev_dbg(CARD_TO_DEV(card), "latency:     x%08x\n",
+                card->config.data.intr_coal.latency);
+
+       return 0;
+}
+
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
new file mode 100644 (file)
index 0000000..e516248
--- /dev/null
@@ -0,0 +1,649 @@
+/*
+* Filename: core.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+
+#include <linux/genhd.h>
+#include <linux/idr.h>
+
+#include "rsxx_priv.h"
+#include "rsxx_cfg.h"
+
+#define NO_LEGACY 0
+
+MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver");
+MODULE_AUTHOR("IBM <support@ramsan.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+static unsigned int force_legacy = NO_LEGACY;
+module_param(force_legacy, uint, 0444);
+MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
+
+static DEFINE_IDA(rsxx_disk_ida);
+static DEFINE_SPINLOCK(rsxx_ida_lock);
+
+/*----------------- Interrupt Control & Handling -------------------*/
+static void __enable_intr(unsigned int *mask, unsigned int intr)
+{
+       *mask |= intr;
+}
+
+static void __disable_intr(unsigned int *mask, unsigned int intr)
+{
+       *mask &= ~intr;
+}
+
+/*
+ * NOTE: Disabling the IER will disable the hardware interrupt.
+ * Disabling the ISR will disable the software handling of the ISR bit.
+ *
+ * Enable/Disable interrupt functions assume the card->irq_lock
+ * is held by the caller.
+ */
+void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
+{
+       if (unlikely(card->halt))
+               return;
+
+       __enable_intr(&card->ier_mask, intr);
+       iowrite32(card->ier_mask, card->regmap + IER);
+}
+
+void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
+{
+       __disable_intr(&card->ier_mask, intr);
+       iowrite32(card->ier_mask, card->regmap + IER);
+}
+
+void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
+                                unsigned int intr)
+{
+       if (unlikely(card->halt))
+               return;
+
+       __enable_intr(&card->isr_mask, intr);
+       __enable_intr(&card->ier_mask, intr);
+       iowrite32(card->ier_mask, card->regmap + IER);
+}
+void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
+                                 unsigned int intr)
+{
+       __disable_intr(&card->isr_mask, intr);
+       __disable_intr(&card->ier_mask, intr);
+       iowrite32(card->ier_mask, card->regmap + IER);
+}
+
+static irqreturn_t rsxx_isr(int irq, void *pdata)
+{
+       struct rsxx_cardinfo *card = pdata;
+       unsigned int isr;
+       int handled = 0;
+       int reread_isr;
+       int i;
+
+       spin_lock(&card->irq_lock);
+
+       do {
+               reread_isr = 0;
+
+               isr = ioread32(card->regmap + ISR);
+               if (isr == 0xffffffff) {
+                       /*
+                        * A few systems seem to have an intermittent issue
+                        * where PCI reads return all Fs, but retrying the read
+                        * a little later will return as expected.
+                        */
+                       dev_info(CARD_TO_DEV(card),
+                               "ISR = 0xFFFFFFFF, retrying later\n");
+                       break;
+               }
+
+               isr &= card->isr_mask;
+               if (!isr)
+                       break;
+
+               for (i = 0; i < card->n_targets; i++) {
+                       if (isr & CR_INTR_DMA(i)) {
+                               if (card->ier_mask & CR_INTR_DMA(i)) {
+                                       rsxx_disable_ier(card, CR_INTR_DMA(i));
+                                       reread_isr = 1;
+                               }
+                               queue_work(card->ctrl[i].done_wq,
+                                          &card->ctrl[i].dma_done_work);
+                               handled++;
+                       }
+               }
+
+               if (isr & CR_INTR_CREG) {
+                       schedule_work(&card->creg_ctrl.done_work);
+                       handled++;
+               }
+
+               if (isr & CR_INTR_EVENT) {
+                       schedule_work(&card->event_work);
+                       rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
+                       handled++;
+               }
+       } while (reread_isr);
+
+       spin_unlock(&card->irq_lock);
+
+       return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*----------------- Card Event Handler -------------------*/
+static char *rsxx_card_state_to_str(unsigned int state)
+{
+       static char *state_strings[] = {
+               "Unknown", "Shutdown", "Starting", "Formatting",
+               "Uninitialized", "Good", "Shutting Down",
+               "Fault", "Read Only Fault", "dStroying"
+       };
+
+       return state_strings[ffs(state)];
+}
+
+static void card_state_change(struct rsxx_cardinfo *card,
+                             unsigned int new_state)
+{
+       int st;
+
+       dev_info(CARD_TO_DEV(card),
+               "card state change detected.(%s -> %s)\n",
+               rsxx_card_state_to_str(card->state),
+               rsxx_card_state_to_str(new_state));
+
+       card->state = new_state;
+
+       /* Don't attach DMA interfaces if the card has an invalid config */
+       if (!card->config_valid)
+               return;
+
+       switch (new_state) {
+       case CARD_STATE_RD_ONLY_FAULT:
+               dev_crit(CARD_TO_DEV(card),
+                       "Hardware has entered read-only mode!\n");
+               /*
+                * Fall through so the DMA devices can be attached and
+                * the user can attempt to pull off their data.
+                */
+       case CARD_STATE_GOOD:
+               st = rsxx_get_card_size8(card, &card->size8);
+               if (st)
+                       dev_err(CARD_TO_DEV(card),
+                               "Failed attaching DMA devices\n");
+
+               if (card->config_valid)
+                       set_capacity(card->gendisk, card->size8 >> 9);
+               break;
+
+       case CARD_STATE_FAULT:
+               dev_crit(CARD_TO_DEV(card),
+                       "Hardware Fault reported!\n");
+               /* Fall through. */
+
+       /* Everything else, detach DMA interface if it's attached. */
+       case CARD_STATE_SHUTDOWN:
+       case CARD_STATE_STARTING:
+       case CARD_STATE_FORMATTING:
+       case CARD_STATE_UNINITIALIZED:
+       case CARD_STATE_SHUTTING_DOWN:
+       /*
+        * dStroy is a term coined by marketing to represent the low level
+        * secure erase.
+        */
+       case CARD_STATE_DSTROYING:
+               set_capacity(card->gendisk, 0);
+               break;
+       }
+}
+
+static void card_event_handler(struct work_struct *work)
+{
+       struct rsxx_cardinfo *card;
+       unsigned int state;
+       unsigned long flags;
+       int st;
+
+       card = container_of(work, struct rsxx_cardinfo, event_work);
+
+       if (unlikely(card->halt))
+               return;
+
+       /*
+        * Enable the interrupt now to avoid any weird race conditions where a
+        * state change might occur while rsxx_get_card_state() is
+        * processing a returned creg cmd.
+        */
+       spin_lock_irqsave(&card->irq_lock, flags);
+       rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
+       spin_unlock_irqrestore(&card->irq_lock, flags);
+
+       st = rsxx_get_card_state(card, &state);
+       if (st) {
+               dev_info(CARD_TO_DEV(card),
+                       "Failed reading state after event.\n");
+               return;
+       }
+
+       if (card->state != state)
+               card_state_change(card, state);
+
+       if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING)
+               rsxx_read_hw_log(card);
+}
+
+/*----------------- Card Operations -------------------*/
+static int card_shutdown(struct rsxx_cardinfo *card)
+{
+       unsigned int state;
+       signed long start;
+       const int timeout = msecs_to_jiffies(120000);
+       int st;
+
+       /* We can't issue a shutdown if the card is in a transition state */
+       start = jiffies;
+       do {
+               st = rsxx_get_card_state(card, &state);
+               if (st)
+                       return st;
+       } while (state == CARD_STATE_STARTING &&
+                (jiffies - start < timeout));
+
+       if (state == CARD_STATE_STARTING)
+               return -ETIMEDOUT;
+
+       /* Only issue a shutdown if we need to */
+       if ((state != CARD_STATE_SHUTTING_DOWN) &&
+           (state != CARD_STATE_SHUTDOWN)) {
+               st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN);
+               if (st)
+                       return st;
+       }
+
+       start = jiffies;
+       do {
+               st = rsxx_get_card_state(card, &state);
+               if (st)
+                       return st;
+       } while (state != CARD_STATE_SHUTDOWN &&
+                (jiffies - start < timeout));
+
+       if (state != CARD_STATE_SHUTDOWN)
+               return -ETIMEDOUT;
+
+       return 0;
+}
+
+/*----------------- Driver Initialization & Setup -------------------*/
+/* Returns:   0 if the driver is compatible with the device
+            -1 if the driver is NOT compatible with the device */
+static int rsxx_compatibility_check(struct rsxx_cardinfo *card)
+{
+       unsigned char pci_rev;
+
+       pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
+
+       if (pci_rev > RS70_PCI_REV_SUPPORTED)
+               return -1;
+       return 0;
+}
+
+static int rsxx_pci_probe(struct pci_dev *dev,
+                                       const struct pci_device_id *id)
+{
+       struct rsxx_cardinfo *card;
+       int st;
+
+       dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
+
+       card = kzalloc(sizeof(*card), GFP_KERNEL);
+       if (!card)
+               return -ENOMEM;
+
+       card->dev = dev;
+       pci_set_drvdata(dev, card);
+
+       do {
+               if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) {
+                       st = -ENOMEM;
+                       goto failed_ida_get;
+               }
+
+               spin_lock(&rsxx_ida_lock);
+               st = ida_get_new(&rsxx_disk_ida, &card->disk_id);
+               spin_unlock(&rsxx_ida_lock);
+       } while (st == -EAGAIN);
+
+       if (st)
+               goto failed_ida_get;
+
+       st = pci_enable_device(dev);
+       if (st)
+               goto failed_enable;
+
+       pci_set_master(dev);
+       pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE);
+
+       st = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
+       if (st) {
+               dev_err(CARD_TO_DEV(card),
+                       "No usable DMA configuration, aborting\n");
+               goto failed_dma_mask;
+       }
+
+       st = pci_request_regions(dev, DRIVER_NAME);
+       if (st) {
+               dev_err(CARD_TO_DEV(card),
+                       "Failed to request memory region\n");
+               goto failed_request_regions;
+       }
+
+       if (pci_resource_len(dev, 0) == 0) {
+               dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n");
+               st = -ENOMEM;
+               goto failed_iomap;
+       }
+
+       card->regmap = pci_iomap(dev, 0, 0);
+       if (!card->regmap) {
+               dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n");
+               st = -ENOMEM;
+               goto failed_iomap;
+       }
+
+       spin_lock_init(&card->irq_lock);
+       card->halt = 0;
+
+       spin_lock_irq(&card->irq_lock);
+       rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
+       spin_unlock_irq(&card->irq_lock);
+
+       if (!force_legacy) {
+               st = pci_enable_msi(dev);
+               if (st)
+                       dev_warn(CARD_TO_DEV(card),
+                               "Failed to enable MSI\n");
+       }
+
+       st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED,
+                        DRIVER_NAME, card);
+       if (st) {
+               dev_err(CARD_TO_DEV(card),
+                       "Failed requesting IRQ%d\n", dev->irq);
+               goto failed_irq;
+       }
+
+       /************* Setup Processor Command Interface *************/
+       rsxx_creg_setup(card);
+
+       spin_lock_irq(&card->irq_lock);
+       rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
+       spin_unlock_irq(&card->irq_lock);
+
+       st = rsxx_compatibility_check(card);
+       if (st) {
+               dev_warn(CARD_TO_DEV(card),
+                       "Incompatible driver detected. Please update the driver.\n");
+               st = -EINVAL;
+               goto failed_compatibility_check;
+       }
+
+       /************* Load Card Config *************/
+       st = rsxx_load_config(card);
+       if (st)
+               dev_err(CARD_TO_DEV(card),
+                       "Failed loading card config\n");
+
+       /************* Setup DMA Engine *************/
+       st = rsxx_get_num_targets(card, &card->n_targets);
+       if (st)
+               dev_info(CARD_TO_DEV(card),
+                       "Failed reading the number of DMA targets\n");
+
+       card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL);
+       if (!card->ctrl) {
+               st = -ENOMEM;
+               goto failed_dma_setup;
+       }
+
+       st = rsxx_dma_setup(card);
+       if (st) {
+               dev_info(CARD_TO_DEV(card),
+                       "Failed to setup DMA engine\n");
+               goto failed_dma_setup;
+       }
+
+       /************* Setup Card Event Handler *************/
+       INIT_WORK(&card->event_work, card_event_handler);
+
+       st = rsxx_setup_dev(card);
+       if (st)
+               goto failed_create_dev;
+
+       rsxx_get_card_state(card, &card->state);
+
+       dev_info(CARD_TO_DEV(card),
+               "card state: %s\n",
+               rsxx_card_state_to_str(card->state));
+
+       /*
+        * Now that the DMA engine and devices have been set up,
+        * we can enable the event interrupt (it kicks off actions in
+        * those layers, so we couldn't enable it right away).
+        */
+       spin_lock_irq(&card->irq_lock);
+       rsxx_enable_ier_and_isr(card, CR_INTR_EVENT);
+       spin_unlock_irq(&card->irq_lock);
+
+       if (card->state == CARD_STATE_SHUTDOWN) {
+               st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP);
+               if (st)
+                       dev_crit(CARD_TO_DEV(card),
+                               "Failed issuing card startup\n");
+       } else if (card->state == CARD_STATE_GOOD ||
+                  card->state == CARD_STATE_RD_ONLY_FAULT) {
+               st = rsxx_get_card_size8(card, &card->size8);
+               if (st)
+                       card->size8 = 0;
+       }
+
+       rsxx_attach_dev(card);
+
+       return 0;
+
+failed_create_dev:
+       rsxx_dma_destroy(card);
+failed_dma_setup:
+failed_compatibility_check:
+       spin_lock_irq(&card->irq_lock);
+       rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
+       spin_unlock_irq(&card->irq_lock);
+       free_irq(dev->irq, card);
+       if (!force_legacy)
+               pci_disable_msi(dev);
+failed_irq:
+       pci_iounmap(dev, card->regmap);
+failed_iomap:
+       pci_release_regions(dev);
+failed_request_regions:
+failed_dma_mask:
+       pci_disable_device(dev);
+failed_enable:
+       spin_lock(&rsxx_ida_lock);
+       ida_remove(&rsxx_disk_ida, card->disk_id);
+       spin_unlock(&rsxx_ida_lock);
+failed_ida_get:
+       kfree(card);
+
+       return st;
+}
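
rsxx_pci_probe() above acquires its resources in order and unwinds them through a mirrored ladder of goto labels, so every failure path releases exactly what was already set up. A minimal sketch of the idiom with hypothetical resources standing in for the PCI/DMA setup:

        #include <stdlib.h>
        #include <errno.h>

        struct ctx { void *regs; void *ctrl; };

        static int setup_third_thing(struct ctx *c) { (void)c; return 0; } /* stand-in */

        /* Hypothetical probe: acquire in order, unwind in reverse on failure. */
        int probe(struct ctx *c)
        {
                int ret;

                c->regs = malloc(64);                /* stands in for pci_iomap() etc. */
                if (!c->regs)
                        return -ENOMEM;

                c->ctrl = malloc(64);                /* stands in for the DMA controllers */
                if (!c->ctrl) {
                        ret = -ENOMEM;
                        goto err_unmap;
                }

                ret = setup_third_thing(c);
                if (ret)
                        goto err_free_ctrl;

                return 0;                            /* success keeps everything */

        err_free_ctrl:
                free(c->ctrl);
        err_unmap:
                free(c->regs);
                return ret;
        }
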
+
+static void rsxx_pci_remove(struct pci_dev *dev)
+{
+       struct rsxx_cardinfo *card = pci_get_drvdata(dev);
+       unsigned long flags;
+       int st;
+       int i;
+
+       if (!card)
+               return;
+
+       dev_info(CARD_TO_DEV(card),
+               "Removing PCI-Flash SSD.\n");
+
+       rsxx_detach_dev(card);
+
+       for (i = 0; i < card->n_targets; i++) {
+               spin_lock_irqsave(&card->irq_lock, flags);
+               rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
+               spin_unlock_irqrestore(&card->irq_lock, flags);
+       }
+
+       st = card_shutdown(card);
+       if (st)
+               dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n");
+
+       /* Sync outstanding event handlers. */
+       spin_lock_irqsave(&card->irq_lock, flags);
+       rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
+       spin_unlock_irqrestore(&card->irq_lock, flags);
+
+       /* Prevent work_structs from re-queuing themselves. */
+       card->halt = 1;
+
+       cancel_work_sync(&card->event_work);
+
+       rsxx_destroy_dev(card);
+       rsxx_dma_destroy(card);
+
+       spin_lock_irqsave(&card->irq_lock, flags);
+       rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
+       spin_unlock_irqrestore(&card->irq_lock, flags);
+       free_irq(dev->irq, card);
+
+       if (!force_legacy)
+               pci_disable_msi(dev);
+
+       rsxx_creg_destroy(card);
+
+       pci_iounmap(dev, card->regmap);
+
+       pci_disable_device(dev);
+       pci_release_regions(dev);
+
+       kfree(card);
+}
+
+static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state)
+{
+       /* We don't support suspend at this time. */
+       return -ENOSYS;
+}
+
+static void rsxx_pci_shutdown(struct pci_dev *dev)
+{
+       struct rsxx_cardinfo *card = pci_get_drvdata(dev);
+       unsigned long flags;
+       int i;
+
+       if (!card)
+               return;
+
+       dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n");
+
+       rsxx_detach_dev(card);
+
+       for (i = 0; i < card->n_targets; i++) {
+               spin_lock_irqsave(&card->irq_lock, flags);
+               rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i));
+               spin_unlock_irqrestore(&card->irq_lock, flags);
+       }
+
+       card_shutdown(card);
+}
+
+static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = {
+       {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)},
+       {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)},
+       {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)},
+       {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)},
+       {0,},
+};
+
+MODULE_DEVICE_TABLE(pci, rsxx_pci_ids);
+
+static struct pci_driver rsxx_pci_driver = {
+       .name           = DRIVER_NAME,
+       .id_table       = rsxx_pci_ids,
+       .probe          = rsxx_pci_probe,
+       .remove         = rsxx_pci_remove,
+       .suspend        = rsxx_pci_suspend,
+       .shutdown       = rsxx_pci_shutdown,
+};
+
+static int __init rsxx_core_init(void)
+{
+       int st;
+
+       st = rsxx_dev_init();
+       if (st)
+               return st;
+
+       st = rsxx_dma_init();
+       if (st)
+               goto dma_init_failed;
+
+       st = rsxx_creg_init();
+       if (st)
+               goto creg_init_failed;
+
+       return pci_register_driver(&rsxx_pci_driver);
+
+creg_init_failed:
+       rsxx_dma_cleanup();
+dma_init_failed:
+       rsxx_dev_cleanup();
+
+       return st;
+}
+
+static void __exit rsxx_core_cleanup(void)
+{
+       pci_unregister_driver(&rsxx_pci_driver);
+       rsxx_creg_cleanup();
+       rsxx_dma_cleanup();
+       rsxx_dev_cleanup();
+}
+
+module_init(rsxx_core_init);
+module_exit(rsxx_core_cleanup);
diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c
new file mode 100644 (file)
index 0000000..80bbe63
--- /dev/null
@@ -0,0 +1,758 @@
+/*
+* Filename: cregs.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/completion.h>
+#include <linux/slab.h>
+
+#include "rsxx_priv.h"
+
+#define CREG_TIMEOUT_MSEC      10000
+
+typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card,
+                           struct creg_cmd *cmd,
+                           int st);
+
+struct creg_cmd {
+       struct list_head list;
+       creg_cmd_cb cb;
+       void *cb_private;
+       unsigned int op;
+       unsigned int addr;
+       int cnt8;
+       void *buf;
+       unsigned int stream;
+       unsigned int status;
+};
+
+static struct kmem_cache *creg_cmd_pool;
+
+
+/*------------ Private Functions --------------*/
+
+#if defined(__LITTLE_ENDIAN)
+#define LITTLE_ENDIAN 1
+#elif defined(__BIG_ENDIAN)
+#define LITTLE_ENDIAN 0
+#else
+#error Unknown endianness!!! Aborting...
+#endif
+
+static void copy_to_creg_data(struct rsxx_cardinfo *card,
+                             int cnt8,
+                             void *buf,
+                             unsigned int stream)
+{
+       int i = 0;
+       u32 *data = buf;
+
+       for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
+               /*
+                * Firmware implementation makes it necessary to byte swap on
+                * little endian processors.
+                */
+               if (LITTLE_ENDIAN && stream)
+                       iowrite32be(data[i], card->regmap + CREG_DATA(i));
+               else
+                       iowrite32(data[i], card->regmap + CREG_DATA(i));
+       }
+}
+
+
+static void copy_from_creg_data(struct rsxx_cardinfo *card,
+                               int cnt8,
+                               void *buf,
+                               unsigned int stream)
+{
+       int i = 0;
+       u32 *data = buf;
+
+       for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
+               /*
+                * Firmware implementation makes it necessary to byte swap on
+                * little endian processors.
+                */
+               if (LITTLE_ENDIAN && stream)
+                       data[i] = ioread32be(card->regmap + CREG_DATA(i));
+               else
+                       data[i] = ioread32(card->regmap + CREG_DATA(i));
+       }
+}
+
+static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card)
+{
+       struct creg_cmd *cmd;
+
+       /*
+        * Spin lock is needed because this can be called in atomic/interrupt
+        * context.
+        */
+       spin_lock_bh(&card->creg_ctrl.lock);
+       cmd = card->creg_ctrl.active_cmd;
+       card->creg_ctrl.active_cmd = NULL;
+       spin_unlock_bh(&card->creg_ctrl.lock);
+
+       return cmd;
+}
+
+static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd)
+{
+       iowrite32(cmd->addr, card->regmap + CREG_ADD);
+       iowrite32(cmd->cnt8, card->regmap + CREG_CNT);
+
+       if (cmd->op == CREG_OP_WRITE) {
+               if (cmd->buf)
+                       copy_to_creg_data(card, cmd->cnt8,
+                                         cmd->buf, cmd->stream);
+       }
+
+       /*
+        * Data copy must complete before initiating the command. This is
+        * needed for weakly ordered processors (e.g. PowerPC), so that all
+        * necessary registers are written before we kick the hardware.
+        */
+       wmb();
+
+       /* Setting the valid bit will kick off the command. */
+       iowrite32(cmd->op, card->regmap + CREG_CMD);
+}
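
creg_issue_cmd() stages the command payload and then relies on wmb() so those stores reach the device before the CREG_CMD write that starts it. A hedged user-space sketch of the same ordering requirement; the register offsets are made up and __sync_synchronize() merely stands in for wmb():

        #include <stdint.h>

        enum { REG_DATA0 = 0, REG_DOORBELL = 16 };     /* hypothetical register layout */

        /* Post a command: payload first, barrier, then the doorbell that starts it. */
        static void kick_command(volatile uint32_t *regs, const uint32_t *payload, int n)
        {
                for (int i = 0; i < n; i++)
                        regs[REG_DATA0 + i] = payload[i];   /* stage the command data */

                __sync_synchronize();   /* stand-in for wmb(): order data before doorbell */

                regs[REG_DOORBELL] = 1;                     /* device may start reading now */
        }
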
+
+static void creg_kick_queue(struct rsxx_cardinfo *card)
+{
+       if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue))
+               return;
+
+       card->creg_ctrl.active = 1;
+       card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue,
+                                                     struct creg_cmd, list);
+       list_del(&card->creg_ctrl.active_cmd->list);
+       card->creg_ctrl.q_depth--;
+
+       /*
+        * We have to set the timer before we push the new command. Otherwise,
+        * we could create a race condition that would occur if the timer
+        * was not canceled, and expired after the new command was pushed,
+        * but before the command was issued to hardware.
+        */
+       mod_timer(&card->creg_ctrl.cmd_timer,
+                               jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC));
+
+       creg_issue_cmd(card, card->creg_ctrl.active_cmd);
+}
+
+static int creg_queue_cmd(struct rsxx_cardinfo *card,
+                         unsigned int op,
+                         unsigned int addr,
+                         unsigned int cnt8,
+                         void *buf,
+                         int stream,
+                         creg_cmd_cb callback,
+                         void *cb_private)
+{
+       struct creg_cmd *cmd;
+
+       /* Don't queue stuff up if we're halted. */
+       if (unlikely(card->halt))
+               return -EINVAL;
+
+       if (card->creg_ctrl.reset)
+               return -EAGAIN;
+
+       if (cnt8 > MAX_CREG_DATA8)
+               return -EINVAL;
+
+       cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL);
+       if (!cmd)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&cmd->list);
+
+       cmd->op         = op;
+       cmd->addr       = addr;
+       cmd->cnt8       = cnt8;
+       cmd->buf        = buf;
+       cmd->stream     = stream;
+       cmd->cb         = callback;
+       cmd->cb_private = cb_private;
+       cmd->status     = 0;
+
+       spin_lock(&card->creg_ctrl.lock);
+       list_add_tail(&cmd->list, &card->creg_ctrl.queue);
+       card->creg_ctrl.q_depth++;
+       creg_kick_queue(card);
+       spin_unlock(&card->creg_ctrl.lock);
+
+       return 0;
+}
+
+static void creg_cmd_timed_out(unsigned long data)
+{
+       struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data;
+       struct creg_cmd *cmd;
+
+       cmd = pop_active_cmd(card);
+       if (cmd == NULL) {
+               card->creg_ctrl.creg_stats.creg_timeout++;
+               dev_warn(CARD_TO_DEV(card),
+                       "No active command associated with timeout!\n");
+               return;
+       }
+
+       if (cmd->cb)
+               cmd->cb(card, cmd, -ETIMEDOUT);
+
+       kmem_cache_free(creg_cmd_pool, cmd);
+
+
+       spin_lock(&card->creg_ctrl.lock);
+       card->creg_ctrl.active = 0;
+       creg_kick_queue(card);
+       spin_unlock(&card->creg_ctrl.lock);
+}
+
+
+static void creg_cmd_done(struct work_struct *work)
+{
+       struct rsxx_cardinfo *card;
+       struct creg_cmd *cmd;
+       int st = 0;
+
+       card = container_of(work, struct rsxx_cardinfo,
+                           creg_ctrl.done_work);
+
+       /*
+        * The timer could not be cancelled for some reason,
+        * race to pop the active command.
+        */
+       if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0)
+               card->creg_ctrl.creg_stats.failed_cancel_timer++;
+
+       cmd = pop_active_cmd(card);
+       if (cmd == NULL) {
+               dev_err(CARD_TO_DEV(card),
+                       "Spurious creg interrupt!\n");
+               return;
+       }
+
+       card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT);
+       cmd->status = card->creg_ctrl.creg_stats.stat;
+       if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) {
+               dev_err(CARD_TO_DEV(card),
+                       "Invalid status on creg command\n");
+               /*
+                * At this point we're probably reading garbage from HW. Don't
+                * do anything else that could mess up the system and let
+                * the sync function return an error.
+                */
+               st = -EIO;
+               goto creg_done;
+       } else if (cmd->status & CREG_STAT_ERROR) {
+               st = -EIO;
+       }
+
+       if (cmd->op == CREG_OP_READ) {
+               unsigned int cnt8 = ioread32(card->regmap + CREG_CNT);
+
+               /* Paranoid Sanity Checks */
+               if (!cmd->buf) {
+                       dev_err(CARD_TO_DEV(card),
+                               "Buffer not given for read.\n");
+                       st = -EIO;
+                       goto creg_done;
+               }
+               if (cnt8 != cmd->cnt8) {
+                       dev_err(CARD_TO_DEV(card),
+                               "count mismatch\n");
+                       st = -EIO;
+                       goto creg_done;
+               }
+
+               copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
+       }
+
+creg_done:
+       if (cmd->cb)
+               cmd->cb(card, cmd, st);
+
+       kmem_cache_free(creg_cmd_pool, cmd);
+
+       spin_lock(&card->creg_ctrl.lock);
+       card->creg_ctrl.active = 0;
+       creg_kick_queue(card);
+       spin_unlock(&card->creg_ctrl.lock);
+}
+
+static void creg_reset(struct rsxx_cardinfo *card)
+{
+       struct creg_cmd *cmd = NULL;
+       struct creg_cmd *tmp;
+       unsigned long flags;
+
+       /*
+        * mutex_trylock is used here because if reset_lock is already taken,
+        * a reset is already in progress, so we can simply return.
+        */
+       if (!mutex_trylock(&card->creg_ctrl.reset_lock))
+               return;
+
+       card->creg_ctrl.reset = 1;
+       spin_lock_irqsave(&card->irq_lock, flags);
+       rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
+       spin_unlock_irqrestore(&card->irq_lock, flags);
+
+       dev_warn(CARD_TO_DEV(card),
+               "Resetting creg interface for recovery\n");
+
+       /* Cancel outstanding commands */
+       spin_lock(&card->creg_ctrl.lock);
+       list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
+               list_del(&cmd->list);
+               card->creg_ctrl.q_depth--;
+               if (cmd->cb)
+                       cmd->cb(card, cmd, -ECANCELED);
+               kmem_cache_free(creg_cmd_pool, cmd);
+       }
+
+       cmd = card->creg_ctrl.active_cmd;
+       card->creg_ctrl.active_cmd = NULL;
+       if (cmd) {
+               if (timer_pending(&card->creg_ctrl.cmd_timer))
+                       del_timer_sync(&card->creg_ctrl.cmd_timer);
+
+               if (cmd->cb)
+                       cmd->cb(card, cmd, -ECANCELED);
+               kmem_cache_free(creg_cmd_pool, cmd);
+
+               card->creg_ctrl.active = 0;
+       }
+       spin_unlock(&card->creg_ctrl.lock);
+
+       card->creg_ctrl.reset = 0;
+       spin_lock_irqsave(&card->irq_lock, flags);
+       rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT);
+       spin_unlock_irqrestore(&card->irq_lock, flags);
+
+       mutex_unlock(&card->creg_ctrl.reset_lock);
+}
+
+/* Used for synchronous accesses */
+struct creg_completion {
+       struct completion       *cmd_done;
+       int                     st;
+       u32                     creg_status;
+};
+
+static void creg_cmd_done_cb(struct rsxx_cardinfo *card,
+                            struct creg_cmd *cmd,
+                            int st)
+{
+       struct creg_completion *cmd_completion;
+
+       cmd_completion = cmd->cb_private;
+       BUG_ON(!cmd_completion);
+
+       cmd_completion->st = st;
+       cmd_completion->creg_status = cmd->status;
+       complete(cmd_completion->cmd_done);
+}
+
+static int __issue_creg_rw(struct rsxx_cardinfo *card,
+                          unsigned int op,
+                          unsigned int addr,
+                          unsigned int cnt8,
+                          void *buf,
+                          int stream,
+                          unsigned int *hw_stat)
+{
+       DECLARE_COMPLETION_ONSTACK(cmd_done);
+       struct creg_completion completion;
+       unsigned long timeout;
+       int st;
+
+       completion.cmd_done = &cmd_done;
+       completion.st = 0;
+       completion.creg_status = 0;
+
+       st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb,
+                           &completion);
+       if (st)
+               return st;
+
+       /*
+        * This timeout is necessary for unresponsive hardware. The additional
+        * 20 seconds is used to guarantee that each creg request has time to
+        * complete.
+        */
+       timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC *
+                               card->creg_ctrl.q_depth) + 20000);
+
+       /*
+        * The creg interface is guaranteed to complete. It has a timeout
+        * mechanism that will kick in if hardware does not respond.
+        */
+       st = wait_for_completion_timeout(completion.cmd_done, timeout);
+       if (st == 0) {
+               /*
+                * This is really bad, because the kernel timer did not
+                * expire and notify us of a timeout!
+                */
+               dev_crit(CARD_TO_DEV(card),
+                       "cregs timer failed\n");
+               creg_reset(card);
+               return -EIO;
+       }
+
+       *hw_stat = completion.creg_status;
+
+       if (completion.st) {
+               dev_warn(CARD_TO_DEV(card),
+                       "creg command failed(%d x%08x)\n",
+                       completion.st, addr);
+               return completion.st;
+       }
+
+       return 0;
+}
+
+static int issue_creg_rw(struct rsxx_cardinfo *card,
+                        u32 addr,
+                        unsigned int size8,
+                        void *data,
+                        int stream,
+                        int read)
+{
+       unsigned int hw_stat;
+       unsigned int xfer;
+       unsigned int op;
+       int st;
+
+       op = read ? CREG_OP_READ : CREG_OP_WRITE;
+
+       do {
+               xfer = min_t(unsigned int, size8, MAX_CREG_DATA8);
+
+               st = __issue_creg_rw(card, op, addr, xfer,
+                                    data, stream, &hw_stat);
+               if (st)
+                       return st;
+
+               data   = (char *)data + xfer;
+               addr  += xfer;
+               size8 -= xfer;
+       } while (size8);
+
+       return 0;
+}
+
+/* ---------------------------- Public API ---------------------------------- */
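+/*
+ * Illustrative call path for a synchronous register read (for reference
+ * only): rsxx_creg_read() -> issue_creg_rw(), which splits the access into
+ * MAX_CREG_DATA8-sized chunks; each chunk goes through __issue_creg_rw(),
+ * which queues the command with creg_queue_cmd() and sleeps on a completion
+ * until creg_cmd_done_cb() fires.
+ */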
+int rsxx_creg_write(struct rsxx_cardinfo *card,
+                       u32 addr,
+                       unsigned int size8,
+                       void *data,
+                       int byte_stream)
+{
+       return issue_creg_rw(card, addr, size8, data, byte_stream, 0);
+}
+
+int rsxx_creg_read(struct rsxx_cardinfo *card,
+                      u32 addr,
+                      unsigned int size8,
+                      void *data,
+                      int byte_stream)
+{
+       return issue_creg_rw(card, addr, size8, data, byte_stream, 1);
+}
+
+int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state)
+{
+       return rsxx_creg_read(card, CREG_ADD_CARD_STATE,
+                                 sizeof(*state), state, 0);
+}
+
+int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8)
+{
+       unsigned int size;
+       int st;
+
+       st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE,
+                               sizeof(size), &size, 0);
+       if (st)
+               return st;
+
+       *size8 = (u64)size * RSXX_HW_BLK_SIZE;
+       return 0;
+}
+
+int rsxx_get_num_targets(struct rsxx_cardinfo *card,
+                            unsigned int *n_targets)
+{
+       return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS,
+                                 sizeof(*n_targets), n_targets, 0);
+}
+
+int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
+                                  u32 *capabilities)
+{
+       return rsxx_creg_read(card, CREG_ADD_CAPABILITIES,
+                                 sizeof(*capabilities), capabilities, 0);
+}
+
+int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd)
+{
+       return rsxx_creg_write(card, CREG_ADD_CARD_CMD,
+                                  sizeof(cmd), &cmd, 0);
+}
+
+
+/*----------------- HW Log Functions -------------------*/
+static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len)
+{
+       static char level;
+
+       /*
+        * New messages start with "<#>", where # is the log level. Messages
+        * that extend past the log buffer will use the previous level.
+        */
+       if ((len > 3) && (str[0] == '<') && (str[2] == '>')) {
+               level = str[1];
+               str += 3; /* Skip past the log level. */
+               len -= 3;
+       }
+
+       switch (level) {
+       case '0':
+               dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       case '1':
+               dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       case '2':
+               dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       case '3':
+               dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       case '4':
+               dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       case '5':
+               dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       case '6':
+               dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       case '7':
+               dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       default:
+               dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str);
+               break;
+       }
+}
+
+/*
+ * substrncpy() copies at most 'count' bytes from src to dest, stopping once
+ * a terminating '\0' character has been copied. It returns the number of
+ * bytes copied to dest (including the '\0', if one was copied).
+ */
+static int substrncpy(char *dest, const char *src, int count)
+{
+       int max_cnt = count;
+
+       while (count) {
+               count--;
+               *dest = *src;
+               if (*dest == '\0')
+                       break;
+               src++;
+               dest++;
+       }
+       return max_cnt - count;
+}
+
+
+static void read_hw_log_done(struct rsxx_cardinfo *card,
+                            struct creg_cmd *cmd,
+                            int st)
+{
+       char *buf;
+       char *log_str;
+       int cnt;
+       int len;
+       int off;
+
+       buf = cmd->buf;
+       off = 0;
+
+       /* Failed getting the log message */
+       if (st)
+               return;
+
+       while (off < cmd->cnt8) {
+               log_str = &card->log.buf[card->log.buf_len];
+               cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len);
+               len = substrncpy(log_str, &buf[off], cnt);
+
+               off += len;
+               card->log.buf_len += len;
+
+               /*
+                * Flush the log if we've hit the end of a message or if we've
+                * run out of buffer space.
+                */
+               if ((log_str[len - 1] == '\0') ||
+                   (card->log.buf_len == LOG_BUF_SIZE8)) {
+                       if (card->log.buf_len != 1) /* Don't log blank lines. */
+                               hw_log_msg(card, card->log.buf,
+                                          card->log.buf_len);
+                       card->log.buf_len = 0;
+               }
+       }
+
+       if (cmd->status & CREG_STAT_LOG_PENDING)
+               rsxx_read_hw_log(card);
+}
+
+int rsxx_read_hw_log(struct rsxx_cardinfo *card)
+{
+       int st;
+
+       st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG,
+                           sizeof(card->log.tmp), card->log.tmp,
+                           1, read_hw_log_done, NULL);
+       if (st)
+               dev_err(CARD_TO_DEV(card),
+                       "Failed getting log text\n");
+
+       return st;
+}
+
+/*-------------- IOCTL REG Access ------------------*/
+static int issue_reg_cmd(struct rsxx_cardinfo *card,
+                        struct rsxx_reg_access *cmd,
+                        int read)
+{
+       unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE;
+
+       return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data,
+                              cmd->stream, &cmd->stat);
+}
+
+int rsxx_reg_access(struct rsxx_cardinfo *card,
+                       struct rsxx_reg_access __user *ucmd,
+                       int read)
+{
+       struct rsxx_reg_access cmd;
+       int st;
+
+       st = copy_from_user(&cmd, ucmd, sizeof(cmd));
+       if (st)
+               return -EFAULT;
+
+       if (cmd.cnt > RSXX_MAX_REG_CNT)
+               return -EFAULT;
+
+       st = issue_reg_cmd(card, &cmd, read);
+       if (st)
+               return st;
+
+       st = put_user(cmd.stat, &ucmd->stat);
+       if (st)
+               return -EFAULT;
+
+       if (read) {
+               st = copy_to_user(ucmd->data, cmd.data, cmd.cnt);
+               if (st)
+                       return -EFAULT;
+       }
+
+       return 0;
+}
+
+/*------------ Initialization & Setup --------------*/
+int rsxx_creg_setup(struct rsxx_cardinfo *card)
+{
+       card->creg_ctrl.active_cmd = NULL;
+
+       INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
+       mutex_init(&card->creg_ctrl.reset_lock);
+       INIT_LIST_HEAD(&card->creg_ctrl.queue);
+       spin_lock_init(&card->creg_ctrl.lock);
+       setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out,
+                   (unsigned long) card);
+
+       return 0;
+}
+
+void rsxx_creg_destroy(struct rsxx_cardinfo *card)
+{
+       struct creg_cmd *cmd;
+       struct creg_cmd *tmp;
+       int cnt = 0;
+
+       /* Cancel outstanding commands */
+       spin_lock(&card->creg_ctrl.lock);
+       list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) {
+               list_del(&cmd->list);
+               if (cmd->cb)
+                       cmd->cb(card, cmd, -ECANCELED);
+               kmem_cache_free(creg_cmd_pool, cmd);
+               cnt++;
+       }
+
+       if (cnt)
+               dev_info(CARD_TO_DEV(card),
+                       "Canceled %d queue creg commands\n", cnt);
+
+       cmd = card->creg_ctrl.active_cmd;
+       card->creg_ctrl.active_cmd = NULL;
+       if (cmd) {
+               if (timer_pending(&card->creg_ctrl.cmd_timer))
+                       del_timer_sync(&card->creg_ctrl.cmd_timer);
+
+               if (cmd->cb)
+                       cmd->cb(card, cmd, -ECANCELED);
+               dev_info(CARD_TO_DEV(card),
+                       "Canceled active creg command\n");
+               kmem_cache_free(creg_cmd_pool, cmd);
+       }
+       spin_unlock(&card->creg_ctrl.lock);
+
+       cancel_work_sync(&card->creg_ctrl.done_work);
+}
+
+
+int rsxx_creg_init(void)
+{
+       creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN);
+       if (!creg_cmd_pool)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void rsxx_creg_cleanup(void)
+{
+       kmem_cache_destroy(creg_cmd_pool);
+}
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
new file mode 100644 (file)
index 0000000..4346d17
--- /dev/null
@@ -0,0 +1,367 @@
+/*
+* Filename: dev.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+
+#include <linux/fs.h>
+
+#include "rsxx_priv.h"
+
+static unsigned int blkdev_minors = 64;
+module_param(blkdev_minors, uint, 0444);
+MODULE_PARM_DESC(blkdev_minors, "Number of minors (partitions)");
+
+/*
+ * For now I'm making this tweakable in case any applications hit this limit.
+ * If you see a "bio too big" error in the log, you will need to raise this
+ * value.
+ */
+static unsigned int blkdev_max_hw_sectors = 1024;
+module_param(blkdev_max_hw_sectors, uint, 0444);
+MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO");
+
+static unsigned int enable_blkdev = 1;
+module_param(enable_blkdev, uint, 0444);
+MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces");
+
+
+struct rsxx_bio_meta {
+       struct bio      *bio;
+       atomic_t        pending_dmas;
+       atomic_t        error;
+       unsigned long   start_time;
+};
+
+static struct kmem_cache *bio_meta_pool;
+
+/*----------------- Block Device Operations -----------------*/
+static int rsxx_blkdev_ioctl(struct block_device *bdev,
+                                fmode_t mode,
+                                unsigned int cmd,
+                                unsigned long arg)
+{
+       struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
+
+       switch (cmd) {
+       case RSXX_GETREG:
+               return rsxx_reg_access(card, (void __user *)arg, 1);
+       case RSXX_SETREG:
+               return rsxx_reg_access(card, (void __user *)arg, 0);
+       }
+
+       return -ENOTTY;
+}
+
+static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct rsxx_cardinfo *card = bdev->bd_disk->private_data;
+       u64 blocks = card->size8 >> 9;
+
+       /*
+        * get geometry: Fake it. I haven't found any drivers that set
+        * geo->start, so we won't either.
+        */
+       if (card->size8) {
+               geo->heads = 64;
+               geo->sectors = 16;
+               do_div(blocks, (geo->heads * geo->sectors));
+               geo->cylinders = blocks;
+       } else {
+               geo->heads = 0;
+               geo->sectors = 0;
+               geo->cylinders = 0;
+       }
+       return 0;
+}
+
+static const struct block_device_operations rsxx_fops = {
+       .owner          = THIS_MODULE,
+       .getgeo         = rsxx_getgeo,
+       .ioctl          = rsxx_blkdev_ioctl,
+};
+
+static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
+{
+       struct hd_struct *part0 = &card->gendisk->part0;
+       int rw = bio_data_dir(bio);
+       int cpu;
+
+       cpu = part_stat_lock();
+
+       part_round_stats(cpu, part0);
+       part_inc_in_flight(part0, rw);
+
+       part_stat_unlock();
+}
+
+static void disk_stats_complete(struct rsxx_cardinfo *card,
+                               struct bio *bio,
+                               unsigned long start_time)
+{
+       struct hd_struct *part0 = &card->gendisk->part0;
+       unsigned long duration = jiffies - start_time;
+       int rw = bio_data_dir(bio);
+       int cpu;
+
+       cpu = part_stat_lock();
+
+       part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio));
+       part_stat_inc(cpu, part0, ios[rw]);
+       part_stat_add(cpu, part0, ticks[rw], duration);
+
+       part_round_stats(cpu, part0);
+       part_dec_in_flight(part0, rw);
+
+       part_stat_unlock();
+}
+
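+/*
+ * Each bio is split into one or more DMAs by rsxx_dma_queue_bio(). The
+ * rsxx_bio_meta 'pending_dmas' count tracks how many of them are still
+ * outstanding; bio_dma_done_cb() below records any error and ends the bio
+ * once its last DMA has completed.
+ */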
+static void bio_dma_done_cb(struct rsxx_cardinfo *card,
+                           void *cb_data,
+                           unsigned int error)
+{
+       struct rsxx_bio_meta *meta = cb_data;
+
+       if (error)
+               atomic_set(&meta->error, 1);
+
+       if (atomic_dec_and_test(&meta->pending_dmas)) {
+               disk_stats_complete(card, meta->bio, meta->start_time);
+
+               bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
+               kmem_cache_free(bio_meta_pool, meta);
+       }
+}
+
+static void rsxx_make_request(struct request_queue *q, struct bio *bio)
+{
+       struct rsxx_cardinfo *card = q->queuedata;
+       struct rsxx_bio_meta *bio_meta;
+       int st = -EINVAL;
+
+       might_sleep();
+
+       if (unlikely(card->halt)) {
+               st = -EFAULT;
+               goto req_err;
+       }
+
+       if (unlikely(card->dma_fault)) {
+               st = -EFAULT;
+               goto req_err;
+       }
+
+       if (bio->bi_size == 0) {
+               dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
+               goto req_err;
+       }
+
+       bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
+       if (!bio_meta) {
+               st = -ENOMEM;
+               goto req_err;
+       }
+
+       bio_meta->bio = bio;
+       atomic_set(&bio_meta->error, 0);
+       atomic_set(&bio_meta->pending_dmas, 0);
+       bio_meta->start_time = jiffies;
+
+       disk_stats_start(card, bio);
+
+       dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
+                bio_data_dir(bio) ? 'W' : 'R', bio_meta,
+                (u64)bio->bi_sector << 9, bio->bi_size);
+
+       st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
+                                   bio_dma_done_cb, bio_meta);
+       if (st)
+               goto queue_err;
+
+       return;
+
+queue_err:
+       kmem_cache_free(bio_meta_pool, bio_meta);
+req_err:
+       bio_endio(bio, st);
+}
+
+/*----------------- Device Setup -------------------*/
+static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
+{
+       unsigned char pci_rev;
+
+       pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev);
+
+       return (pci_rev >= RSXX_DISCARD_SUPPORT);
+}
+
+static unsigned short rsxx_get_logical_block_size(
+                                       struct rsxx_cardinfo *card)
+{
+       u32 capabilities = 0;
+       int st;
+
+       st = rsxx_get_card_capabilities(card, &capabilities);
+       if (st)
+               dev_warn(CARD_TO_DEV(card),
+                       "Failed reading card capabilities register\n");
+
+       /* Earlier firmware did not have support for 512 byte accesses */
+       if (capabilities & CARD_CAP_SUBPAGE_WRITES)
+               return 512;
+       else
+               return RSXX_HW_BLK_SIZE;
+}
+
+int rsxx_attach_dev(struct rsxx_cardinfo *card)
+{
+       mutex_lock(&card->dev_lock);
+
+       /* The block device requires the stripe size from the config. */
+       if (enable_blkdev) {
+               if (card->config_valid)
+                       set_capacity(card->gendisk, card->size8 >> 9);
+               else
+                       set_capacity(card->gendisk, 0);
+               add_disk(card->gendisk);
+
+               card->bdev_attached = 1;
+       }
+
+       mutex_unlock(&card->dev_lock);
+
+       return 0;
+}
+
+void rsxx_detach_dev(struct rsxx_cardinfo *card)
+{
+       mutex_lock(&card->dev_lock);
+
+       if (card->bdev_attached) {
+               del_gendisk(card->gendisk);
+               card->bdev_attached = 0;
+       }
+
+       mutex_unlock(&card->dev_lock);
+}
+
+int rsxx_setup_dev(struct rsxx_cardinfo *card)
+{
+       unsigned short blk_size;
+
+       mutex_init(&card->dev_lock);
+
+       if (!enable_blkdev)
+               return 0;
+
+       card->major = register_blkdev(0, DRIVER_NAME);
+       if (card->major < 0) {
+               dev_err(CARD_TO_DEV(card), "Failed to get major number\n");
+               return -ENOMEM;
+       }
+
+       card->queue = blk_alloc_queue(GFP_KERNEL);
+       if (!card->queue) {
+               dev_err(CARD_TO_DEV(card), "Failed queue alloc\n");
+               unregister_blkdev(card->major, DRIVER_NAME);
+               return -ENOMEM;
+       }
+
+       card->gendisk = alloc_disk(blkdev_minors);
+       if (!card->gendisk) {
+               dev_err(CARD_TO_DEV(card), "Failed disk alloc\n");
+               blk_cleanup_queue(card->queue);
+               unregister_blkdev(card->major, DRIVER_NAME);
+               return -ENOMEM;
+       }
+
+       blk_size = rsxx_get_logical_block_size(card);
+
+       blk_queue_make_request(card->queue, rsxx_make_request);
+       blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
+       blk_queue_dma_alignment(card->queue, blk_size - 1);
+       blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
+       blk_queue_logical_block_size(card->queue, blk_size);
+       blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
+
+       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue);
+       if (rsxx_discard_supported(card)) {
+               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue);
+               blk_queue_max_discard_sectors(card->queue,
+                                               RSXX_HW_BLK_SIZE >> 9);
+               card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE;
+               card->queue->limits.discard_alignment   = RSXX_HW_BLK_SIZE;
+               card->queue->limits.discard_zeroes_data = 1;
+       }
+
+       card->queue->queuedata = card;
+
+       snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name),
+                "rsxx%d", card->disk_id);
+       card->gendisk->driverfs_dev = &card->dev->dev;
+       card->gendisk->major = card->major;
+       card->gendisk->first_minor = 0;
+       card->gendisk->fops = &rsxx_fops;
+       card->gendisk->private_data = card;
+       card->gendisk->queue = card->queue;
+
+       return 0;
+}
+
+void rsxx_destroy_dev(struct rsxx_cardinfo *card)
+{
+       if (!enable_blkdev)
+               return;
+
+       put_disk(card->gendisk);
+       card->gendisk = NULL;
+
+       blk_cleanup_queue(card->queue);
+       unregister_blkdev(card->major, DRIVER_NAME);
+}
+
+int rsxx_dev_init(void)
+{
+       bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN);
+       if (!bio_meta_pool)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void rsxx_dev_cleanup(void)
+{
+       kmem_cache_destroy(bio_meta_pool);
+}
+
+
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
new file mode 100644 (file)
index 0000000..63176e6
--- /dev/null
@@ -0,0 +1,998 @@
+/*
+* Filename: dma.c
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include <linux/slab.h>
+#include "rsxx_priv.h"
+
+struct rsxx_dma {
+       struct list_head         list;
+       u8                       cmd;
+       unsigned int             laddr;     /* Logical address on the ramsan */
+       struct {
+               u32              off;
+               u32              cnt;
+       } sub_page;
+       dma_addr_t               dma_addr;
+       struct page              *page;
+       unsigned int             pg_off;    /* Page Offset */
+       rsxx_dma_cb              cb;
+       void                     *cb_data;
+};
+
+/* This timeout is used to detect a stalled DMA channel */
+#define DMA_ACTIVITY_TIMEOUT   msecs_to_jiffies(10000)
+
+struct hw_status {
+       u8      status;
+       u8      tag;
+       __le16  count;
+       __le32  _rsvd2;
+       __le64  _rsvd3;
+} __packed;
+
+enum rsxx_dma_status {
+       DMA_SW_ERR    = 0x1,
+       DMA_HW_FAULT  = 0x2,
+       DMA_CANCELLED = 0x4,
+};
+
+struct hw_cmd {
+       u8      command;
+       u8      tag;
+       u8      _rsvd;
+       u8      sub_page; /* Bit[0:2]: 512byte offset */
+                         /* Bit[4:6]: 512byte count */
+       __le32  device_addr;
+       __le64  host_addr;
+} __packed;
+
+enum rsxx_hw_cmd {
+       HW_CMD_BLK_DISCARD      = 0x70,
+       HW_CMD_BLK_WRITE        = 0x80,
+       HW_CMD_BLK_READ         = 0xC0,
+       HW_CMD_BLK_RECON_READ   = 0xE0,
+};
+
+enum rsxx_hw_status {
+       HW_STATUS_CRC           = 0x01,
+       HW_STATUS_HARD_ERR      = 0x02,
+       HW_STATUS_SOFT_ERR      = 0x04,
+       HW_STATUS_FAULT         = 0x08,
+};
+
+#define STATUS_BUFFER_SIZE8     4096
+#define COMMAND_BUFFER_SIZE8    4096
+
+static struct kmem_cache *rsxx_dma_pool;
+
+struct dma_tracker {
+       int                     next_tag;
+       struct rsxx_dma *dma;
+};
+
+#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \
+               (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS))
+
+struct dma_tracker_list {
+       spinlock_t              lock;
+       int                     head;
+       struct dma_tracker      list[0];
+};
+
+
+/*----------------- Misc Utility Functions -------------------*/
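+/*
+ * The byte address space is striped across the DMA targets in chunks of
+ * 'stripe_size8' bytes (see rsxx_dma_stripe_setup()). rsxx_get_dma_tgt()
+ * extracts the target-select bits, while rsxx_addr8_to_laddr() removes them
+ * and converts the remaining per-target byte address into a hardware block
+ * index.
+ */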
+static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card)
+{
+       unsigned long long tgt_addr8;
+
+       tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) &
+                     card->_stripe.upper_mask) |
+                   ((addr8) & card->_stripe.lower_mask);
+       do_div(tgt_addr8, RSXX_HW_BLK_SIZE);
+       return tgt_addr8;
+}
+
+static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8)
+{
+       unsigned int tgt;
+
+       tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask;
+
+       return tgt;
+}
+
+static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card)
+{
+       /* Reset all DMA Command/Status Queues */
+       iowrite32(DMA_QUEUE_RESET, card->regmap + RESET);
+}
+
+static unsigned int get_dma_size(struct rsxx_dma *dma)
+{
+       if (dma->sub_page.cnt)
+               return dma->sub_page.cnt << 9;
+       else
+               return RSXX_HW_BLK_SIZE;
+}
+
+
+/*----------------- DMA Tracker -------------------*/
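+/*
+ * The tracker list is a free list of hardware tags: 'head' indexes the next
+ * free tag and each free entry's 'next_tag' points to the one after it.
+ * pop_tracker() claims a tag for an in-flight DMA and push_tracker() returns
+ * it once that DMA has completed.
+ */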
+static void set_tracker_dma(struct dma_tracker_list *trackers,
+                           int tag,
+                           struct rsxx_dma *dma)
+{
+       trackers->list[tag].dma = dma;
+}
+
+static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers,
+                                           int tag)
+{
+       return trackers->list[tag].dma;
+}
+
+static int pop_tracker(struct dma_tracker_list *trackers)
+{
+       int tag;
+
+       spin_lock(&trackers->lock);
+       tag = trackers->head;
+       if (tag != -1) {
+               trackers->head = trackers->list[tag].next_tag;
+               trackers->list[tag].next_tag = -1;
+       }
+       spin_unlock(&trackers->lock);
+
+       return tag;
+}
+
+static void push_tracker(struct dma_tracker_list *trackers, int tag)
+{
+       spin_lock(&trackers->lock);
+       trackers->list[tag].next_tag = trackers->head;
+       trackers->head = tag;
+       trackers->list[tag].dma = NULL;
+       spin_unlock(&trackers->lock);
+}
+
+
+/*----------------- Interrupt Coalescing -------------*/
+/*
+ * Interrupt Coalescing Register Format:
+ * Interrupt Timer (64ns units) [15:0]
+ * Interrupt Count [24:16]
+ * Reserved [31:25]
+ */
+#define INTR_COAL_LATENCY_MASK       (0x0000ffff)
+
+#define INTR_COAL_COUNT_SHIFT        16
+#define INTR_COAL_COUNT_BITS         9
+#define INTR_COAL_COUNT_MASK         (((1 << INTR_COAL_COUNT_BITS) - 1) << \
+                                       INTR_COAL_COUNT_SHIFT)
+#define INTR_COAL_LATENCY_UNITS_NS   64
+
+
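+/*
+ * dma_intr_coal_val() packs the count and latency into the register layout
+ * above. Example with illustrative values only: count = 8 and
+ * latency = 1024ns yield (8 << 16) | (1024 / 64) = 0x00080010.
+ */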
+static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency)
+{
+       u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS;
+
+       if (mode == RSXX_INTR_COAL_DISABLED)
+               return 0;
+
+       return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) |
+                       (latency_units & INTR_COAL_LATENCY_MASK);
+
+}
+
+static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card)
+{
+       int i;
+       u32 q_depth = 0;
+       u32 intr_coal;
+
+       if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE)
+               return;
+
+       for (i = 0; i < card->n_targets; i++)
+               q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth);
+
+       intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode,
+                                     q_depth / 2,
+                                     card->config.data.intr_coal.latency);
+       iowrite32(intr_coal, card->regmap + INTR_COAL);
+}
+
+/*----------------- RSXX DMA Handling -------------------*/
+static void rsxx_complete_dma(struct rsxx_cardinfo *card,
+                                 struct rsxx_dma *dma,
+                                 unsigned int status)
+{
+       if (status & DMA_SW_ERR)
+               printk_ratelimited(KERN_ERR
+                                  "SW Error in DMA(cmd x%02x, laddr x%08x)\n",
+                                  dma->cmd, dma->laddr);
+       if (status & DMA_HW_FAULT)
+               printk_ratelimited(KERN_ERR
+                                  "HW Fault in DMA(cmd x%02x, laddr x%08x)\n",
+                                  dma->cmd, dma->laddr);
+       if (status & DMA_CANCELLED)
+               printk_ratelimited(KERN_ERR
+                                  "DMA Cancelled(cmd x%02x, laddr x%08x)\n",
+                                  dma->cmd, dma->laddr);
+
+       if (dma->dma_addr)
+               pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma),
+                              dma->cmd == HW_CMD_BLK_WRITE ?
+                                          PCI_DMA_TODEVICE :
+                                          PCI_DMA_FROMDEVICE);
+
+       if (dma->cb)
+               dma->cb(card, dma->cb_data, status ? 1 : 0);
+
+       kmem_cache_free(rsxx_dma_pool, dma);
+}
+
+static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
+                                struct rsxx_dma *dma)
+{
+       /*
+        * Requeued DMAs go to the front of the queue so they are issued
+        * first.
+        */
+       spin_lock(&ctrl->queue_lock);
+       list_add(&dma->list, &ctrl->queue);
+       spin_unlock(&ctrl->queue_lock);
+}
+
+static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
+                                     struct rsxx_dma *dma,
+                                     u8 hw_st)
+{
+       unsigned int status = 0;
+       int requeue_cmd = 0;
+
+       dev_dbg(CARD_TO_DEV(ctrl->card),
+               "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n",
+               dma->cmd, dma->laddr, hw_st);
+
+       if (hw_st & HW_STATUS_CRC)
+               ctrl->stats.crc_errors++;
+       if (hw_st & HW_STATUS_HARD_ERR)
+               ctrl->stats.hard_errors++;
+       if (hw_st & HW_STATUS_SOFT_ERR)
+               ctrl->stats.soft_errors++;
+
+       switch (dma->cmd) {
+       case HW_CMD_BLK_READ:
+               if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) {
+                       if (ctrl->card->scrub_hard) {
+                               dma->cmd = HW_CMD_BLK_RECON_READ;
+                               requeue_cmd = 1;
+                               ctrl->stats.reads_retried++;
+                       } else {
+                               status |= DMA_HW_FAULT;
+                               ctrl->stats.reads_failed++;
+                       }
+               } else if (hw_st & HW_STATUS_FAULT) {
+                       status |= DMA_HW_FAULT;
+                       ctrl->stats.reads_failed++;
+               }
+
+               break;
+       case HW_CMD_BLK_RECON_READ:
+               if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) {
+                       /* Data could not be reconstructed. */
+                       status |= DMA_HW_FAULT;
+                       ctrl->stats.reads_failed++;
+               }
+
+               break;
+       case HW_CMD_BLK_WRITE:
+               status |= DMA_HW_FAULT;
+               ctrl->stats.writes_failed++;
+
+               break;
+       case HW_CMD_BLK_DISCARD:
+               status |= DMA_HW_FAULT;
+               ctrl->stats.discards_failed++;
+
+               break;
+       default:
+               dev_err(CARD_TO_DEV(ctrl->card),
+                       "Unknown command in DMA!(cmd: x%02x "
+                       "laddr x%08x st: x%02x)\n",
+                       dma->cmd, dma->laddr, hw_st);
+               status |= DMA_SW_ERR;
+
+               break;
+       }
+
+       if (requeue_cmd)
+               rsxx_requeue_dma(ctrl, dma);
+       else
+               rsxx_complete_dma(ctrl->card, dma, status);
+}
+
+static void dma_engine_stalled(unsigned long data)
+{
+       struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
+
+       if (atomic_read(&ctrl->stats.hw_q_depth) == 0)
+               return;
+
+       if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) {
+               /*
+                * The dma engine was stalled because the SW_CMD_IDX write
+                * was lost. Issue it again to recover.
+                */
+               dev_warn(CARD_TO_DEV(ctrl->card),
+                       "SW_CMD_IDX write was lost, re-writing...\n");
+               iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
+               mod_timer(&ctrl->activity_timer,
+                         jiffies + DMA_ACTIVITY_TIMEOUT);
+       } else {
+               dev_warn(CARD_TO_DEV(ctrl->card),
+                       "DMA channel %d has stalled, faulting interface.\n",
+                       ctrl->id);
+               ctrl->card->dma_fault = 1;
+       }
+}
+
+static void rsxx_issue_dmas(struct work_struct *work)
+{
+       struct rsxx_dma_ctrl *ctrl;
+       struct rsxx_dma *dma;
+       int tag;
+       int cmds_pending = 0;
+       struct hw_cmd *hw_cmd_buf;
+
+       ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
+       hw_cmd_buf = ctrl->cmd.buf;
+
+       if (unlikely(ctrl->card->halt))
+               return;
+
+       while (1) {
+               spin_lock(&ctrl->queue_lock);
+               if (list_empty(&ctrl->queue)) {
+                       spin_unlock(&ctrl->queue_lock);
+                       break;
+               }
+               spin_unlock(&ctrl->queue_lock);
+
+               tag = pop_tracker(ctrl->trackers);
+               if (tag == -1)
+                       break;
+
+               spin_lock(&ctrl->queue_lock);
+               dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
+               list_del(&dma->list);
+               ctrl->stats.sw_q_depth--;
+               spin_unlock(&ctrl->queue_lock);
+
+               /*
+                * This will catch any DMAs that slipped in right before the
+                * fault, but were queued after all the other DMAs were
+                * cancelled.
+                */
+               if (unlikely(ctrl->card->dma_fault)) {
+                       push_tracker(ctrl->trackers, tag);
+                       rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED);
+                       continue;
+               }
+
+               set_tracker_dma(ctrl->trackers, tag, dma);
+               hw_cmd_buf[ctrl->cmd.idx].command  = dma->cmd;
+               hw_cmd_buf[ctrl->cmd.idx].tag      = tag;
+               hw_cmd_buf[ctrl->cmd.idx]._rsvd    = 0;
+               hw_cmd_buf[ctrl->cmd.idx].sub_page =
+                                       ((dma->sub_page.cnt & 0x7) << 4) |
+                                        (dma->sub_page.off & 0x7);
+
+               hw_cmd_buf[ctrl->cmd.idx].device_addr =
+                                       cpu_to_le32(dma->laddr);
+
+               hw_cmd_buf[ctrl->cmd.idx].host_addr =
+                                       cpu_to_le64(dma->dma_addr);
+
+               dev_dbg(CARD_TO_DEV(ctrl->card),
+                       "Issue DMA%d(laddr %d tag %d) to idx %d\n",
+                       ctrl->id, dma->laddr, tag, ctrl->cmd.idx);
+
+               ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK;
+               cmds_pending++;
+
+               if (dma->cmd == HW_CMD_BLK_WRITE)
+                       ctrl->stats.writes_issued++;
+               else if (dma->cmd == HW_CMD_BLK_DISCARD)
+                       ctrl->stats.discards_issued++;
+               else
+                       ctrl->stats.reads_issued++;
+       }
+
+       /* Let HW know we've queued commands. */
+       if (cmds_pending) {
+               /*
+                * We must guarantee that the CPU writes to 'ctrl->cmd.buf'
+                * (which is in PCI-consistent system-memory) from the loop
+                * above make it into the coherency domain before the
+                * following PIO "trigger" updating the cmd.idx.  A WMB is
+                * sufficient. We need not explicitly CPU cache-flush since
+                * the memory is a PCI-consistent (i.e., coherent) mapping.
+                */
+               wmb();
+
+               atomic_add(cmds_pending, &ctrl->stats.hw_q_depth);
+               mod_timer(&ctrl->activity_timer,
+                         jiffies + DMA_ACTIVITY_TIMEOUT);
+               iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
+       }
+}
+
+static void rsxx_dma_done(struct work_struct *work)
+{
+       struct rsxx_dma_ctrl *ctrl;
+       struct rsxx_dma *dma;
+       unsigned long flags;
+       u16 count;
+       u8 status;
+       u8 tag;
+       struct hw_status *hw_st_buf;
+
+       ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
+       hw_st_buf = ctrl->status.buf;
+
+       if (unlikely(ctrl->card->halt) ||
+           unlikely(ctrl->card->dma_fault))
+               return;
+
+       count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count);
+
+       while (count == ctrl->e_cnt) {
+               /*
+                * The read memory-barrier is necessary to keep aggressive
+                * processors/optimizers (such as the PPC Apple G5) from
+                * reordering the following status-buffer tag & status read
+                * *before* the count read on subsequent iterations of the
+                * loop!
+                */
+               rmb();
+
+               status = hw_st_buf[ctrl->status.idx].status;
+               tag    = hw_st_buf[ctrl->status.idx].tag;
+
+               dma = get_tracker_dma(ctrl->trackers, tag);
+               if (dma == NULL) {
+                       spin_lock_irqsave(&ctrl->card->irq_lock, flags);
+                       rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL);
+                       spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
+
+                       dev_err(CARD_TO_DEV(ctrl->card),
+                               "No tracker for tag %d "
+                               "(idx %d id %d)\n",
+                               tag, ctrl->status.idx, ctrl->id);
+                       return;
+               }
+
+               dev_dbg(CARD_TO_DEV(ctrl->card),
+                       "Completing DMA%d"
+                       "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n",
+                       ctrl->id, dma->laddr, tag, status, count,
+                       ctrl->status.idx);
+
+               atomic_dec(&ctrl->stats.hw_q_depth);
+
+               mod_timer(&ctrl->activity_timer,
+                         jiffies + DMA_ACTIVITY_TIMEOUT);
+
+               if (status)
+                       rsxx_handle_dma_error(ctrl, dma, status);
+               else
+                       rsxx_complete_dma(ctrl->card, dma, 0);
+
+               push_tracker(ctrl->trackers, tag);
+
+               ctrl->status.idx = (ctrl->status.idx + 1) &
+                                  RSXX_CS_IDX_MASK;
+               ctrl->e_cnt++;
+
+               count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count);
+       }
+
+       dma_intr_coal_auto_tune(ctrl->card);
+
+       if (atomic_read(&ctrl->stats.hw_q_depth) == 0)
+               del_timer_sync(&ctrl->activity_timer);
+
+       spin_lock_irqsave(&ctrl->card->irq_lock, flags);
+       rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
+       spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
+
+       spin_lock(&ctrl->queue_lock);
+       if (ctrl->stats.sw_q_depth)
+               queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
+       spin_unlock(&ctrl->queue_lock);
+}
+
+static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card,
+                                     struct list_head *q)
+{
+       struct rsxx_dma *dma;
+       struct rsxx_dma *tmp;
+       int cnt = 0;
+
+       list_for_each_entry_safe(dma, tmp, q, list) {
+               list_del(&dma->list);
+
+               if (dma->dma_addr)
+                       pci_unmap_page(card->dev, dma->dma_addr,
+                                      get_dma_size(dma),
+                                      (dma->cmd == HW_CMD_BLK_WRITE) ?
+                                      PCI_DMA_TODEVICE :
+                                      PCI_DMA_FROMDEVICE);
+               kmem_cache_free(rsxx_dma_pool, dma);
+               cnt++;
+       }
+
+       return cnt;
+}
+
+static int rsxx_queue_discard(struct rsxx_cardinfo *card,
+                                 struct list_head *q,
+                                 unsigned int laddr,
+                                 rsxx_dma_cb cb,
+                                 void *cb_data)
+{
+       struct rsxx_dma *dma;
+
+       dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
+       if (!dma)
+               return -ENOMEM;
+
+       dma->cmd          = HW_CMD_BLK_DISCARD;
+       dma->laddr        = laddr;
+       dma->dma_addr     = 0;
+       dma->sub_page.off = 0;
+       dma->sub_page.cnt = 0;
+       dma->page         = NULL;
+       dma->pg_off       = 0;
+       dma->cb           = cb;
+       dma->cb_data      = cb_data;
+
+       dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr);
+
+       list_add_tail(&dma->list, q);
+
+       return 0;
+}
+
+static int rsxx_queue_dma(struct rsxx_cardinfo *card,
+                             struct list_head *q,
+                             int dir,
+                             unsigned int dma_off,
+                             unsigned int dma_len,
+                             unsigned int laddr,
+                             struct page *page,
+                             unsigned int pg_off,
+                             rsxx_dma_cb cb,
+                             void *cb_data)
+{
+       struct rsxx_dma *dma;
+
+       dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
+       if (!dma)
+               return -ENOMEM;
+
+       dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len,
+                                    dir ? PCI_DMA_TODEVICE :
+                                    PCI_DMA_FROMDEVICE);
+       if (!dma->dma_addr) {
+               kmem_cache_free(rsxx_dma_pool, dma);
+               return -ENOMEM;
+       }
+
+       dma->cmd          = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
+       dma->laddr        = laddr;
+       dma->sub_page.off = (dma_off >> 9);
+       dma->sub_page.cnt = (dma_len >> 9);
+       dma->page         = page;
+       dma->pg_off       = pg_off;
+       dma->cb           = cb;
+       dma->cb_data      = cb_data;
+
+       dev_dbg(CARD_TO_DEV(card),
+               "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n",
+               dir ? 'W' : 'R', dma->laddr, dma->sub_page.off,
+               dma->sub_page.cnt, dma->page, dma->pg_off);
+
+       /* Queue the DMA */
+       list_add_tail(&dma->list, q);
+
+       return 0;
+}
+
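+/*
+ * rsxx_dma_queue_bio() walks the bio's segments, clamping each piece so it
+ * never crosses an RSXX_HW_BLK_SIZE boundary, and builds one DMA list per
+ * target. The per-target lists are then spliced onto the controller queues
+ * and the issue work is scheduled for each non-empty list.
+ */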
+int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+                          struct bio *bio,
+                          atomic_t *n_dmas,
+                          rsxx_dma_cb cb,
+                          void *cb_data)
+{
+       struct list_head dma_list[RSXX_MAX_TARGETS];
+       struct bio_vec *bvec;
+       unsigned long long addr8;
+       unsigned int laddr;
+       unsigned int bv_len;
+       unsigned int bv_off;
+       unsigned int dma_off;
+       unsigned int dma_len;
+       int dma_cnt[RSXX_MAX_TARGETS];
+       int tgt;
+       int st;
+       int i;
+
+       addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */
+       atomic_set(n_dmas, 0);
+
+       for (i = 0; i < card->n_targets; i++) {
+               INIT_LIST_HEAD(&dma_list[i]);
+               dma_cnt[i] = 0;
+       }
+
+       if (bio->bi_rw & REQ_DISCARD) {
+               bv_len = bio->bi_size;
+
+               while (bv_len > 0) {
+                       tgt   = rsxx_get_dma_tgt(card, addr8);
+                       laddr = rsxx_addr8_to_laddr(addr8, card);
+
+                       st = rsxx_queue_discard(card, &dma_list[tgt], laddr,
+                                                   cb, cb_data);
+                       if (st)
+                               goto bvec_err;
+
+                       dma_cnt[tgt]++;
+                       atomic_inc(n_dmas);
+                       addr8  += RSXX_HW_BLK_SIZE;
+                       bv_len -= RSXX_HW_BLK_SIZE;
+               }
+       } else {
+               bio_for_each_segment(bvec, bio, i) {
+                       bv_len = bvec->bv_len;
+                       bv_off = bvec->bv_offset;
+
+                       while (bv_len > 0) {
+                               tgt   = rsxx_get_dma_tgt(card, addr8);
+                               laddr = rsxx_addr8_to_laddr(addr8, card);
+                               dma_off = addr8 & RSXX_HW_BLK_MASK;
+                               dma_len = min(bv_len,
+                                             RSXX_HW_BLK_SIZE - dma_off);
+
+                               st = rsxx_queue_dma(card, &dma_list[tgt],
+                                                       bio_data_dir(bio),
+                                                       dma_off, dma_len,
+                                                       laddr, bvec->bv_page,
+                                                       bv_off, cb, cb_data);
+                               if (st)
+                                       goto bvec_err;
+
+                               dma_cnt[tgt]++;
+                               atomic_inc(n_dmas);
+                               addr8  += dma_len;
+                               bv_off += dma_len;
+                               bv_len -= dma_len;
+                       }
+               }
+       }
+
+       for (i = 0; i < card->n_targets; i++) {
+               if (!list_empty(&dma_list[i])) {
+                       spin_lock(&card->ctrl[i].queue_lock);
+                       card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
+                       list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
+                       spin_unlock(&card->ctrl[i].queue_lock);
+
+                       queue_work(card->ctrl[i].issue_wq,
+                                  &card->ctrl[i].issue_dma_work);
+               }
+       }
+
+       return 0;
+
+bvec_err:
+       for (i = 0; i < card->n_targets; i++)
+               rsxx_cleanup_dma_queue(card, &dma_list[i]);
+
+       return st;
+}
+
+
+/*----------------- DMA Engine Initialization & Setup -------------------*/
+static int rsxx_dma_ctrl_init(struct pci_dev *dev,
+                                 struct rsxx_dma_ctrl *ctrl)
+{
+       int i;
+
+       memset(&ctrl->stats, 0, sizeof(ctrl->stats));
+
+       ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8,
+                                               &ctrl->status.dma_addr);
+       ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8,
+                                            &ctrl->cmd.dma_addr);
+       if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL)
+               return -ENOMEM;
+
+       ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8);
+       if (!ctrl->trackers)
+               return -ENOMEM;
+
+       ctrl->trackers->head = 0;
+       for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
+               ctrl->trackers->list[i].next_tag = i + 1;
+               ctrl->trackers->list[i].dma = NULL;
+       }
+       ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1;
+       spin_lock_init(&ctrl->trackers->lock);
+
+       spin_lock_init(&ctrl->queue_lock);
+       INIT_LIST_HEAD(&ctrl->queue);
+
+       setup_timer(&ctrl->activity_timer, dma_engine_stalled,
+                                       (unsigned long)ctrl);
+
+       ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0);
+       if (!ctrl->issue_wq)
+               return -ENOMEM;
+
+       ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0);
+       if (!ctrl->done_wq)
+               return -ENOMEM;
+
+       INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
+       INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
+
+       memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8);
+       iowrite32(lower_32_bits(ctrl->status.dma_addr),
+                 ctrl->regmap + SB_ADD_LO);
+       iowrite32(upper_32_bits(ctrl->status.dma_addr),
+                 ctrl->regmap + SB_ADD_HI);
+
+       memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8);
+       iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO);
+       iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI);
+
+       ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT);
+       if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) {
+               dev_crit(&dev->dev, "Failed reading status cnt x%x\n",
+                        ctrl->status.idx);
+               return -EINVAL;
+       }
+       iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT);
+       iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT);
+
+       ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX);
+       if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) {
+               dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n",
+                        ctrl->cmd.idx);
+               return -EINVAL;
+       }
+       iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX);
+       iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX);
+
+       wmb();
+
+       return 0;
+}
+
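+/*
+ * rsxx_dma_stripe_setup() derives the striping masks from the configured
+ * stripe size. Example with illustrative values only: stripe_size8 = 4096
+ * and n_targets = 4 give lower_mask = 0xfff, target_shift = 12,
+ * target_mask = 0x3 and upper_shift = 2.
+ */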
+static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card,
+                             unsigned int stripe_size8)
+{
+       if (!is_power_of_2(stripe_size8)) {
+               dev_err(CARD_TO_DEV(card),
+                       "stripe_size is NOT a power of 2!\n");
+               return -EINVAL;
+       }
+
+       card->_stripe.lower_mask = stripe_size8 - 1;
+
+       card->_stripe.upper_mask  = ~(card->_stripe.lower_mask);
+       card->_stripe.upper_shift = ffs(card->n_targets) - 1;
+
+       card->_stripe.target_mask = card->n_targets - 1;
+       card->_stripe.target_shift = ffs(stripe_size8) - 1;
+
+       dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask   = x%016llx\n",
+               card->_stripe.lower_mask);
+       dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift  = x%016llx\n",
+               card->_stripe.upper_shift);
+       dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask   = x%016llx\n",
+               card->_stripe.upper_mask);
+       dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask  = x%016llx\n",
+               card->_stripe.target_mask);
+       dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n",
+               card->_stripe.target_shift);
+
+       return 0;
+}
+
+static int rsxx_dma_configure(struct rsxx_cardinfo *card)
+{
+       u32 intr_coal;
+
+       intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode,
+                                     card->config.data.intr_coal.count,
+                                     card->config.data.intr_coal.latency);
+       iowrite32(intr_coal, card->regmap + INTR_COAL);
+
+       return rsxx_dma_stripe_setup(card, card->config.data.stripe_size);
+}
+
+int rsxx_dma_setup(struct rsxx_cardinfo *card)
+{
+       unsigned long flags;
+       int st;
+       int i;
+
+       dev_info(CARD_TO_DEV(card),
+               "Initializing %d DMA targets\n",
+               card->n_targets);
+
+       /* Regmap is divided up into 4K chunks. One for each DMA channel */
+       for (i = 0; i < card->n_targets; i++)
+               card->ctrl[i].regmap = card->regmap + (i * 4096);
+
+       card->dma_fault = 0;
+
+       /* Reset the DMA queues */
+       rsxx_dma_queue_reset(card);
+
+       /************* Setup DMA Control *************/
+       for (i = 0; i < card->n_targets; i++) {
+               st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]);
+               if (st)
+                       goto failed_dma_setup;
+
+               card->ctrl[i].card = card;
+               card->ctrl[i].id = i;
+       }
+
+       card->scrub_hard = 1;
+
+       if (card->config_valid)
+               rsxx_dma_configure(card);
+
+       /* Enable the interrupts after all setup has completed. */
+       for (i = 0; i < card->n_targets; i++) {
+               spin_lock_irqsave(&card->irq_lock, flags);
+               rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i));
+               spin_unlock_irqrestore(&card->irq_lock, flags);
+       }
+
+       return 0;
+
+failed_dma_setup:
+       for (i = 0; i < card->n_targets; i++) {
+               struct rsxx_dma_ctrl *ctrl = &card->ctrl[i];
+
+               if (ctrl->issue_wq) {
+                       destroy_workqueue(ctrl->issue_wq);
+                       ctrl->issue_wq = NULL;
+               }
+
+               if (ctrl->done_wq) {
+                       destroy_workqueue(ctrl->done_wq);
+                       ctrl->done_wq = NULL;
+               }
+
+               if (ctrl->trackers)
+                       vfree(ctrl->trackers);
+
+               if (ctrl->status.buf)
+                       pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
+                                           ctrl->status.buf,
+                                           ctrl->status.dma_addr);
+               if (ctrl->cmd.buf)
+                       pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
+                                           ctrl->cmd.buf, ctrl->cmd.dma_addr);
+       }
+
+       return st;
+}
+
+
+void rsxx_dma_destroy(struct rsxx_cardinfo *card)
+{
+       struct rsxx_dma_ctrl *ctrl;
+       struct rsxx_dma *dma;
+       int i, j;
+       int cnt = 0;
+
+       for (i = 0; i < card->n_targets; i++) {
+               ctrl = &card->ctrl[i];
+
+               if (ctrl->issue_wq) {
+                       destroy_workqueue(ctrl->issue_wq);
+                       ctrl->issue_wq = NULL;
+               }
+
+               if (ctrl->done_wq) {
+                       destroy_workqueue(ctrl->done_wq);
+                       ctrl->done_wq = NULL;
+               }
+
+               if (timer_pending(&ctrl->activity_timer))
+                       del_timer_sync(&ctrl->activity_timer);
+
+               /* Clean up the DMA queue */
+               spin_lock(&ctrl->queue_lock);
+               cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
+               spin_unlock(&ctrl->queue_lock);
+
+               if (cnt)
+                       dev_info(CARD_TO_DEV(card),
+                               "Freed %d queued DMAs on channel %d\n",
+                               cnt, i);
+
+               /* Clean up issued DMAs */
+               for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
+                       dma = get_tracker_dma(ctrl->trackers, j);
+                       if (dma) {
+                               pci_unmap_page(card->dev, dma->dma_addr,
+                                              get_dma_size(dma),
+                                              (dma->cmd == HW_CMD_BLK_WRITE) ?
+                                              PCI_DMA_TODEVICE :
+                                              PCI_DMA_FROMDEVICE);
+                               kmem_cache_free(rsxx_dma_pool, dma);
+                               cnt++;
+                       }
+               }
+
+               if (cnt)
+                       dev_info(CARD_TO_DEV(card),
+                               "Freed %d pending DMAs on channel %d\n",
+                               cnt, i);
+
+               vfree(ctrl->trackers);
+
+               pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
+                                   ctrl->status.buf, ctrl->status.dma_addr);
+               pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
+                                   ctrl->cmd.buf, ctrl->cmd.dma_addr);
+       }
+}
+
+
+int rsxx_dma_init(void)
+{
+       rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN);
+       if (!rsxx_dma_pool)
+               return -ENOMEM;
+
+       return 0;
+}
+
+
+void rsxx_dma_cleanup(void)
+{
+       kmem_cache_destroy(rsxx_dma_pool);
+}
+
diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h
new file mode 100644 (file)
index 0000000..2e50b65
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+* Filename: rsxx.h
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifndef __RSXX_H__
+#define __RSXX_H__
+
+/*----------------- IOCTL Definitions -------------------*/
+
+struct rsxx_reg_access {
+       __u32 addr;
+       __u32 cnt;
+       __u32 stat;
+       __u32 stream;
+       __u32 data[8];
+};
+
+#define RSXX_MAX_REG_CNT       (8 * (sizeof(__u32)))
+
+#define RSXX_IOC_MAGIC 'r'
+
+#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access)
+#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access)
+
+#endif /* __RSXX_H__ */
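As a usage illustration only (not part of this patch): the two ioctls above exchange a struct rsxx_reg_access with userspace. A minimal sketch of a caller might look like the following; the device node path "/dev/rsxx0" and the register address are assumptions made for the example, not values defined by this header.

/* Hypothetical userspace sketch; device path and addr are illustrative only. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>        /* __u32, needed by rsxx.h */
#include <linux/ioctl.h>        /* _IOWR, needed by rsxx.h */
#include "rsxx.h"

int main(void)
{
        struct rsxx_reg_access reg = {
                .addr   = 0x80001004,   /* assumed address, for illustration */
                .cnt    = 4,            /* number of bytes to transfer */
                .stream = 0,            /* non-streaming access */
        };
        int fd = open("/dev/rsxx0", O_RDWR);    /* hypothetical node name */

        if (fd < 0)
                return 1;
        if (ioctl(fd, RSXX_GETREG, &reg) == 0)
                printf("stat=0x%x data[0]=0x%x\n", reg.stat, reg.data[0]);
        close(fd);
        return 0;
}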
diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h
new file mode 100644 (file)
index 0000000..c025fe5
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+* Filename: rsxx_cfg.h
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifndef __RSXX_CFG_H__
+#define __RSXX_CFG_H__
+
+/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */
+#include <linux/types.h>
+
+/*
+ * The card config version must match the driver's expected version. If it does
+ * not, the DMA interfaces will not be attached and the user will need to
+ * initialize/upgrade the card configuration using the card config utility.
+ */
+#define RSXX_CFG_VERSION       4
+
+struct card_cfg_hdr {
+       __u32   version;
+       __u32   crc;
+};
+
+struct card_cfg_data {
+       __u32   block_size;
+       __u32   stripe_size;
+       __u32   vendor_id;
+       __u32   cache_order;
+       struct {
+               __u32   mode;   /* Disabled, manual, auto-tune... */
+               __u32   count;  /* Number of intr to coalesce     */
+               __u32   latency;/* Max wait time (in ns)          */
+       } intr_coal;
+};
+
+struct rsxx_card_cfg {
+       struct card_cfg_hdr     hdr;
+       struct card_cfg_data    data;
+};
+
+/* Vendor ID Values */
+#define RSXX_VENDOR_ID_TMS_IBM         0
+#define RSXX_VENDOR_ID_DSI             1
+#define RSXX_VENDOR_COUNT              2
+
+/* Interrupt Coalescing Values */
+#define RSXX_INTR_COAL_DISABLED           0
+#define RSXX_INTR_COAL_EXPLICIT           1
+#define RSXX_INTR_COAL_AUTO_TUNE          2
+
+
+#endif /* __RSXX_CFG_H__ */
+
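Regarding the RSXX_CFG_VERSION note above, a minimal sketch of the kind of check it implies is shown below. The function name and error handling are assumptions for illustration, not the driver's actual code. Per the NOTE at the top of this header, on-card values are big endian, so a raw value read from the card would need byte swapping before the comparison.

/* Illustrative sketch only; assumes a kernel context with <linux/errno.h>
 * and "rsxx_cfg.h" available. */
static int example_validate_card_cfg(const struct rsxx_card_cfg *cfg)
{
        /* cfg->hdr.version is assumed to already be in CPU byte order
         * (e.g. converted with be32_to_cpu() after reading it from the card). */
        if (cfg->hdr.version != RSXX_CFG_VERSION)
                return -EINVAL; /* stale config: DMA interfaces stay detached */
        return 0;
}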
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
new file mode 100644 (file)
index 0000000..a1ac907
--- /dev/null
@@ -0,0 +1,399 @@
+/*
+* Filename: rsxx_priv.h
+*
+*
+* Authors: Joshua Morris <josh.h.morris@us.ibm.com>
+*      Philip Kelleher <pjk1939@linux.vnet.ibm.com>
+*
+* (C) Copyright 2013 IBM Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation; either version 2 of the
+* License, or (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software Foundation,
+* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#ifndef __RSXX_PRIV_H__
+#define __RSXX_PRIV_H__
+
+#include <linux/version.h>
+#include <linux/semaphore.h>
+
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <linux/workqueue.h>
+#include <linux/bio.h>
+#include <linux/vmalloc.h>
+#include <linux/timer.h>
+#include <linux/ioctl.h>
+
+#include "rsxx.h"
+#include "rsxx_cfg.h"
+
+struct proc_cmd;
+
+#define PCI_VENDOR_ID_TMS_IBM          0x15B6
+#define PCI_DEVICE_ID_RS70_FLASH       0x0019
+#define PCI_DEVICE_ID_RS70D_FLASH      0x001A
+#define PCI_DEVICE_ID_RS80_FLASH       0x001C
+#define PCI_DEVICE_ID_RS81_FLASH       0x001E
+
+#define RS70_PCI_REV_SUPPORTED 4
+
+#define DRIVER_NAME "rsxx"
+#define DRIVER_VERSION "3.7"
+
+/* Block size is 4096 */
+#define RSXX_HW_BLK_SHIFT              12
+#define RSXX_HW_BLK_SIZE               (1 << RSXX_HW_BLK_SHIFT)
+#define RSXX_HW_BLK_MASK               (RSXX_HW_BLK_SIZE - 1)
+
+#define MAX_CREG_DATA8 32
+#define LOG_BUF_SIZE8  128
+
+#define RSXX_MAX_OUTSTANDING_CMDS      255
+#define RSXX_CS_IDX_MASK               0xff
+
+#define RSXX_MAX_TARGETS       8
+
+struct dma_tracker_list;
+
+/* DMA Command/Status Buffer structure */
+struct rsxx_cs_buffer {
+       dma_addr_t      dma_addr;
+       void            *buf;
+       u32             idx;
+};
+
+struct rsxx_dma_stats {
+       u32 crc_errors;
+       u32 hard_errors;
+       u32 soft_errors;
+       u32 writes_issued;
+       u32 writes_failed;
+       u32 reads_issued;
+       u32 reads_failed;
+       u32 reads_retried;
+       u32 discards_issued;
+       u32 discards_failed;
+       u32 done_rescheduled;
+       u32 issue_rescheduled;
+       u32 sw_q_depth;         /* Number of DMAs on the SW queue. */
+       atomic_t hw_q_depth;    /* Number of DMAs queued to HW. */
+};
+
+struct rsxx_dma_ctrl {
+       struct rsxx_cardinfo            *card;
+       int                             id;
+       void                            __iomem *regmap;
+       struct rsxx_cs_buffer           status;
+       struct rsxx_cs_buffer           cmd;
+       u16                             e_cnt;
+       spinlock_t                      queue_lock;
+       struct list_head                queue;
+       struct workqueue_struct         *issue_wq;
+       struct work_struct              issue_dma_work;
+       struct workqueue_struct         *done_wq;
+       struct work_struct              dma_done_work;
+       struct timer_list               activity_timer;
+       struct dma_tracker_list         *trackers;
+       struct rsxx_dma_stats           stats;
+};
+
+struct rsxx_cardinfo {
+       struct pci_dev          *dev;
+       unsigned int            halt;
+
+       void                    __iomem *regmap;
+       spinlock_t              irq_lock;
+       unsigned int            isr_mask;
+       unsigned int            ier_mask;
+
+       struct rsxx_card_cfg    config;
+       int                     config_valid;
+
+       /* Embedded CPU Communication */
+       struct {
+               spinlock_t              lock;
+               bool                    active;
+               struct creg_cmd         *active_cmd;
+               struct work_struct      done_work;
+               struct list_head        queue;
+               unsigned int            q_depth;
+               /* Cache the creg status to prevent ioreads */
+               struct {
+                       u32             stat;
+                       u32             failed_cancel_timer;
+                       u32             creg_timeout;
+               } creg_stats;
+               struct timer_list       cmd_timer;
+               struct mutex            reset_lock;
+               int                     reset;
+       } creg_ctrl;
+
+       struct {
+               char tmp[MAX_CREG_DATA8];
+               char buf[LOG_BUF_SIZE8]; /* terminated */
+               int buf_len;
+       } log;
+
+       struct work_struct      event_work;
+       unsigned int            state;
+       u64                     size8;
+
+       /* Lock the device attach/detach function */
+       struct mutex            dev_lock;
+
+       /* Block Device Variables */
+       bool                    bdev_attached;
+       int                     disk_id;
+       int                     major;
+       struct request_queue    *queue;
+       struct gendisk          *gendisk;
+       struct {
+               /* Used to convert a byte address to a device address. */
+               u64 lower_mask;
+               u64 upper_shift;
+               u64 upper_mask;
+               u64 target_mask;
+               u64 target_shift;
+       } _stripe;
+       unsigned int            dma_fault;
+
+       int                     scrub_hard;
+
+       int                     n_targets;
+       struct rsxx_dma_ctrl    *ctrl;
+};
+
+enum rsxx_pci_regmap {
+       HWID            = 0x00, /* Hardware Identification Register */
+       SCRATCH         = 0x04, /* Scratch/Debug Register */
+       RESET           = 0x08, /* Reset Register */
+       ISR             = 0x10, /* Interrupt Status Register */
+       IER             = 0x14, /* Interrupt Enable Register */
+       IPR             = 0x18, /* Interrupt Poll Register */
+       CB_ADD_LO       = 0x20, /* Command Host Buffer Address [31:0] */
+       CB_ADD_HI       = 0x24, /* Command Host Buffer Address [63:32]*/
+       HW_CMD_IDX      = 0x28, /* Hardware Processed Command Index */
+       SW_CMD_IDX      = 0x2C, /* Software Processed Command Index */
+       SB_ADD_LO       = 0x30, /* Status Host Buffer Address [31:0] */
+       SB_ADD_HI       = 0x34, /* Status Host Buffer Address [63:32] */
+       HW_STATUS_CNT   = 0x38, /* Hardware Status Counter */
+       SW_STATUS_CNT   = 0x3C, /* Deprecated */
+       CREG_CMD        = 0x40, /* CPU Command Register */
+       CREG_ADD        = 0x44, /* CPU Address Register */
+       CREG_CNT        = 0x48, /* CPU Count Register */
+       CREG_STAT       = 0x4C, /* CPU Status Register */
+       CREG_DATA0      = 0x50, /* CPU Data Registers */
+       CREG_DATA1      = 0x54,
+       CREG_DATA2      = 0x58,
+       CREG_DATA3      = 0x5C,
+       CREG_DATA4      = 0x60,
+       CREG_DATA5      = 0x64,
+       CREG_DATA6      = 0x68,
+       CREG_DATA7      = 0x6c,
+       INTR_COAL       = 0x70, /* Interrupt Coalescing Register */
+       HW_ERROR        = 0x74, /* Card Error Register */
+       PCI_DEBUG0      = 0x78, /* PCI Debug Registers */
+       PCI_DEBUG1      = 0x7C,
+       PCI_DEBUG2      = 0x80,
+       PCI_DEBUG3      = 0x84,
+       PCI_DEBUG4      = 0x88,
+       PCI_DEBUG5      = 0x8C,
+       PCI_DEBUG6      = 0x90,
+       PCI_DEBUG7      = 0x94,
+       PCI_POWER_THROTTLE = 0x98,
+       PERF_CTRL       = 0x9c,
+       PERF_TIMER_LO   = 0xa0,
+       PERF_TIMER_HI   = 0xa4,
+       PERF_RD512_LO   = 0xa8,
+       PERF_RD512_HI   = 0xac,
+       PERF_WR512_LO   = 0xb0,
+       PERF_WR512_HI   = 0xb4,
+};
+
+enum rsxx_intr {
+       CR_INTR_DMA0    = 0x00000001,
+       CR_INTR_CREG    = 0x00000002,
+       CR_INTR_DMA1    = 0x00000004,
+       CR_INTR_EVENT   = 0x00000008,
+       CR_INTR_DMA2    = 0x00000010,
+       CR_INTR_DMA3    = 0x00000020,
+       CR_INTR_DMA4    = 0x00000040,
+       CR_INTR_DMA5    = 0x00000080,
+       CR_INTR_DMA6    = 0x00000100,
+       CR_INTR_DMA7    = 0x00000200,
+       CR_INTR_DMA_ALL = 0x000003f5,
+       CR_INTR_ALL     = 0xffffffff,
+};
+
+static inline int CR_INTR_DMA(int N)
+{
+       static const unsigned int _CR_INTR_DMA[] = {
+               CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3,
+               CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7
+       };
+       return _CR_INTR_DMA[N];
+}
+enum rsxx_pci_reset {
+       DMA_QUEUE_RESET         = 0x00000001,
+};
+
+enum rsxx_pci_revision {
+       RSXX_DISCARD_SUPPORT = 2,
+};
+
+enum rsxx_creg_cmd {
+       CREG_CMD_TAG_MASK       = 0x0000FF00,
+       CREG_OP_WRITE           = 0x000000C0,
+       CREG_OP_READ            = 0x000000E0,
+};
+
+enum rsxx_creg_addr {
+       CREG_ADD_CARD_CMD               = 0x80001000,
+       CREG_ADD_CARD_STATE             = 0x80001004,
+       CREG_ADD_CARD_SIZE              = 0x8000100c,
+       CREG_ADD_CAPABILITIES           = 0x80001050,
+       CREG_ADD_LOG                    = 0x80002000,
+       CREG_ADD_NUM_TARGETS            = 0x80003000,
+       CREG_ADD_CONFIG                 = 0xB0000000,
+};
+
+enum rsxx_creg_card_cmd {
+       CARD_CMD_STARTUP                = 1,
+       CARD_CMD_SHUTDOWN               = 2,
+       CARD_CMD_LOW_LEVEL_FORMAT       = 3,
+       CARD_CMD_FPGA_RECONFIG_BR       = 4,
+       CARD_CMD_FPGA_RECONFIG_MAIN     = 5,
+       CARD_CMD_BACKUP                 = 6,
+       CARD_CMD_RESET                  = 7,
+       CARD_CMD_deprecated             = 8,
+       CARD_CMD_UNINITIALIZE           = 9,
+       CARD_CMD_DSTROY_EMERGENCY       = 10,
+       CARD_CMD_DSTROY_NORMAL          = 11,
+       CARD_CMD_DSTROY_EXTENDED        = 12,
+       CARD_CMD_DSTROY_ABORT           = 13,
+};
+
+enum rsxx_card_state {
+       CARD_STATE_SHUTDOWN             = 0x00000001,
+       CARD_STATE_STARTING             = 0x00000002,
+       CARD_STATE_FORMATTING           = 0x00000004,
+       CARD_STATE_UNINITIALIZED        = 0x00000008,
+       CARD_STATE_GOOD                 = 0x00000010,
+       CARD_STATE_SHUTTING_DOWN        = 0x00000020,
+       CARD_STATE_FAULT                = 0x00000040,
+       CARD_STATE_RD_ONLY_FAULT        = 0x00000080,
+       CARD_STATE_DSTROYING            = 0x00000100,
+};
+
+enum rsxx_led {
+       LED_DEFAULT     = 0x0,
+       LED_IDENTIFY    = 0x1,
+       LED_SOAK        = 0x2,
+};
+
+enum rsxx_creg_flash_lock {
+       CREG_FLASH_LOCK         = 1,
+       CREG_FLASH_UNLOCK       = 2,
+};
+
+enum rsxx_card_capabilities {
+       CARD_CAP_SUBPAGE_WRITES = 0x00000080,
+};
+
+enum rsxx_creg_stat {
+       CREG_STAT_STATUS_MASK   = 0x00000003,
+       CREG_STAT_SUCCESS       = 0x1,
+       CREG_STAT_ERROR         = 0x2,
+       CREG_STAT_CHAR_PENDING  = 0x00000004, /* Character I/O pending bit */
+       CREG_STAT_LOG_PENDING   = 0x00000008, /* HW log message pending bit */
+       CREG_STAT_TAG_MASK      = 0x0000ff00,
+};
+
+static inline unsigned int CREG_DATA(int N)
+{
+       return CREG_DATA0 + (N << 2);
+}
+
+/*----------------- Convenient Log Wrappers -------------------*/
+#define CARD_TO_DEV(__CARD)    (&(__CARD)->dev->dev)
+
+/***** config.c *****/
+int rsxx_load_config(struct rsxx_cardinfo *card);
+
+/***** core.c *****/
+void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr);
+void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr);
+void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
+                                unsigned int intr);
+void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
+                                 unsigned int intr);
+
+/***** dev.c *****/
+int rsxx_attach_dev(struct rsxx_cardinfo *card);
+void rsxx_detach_dev(struct rsxx_cardinfo *card);
+int rsxx_setup_dev(struct rsxx_cardinfo *card);
+void rsxx_destroy_dev(struct rsxx_cardinfo *card);
+int rsxx_dev_init(void);
+void rsxx_dev_cleanup(void);
+
+/***** dma.c ****/
+typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
+                               void *cb_data,
+                               unsigned int status);
+int rsxx_dma_setup(struct rsxx_cardinfo *card);
+void rsxx_dma_destroy(struct rsxx_cardinfo *card);
+int rsxx_dma_init(void);
+void rsxx_dma_cleanup(void);
+int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
+                          struct bio *bio,
+                          atomic_t *n_dmas,
+                          rsxx_dma_cb cb,
+                          void *cb_data);
+
+/***** cregs.c *****/
+int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr,
+                       unsigned int size8,
+                       void *data,
+                       int byte_stream);
+int rsxx_creg_read(struct rsxx_cardinfo *card,
+                      u32 addr,
+                      unsigned int size8,
+                      void *data,
+                      int byte_stream);
+int rsxx_read_hw_log(struct rsxx_cardinfo *card);
+int rsxx_get_card_state(struct rsxx_cardinfo *card,
+                           unsigned int *state);
+int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8);
+int rsxx_get_num_targets(struct rsxx_cardinfo *card,
+                            unsigned int *n_targets);
+int rsxx_get_card_capabilities(struct rsxx_cardinfo *card,
+                                  u32 *capabilities);
+int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd);
+int rsxx_creg_setup(struct rsxx_cardinfo *card);
+void rsxx_creg_destroy(struct rsxx_cardinfo *card);
+int rsxx_creg_init(void);
+void rsxx_creg_cleanup(void);
+
+int rsxx_reg_access(struct rsxx_cardinfo *card,
+                       struct rsxx_reg_access __user *ucmd,
+                       int read);
+
+
+
+#endif /* __RSXX_PRIV_H__ */
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 57763c54363aaf18a03f44464c7496f1c0fd4232..758f2ac878cfe5bfc8f5c891f5f0a48cc4b2e3fd 100644 (file)
@@ -1090,10 +1090,13 @@ static const struct block_device_operations floppy_fops = {
 static void swim3_mb_event(struct macio_dev* mdev, int mb_state)
 {
        struct floppy_state *fs = macio_get_drvdata(mdev);
-       struct swim3 __iomem *sw = fs->swim3;
+       struct swim3 __iomem *sw;
 
        if (!fs)
                return;
+
+       sw = fs->swim3;
+
        if (mb_state != MB_FD)
                return;
 
diff --git a/drivers/block/xd.c b/drivers/block/xd.c
deleted file mode 100644 (file)
index ff54052..0000000
+++ /dev/null
@@ -1,1123 +0,0 @@
-/*
- * This file contains the driver for an XT hard disk controller
- * (at least the DTC 5150X) for Linux.
- *
- * Author: Pat Mackinlay, pat@it.com.au
- * Date: 29/09/92
- * 
- * Revised: 01/01/93, ...
- *
- * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler,
- *   kevinf@agora.rain.com)
- * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and
- *   Wim Van Dorst.
- *
- * Revised: 04/04/94 by Risto Kankkunen
- *   Moved the detection code from xd_init() to xd_geninit() as it needed
- *   interrupts enabled and Linus didn't want to enable them in that first
- *   phase. xd_geninit() is the place to do these kinds of things anyway,
- *   he says.
- *
- * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu
- *
- * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl
- *   Fixed some problems with disk initialization and module initiation.
- *   Added support for manual geometry setting (except Seagate controllers)
- *   in form:
- *      xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>]
- *   Recovered DMA access. Abridged messages. Added support for DTC5051CX,
- *   WD1002-27X & XEBEC controllers. Driver uses now some jumper settings.
- *   Extended ioctl() support.
- *
- * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect.
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/genhd.h>
-#include <linux/hdreg.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/wait.h>
-#include <linux/blkdev.h>
-#include <linux/mutex.h>
-#include <linux/blkpg.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/gfp.h>
-
-#include <asm/uaccess.h>
-#include <asm/dma.h>
-
-#include "xd.h"
-
-static DEFINE_MUTEX(xd_mutex);
-static void __init do_xd_setup (int *integers);
-#ifdef MODULE
-static int xd[5] = { -1,-1,-1,-1, };
-#endif
-
-#define XD_DONT_USE_DMA                0  /* Initial value. may be overriden using
-                                     "nodma" module option */
-#define XD_INIT_DISK_DELAY     (30)  /* 30 ms delay during disk initialization */
-
-/* Above may need to be increased if a problem with the 2nd drive detection
-   (ST11M controller) or resetting a controller (WD) appears */
-
-static XD_INFO xd_info[XD_MAXDRIVES];
-
-/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS
-   signature and details to the following list of signatures. A BIOS signature is a string embedded into the first
-   few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG
-   command. Run DEBUG, and then you can examine your BIOS signature with:
-
-       d xxxx:0000
-
-   where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should
-   be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters
-   in the table are, in order:
-
-       offset                  ; this is the offset (in bytes) from the start of your ROM where the signature starts
-       signature               ; this is the actual text of the signature
-       xd_?_init_controller    ; this is the controller init routine used by your controller
-       xd_?_init_drive         ; this is the drive init routine used by your controller
-
-   The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is
-   made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your
-   best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and
-   may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>.
-
-   NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver
-   should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */
-
-#include <asm/page.h>
-#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size))
-#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size))
-static char *xd_dma_buffer;
-
-static XD_SIGNATURE xd_sigs[] __initdata = {
-       { 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */
-       { 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
-       { 0x000B,"CRD18A   Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */
-       { 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */
-       { 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
-       { 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */
-       { 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */
-       { 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */
-       { 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */
-       { 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */
-       { 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */
-       { 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" },
-       { 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */
-};
-
-static unsigned int xd_bases[] __initdata =
-{
-       0xC8000, 0xCA000, 0xCC000,
-       0xCE000, 0xD0000, 0xD2000,
-       0xD4000, 0xD6000, 0xD8000,
-       0xDA000, 0xDC000, 0xDE000,
-       0xE0000
-};
-
-static DEFINE_SPINLOCK(xd_lock);
-
-static struct gendisk *xd_gendisk[2];
-
-static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
-
-static const struct block_device_operations xd_fops = {
-       .owner  = THIS_MODULE,
-       .ioctl  = xd_ioctl,
-       .getgeo = xd_getgeo,
-};
-static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int);
-static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors;
-static u_char xd_override __initdata = 0, xd_type __initdata = 0;
-static u_short xd_iobase = 0x320;
-static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, };
-
-static volatile int xdc_busy;
-static struct timer_list xd_watchdog_int;
-
-static volatile u_char xd_error;
-static bool nodma = XD_DONT_USE_DMA;
-
-static struct request_queue *xd_queue;
-
-/* xd_init: register the block device number and set up pointer tables */
-static int __init xd_init(void)
-{
-       u_char i,controller;
-       unsigned int address;
-       int err;
-
-#ifdef MODULE
-       {
-               u_char count = 0;
-               for (i = 4; i > 0; i--)
-                       if (((xd[i] = xd[i-1]) >= 0) && !count)
-                               count = i;
-               if ((xd[0] = count))
-                       do_xd_setup(xd);
-       }
-#endif
-
-       init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog;
-
-       err = -EBUSY;
-       if (register_blkdev(XT_DISK_MAJOR, "xd"))
-               goto out1;
-
-       err = -ENOMEM;
-       xd_queue = blk_init_queue(do_xd_request, &xd_lock);
-       if (!xd_queue)
-               goto out1a;
-
-       if (xd_detect(&controller,&address)) {
-
-               printk("Detected a%s controller (type %d) at address %06x\n",
-                       xd_sigs[controller].name,controller,address);
-               if (!request_region(xd_iobase,4,"xd")) {
-                       printk("xd: Ports at 0x%x are not available\n",
-                               xd_iobase);
-                       goto out2;
-               }
-               if (controller)
-                       xd_sigs[controller].init_controller(address);
-               xd_drives = xd_initdrives(xd_sigs[controller].init_drive);
-               
-               printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n",
-                       xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma);
-       }
-
-       /*
-        * With the drive detected, xd_maxsectors should now be known.
-        * If xd_maxsectors is 0, nothing was detected and we fall through
-        * to return -ENODEV
-        */
-       if (!xd_dma_buffer && xd_maxsectors) {
-               xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
-               if (!xd_dma_buffer) {
-                       printk(KERN_ERR "xd: Out of memory.\n");
-                       goto out3;
-               }
-       }
-
-       err = -ENODEV;
-       if (!xd_drives)
-               goto out3;
-
-       for (i = 0; i < xd_drives; i++) {
-               XD_INFO *p = &xd_info[i];
-               struct gendisk *disk = alloc_disk(64);
-               if (!disk)
-                       goto Enomem;
-               p->unit = i;
-               disk->major = XT_DISK_MAJOR;
-               disk->first_minor = i<<6;
-               sprintf(disk->disk_name, "xd%c", i+'a');
-               disk->fops = &xd_fops;
-               disk->private_data = p;
-               disk->queue = xd_queue;
-               set_capacity(disk, p->heads * p->cylinders * p->sectors);
-               printk(" %s: CHS=%d/%d/%d\n", disk->disk_name,
-                       p->cylinders, p->heads, p->sectors);
-               xd_gendisk[i] = disk;
-       }
-
-       err = -EBUSY;
-       if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) {
-               printk("xd: unable to get IRQ%d\n",xd_irq);
-               goto out4;
-       }
-
-       if (request_dma(xd_dma,"xd")) {
-               printk("xd: unable to get DMA%d\n",xd_dma);
-               goto out5;
-       }
-
-       /* xd_maxsectors depends on controller - so set after detection */
-       blk_queue_max_hw_sectors(xd_queue, xd_maxsectors);
-
-       for (i = 0; i < xd_drives; i++)
-               add_disk(xd_gendisk[i]);
-
-       return 0;
-
-out5:
-       free_irq(xd_irq, NULL);
-out4:
-       for (i = 0; i < xd_drives; i++)
-               put_disk(xd_gendisk[i]);
-out3:
-       if (xd_maxsectors)
-               release_region(xd_iobase,4);
-
-       if (xd_dma_buffer)
-               xd_dma_mem_free((unsigned long)xd_dma_buffer,
-                               xd_maxsectors * 0x200);
-out2:
-       blk_cleanup_queue(xd_queue);
-out1a:
-       unregister_blkdev(XT_DISK_MAJOR, "xd");
-out1:
-       return err;
-Enomem:
-       err = -ENOMEM;
-       while (i--)
-               put_disk(xd_gendisk[i]);
-       goto out3;
-}
-
-/* xd_detect: scan the possible BIOS ROM locations for the signature strings */
-static u_char __init xd_detect (u_char *controller, unsigned int *address)
-{
-       int i, j;
-
-       if (xd_override)
-       {
-               *controller = xd_type;
-               *address = 0;
-               return(1);
-       }
-
-       for (i = 0; i < ARRAY_SIZE(xd_bases); i++) {
-               void __iomem *p = ioremap(xd_bases[i], 0x2000);
-               if (!p)
-                       continue;
-               for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) {
-                       const char *s = xd_sigs[j].string;
-                       if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) {
-                               *controller = j;
-                               xd_type = j;
-                               *address = xd_bases[i];
-                               iounmap(p);
-                               return 1;
-                       }
-               }
-               iounmap(p);
-       }
-       return 0;
-}
-
-/* do_xd_request: handle an incoming request */
-static void do_xd_request (struct request_queue * q)
-{
-       struct request *req;
-
-       if (xdc_busy)
-               return;
-
-       req = blk_fetch_request(q);
-       while (req) {
-               unsigned block = blk_rq_pos(req);
-               unsigned count = blk_rq_cur_sectors(req);
-               XD_INFO *disk = req->rq_disk->private_data;
-               int res = -EIO;
-               int retry;
-
-               if (req->cmd_type != REQ_TYPE_FS)
-                       goto done;
-               if (block + count > get_capacity(req->rq_disk))
-                       goto done;
-               for (retry = 0; (retry < XD_RETRIES) && !res; retry++)
-                       res = xd_readwrite(rq_data_dir(req), disk, req->buffer,
-                                          block, count);
-       done:
-               /* wrap up, 0 = success, -errno = fail */
-               if (!__blk_end_request_cur(req, res))
-                       req = blk_fetch_request(q);
-       }
-}
-
-static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-       XD_INFO *p = bdev->bd_disk->private_data;
-
-       geo->heads = p->heads;
-       geo->sectors = p->sectors;
-       geo->cylinders = p->cylinders;
-       return 0;
-}
-
-/* xd_ioctl: handle device ioctl's */
-static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg)
-{
-       switch (cmd) {
-               case HDIO_SET_DMA:
-                       if (!capable(CAP_SYS_ADMIN)) return -EACCES;
-                       if (xdc_busy) return -EBUSY;
-                       nodma = !arg;
-                       if (nodma && xd_dma_buffer) {
-                               xd_dma_mem_free((unsigned long)xd_dma_buffer,
-                                               xd_maxsectors * 0x200);
-                               xd_dma_buffer = NULL;
-                       } else if (!nodma && !xd_dma_buffer) {
-                               xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
-                               if (!xd_dma_buffer) {
-                                       nodma = XD_DONT_USE_DMA;
-                                       return -ENOMEM;
-                               }
-                       }
-                       return 0;
-               case HDIO_GET_DMA:
-                       return put_user(!nodma, (long __user *) arg);
-               case HDIO_GET_MULTCOUNT:
-                       return put_user(xd_maxsectors, (long __user *) arg);
-               default:
-                       return -EINVAL;
-       }
-}
-
-static int xd_ioctl(struct block_device *bdev, fmode_t mode,
-                            unsigned int cmd, unsigned long param)
-{
-       int ret;
-
-       mutex_lock(&xd_mutex);
-       ret = xd_locked_ioctl(bdev, mode, cmd, param);
-       mutex_unlock(&xd_mutex);
-
-       return ret;
-}
-
-/* xd_readwrite: handle a read/write request */
-static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count)
-{
-       int drive = p->unit;
-       u_char cmdblk[6],sense[4];
-       u_short track,cylinder;
-       u_char head,sector,control,mode = PIO_MODE,temp;
-       char **real_buffer;
-       register int i;
-       
-#ifdef DEBUG_READWRITE
-       printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count);
-#endif /* DEBUG_READWRITE */
-
-       spin_unlock_irq(&xd_lock);
-
-       control = p->control;
-       if (!xd_dma_buffer)
-               xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200);
-       while (count) {
-               temp = count < xd_maxsectors ? count : xd_maxsectors;
-
-               track = block / p->sectors;
-               head = track % p->heads;
-               cylinder = track / p->heads;
-               sector = block % p->sectors;
-
-#ifdef DEBUG_READWRITE
-               printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp);
-#endif /* DEBUG_READWRITE */
-
-               if (xd_dma_buffer) {
-                       mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200);
-                       real_buffer = &xd_dma_buffer;
-                       for (i=0; i < (temp * 0x200); i++)
-                               xd_dma_buffer[i] = buffer[i];
-               }
-               else
-                       real_buffer = &buffer;
-
-               xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control);
-
-               switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) {
-                       case 1:
-                               printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write"));
-                               xd_recalibrate(drive);
-                               spin_lock_irq(&xd_lock);
-                               return -EIO;
-                       case 2:
-                               if (sense[0] & 0x30) {
-                                       printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing"));
-                                       switch ((sense[0] & 0x30) >> 4) {
-                                       case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F);
-                                               break;
-                                       case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F);
-                                               break;
-                                       case 2: printk("command error, code = 0x%X",sense[0] & 0x0F);
-                                               break;
-                                       case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F);
-                                               break;
-                                       }
-                               }
-                               if (sense[0] & 0x80)
-                                       printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F);
-                               /*      reported drive number = (sense[1] & 0xE0) >> 5 */
-                               else
-                                       printk(" - no valid disk address\n");
-                               spin_lock_irq(&xd_lock);
-                               return -EIO;
-               }
-               if (xd_dma_buffer)
-                       for (i=0; i < (temp * 0x200); i++)
-                               buffer[i] = xd_dma_buffer[i];
-
-               count -= temp, buffer += temp * 0x200, block += temp;
-       }
-       spin_lock_irq(&xd_lock);
-       return 0;
-}
-
-/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */
-static void xd_recalibrate (u_char drive)
-{
-       u_char cmdblk[6];
-       
-       xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0);
-       if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8))
-               printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive);
-}
-
-/* xd_interrupt_handler: interrupt service routine */
-static irqreturn_t xd_interrupt_handler(int irq, void *dev_id)
-{
-       if (inb(XD_STATUS) & STAT_INTERRUPT) {                                                  /* check if it was our device */
-#ifdef DEBUG_OTHER
-               printk("xd_interrupt_handler: interrupt detected\n");
-#endif /* DEBUG_OTHER */
-               outb(0,XD_CONTROL);                                                             /* acknowledge interrupt */
-               wake_up(&xd_wait_int);  /* and wake up sleeping processes */
-               return IRQ_HANDLED;
-       }
-       else
-               printk("xd: unexpected interrupt\n");
-       return IRQ_NONE;
-}
-
-/* xd_setup_dma: set up the DMA controller for a data transfer */
-static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count)
-{
-       unsigned long f;
-       
-       if (nodma)
-               return (PIO_MODE);
-       if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) {
-#ifdef DEBUG_OTHER
-               printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n");
-#endif /* DEBUG_OTHER */
-               return (PIO_MODE);
-       }
-       
-       f=claim_dma_lock();
-       disable_dma(xd_dma);
-       clear_dma_ff(xd_dma);
-       set_dma_mode(xd_dma,mode);
-       set_dma_addr(xd_dma, (unsigned long) buffer);
-       set_dma_count(xd_dma,count);
-       
-       release_dma_lock(f);
-
-       return (DMA_MODE);                      /* use DMA and INT */
-}
-
-/* xd_build: put stuff into an array in a format suitable for the controller */
-static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control)
-{
-       cmdblk[0] = command;
-       cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F);
-       cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F);
-       cmdblk[3] = cylinder & 0xFF;
-       cmdblk[4] = count;
-       cmdblk[5] = control;
-       
-       return (cmdblk);
-}
-
-static void xd_watchdog (unsigned long unused)
-{
-       xd_error = 1;
-       wake_up(&xd_wait_int);
-}
-
-/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */
-static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout)
-{
-       u_long expiry = jiffies + timeout;
-       int success;
-
-       xdc_busy = 1;
-       while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry))
-               schedule_timeout_uninterruptible(1);
-       xdc_busy = 0;
-       return (success);
-}
-
-static inline u_int xd_wait_for_IRQ (void)
-{
-       unsigned long flags;
-       xd_watchdog_int.expires = jiffies + 8 * HZ;
-       add_timer(&xd_watchdog_int);
-       
-       flags=claim_dma_lock();
-       enable_dma(xd_dma);
-       release_dma_lock(flags);
-       
-       sleep_on(&xd_wait_int);
-       del_timer(&xd_watchdog_int);
-       xdc_busy = 0;
-       
-       flags=claim_dma_lock();
-       disable_dma(xd_dma);
-       release_dma_lock(flags);
-       
-       if (xd_error) {
-               printk("xd: missed IRQ - command aborted\n");
-               xd_error = 0;
-               return (1);
-       }
-       return (0);
-}
-
-/* xd_command: handle all data transfers necessary for a single command */
-static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout)
-{
-       u_char cmdblk[6],csb,complete = 0;
-
-#ifdef DEBUG_COMMAND
-       printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense);
-#endif /* DEBUG_COMMAND */
-
-       outb(0,XD_SELECT);
-       outb(mode,XD_CONTROL);
-
-       if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout))
-               return (1);
-
-       while (!complete) {
-               if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout))
-                       return (1);
-
-               switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) {
-                       case 0:
-                               if (mode == DMA_MODE) {
-                                       if (xd_wait_for_IRQ())
-                                               return (1);
-                               } else
-                                       outb(outdata ? *outdata++ : 0,XD_DATA);
-                               break;
-                       case STAT_INPUT:
-                               if (mode == DMA_MODE) {
-                                       if (xd_wait_for_IRQ())
-                                               return (1);
-                               } else
-                                       if (indata)
-                                               *indata++ = inb(XD_DATA);
-                                       else
-                                               inb(XD_DATA);
-                               break;
-                       case STAT_COMMAND:
-                               outb(command ? *command++ : 0,XD_DATA);
-                               break;
-                       case STAT_COMMAND | STAT_INPUT:
-                               complete = 1;
-                               break;
-               }
-       }
-       csb = inb(XD_DATA);
-
-       if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout))                                       /* wait until deselected */
-               return (1);
-
-       if (csb & CSB_ERROR) {                                                                  /* read sense data if error */
-               xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0);
-               if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT))
-                       printk("xd: warning! sense command failed!\n");
-       }
-
-#ifdef DEBUG_COMMAND
-       printk("xd_command: completed with csb = 0x%X\n",csb);
-#endif /* DEBUG_COMMAND */
-
-       return (csb & CSB_ERROR);
-}
-
-static u_char __init xd_initdrives (void (*init_drive)(u_char drive))
-{
-       u_char cmdblk[6],i,count = 0;
-
-       for (i = 0; i < XD_MAXDRIVES; i++) {
-               xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0);
-               if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) {
-                       msleep_interruptible(XD_INIT_DISK_DELAY);
-
-                       init_drive(count);
-                       count++;
-
-                       msleep_interruptible(XD_INIT_DISK_DELAY);
-               }
-       }
-       return (count);
-}
-
-static void __init xd_manual_geo_set (u_char drive)
-{
-       xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]);
-       xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]);
-       xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]);
-}
-
-static void __init xd_dtc_init_controller (unsigned int address)
-{
-       switch (address) {
-               case 0x00000:
-               case 0xC8000:   break;                  /*initial: 0x320 */
-               case 0xCA000:   xd_iobase = 0x324; 
-               case 0xD0000:                           /*5150CX*/
-               case 0xD8000:   break;                  /*5150CX & 5150XL*/
-               default:        printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address);
-                               break;
-       }
-       xd_maxsectors = 0x01;           /* my card seems to have trouble doing multi-block transfers? */
-
-       outb(0,XD_RESET);               /* reset the controller */
-}
-
-
-static void __init xd_dtc5150cx_init_drive (u_char drive)
-{
-       /* values from controller's BIOS - BIOS chip may be removed */
-       static u_short geometry_table[][4] = {
-               {0x200,8,0x200,0x100},
-               {0x267,2,0x267,0x267},
-               {0x264,4,0x264,0x80},
-               {0x132,4,0x132,0x0},
-               {0x132,2,0x80, 0x132},
-               {0x177,8,0x177,0x0},
-               {0x132,8,0x84, 0x0},
-               {},  /* not used */
-               {0x132,6,0x80, 0x100},
-               {0x200,6,0x100,0x100},
-               {0x264,2,0x264,0x80},
-               {0x280,4,0x280,0x100},
-               {0x2B9,3,0x2B9,0x2B9},
-               {0x2B9,5,0x2B9,0x2B9},
-               {0x280,6,0x280,0x100},
-               {0x132,4,0x132,0x0}};
-       u_char n;
-
-       n = inb(XD_JUMPER);
-       n = (drive ? n : (n >> 2)) & 0x33;
-       n = (n | (n >> 2)) & 0x0F;
-       if (xd_geo[3*drive])
-               xd_manual_geo_set(drive);
-       else
-               if (n != 7) {   
-                       xd_info[drive].heads = (u_char)(geometry_table[n][1]);                  /* heads */
-                       xd_info[drive].cylinders = geometry_table[n][0];        /* cylinders */
-                       xd_info[drive].sectors = 17;                            /* sectors */
-#if 0
-                       xd_info[drive].rwrite = geometry_table[n][2];   /* reduced write */
-                       xd_info[drive].precomp = geometry_table[n][3]           /* write precomp */
-                       xd_info[drive].ecc = 0x0B;                              /* ecc length */
-#endif /* 0 */
-               }
-               else {
-                       printk("xd%c: undetermined drive geometry\n",'a'+drive);
-                       return;
-               }
-       xd_info[drive].control = 5;                             /* control byte */
-       xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B);
-       xd_recalibrate(drive);
-}
-
-static void __init xd_dtc_init_drive (u_char drive)
-{
-       u_char cmdblk[6],buf[64];
-
-       xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0);
-       if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
-               xd_info[drive].heads = buf[0x0A];                       /* heads */
-               xd_info[drive].cylinders = ((u_short *) (buf))[0x04];   /* cylinders */
-               xd_info[drive].sectors = 17;                            /* sectors */
-               if (xd_geo[3*drive])
-                       xd_manual_geo_set(drive);
-#if 0
-               xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05];  /* reduced write */
-               xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06]; /* write precomp */
-               xd_info[drive].ecc = buf[0x0F];                         /* ecc length */
-#endif /* 0 */
-               xd_info[drive].control = 0;                             /* control byte */
-
-               xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]);
-               xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7);
-               if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2))
-                       printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive);
-       }
-       else
-               printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive);
-}
-
-static void __init xd_wd_init_controller (unsigned int address)
-{
-       switch (address) {
-               case 0x00000:
-               case 0xC8000:   break;                  /*initial: 0x320 */
-               case 0xCA000:   xd_iobase = 0x324; break;
-               case 0xCC000:   xd_iobase = 0x328; break;
-               case 0xCE000:   xd_iobase = 0x32C; break;
-               case 0xD0000:   xd_iobase = 0x328; break; /* ? */
-               case 0xD8000:   xd_iobase = 0x32C; break; /* ? */
-               default:        printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address);
-                               break;
-       }
-       xd_maxsectors = 0x01;           /* this one doesn't wrap properly either... */
-
-       outb(0,XD_RESET);               /* reset the controller */
-
-       msleep(XD_INIT_DISK_DELAY);
-}
-
-static void __init xd_wd_init_drive (u_char drive)
-{
-       /* values from controller's BIOS - BIOS may be disabled */
-       static u_short geometry_table[][4] = {
-               {0x264,4,0x1C2,0x1C2},   /* common part */
-               {0x132,4,0x099,0x0},
-               {0x267,2,0x1C2,0x1C2},
-               {0x267,4,0x1C2,0x1C2},
-
-               {0x334,6,0x335,0x335},   /* 1004 series RLL */
-               {0x30E,4,0x30F,0x3DC},
-               {0x30E,2,0x30F,0x30F},
-               {0x267,4,0x268,0x268},
-
-               {0x3D5,5,0x3D6,0x3D6},   /* 1002 series RLL */
-               {0x3DB,7,0x3DC,0x3DC},
-               {0x264,4,0x265,0x265},
-               {0x267,4,0x268,0x268}};
-
-       u_char cmdblk[6],buf[0x200];
-       u_char n = 0,rll,jumper_state,use_jumper_geo;
-       u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6');
-       
-       jumper_state = ~(inb(0x322));
-       if (jumper_state & 0x40)
-               xd_irq = 9;
-       rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0;
-       xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0);
-       if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
-               xd_info[drive].heads = buf[0x1AF];                              /* heads */
-               xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6];       /* cylinders */
-               xd_info[drive].sectors = 17;                                    /* sectors */
-               if (xd_geo[3*drive])
-                       xd_manual_geo_set(drive);
-#if 0
-               xd_info[drive].rwrite = ((u_short *) (buf))[0xD8];              /* reduced write */
-               xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA];            /* write precomp */
-               xd_info[drive].ecc = buf[0x1B4];                                /* ecc length */
-#endif /* 0 */
-               xd_info[drive].control = buf[0x1B5];                            /* control byte */
-               use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders);
-               if (xd_geo[3*drive]) {
-                       xd_manual_geo_set(drive);
-                       xd_info[drive].control = rll ? 7 : 5;
-               }
-               else if (use_jumper_geo) {
-                       n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll;
-                       xd_info[drive].cylinders = geometry_table[n][0];
-                       xd_info[drive].heads = (u_char)(geometry_table[n][1]);
-                       xd_info[drive].control = rll ? 7 : 5;
-#if 0
-                       xd_info[drive].rwrite = geometry_table[n][2];
-                       xd_info[drive].wprecomp = geometry_table[n][3];
-                       xd_info[drive].ecc = 0x0B;
-#endif /* 0 */
-               }
-               if (!wd_1002) {
-                       if (use_jumper_geo)
-                               xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,
-                                       geometry_table[n][2],geometry_table[n][3],0x0B);
-                       else
-                               xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,
-                                       ((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]);
-               }
-       /* The 1002-based RLL controller requests converted addressing but reports physical geometry
-          (physical 26 sectors, logical 17 sectors).
-          The behaviour of 1004-based controllers is unknown. */
-               if (rll && wd_1002) {
-                       if ((xd_info[drive].cylinders *= 26,
-                            xd_info[drive].cylinders /= 17) > 1023)
-                               xd_info[drive].cylinders = 1023;  /* 1024 ? */
-#if 0
-                       xd_info[drive].rwrite *= 26; 
-                       xd_info[drive].rwrite /= 17;
-                       xd_info[drive].wprecomp *= 26;
-                       xd_info[drive].wprecomp /= 17;
-#endif /* 0 */
-               }
-       }
-       else
-               printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive);        
-
-}
-
-static void __init xd_seagate_init_controller (unsigned int address)
-{
-       switch (address) {
-               case 0x00000:
-               case 0xC8000:   break;                  /*initial: 0x320 */
-               case 0xD0000:   xd_iobase = 0x324; break;
-               case 0xD8000:   xd_iobase = 0x328; break;
-               case 0xE0000:   xd_iobase = 0x32C; break;
-               default:        printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address);
-                               break;
-       }
-       xd_maxsectors = 0x40;
-
-       outb(0,XD_RESET);               /* reset the controller */
-}
-
-static void __init xd_seagate_init_drive (u_char drive)
-{
-       u_char cmdblk[6],buf[0x200];
-
-       xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0);
-       if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) {
-               xd_info[drive].heads = buf[0x04];                               /* heads */
-               xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03];        /* cylinders */
-               xd_info[drive].sectors = buf[0x05];                             /* sectors */
-               xd_info[drive].control = 0;                                     /* control byte */
-       }
-       else
-               printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive);
-}
-
-/* Omti support courtesy Dirk Melchers */
-static void __init xd_omti_init_controller (unsigned int address)
-{
-       switch (address) {
-               case 0x00000:
-               case 0xC8000:   break;                  /*initial: 0x320 */
-               case 0xD0000:   xd_iobase = 0x324; break;
-               case 0xD8000:   xd_iobase = 0x328; break;
-               case 0xE0000:   xd_iobase = 0x32C; break;
-               default:        printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address);
-                               break;
-       }
-       
-       xd_maxsectors = 0x40;
-
-       outb(0,XD_RESET);               /* reset the controller */
-}
-
-static void __init xd_omti_init_drive (u_char drive)
-{
-       /* gets info from the drive */
-       xd_override_init_drive(drive);
-
-       /* set other parameters, Hardcoded, not that nice :-) */
-       xd_info[drive].control = 2;
-}
-
-/* Xebec support (AK) */
-static void __init xd_xebec_init_controller (unsigned int address)
-{
-/* iobase may be set manually in the range 0x300 - 0x33C
-   irq may be set manually to 2(9), 3, 4, 5, 6, 7
-   dma may be set manually to 1, 2, 3
-   (how to detect them automatically is unknown)
-   BIOS address may be set manually in the range 0x0 - 0xF8000
-   If you need non-standard settings, use the xd=... command */
-
-       switch (address) {
-               case 0x00000:
-               case 0xC8000:   /* initially: xd_iobase==0x320 */
-               case 0xD0000:
-               case 0xD2000:
-               case 0xD4000:
-               case 0xD6000:
-               case 0xD8000:
-               case 0xDA000:
-               case 0xDC000:
-               case 0xDE000:
-               case 0xE0000:   break;
-               default:        printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address);
-                               break;
-               }
-
-       xd_maxsectors = 0x01;
-       outb(0,XD_RESET);               /* reset the controller */
-
-       msleep(XD_INIT_DISK_DELAY);
-}
-
-static void __init xd_xebec_init_drive (u_char drive)
-{
-       /* values from controller's BIOS - BIOS chip may be removed */
-       static u_short geometry_table[][5] = {
-               {0x132,4,0x080,0x080,0x7},
-               {0x132,4,0x080,0x080,0x17},
-               {0x264,2,0x100,0x100,0x7},
-               {0x264,2,0x100,0x100,0x17},
-               {0x132,8,0x080,0x080,0x7},
-               {0x132,8,0x080,0x080,0x17},
-               {0x264,4,0x100,0x100,0x6},
-               {0x264,4,0x100,0x100,0x17},
-               {0x2BC,5,0x2BC,0x12C,0x6},
-               {0x3A5,4,0x3A5,0x3A5,0x7},
-               {0x26C,6,0x26C,0x26C,0x7},
-               {0x200,8,0x200,0x100,0x17},
-               {0x400,5,0x400,0x400,0x7},
-               {0x400,6,0x400,0x400,0x7},
-               {0x264,8,0x264,0x200,0x17},
-               {0x33E,7,0x33E,0x200,0x7}};
-       u_char n;
-
-       n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry 
-                                       is assumed for BOTH drives */
-       if (xd_geo[3*drive])
-               xd_manual_geo_set(drive);
-       else {
-               xd_info[drive].heads = (u_char)(geometry_table[n][1]);                  /* heads */
-               xd_info[drive].cylinders = geometry_table[n][0];        /* cylinders */
-               xd_info[drive].sectors = 17;                            /* sectors */
-#if 0
-               xd_info[drive].rwrite = geometry_table[n][2];   /* reduced write */
-               xd_info[drive].precomp = geometry_table[n][3];          /* write precomp */
-               xd_info[drive].ecc = 0x0B;                              /* ecc length */
-#endif /* 0 */
-       }
-       xd_info[drive].control = geometry_table[n][4];                  /* control byte */
-       xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B);
-       xd_recalibrate(drive);
-}
-
-/* xd_override_init_drive: finds the disk geometry "binary search" style, narrowing in on the correct numbers of heads,
-   cylinders and sectors by trying values until it finds the highest one that still works (see the standalone sketch after this file's diff). Idea courtesy Salvador Abreu (spa@fct.unl.pt). */
-static void __init xd_override_init_drive (u_char drive)
-{
-       u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 };
-       u_char cmdblk[6],i;
-
-       if (xd_geo[3*drive])
-               xd_manual_geo_set(drive);
-       else {
-               for (i = 0; i < 3; i++) {
-                       while (min[i] != max[i] - 1) {
-                               test[i] = (min[i] + max[i]) / 2;
-                               xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0);
-                               if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2))
-                                       min[i] = test[i];
-                               else
-                                       max[i] = test[i];
-                       }
-                       test[i] = min[i];
-               }
-               xd_info[drive].heads = (u_char) min[0] + 1;
-               xd_info[drive].cylinders = (u_short) min[1] + 1;
-               xd_info[drive].sectors = (u_char) min[2] + 1;
-       }
-       xd_info[drive].control = 0;
-}
-
-/* xd_setup: initialise controller from command line parameters */
-static void __init do_xd_setup (int *integers)
-{
-       switch (integers[0]) {
-               case 4: if (integers[4] < 0)
-                               nodma = 1;
-                       else if (integers[4] < 8)
-                               xd_dma = integers[4];
-               case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC))
-                               xd_iobase = integers[3];
-               case 2: if ((integers[2] > 0) && (integers[2] < 16))
-                               xd_irq = integers[2];
-               case 1: xd_override = 1;
-                       if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs)))
-                               xd_type = integers[1];
-               case 0: break;
-               default:printk("xd: too many parameters for xd\n");
-       }
-       xd_maxsectors = 0x01;
-}
-
-/* xd_setparam: set the drive characteristics */
-static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc)
-{
-       u_char cmdblk[14];
-
-       xd_build(cmdblk,command,drive,0,0,0,0,0);
-       cmdblk[6] = (u_char) (cylinders >> 8) & 0x03;
-       cmdblk[7] = (u_char) (cylinders & 0xFF);
-       cmdblk[8] = heads & 0x1F;
-       cmdblk[9] = (u_char) (rwrite >> 8) & 0x03;
-       cmdblk[10] = (u_char) (rwrite & 0xFF);
-       cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03;
-       cmdblk[12] = (u_char) (wprecomp & 0xFF);
-       cmdblk[13] = ecc;
-
-       /* Some controllers require geometry info as data, not command */
-
-       if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2))
-               printk("xd: error setting characteristics for xd%c\n", 'a'+drive);
-}
-
-
-#ifdef MODULE
-
-module_param_array(xd, int, NULL, 0);
-module_param_array(xd_geo, int, NULL, 0);
-module_param(nodma, bool, 0);
-
-MODULE_LICENSE("GPL");
-
-void cleanup_module(void)
-{
-       int i;
-       unregister_blkdev(XT_DISK_MAJOR, "xd");
-       for (i = 0; i < xd_drives; i++) {
-               del_gendisk(xd_gendisk[i]);
-               put_disk(xd_gendisk[i]);
-       }
-       blk_cleanup_queue(xd_queue);
-       release_region(xd_iobase,4);
-       if (xd_drives) {
-               free_irq(xd_irq, NULL);
-               free_dma(xd_dma);
-               if (xd_dma_buffer)
-                       xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200);
-       }
-}
-#else
-
-static int __init xd_setup (char *str)
-{
-       int ints[5];
-       get_options (str, ARRAY_SIZE (ints), ints);
-       do_xd_setup (ints);
-       return 1;
-}
-
-/* xd_manual_geo_init: initialise drive geometry from command line parameters
-   (used only for WD drives) */
-static int __init xd_manual_geo_init (char *str)
-{
-       int i, integers[1 + 3*XD_MAXDRIVES];
-
-       get_options (str, ARRAY_SIZE (integers), integers);
-       if (integers[0]%3 != 0) {
-               printk("xd: incorrect number of parameters for xd_geo\n");
-               return 1;
-       }
-       for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++)
-               xd_geo[i] = integers[i+1];
-       return 1;
-}
-
-__setup ("xd=", xd_setup);
-__setup ("xd_geo=", xd_manual_geo_init);
-
-#endif /* MODULE */
-
-module_init(xd_init);
-MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR);
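The xd_override_init_drive() routine removed above narrows in on the drive geometry binary-search style: for each of heads, cylinders and sectors it issues CMD_SEEK at a trial value and keeps the largest value the controller still accepts. The standalone sketch below is illustrative only; probe() is a hypothetical stand-in for the xd_command()/CMD_SEEK call, and the limit of five valid heads is made up for the demonstration.

#include <stdio.h>

/* Hypothetical stand-in for "issue CMD_SEEK at this value and see whether
 * the controller accepts it"; here values 0..4 are pretended to be valid. */
static int probe(unsigned int value)
{
        return value < 5;
}

/* Find the highest value in [lo, hi) that probe() accepts, mirroring the
 * min[]/max[] narrowing loop of the removed xd_override_init_drive(). */
static unsigned int probe_highest(unsigned int lo, unsigned int hi)
{
        while (lo != hi - 1) {
                unsigned int test = (lo + hi) / 2;

                if (probe(test))
                        lo = test;      /* still works: raise the lower bound */
                else
                        hi = test;      /* failed: lower the upper bound */
        }
        return lo;                      /* the driver then reports lo + 1 */
}

int main(void)
{
        /* 16 plays the role of the driver's max[] entry for heads. */
        printf("detected heads: %u\n", probe_highest(0, 16) + 1);
        return 0;
}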
diff --git a/drivers/block/xd.h b/drivers/block/xd.h
deleted file mode 100644 (file)
index 37cacef..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-#ifndef _LINUX_XD_H
-#define _LINUX_XD_H
-
-/*
- * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X).
- *
- * Author: Pat Mackinlay, pat@it.com.au
- * Date: 29/09/92
- *
- * Revised: 01/01/93, ...
- *
- * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com)
- * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst.
- */
-
-#include <linux/interrupt.h>
-
-/* XT hard disk controller registers */
-#define XD_DATA                (xd_iobase + 0x00)      /* data RW register */
-#define XD_RESET       (xd_iobase + 0x01)      /* reset WO register */
-#define XD_STATUS      (xd_iobase + 0x01)      /* status RO register */
-#define XD_SELECT      (xd_iobase + 0x02)      /* select WO register */
-#define XD_JUMPER      (xd_iobase + 0x02)      /* jumper RO register */
-#define XD_CONTROL     (xd_iobase + 0x03)      /* DMAE/INTE WO register */
-#define XD_RESERVED    (xd_iobase + 0x03)      /* reserved */
-
-/* XT hard disk controller commands (incomplete list) */
-#define CMD_TESTREADY  0x00    /* test drive ready */
-#define CMD_RECALIBRATE        0x01    /* recalibrate drive */
-#define CMD_SENSE      0x03    /* request sense */
-#define CMD_FORMATDRV  0x04    /* format drive */
-#define CMD_VERIFY     0x05    /* read verify */
-#define CMD_FORMATTRK  0x06    /* format track */
-#define CMD_FORMATBAD  0x07    /* format bad track */
-#define CMD_READ       0x08    /* read */
-#define CMD_WRITE      0x0A    /* write */
-#define CMD_SEEK       0x0B    /* seek */
-
-/* Controller specific commands */
-#define CMD_DTCSETPARAM        0x0C    /* set drive parameters (DTC 5150X & CX only?) */
-#define CMD_DTCGETECC  0x0D    /* get ecc error length (DTC 5150X only?) */
-#define CMD_DTCREADBUF 0x0E    /* read sector buffer (DTC 5150X only?) */
-#define CMD_DTCWRITEBUF 0x0F   /* write sector buffer (DTC 5150X only?) */
-#define CMD_DTCREMAPTRK        0x11    /* assign alternate track (DTC 5150X only?) */
-#define CMD_DTCGETPARAM        0xFB    /* get drive parameters (DTC 5150X only?) */
-#define CMD_DTCSETSTEP 0xFC    /* set step rate (DTC 5150X only?) */
-#define CMD_DTCSETGEOM 0xFE    /* set geometry data (DTC 5150X only?) */
-#define CMD_DTCGETGEOM 0xFF    /* get geometry data (DTC 5150X only?) */
-#define CMD_ST11GETGEOM 0xF8   /* get geometry data (Seagate ST11R/M only?) */
-#define CMD_WDSETPARAM 0x0C    /* set drive parameters (WD 1004A27X only?) */
-#define CMD_XBSETPARAM 0x0C    /* set drive parameters (XEBEC only?) */
-
-/* Bits for command status byte */
-#define CSB_ERROR      0x02    /* error */
-#define CSB_LUN                0x20    /* logical Unit Number */
-
-/* XT hard disk controller status bits */
-#define STAT_READY     0x01    /* controller is ready */
-#define STAT_INPUT     0x02    /* data flowing from controller to host */
-#define STAT_COMMAND   0x04    /* controller in command phase */
-#define STAT_SELECT    0x08    /* controller is selected */
-#define STAT_REQUEST   0x10    /* controller requesting data */
-#define STAT_INTERRUPT 0x20    /* controller requesting interrupt */
-
-/* XT hard disk controller control bits */
-#define PIO_MODE       0x00    /* control bits to set for PIO */
-#define DMA_MODE       0x03    /* control bits to set for DMA & interrupt */
-
-#define XD_MAXDRIVES   2       /* maximum 2 drives */
-#define XD_TIMEOUT     HZ      /* 1 second timeout */
-#define XD_RETRIES     4       /* maximum 4 retries */
-
-#undef DEBUG                   /* define for debugging output */
-
-#ifdef DEBUG
-       #define DEBUG_STARTUP   /* debug driver initialisation */
-       #define DEBUG_OVERRIDE  /* debug override geometry detection */
-       #define DEBUG_READWRITE /* debug each read/write command */
-       #define DEBUG_OTHER     /* debug misc. interrupt/DMA stuff */
-       #define DEBUG_COMMAND   /* debug each controller command */
-#endif /* DEBUG */
-
-/* this structure defines the XT drives and their types */
-typedef struct {
-       u_char heads;
-       u_short cylinders;
-       u_char sectors;
-       u_char control;
-       int unit;
-} XD_INFO;
-
-/* this structure defines a ROM BIOS signature */
-typedef struct {
-       unsigned int offset;
-       const char *string;
-       void (*init_controller)(unsigned int address);
-       void (*init_drive)(u_char drive);
-       const char *name;
-} XD_SIGNATURE;
-
-#ifndef MODULE
-static int xd_manual_geo_init (char *command);
-#endif /* MODULE */
-static u_char xd_detect (u_char *controller, unsigned int *address);
-static u_char xd_initdrives (void (*init_drive)(u_char drive));
-
-static void do_xd_request (struct request_queue * q);
-static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg);
-static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count);
-static void xd_recalibrate (u_char drive);
-
-static irqreturn_t xd_interrupt_handler(int irq, void *dev_id);
-static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count);
-static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control);
-static void xd_watchdog (unsigned long unused);
-static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout);
-static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout);
-
-/* card specific setup and geometry gathering code */
-static void xd_dtc_init_controller (unsigned int address);
-static void xd_dtc5150cx_init_drive (u_char drive);
-static void xd_dtc_init_drive (u_char drive);
-static void xd_wd_init_controller (unsigned int address);
-static void xd_wd_init_drive (u_char drive);
-static void xd_seagate_init_controller (unsigned int address);
-static void xd_seagate_init_drive (u_char drive);
-static void xd_omti_init_controller (unsigned int address);
-static void xd_omti_init_drive (u_char drive);
-static void xd_xebec_init_controller (unsigned int address);
-static void xd_xebec_init_drive (u_char drive);
-static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc);
-static void xd_override_init_drive (u_char drive);
-
-#endif /* _LINUX_XD_H */
index 5ac841ff6cc73acd59c7439722ab799a27fac93b..de1f319f7bd7e0118a960b5bd23fd286a6ba343e 100644 (file)
@@ -46,6 +46,7 @@
 #include <xen/xen.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 #include "common.h"
 
 /*
@@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
                        ret = gnttab_unmap_refs(unmap, NULL, pages,
                                segs_to_unmap);
                        BUG_ON(ret);
+                       free_xenballooned_pages(segs_to_unmap, pages);
                        segs_to_unmap = 0;
                }
 
@@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req,
                                GFP_KERNEL);
                        if (!persistent_gnt)
                                return -ENOMEM;
-                       persistent_gnt->page = alloc_page(GFP_KERNEL);
-                       if (!persistent_gnt->page) {
+                       if (alloc_xenballooned_pages(1, &persistent_gnt->page,
+                           false)) {
                                kfree(persistent_gnt);
                                return -ENOMEM;
                        }
@@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
                goto fail_response;
        }
 
-       preq.dev           = req->u.rw.handle;
        preq.sector_number = req->u.rw.sector_number;
        preq.nr_sects      = 0;
 
index 63980722db41af223941be7e83b33001c89bd9c7..5e237f630c47f2b2299749e067744dc89adba1a8 100644 (file)
@@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
                be->blkif = NULL;
        }
 
+       kfree(be->mode);
        kfree(be);
        dev_set_drvdata(&dev->dev, NULL);
        return 0;
@@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch,
                = container_of(watch, struct backend_info, backend_watch);
        struct xenbus_device *dev = be->dev;
        int cdrom = 0;
+       unsigned long handle;
        char *device_type;
 
        DPRINTK("");
@@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch,
                return;
        }
 
-       if ((be->major || be->minor) &&
-           ((be->major != major) || (be->minor != minor))) {
-               pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
-                       be->major, be->minor, major, minor);
+       if (be->major | be->minor) {
+               if (be->major != major || be->minor != minor)
+                       pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
+                               be->major, be->minor, major, minor);
                return;
        }
 
@@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch,
                kfree(device_type);
        }
 
-       if (be->major == 0 && be->minor == 0) {
-               /* Front end dir is a number, which is used as the handle. */
-
-               char *p = strrchr(dev->otherend, '/') + 1;
-               long handle;
-               err = strict_strtoul(p, 0, &handle);
-               if (err)
-                       return;
+       /* Front end dir is a number, which is used as the handle. */
+       err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
+       if (err)
+               return;
 
-               be->major = major;
-               be->minor = minor;
+       be->major = major;
+       be->minor = minor;
 
-               err = xen_vbd_create(be->blkif, handle, major, minor,
-                                (NULL == strchr(be->mode, 'w')), cdrom);
-               if (err) {
-                       be->major = 0;
-                       be->minor = 0;
-                       xenbus_dev_fatal(dev, err, "creating vbd structure");
-                       return;
-               }
+       err = xen_vbd_create(be->blkif, handle, major, minor,
+                            !strchr(be->mode, 'w'), cdrom);
 
+       if (err)
+               xenbus_dev_fatal(dev, err, "creating vbd structure");
+       else {
                err = xenvbd_sysfs_addif(dev);
                if (err) {
                        xen_vbd_free(&be->blkif->vbd);
-                       be->major = 0;
-                       be->minor = 0;
                        xenbus_dev_fatal(dev, err, "creating sysfs entries");
-                       return;
                }
+       }
 
+       if (err) {
+               kfree(be->mode);
+               be->mode = NULL;
+               be->major = 0;
+               be->minor = 0;
+       } else {
                /* We're potentially connected now */
                xen_update_blkif_status(be->blkif);
        }
index 11043c18ac5ab01fcfd2e492fff70a87260143fd..c3dae2e0f290e8ad64b4f3e6c869c2fca14cd1a1 100644 (file)
@@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work)
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
        struct llist_node *all_gnts;
-       struct grant *persistent_gnt;
+       struct grant *persistent_gnt, *tmp;
        struct llist_node *n;
 
        /* Prevent new requests being issued until we fix things up. */
@@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
        /* Remove all persistent grants */
        if (info->persistent_gnts_c) {
                all_gnts = llist_del_all(&info->persistent_gnts);
-               llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) {
+               persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
+               while (persistent_gnt) {
                        gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
                        __free_page(pfn_to_page(persistent_gnt->pfn));
-                       kfree(persistent_gnt);
+                       tmp = persistent_gnt;
+                       n = persistent_gnt->node.next;
+                       if (n)
+                               persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
+                       else
+                               persistent_gnt = NULL;
+                       kfree(tmp);
                }
                info->persistent_gnts_c = 0;
        }
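The xen-blkfront hunk above open-codes the walk over the persistent grant list, saving each node's next pointer before kfree()ing the node so the loop never touches freed memory. A small, self-contained userspace analogue of that free-while-walking pattern follows; the struct grant below and its fields are simplified placeholders, not the kernel's types.

#include <stdio.h>
#include <stdlib.h>

struct grant {
        int gref;               /* payload, like persistent_gnt->gref */
        struct grant *next;     /* singly linked, like the llist node */
};

/* Free the whole list: read ->next before freeing the current node,
 * just as blkif_free() now does while tearing down persistent grants. */
static void free_all(struct grant *head)
{
        while (head) {
                struct grant *next = head->next;        /* must be saved first */

                printf("releasing gref %d\n", head->gref);
                free(head);
                head = next;
        }
}

int main(void)
{
        struct grant *head = NULL;

        for (int i = 0; i < 3; i++) {
                struct grant *g = malloc(sizeof(*g));

                if (!g)
                        return 1;
                g->gref = i;
                g->next = head;         /* push front, like llist_add() */
                head = g;
        }
        free_all(head);
        return 0;
}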
index a4605fd7e3039be92612db8e7dffca6372e1ff32..47a673070d70edbf13d6ecaf7b60d04e5b5ed3c5 100644 (file)
 #include <linux/of_address.h>
 #include <linux/irq.h>
 #include <linux/module.h>
-#include <asm/sched_clock.h>
 
+#include <asm/sched_clock.h>
+#include <asm/localtimer.h>
+#include <linux/percpu.h>
 /*
  * Timer block registers.
  */
@@ -49,6 +51,7 @@
 #define TIMER1_RELOAD_OFF      0x0018
 #define TIMER1_VAL_OFF         0x001c
 
+#define LCL_TIMER_EVENTS_STATUS        0x0028
 /* Global timers are connected to the coherency fabric clock, and the
    below divider reduces their incrementing frequency. */
 #define TIMER_DIVIDER_SHIFT     5
 /*
  * SoC-specific data.
  */
-static void __iomem *timer_base;
-static int timer_irq;
+static void __iomem *timer_base, *local_base;
+static unsigned int timer_clk;
+static bool timer25Mhz = true;
 
 /*
  * Number of timer ticks per jiffy.
  */
 static u32 ticks_per_jiffy;
 
+static struct clock_event_device __percpu **percpu_armada_370_xp_evt;
+
 static u32 notrace armada_370_xp_read_sched_clock(void)
 {
        return ~readl(timer_base + TIMER0_VAL_OFF);
@@ -78,24 +84,23 @@ armada_370_xp_clkevt_next_event(unsigned long delta,
                                struct clock_event_device *dev)
 {
        u32 u;
-
        /*
         * Clear clockevent timer interrupt.
         */
-       writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS);
+       writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS);
 
        /*
         * Setup new clockevent timer value.
         */
-       writel(delta, timer_base + TIMER1_VAL_OFF);
+       writel(delta, local_base + TIMER0_VAL_OFF);
 
        /*
         * Enable the timer.
         */
-       u = readl(timer_base + TIMER_CTRL_OFF);
-       u = ((u & ~TIMER1_RELOAD_EN) | TIMER1_EN |
-            TIMER1_DIV(TIMER_DIVIDER_SHIFT));
-       writel(u, timer_base + TIMER_CTRL_OFF);
+       u = readl(local_base + TIMER_CTRL_OFF);
+       u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN |
+            TIMER0_DIV(TIMER_DIVIDER_SHIFT));
+       writel(u, local_base + TIMER_CTRL_OFF);
 
        return 0;
 }
@@ -107,37 +112,38 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode,
        u32 u;
 
        if (mode == CLOCK_EVT_MODE_PERIODIC) {
+
                /*
                 * Setup timer to fire at 1/HZ intervals.
                 */
-               writel(ticks_per_jiffy - 1, timer_base + TIMER1_RELOAD_OFF);
-               writel(ticks_per_jiffy - 1, timer_base + TIMER1_VAL_OFF);
+               writel(ticks_per_jiffy - 1, local_base + TIMER0_RELOAD_OFF);
+               writel(ticks_per_jiffy - 1, local_base + TIMER0_VAL_OFF);
 
                /*
                 * Enable timer.
                 */
-               u = readl(timer_base + TIMER_CTRL_OFF);
 
-               writel((u | TIMER1_EN | TIMER1_RELOAD_EN |
-                       TIMER1_DIV(TIMER_DIVIDER_SHIFT)),
-                      timer_base + TIMER_CTRL_OFF);
+               u = readl(local_base + TIMER_CTRL_OFF);
+
+               writel((u | TIMER0_EN | TIMER0_RELOAD_EN |
+                       TIMER0_DIV(TIMER_DIVIDER_SHIFT)),
+                       local_base + TIMER_CTRL_OFF);
        } else {
                /*
                 * Disable timer.
                 */
-               u = readl(timer_base + TIMER_CTRL_OFF);
-               writel(u & ~TIMER1_EN, timer_base + TIMER_CTRL_OFF);
+               u = readl(local_base + TIMER_CTRL_OFF);
+               writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF);
 
                /*
                 * ACK pending timer interrupt.
                 */
-               writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS);
-
+               writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS);
        }
 }
 
 static struct clock_event_device armada_370_xp_clkevt = {
-       .name           = "armada_370_xp_tick",
+       .name           = "armada_370_xp_per_cpu_tick",
        .features       = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
        .shift          = 32,
        .rating         = 300,
@@ -150,32 +156,78 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id)
        /*
         * ACK timer interrupt and call event handler.
         */
+       struct clock_event_device *evt = *(struct clock_event_device **)dev_id;
 
-       writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS);
-       armada_370_xp_clkevt.event_handler(&armada_370_xp_clkevt);
+       writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS);
+       evt->event_handler(evt);
 
        return IRQ_HANDLED;
 }
 
-static struct irqaction armada_370_xp_timer_irq = {
-       .name           = "armada_370_xp_tick",
-       .flags          = IRQF_DISABLED | IRQF_TIMER,
-       .handler        = armada_370_xp_timer_interrupt
+/*
+ * Setup the local clock events for a CPU.
+ */
+static int __cpuinit armada_370_xp_timer_setup(struct clock_event_device *evt)
+{
+       u32 u;
+       int cpu = smp_processor_id();
+
+       /* Use existing clock_event for cpu 0 */
+       if (!smp_processor_id())
+               return 0;
+
+       u = readl(local_base + TIMER_CTRL_OFF);
+       if (timer25Mhz)
+               writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF);
+       else
+               writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF);
+
+       evt->name               = armada_370_xp_clkevt.name;
+       evt->irq                = armada_370_xp_clkevt.irq;
+       evt->features           = armada_370_xp_clkevt.features;
+       evt->shift              = armada_370_xp_clkevt.shift;
+       evt->rating             = armada_370_xp_clkevt.rating;
+       evt->set_next_event     = armada_370_xp_clkevt_next_event;
+       evt->set_mode           = armada_370_xp_clkevt_mode;
+       evt->cpumask            = cpumask_of(cpu);
+
+       *__this_cpu_ptr(percpu_armada_370_xp_evt) = evt;
+
+       clockevents_config_and_register(evt, timer_clk, 1, 0xfffffffe);
+       enable_percpu_irq(evt->irq, 0);
+
+       return 0;
+}
+
+static void  armada_370_xp_timer_stop(struct clock_event_device *evt)
+{
+       evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt);
+       disable_percpu_irq(evt->irq);
+}
+
+static struct local_timer_ops armada_370_xp_local_timer_ops __cpuinitdata = {
+       .setup  = armada_370_xp_timer_setup,
+       .stop   =  armada_370_xp_timer_stop,
 };
 
 void __init armada_370_xp_timer_init(void)
 {
        u32 u;
        struct device_node *np;
-       unsigned int timer_clk;
+       int res;
+
        np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer");
        timer_base = of_iomap(np, 0);
        WARN_ON(!timer_base);
+       local_base = of_iomap(np, 1);
 
        if (of_find_property(np, "marvell,timer-25Mhz", NULL)) {
                /* The fixed 25MHz timer is available so let's use it */
+               u = readl(local_base + TIMER_CTRL_OFF);
+               writel(u | TIMER0_25MHZ,
+                      local_base + TIMER_CTRL_OFF);
                u = readl(timer_base + TIMER_CTRL_OFF);
-               writel(u | TIMER0_25MHZ | TIMER1_25MHZ,
+               writel(u | TIMER0_25MHZ,
                       timer_base + TIMER_CTRL_OFF);
                timer_clk = 25000000;
        } else {
@@ -183,15 +235,23 @@ void __init armada_370_xp_timer_init(void)
                struct clk *clk = of_clk_get(np, 0);
                WARN_ON(IS_ERR(clk));
                rate =  clk_get_rate(clk);
+               u = readl(local_base + TIMER_CTRL_OFF);
+               writel(u & ~(TIMER0_25MHZ),
+                      local_base + TIMER_CTRL_OFF);
+
                u = readl(timer_base + TIMER_CTRL_OFF);
-               writel(u & ~(TIMER0_25MHZ | TIMER1_25MHZ),
+               writel(u & ~(TIMER0_25MHZ),
                       timer_base + TIMER_CTRL_OFF);
+
                timer_clk = rate / TIMER_DIVIDER;
+               timer25Mhz = false;
        }
 
-       /* We use timer 0 as clocksource, and timer 1 for
-          clockevents */
-       timer_irq = irq_of_parse_and_map(np, 1);
+       /*
+        * We use timer 0 as the clocksource, and the private (local)
+        * timer 0 of each CPU for clockevents
+        */
+       armada_370_xp_clkevt.irq = irq_of_parse_and_map(np, 4);
 
        ticks_per_jiffy = (timer_clk + HZ / 2) / HZ;
 
@@ -216,12 +276,26 @@ void __init armada_370_xp_timer_init(void)
                              "armada_370_xp_clocksource",
                              timer_clk, 300, 32, clocksource_mmio_readl_down);
 
-       /*
-        * Setup clockevent timer (interrupt-driven).
-        */
-       setup_irq(timer_irq, &armada_370_xp_timer_irq);
+       /* Register the clockevent on the private timer of CPU 0 */
        armada_370_xp_clkevt.cpumask = cpumask_of(0);
        clockevents_config_and_register(&armada_370_xp_clkevt,
                                        timer_clk, 1, 0xfffffffe);
-}
 
+       percpu_armada_370_xp_evt = alloc_percpu(struct clock_event_device *);
+
+
+       /*
+        * Setup clockevent timer (interrupt-driven).
+        */
+       *__this_cpu_ptr(percpu_armada_370_xp_evt) = &armada_370_xp_clkevt;
+       res = request_percpu_irq(armada_370_xp_clkevt.irq,
+                               armada_370_xp_timer_interrupt,
+                               armada_370_xp_clkevt.name,
+                               percpu_armada_370_xp_evt);
+       if (!res) {
+               enable_percpu_irq(armada_370_xp_clkevt.irq, 0);
+#ifdef CONFIG_LOCAL_TIMERS
+               local_timer_register(&armada_370_xp_local_timer_ops);
+#endif
+       }
+}
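With the fixed 25 MHz timer selected above, ticks_per_jiffy is derived with a round-to-nearest integer division, (timer_clk + HZ / 2) / HZ. A quick standalone check of that arithmetic follows; HZ is assumed to be 100 here, which is only an example value.

#include <stdio.h>

int main(void)
{
        unsigned int hz = 100;                  /* example tick rate */
        unsigned int timer_clk = 25000000;      /* the fixed 25 MHz timer */

        /* Same rounding as the driver: adding hz/2 before dividing rounds
         * to the nearest tick count instead of always rounding down. */
        unsigned int ticks_per_jiffy = (timer_clk + hz / 2) / hz;

        printf("ticks per jiffy at %u Hz: %u\n", hz, ticks_per_jiffy);
        return 0;
}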
index acb709bfac0fea2aed419aab4d96fd0b1eae731c..e443f2c1dfd1685ab0bcf88be945e898a5373a55 100644 (file)
@@ -80,6 +80,29 @@ config EDAC_MM_EDAC
          occurred so that a particular failing memory module can be
          replaced.  If unsure, select 'Y'.
 
+config EDAC_GHES
+       bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
+       depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y)
+       default y
+       help
+         Not all machines support hardware-driven error reporting. Some of
+         them provide a BIOS-driven error reporting mechanism via ACPI,
+         using the APEI/GHES driver. By enabling this option, the error
+         reports provided by GHES are sent to userspace via the EDAC API.
+
+         When this option is enabled and a GHES BIOS is detected, the
+         hardware-driven mechanisms are disabled and EDAC enters
+         "Firmware First" mode.
+
+         Note that keeping both GHES and a hardware-driven error mechanism
+         enabled does not work well, as the BIOS races with the OS when
+         reading the error registers. So, if you do not want the "Firmware
+         First" GHES error mechanism, disable GHES either at compilation
+         time or by passing the "ghes.disable=1" kernel parameter at boot
+         time.
+
+         If unsure, say 'Y'.
+
 config EDAC_AMD64
        tristate "AMD64 (Opteron, Athlon64) K8, F10h"
        depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
index 5608a9ba61b7aaf442564feac6e586c6a26588cf..4154ed6a02c671f57c1dd27ddd5571dc42595c42 100644 (file)
@@ -16,6 +16,7 @@ ifdef CONFIG_PCI
 edac_core-y    += edac_pci.o edac_pci_sysfs.o
 endif
 
+obj-$(CONFIG_EDAC_GHES)                        += ghes_edac.o
 obj-$(CONFIG_EDAC_MCE_INJ)             += mce_amd_inj.o
 
 edac_mce_amd-y                         := mce_amd.o
index 23bb99fa44f1e7ffc6307eeb9d64fbda43f591d4..3c2625e7980daf3479ff5261d7790ef9bdc6c84c 100644 (file)
@@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
 extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
 extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
                                      unsigned long page);
+
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+                             struct mem_ctl_info *mci,
+                             struct edac_raw_error_desc *e);
+
 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          struct mem_ctl_info *mci,
                          const u16 error_count,
index d1e9eb191f2bd77ff72614c0819885d31dcf473d..cdb81aa73ab7aeb595be53f77430488486947e66 100644 (file)
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
 
+/*
+ * Used to lock EDAC MC to just one module, preventing two drivers (e.g.
+ * apei/ghes and i7core_edac) from being used at the same time.
+ */
+static void const *edac_mc_owner;
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
 {
@@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 
        mci->op_state = OP_ALLOC;
 
-       /* at this point, the root kobj is valid, and in order to
-        * 'free' the object, then the function:
-        *      edac_mc_unregister_sysfs_main_kobj() must be called
-        * which will perform kobj unregistration and the actual free
-        * will occur during the kobject callback operation
-        */
-
        return mci;
 
 error:
@@ -666,9 +665,9 @@ fail1:
        return 1;
 }
 
-static void del_mc_from_global_list(struct mem_ctl_info *mci)
+static int del_mc_from_global_list(struct mem_ctl_info *mci)
 {
-       atomic_dec(&edac_handlers);
+       int handlers = atomic_dec_return(&edac_handlers);
        list_del_rcu(&mci->link);
 
        /* these are for safe removal of devices from global list while
@@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
+
+       return handlers;
 }
 
 /**
@@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find);
 /* FIXME - should a warning be printed if no error detection? correction? */
 int edac_mc_add_mc(struct mem_ctl_info *mci)
 {
+       int ret = -EINVAL;
        edac_dbg(0, "\n");
 
 #ifdef CONFIG_EDAC_DEBUG
@@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 #endif
        mutex_lock(&mem_ctls_mutex);
 
+       if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
+               ret = -EPERM;
+               goto fail0;
+       }
+
        if (add_mc_to_global_list(mci))
                goto fail0;
 
@@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 
+       edac_mc_owner = mci->mod_name;
+
        mutex_unlock(&mem_ctls_mutex);
        return 0;
 
@@ -783,7 +792,7 @@ fail1:
 
 fail0:
        mutex_unlock(&mem_ctls_mutex);
-       return 1;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 
@@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
                return NULL;
        }
 
-       del_mc_from_global_list(mci);
+       if (!del_mc_from_global_list(mci))
+               edac_mc_owner = NULL;
        mutex_unlock(&mem_ctls_mutex);
 
        /* flush workq processes */
@@ -907,6 +917,7 @@ const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+       [EDAC_MC_LAYER_ALL_MEM] = "memory",
 };
 EXPORT_SYMBOL_GPL(edac_layer_name);
 
@@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci,
        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
 }
 
-#define OTHER_LABEL " or "
+/**
+ * edac_raw_mc_handle_error - reports a memory event to userspace without doing
+ *                           anything to discover the error location
+ *
+ * @type:              severity of the error (CE/UE/Fatal)
+ * @mci:               a struct mem_ctl_info pointer
+ * @e:                 error description
+ *
+ * This raw function is used internally by edac_mc_handle_error(). It should
+ * only be called directly when the hardware error comes straight from the BIOS,
+ * as with the APEI GHES driver (see the caller-side sketch after this file's diff).
+ */
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+                             struct mem_ctl_info *mci,
+                             struct edac_raw_error_desc *e)
+{
+       char detail[80];
+       int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+
+       /* Memory type dependent details about the error */
+       if (type == HW_EVENT_ERR_CORRECTED) {
+               snprintf(detail, sizeof(detail),
+                       "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
+                       e->page_frame_number, e->offset_in_page,
+                       e->grain, e->syndrome);
+               edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+                             detail, e->other_detail, e->enable_per_layer_report,
+                             e->page_frame_number, e->offset_in_page, e->grain);
+       } else {
+               snprintf(detail, sizeof(detail),
+                       "page:0x%lx offset:0x%lx grain:%ld",
+                       e->page_frame_number, e->offset_in_page, e->grain);
+
+               edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+                             detail, e->other_detail, e->enable_per_layer_report);
+       }
+
+
+}
+EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
 
 /**
  * edac_mc_handle_error - reports a memory event to userspace
@@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          const char *msg,
                          const char *other_detail)
 {
-       /* FIXME: too much for stack: move it to some pre-alocated area */
-       char detail[80], location[80];
-       char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
-       int i;
-       long grain;
-       bool enable_per_layer_report = false;
+       int i, n_labels = 0;
        u8 grain_bits;
+       struct edac_raw_error_desc *e = &mci->error_desc;
 
        edac_dbg(3, "MC%d\n", mci->mc_idx);
 
+       /* Fills the error report buffer */
+       memset(e, 0, sizeof (*e));
+       e->error_count = error_count;
+       e->top_layer = top_layer;
+       e->mid_layer = mid_layer;
+       e->low_layer = low_layer;
+       e->page_frame_number = page_frame_number;
+       e->offset_in_page = offset_in_page;
+       e->syndrome = syndrome;
+       e->msg = msg;
+       e->other_detail = other_detail;
+
        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
@@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
-                       enable_per_layer_report = true;
+                       e->enable_per_layer_report = true;
        }
 
        /*
@@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
         * where each memory belongs to a separate channel within the same
         * branch.
         */
-       grain = 0;
-       p = label;
+       p = e->label;
        *p = '\0';
 
        for (i = 0; i < mci->tot_dimms; i++) {
@@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                        continue;
 
                /* get the max grain, over the error match range */
-               if (dimm->grain > grain)
-                       grain = dimm->grain;
+               if (dimm->grain > e->grain)
+                       e->grain = dimm->grain;
 
                /*
                 * If the error is memory-controller wide, there's no need to
@@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
-               if (enable_per_layer_report && dimm->nr_pages) {
-                       if (p != label) {
+               if (e->enable_per_layer_report && dimm->nr_pages) {
+                       if (n_labels >= EDAC_MAX_LABELS) {
+                               e->enable_per_layer_report = false;
+                               break;
+                       }
+                       n_labels++;
+                       if (p != e->label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
@@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                }
        }
 
-       if (!enable_per_layer_report) {
-               strcpy(label, "any memory");
+       if (!e->enable_per_layer_report) {
+               strcpy(e->label, "any memory");
        } else {
                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
-               if (p == label)
-                       strcpy(label, "unknown memory");
+               if (p == e->label)
+                       strcpy(e->label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row]->ce_count += error_count;
@@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
        }
 
        /* Fill the RAM location data */
-       p = location;
+       p = e->location;
 
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
@@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }
-       if (p > location)
+       if (p > e->location)
                *(p - 1) = '\0';
 
        /* Report the error via the trace interface */
-       grain_bits = fls_long(grain) + 1;
-       trace_mc_event(type, msg, label, error_count,
-                      mci->mc_idx, top_layer, mid_layer, low_layer,
-                      PAGES_TO_MiB(page_frame_number) | offset_in_page,
-                      grain_bits, syndrome, other_detail);
+       grain_bits = fls_long(e->grain) + 1;
+       trace_mc_event(type, e->msg, e->label, e->error_count,
+                      mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+                      PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+                      grain_bits, e->syndrome, e->other_detail);
 
-       /* Memory type dependent details about the error */
-       if (type == HW_EVENT_ERR_CORRECTED) {
-               snprintf(detail, sizeof(detail),
-                       "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
-                       page_frame_number, offset_in_page,
-                       grain, syndrome);
-               edac_ce_error(mci, error_count, pos, msg, location, label,
-                             detail, other_detail, enable_per_layer_report,
-                             page_frame_number, offset_in_page, grain);
-       } else {
-               snprintf(detail, sizeof(detail),
-                       "page:0x%lx offset:0x%lx grain:%ld",
-                       page_frame_number, offset_in_page, grain);
-
-               edac_ue_error(mci, error_count, pos, msg, location, label,
-                             detail, other_detail, enable_per_layer_report);
-       }
+       edac_raw_mc_handle_error(type, mci, e);
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
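The edac_raw_mc_handle_error() helper added in this file takes an already filled struct edac_raw_error_desc instead of rediscovering the error location, which is what lets a firmware-first driver such as ghes_edac forward BIOS-decoded errors. The sketch below is only an outline of a possible caller, compilable in-tree at best: the mci pointer is assumed to come from edac_mc_alloc()/edac_mc_add_mc(), the address and grain values are placeholders, and only fields visible in the hunk above are touched.

#include <linux/string.h>
#include <linux/edac.h>
#include "edac_core.h"

/* Report one BIOS-described corrected error against an already registered
 * memory controller, roughly the way a firmware-first driver would. */
static void report_bios_ce(struct mem_ctl_info *mci)
{
        struct edac_raw_error_desc *e = &mci->error_desc;

        memset(e, 0, sizeof(*e));
        e->error_count = 1;
        e->top_layer = -1;                      /* location unknown to the OS */
        e->mid_layer = -1;
        e->low_layer = -1;
        e->page_frame_number = 0x12345;         /* placeholder address bits */
        e->offset_in_page = 0x40;
        e->syndrome = 0;
        e->grain = 128;                         /* coarse: firmware gave no better */
        e->msg = "BIOS-reported corrected error";
        e->other_detail = "";
        strcpy(e->label, "unknown memory");     /* no DIMM label available */

        edac_raw_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, e);
}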
index 0ca1ca71157f2b7320e418d13deceeb63965c030..4f4b6137d74e6c4ebd894e104cdeaf18cd565969 100644 (file)
@@ -7,7 +7,7 @@
  *
  * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com
  *
- * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com>
+ * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com>
  *     The entire API were re-written, and ported to use struct device
  *
  */
@@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
                if (!nr_pages_per_csrow(csrow))
                        continue;
                err = edac_create_csrow_object(mci, mci->csrows[i], i);
-               if (err < 0)
+               if (err < 0) {
+                       edac_dbg(1,
+                                "failure: create csrow objects for csrow %d\n",
+                                i);
                        goto error;
+               }
        }
        return 0;
 
@@ -677,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev,
        unsigned long bandwidth = 0;
        int new_bw = 0;
 
-       if (!mci->set_sdram_scrub_rate)
-               return -ENODEV;
-
        if (strict_strtoul(data, 10, &bandwidth) < 0)
                return -EINVAL;
 
@@ -703,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev,
        struct mem_ctl_info *mci = to_mci(dev);
        int bandwidth = 0;
 
-       if (!mci->get_sdram_scrub_rate)
-               return -ENODEV;
-
        bandwidth = mci->get_sdram_scrub_rate(mci);
        if (bandwidth < 0) {
                edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n");
@@ -866,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL);
 DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL);
 
 /* memory scrubber attribute file */
-DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show,
-       mci_sdram_scrub_rate_store);
+DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL);
 
 static struct attribute *mci_attrs[] = {
        &dev_attr_reset_counters.attr,
@@ -878,7 +875,6 @@ static struct attribute *mci_attrs[] = {
        &dev_attr_ce_noinfo_count.attr,
        &dev_attr_ue_count.attr,
        &dev_attr_ce_count.attr,
-       &dev_attr_sdram_scrub_rate.attr,
        &dev_attr_max_location.attr,
        NULL
 };
@@ -1007,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
        edac_dbg(0, "creating device %s\n", dev_name(&mci->dev));
        err = device_add(&mci->dev);
        if (err < 0) {
+               edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
                bus_unregister(&mci->bus);
                kfree(mci->bus.name);
                return err;
        }
 
+       if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) {
+               if (mci->get_sdram_scrub_rate) {
+                       dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO;
+                       dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show;
+               }
+               if (mci->set_sdram_scrub_rate) {
+                       dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR;
+                       dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store;
+               }
+               err = device_create_file(&mci->dev,
+                                        &dev_attr_sdram_scrub_rate);
+               if (err) {
+                       edac_dbg(1, "failure: create sdram_scrub_rate\n");
+                       goto fail2;
+               }
+       }
        /*
         * Create the dimm/rank devices
         */
@@ -1056,6 +1069,7 @@ fail:
                        continue;
                device_unregister(&dimm->dev);
        }
+fail2:
        device_unregister(&mci->dev);
        bus_unregister(&mci->bus);
        kfree(mci->bus.name);
index 12c951a2c33d5c8bac6d80bf6f0221467278940e..a66941fea5a417724097837bdb6b891b07241852 100644 (file)
@@ -146,7 +146,7 @@ static void __exit edac_exit(void)
 /*
  * Inform the kernel of our entry and exit points
  */
-module_init(edac_init);
+subsys_initcall(edac_init);
 module_exit(edac_exit);
 
 MODULE_LICENSE("GPL");
index 0056c4dae9d52cc24985200dbb58b724be73c3ed..e8658e451762647774f71b79101f61efb39194d9 100644 (file)
@@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void)
        if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
                edac_dbg(0, "called kobject_put on main kobj\n");
                kobject_put(edac_pci_top_main_kobj);
+               edac_put_sysfs_subsys();
        }
-       edac_put_sysfs_subsys();
 }
 
 /*
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
new file mode 100644 (file)
index 0000000..bb53467
--- /dev/null
@@ -0,0 +1,537 @@
+/*
+ * GHES/EDAC Linux driver
+ *
+ * This file may be distributed under the terms of the GNU General Public
+ * License version 2.
+ *
+ * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com>
+ *
+ * Red Hat Inc. http://www.redhat.com
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <acpi/ghes.h>
+#include <linux/edac.h>
+#include <linux/dmi.h>
+#include "edac_core.h"
+#include <ras/ras_event.h>
+
+#define GHES_EDAC_REVISION " Ver: 1.0.0"
+
+struct ghes_edac_pvt {
+       struct list_head list;
+       struct ghes *ghes;
+       struct mem_ctl_info *mci;
+
+       /* Buffers for the error handling routine */
+       char detail_location[240];
+       char other_detail[160];
+       char msg[80];
+};
+
+static LIST_HEAD(ghes_reglist);
+static DEFINE_MUTEX(ghes_edac_lock);
+static int ghes_edac_mc_num;
+
+
+/* Memory Device - Type 17 of SMBIOS spec */
+struct memdev_dmi_entry {
+       u8 type;
+       u8 length;
+       u16 handle;
+       u16 phys_mem_array_handle;
+       u16 mem_err_info_handle;
+       u16 total_width;
+       u16 data_width;
+       u16 size;
+       u8 form_factor;
+       u8 device_set;
+       u8 device_locator;
+       u8 bank_locator;
+       u8 memory_type;
+       u16 type_detail;
+       u16 speed;
+       u8 manufacturer;
+       u8 serial_number;
+       u8 asset_tag;
+       u8 part_number;
+       u8 attributes;
+       u32 extended_size;
+       u16 conf_mem_clk_speed;
+} __attribute__((__packed__));
+
+struct ghes_edac_dimm_fill {
+       struct mem_ctl_info *mci;
+       unsigned count;
+};
+
+char *memory_type[] = {
+       [MEM_EMPTY] = "EMPTY",
+       [MEM_RESERVED] = "RESERVED",
+       [MEM_UNKNOWN] = "UNKNOWN",
+       [MEM_FPM] = "FPM",
+       [MEM_EDO] = "EDO",
+       [MEM_BEDO] = "BEDO",
+       [MEM_SDR] = "SDR",
+       [MEM_RDR] = "RDR",
+       [MEM_DDR] = "DDR",
+       [MEM_RDDR] = "RDDR",
+       [MEM_RMBS] = "RMBS",
+       [MEM_DDR2] = "DDR2",
+       [MEM_FB_DDR2] = "FB_DDR2",
+       [MEM_RDDR2] = "RDDR2",
+       [MEM_XDR] = "XDR",
+       [MEM_DDR3] = "DDR3",
+       [MEM_RDDR3] = "RDDR3",
+};
+
+static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
+{
+       int *num_dimm = arg;
+
+       if (dh->type == DMI_ENTRY_MEM_DEVICE)
+               (*num_dimm)++;
+}
+
+static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
+{
+       struct ghes_edac_dimm_fill *dimm_fill = arg;
+       struct mem_ctl_info *mci = dimm_fill->mci;
+
+       if (dh->type == DMI_ENTRY_MEM_DEVICE) {
+               struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
+               struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+                                                      mci->n_layers,
+                                                      dimm_fill->count, 0, 0);
+
+               if (entry->size == 0xffff) {
+                       pr_info("Can't get DIMM%i size\n",
+                               dimm_fill->count);
+                       dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
+               } else if (entry->size == 0x7fff) {
+                       dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
+               } else {
+                       if (entry->size & 1 << 15)
+                               dimm->nr_pages = MiB_TO_PAGES((entry->size &
+                                                              0x7fff) << 10);
+                       else
+                               dimm->nr_pages = MiB_TO_PAGES(entry->size);
+               }
+
+               switch (entry->memory_type) {
+               case 0x12:
+                       if (entry->type_detail & 1 << 13)
+                               dimm->mtype = MEM_RDDR;
+                       else
+                               dimm->mtype = MEM_DDR;
+                       break;
+               case 0x13:
+                       if (entry->type_detail & 1 << 13)
+                               dimm->mtype = MEM_RDDR2;
+                       else
+                               dimm->mtype = MEM_DDR2;
+                       break;
+               case 0x14:
+                       dimm->mtype = MEM_FB_DDR2;
+                       break;
+               case 0x18:
+                       if (entry->type_detail & 1 << 13)
+                               dimm->mtype = MEM_RDDR3;
+                       else
+                               dimm->mtype = MEM_DDR3;
+                       break;
+               default:
+                       if (entry->type_detail & 1 << 6)
+                               dimm->mtype = MEM_RMBS;
+                       else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
+                                == ((1 << 7) | (1 << 13)))
+                               dimm->mtype = MEM_RDR;
+                       else if (entry->type_detail & 1 << 7)
+                               dimm->mtype = MEM_SDR;
+                       else if (entry->type_detail & 1 << 9)
+                               dimm->mtype = MEM_EDO;
+                       else
+                               dimm->mtype = MEM_UNKNOWN;
+               }
+
+               /*
+                * Actually, we can only detect if the memory has bits for
+                * checksum or not
+                */
+               if (entry->total_width == entry->data_width)
+                       dimm->edac_mode = EDAC_NONE;
+               else
+                       dimm->edac_mode = EDAC_SECDED;
+
+               dimm->dtype = DEV_UNKNOWN;
+               dimm->grain = 128;              /* Likely worst case */
+
+               /*
+                * FIXME: It shouldn't be hard to also fill the DIMM labels
+                */
+
+               if (dimm->nr_pages) {
+                       edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
+                               dimm_fill->count, memory_type[dimm->mtype],
+                               PAGES_TO_MiB(dimm->nr_pages),
+                               (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
+                       edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
+                               entry->memory_type, entry->type_detail,
+                               entry->total_width, entry->data_width);
+               }
+
+               dimm_fill->count++;
+       }
+}
+
+void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+                               struct cper_sec_mem_err *mem_err)
+{
+       enum hw_event_mc_err_type type;
+       struct edac_raw_error_desc *e;
+       struct mem_ctl_info *mci;
+       struct ghes_edac_pvt *pvt = NULL;
+       char *p;
+       u8 grain_bits;
+
+       list_for_each_entry(pvt, &ghes_reglist, list) {
+               if (ghes == pvt->ghes)
+                       break;
+       }
+       if (!pvt) {
+               pr_err("Internal error: Can't find EDAC structure\n");
+               return;
+       }
+       mci = pvt->mci;
+       e = &mci->error_desc;
+
+       /* Clean the error report buffer */
+       memset(e, 0, sizeof (*e));
+       e->error_count = 1;
+       strcpy(e->label, "unknown label");
+       e->msg = pvt->msg;
+       e->other_detail = pvt->other_detail;
+       e->top_layer = -1;
+       e->mid_layer = -1;
+       e->low_layer = -1;
+       *pvt->other_detail = '\0';
+       *pvt->msg = '\0';
+
+       switch (sev) {
+       case GHES_SEV_CORRECTED:
+               type = HW_EVENT_ERR_CORRECTED;
+               break;
+       case GHES_SEV_RECOVERABLE:
+               type = HW_EVENT_ERR_UNCORRECTED;
+               break;
+       case GHES_SEV_PANIC:
+               type = HW_EVENT_ERR_FATAL;
+               break;
+       default:
+       case GHES_SEV_NO:
+               type = HW_EVENT_ERR_INFO;
+       }
+
+       edac_dbg(1, "error validation_bits: 0x%08llx\n",
+                (long long)mem_err->validation_bits);
+
+       /* Error type, mapped on e->msg */
+       if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
+               p = pvt->msg;
+               switch (mem_err->error_type) {
+               case 0:
+                       p += sprintf(p, "Unknown");
+                       break;
+               case 1:
+                       p += sprintf(p, "No error");
+                       break;
+               case 2:
+                       p += sprintf(p, "Single-bit ECC");
+                       break;
+               case 3:
+                       p += sprintf(p, "Multi-bit ECC");
+                       break;
+               case 4:
+                       p += sprintf(p, "Single-symbol ChipKill ECC");
+                       break;
+               case 5:
+                       p += sprintf(p, "Multi-symbol ChipKill ECC");
+                       break;
+               case 6:
+                       p += sprintf(p, "Master abort");
+                       break;
+               case 7:
+                       p += sprintf(p, "Target abort");
+                       break;
+               case 8:
+                       p += sprintf(p, "Parity Error");
+                       break;
+               case 9:
+                       p += sprintf(p, "Watchdog timeout");
+                       break;
+               case 10:
+                       p += sprintf(p, "Invalid address");
+                       break;
+               case 11:
+                       p += sprintf(p, "Mirror Broken");
+                       break;
+               case 12:
+                       p += sprintf(p, "Memory Sparing");
+                       break;
+               case 13:
+                       p += sprintf(p, "Scrub corrected error");
+                       break;
+               case 14:
+                       p += sprintf(p, "Scrub uncorrected error");
+                       break;
+               case 15:
+                       p += sprintf(p, "Physical Memory Map-out event");
+                       break;
+               default:
+                       p += sprintf(p, "reserved error (%d)",
+                                    mem_err->error_type);
+               }
+       } else {
+               strcpy(pvt->msg, "unknown error");
+       }
+
+       /* Error address */
+       if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
+               e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
+               e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
+       }
+
+       /* Error grain */
+       if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) {
+               e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
+       }
+
+       /* Memory error location, mapped on e->location */
+       p = e->location;
+       if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
+               p += sprintf(p, "node:%d ", mem_err->node);
+       if (mem_err->validation_bits & CPER_MEM_VALID_CARD)
+               p += sprintf(p, "card:%d ", mem_err->card);
+       if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
+               p += sprintf(p, "module:%d ", mem_err->module);
+       if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
+               p += sprintf(p, "bank:%d ", mem_err->bank);
+       if (mem_err->validation_bits & CPER_MEM_VALID_ROW)
+               p += sprintf(p, "row:%d ", mem_err->row);
+       if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
+               p += sprintf(p, "col:%d ", mem_err->column);
+       if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
+               p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
+       if (p > e->location)
+               *(p - 1) = '\0';
+
+       /* All other fields are mapped on e->other_detail */
+       p = pvt->other_detail;
+       if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
+               u64 status = mem_err->error_status;
+
+               p += sprintf(p, "status(0x%016llx): ", (long long)status);
+               switch ((status >> 8) & 0xff) {
+               case 1:
+                       p += sprintf(p, "Error detected internal to the component ");
+                       break;
+               case 16:
+                       p += sprintf(p, "Error detected in the bus ");
+                       break;
+               case 4:
+                       p += sprintf(p, "Storage error in DRAM memory ");
+                       break;
+               case 5:
+                       p += sprintf(p, "Storage error in TLB ");
+                       break;
+               case 6:
+                       p += sprintf(p, "Storage error in cache ");
+                       break;
+               case 7:
+                       p += sprintf(p, "Error in one or more functional units ");
+                       break;
+               case 8:
+                       p += sprintf(p, "component failed self test ");
+                       break;
+               case 9:
+                       p += sprintf(p, "Overflow or undervalue of internal queue ");
+                       break;
+               case 17:
+                       p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
+                       break;
+               case 18:
+                       p += sprintf(p, "Improper access error ");
+                       break;
+               case 19:
+                       p += sprintf(p, "Access to a memory address which is not mapped to any component ");
+                       break;
+               case 20:
+                       p += sprintf(p, "Loss of Lockstep ");
+                       break;
+               case 21:
+                       p += sprintf(p, "Response not associated with a request ");
+                       break;
+               case 22:
+                       p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
+                       break;
+               case 23:
+                       p += sprintf(p, "Detection of a PATH_ERROR ");
+                       break;
+               case 25:
+                       p += sprintf(p, "Bus operation timeout ");
+                       break;
+               case 26:
+                       p += sprintf(p, "A read was issued to data that has been poisoned ");
+                       break;
+               default:
+                       p += sprintf(p, "reserved ");
+                       break;
+               }
+       }
+       if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
+               p += sprintf(p, "requestorID: 0x%016llx ",
+                            (long long)mem_err->requestor_id);
+       if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
+               p += sprintf(p, "responderID: 0x%016llx ",
+                            (long long)mem_err->responder_id);
+       if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
+               p += sprintf(p, "targetID: 0x%016llx ",
+                            (long long)mem_err->responder_id);
+       if (p > pvt->other_detail)
+               *(p - 1) = '\0';
+
+       /* Generate the trace event */
+       grain_bits = fls_long(e->grain);
+       sprintf(pvt->detail_location, "APEI location: %s %s",
+               e->location, e->other_detail);
+       trace_mc_event(type, e->msg, e->label, e->error_count,
+                      mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+                      PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+                      grain_bits, e->syndrome, pvt->detail_location);
+
+       /* Report the error via EDAC API */
+       edac_raw_mc_handle_error(type, mci, e);
+}
+EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error);
+
+int ghes_edac_register(struct ghes *ghes, struct device *dev)
+{
+       bool fake = false;
+       int rc, num_dimm = 0;
+       struct mem_ctl_info *mci;
+       struct edac_mc_layer layers[1];
+       struct ghes_edac_pvt *pvt;
+       struct ghes_edac_dimm_fill dimm_fill;
+
+       /* Get the number of DIMMs */
+       dmi_walk(ghes_edac_count_dimms, &num_dimm);
+
+       /* Check if we've got a bogus BIOS */
+       if (num_dimm == 0) {
+               fake = true;
+               num_dimm = 1;
+       }
+
+       layers[0].type = EDAC_MC_LAYER_ALL_MEM;
+       layers[0].size = num_dimm;
+       layers[0].is_virt_csrow = true;
+
+       /*
+        * We need to serialize edac_mc_alloc() and edac_mc_add_mc(),
+        * to avoid duplicated memory controller numbers
+        */
+       mutex_lock(&ghes_edac_lock);
+       mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers,
+                           sizeof(*pvt));
+       if (!mci) {
+               pr_info("Can't allocate memory for EDAC data\n");
+               mutex_unlock(&ghes_edac_lock);
+               return -ENOMEM;
+       }
+
+       pvt = mci->pvt_info;
+       memset(pvt, 0, sizeof(*pvt));
+       list_add_tail(&pvt->list, &ghes_reglist);
+       pvt->ghes = ghes;
+       pvt->mci  = mci;
+       mci->pdev = dev;
+
+       mci->mtype_cap = MEM_FLAG_EMPTY;
+       mci->edac_ctl_cap = EDAC_FLAG_NONE;
+       mci->edac_cap = EDAC_FLAG_NONE;
+       mci->mod_name = "ghes_edac.c";
+       mci->mod_ver = GHES_EDAC_REVISION;
+       mci->ctl_name = "ghes_edac";
+       mci->dev_name = "ghes";
+
+       if (!ghes_edac_mc_num) {
+               if (!fake) {
+                       pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
+                       pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
+                       pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
+                       pr_info("If you find incorrect reports, please contact your hardware vendor\n");
+                       pr_info("to correct its BIOS.\n");
+                       pr_info("This system has %d DIMM sockets.\n",
+                               num_dimm);
+               } else {
+                       pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMs.\n");
+                       pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
+                       pr_info("work on such a system. Use this driver with caution.\n");
+               }
+       }
+
+       if (!fake) {
+               /*
+                * Fill DIMM info from DMI for memory controller #0.
+                *
+                * Leave it blank for the other memory controllers, as
+                * there's no reliable way to properly credit each DIMM to
+                * its memory controller: different BIOSes fill the
+                * DMI bank location fields in different ways.
+                */
+               if (!ghes_edac_mc_num) {
+                       dimm_fill.count = 0;
+                       dimm_fill.mci = mci;
+                       dmi_walk(ghes_edac_dmidecode, &dimm_fill);
+               }
+       } else {
+               struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+                                                      mci->n_layers, 0, 0, 0);
+
+               dimm->nr_pages = 1;
+               dimm->grain = 128;
+               dimm->mtype = MEM_UNKNOWN;
+               dimm->dtype = DEV_UNKNOWN;
+               dimm->edac_mode = EDAC_SECDED;
+       }
+
+       rc = edac_mc_add_mc(mci);
+       if (rc < 0) {
+               pr_info("Can't register at EDAC core\n");
+               edac_mc_free(mci);
+               mutex_unlock(&ghes_edac_lock);
+               return -ENODEV;
+       }
+
+       ghes_edac_mc_num++;
+       mutex_unlock(&ghes_edac_lock);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ghes_edac_register);
+
+void ghes_edac_unregister(struct ghes *ghes)
+{
+       struct mem_ctl_info *mci;
+       struct ghes_edac_pvt *pvt, *tmp;
+
+       list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) {
+               if (ghes == pvt->ghes) {
+                       mci = pvt->mci;
+                       edac_mc_del_mc(mci->pdev);
+                       edac_mc_free(mci);
+                       list_del(&pvt->list);
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(ghes_edac_unregister);
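The three exported entry points above are the whole ghes_edac API: register one EDAC memory controller per GHES error source, feed it decoded CPER memory-error sections, and unregister on teardown. A rough, illustrative sketch of a caller follows; the ghes_probe_one()/ghes_handle_mem_section()/ghes_remove_one() names are made up here and only stand in for whatever APEI/GHES code path ends up invoking these hooks, and it assumes the ghes_edac_* prototypes are visible to that caller.

/* Illustrative only: how a GHES consumer would use the new hooks.
 * The function names below are hypothetical; only the ghes_edac_*
 * calls are the ones added by this patch.
 */
#include <acpi/ghes.h>
#include <linux/cper.h>
#include <linux/edac.h>

static int ghes_probe_one(struct ghes *ghes, struct device *dev)
{
	/* One EDAC memory controller is allocated per GHES error source. */
	return ghes_edac_register(ghes, dev);
}

static void ghes_handle_mem_section(struct ghes *ghes, int sev,
				    struct cper_sec_mem_err *mem_err)
{
	/* Forward the decoded CPER memory-error section; ghes_edac maps
	 * the GHES severity onto an EDAC hw_event_mc_err_type, formats
	 * the location/other_detail strings and then calls
	 * edac_raw_mc_handle_error().
	 */
	ghes_edac_report_mem_error(ghes, sev, mem_err);
}

static void ghes_remove_one(struct ghes *ghes)
{
	/* Tear down the per-source mem_ctl_info on error-source removal. */
	ghes_edac_unregister(ghes);
}

Note that ghes_edac_report_mem_error() does all of the severity mapping and string formatting itself, so a caller only needs the raw struct cper_sec_mem_err.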
index 4e8337602e781fed8d99ffbbb099a5523e585160..aa44c1718f50382eac5a87b1c9bef5e33db020e1 100644 (file)
@@ -106,16 +106,26 @@ static int nr_channels;
 
 static int how_many_channels(struct pci_dev *pdev)
 {
+       int n_channels;
+
        unsigned char capid0_8b; /* 8th byte of CAPID0 */
 
        pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+
        if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
                edac_dbg(0, "In single channel mode\n");
-               return 1;
+               n_channels = 1;
        } else {
                edac_dbg(0, "In dual channel mode\n");
-               return 2;
+               n_channels = 2;
        }
+
+       if (capid0_8b & 0x10) /* check if both channels are filled */
+               edac_dbg(0, "2 DIMMS per channel disabled\n");
+       else
+               edac_dbg(0, "2 DIMMS per channel enabled\n");
+
+       return n_channels;
 }
 
 static unsigned long eccerrlog_syndrome(u64 log)
@@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window,
        for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
                drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
                drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+
+               edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
        }
 }
 
@@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages(
        int n;
 
        n = drbs[channel][rank];
+       if (!n)
+               return 0;
+
        if (rank > 0)
                n -= drbs[channel][rank - 1];
        if (stacked && (channel == 1) &&
@@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
         * cumulative; the last one will contain the total memory
         * contained in all ranks.
         */
-       for (i = 0; i < mci->nr_csrows; i++) {
+       for (i = 0; i < I3200_DIMMS; i++) {
                unsigned long nr_pages;
-               struct csrow_info *csrow = mci->csrows[i];
 
-               nr_pages = drb_to_nr_pages(drbs, stacked,
-                       i / I3200_RANKS_PER_CHANNEL,
-                       i % I3200_RANKS_PER_CHANNEL);
+               for (j = 0; j < nr_channels; j++) {
+                       struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+                                                              mci->n_layers, i, j, 0);
 
-               if (nr_pages == 0)
-                       continue;
+                       nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
+                       if (nr_pages == 0)
+                               continue;
 
-               for (j = 0; j < nr_channels; j++) {
-                       struct dimm_info *dimm = csrow->channels[j]->dimm;
+                       edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
+                                stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
 
                        dimm->nr_pages = nr_pages;
                        dimm->grain = nr_pages << PAGE_SHIFT;
index d6955b2cc99fa47bc9ac665fa93eb5f59cafc88e..1b635178cc44ff31b8ed224560ebdf86851165f0 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/edac.h>
 #include <linux/delay.h>
 #include <linux/mmzone.h>
+#include <linux/debugfs.h>
 
 #include "edac_core.h"
 
                        I5100_FERR_NF_MEM_M1ERR_MASK)
 #define        I5100_NERR_NF_MEM       0xa4    /* MC Next Non-Fatal Errors */
 #define I5100_EMASK_MEM                0xa8    /* MC Error Mask Register */
+#define I5100_MEM0EINJMSK0     0x200   /* Injection Mask0 Register Channel 0 */
+#define I5100_MEM1EINJMSK0     0x208   /* Injection Mask0 Register Channel 1 */
+#define                I5100_MEMXEINJMSK0_EINJEN       (1 << 27)
+#define I5100_MEM0EINJMSK1     0x204   /* Injection Mask1 Register Channel 0 */
+#define I5100_MEM1EINJMSK1     0x206   /* Injection Mask1 Register Channel 1 */
+
+/* Device 19, Function 0 */
+#define I5100_DINJ0 0x9a
 
 /* device 21 and 22, func 0 */
 #define I5100_MTR_0    0x154   /* Memory Technology Registers 0-3 */
@@ -338,13 +347,26 @@ struct i5100_priv {
        unsigned ranksperchan;  /* number of ranks per channel */
 
        struct pci_dev *mc;     /* device 16 func 1 */
+       struct pci_dev *einj;   /* device 19 func 0 */
        struct pci_dev *ch0mm;  /* device 21 func 0 */
        struct pci_dev *ch1mm;  /* device 22 func 0 */
 
        struct delayed_work i5100_scrubbing;
        int scrub_enable;
+
+       /* Error injection */
+       u8 inject_channel;
+       u8 inject_hlinesel;
+       u8 inject_deviceptr1;
+       u8 inject_deviceptr2;
+       u16 inject_eccmask1;
+       u16 inject_eccmask2;
+
+       struct dentry *debugfs;
 };
 
+static struct dentry *i5100_debugfs;
+
 /* map a rank/chan to a slot number on the mainboard */
 static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
                              int chan, int rank)
@@ -863,13 +885,126 @@ static void i5100_init_csrows(struct mem_ctl_info *mci)
        }
 }
 
+/****************************************************************************
+ *                       Error injection routines
+ ****************************************************************************/
+
+static void i5100_do_inject(struct mem_ctl_info *mci)
+{
+       struct i5100_priv *priv = mci->pvt_info;
+       u32 mask0;
+       u16 mask1;
+
+       /* MEM[1:0]EINJMSK0
+        * 31    - ADDRMATCHEN
+        * 29:28 - HLINESEL
+        *         00 Reserved
+        *         01 Lower half of cache line
+        *         10 Upper half of cache line
+        *         11 Both upper and lower parts of cache line
+        * 27    - EINJEN
+        * 25:19 - XORMASK1 for deviceptr1
+        * 9:5   - SEC2RAM for deviceptr2
+        * 4:0   - FIR2RAM for deviceptr1
+        */
+       mask0 = ((priv->inject_hlinesel & 0x3) << 28) |
+               I5100_MEMXEINJMSK0_EINJEN |
+               ((priv->inject_eccmask1 & 0xffff) << 10) |
+               ((priv->inject_deviceptr2 & 0x1f) << 5) |
+               (priv->inject_deviceptr1 & 0x1f);
+
+       /* MEM[1:0]EINJMSK1
+        * 15:0  - XORMASK2 for deviceptr2
+        */
+       mask1 = priv->inject_eccmask2;
+
+       if (priv->inject_channel == 0) {
+               pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0);
+               pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1);
+       } else {
+               pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0);
+               pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1);
+       }
+
+       /* Error Injection Response Function
+        * The Intel 5100 Memory Controller Hub Chipset (318378) datasheet
+        * hints at this register but carries no data about it. All
+        * data regarding device 19 is based on experimentation and the
+        * Intel 7300 Chipset Memory Controller Hub (318082) datasheet,
+        * which appears to be accurate for the i5100 in this area.
+        *
+        * The injection code doesn't work without setting this register.
+        * The register needs to be flipped off then on, else the hardware
+        * will only perform the first injection.
+        *
+        * Stop condition bits 7:4
+        * 1010 - Stop after one injection
+        * 1011 - Never stop injecting faults
+        *
+        * Start condition bits 3:0
+        * 1010 - Never start
+        * 1011 - Start immediately
+        */
+       pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa);
+       pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab);
+}
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+static ssize_t inject_enable_write(struct file *file, const char __user *data,
+               size_t count, loff_t *ppos)
+{
+       struct device *dev = file->private_data;
+       struct mem_ctl_info *mci = to_mci(dev);
+
+       i5100_do_inject(mci);
+
+       return count;
+}
+
+static const struct file_operations i5100_inject_enable_fops = {
+       .open = simple_open,
+       .write = inject_enable_write,
+       .llseek = generic_file_llseek,
+};
+
+static int i5100_setup_debugfs(struct mem_ctl_info *mci)
+{
+       struct i5100_priv *priv = mci->pvt_info;
+
+       if (!i5100_debugfs)
+               return -ENODEV;
+
+       priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs);
+
+       if (!priv->debugfs)
+               return -ENOMEM;
+
+       debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs,
+                       &priv->inject_channel);
+       debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs,
+                       &priv->inject_hlinesel);
+       debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs,
+                       &priv->inject_deviceptr1);
+       debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs,
+                       &priv->inject_deviceptr2);
+       debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs,
+                       &priv->inject_eccmask1);
+       debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs,
+                       &priv->inject_eccmask2);
+       debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs,
+                       &mci->dev, &i5100_inject_enable_fops);
+
+       return 0;
+
+}
+
 static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        int rc;
        struct mem_ctl_info *mci;
        struct edac_mc_layer layers[2];
        struct i5100_priv *priv;
-       struct pci_dev *ch0mm, *ch1mm;
+       struct pci_dev *ch0mm, *ch1mm, *einj;
        int ret = 0;
        u32 dw;
        int ranksperch;
@@ -941,6 +1076,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
                goto bail_disable_ch1;
        }
 
+
+       /* device 19, func 0, Error injection */
+       einj = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+                                   PCI_DEVICE_ID_INTEL_5100_19, 0);
+       if (!einj) {
+               ret = -ENODEV;
+               goto bail_einj;
+       }
+
+       rc = pci_enable_device(einj);
+       if (rc < 0) {
+               ret = rc;
+               goto bail_disable_einj;
+       }
+
+
        mci->pdev = &pdev->dev;
 
        priv = mci->pvt_info;
@@ -948,6 +1099,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        priv->mc = pdev;
        priv->ch0mm = ch0mm;
        priv->ch1mm = ch1mm;
+       priv->einj = einj;
 
        INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing);
 
@@ -975,6 +1127,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        mci->set_sdram_scrub_rate = i5100_set_scrub_rate;
        mci->get_sdram_scrub_rate = i5100_get_scrub_rate;
 
+       priv->inject_channel = 0;
+       priv->inject_hlinesel = 0;
+       priv->inject_deviceptr1 = 0;
+       priv->inject_deviceptr2 = 0;
+       priv->inject_eccmask1 = 0;
+       priv->inject_eccmask2 = 0;
+
        i5100_init_csrows(mci);
 
        /* this strange construction seems to be in every driver, dunno why */
@@ -992,6 +1151,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
                goto bail_scrub;
        }
 
+       i5100_setup_debugfs(mci);
+
        return ret;
 
 bail_scrub:
@@ -999,6 +1160,12 @@ bail_scrub:
        cancel_delayed_work_sync(&(priv->i5100_scrubbing));
        edac_mc_free(mci);
 
+bail_disable_einj:
+       pci_disable_device(einj);
+
+bail_einj:
+       pci_dev_put(einj);
+
 bail_disable_ch1:
        pci_disable_device(ch1mm);
 
@@ -1030,14 +1197,18 @@ static void i5100_remove_one(struct pci_dev *pdev)
 
        priv = mci->pvt_info;
 
+       debugfs_remove_recursive(priv->debugfs);
+
        priv->scrub_enable = 0;
        cancel_delayed_work_sync(&(priv->i5100_scrubbing));
 
        pci_disable_device(pdev);
        pci_disable_device(priv->ch0mm);
        pci_disable_device(priv->ch1mm);
+       pci_disable_device(priv->einj);
        pci_dev_put(priv->ch0mm);
        pci_dev_put(priv->ch1mm);
+       pci_dev_put(priv->einj);
 
        edac_mc_free(mci);
 }
@@ -1060,13 +1231,16 @@ static int __init i5100_init(void)
 {
        int pci_rc;
 
-       pci_rc = pci_register_driver(&i5100_driver);
+       i5100_debugfs = debugfs_create_dir("i5100_edac", NULL);
 
+       pci_rc = pci_register_driver(&i5100_driver);
        return (pci_rc < 0) ? pci_rc : 0;
 }
 
 static void __exit i5100_exit(void)
 {
+       debugfs_remove(i5100_debugfs);
+
        pci_unregister_driver(&i5100_driver);
 }
 
index e213d030b0dd7968756193f185a5661b7a4b9441..0ec3e95a12cd48c37fa100542e2bbe010cb4afad 100644 (file)
@@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms)
 
 static inline int numrank(u32 rank)
 {
-       static int ranks[4] = { 1, 2, 4, -EINVAL };
+       static const int ranks[] = { 1, 2, 4, -EINVAL };
 
        return ranks[rank & 0x3];
 }
 
 static inline int numbank(u32 bank)
 {
-       static int banks[4] = { 4, 8, 16, -EINVAL };
+       static const int banks[] = { 4, 8, 16, -EINVAL };
 
        return banks[bank & 0x3];
 }
 
 static inline int numrow(u32 row)
 {
-       static int rows[8] = {
+       static const int rows[] = {
                1 << 12, 1 << 13, 1 << 14, 1 << 15,
                1 << 16, -EINVAL, -EINVAL, -EINVAL,
        };
@@ -444,7 +444,7 @@ static inline int numrow(u32 row)
 
 static inline int numcol(u32 col)
 {
-       static int cols[8] = {
+       static const int cols[] = {
                1 << 10, 1 << 11, 1 << 12, -EINVAL,
        };
        return cols[col & 0x3];
index da7e2986e3d53c9aa721b9f1dd60901672828cc4..57244f995614f9622db8640065f645d00af621f7 100644 (file)
@@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
        tmp_mb = (1 + pvt->tohm) >> 20;
 
        mb = div_u64_rem(tmp_mb, 1000, &kb);
-       edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm);
+       edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
 
        /*
         * Step 2) Get SAD range and SAD Interleave list
index e67a4be0080d1a422a4d3509188f08fae2992084..bb2cd3ce9b0f2156bc2e225796a44a55d8761734 100644 (file)
@@ -626,7 +626,6 @@ static void dec_pending(struct dm_io *io, int error)
                        queue_io(md, bio);
                } else {
                        /* done with normal IO or empty flush */
-                       trace_block_bio_complete(md->queue, bio, io_error);
                        bio_endio(bio, io_error);
                }
        }
index 697f026cb318d74874329e2585c8f148738371b3..5af2d270908178b2628a3db42c508417fc4f5579 100644 (file)
@@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi)
                return_bi = bi->bi_next;
                bi->bi_next = NULL;
                bi->bi_size = 0;
-               trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
-                                        bi, 0);
                bio_endio(bi, 0);
                bi = return_bi;
        }
@@ -3916,8 +3914,6 @@ static void raid5_align_endio(struct bio *bi, int error)
        rdev_dec_pending(rdev, conf->mddev);
 
        if (!error && uptodate) {
-               trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
-                                        raid_bi, 0);
                bio_endio(raid_bi, 0);
                if (atomic_dec_and_test(&conf->active_aligned_reads))
                        wake_up(&conf->wait_for_stripe);
@@ -4376,8 +4372,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                if ( rw == WRITE )
                        md_write_end(mddev);
 
-               trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
-                                        bi, 0);
                bio_endio(bi, 0);
        }
 }
@@ -4754,11 +4748,8 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                handled++;
        }
        remaining = raid5_dec_bi_active_stripes(raid_bio);
-       if (remaining == 0) {
-               trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
-                                        raid_bio, 0);
+       if (remaining == 0)
                bio_endio(raid_bio, 0);
-       }
        if (atomic_dec_and_test(&conf->active_aligned_reads))
                wake_up(&conf->wait_for_stripe);
        return handled;
index 383a727b8aa07b97002363bf35a949aa7b90607e..6e5ad8ec0a220f83cac3965b2da3c0cb6ad47fff 100644 (file)
@@ -1338,28 +1338,15 @@ static int isp_enable_clocks(struct isp_device *isp)
 {
        int r;
        unsigned long rate;
-       int divisor;
-
-       /*
-        * cam_mclk clock chain:
-        *   dpll4 -> dpll4_m5 -> dpll4_m5x2 -> cam_mclk
-        *
-        * In OMAP3630 dpll4_m5x2 != 2 x dpll4_m5 but both are
-        * set to the same value. Hence the rate set for dpll4_m5
-        * has to be twice of what is set on OMAP3430 to get
-        * the required value for cam_mclk
-        */
-       divisor = isp->revision == ISP_REVISION_15_0 ? 1 : 2;
 
        r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_ICK]);
        if (r) {
                dev_err(isp->dev, "failed to enable cam_ick clock\n");
                goto out_clk_enable_ick;
        }
-       r = clk_set_rate(isp->clock[ISP_CLK_DPLL4_M5_CK],
-                        CM_CAM_MCLK_HZ/divisor);
+       r = clk_set_rate(isp->clock[ISP_CLK_CAM_MCLK], CM_CAM_MCLK_HZ);
        if (r) {
-               dev_err(isp->dev, "clk_set_rate for dpll4_m5_ck failed\n");
+               dev_err(isp->dev, "clk_set_rate for cam_mclk failed\n");
                goto out_clk_enable_mclk;
        }
        r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_MCLK]);
@@ -1401,7 +1388,6 @@ static void isp_disable_clocks(struct isp_device *isp)
 static const char *isp_clocks[] = {
        "cam_ick",
        "cam_mclk",
-       "dpll4_m5_ck",
        "csi2_96m_fck",
        "l3_ick",
 };
index 517d348ce32b1b4871315d5a7d367cd3c475087c..c77e1f2ae5ca43b0badb7d4bffdfadc85472a0c4 100644 (file)
@@ -147,7 +147,6 @@ struct isp_platform_callback {
  * @ref_count: Reference count for handling multiple ISP requests.
  * @cam_ick: Pointer to camera interface clock structure.
  * @cam_mclk: Pointer to camera functional clock structure.
- * @dpll4_m5_ck: Pointer to DPLL4 M5 clock structure.
  * @csi2_fck: Pointer to camera CSI2 complexIO clock structure.
  * @l3_ick: Pointer to OMAP3 L3 bus interface clock.
  * @irq: Currently attached ISP ISR callbacks information structure.
@@ -189,10 +188,9 @@ struct isp_device {
        u32 xclk_divisor[2];    /* Two clocks, a and b. */
 #define ISP_CLK_CAM_ICK                0
 #define ISP_CLK_CAM_MCLK       1
-#define ISP_CLK_DPLL4_M5_CK    2
-#define ISP_CLK_CSI2_FCK       3
-#define ISP_CLK_L3_ICK         4
-       struct clk *clock[5];
+#define ISP_CLK_CSI2_FCK       2
+#define ISP_CLK_L3_ICK         3
+       struct clk *clock[4];
 
        /* ISP modules */
        struct ispstat isp_af;
index 742f5d7eb0f5988c8b07592eef1448685f5ab7a1..a6f7190c09a469b878f2bd9a0db84b3767c38557 100644 (file)
  *----------------------------------------------------------------------------*/
 
 #ifndef AAC_DRIVER_BUILD
-# define AAC_DRIVER_BUILD 29801
+# define AAC_DRIVER_BUILD 30000
 # define AAC_DRIVER_BRANCH "-ms"
 #endif
 #define MAXIMUM_NUM_CONTAINERS 32
 
 #define AAC_NUM_MGT_FIB         8
-#define AAC_NUM_IO_FIB         (512 - AAC_NUM_MGT_FIB)
+#define AAC_NUM_IO_FIB         (1024 - AAC_NUM_MGT_FIB)
 #define AAC_NUM_FIB            (AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB)
 
 #define AAC_MAX_LUN            (8)
 #define CONTAINER_TO_ID(cont)          (cont)
 #define CONTAINER_TO_LUN(cont)         (0)
 
+#define PMC_DEVICE_S7  0x28c
+#define PMC_DEVICE_S8  0x28d
+#define PMC_DEVICE_S9  0x28f
+
 #define aac_phys_to_logical(x)  ((x)+1)
 #define aac_logical_to_phys(x)  ((x)?(x)-1:0)
 
index 8e5d3be1612712566b084453ad747c8d5582525d..3f759957f4b476ef86c067af0d366922ea28615d 100644 (file)
@@ -404,7 +404,13 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
                dev->max_fib_size = status[1] & 0xFFE0;
                host->sg_tablesize = status[2] >> 16;
                dev->sg_tablesize = status[2] & 0xFFFF;
-               host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB;
+               if (dev->pdev->device == PMC_DEVICE_S7 ||
+                   dev->pdev->device == PMC_DEVICE_S8 ||
+                   dev->pdev->device == PMC_DEVICE_S9)
+                       host->can_queue = ((status[3] >> 16) ? (status[3] >> 16) :
+                               (status[3] & 0xFFFF)) - AAC_NUM_MGT_FIB;
+               else
+                       host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB;
                dev->max_num_aif = status[4] & 0xFFFF;
                /*
                 *      NOTE:
@@ -452,6 +458,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev)
                }
        }
 
+       if (host->can_queue > AAC_NUM_IO_FIB)
+               host->can_queue = AAC_NUM_IO_FIB;
+
        /*
         *      Ok now init the communication subsystem
         */
index e6bf12675db8d363f2f94a9483c2a3770e165ad9..a5f7690e819ef13e8c74b76dcf8be9bf3d42268d 100644 (file)
@@ -1034,7 +1034,7 @@ bfad_start_ops(struct bfad_s *bfad) {
                        sizeof(driver_info.host_os_patch) - 1);
 
        strncpy(driver_info.os_device_name, bfad->pci_name,
-               sizeof(driver_info.os_device_name - 1));
+               sizeof(driver_info.os_device_name) - 1);
 
        /* FCS driver info init */
        spin_lock_irqsave(&bfad->bfad_lock, flags);
index 3486845ba301b095525f25ba26de0e809bf27ef9..50fcd018d14b347c35b8243a5e27b8f285faeb59 100644 (file)
@@ -64,7 +64,7 @@
 #include "bnx2fc_constants.h"
 
 #define BNX2FC_NAME            "bnx2fc"
-#define BNX2FC_VERSION         "1.0.12"
+#define BNX2FC_VERSION         "1.0.13"
 
 #define PFX                    "bnx2fc: "
 
 #define BNX2FC_RELOGIN_WAIT_TIME       200
 #define BNX2FC_RELOGIN_WAIT_CNT                10
 
+#define BNX2FC_STATS(hba, stat, cnt)                                   \
+       do {                                                            \
+               u32 val;                                                \
+                                                                       \
+               val = fw_stats->stat.cnt;                               \
+               if (hba->prev_stats.stat.cnt <= val)                    \
+                       val -= hba->prev_stats.stat.cnt;                \
+               else                                                    \
+                       val += (0xfffffff - hba->prev_stats.stat.cnt);  \
+               hba->bfw_stats.cnt += val;                              \
+       } while (0)
+
 /* bnx2fc driver uses only one instance of fcoe_percpu_s */
 extern struct fcoe_percpu_s bnx2fc_global;
 
@@ -167,6 +179,14 @@ struct bnx2fc_percpu_s {
        spinlock_t fp_work_lock;
 };
 
+struct bnx2fc_fw_stats {
+       u64     fc_crc_cnt;
+       u64     fcoe_tx_pkt_cnt;
+       u64     fcoe_rx_pkt_cnt;
+       u64     fcoe_tx_byte_cnt;
+       u64     fcoe_rx_byte_cnt;
+};
+
 struct bnx2fc_hba {
        struct list_head list;
        struct cnic_dev *cnic;
@@ -207,6 +227,8 @@ struct bnx2fc_hba {
        struct bnx2fc_rport **tgt_ofld_list;
 
        /* statistics */
+       struct bnx2fc_fw_stats bfw_stats;
+       struct fcoe_statistics_params prev_stats;
        struct fcoe_statistics_params *stats_buffer;
        dma_addr_t stats_buf_dma;
        struct completion stat_req_done;
@@ -280,6 +302,7 @@ struct bnx2fc_rport {
 #define BNX2FC_FLAG_UPLD_REQ_COMPL     0x7
 #define BNX2FC_FLAG_EXPL_LOGO          0x8
 #define BNX2FC_FLAG_DISABLE_FAILED     0x9
+#define BNX2FC_FLAG_ENABLED            0xa
 
        u8 src_addr[ETH_ALEN];
        u32 max_sqes;
@@ -468,6 +491,8 @@ int bnx2fc_send_fw_fcoe_init_msg(struct bnx2fc_hba *hba);
 int bnx2fc_send_fw_fcoe_destroy_msg(struct bnx2fc_hba *hba);
 int bnx2fc_send_session_ofld_req(struct fcoe_port *port,
                                        struct bnx2fc_rport *tgt);
+int bnx2fc_send_session_enable_req(struct fcoe_port *port,
+                                       struct bnx2fc_rport *tgt);
 int bnx2fc_send_session_disable_req(struct fcoe_port *port,
                                    struct bnx2fc_rport *tgt);
 int bnx2fc_send_session_destroy_req(struct bnx2fc_hba *hba,
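The new BNX2FC_STATS() macro above accumulates per-counter deltas between successive firmware statistics snapshots into the 64-bit totals in struct bnx2fc_fw_stats, compensating for the raw counters wrapping (it treats them as wrapping at 0xfffffff, per the constant used in the macro). A standalone sketch of the same arithmetic, with made-up values, is:

/* Standalone sketch of the delta accumulation done by BNX2FC_STATS():
 * add the wrap-corrected difference between the previous and current
 * firmware snapshot of one counter into a 64-bit running total.
 */
#include <stdio.h>
#include <stdint.h>

static void accumulate(uint64_t *total, uint32_t prev, uint32_t cur)
{
	uint32_t val = cur;

	if (prev <= val)
		val -= prev;			/* normal case: counter advanced */
	else
		val += (0xfffffff - prev);	/* counter wrapped since last snapshot */

	*total += val;
}

int main(void)
{
	uint64_t tx_frames = 0;

	accumulate(&tx_frames, 100, 250);	/* +150 */
	accumulate(&tx_frames, 0xffffff0, 5);	/* wrapped: +(0xfffffff - 0xffffff0) + 5 = +20 */
	printf("%llu\n", (unsigned long long)tx_frames);	/* prints 170 */
	return 0;
}

bnx2fc_get_host_stats() in bnx2fc_fcoe.c uses this to keep the fc_host statistics monotonic even though the raw firmware counters can wrap between reads, and snapshots the current buffer into prev_stats at the end of each call.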
index 70ecd953a5793e2990d4f9e32715e2ada15d22cb..6401db494ef580c1e8298b2749fa7a7411ff4237 100644 (file)
@@ -22,7 +22,7 @@ DEFINE_PER_CPU(struct bnx2fc_percpu_s, bnx2fc_percpu);
 
 #define DRV_MODULE_NAME                "bnx2fc"
 #define DRV_MODULE_VERSION     BNX2FC_VERSION
-#define DRV_MODULE_RELDATE     "Jun 04, 2012"
+#define DRV_MODULE_RELDATE     "Dec 21, 2012"
 
 
 static char version[] =
@@ -687,11 +687,16 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
                BNX2FC_HBA_DBG(lport, "FW stat req timed out\n");
                return bnx2fc_stats;
        }
-       bnx2fc_stats->invalid_crc_count += fw_stats->rx_stat2.fc_crc_cnt;
-       bnx2fc_stats->tx_frames += fw_stats->tx_stat.fcoe_tx_pkt_cnt;
-       bnx2fc_stats->tx_words += (fw_stats->tx_stat.fcoe_tx_byte_cnt) / 4;
-       bnx2fc_stats->rx_frames += fw_stats->rx_stat0.fcoe_rx_pkt_cnt;
-       bnx2fc_stats->rx_words += (fw_stats->rx_stat0.fcoe_rx_byte_cnt) / 4;
+       BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt);
+       bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt;
+       BNX2FC_STATS(hba, tx_stat, fcoe_tx_pkt_cnt);
+       bnx2fc_stats->tx_frames += hba->bfw_stats.fcoe_tx_pkt_cnt;
+       BNX2FC_STATS(hba, tx_stat, fcoe_tx_byte_cnt);
+       bnx2fc_stats->tx_words += ((hba->bfw_stats.fcoe_tx_byte_cnt) / 4);
+       BNX2FC_STATS(hba, rx_stat0, fcoe_rx_pkt_cnt);
+       bnx2fc_stats->rx_frames += hba->bfw_stats.fcoe_rx_pkt_cnt;
+       BNX2FC_STATS(hba, rx_stat0, fcoe_rx_byte_cnt);
+       bnx2fc_stats->rx_words += ((hba->bfw_stats.fcoe_rx_byte_cnt) / 4);
 
        bnx2fc_stats->dumped_frames = 0;
        bnx2fc_stats->lip_count = 0;
@@ -700,6 +705,8 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost)
        bnx2fc_stats->loss_of_signal_count = 0;
        bnx2fc_stats->prim_seq_protocol_err_count = 0;
 
+       memcpy(&hba->prev_stats, hba->stats_buffer,
+              sizeof(struct fcoe_statistics_params));
        return bnx2fc_stats;
 }
 
@@ -2660,7 +2667,7 @@ static struct scsi_host_template bnx2fc_shost_template = {
        .can_queue              = BNX2FC_CAN_QUEUE,
        .use_clustering         = ENABLE_CLUSTERING,
        .sg_tablesize           = BNX2FC_MAX_BDS_PER_CMD,
-       .max_sectors            = 512,
+       .max_sectors            = 1024,
 };
 
 static struct libfc_function_template bnx2fc_libfc_fcn_templ = {
index ef60afa94d0e75c265a5010be1dc72c62b82166b..85ea98a80f40204ae9e3a1fc835c0f0ec900f865 100644 (file)
@@ -347,7 +347,7 @@ int bnx2fc_send_session_ofld_req(struct fcoe_port *port,
  * @port:              port structure pointer
  * @tgt:               bnx2fc_rport structure pointer
  */
-static int bnx2fc_send_session_enable_req(struct fcoe_port *port,
+int bnx2fc_send_session_enable_req(struct fcoe_port *port,
                                        struct bnx2fc_rport *tgt)
 {
        struct kwqe *kwqe_arr[2];
@@ -759,8 +759,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe)
                case FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET:
                        BNX2FC_TGT_DBG(tgt, "REC TOV popped for xid - 0x%x\n",
                                   xid);
-                       memset(&io_req->err_entry, 0,
-                              sizeof(struct fcoe_err_report_entry));
                        memcpy(&io_req->err_entry, err_entry,
                               sizeof(struct fcoe_err_report_entry));
                        if (!test_bit(BNX2FC_FLAG_SRR_SENT,
@@ -847,8 +845,6 @@ ret_err_rqe:
                        goto ret_warn_rqe;
                }
 
-               memset(&io_req->err_entry, 0,
-                      sizeof(struct fcoe_err_report_entry));
                memcpy(&io_req->err_entry, err_entry,
                       sizeof(struct fcoe_err_report_entry));
 
@@ -1124,7 +1120,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba,
        struct bnx2fc_interface         *interface;
        u32                             conn_id;
        u32                             context_id;
-       int                             rc;
 
        conn_id = ofld_kcqe->fcoe_conn_id;
        context_id = ofld_kcqe->fcoe_conn_context_id;
@@ -1153,17 +1148,10 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba,
                                "resources\n");
                        set_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, &tgt->flags);
                }
-               goto ofld_cmpl_err;
        } else {
-
-               /* now enable the session */
-               rc = bnx2fc_send_session_enable_req(port, tgt);
-               if (rc) {
-                       printk(KERN_ERR PFX "enable session failed\n");
-                       goto ofld_cmpl_err;
-               }
+               /* FW offload request successfully completed */
+               set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
        }
-       return;
 ofld_cmpl_err:
        set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
        wake_up_interruptible(&tgt->ofld_wait);
@@ -1210,15 +1198,9 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba,
                printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n");
                goto enbl_cmpl_err;
        }
-       if (ofld_kcqe->completion_status)
-               goto enbl_cmpl_err;
-       else {
+       if (!ofld_kcqe->completion_status)
                /* enable successful - rport ready for issuing IOs */
-               set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
-               set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
-               wake_up_interruptible(&tgt->ofld_wait);
-       }
-       return;
+               set_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
 
 enbl_cmpl_err:
        set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
@@ -1251,6 +1233,7 @@ static void bnx2fc_process_conn_disable_cmpl(struct bnx2fc_hba *hba,
                /* disable successful */
                BNX2FC_TGT_DBG(tgt, "disable successful\n");
                clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
+               clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
                set_bit(BNX2FC_FLAG_DISABLED, &tgt->flags);
                set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags);
                wake_up_interruptible(&tgt->upld_wait);
index 8d4626c07a12f5e7a4f164b835f57e7f275acd2a..60798e829de671f7bdf4151642814a79f42fe8c6 100644 (file)
@@ -654,7 +654,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req)
        mp_req->mp_resp_bd = dma_alloc_coherent(&hba->pcidev->dev, sz,
                                                 &mp_req->mp_resp_bd_dma,
                                                 GFP_ATOMIC);
-       if (!mp_req->mp_req_bd) {
+       if (!mp_req->mp_resp_bd) {
                printk(KERN_ERR PFX "unable to alloc MP resp bd\n");
                bnx2fc_free_mp_resc(io_req);
                return FAILED;
@@ -685,8 +685,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req)
 static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags)
 {
        struct fc_lport *lport;
-       struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device));
-       struct fc_rport_libfc_priv *rp = rport->dd_data;
+       struct fc_rport *rport;
+       struct fc_rport_libfc_priv *rp;
        struct fcoe_port *port;
        struct bnx2fc_interface *interface;
        struct bnx2fc_rport *tgt;
@@ -704,6 +704,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags)
        unsigned long start = jiffies;
 
        lport = shost_priv(host);
+       rport = starget_to_rport(scsi_target(sc_cmd->device));
        port = lport_priv(lport);
        interface = port->priv;
 
@@ -712,6 +713,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags)
                rc = FAILED;
                goto tmf_err;
        }
+       rp = rport->dd_data;
 
        rc = fc_block_scsi_eh(sc_cmd);
        if (rc)
index b9d0d9cb17f944309f07dd119e55999bb577c852..c57a3bb8a9fbfe1944e796a67df7cf81efec23c8 100644 (file)
@@ -33,6 +33,7 @@ static void bnx2fc_upld_timer(unsigned long data)
        BNX2FC_TGT_DBG(tgt, "upld_timer - Upload compl not received!!\n");
        /* fake upload completion */
        clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
+       clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
        set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags);
        wake_up_interruptible(&tgt->upld_wait);
 }
@@ -55,10 +56,25 @@ static void bnx2fc_ofld_timer(unsigned long data)
         * resources are freed up in bnx2fc_offload_session
         */
        clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
+       clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags);
        set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
        wake_up_interruptible(&tgt->ofld_wait);
 }
 
+static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt)
+{
+       setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt);
+       mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT);
+
+       wait_event_interruptible(tgt->ofld_wait,
+                                (test_bit(
+                                 BNX2FC_FLAG_OFLD_REQ_CMPL,
+                                 &tgt->flags)));
+       if (signal_pending(current))
+               flush_signals(current);
+       del_timer_sync(&tgt->ofld_timer);
+}
+
 static void bnx2fc_offload_session(struct fcoe_port *port,
                                        struct bnx2fc_rport *tgt,
                                        struct fc_rport_priv *rdata)
@@ -103,17 +119,7 @@ retry_ofld:
         * wait for the session is offloaded and enabled. 3 Secs
         * should be ample time for this process to complete.
         */
-       setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt);
-       mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT);
-
-       wait_event_interruptible(tgt->ofld_wait,
-                                (test_bit(
-                                 BNX2FC_FLAG_OFLD_REQ_CMPL,
-                                 &tgt->flags)));
-       if (signal_pending(current))
-               flush_signals(current);
-
-       del_timer_sync(&tgt->ofld_timer);
+       bnx2fc_ofld_wait(tgt);
 
        if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) {
                if (test_and_clear_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE,
@@ -131,14 +137,23 @@ retry_ofld:
        }
        if (bnx2fc_map_doorbell(tgt)) {
                printk(KERN_ERR PFX "map doorbell failed - no mem\n");
-               /* upload will take care of cleaning up sess resc */
-               lport->tt.rport_logoff(rdata);
+               goto ofld_err;
        }
+       clear_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags);
+       rval = bnx2fc_send_session_enable_req(port, tgt);
+       if (rval) {
+               pr_err(PFX "enable session failed\n");
+               goto ofld_err;
+       }
+       bnx2fc_ofld_wait(tgt);
+       if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)))
+               goto ofld_err;
        return;
 
 ofld_err:
        /* couldn't offload the session. log off from this rport */
        BNX2FC_TGT_DBG(tgt, "bnx2fc_offload_session - offload error\n");
+       clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags);
        /* Free session resources */
        bnx2fc_free_session_resc(hba, tgt);
 tgt_init_err:
@@ -259,6 +274,19 @@ void bnx2fc_flush_active_ios(struct bnx2fc_rport *tgt)
        spin_unlock_bh(&tgt->tgt_lock);
 }
 
+static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt)
+{
+       setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt);
+       mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT);
+       wait_event_interruptible(tgt->upld_wait,
+                                (test_bit(
+                                 BNX2FC_FLAG_UPLD_REQ_COMPL,
+                                 &tgt->flags)));
+       if (signal_pending(current))
+               flush_signals(current);
+       del_timer_sync(&tgt->upld_timer);
+}
+
 static void bnx2fc_upload_session(struct fcoe_port *port,
                                        struct bnx2fc_rport *tgt)
 {
@@ -279,19 +307,8 @@ static void bnx2fc_upload_session(struct fcoe_port *port,
         * wait for upload to complete. 3 Secs
         * should be sufficient time for this process to complete.
         */
-       setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt);
-       mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT);
-
        BNX2FC_TGT_DBG(tgt, "waiting for disable compl\n");
-       wait_event_interruptible(tgt->upld_wait,
-                                (test_bit(
-                                 BNX2FC_FLAG_UPLD_REQ_COMPL,
-                                 &tgt->flags)));
-
-       if (signal_pending(current))
-               flush_signals(current);
-
-       del_timer_sync(&tgt->upld_timer);
+       bnx2fc_upld_wait(tgt);
 
        /*
         * traverse thru the active_q and tmf_q and cleanup
@@ -308,24 +325,13 @@ static void bnx2fc_upload_session(struct fcoe_port *port,
                bnx2fc_send_session_destroy_req(hba, tgt);
 
                /* wait for destroy to complete */
-               setup_timer(&tgt->upld_timer,
-                           bnx2fc_upld_timer, (unsigned long)tgt);
-               mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT);
-
-               wait_event_interruptible(tgt->upld_wait,
-                                        (test_bit(
-                                         BNX2FC_FLAG_UPLD_REQ_COMPL,
-                                         &tgt->flags)));
+               bnx2fc_upld_wait(tgt);
 
                if (!(test_bit(BNX2FC_FLAG_DESTROYED, &tgt->flags)))
                        printk(KERN_ERR PFX "ERROR!! destroy timed out\n");
 
                BNX2FC_TGT_DBG(tgt, "destroy wait complete flags = 0x%lx\n",
                        tgt->flags);
-               if (signal_pending(current))
-                       flush_signals(current);
-
-               del_timer_sync(&tgt->upld_timer);
 
        } else if (test_bit(BNX2FC_FLAG_DISABLE_FAILED, &tgt->flags)) {
                printk(KERN_ERR PFX "ERROR!! DISABLE req failed, destroy"
@@ -381,7 +387,9 @@ static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt,
        tgt->rq_cons_idx = 0;
        atomic_set(&tgt->num_active_ios, 0);
 
-       if (rdata->flags & FC_RP_FLAGS_RETRY) {
+       if (rdata->flags & FC_RP_FLAGS_RETRY &&
+           rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET &&
+           !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) {
                tgt->dev_type = TYPE_TAPE;
                tgt->io_timeout = 0; /* use default ULP timeout */
        } else {
@@ -479,7 +487,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport,
                tgt = (struct bnx2fc_rport *)&rp[1];
 
                /* This can happen when ADISC finds the same target */
-               if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) {
+               if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) {
                        BNX2FC_TGT_DBG(tgt, "already offloaded\n");
                        mutex_unlock(&hba->hba_mutex);
                        return;
@@ -494,11 +502,8 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport,
                BNX2FC_TGT_DBG(tgt, "OFFLOAD num_ofld_sess = %d\n",
                        hba->num_ofld_sess);
 
-               if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) {
-                       /*
-                        * Session is offloaded and enabled. Map
-                        * doorbell register for this target
-                        */
+               if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) {
+                       /* Session is offloaded and enabled.  */
                        BNX2FC_TGT_DBG(tgt, "sess offloaded\n");
                        /* This counter is protected with hba mutex */
                        hba->num_ofld_sess++;
@@ -535,7 +540,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport,
                 */
                tgt = (struct bnx2fc_rport *)&rp[1];
 
-               if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) {
+               if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) {
                        mutex_unlock(&hba->hba_mutex);
                        break;
                }
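
The bnx2fc changes above do three things: the duplicated "arm a timer, wait for BNX2FC_FLAG_UPLD_REQ_COMPL, flush pending signals, delete the timer" sequence is folded into the new bnx2fc_upld_wait() helper; tape-device handling is tightened so TYPE_TAPE is only chosen for rports that advertise FC_RP_FLAGS_RETRY, are FCP targets, and are not also FCP initiators; and the offload checks in the rport event handler now key off BNX2FC_FLAG_ENABLED rather than BNX2FC_FLAG_OFFLOADED. A user-space sketch of the same bounded, interruptible wait pattern (hypothetical names, pthreads standing in for the wait queue and timer):

#include <pthread.h>
#include <stdbool.h>
#include <time.h>

/* Wait for a completion flag, but give up after timeout_sec so a wedged
 * firmware cannot stall the caller forever -- the role BNX2FC_FW_TIMEOUT
 * plays for bnx2fc_upld_wait(). */
static bool wait_for_flag(pthread_mutex_t *lock, pthread_cond_t *cond,
                          const bool *flag, int timeout_sec)
{
        struct timespec deadline;
        int rc = 0;

        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += timeout_sec;

        pthread_mutex_lock(lock);
        while (!*flag && rc == 0)
                rc = pthread_cond_timedwait(cond, lock, &deadline);
        pthread_mutex_unlock(lock);

        return *flag;           /* false means the wait timed out */
}
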
index 91eec60252ee1c973198ae5c230762af37607073..a28b03e5a5f6bd7b2a30325526fb7e20806fd257 100644 (file)
@@ -1317,7 +1317,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba)
                (1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN));
        if (error_mask1) {
                iscsi_init2.error_bit_map[0] = error_mask1;
-               mask64 &= (u32)(~mask64);
+               mask64 ^= (u32)(mask64);
                mask64 |= error_mask1;
        } else
                iscsi_init2.error_bit_map[0] = (u32) mask64;
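
In the bnx2i hunk the goal is to replace only the low 32 bits of the 64-bit error mask with the module-supplied error_mask1. Because (u32)(~mask64) zero-extends back to 64 bits, the old "mask64 &= (u32)(~mask64)" also wiped bits 32-63; "mask64 ^= (u32)(mask64)" clears exactly the low word and leaves the upper word intact. A standalone check of the arithmetic:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t old = 0x1122334455667788ULL;
        uint64_t fixed = old;
        uint32_t error_mask1 = 0xdeadbeef;

        old &= (uint32_t)(~old);        /* previous code: everything cleared */
        old |= error_mask1;

        fixed ^= (uint32_t)(fixed);     /* new code: only bits 0-31 cleared */
        fixed |= error_mask1;

        assert(old   == 0x00000000deadbeefULL);
        assert(fixed == 0x11223344deadbeefULL);
        return 0;
}
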
index 8ecdb94a59f40d30db6d97ed96c63207951a5334..bdd78fb4fc70c5a7c91131e35a94a00e6bf6bb94 100644 (file)
@@ -2131,13 +2131,16 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path)
                value_to_add = 4 - (cf->size % 4);
 
        cfg_data = kzalloc(cf->size+value_to_add, GFP_KERNEL);
-       if (cfg_data == NULL)
-               return -ENOMEM;
+       if (cfg_data == NULL) {
+               ret = -ENOMEM;
+               goto leave;
+       }
 
        memcpy((void *)cfg_data, (const void *)cf->data, cf->size);
-
-       if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0)
-               return -EINVAL;
+       if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) {
+               ret = -EINVAL;
+               goto leave;
+       }
 
        mtype = FW_PARAMS_PARAM_Y_GET(*fw_cfg_param);
        maddr = FW_PARAMS_PARAM_Z_GET(*fw_cfg_param) << 16;
@@ -2149,9 +2152,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path)
                strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64);
        }
 
+leave:
        kfree(cfg_data);
        release_firmware(cf);
-
        return ret;
 }
 
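The csio_hw_flash_config() change converts the two early error returns into jumps to the new leave: label, so cfg_data and the firmware reference obtained earlier are released on every path instead of leaking when the allocation or the config check fails. A minimal user-space sketch of the same single-exit cleanup pattern (hypothetical names):

#include <stdlib.h>
#include <string.h>

int copy_and_check(const char *src, size_t len,
                   int (*check)(const char *, size_t))
{
        int ret = 0;
        char *buf = calloc(1, len);

        if (buf == NULL) {
                ret = -12;              /* -ENOMEM */
                goto leave;
        }

        memcpy(buf, src, len);
        if (check(buf, len) != 0) {
                ret = -22;              /* -EINVAL */
                goto leave;
        }
        /* ... use buf ... */
leave:
        free(buf);                      /* free(NULL) is a no-op */
        return ret;
}
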
index c323b2030afac653447670e7ff4ca9b20adf80db..0604b5ff36381a432cb1c45beae670a1a8de7d16 100644 (file)
@@ -60,13 +60,6 @@ static struct scsi_transport_template *csio_fcoe_transport_vport;
 /*
  * debugfs support
  */
-static int
-csio_mem_open(struct inode *inode, struct file *file)
-{
-       file->private_data = inode->i_private;
-       return 0;
-}
-
 static ssize_t
 csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
@@ -110,7 +103,7 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 static const struct file_operations csio_mem_debugfs_fops = {
        .owner   = THIS_MODULE,
-       .open    = csio_mem_open,
+       .open    = simple_open,
        .read    = csio_mem_read,
        .llseek  = default_llseek,
 };
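
Dropping csio_mem_open in favour of simple_open removes an open-coded copy of a generic VFS helper; at this point in the tree simple_open() (fs/libfs.c) is essentially:

int simple_open(struct inode *inode, struct file *file)
{
        if (inode->i_private)
                file->private_data = inode->i_private;
        return 0;
}

so the debugfs file behaves exactly as before, minus the duplicated code.
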
index f924b3c3720e3fa268db4470e0987b388e4eee7d..3fecf35ba2926f6e27d29ae0463d9b97755c3a92 100644 (file)
@@ -1564,6 +1564,7 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state)
                break;
        case CXGB4_STATE_DETACH:
                pr_info("cdev 0x%p, DETACH.\n", cdev);
+               cxgbi_device_unregister(cdev);
                break;
        default:
                pr_info("cdev 0x%p, unknown state %d.\n", cdev, state);
index 3c53c3478ee71224ada8d48488e665c6dd9e35c7..483eb9dbe66366377c5dd4f049c15a160ca2a500 100644 (file)
@@ -495,7 +495,8 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb)
        }
 
        fnic_queue_wq_eth_desc(wq, skb, pa, skb->len,
-                              fnic->vlan_hw_insert, fnic->vlan_id, 1);
+                              0 /* hw inserts cos value */,
+                              fnic->vlan_id, 1);
        spin_unlock_irqrestore(&fnic->wq_lock[0], flags);
 }
 
@@ -563,7 +564,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp)
        }
 
        fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp),
-                          fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1);
+                          0 /* hw inserts cos value */,
+                          fnic->vlan_id, 1, 1, 1);
 fnic_send_frame_end:
        spin_unlock_irqrestore(&fnic->wq_lock[0], flags);
 
index 599790e41a989046592a5ce50fc5bb3a2cc80804..59bceac51a4ccff560a91eca5f4d7de01d4c08e3 100644 (file)
@@ -1107,14 +1107,8 @@ static int gdth_init_pci(struct pci_dev *pdev, gdth_pci_str *pcistr,
        pci_read_config_word(pdev, PCI_COMMAND, &command);
         command |= 6;
        pci_write_config_word(pdev, PCI_COMMAND, command);
-       if (pci_resource_start(pdev, 8) == 1UL)
-           pci_resource_start(pdev, 8) = 0UL;
-        i = 0xFEFF0001UL;
-       pci_write_config_dword(pdev, PCI_ROM_ADDRESS, i);
-        gdth_delay(1);
-       pci_write_config_dword(pdev, PCI_ROM_ADDRESS,
-                              pci_resource_start(pdev, 8));
-        
+       gdth_delay(1);
+
         dp6m_ptr = ha->brd;
 
         /* Ensure that it is safe to access the non HW portions of DPMEM.
index 1d7da3f41ebb62db01cb74d4f2478b3d595fdbef..8fa79b83f2d38de823569ed5e9533b23d0238157 100644 (file)
@@ -98,6 +98,7 @@ static unsigned int ipr_transop_timeout = 0;
 static unsigned int ipr_debug = 0;
 static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS;
 static unsigned int ipr_dual_ioa_raid = 1;
+static unsigned int ipr_number_of_msix = 2;
 static DEFINE_SPINLOCK(ipr_driver_lock);
 
 /* This table describes the differences between DMA controller chips */
@@ -107,6 +108,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = {
                .max_cmds = 100,
                .cache_line_size = 0x20,
                .clear_isr = 1,
+               .iopoll_weight = 0,
                {
                        .set_interrupt_mask_reg = 0x0022C,
                        .clr_interrupt_mask_reg = 0x00230,
@@ -131,6 +133,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = {
                .max_cmds = 100,
                .cache_line_size = 0x20,
                .clear_isr = 1,
+               .iopoll_weight = 0,
                {
                        .set_interrupt_mask_reg = 0x00288,
                        .clr_interrupt_mask_reg = 0x0028C,
@@ -155,6 +158,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = {
                .max_cmds = 1000,
                .cache_line_size = 0x20,
                .clear_isr = 0,
+               .iopoll_weight = 64,
                {
                        .set_interrupt_mask_reg = 0x00010,
                        .clr_interrupt_mask_reg = 0x00018,
@@ -215,6 +219,8 @@ MODULE_PARM_DESC(dual_ioa_raid, "Enable dual adapter RAID support. Set to 1 to e
 module_param_named(max_devs, ipr_max_devs, int, 0);
 MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. "
                 "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]");
+module_param_named(number_of_msix, ipr_number_of_msix, int, 0);
+MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 5).  (default:2)");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(IPR_DRIVER_VERSION);
 
@@ -549,7 +555,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd,
        struct ipr_trace_entry *trace_entry;
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 
-       trace_entry = &ioa_cfg->trace[ioa_cfg->trace_index++];
+       trace_entry = &ioa_cfg->trace[atomic_add_return
+                       (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES];
        trace_entry->time = jiffies;
        trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0];
        trace_entry->type = type;
@@ -560,6 +567,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd,
        trace_entry->cmd_index = ipr_cmd->cmd_index & 0xff;
        trace_entry->res_handle = ipr_cmd->ioarcb.res_handle;
        trace_entry->u.add_data = add_data;
+       wmb();
 }
 #else
 #define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0)
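
The trace-hook change replaces the unsynchronised trace_index++ with an atomic counter reduced modulo IPR_NUM_TRACE_ENTRIES, so completions arriving on different HRRQs each claim a distinct slot in the circular trace buffer instead of racing on a shared index; the added wmb() publishes the entry's fields before anything that follows. A user-space sketch of the same lock-free slot claim, assuming C11 atomics:

#include <stdatomic.h>
#include <stdint.h>

#define NUM_TRACE_ENTRIES 4096          /* stands in for IPR_NUM_TRACE_ENTRIES */

struct trace_entry { uint64_t time; uint8_t op_code; };

static struct trace_entry trace[NUM_TRACE_ENTRIES];
static atomic_uint trace_index;

/* Each caller atomically advances the shared index; the modulo wraps it,
 * so the array behaves as one ring shared by every queue. */
static struct trace_entry *claim_trace_slot(void)
{
        unsigned int idx = atomic_fetch_add(&trace_index, 1) % NUM_TRACE_ENTRIES;

        return &trace[idx];
}
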
@@ -595,8 +603,11 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd)
        struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa;
        struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64;
        dma_addr_t dma_addr = ipr_cmd->dma_addr;
+       int hrrq_id;
 
+       hrrq_id = ioarcb->cmd_pkt.hrrq_id;
        memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt));
+       ioarcb->cmd_pkt.hrrq_id = hrrq_id;
        ioarcb->data_transfer_length = 0;
        ioarcb->read_data_transfer_length = 0;
        ioarcb->ioadl_len = 0;
@@ -646,12 +657,16 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd,
  *     pointer to ipr command struct
  **/
 static
-struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
+struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_hrr_queue *hrrq)
 {
-       struct ipr_cmnd *ipr_cmd;
+       struct ipr_cmnd *ipr_cmd = NULL;
+
+       if (likely(!list_empty(&hrrq->hrrq_free_q))) {
+               ipr_cmd = list_entry(hrrq->hrrq_free_q.next,
+                       struct ipr_cmnd, queue);
+               list_del(&ipr_cmd->queue);
+       }
 
-       ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue);
-       list_del(&ipr_cmd->queue);
 
        return ipr_cmd;
 }
@@ -666,7 +681,8 @@ struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
 static
 struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg)
 {
-       struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
+       struct ipr_cmnd *ipr_cmd =
+               __ipr_get_free_ipr_cmnd(&ioa_cfg->hrrq[IPR_INIT_HRRQ]);
        ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done);
        return ipr_cmd;
 }
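
__ipr_get_free_ipr_cmnd() now takes a single host RRQ rather than the whole ioa_cfg and can return NULL when that queue's free list is empty, so callers must check the result (ipr_queuecommand below returns SCSI_MLQUEUE_HOST_BUSY in that case) instead of assuming a command block is always available. The pattern, reduced to a user-space sketch with hypothetical types:

#include <stddef.h>

struct node  { struct node *next; };
struct queue { struct node *free_head; };

/* Pop the first free node, or return NULL if this queue is exhausted;
 * the caller decides whether to back off or fail the request. */
static struct node *get_free(struct queue *q)
{
        struct node *n = q->free_head;

        if (n != NULL)
                q->free_head = n->next;
        return n;
}
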
@@ -686,9 +702,15 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg,
                                          u32 clr_ints)
 {
        volatile u32 int_reg;
+       int i;
 
        /* Stop new interrupts */
-       ioa_cfg->allow_interrupts = 0;
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               spin_lock(&ioa_cfg->hrrq[i]._lock);
+               ioa_cfg->hrrq[i].allow_interrupts = 0;
+               spin_unlock(&ioa_cfg->hrrq[i]._lock);
+       }
+       wmb();
 
        /* Set interrupt mask to stop all new interrupts */
        if (ioa_cfg->sis64)
@@ -761,13 +783,12 @@ static int ipr_set_pcix_cmd_reg(struct ipr_ioa_cfg *ioa_cfg)
  **/
 static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd)
 {
-       struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct ata_queued_cmd *qc = ipr_cmd->qc;
        struct ipr_sata_port *sata_port = qc->ap->private_data;
 
        qc->err_mask |= AC_ERR_OTHER;
        sata_port->ioasa.status |= ATA_BUSY;
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        ata_qc_complete(qc);
 }
 
@@ -783,14 +804,13 @@ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd)
  **/
 static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd)
 {
-       struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
 
        scsi_cmd->result |= (DID_ERROR << 16);
 
        scsi_dma_unmap(ipr_cmd->scsi_cmd);
        scsi_cmd->scsi_done(scsi_cmd);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 }
 
 /**
@@ -805,24 +825,32 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd)
 static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg)
 {
        struct ipr_cmnd *ipr_cmd, *temp;
+       struct ipr_hrr_queue *hrrq;
 
        ENTER;
-       list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) {
-               list_del(&ipr_cmd->queue);
+       for_each_hrrq(hrrq, ioa_cfg) {
+               spin_lock(&hrrq->_lock);
+               list_for_each_entry_safe(ipr_cmd,
+                                       temp, &hrrq->hrrq_pending_q, queue) {
+                       list_del(&ipr_cmd->queue);
 
-               ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET);
-               ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID);
+                       ipr_cmd->s.ioasa.hdr.ioasc =
+                               cpu_to_be32(IPR_IOASC_IOA_WAS_RESET);
+                       ipr_cmd->s.ioasa.hdr.ilid =
+                               cpu_to_be32(IPR_DRIVER_ILID);
 
-               if (ipr_cmd->scsi_cmd)
-                       ipr_cmd->done = ipr_scsi_eh_done;
-               else if (ipr_cmd->qc)
-                       ipr_cmd->done = ipr_sata_eh_done;
+                       if (ipr_cmd->scsi_cmd)
+                               ipr_cmd->done = ipr_scsi_eh_done;
+                       else if (ipr_cmd->qc)
+                               ipr_cmd->done = ipr_sata_eh_done;
 
-               ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET);
-               del_timer(&ipr_cmd->timer);
-               ipr_cmd->done(ipr_cmd);
+                       ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH,
+                                    IPR_IOASC_IOA_WAS_RESET);
+                       del_timer(&ipr_cmd->timer);
+                       ipr_cmd->done(ipr_cmd);
+               }
+               spin_unlock(&hrrq->_lock);
        }
-
        LEAVE;
 }
 
@@ -872,9 +900,7 @@ static void ipr_do_req(struct ipr_cmnd *ipr_cmd,
                       void (*done) (struct ipr_cmnd *),
                       void (*timeout_func) (struct ipr_cmnd *), u32 timeout)
 {
-       struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 
        ipr_cmd->done = done;
 
@@ -975,6 +1001,14 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd,
        spin_lock_irq(ioa_cfg->host->host_lock);
 }
 
+static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg)
+{
+       if (ioa_cfg->hrrq_num == 1)
+               return 0;
+       else
+               return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1;
+}
+
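
ipr_get_hrrq_index() spreads new commands across the response queues: queue 0 (IPR_INIT_HRRQ) stays reserved for initialisation and internal commands, and the remaining hrrq_num - 1 queues are chosen round-robin from an atomic counter. A small standalone demo of the selection rule (hypothetical pick_hrrq mirroring the kernel expression):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint hrrq_index;

/* Queue 0 is reserved; queues 1..hrrq_num-1 are used in rotation. */
static unsigned int pick_hrrq(unsigned int hrrq_num)
{
        if (hrrq_num == 1)
                return 0;
        return (atomic_fetch_add(&hrrq_index, 1) + 1) % (hrrq_num - 1) + 1;
}

int main(void)
{
        for (int i = 0; i < 6; i++)
                printf("%u ", pick_hrrq(4));    /* prints: 2 3 1 2 3 1 */
        printf("\n");
        return 0;
}
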
 /**
  * ipr_send_hcam - Send an HCAM to the adapter.
  * @ioa_cfg:   ioa config struct
@@ -994,9 +1028,9 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type,
        struct ipr_cmnd *ipr_cmd;
        struct ipr_ioarcb *ioarcb;
 
-       if (ioa_cfg->allow_cmds) {
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) {
                ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
-               list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+               list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
                list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_pending_q);
 
                ipr_cmd->u.hostrcb = hostrcb;
@@ -1166,14 +1200,15 @@ static int ipr_is_same_device(struct ipr_resource_entry *res,
 }
 
 /**
- * ipr_format_res_path - Format the resource path for printing.
+ * __ipr_format_res_path - Format the resource path for printing.
  * @res_path:  resource path
  * @buf:       buffer
+ * @len:       length of buffer provided
  *
  * Return value:
  *     pointer to buffer
  **/
-static char *ipr_format_res_path(u8 *res_path, char *buffer, int len)
+static char *__ipr_format_res_path(u8 *res_path, char *buffer, int len)
 {
        int i;
        char *p = buffer;
@@ -1186,6 +1221,27 @@ static char *ipr_format_res_path(u8 *res_path, char *buffer, int len)
        return buffer;
 }
 
+/**
+ * ipr_format_res_path - Format the resource path for printing.
+ * @ioa_cfg:   ioa config struct
+ * @res_path:  resource path
+ * @buf:       buffer
+ * @len:       length of buffer provided
+ *
+ * Return value:
+ *     pointer to buffer
+ **/
+static char *ipr_format_res_path(struct ipr_ioa_cfg *ioa_cfg,
+                                u8 *res_path, char *buffer, int len)
+{
+       char *p = buffer;
+
+       *p = '\0';
+       p += snprintf(p, buffer + len - p, "%d/", ioa_cfg->host->host_no);
+       __ipr_format_res_path(res_path, p, len - (buffer - p));
+       return buffer;
+}
+
 /**
  * ipr_update_res_entry - Update the resource entry.
  * @res:       resource entry struct
@@ -1226,8 +1282,8 @@ static void ipr_update_res_entry(struct ipr_resource_entry *res,
 
                if (res->sdev && new_path)
                        sdev_printk(KERN_INFO, res->sdev, "Resource path: %s\n",
-                                   ipr_format_res_path(res->res_path, buffer,
-                                                       sizeof(buffer)));
+                                   ipr_format_res_path(res->ioa_cfg,
+                                       res->res_path, buffer, sizeof(buffer)));
        } else {
                res->flags = cfgtew->u.cfgte->flags;
                if (res->flags & IPR_IS_IOA_RESOURCE)
@@ -1363,7 +1419,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd)
        u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        list_del(&hostrcb->queue);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 
        if (ioasc) {
                if (ioasc != IPR_IOASC_IOA_WAS_RESET)
@@ -1613,8 +1669,8 @@ static void ipr_log_sis64_config_error(struct ipr_ioa_cfg *ioa_cfg,
                ipr_err_separator;
 
                ipr_err("Device %d : %s", i + 1,
-                        ipr_format_res_path(dev_entry->res_path, buffer,
-                                            sizeof(buffer)));
+                       __ipr_format_res_path(dev_entry->res_path,
+                                             buffer, sizeof(buffer)));
                ipr_log_ext_vpd(&dev_entry->vpd);
 
                ipr_err("-----New Device Information-----\n");
@@ -1960,14 +2016,16 @@ static void ipr_log64_fabric_path(struct ipr_hostrcb *hostrcb,
 
                        ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s\n",
                                     path_active_desc[i].desc, path_state_desc[j].desc,
-                                    ipr_format_res_path(fabric->res_path, buffer,
-                                                        sizeof(buffer)));
+                                    ipr_format_res_path(hostrcb->ioa_cfg,
+                                               fabric->res_path,
+                                               buffer, sizeof(buffer)));
                        return;
                }
        }
 
        ipr_err("Path state=%02X Resource Path=%s\n", path_state,
-               ipr_format_res_path(fabric->res_path, buffer, sizeof(buffer)));
+               ipr_format_res_path(hostrcb->ioa_cfg, fabric->res_path,
+                                   buffer, sizeof(buffer)));
 }
 
 static const struct {
@@ -2108,18 +2166,20 @@ static void ipr_log64_path_elem(struct ipr_hostrcb *hostrcb,
 
                        ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s, Link rate=%s, WWN=%08X%08X\n",
                                     path_status_desc[j].desc, path_type_desc[i].desc,
-                                    ipr_format_res_path(cfg->res_path, buffer,
-                                                        sizeof(buffer)),
-                                    link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
-                                    be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1]));
+                                    ipr_format_res_path(hostrcb->ioa_cfg,
+                                       cfg->res_path, buffer, sizeof(buffer)),
+                                       link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
+                                       be32_to_cpu(cfg->wwid[0]),
+                                       be32_to_cpu(cfg->wwid[1]));
                        return;
                }
        }
        ipr_hcam_err(hostrcb, "Path element=%02X: Resource Path=%s, Link rate=%s "
                     "WWN=%08X%08X\n", cfg->type_status,
-                    ipr_format_res_path(cfg->res_path, buffer, sizeof(buffer)),
-                    link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
-                    be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1]));
+                    ipr_format_res_path(hostrcb->ioa_cfg,
+                       cfg->res_path, buffer, sizeof(buffer)),
+                       link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK],
+                       be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1]));
 }
 
 /**
@@ -2182,7 +2242,8 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg,
 
        ipr_err("RAID %s Array Configuration: %s\n",
                error->protection_level,
-               ipr_format_res_path(error->last_res_path, buffer, sizeof(buffer)));
+               ipr_format_res_path(ioa_cfg, error->last_res_path,
+                       buffer, sizeof(buffer)));
 
        ipr_err_separator;
 
@@ -2203,11 +2264,12 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg,
                ipr_err("Array Member %d:\n", i);
                ipr_log_ext_vpd(&array_entry->vpd);
                ipr_err("Current Location: %s\n",
-                        ipr_format_res_path(array_entry->res_path, buffer,
-                                            sizeof(buffer)));
+                        ipr_format_res_path(ioa_cfg, array_entry->res_path,
+                               buffer, sizeof(buffer)));
                ipr_err("Expected Location: %s\n",
-                        ipr_format_res_path(array_entry->expected_res_path,
-                                            buffer, sizeof(buffer)));
+                        ipr_format_res_path(ioa_cfg,
+                               array_entry->expected_res_path,
+                               buffer, sizeof(buffer)));
 
                ipr_err_separator;
        }
@@ -2409,7 +2471,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd)
                fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error.fd_ioasc);
 
        list_del(&hostrcb->queue);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 
        if (!ioasc) {
                ipr_handle_log_data(ioa_cfg, hostrcb);
@@ -2490,36 +2552,6 @@ static void ipr_oper_timeout(struct ipr_cmnd *ipr_cmd)
        LEAVE;
 }
 
-/**
- * ipr_reset_reload - Reset/Reload the IOA
- * @ioa_cfg:           ioa config struct
- * @shutdown_type:     shutdown type
- *
- * This function resets the adapter and re-initializes it.
- * This function assumes that all new host commands have been stopped.
- * Return value:
- *     SUCCESS / FAILED
- **/
-static int ipr_reset_reload(struct ipr_ioa_cfg *ioa_cfg,
-                           enum ipr_shutdown_type shutdown_type)
-{
-       if (!ioa_cfg->in_reset_reload)
-               ipr_initiate_ioa_reset(ioa_cfg, shutdown_type);
-
-       spin_unlock_irq(ioa_cfg->host->host_lock);
-       wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
-       spin_lock_irq(ioa_cfg->host->host_lock);
-
-       /* If we got hit with a host reset while we were already resetting
-        the adapter for some reason, and the reset failed. */
-       if (ioa_cfg->ioa_is_dead) {
-               ipr_trace;
-               return FAILED;
-       }
-
-       return SUCCESS;
-}
-
 /**
  * ipr_find_ses_entry - Find matching SES in SES table
  * @res:       resource entry struct of SES
@@ -3153,7 +3185,8 @@ static void ipr_worker_thread(struct work_struct *work)
 restart:
        do {
                did_work = 0;
-               if (!ioa_cfg->allow_cmds || !ioa_cfg->allow_ml_add_del) {
+               if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds ||
+                   !ioa_cfg->allow_ml_add_del) {
                        spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
                        return;
                }
@@ -3401,7 +3434,7 @@ static ssize_t ipr_show_adapter_state(struct device *dev,
        int len;
 
        spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-       if (ioa_cfg->ioa_is_dead)
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
                len = snprintf(buf, PAGE_SIZE, "offline\n");
        else
                len = snprintf(buf, PAGE_SIZE, "online\n");
@@ -3427,14 +3460,20 @@ static ssize_t ipr_store_adapter_state(struct device *dev,
        struct Scsi_Host *shost = class_to_shost(dev);
        struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
        unsigned long lock_flags;
-       int result = count;
+       int result = count, i;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
 
        spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-       if (ioa_cfg->ioa_is_dead && !strncmp(buf, "online", 6)) {
-               ioa_cfg->ioa_is_dead = 0;
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead &&
+           !strncmp(buf, "online", 6)) {
+               for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+                       spin_lock(&ioa_cfg->hrrq[i]._lock);
+                       ioa_cfg->hrrq[i].ioa_is_dead = 0;
+                       spin_unlock(&ioa_cfg->hrrq[i]._lock);
+               }
+               wmb();
                ioa_cfg->reset_retries = 0;
                ioa_cfg->in_ioa_bringdown = 0;
                ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
@@ -3494,6 +3533,95 @@ static struct device_attribute ipr_ioa_reset_attr = {
        .store = ipr_store_reset_adapter
 };
 
+static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+ /**
+ * ipr_show_iopoll_weight - Show ipr polling mode
+ * @dev:       class device struct
+ * @buf:       buffer
+ *
+ * Return value:
+ *     number of bytes printed to buffer
+ **/
+static ssize_t ipr_show_iopoll_weight(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct Scsi_Host *shost = class_to_shost(dev);
+       struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
+       unsigned long lock_flags = 0;
+       int len;
+
+       spin_lock_irqsave(shost->host_lock, lock_flags);
+       len = snprintf(buf, PAGE_SIZE, "%d\n", ioa_cfg->iopoll_weight);
+       spin_unlock_irqrestore(shost->host_lock, lock_flags);
+
+       return len;
+}
+
+/**
+ * ipr_store_iopoll_weight - Change the adapter's polling mode
+ * @dev:       class device struct
+ * @buf:       buffer
+ *
+ * Return value:
+ *     number of bytes printed to buffer
+ **/
+static ssize_t ipr_store_iopoll_weight(struct device *dev,
+                                       struct device_attribute *attr,
+                                       const char *buf, size_t count)
+{
+       struct Scsi_Host *shost = class_to_shost(dev);
+       struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
+       unsigned long user_iopoll_weight;
+       unsigned long lock_flags = 0;
+       int i;
+
+       if (!ioa_cfg->sis64) {
+               dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+               return -EINVAL;
+       }
+       if (kstrtoul(buf, 10, &user_iopoll_weight))
+               return -EINVAL;
+
+       if (user_iopoll_weight > 256) {
+               dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+               return -EINVAL;
+       }
+
+       if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
+               dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+               return strlen(buf);
+       }
+
+       if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+                       ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+               for (i = 1; i < ioa_cfg->hrrq_num; i++)
+                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+       }
+
+       spin_lock_irqsave(shost->host_lock, lock_flags);
+       ioa_cfg->iopoll_weight = user_iopoll_weight;
+       if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+                       ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+               for (i = 1; i < ioa_cfg->hrrq_num; i++) {
+                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                                       ioa_cfg->iopoll_weight, ipr_iopoll);
+                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
+               }
+       }
+       spin_unlock_irqrestore(shost->host_lock, lock_flags);
+
+       return strlen(buf);
+}
+
+static struct device_attribute ipr_iopoll_weight_attr = {
+       .attr = {
+               .name =         "iopoll_weight",
+               .mode =         S_IRUGO | S_IWUSR,
+       },
+       .show = ipr_show_iopoll_weight,
+       .store = ipr_store_iopoll_weight
+};
+
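
The new iopoll_weight attribute exposes the blk-iopoll budget per HRRQ at runtime: writing a value up to 256 disables blk_iopoll on queues 1..hrrq_num-1 and re-initialises them with the new weight (when the adapter is SIS64 and more than one MSI-X vector is in use), writing 0 drops back to purely interrupt-driven completion, and anything larger than 256, or any write on a non-SIS64 adapter, is rejected with -EINVAL. Assuming the usual sysfs location for Scsi_Host attributes, it would typically be read and set with something like "cat /sys/class/scsi_host/host<N>/iopoll_weight" and "echo 64 > /sys/class/scsi_host/host<N>/iopoll_weight".
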
 /**
  * ipr_alloc_ucode_buffer - Allocates a microcode download buffer
  * @buf_len:           buffer length
@@ -3862,6 +3990,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
        &ipr_ioa_reset_attr,
        &ipr_update_fw_attr,
        &ipr_ioa_fw_type_attr,
+       &ipr_iopoll_weight_attr,
        NULL,
 };
 
@@ -4014,7 +4143,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg)
 
        ioa_cfg->dump = dump;
        ioa_cfg->sdt_state = WAIT_FOR_DUMP;
-       if (ioa_cfg->ioa_is_dead && !ioa_cfg->dump_taken) {
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && !ioa_cfg->dump_taken) {
                ioa_cfg->dump_taken = 1;
                schedule_work(&ioa_cfg->work_q);
        }
@@ -4227,8 +4356,8 @@ static ssize_t ipr_show_resource_path(struct device *dev, struct device_attribut
        res = (struct ipr_resource_entry *)sdev->hostdata;
        if (res && ioa_cfg->sis64)
                len = snprintf(buf, PAGE_SIZE, "%s\n",
-                              ipr_format_res_path(res->res_path, buffer,
-                                                  sizeof(buffer)));
+                              __ipr_format_res_path(res->res_path, buffer,
+                                                    sizeof(buffer)));
        else if (res)
                len = snprintf(buf, PAGE_SIZE, "%d:%d:%d:%d\n", ioa_cfg->host->host_no,
                               res->bus, res->target, res->lun);
@@ -4556,8 +4685,8 @@ static int ipr_slave_configure(struct scsi_device *sdev)
                        scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);
                if (ioa_cfg->sis64)
                        sdev_printk(KERN_INFO, sdev, "Resource path: %s\n",
-                                   ipr_format_res_path(res->res_path, buffer,
-                                                       sizeof(buffer)));
+                                   ipr_format_res_path(ioa_cfg,
+                               res->res_path, buffer, sizeof(buffer)));
                return 0;
        }
        spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
@@ -4638,22 +4767,18 @@ static int ipr_slave_alloc(struct scsi_device *sdev)
        return rc;
 }
 
-/**
- * ipr_eh_host_reset - Reset the host adapter
- * @scsi_cmd:  scsi command struct
- *
- * Return value:
- *     SUCCESS / FAILED
- **/
-static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd)
+static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg;
-       int rc;
+       unsigned long lock_flags = 0;
+       int rc = SUCCESS;
 
        ENTER;
-       ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
+       ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata;
+       spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 
        if (!ioa_cfg->in_reset_reload) {
+               ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV);
                dev_err(&ioa_cfg->pdev->dev,
                        "Adapter being reset as a result of error recovery.\n");
 
@@ -4661,20 +4786,19 @@ static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd)
                        ioa_cfg->sdt_state = GET_DUMP;
        }
 
-       rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV);
-
-       LEAVE;
-       return rc;
-}
-
-static int ipr_eh_host_reset(struct scsi_cmnd *cmd)
-{
-       int rc;
+       spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+       wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
+       spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 
-       spin_lock_irq(cmd->device->host->host_lock);
-       rc = __ipr_eh_host_reset(cmd);
-       spin_unlock_irq(cmd->device->host->host_lock);
+       /* If we got hit with a host reset while we were already resetting
+        the adapter for some reason, and the reset failed. */
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
+               ipr_trace;
+               rc = FAILED;
+       }
 
+       spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+       LEAVE;
        return rc;
 }
 
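ipr_eh_host_reset() now does inline what the removed ipr_reset_reload() used to do: initiate an abbreviated shutdown if a reset is not already in flight, drop the host lock while sleeping on reset_wait_q until in_reset_reload clears, retake the lock, and return FAILED only if the adapter came back dead. The lock has to be released around wait_event() because the reset job itself needs the host lock to make progress and eventually wake the waiter; sleeping with the spinlock held would deadlock the error handler against the reset it just started.
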
@@ -4723,7 +4847,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg,
 
        ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT);
        ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) {
                if (ipr_cmd->ioa_cfg->sis64)
                        memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata,
@@ -4793,6 +4917,7 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd)
        struct ipr_resource_entry *res;
        struct ata_port *ap;
        int rc = 0;
+       struct ipr_hrr_queue *hrrq;
 
        ENTER;
        ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
@@ -4808,22 +4933,26 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd)
         */
        if (ioa_cfg->in_reset_reload)
                return FAILED;
-       if (ioa_cfg->ioa_is_dead)
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
                return FAILED;
 
-       list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-               if (ipr_cmd->ioarcb.res_handle == res->res_handle) {
-                       if (ipr_cmd->scsi_cmd)
-                               ipr_cmd->done = ipr_scsi_eh_done;
-                       if (ipr_cmd->qc)
-                               ipr_cmd->done = ipr_sata_eh_done;
-                       if (ipr_cmd->qc && !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) {
-                               ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT;
-                               ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED;
+       for_each_hrrq(hrrq, ioa_cfg) {
+               spin_lock(&hrrq->_lock);
+               list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+                       if (ipr_cmd->ioarcb.res_handle == res->res_handle) {
+                               if (ipr_cmd->scsi_cmd)
+                                       ipr_cmd->done = ipr_scsi_eh_done;
+                               if (ipr_cmd->qc)
+                                       ipr_cmd->done = ipr_sata_eh_done;
+                               if (ipr_cmd->qc &&
+                                   !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) {
+                                       ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT;
+                                       ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED;
+                               }
                        }
                }
+               spin_unlock(&hrrq->_lock);
        }
-
        res->resetting_device = 1;
        scmd_printk(KERN_ERR, scsi_cmd, "Resetting device\n");
 
@@ -4833,11 +4962,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd)
                ata_std_error_handler(ap);
                spin_lock_irq(scsi_cmd->device->host->host_lock);
 
-               list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-                       if (ipr_cmd->ioarcb.res_handle == res->res_handle) {
-                               rc = -EIO;
-                               break;
+               for_each_hrrq(hrrq, ioa_cfg) {
+                       spin_lock(&hrrq->_lock);
+                       list_for_each_entry(ipr_cmd,
+                                           &hrrq->hrrq_pending_q, queue) {
+                               if (ipr_cmd->ioarcb.res_handle ==
+                                   res->res_handle) {
+                                       rc = -EIO;
+                                       break;
+                               }
                        }
+                       spin_unlock(&hrrq->_lock);
                }
        } else
                rc = ipr_device_reset(ioa_cfg, res);
@@ -4890,7 +5025,7 @@ static void ipr_bus_reset_done(struct ipr_cmnd *ipr_cmd)
        else
                ipr_cmd->sibling->done(ipr_cmd->sibling);
 
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        LEAVE;
 }
 
@@ -4951,6 +5086,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
        struct ipr_cmd_pkt *cmd_pkt;
        u32 ioasc, int_reg;
        int op_found = 0;
+       struct ipr_hrr_queue *hrrq;
 
        ENTER;
        ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata;
@@ -4960,7 +5096,8 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
         * This will force the mid-layer to call ipr_eh_host_reset,
         * which will then go to sleep and wait for the reset to complete
         */
-       if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead)
+       if (ioa_cfg->in_reset_reload ||
+           ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
                return FAILED;
        if (!res)
                return FAILED;
@@ -4975,12 +5112,16 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
        if (!ipr_is_gscsi(res))
                return FAILED;
 
-       list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-               if (ipr_cmd->scsi_cmd == scsi_cmd) {
-                       ipr_cmd->done = ipr_scsi_eh_done;
-                       op_found = 1;
-                       break;
+       for_each_hrrq(hrrq, ioa_cfg) {
+               spin_lock(&hrrq->_lock);
+               list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+                       if (ipr_cmd->scsi_cmd == scsi_cmd) {
+                               ipr_cmd->done = ipr_scsi_eh_done;
+                               op_found = 1;
+                               break;
+                       }
                }
+               spin_unlock(&hrrq->_lock);
        }
 
        if (!op_found)
@@ -5007,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd)
                ipr_trace;
        }
 
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q);
        if (!ipr_is_naca_model(res))
                res->needs_sync_complete = 1;
 
@@ -5099,6 +5240,9 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg,
        } else {
                if (int_reg & IPR_PCII_IOA_UNIT_CHECKED)
                        ioa_cfg->ioa_unit_checked = 1;
+               else if (int_reg & IPR_PCII_NO_HOST_RRQ)
+                       dev_err(&ioa_cfg->pdev->dev,
+                               "No Host RRQ. 0x%08X\n", int_reg);
                else
                        dev_err(&ioa_cfg->pdev->dev,
                                "Permanent IOA failure. 0x%08X\n", int_reg);
@@ -5121,10 +5265,10 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg,
  * Return value:
  *     none
  **/
-static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg)
+static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg, u16 number)
 {
        ioa_cfg->errors_logged++;
-       dev_err(&ioa_cfg->pdev->dev, "%s\n", msg);
+       dev_err(&ioa_cfg->pdev->dev, "%s %d\n", msg, number);
 
        if (WAIT_FOR_DUMP == ioa_cfg->sdt_state)
                ioa_cfg->sdt_state = GET_DUMP;
@@ -5132,6 +5276,83 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg)
        ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
 }
 
+static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
+                                               struct list_head *doneq)
+{
+       u32 ioasc;
+       u16 cmd_index;
+       struct ipr_cmnd *ipr_cmd;
+       struct ipr_ioa_cfg *ioa_cfg = hrr_queue->ioa_cfg;
+       int num_hrrq = 0;
+
+       /* If interrupts are disabled, ignore the interrupt */
+       if (!hrr_queue->allow_interrupts)
+               return 0;
+
+       while ((be32_to_cpu(*hrr_queue->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
+              hrr_queue->toggle_bit) {
+
+               cmd_index = (be32_to_cpu(*hrr_queue->hrrq_curr) &
+                            IPR_HRRQ_REQ_RESP_HANDLE_MASK) >>
+                            IPR_HRRQ_REQ_RESP_HANDLE_SHIFT;
+
+               if (unlikely(cmd_index > hrr_queue->max_cmd_id ||
+                            cmd_index < hrr_queue->min_cmd_id)) {
+                       ipr_isr_eh(ioa_cfg,
+                               "Invalid response handle from IOA: ",
+                               cmd_index);
+                       break;
+               }
+
+               ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index];
+               ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
+
+               ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc);
+
+               list_move_tail(&ipr_cmd->queue, doneq);
+
+               if (hrr_queue->hrrq_curr < hrr_queue->hrrq_end) {
+                       hrr_queue->hrrq_curr++;
+               } else {
+                       hrr_queue->hrrq_curr = hrr_queue->hrrq_start;
+                       hrr_queue->toggle_bit ^= 1u;
+               }
+               num_hrrq++;
+               if (budget > 0 && num_hrrq >= budget)
+                       break;
+       }
+
+       return num_hrrq;
+}
+
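
ipr_process_hrrq() is the single consumer shared by the hard-IRQ and blk-iopoll paths: it walks HRRQ entries while the toggle bit matches, checks that the response handle lies inside this queue's [min_cmd_id, max_cmd_id] window, moves finished commands onto a caller-supplied done list, and stops after "budget" entries. ipr_isr() passes budget = -1 to drain everything; ipr_iopoll() passes its weight and only calls blk_iopoll_complete() when fewer entries than the budget were consumed. A user-space sketch of the toggle-bit ring walk (hypothetical ring layout; only the wrap/flip logic is the point):

#include <stdint.h>

#define RING_SIZE   16
#define TOGGLE_BIT  1u

/* An entry is "new" only while its toggle bit matches the consumer's
 * expected value; wrapping the cursor flips the expectation, so the
 * previous lap's entries stop matching. */
static int drain_ring(const uint32_t *ring, unsigned int *cursor,
                      unsigned int *toggle, int budget)
{
        int done = 0;

        while ((ring[*cursor] & TOGGLE_BIT) == *toggle) {
                /* ... complete the command named by this entry ... */
                if (++(*cursor) == RING_SIZE) {
                        *cursor = 0;
                        *toggle ^= 1u;
                }
                done++;
                if (budget > 0 && done >= budget)
                        break;
        }
        return done;
}
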
+static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+{
+       struct ipr_ioa_cfg *ioa_cfg;
+       struct ipr_hrr_queue *hrrq;
+       struct ipr_cmnd *ipr_cmd, *temp;
+       unsigned long hrrq_flags;
+       int completed_ops;
+       LIST_HEAD(doneq);
+
+       hrrq = container_of(iop, struct ipr_hrr_queue, iopoll);
+       ioa_cfg = hrrq->ioa_cfg;
+
+       spin_lock_irqsave(hrrq->lock, hrrq_flags);
+       completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
+
+       if (completed_ops < budget)
+               blk_iopoll_complete(iop);
+       spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+
+       list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
+               list_del(&ipr_cmd->queue);
+               del_timer(&ipr_cmd->timer);
+               ipr_cmd->fast_done(ipr_cmd);
+       }
+
+       return completed_ops;
+}
+
 /**
  * ipr_isr - Interrupt service routine
  * @irq:       irq number
@@ -5142,78 +5363,48 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg)
  **/
 static irqreturn_t ipr_isr(int irq, void *devp)
 {
-       struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp;
-       unsigned long lock_flags = 0;
+       struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp;
+       struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg;
+       unsigned long hrrq_flags = 0;
        u32 int_reg = 0;
-       u32 ioasc;
-       u16 cmd_index;
        int num_hrrq = 0;
        int irq_none = 0;
        struct ipr_cmnd *ipr_cmd, *temp;
        irqreturn_t rc = IRQ_NONE;
        LIST_HEAD(doneq);
 
-       spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-
+       spin_lock_irqsave(hrrq->lock, hrrq_flags);
        /* If interrupts are disabled, ignore the interrupt */
-       if (!ioa_cfg->allow_interrupts) {
-               spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+       if (!hrrq->allow_interrupts) {
+               spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                return IRQ_NONE;
        }
 
        while (1) {
-               ipr_cmd = NULL;
-
-               while ((be32_to_cpu(*ioa_cfg->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
-                      ioa_cfg->toggle_bit) {
-
-                       cmd_index = (be32_to_cpu(*ioa_cfg->hrrq_curr) &
-                                    IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> IPR_HRRQ_REQ_RESP_HANDLE_SHIFT;
-
-                       if (unlikely(cmd_index >= IPR_NUM_CMD_BLKS)) {
-                               ipr_isr_eh(ioa_cfg, "Invalid response handle from IOA");
-                               rc = IRQ_HANDLED;
-                               goto unlock_out;
-                       }
-
-                       ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index];
-
-                       ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
+               if (ipr_process_hrrq(hrrq, -1, &doneq)) {
+                       rc =  IRQ_HANDLED;
 
-                       ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc);
-
-                       list_move_tail(&ipr_cmd->queue, &doneq);
-
-                       rc = IRQ_HANDLED;
-
-                       if (ioa_cfg->hrrq_curr < ioa_cfg->hrrq_end) {
-                               ioa_cfg->hrrq_curr++;
-                       } else {
-                               ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start;
-                               ioa_cfg->toggle_bit ^= 1u;
-                       }
-               }
-
-               if (ipr_cmd && !ioa_cfg->clear_isr)
-                       break;
+                       if (!ioa_cfg->clear_isr)
+                               break;
 
-               if (ipr_cmd != NULL) {
                        /* Clear the PCI interrupt */
                        num_hrrq = 0;
                        do {
-                               writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32);
+                               writel(IPR_PCII_HRRQ_UPDATED,
+                                    ioa_cfg->regs.clr_interrupt_reg32);
                                int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
                        } while (int_reg & IPR_PCII_HRRQ_UPDATED &&
-                                       num_hrrq++ < IPR_MAX_HRRQ_RETRIES);
+                               num_hrrq++ < IPR_MAX_HRRQ_RETRIES);
 
                } else if (rc == IRQ_NONE && irq_none == 0) {
                        int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32);
                        irq_none++;
                } else if (num_hrrq == IPR_MAX_HRRQ_RETRIES &&
                           int_reg & IPR_PCII_HRRQ_UPDATED) {
-                       ipr_isr_eh(ioa_cfg, "Error clearing HRRQ");
+                       ipr_isr_eh(ioa_cfg,
+                               "Error clearing HRRQ: ", num_hrrq);
                        rc = IRQ_HANDLED;
-                       goto unlock_out;
+                       break;
                } else
                        break;
        }
@@ -5221,14 +5412,64 @@ static irqreturn_t ipr_isr(int irq, void *devp)
        if (unlikely(rc == IRQ_NONE))
                rc = ipr_handle_other_interrupt(ioa_cfg, int_reg);
 
-unlock_out:
-       spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+       spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
        list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
                list_del(&ipr_cmd->queue);
                del_timer(&ipr_cmd->timer);
                ipr_cmd->fast_done(ipr_cmd);
        }
+       return rc;
+}
+
+/**
+ * ipr_isr_mhrrq - Interrupt service routine
+ * @irq:       irq number
+ * @devp:      pointer to ioa config struct
+ *
+ * Return value:
+ *     IRQ_NONE / IRQ_HANDLED
+ **/
+static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
+{
+       struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp;
+       struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg;
+       unsigned long hrrq_flags = 0;
+       struct ipr_cmnd *ipr_cmd, *temp;
+       irqreturn_t rc = IRQ_NONE;
+       LIST_HEAD(doneq);
+
+       spin_lock_irqsave(hrrq->lock, hrrq_flags);
+
+       /* If interrupts are disabled, ignore the interrupt */
+       if (!hrrq->allow_interrupts) {
+               spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+               return IRQ_NONE;
+       }
+
+       if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+                       ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+               if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
+                      hrrq->toggle_bit) {
+                       if (!blk_iopoll_sched_prep(&hrrq->iopoll))
+                               blk_iopoll_sched(&hrrq->iopoll);
+                       spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+                       return IRQ_HANDLED;
+               }
+       } else {
+               if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
+                       hrrq->toggle_bit)
+
+                       if (ipr_process_hrrq(hrrq, -1, &doneq))
+                               rc =  IRQ_HANDLED;
+       }
 
+       spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+
+       list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
+               list_del(&ipr_cmd->queue);
+               del_timer(&ipr_cmd->timer);
+               ipr_cmd->fast_done(ipr_cmd);
+       }
        return rc;
 }
 
@@ -5388,7 +5629,6 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd)
 {
        struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
        struct ipr_resource_entry *res = scsi_cmd->device->hostdata;
-       struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
        if (IPR_IOASC_SENSE_KEY(ioasc) > 0) {
@@ -5406,7 +5646,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd)
                res->in_erp = 0;
        }
        scsi_dma_unmap(ipr_cmd->scsi_cmd);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        scsi_cmd->scsi_done(scsi_cmd);
 }
 
@@ -5790,7 +6030,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg,
        }
 
        scsi_dma_unmap(ipr_cmd->scsi_cmd);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        scsi_cmd->scsi_done(scsi_cmd);
 }
 
@@ -5809,21 +6049,21 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd)
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
        u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
-       unsigned long lock_flags;
+       unsigned long hrrq_flags;
 
        scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len));
 
        if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) {
                scsi_dma_unmap(scsi_cmd);
 
-               spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-               list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+               spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags);
+               list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
                scsi_cmd->scsi_done(scsi_cmd);
-               spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+               spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags);
        } else {
-               spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
+               spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags);
                ipr_erp_start(ioa_cfg, ipr_cmd);
-               spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+               spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags);
        }
 }
 
@@ -5846,22 +6086,34 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
        struct ipr_resource_entry *res;
        struct ipr_ioarcb *ioarcb;
        struct ipr_cmnd *ipr_cmd;
-       unsigned long lock_flags;
+       unsigned long hrrq_flags, lock_flags;
        int rc;
+       struct ipr_hrr_queue *hrrq;
+       int hrrq_id;
 
        ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata;
 
-       spin_lock_irqsave(shost->host_lock, lock_flags);
        scsi_cmd->result = (DID_OK << 16);
        res = scsi_cmd->device->hostdata;
 
+       if (ipr_is_gata(res) && res->sata_port) {
+               spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
+               rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap);
+               spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
+               return rc;
+       }
+
+       hrrq_id = ipr_get_hrrq_index(ioa_cfg);
+       hrrq = &ioa_cfg->hrrq[hrrq_id];
+
+       spin_lock_irqsave(hrrq->lock, hrrq_flags);
        /*
         * We are currently blocking all devices due to a host reset
         * We have told the host to stop giving us new requests, but
         * ERP ops don't count. FIXME
         */
-       if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) {
-               spin_unlock_irqrestore(shost->host_lock, lock_flags);
+       if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead)) {
+               spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                return SCSI_MLQUEUE_HOST_BUSY;
        }
 
@@ -5869,19 +6121,17 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
         * FIXME - Create scsi_set_host_offline interface
         *  and the ioa_is_dead check can be removed
         */
-       if (unlikely(ioa_cfg->ioa_is_dead || !res)) {
-               spin_unlock_irqrestore(shost->host_lock, lock_flags);
+       if (unlikely(hrrq->ioa_is_dead || !res)) {
+               spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                goto err_nodev;
        }
 
-       if (ipr_is_gata(res) && res->sata_port) {
-               rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap);
-               spin_unlock_irqrestore(shost->host_lock, lock_flags);
-               return rc;
+       ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq);
+       if (ipr_cmd == NULL) {
+               spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
+               return SCSI_MLQUEUE_HOST_BUSY;
        }
-
-       ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg);
-       spin_unlock_irqrestore(shost->host_lock, lock_flags);
+       spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
 
        ipr_init_ipr_cmnd(ipr_cmd, ipr_scsi_done);
        ioarcb = &ipr_cmd->ioarcb;
@@ -5902,26 +6152,27 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
        }
 
        if (scsi_cmd->cmnd[0] >= 0xC0 &&
-           (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE))
+           (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) {
                ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
+       }
 
        if (ioa_cfg->sis64)
                rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd);
        else
                rc = ipr_build_ioadl(ioa_cfg, ipr_cmd);
 
-       spin_lock_irqsave(shost->host_lock, lock_flags);
-       if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) {
-               list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
-               spin_unlock_irqrestore(shost->host_lock, lock_flags);
+       spin_lock_irqsave(hrrq->lock, hrrq_flags);
+       if (unlikely(rc || (!hrrq->allow_cmds && !hrrq->ioa_is_dead))) {
+               list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q);
+               spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                if (!rc)
                        scsi_dma_unmap(scsi_cmd);
                return SCSI_MLQUEUE_HOST_BUSY;
        }
 
-       if (unlikely(ioa_cfg->ioa_is_dead)) {
-               list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
-               spin_unlock_irqrestore(shost->host_lock, lock_flags);
+       if (unlikely(hrrq->ioa_is_dead)) {
+               list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q);
+               spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
                scsi_dma_unmap(scsi_cmd);
                goto err_nodev;
        }
@@ -5931,18 +6182,18 @@ static int ipr_queuecommand(struct Scsi_Host *shost,
                ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE;
                res->needs_sync_complete = 0;
        }
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+       list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_pending_q);
        ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res));
        ipr_send_command(ipr_cmd);
-       spin_unlock_irqrestore(shost->host_lock, lock_flags);
+       spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
        return 0;
 
 err_nodev:
-       spin_lock_irqsave(shost->host_lock, lock_flags);
+       spin_lock_irqsave(hrrq->lock, hrrq_flags);
        memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
        scsi_cmd->result = (DID_NO_CONNECT << 16);
        scsi_cmd->scsi_done(scsi_cmd);
-       spin_unlock_irqrestore(shost->host_lock, lock_flags);
+       spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
        return 0;
 }
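Editorial note on the hunk above (not part of the commit): ipr_queuecommand() now serialises on hrrq->lock instead of shost->host_lock. That is safe for the first queue because ipr_alloc_mem(), further down in this diff, points hrrq[0].lock at the Scsi_Host lock and gives only the additional queues their private _lock. A standalone sketch of that aliasing, with userspace pthread mutexes standing in for spinlocks and all names purely illustrative:

#include <pthread.h>
#include <stdio.h>

#define HRRQ_NUM 4

struct hrr_queue {
        pthread_mutex_t _lock;          /* per-queue private lock */
        pthread_mutex_t *lock;          /* lock actually taken by the fast path */
};

int main(void)
{
        pthread_mutex_t host_lock = PTHREAD_MUTEX_INITIALIZER;
        struct hrr_queue hrrq[HRRQ_NUM];

        for (int i = 0; i < HRRQ_NUM; i++) {
                pthread_mutex_init(&hrrq[i]._lock, NULL);
                /* queue 0 shares the host lock, the rest use their own */
                hrrq[i].lock = i ? &hrrq[i]._lock : &host_lock;
        }

        pthread_mutex_lock(hrrq[0].lock);       /* same mutex as host_lock */
        pthread_mutex_unlock(hrrq[0].lock);

        printf("hrrq[0] shares the host lock: %s\n",
               hrrq[0].lock == &host_lock ? "yes" : "no");
        return 0;
}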
 
@@ -6040,7 +6291,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap)
                spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
        }
 
-       if (!ioa_cfg->allow_cmds)
+       if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds)
                goto out_unlock;
 
        rc = ipr_device_reset(ioa_cfg, res);
@@ -6071,6 +6322,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc)
        struct ipr_sata_port *sata_port = qc->ap->private_data;
        struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg;
        struct ipr_cmnd *ipr_cmd;
+       struct ipr_hrr_queue *hrrq;
        unsigned long flags;
 
        spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
@@ -6080,11 +6332,15 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc)
                spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
        }
 
-       list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
-               if (ipr_cmd->qc == qc) {
-                       ipr_device_reset(ioa_cfg, sata_port->res);
-                       break;
+       for_each_hrrq(hrrq, ioa_cfg) {
+               spin_lock(&hrrq->_lock);
+               list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) {
+                       if (ipr_cmd->qc == qc) {
+                               ipr_device_reset(ioa_cfg, sata_port->res);
+                               break;
+                       }
                }
+               spin_unlock(&hrrq->_lock);
        }
        spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 }
@@ -6133,6 +6389,7 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd)
        struct ipr_resource_entry *res = sata_port->res;
        u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
 
+       spin_lock(&ipr_cmd->hrrq->_lock);
        if (ipr_cmd->ioa_cfg->sis64)
                memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata,
                       sizeof(struct ipr_ioasa_gata));
@@ -6148,7 +6405,8 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd)
                qc->err_mask |= __ac_err_mask(sata_port->ioasa.status);
        else
                qc->err_mask |= ac_err_mask(sata_port->ioasa.status);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
+       spin_unlock(&ipr_cmd->hrrq->_lock);
        ata_qc_complete(qc);
 }
 
@@ -6243,6 +6501,48 @@ static void ipr_build_ata_ioadl(struct ipr_cmnd *ipr_cmd,
                last_ioadl->flags_and_data_len |= cpu_to_be32(IPR_IOADL_FLAGS_LAST);
 }
 
+/**
+ * ipr_qc_defer - Get a free ipr_cmd
+ * @qc:        queued command
+ *
+ * Return value:
+ *     0 if a free command block was reserved (or the adapter is dead),
+ *     ATA_DEFER_LINK if the command should be deferred
+ **/
+static int ipr_qc_defer(struct ata_queued_cmd *qc)
+{
+       struct ata_port *ap = qc->ap;
+       struct ipr_sata_port *sata_port = ap->private_data;
+       struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg;
+       struct ipr_cmnd *ipr_cmd;
+       struct ipr_hrr_queue *hrrq;
+       int hrrq_id;
+
+       hrrq_id = ipr_get_hrrq_index(ioa_cfg);
+       hrrq = &ioa_cfg->hrrq[hrrq_id];
+
+       qc->lldd_task = NULL;
+       spin_lock(&hrrq->_lock);
+       if (unlikely(hrrq->ioa_is_dead)) {
+               spin_unlock(&hrrq->_lock);
+               return 0;
+       }
+
+       if (unlikely(!hrrq->allow_cmds)) {
+               spin_unlock(&hrrq->_lock);
+               return ATA_DEFER_LINK;
+       }
+
+       ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq);
+       if (ipr_cmd == NULL) {
+               spin_unlock(&hrrq->_lock);
+               return ATA_DEFER_LINK;
+       }
+
+       qc->lldd_task = ipr_cmd;
+       spin_unlock(&hrrq->_lock);
+       return 0;
+}
+
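Editorial note (not part of the commit): the new .qc_defer hook reserves an ipr_cmnd up front and parks it in qc->lldd_task; ipr_qc_issue() below consumes it, falling back to a late ipr_qc_defer() call if nothing was reserved. A minimal userspace model of that handoff, with invented names and plain integers standing in for ATA_DEFER_LINK and AC_ERR_SYSTEM:

#include <stddef.h>
#include <stdio.h>

struct cmd { int id; struct cmd *next; };
struct queue { struct cmd *free_list; };
struct qc { struct cmd *lldd_task; };

static struct cmd *get_free_cmd(struct queue *q)
{
        struct cmd *c = q->free_list;

        if (c)
                q->free_list = c->next;
        return c;
}

/* model of ->qc_defer(): reserve a command block, or ask libata to retry */
static int model_qc_defer(struct queue *q, struct qc *qc)
{
        struct cmd *c = get_free_cmd(q);

        if (!c)
                return 1;               /* ATA_DEFER_LINK in the driver */
        qc->lldd_task = c;
        return 0;
}

/* model of ->qc_issue(): consume the block reserved by qc_defer() */
static int model_qc_issue(struct queue *q, struct qc *qc)
{
        struct cmd *c;

        if (qc->lldd_task == NULL)      /* late reservation, as in the patch */
                model_qc_defer(q, qc);

        c = qc->lldd_task;
        if (c == NULL)
                return -1;              /* AC_ERR_SYSTEM in the driver */

        qc->lldd_task = NULL;
        printf("issuing on command block %d\n", c->id);
        return 0;
}

int main(void)
{
        struct cmd blocks[2] = { { .id = 0 }, { .id = 1 } };
        struct queue q = { .free_list = &blocks[0] };
        struct qc qc = { .lldd_task = NULL };

        blocks[0].next = &blocks[1];

        if (model_qc_defer(&q, &qc) == 0)
                model_qc_issue(&q, &qc);
        return 0;
}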
 /**
  * ipr_qc_issue - Issue a SATA qc to a device
  * @qc:        queued command
@@ -6260,10 +6560,23 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc)
        struct ipr_ioarcb *ioarcb;
        struct ipr_ioarcb_ata_regs *regs;
 
-       if (unlikely(!ioa_cfg->allow_cmds || ioa_cfg->ioa_is_dead))
+       if (qc->lldd_task == NULL)
+               ipr_qc_defer(qc);
+
+       ipr_cmd = qc->lldd_task;
+       if (ipr_cmd == NULL)
                return AC_ERR_SYSTEM;
 
-       ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
+       qc->lldd_task = NULL;
+       spin_lock(&ipr_cmd->hrrq->_lock);
+       if (unlikely(!ipr_cmd->hrrq->allow_cmds ||
+                       ipr_cmd->hrrq->ioa_is_dead)) {
+               list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
+               spin_unlock(&ipr_cmd->hrrq->_lock);
+               return AC_ERR_SYSTEM;
+       }
+
+       ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done);
        ioarcb = &ipr_cmd->ioarcb;
 
        if (ioa_cfg->sis64) {
@@ -6275,7 +6588,7 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc)
        memset(regs, 0, sizeof(*regs));
        ioarcb->add_cmd_parms_len = cpu_to_be16(sizeof(*regs));
 
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
        ipr_cmd->qc = qc;
        ipr_cmd->done = ipr_sata_done;
        ipr_cmd->ioarcb.res_handle = res->res_handle;
@@ -6315,10 +6628,12 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc)
 
        default:
                WARN_ON(1);
+               spin_unlock(&ipr_cmd->hrrq->_lock);
                return AC_ERR_INVALID;
        }
 
        ipr_send_command(ipr_cmd);
+       spin_unlock(&ipr_cmd->hrrq->_lock);
 
        return 0;
 }
@@ -6357,6 +6672,7 @@ static struct ata_port_operations ipr_sata_ops = {
        .hardreset = ipr_sata_reset,
        .post_internal_cmd = ipr_ata_post_internal,
        .qc_prep = ata_noop_qc_prep,
+       .qc_defer = ipr_qc_defer,
        .qc_issue = ipr_qc_issue,
        .qc_fill_rtf = ipr_qc_fill_rtf,
        .port_start = ata_sas_port_start,
@@ -6427,7 +6743,7 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd)
        ENTER;
        ioa_cfg->in_reset_reload = 0;
        ioa_cfg->reset_retries = 0;
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        wake_up_all(&ioa_cfg->reset_wait_q);
 
        spin_unlock_irq(ioa_cfg->host->host_lock);
@@ -6454,11 +6770,16 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd)
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct ipr_resource_entry *res;
        struct ipr_hostrcb *hostrcb, *temp;
-       int i = 0;
+       int i = 0, j;
 
        ENTER;
        ioa_cfg->in_reset_reload = 0;
-       ioa_cfg->allow_cmds = 1;
+       for (j = 0; j < ioa_cfg->hrrq_num; j++) {
+               spin_lock(&ioa_cfg->hrrq[j]._lock);
+               ioa_cfg->hrrq[j].allow_cmds = 1;
+               spin_unlock(&ioa_cfg->hrrq[j]._lock);
+       }
+       wmb();
        ioa_cfg->reset_cmd = NULL;
        ioa_cfg->doorbell |= IPR_RUNTIME_RESET;
 
@@ -6482,14 +6803,14 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd)
        dev_info(&ioa_cfg->pdev->dev, "IOA initialized.\n");
 
        ioa_cfg->reset_retries = 0;
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        wake_up_all(&ioa_cfg->reset_wait_q);
 
        spin_unlock(ioa_cfg->host->host_lock);
        scsi_unblock_requests(ioa_cfg->host);
        spin_lock(ioa_cfg->host->host_lock);
 
-       if (!ioa_cfg->allow_cmds)
+       if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds)
                scsi_block_requests(ioa_cfg->host);
 
        LEAVE;
@@ -6560,9 +6881,11 @@ static int ipr_set_supported_devs(struct ipr_cmnd *ipr_cmd)
 
                if (!ioa_cfg->sis64)
                        ipr_cmd->job_step = ipr_set_supported_devs;
+               LEAVE;
                return IPR_RC_JOB_RETURN;
        }
 
+       LEAVE;
        return IPR_RC_JOB_CONTINUE;
 }
 
@@ -6820,7 +7143,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd)
                ipr_cmd->ioarcb.cmd_pkt.cdb[0], ioasc);
 
        ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
        return IPR_RC_JOB_RETURN;
 }
 
@@ -7278,46 +7601,71 @@ static int ipr_ioafp_identify_hrrq(struct ipr_cmnd *ipr_cmd)
 {
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb;
+       struct ipr_hrr_queue *hrrq;
 
        ENTER;
+       ipr_cmd->job_step = ipr_ioafp_std_inquiry;
        dev_info(&ioa_cfg->pdev->dev, "Starting IOA initialization sequence.\n");
 
-       ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q;
-       ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE);
+       if (ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) {
+               hrrq = &ioa_cfg->hrrq[ioa_cfg->identify_hrrq_index];
 
-       ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
-       if (ioa_cfg->sis64)
-               ioarcb->cmd_pkt.cdb[1] = 0x1;
-       ioarcb->cmd_pkt.cdb[2] =
-               ((u64) ioa_cfg->host_rrq_dma >> 24) & 0xff;
-       ioarcb->cmd_pkt.cdb[3] =
-               ((u64) ioa_cfg->host_rrq_dma >> 16) & 0xff;
-       ioarcb->cmd_pkt.cdb[4] =
-               ((u64) ioa_cfg->host_rrq_dma >> 8) & 0xff;
-       ioarcb->cmd_pkt.cdb[5] =
-               ((u64) ioa_cfg->host_rrq_dma) & 0xff;
-       ioarcb->cmd_pkt.cdb[7] =
-               ((sizeof(u32) * IPR_NUM_CMD_BLKS) >> 8) & 0xff;
-       ioarcb->cmd_pkt.cdb[8] =
-               (sizeof(u32) * IPR_NUM_CMD_BLKS) & 0xff;
+               ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q;
+               ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE);
 
-       if (ioa_cfg->sis64) {
-               ioarcb->cmd_pkt.cdb[10] =
-                       ((u64) ioa_cfg->host_rrq_dma >> 56) & 0xff;
-               ioarcb->cmd_pkt.cdb[11] =
-                       ((u64) ioa_cfg->host_rrq_dma >> 48) & 0xff;
-               ioarcb->cmd_pkt.cdb[12] =
-                       ((u64) ioa_cfg->host_rrq_dma >> 40) & 0xff;
-               ioarcb->cmd_pkt.cdb[13] =
-                       ((u64) ioa_cfg->host_rrq_dma >> 32) & 0xff;
-       }
+               ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
+               if (ioa_cfg->sis64)
+                       ioarcb->cmd_pkt.cdb[1] = 0x1;
 
-       ipr_cmd->job_step = ipr_ioafp_std_inquiry;
+               if (ioa_cfg->nvectors == 1)
+                       ioarcb->cmd_pkt.cdb[1] &= ~IPR_ID_HRRQ_SELE_ENABLE;
+               else
+                       ioarcb->cmd_pkt.cdb[1] |= IPR_ID_HRRQ_SELE_ENABLE;
+
+               ioarcb->cmd_pkt.cdb[2] =
+                       ((u64) hrrq->host_rrq_dma >> 24) & 0xff;
+               ioarcb->cmd_pkt.cdb[3] =
+                       ((u64) hrrq->host_rrq_dma >> 16) & 0xff;
+               ioarcb->cmd_pkt.cdb[4] =
+                       ((u64) hrrq->host_rrq_dma >> 8) & 0xff;
+               ioarcb->cmd_pkt.cdb[5] =
+                       ((u64) hrrq->host_rrq_dma) & 0xff;
+               ioarcb->cmd_pkt.cdb[7] =
+                       ((sizeof(u32) * hrrq->size) >> 8) & 0xff;
+               ioarcb->cmd_pkt.cdb[8] =
+                       (sizeof(u32) * hrrq->size) & 0xff;
+
+               if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE)
+                       ioarcb->cmd_pkt.cdb[9] =
+                                       ioa_cfg->identify_hrrq_index;
 
-       ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, IPR_INTERNAL_TIMEOUT);
+               if (ioa_cfg->sis64) {
+                       ioarcb->cmd_pkt.cdb[10] =
+                               ((u64) hrrq->host_rrq_dma >> 56) & 0xff;
+                       ioarcb->cmd_pkt.cdb[11] =
+                               ((u64) hrrq->host_rrq_dma >> 48) & 0xff;
+                       ioarcb->cmd_pkt.cdb[12] =
+                               ((u64) hrrq->host_rrq_dma >> 40) & 0xff;
+                       ioarcb->cmd_pkt.cdb[13] =
+                               ((u64) hrrq->host_rrq_dma >> 32) & 0xff;
+               }
+
+               if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE)
+                       ioarcb->cmd_pkt.cdb[14] =
+                                       ioa_cfg->identify_hrrq_index;
+
+               ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout,
+                          IPR_INTERNAL_TIMEOUT);
+
+               if (++ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num)
+                       ipr_cmd->job_step = ipr_ioafp_identify_hrrq;
+
+               LEAVE;
+               return IPR_RC_JOB_RETURN;
+       }
 
        LEAVE;
-       return IPR_RC_JOB_RETURN;
+       return IPR_RC_JOB_CONTINUE;
 }
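Editorial note (not part of the commit): ipr_ioafp_identify_hrrq() is now re-entered once per host RRQ -- it issues one IPR_ID_HOST_RR_Q command, bumps identify_hrrq_index, and keeps itself as the job_step until every queue has been identified, after which it falls through to the standard inquiry step. A schematic userspace model of that sequencing; the queue count is an assumed value:

#include <stdio.h>

#define HRRQ_NUM 4                      /* assumed queue count */

/* one call per job-step invocation, as the reset job re-enters the step */
static int identify_hrrq(unsigned int *index)
{
        if (*index < HRRQ_NUM) {
                printf("IPR_ID_HOST_RR_Q for hrrq[%u]\n", *index);
                (*index)++;
                return 0;               /* IPR_RC_JOB_RETURN: come back here */
        }
        return 1;                       /* IPR_RC_JOB_CONTINUE: std inquiry */
}

int main(void)
{
        unsigned int identify_hrrq_index = 0;

        while (!identify_hrrq(&identify_hrrq_index))
                ;                       /* next job-step invocation */
        printf("all %d queues identified, continue to std inquiry\n", HRRQ_NUM);
        return 0;
}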
 
 /**
@@ -7365,7 +7713,9 @@ static void ipr_reset_timer_done(struct ipr_cmnd *ipr_cmd)
 static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd,
                                  unsigned long timeout)
 {
-       list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q);
+
+       ENTER;
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
        ipr_cmd->done = ipr_reset_ioa_job;
 
        ipr_cmd->timer.data = (unsigned long) ipr_cmd;
@@ -7383,13 +7733,26 @@ static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd,
  **/
 static void ipr_init_ioa_mem(struct ipr_ioa_cfg *ioa_cfg)
 {
-       memset(ioa_cfg->host_rrq, 0, sizeof(u32) * IPR_NUM_CMD_BLKS);
+       struct ipr_hrr_queue *hrrq;
+
+       for_each_hrrq(hrrq, ioa_cfg) {
+               spin_lock(&hrrq->_lock);
+               memset(hrrq->host_rrq, 0, sizeof(u32) * hrrq->size);
+
+               /* Initialize Host RRQ pointers */
+               hrrq->hrrq_start = hrrq->host_rrq;
+               hrrq->hrrq_end = &hrrq->host_rrq[hrrq->size - 1];
+               hrrq->hrrq_curr = hrrq->hrrq_start;
+               hrrq->toggle_bit = 1;
+               spin_unlock(&hrrq->_lock);
+       }
+       wmb();
 
-       /* Initialize Host RRQ pointers */
-       ioa_cfg->hrrq_start = ioa_cfg->host_rrq;
-       ioa_cfg->hrrq_end = &ioa_cfg->host_rrq[IPR_NUM_CMD_BLKS - 1];
-       ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start;
-       ioa_cfg->toggle_bit = 1;
+       ioa_cfg->identify_hrrq_index = 0;
+       if (ioa_cfg->hrrq_num == 1)
+               atomic_set(&ioa_cfg->hrrq_index, 0);
+       else
+               atomic_set(&ioa_cfg->hrrq_index, 1);
 
        /* Zero out config table */
        memset(ioa_cfg->u.cfg_table, 0, ioa_cfg->cfg_table_size);
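Editorial note (not part of the commit): the hrrq_index atomic initialised above is consumed by ipr_get_hrrq_index(), which is not visible in this hunk. A plausible userspace model of the implied round-robin selection -- queue 0 stays reserved for internal/ERP traffic whenever more than one queue exists; the constants and the exact rotation are assumptions for illustration only:

#include <stdio.h>

#define HRRQ_NUM 4                      /* assumed queue count */

static unsigned int hrrq_index;         /* models the atomic_t */

static unsigned int get_hrrq_index(void)
{
        if (HRRQ_NUM == 1)
                return 0;
        /* rotate over queues 1..HRRQ_NUM-1; queue 0 is kept for ERP */
        return 1 + (hrrq_index++ % (HRRQ_NUM - 1));
}

int main(void)
{
        for (int i = 0; i < 8; i++)
                printf("command %d -> hrrq[%u]\n", i, get_hrrq_index());
        return 0;
}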
@@ -7446,7 +7809,8 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd)
        ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout;
        ipr_cmd->done = ipr_reset_ioa_job;
        add_timer(&ipr_cmd->timer);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 
        return IPR_RC_JOB_RETURN;
 }
@@ -7466,12 +7830,18 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd)
        struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
        volatile u32 int_reg;
        volatile u64 maskval;
+       int i;
 
        ENTER;
        ipr_cmd->job_step = ipr_ioafp_identify_hrrq;
        ipr_init_ioa_mem(ioa_cfg);
 
-       ioa_cfg->allow_interrupts = 1;
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               spin_lock(&ioa_cfg->hrrq[i]._lock);
+               ioa_cfg->hrrq[i].allow_interrupts = 1;
+               spin_unlock(&ioa_cfg->hrrq[i]._lock);
+       }
+       wmb();
        if (ioa_cfg->sis64) {
                /* Set the adapter to the correct endian mode. */
                writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg);
@@ -7511,7 +7881,7 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd)
        ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout;
        ipr_cmd->done = ipr_reset_ioa_job;
        add_timer(&ipr_cmd->timer);
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
 
        LEAVE;
        return IPR_RC_JOB_RETURN;
@@ -8030,7 +8400,8 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd)
        int rc = IPR_RC_JOB_CONTINUE;
 
        ENTER;
-       if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->ioa_is_dead) {
+       if (shutdown_type != IPR_SHUTDOWN_NONE &&
+                       !ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
                ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE);
                ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD;
                ipr_cmd->ioarcb.cmd_pkt.cdb[0] = IPR_IOA_SHUTDOWN;
@@ -8078,7 +8449,8 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd)
                         * We are doing nested adapter resets and this is
                         * not the current reset job.
                         */
-                       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+                       list_add_tail(&ipr_cmd->queue,
+                                       &ipr_cmd->hrrq->hrrq_free_q);
                        return;
                }
 
@@ -8113,9 +8485,15 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
                                    enum ipr_shutdown_type shutdown_type)
 {
        struct ipr_cmnd *ipr_cmd;
+       int i;
 
        ioa_cfg->in_reset_reload = 1;
-       ioa_cfg->allow_cmds = 0;
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               spin_lock(&ioa_cfg->hrrq[i]._lock);
+               ioa_cfg->hrrq[i].allow_cmds = 0;
+               spin_unlock(&ioa_cfg->hrrq[i]._lock);
+       }
+       wmb();
        scsi_block_requests(ioa_cfg->host);
 
        ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg);
@@ -8141,7 +8519,9 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
 static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
                                   enum ipr_shutdown_type shutdown_type)
 {
-       if (ioa_cfg->ioa_is_dead)
+       int i;
+
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead)
                return;
 
        if (ioa_cfg->in_reset_reload) {
@@ -8156,7 +8536,12 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
                        "IOA taken offline - error recovery failed\n");
 
                ioa_cfg->reset_retries = 0;
-               ioa_cfg->ioa_is_dead = 1;
+               for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+                       spin_lock(&ioa_cfg->hrrq[i]._lock);
+                       ioa_cfg->hrrq[i].ioa_is_dead = 1;
+                       spin_unlock(&ioa_cfg->hrrq[i]._lock);
+               }
+               wmb();
 
                if (ioa_cfg->in_ioa_bringdown) {
                        ioa_cfg->reset_cmd = NULL;
@@ -8188,9 +8573,17 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg,
  */
 static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd)
 {
+       struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
+       int i;
+
        /* Disallow new interrupts, avoid loop */
-       ipr_cmd->ioa_cfg->allow_interrupts = 0;
-       list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q);
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               spin_lock(&ioa_cfg->hrrq[i]._lock);
+               ioa_cfg->hrrq[i].allow_interrupts = 0;
+               spin_unlock(&ioa_cfg->hrrq[i]._lock);
+       }
+       wmb();
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q);
        ipr_cmd->done = ipr_reset_ioa_job;
        return IPR_RC_JOB_RETURN;
 }
@@ -8247,13 +8640,19 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev)
 {
        unsigned long flags = 0;
        struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
+       int i;
 
        spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
        if (ioa_cfg->sdt_state == WAIT_FOR_DUMP)
                ioa_cfg->sdt_state = ABORT_DUMP;
        ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES;
        ioa_cfg->in_ioa_bringdown = 1;
-       ioa_cfg->allow_cmds = 0;
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               spin_lock(&ioa_cfg->hrrq[i]._lock);
+               ioa_cfg->hrrq[i].allow_cmds = 0;
+               spin_unlock(&ioa_cfg->hrrq[i]._lock);
+       }
+       wmb();
        ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
        spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 }
@@ -8310,12 +8709,11 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg)
        } else
                _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa,
                                        IPR_SHUTDOWN_NONE);
-
        spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
        wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
        spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags);
 
-       if (ioa_cfg->ioa_is_dead) {
+       if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
                rc = -EIO;
        } else if (ipr_invalid_adapter(ioa_cfg)) {
                if (!ipr_testmode)
@@ -8376,8 +8774,13 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg)
        pci_free_consistent(ioa_cfg->pdev, sizeof(struct ipr_misc_cbs),
                            ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma);
        ipr_free_cmd_blks(ioa_cfg);
-       pci_free_consistent(ioa_cfg->pdev, sizeof(u32) * IPR_NUM_CMD_BLKS,
-                           ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma);
+
+       for (i = 0; i < ioa_cfg->hrrq_num; i++)
+               pci_free_consistent(ioa_cfg->pdev,
+                                       sizeof(u32) * ioa_cfg->hrrq[i].size,
+                                       ioa_cfg->hrrq[i].host_rrq,
+                                       ioa_cfg->hrrq[i].host_rrq_dma);
+
        pci_free_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size,
                            ioa_cfg->u.cfg_table,
                            ioa_cfg->cfg_table_dma);
@@ -8408,8 +8811,23 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg)
        struct pci_dev *pdev = ioa_cfg->pdev;
 
        ENTER;
-       free_irq(pdev->irq, ioa_cfg);
-       pci_disable_msi(pdev);
+       if (ioa_cfg->intr_flag == IPR_USE_MSI ||
+           ioa_cfg->intr_flag == IPR_USE_MSIX) {
+               int i;
+               for (i = 0; i < ioa_cfg->nvectors; i++)
+                       free_irq(ioa_cfg->vectors_info[i].vec,
+                               &ioa_cfg->hrrq[i]);
+       } else
+               free_irq(pdev->irq, &ioa_cfg->hrrq[0]);
+
+       if (ioa_cfg->intr_flag == IPR_USE_MSI) {
+               pci_disable_msi(pdev);
+               ioa_cfg->intr_flag &= ~IPR_USE_MSI;
+       } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) {
+               pci_disable_msix(pdev);
+               ioa_cfg->intr_flag &= ~IPR_USE_MSIX;
+       }
+
        iounmap(ioa_cfg->hdw_dma_regs);
        pci_release_regions(pdev);
        ipr_free_mem(ioa_cfg);
@@ -8430,7 +8848,7 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
        struct ipr_cmnd *ipr_cmd;
        struct ipr_ioarcb *ioarcb;
        dma_addr_t dma_addr;
-       int i;
+       int i, entries_each_hrrq, hrrq_id = 0;
 
        ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev,
                                                sizeof(struct ipr_cmnd), 512, 0);
@@ -8446,6 +8864,41 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
                return -ENOMEM;
        }
 
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               if (ioa_cfg->hrrq_num > 1) {
+                       if (i == 0) {
+                               entries_each_hrrq = IPR_NUM_INTERNAL_CMD_BLKS;
+                               ioa_cfg->hrrq[i].min_cmd_id = 0;
+                               ioa_cfg->hrrq[i].max_cmd_id =
+                                       (entries_each_hrrq - 1);
+                       } else {
+                               entries_each_hrrq =
+                                       IPR_NUM_BASE_CMD_BLKS /
+                                       (ioa_cfg->hrrq_num - 1);
+                               ioa_cfg->hrrq[i].min_cmd_id =
+                                       IPR_NUM_INTERNAL_CMD_BLKS +
+                                       (i - 1) * entries_each_hrrq;
+                               ioa_cfg->hrrq[i].max_cmd_id =
+                                       (IPR_NUM_INTERNAL_CMD_BLKS +
+                                       i * entries_each_hrrq - 1);
+                       }
+               } else {
+                       entries_each_hrrq = IPR_NUM_CMD_BLKS;
+                       ioa_cfg->hrrq[i].min_cmd_id = 0;
+                       ioa_cfg->hrrq[i].max_cmd_id = (entries_each_hrrq - 1);
+               }
+               ioa_cfg->hrrq[i].size = entries_each_hrrq;
+       }
+
+       BUG_ON(ioa_cfg->hrrq_num == 0);
+
+       i = IPR_NUM_CMD_BLKS -
+               ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id - 1;
+       if (i > 0) {
+               ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].size += i;
+               ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id += i;
+       }
+
        for (i = 0; i < IPR_NUM_CMD_BLKS; i++) {
                ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr);
 
@@ -8484,7 +8937,11 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg)
                ipr_cmd->sense_buffer_dma = dma_addr +
                        offsetof(struct ipr_cmnd, sense_buffer);
 
-               list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+               ipr_cmd->ioarcb.cmd_pkt.hrrq_id = hrrq_id;
+               ipr_cmd->hrrq = &ioa_cfg->hrrq[hrrq_id];
+               list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
+               if (i >= ioa_cfg->hrrq[hrrq_id].max_cmd_id)
+                       hrrq_id++;
        }
 
        return 0;
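Editorial note (not part of the commit): the sizing loop above splits the command-block pool so that hrrq[0] holds only the internal/ERP blocks and the remaining blocks are divided evenly across the other queues, with anything lost to the integer division handed to the last queue. A standalone model of the multi-queue case; the IPR_NUM_*_CMD_BLKS values below are assumptions chosen only to make the arithmetic concrete:

#include <stdio.h>

/* assumed values, only to make the arithmetic concrete */
#define NUM_INTERNAL_CMD_BLKS   4
#define NUM_BASE_CMD_BLKS       100
#define NUM_CMD_BLKS            (NUM_BASE_CMD_BLKS + NUM_INTERNAL_CMD_BLKS)
#define HRRQ_NUM                4

struct hrrq { unsigned int size, min_cmd_id, max_cmd_id; };

int main(void)
{
        struct hrrq hrrq[HRRQ_NUM];
        unsigned int leftover;
        int i;

        for (i = 0; i < HRRQ_NUM; i++) {
                if (i == 0) {
                        /* queue 0 carries only the internal/ERP blocks */
                        hrrq[i].size = NUM_INTERNAL_CMD_BLKS;
                        hrrq[i].min_cmd_id = 0;
                } else {
                        hrrq[i].size = NUM_BASE_CMD_BLKS / (HRRQ_NUM - 1);
                        hrrq[i].min_cmd_id = NUM_INTERNAL_CMD_BLKS +
                                             (i - 1) * hrrq[i].size;
                }
                hrrq[i].max_cmd_id = hrrq[i].min_cmd_id + hrrq[i].size - 1;
        }

        /* blocks lost to the integer division go to the last queue */
        leftover = NUM_CMD_BLKS - hrrq[HRRQ_NUM - 1].max_cmd_id - 1;
        hrrq[HRRQ_NUM - 1].size += leftover;
        hrrq[HRRQ_NUM - 1].max_cmd_id += leftover;

        for (i = 0; i < HRRQ_NUM; i++)
                printf("hrrq[%d]: command ids %u-%u (%u blocks)\n", i,
                       hrrq[i].min_cmd_id, hrrq[i].max_cmd_id, hrrq[i].size);
        return 0;
}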
@@ -8516,6 +8973,10 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
                                             BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL);
                ioa_cfg->vset_ids = kzalloc(sizeof(unsigned long) *
                                            BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL);
+
+               if (!ioa_cfg->target_ids || !ioa_cfg->array_ids
+                       || !ioa_cfg->vset_ids)
+                       goto out_free_res_entries;
        }
 
        for (i = 0; i < ioa_cfg->max_devs_supported; i++) {
@@ -8530,15 +8991,34 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg)
        if (!ioa_cfg->vpd_cbs)
                goto out_free_res_entries;
 
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_free_q);
+               INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_pending_q);
+               spin_lock_init(&ioa_cfg->hrrq[i]._lock);
+               if (i == 0)
+                       ioa_cfg->hrrq[i].lock = ioa_cfg->host->host_lock;
+               else
+                       ioa_cfg->hrrq[i].lock = &ioa_cfg->hrrq[i]._lock;
+       }
+
        if (ipr_alloc_cmd_blks(ioa_cfg))
                goto out_free_vpd_cbs;
 
-       ioa_cfg->host_rrq = pci_alloc_consistent(ioa_cfg->pdev,
-                                                sizeof(u32) * IPR_NUM_CMD_BLKS,
-                                                &ioa_cfg->host_rrq_dma);
-
-       if (!ioa_cfg->host_rrq)
-               goto out_ipr_free_cmd_blocks;
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               ioa_cfg->hrrq[i].host_rrq = pci_alloc_consistent(ioa_cfg->pdev,
+                                       sizeof(u32) * ioa_cfg->hrrq[i].size,
+                                       &ioa_cfg->hrrq[i].host_rrq_dma);
+
+               if (!ioa_cfg->hrrq[i].host_rrq) {
+                       while (--i >= 0)
+                               pci_free_consistent(pdev,
+                                       sizeof(u32) * ioa_cfg->hrrq[i].size,
+                                       ioa_cfg->hrrq[i].host_rrq,
+                                       ioa_cfg->hrrq[i].host_rrq_dma);
+                       goto out_ipr_free_cmd_blocks;
+               }
+               ioa_cfg->hrrq[i].ioa_cfg = ioa_cfg;
+       }
 
        ioa_cfg->u.cfg_table = pci_alloc_consistent(ioa_cfg->pdev,
                                                    ioa_cfg->cfg_table_size,
@@ -8582,8 +9062,12 @@ out_free_hostrcb_dma:
                            ioa_cfg->u.cfg_table,
                            ioa_cfg->cfg_table_dma);
 out_free_host_rrq:
-       pci_free_consistent(pdev, sizeof(u32) * IPR_NUM_CMD_BLKS,
-                           ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma);
+       for (i = 0; i < ioa_cfg->hrrq_num; i++) {
+               pci_free_consistent(pdev,
+                               sizeof(u32) * ioa_cfg->hrrq[i].size,
+                               ioa_cfg->hrrq[i].host_rrq,
+                               ioa_cfg->hrrq[i].host_rrq_dma);
+       }
 out_ipr_free_cmd_blocks:
        ipr_free_cmd_blks(ioa_cfg);
 out_free_vpd_cbs:
@@ -8591,6 +9075,9 @@ out_free_vpd_cbs:
                            ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma);
 out_free_res_entries:
        kfree(ioa_cfg->res_entries);
+       kfree(ioa_cfg->target_ids);
+       kfree(ioa_cfg->array_ids);
+       kfree(ioa_cfg->vset_ids);
        goto out;
 }
 
@@ -8638,15 +9125,11 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg,
        ioa_cfg->doorbell = IPR_DOORBELL;
        sprintf(ioa_cfg->eye_catcher, IPR_EYECATCHER);
        sprintf(ioa_cfg->trace_start, IPR_TRACE_START_LABEL);
-       sprintf(ioa_cfg->ipr_free_label, IPR_FREEQ_LABEL);
-       sprintf(ioa_cfg->ipr_pending_label, IPR_PENDQ_LABEL);
        sprintf(ioa_cfg->cfg_table_start, IPR_CFG_TBL_START);
        sprintf(ioa_cfg->resource_table_label, IPR_RES_TABLE_LABEL);
        sprintf(ioa_cfg->ipr_hcam_label, IPR_HCAM_LABEL);
        sprintf(ioa_cfg->ipr_cmd_label, IPR_CMD_LABEL);
 
-       INIT_LIST_HEAD(&ioa_cfg->free_q);
-       INIT_LIST_HEAD(&ioa_cfg->pending_q);
        INIT_LIST_HEAD(&ioa_cfg->hostrcb_free_q);
        INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q);
        INIT_LIST_HEAD(&ioa_cfg->free_res_q);
@@ -8724,6 +9207,88 @@ ipr_get_chip_info(const struct pci_device_id *dev_id)
        return NULL;
 }
 
+static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg)
+{
+       struct msix_entry entries[IPR_MAX_MSIX_VECTORS];
+       int i, err, vectors;
+
+       for (i = 0; i < ARRAY_SIZE(entries); ++i)
+               entries[i].entry = i;
+
+       vectors = ipr_number_of_msix;
+
+       while ((err = pci_enable_msix(ioa_cfg->pdev, entries, vectors)) > 0)
+               vectors = err;
+
+       if (err < 0) {
+               pci_disable_msix(ioa_cfg->pdev);
+               return err;
+       }
+
+       if (!err) {
+               for (i = 0; i < vectors; i++)
+                       ioa_cfg->vectors_info[i].vec = entries[i].vector;
+               ioa_cfg->nvectors = vectors;
+       }
+
+       return err;
+}
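Editorial note (not part of the commit): pci_enable_msix() -- and pci_enable_msi_block() in the next function -- returned 0 on success, a negative errno on failure, or a positive count of vectors that could still be allocated in kernels of this vintage; the retry loops above simply ask again with that smaller count. A standalone model of the convention, with arbitrary example vector counts:

#include <stdio.h>

/* pretend the controller/platform only has 3 vectors to give out */
static int fake_enable_msix(int requested)
{
        const int available = 3;

        return requested <= available ? 0 : available;
}

int main(void)
{
        int err, vectors = 5;           /* arbitrary initial request */

        while ((err = fake_enable_msix(vectors)) > 0)
                vectors = err;          /* retry with what is available */

        if (!err)
                printf("enabled %d vectors\n", vectors);
        return 0;
}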
+
+static int ipr_enable_msi(struct ipr_ioa_cfg *ioa_cfg)
+{
+       int i, err, vectors;
+
+       vectors = ipr_number_of_msix;
+
+       while ((err = pci_enable_msi_block(ioa_cfg->pdev, vectors)) > 0)
+               vectors = err;
+
+       if (err < 0) {
+               pci_disable_msi(ioa_cfg->pdev);
+               return err;
+       }
+
+       if (!err) {
+               for (i = 0; i < vectors; i++)
+                       ioa_cfg->vectors_info[i].vec = ioa_cfg->pdev->irq + i;
+               ioa_cfg->nvectors = vectors;
+       }
+
+       return err;
+}
+
+static void name_msi_vectors(struct ipr_ioa_cfg *ioa_cfg)
+{
+       int vec_idx, n = sizeof(ioa_cfg->vectors_info[0].desc) - 1;
+
+       for (vec_idx = 0; vec_idx < ioa_cfg->nvectors; vec_idx++) {
+               snprintf(ioa_cfg->vectors_info[vec_idx].desc, n,
+                        "host%d-%d", ioa_cfg->host->host_no, vec_idx);
+               ioa_cfg->vectors_info[vec_idx].
+                       desc[strlen(ioa_cfg->vectors_info[vec_idx].desc)] = 0;
+       }
+}
+
+static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg)
+{
+       int i, rc;
+
+       for (i = 1; i < ioa_cfg->nvectors; i++) {
+               rc = request_irq(ioa_cfg->vectors_info[i].vec,
+                       ipr_isr_mhrrq,
+                       0,
+                       ioa_cfg->vectors_info[i].desc,
+                       &ioa_cfg->hrrq[i]);
+               if (rc) {
+                       while (--i >= 0)
+                               free_irq(ioa_cfg->vectors_info[i].vec,
+                                       &ioa_cfg->hrrq[i]);
+                       return rc;
+               }
+       }
+       return 0;
+}
+
 /**
  * ipr_test_intr - Handle the interrupt generated in ipr_test_msi().
  * @pdev:              PCI device struct
@@ -8740,6 +9305,7 @@ static irqreturn_t ipr_test_intr(int irq, void *devp)
        unsigned long lock_flags = 0;
        irqreturn_t rc = IRQ_HANDLED;
 
+       dev_info(&ioa_cfg->pdev->dev, "Received IRQ: %d\n", irq);
        spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
 
        ioa_cfg->msi_received = 1;
@@ -8787,9 +9353,9 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev)
        writel(IPR_PCII_IO_DEBUG_ACKNOWLEDGE, ioa_cfg->regs.sense_interrupt_reg32);
        int_reg = readl(ioa_cfg->regs.sense_interrupt_reg);
        wait_event_timeout(ioa_cfg->msi_wait_q, ioa_cfg->msi_received, HZ);
+       spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
        ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER);
 
-       spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
        if (!ioa_cfg->msi_received) {
                /* MSI test failed */
                dev_info(&pdev->dev, "MSI test failed.  Falling back to LSI.\n");
@@ -8806,8 +9372,7 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev)
        return rc;
 }
 
-/**
- * ipr_probe_ioa - Allocates memory and does first stage of initialization
+ /* ipr_probe_ioa - Allocates memory and does first stage of initialization
  * @pdev:              PCI device struct
  * @dev_id:            PCI device id struct
  *
@@ -8823,6 +9388,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
        void __iomem *ipr_regs;
        int rc = PCIBIOS_SUCCESSFUL;
        volatile u32 mask, uproc, interrupts;
+       unsigned long lock_flags;
 
        ENTER;
 
@@ -8918,17 +9484,56 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
                goto cleanup_nomem;
        }
 
-       /* Enable MSI style interrupts if they are supported. */
-       if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && !pci_enable_msi(pdev)) {
+       if (ipr_number_of_msix > IPR_MAX_MSIX_VECTORS) {
+               dev_err(&pdev->dev, "The max number of MSI-X vectors is %d\n",
+                       IPR_MAX_MSIX_VECTORS);
+               ipr_number_of_msix = IPR_MAX_MSIX_VECTORS;
+       }
+
+       if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI &&
+                       ipr_enable_msix(ioa_cfg) == 0)
+               ioa_cfg->intr_flag = IPR_USE_MSIX;
+       else if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI &&
+                       ipr_enable_msi(ioa_cfg) == 0)
+               ioa_cfg->intr_flag = IPR_USE_MSI;
+       else {
+               ioa_cfg->intr_flag = IPR_USE_LSI;
+               ioa_cfg->nvectors = 1;
+               dev_info(&pdev->dev, "Cannot enable MSI.\n");
+       }
+
+       if (ioa_cfg->intr_flag == IPR_USE_MSI ||
+           ioa_cfg->intr_flag == IPR_USE_MSIX) {
                rc = ipr_test_msi(ioa_cfg, pdev);
-               if (rc == -EOPNOTSUPP)
-                       pci_disable_msi(pdev);
+               if (rc == -EOPNOTSUPP) {
+                       if (ioa_cfg->intr_flag == IPR_USE_MSI) {
+                               ioa_cfg->intr_flag &= ~IPR_USE_MSI;
+                               pci_disable_msi(pdev);
+                       } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) {
+                               ioa_cfg->intr_flag &= ~IPR_USE_MSIX;
+                               pci_disable_msix(pdev);
+                       }
+
+                       ioa_cfg->intr_flag = IPR_USE_LSI;
+                       ioa_cfg->nvectors = 1;
+               }
                else if (rc)
                        goto out_msi_disable;
-               else
-                       dev_info(&pdev->dev, "MSI enabled with IRQ: %d\n", pdev->irq);
-       } else if (ipr_debug)
-               dev_info(&pdev->dev, "Cannot enable MSI.\n");
+               else {
+                       if (ioa_cfg->intr_flag == IPR_USE_MSI)
+                               dev_info(&pdev->dev,
+                                       "Request for %d MSIs succeeded with starting IRQ: %d\n",
+                                       ioa_cfg->nvectors, pdev->irq);
+                       else if (ioa_cfg->intr_flag == IPR_USE_MSIX)
+                               dev_info(&pdev->dev,
+                                       "Request for %d MSI-X vectors succeeded.\n",
+                                       ioa_cfg->nvectors);
+               }
+       }
+
+       ioa_cfg->hrrq_num = min3(ioa_cfg->nvectors,
+                               (unsigned int)num_online_cpus(),
+                               (unsigned int)IPR_MAX_HRRQ_NUM);
 
        /* Save away PCI config space for use following IOA reset */
        rc = pci_save_state(pdev);
@@ -8975,11 +9580,24 @@ static int ipr_probe_ioa(struct pci_dev *pdev,
        if (interrupts & IPR_PCII_IOA_UNIT_CHECKED)
                ioa_cfg->ioa_unit_checked = 1;
 
+       spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
        ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER);
-       rc = request_irq(pdev->irq, ipr_isr,
-                        ioa_cfg->msi_received ? 0 : IRQF_SHARED,
-                        IPR_NAME, ioa_cfg);
+       spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 
+       if (ioa_cfg->intr_flag == IPR_USE_MSI
+                       || ioa_cfg->intr_flag == IPR_USE_MSIX) {
+               name_msi_vectors(ioa_cfg);
+               rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_isr,
+                       0,
+                       ioa_cfg->vectors_info[0].desc,
+                       &ioa_cfg->hrrq[0]);
+               if (!rc)
+                       rc = ipr_request_other_msi_irqs(ioa_cfg);
+       } else {
+               rc = request_irq(pdev->irq, ipr_isr,
+                        IRQF_SHARED,
+                        IPR_NAME, &ioa_cfg->hrrq[0]);
+       }
        if (rc) {
                dev_err(&pdev->dev, "Couldn't register IRQ %d! rc=%d\n",
                        pdev->irq, rc);
@@ -9004,7 +9622,10 @@ out:
 cleanup_nolog:
        ipr_free_mem(ioa_cfg);
 out_msi_disable:
-       pci_disable_msi(pdev);
+       if (ioa_cfg->intr_flag == IPR_USE_MSI)
+               pci_disable_msi(pdev);
+       else if (ioa_cfg->intr_flag == IPR_USE_MSIX)
+               pci_disable_msix(pdev);
 cleanup_nomem:
        iounmap(ipr_regs);
 out_release_regions:
@@ -9138,7 +9759,7 @@ static void ipr_remove(struct pci_dev *pdev)
 static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
 {
        struct ipr_ioa_cfg *ioa_cfg;
-       int rc;
+       int rc, i;
 
        rc = ipr_probe_ioa(pdev, dev_id);
 
@@ -9185,6 +9806,17 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
        scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN);
        ioa_cfg->allow_ml_add_del = 1;
        ioa_cfg->host->max_channel = IPR_VSET_BUS;
+       ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
+
+       if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+                       ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+               for (i = 1; i < ioa_cfg->hrrq_num; i++) {
+                       blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+                                       ioa_cfg->iopoll_weight, ipr_iopoll);
+                       blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
+               }
+       }
+
        schedule_work(&ioa_cfg->work_q);
        return 0;
 }
@@ -9203,8 +9835,16 @@ static void ipr_shutdown(struct pci_dev *pdev)
 {
        struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev);
        unsigned long lock_flags = 0;
+       int i;
 
        spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
+       if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
+                       ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+               ioa_cfg->iopoll_weight = 0;
+               for (i = 1; i < ioa_cfg->hrrq_num; i++)
+                       blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+       }
+
        while (ioa_cfg->in_reset_reload) {
                spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
                wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
@@ -9276,6 +9916,8 @@ static struct pci_device_id ipr_pci_table[] = {
                PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_574D, 0, 0, 0 },
        { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
                PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57B2, 0, 0, 0 },
+       { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
+               PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C0, 0, 0, 0 },
        { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
                PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C3, 0, 0, 0 },
        { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2,
@@ -9290,6 +9932,14 @@ static struct pci_device_id ipr_pci_table[] = {
                PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C8, 0, 0, 0 },
        { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
                PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57CE, 0, 0, 0 },
+       { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+               PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D5, 0, 0, 0 },
+       { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+               PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D6, 0, 0, 0 },
+       { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+               PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D7, 0, 0, 0 },
+       { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE,
+               PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D8, 0, 0, 0 },
        { }
 };
 MODULE_DEVICE_TABLE(pci, ipr_pci_table);
@@ -9316,9 +9966,7 @@ static struct pci_driver ipr_driver = {
  **/
 static void ipr_halt_done(struct ipr_cmnd *ipr_cmd)
 {
-       struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
-
-       list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+       list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q);
 }
 
 /**
@@ -9340,7 +9988,7 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf)
 
        list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) {
                spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
-               if (!ioa_cfg->allow_cmds) {
+               if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) {
                        spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
                        continue;
                }
index c8a137f83bb13e1dbd9d3ceb1478c7fa29e76177..1a9a246932ae903e82ecfa4bec4cea4ea2868d60 100644 (file)
 #include <linux/libata.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/blk-iopoll.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 
 /*
  * Literals
  */
-#define IPR_DRIVER_VERSION "2.5.4"
-#define IPR_DRIVER_DATE "(July 11, 2012)"
+#define IPR_DRIVER_VERSION "2.6.0"
+#define IPR_DRIVER_DATE "(November 16, 2012)"
 
 /*
  * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding
@@ -82,6 +83,7 @@
 
 #define IPR_SUBS_DEV_ID_57B4    0x033B
 #define IPR_SUBS_DEV_ID_57B2    0x035F
+#define IPR_SUBS_DEV_ID_57C0    0x0352
 #define IPR_SUBS_DEV_ID_57C3    0x0353
 #define IPR_SUBS_DEV_ID_57C4    0x0354
 #define IPR_SUBS_DEV_ID_57C6    0x0357
 #define IPR_SUBS_DEV_ID_574D    0x0356
 #define IPR_SUBS_DEV_ID_57C8    0x035D
 
+#define IPR_SUBS_DEV_ID_57D5    0x03FB
+#define IPR_SUBS_DEV_ID_57D6    0x03FC
+#define IPR_SUBS_DEV_ID_57D7    0x03FF
+#define IPR_SUBS_DEV_ID_57D8    0x03FE
 #define IPR_NAME                               "ipr"
 
 /*
@@ -298,6 +304,9 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR)
  * Misc literals
  */
 #define IPR_NUM_IOADL_ENTRIES                  IPR_MAX_SGLIST
+#define IPR_MAX_MSIX_VECTORS           0x5
+#define IPR_MAX_HRRQ_NUM               0x10
+#define IPR_INIT_HRRQ                  0x0
 
 /*
  * Adapter interface types
@@ -404,7 +413,7 @@ struct ipr_config_table_entry64 {
        __be64 dev_id;
        __be64 lun;
        __be64 lun_wwn[2];
-#define IPR_MAX_RES_PATH_LENGTH                24
+#define IPR_MAX_RES_PATH_LENGTH                48
        __be64 res_path;
        struct ipr_std_inq_data std_inq_data;
        u8 reserved2[4];
@@ -459,9 +468,39 @@ struct ipr_supported_device {
        u8 reserved2[16];
 }__attribute__((packed, aligned (4)));
 
+struct ipr_hrr_queue {
+       struct ipr_ioa_cfg *ioa_cfg;
+       __be32 *host_rrq;
+       dma_addr_t host_rrq_dma;
+#define IPR_HRRQ_REQ_RESP_HANDLE_MASK  0xfffffffc
+#define IPR_HRRQ_RESP_BIT_SET          0x00000002
+#define IPR_HRRQ_TOGGLE_BIT            0x00000001
+#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2
+#define IPR_ID_HRRQ_SELE_ENABLE                0x02
+       volatile __be32 *hrrq_start;
+       volatile __be32 *hrrq_end;
+       volatile __be32 *hrrq_curr;
+
+       struct list_head hrrq_free_q;
+       struct list_head hrrq_pending_q;
+       spinlock_t _lock;
+       spinlock_t *lock;
+
+       volatile u32 toggle_bit;
+       u32 size;
+       u32 min_cmd_id;
+       u32 max_cmd_id;
+       u8 allow_interrupts:1;
+       u8 ioa_is_dead:1;
+       u8 allow_cmds:1;
+
+       struct blk_iopoll iopoll;
+};
+
 /* Command packet structure */
 struct ipr_cmd_pkt {
-       __be16 reserved;                /* Reserved by IOA */
+       u8 reserved;            /* Reserved by IOA */
+       u8 hrrq_id;
        u8 request_type;
 #define IPR_RQTYPE_SCSICDB             0x00
 #define IPR_RQTYPE_IOACMD              0x01
@@ -1022,6 +1061,10 @@ struct ipr_hostrcb64_fabric_desc {
        struct ipr_hostrcb64_config_element elem[1];
 }__attribute__((packed, aligned (8)));
 
+#define for_each_hrrq(hrrq, ioa_cfg) \
+               for (hrrq = (ioa_cfg)->hrrq; \
+                       hrrq < ((ioa_cfg)->hrrq + (ioa_cfg)->hrrq_num); hrrq++)
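Editorial note (not part of the commit): for_each_hrrq() walks the hrrq[] array by pointer for hrrq_num entries, mirroring the existing for_each_fabric_cfg() style below. A standalone model of the same iteration pattern, with invented names:

#include <stdio.h>

struct q { int id; };
struct cfg { struct q hrrq[4]; int hrrq_num; };

#define for_each_q(q, cfg) \
        for (q = (cfg)->hrrq; q < ((cfg)->hrrq + (cfg)->hrrq_num); q++)

int main(void)
{
        struct cfg cfg = { .hrrq = { {0}, {1}, {2}, {3} }, .hrrq_num = 4 };
        struct q *q;

        for_each_q(q, &cfg)
                printf("visiting queue %d\n", q->id);
        return 0;
}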
+
 #define for_each_fabric_cfg(fabric, cfg) \
                for (cfg = (fabric)->elem; \
                        cfg < ((fabric)->elem + be16_to_cpu((fabric)->num_entries)); \
@@ -1308,6 +1351,7 @@ struct ipr_chip_cfg_t {
        u16 max_cmds;
        u8 cache_line_size;
        u8 clear_isr;
+       u32 iopoll_weight;
        struct ipr_interrupt_offsets regs;
 };
 
@@ -1317,6 +1361,7 @@ struct ipr_chip_t {
        u16 intr_type;
 #define IPR_USE_LSI                    0x00
 #define IPR_USE_MSI                    0x01
+#define IPR_USE_MSIX                   0x02
        u16 sis_type;
 #define IPR_SIS32                      0x00
 #define IPR_SIS64                      0x01
@@ -1375,13 +1420,10 @@ struct ipr_ioa_cfg {
 
        struct list_head queue;
 
-       u8 allow_interrupts:1;
        u8 in_reset_reload:1;
        u8 in_ioa_bringdown:1;
        u8 ioa_unit_checked:1;
-       u8 ioa_is_dead:1;
        u8 dump_taken:1;
-       u8 allow_cmds:1;
        u8 allow_ml_add_del:1;
        u8 needs_hard_reset:1;
        u8 dual_raid:1;
@@ -1413,21 +1455,7 @@ struct ipr_ioa_cfg {
        char trace_start[8];
 #define IPR_TRACE_START_LABEL                  "trace"
        struct ipr_trace_entry *trace;
-       u32 trace_index:IPR_NUM_TRACE_INDEX_BITS;
-
-       /*
-        * Queue for free command blocks
-        */
-       char ipr_free_label[8];
-#define IPR_FREEQ_LABEL                        "free-q"
-       struct list_head free_q;
-
-       /*
-        * Queue for command blocks outstanding to the adapter
-        */
-       char ipr_pending_label[8];
-#define IPR_PENDQ_LABEL                        "pend-q"
-       struct list_head pending_q;
+       atomic_t trace_index;
 
        char cfg_table_start[8];
 #define IPR_CFG_TBL_START              "cfg"
@@ -1452,16 +1480,10 @@ struct ipr_ioa_cfg {
        struct list_head hostrcb_free_q;
        struct list_head hostrcb_pending_q;
 
-       __be32 *host_rrq;
-       dma_addr_t host_rrq_dma;
-#define IPR_HRRQ_REQ_RESP_HANDLE_MASK  0xfffffffc
-#define IPR_HRRQ_RESP_BIT_SET                  0x00000002
-#define IPR_HRRQ_TOGGLE_BIT                            0x00000001
-#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2
-       volatile __be32 *hrrq_start;
-       volatile __be32 *hrrq_end;
-       volatile __be32 *hrrq_curr;
-       volatile u32 toggle_bit;
+       struct ipr_hrr_queue hrrq[IPR_MAX_HRRQ_NUM];
+       u32 hrrq_num;
+       atomic_t  hrrq_index;
+       u16 identify_hrrq_index;
 
        struct ipr_bus_attributes bus_attr[IPR_MAX_NUM_BUSES];
 
@@ -1507,6 +1529,17 @@ struct ipr_ioa_cfg {
        u32 max_cmds;
        struct ipr_cmnd **ipr_cmnd_list;
        dma_addr_t *ipr_cmnd_list_dma;
+
+       u16 intr_flag;
+       unsigned int nvectors;
+
+       struct {
+               unsigned short vec;
+               char desc[22];
+       } vectors_info[IPR_MAX_MSIX_VECTORS];
+
+       u32 iopoll_weight;
+
 }; /* struct ipr_ioa_cfg */
 
 struct ipr_cmnd {
@@ -1544,6 +1577,7 @@ struct ipr_cmnd {
                struct scsi_device *sdev;
        } u;
 
+       struct ipr_hrr_queue *hrrq;
        struct ipr_ioa_cfg *ioa_cfg;
 };
 
@@ -1717,7 +1751,8 @@ struct ipr_ucode_image_header {
        if (ipr_is_device(hostrcb)) {                                   \
                if ((hostrcb)->ioa_cfg->sis64) {                        \
                        printk(KERN_ERR IPR_NAME ": %s: " fmt,          \
-                               ipr_format_res_path(hostrcb->hcam.u.error64.fd_res_path, \
+                               ipr_format_res_path(hostrcb->ioa_cfg,   \
+                                       hostrcb->hcam.u.error64.fd_res_path, \
                                        hostrcb->rp_buffer,             \
                                        sizeof(hostrcb->rp_buffer)),    \
                                __VA_ARGS__);                           \
index df4c13a5534c07fe2b166d3898a0906699c018a8..7706c99ec8bbb1e8c70b051113f789cb6f97c1dd 100644 (file)
@@ -466,11 +466,13 @@ enum intr_type_t {
        MSIX,
 };
 
+#define LPFC_CT_CTX_MAX                64
 struct unsol_rcv_ct_ctx {
        uint32_t ctxt_id;
        uint32_t SID;
-       uint32_t flags;
-#define UNSOL_VALID    0x00000001
+       uint32_t valid;
+#define UNSOL_INVALID          0
+#define UNSOL_VALID            1
        uint16_t oxid;
        uint16_t rxid;
 };
@@ -750,6 +752,15 @@ struct lpfc_hba {
        void __iomem *ctrl_regs_memmap_p;/* Kernel memory mapped address for
                                            PCI BAR2 */
 
+       void __iomem *pci_bar0_memmap_p; /* Kernel memory mapped address for
+                                           PCI BAR0 with dual-ULP support */
+       void __iomem *pci_bar2_memmap_p; /* Kernel memory mapped address for
+                                           PCI BAR2 with dual-ULP support */
+       void __iomem *pci_bar4_memmap_p; /* Kernel memory mapped address for
+                                           PCI BAR4 with dual-ULP support */
+#define PCI_64BIT_BAR0 0
+#define PCI_64BIT_BAR2 2
+#define PCI_64BIT_BAR4 4
        void __iomem *MBslimaddr;       /* virtual address for mbox cmds */
        void __iomem *HAregaddr;        /* virtual address for host attn reg */
        void __iomem *CAregaddr;        /* virtual address for chip attn reg */
@@ -938,7 +949,7 @@ struct lpfc_hba {
 
        spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */
        struct list_head ct_ev_waiters;
-       struct unsol_rcv_ct_ctx ct_ctx[64];
+       struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX];
        uint32_t ctx_idx;
 
        uint8_t menlo_flag;     /* menlo generic flags */
index f7368eb8041556c684418da59f2fbca14f0d203c..32d5683e6181e5def7d42b4ea02897056f17e788 100644 (file)
@@ -955,9 +955,9 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                spin_lock_irqsave(&phba->ct_ev_lock, flags);
                if (phba->sli_rev == LPFC_SLI_REV4) {
                        evt_dat->immed_dat = phba->ctx_idx;
-                       phba->ctx_idx = (phba->ctx_idx + 1) % 64;
+                       phba->ctx_idx = (phba->ctx_idx + 1) % LPFC_CT_CTX_MAX;
                        /* Provide warning for over-run of the ct_ctx array */
-                       if (phba->ct_ctx[evt_dat->immed_dat].flags &
+                       if (phba->ct_ctx[evt_dat->immed_dat].valid ==
                            UNSOL_VALID)
                                lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
                                                "2717 CT context array entry "
@@ -973,7 +973,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                                piocbq->iocb.unsli3.rcvsli3.ox_id;
                        phba->ct_ctx[evt_dat->immed_dat].SID =
                                piocbq->iocb.un.rcvels.remoteID;
-                       phba->ct_ctx[evt_dat->immed_dat].flags = UNSOL_VALID;
+                       phba->ct_ctx[evt_dat->immed_dat].valid = UNSOL_VALID;
                } else
                        evt_dat->immed_dat = piocbq->iocb.ulpContext;
 
@@ -1012,6 +1012,47 @@ error_ct_unsol_exit:
        return 1;
 }
 
+/**
+ * lpfc_bsg_ct_unsol_abort - handler for a CT abort to the management plane
+ * @phba: Pointer to HBA context object.
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function handles an abort of a CT command destined for the
+ * management plane on an SLI4 port.
+ *
+ * If a pending context for a CT command to the management plane is present,
+ * it clears that context and returns 1 (handled); otherwise it returns 0,
+ * indicating that no such context exists.
+ **/
+int
+lpfc_bsg_ct_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf)
+{
+       struct fc_frame_header fc_hdr;
+       struct fc_frame_header *fc_hdr_ptr = &fc_hdr;
+       int ctx_idx, handled = 0;
+       uint16_t oxid, rxid;
+       uint32_t sid;
+
+       memcpy(fc_hdr_ptr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header));
+       sid = sli4_sid_from_fc_hdr(fc_hdr_ptr);
+       oxid = be16_to_cpu(fc_hdr_ptr->fh_ox_id);
+       rxid = be16_to_cpu(fc_hdr_ptr->fh_rx_id);
+
+       for (ctx_idx = 0; ctx_idx < LPFC_CT_CTX_MAX; ctx_idx++) {
+               if (phba->ct_ctx[ctx_idx].valid != UNSOL_VALID)
+                       continue;
+               if (phba->ct_ctx[ctx_idx].rxid != rxid)
+                       continue;
+               if (phba->ct_ctx[ctx_idx].oxid != oxid)
+                       continue;
+               if (phba->ct_ctx[ctx_idx].SID != sid)
+                       continue;
+               phba->ct_ctx[ctx_idx].valid = UNSOL_INVALID;
+               handled = 1;
+       }
+       return handled;
+}
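Editorial note (not part of the commit): with the flags bitmask replaced by a plain valid marker, a context slot is claimed by lpfc_bsg_ct_unsol_event() -- which wraps ctx_idx modulo LPFC_CT_CTX_MAX and warns if the slot it is about to reuse is still valid -- and released either by lpfc_issue_ct_rsp() or by the abort handler above. A standalone model of the ring allocation and the over-run warning; only LPFC_CT_CTX_MAX and the valid markers are taken from the patch, everything else is illustrative:

#include <stdio.h>

#define LPFC_CT_CTX_MAX 64
#define UNSOL_INVALID   0
#define UNSOL_VALID     1

struct ct_ctx { unsigned int valid; };

int main(void)
{
        struct ct_ctx ct_ctx[LPFC_CT_CTX_MAX] = { { 0 } };
        unsigned int ctx_idx = 0;
        int ev;

        /* unsolicited events arrive faster than their responses go out */
        for (ev = 0; ev < LPFC_CT_CTX_MAX + 1; ev++) {
                unsigned int slot = ctx_idx;

                ctx_idx = (ctx_idx + 1) % LPFC_CT_CTX_MAX;
                if (ct_ctx[slot].valid == UNSOL_VALID)
                        printf("slot %u over-run before it was answered\n",
                               slot);
                ct_ctx[slot].valid = UNSOL_VALID;       /* claim the slot */
        }
        return 0;
}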
+
 /**
  * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command
  * @job: SET_EVENT fc_bsg_job
@@ -1318,7 +1359,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag,
        icmd->ulpClass = CLASS3;
        if (phba->sli_rev == LPFC_SLI_REV4) {
                /* Do not issue unsol response if oxid not marked as valid */
-               if (!(phba->ct_ctx[tag].flags & UNSOL_VALID)) {
+               if (phba->ct_ctx[tag].valid != UNSOL_VALID) {
                        rc = IOCB_ERROR;
                        goto issue_ct_rsp_exit;
                }
@@ -1352,7 +1393,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag,
                                phba->sli4_hba.rpi_ids[ndlp->nlp_rpi];
 
                /* The exchange is done, mark the entry as invalid */
-               phba->ct_ctx[tag].flags &= ~UNSOL_VALID;
+               phba->ct_ctx[tag].valid = UNSOL_INVALID;
        } else
                icmd->ulpContext = (ushort) tag;
 
index 69d66e3662cb6fc821614ddaaeebec0435231e37..76ca65dae781e734afa828e580ec357618abd13d 100644 (file)
@@ -164,8 +164,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *);
 
 void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
                         struct lpfc_iocbq *);
-void lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
-                                   struct lpfc_iocbq *);
+int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *);
 int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t);
 int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int);
 void lpfc_fdmi_tmo(unsigned long);
@@ -427,6 +426,7 @@ int lpfc_bsg_request(struct fc_bsg_job *);
 int lpfc_bsg_timeout(struct fc_bsg_job *);
 int lpfc_bsg_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
                             struct lpfc_iocbq *);
+int lpfc_bsg_ct_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *);
 void __lpfc_sli_ringtx_put(struct lpfc_hba *, struct lpfc_sli_ring *,
        struct lpfc_iocbq *);
 struct lpfc_iocbq *lpfc_sli_ringtx_get(struct lpfc_hba *,
index 65f9fb6862e6b36ddde7cbf80074eefcb2593e31..7bff3a19af56880dba0fdca4e203676d52c4e916 100644 (file)
@@ -164,37 +164,24 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 }
 
 /**
- * lpfc_sli4_ct_abort_unsol_event - Default handle for sli4 unsol abort
+ * lpfc_ct_handle_unsol_abort - ct upper level protocol abort handler
  * @phba: Pointer to HBA context object.
- * @pring: Pointer to the driver internal I/O ring.
- * @piocbq: Pointer to the IOCBQ.
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
  *
- * This function serves as the default handler for the sli4 unsolicited
- * abort event. It shall be invoked when there is no application interface
- * registered unsolicited abort handler. This handler does nothing but
- * just simply releases the dma buffer used by the unsol abort event.
+ * This function serves as the upper level protocol abort handler for the
+ * CT protocol.
+ *
+ * Return 1 if the abort has been handled, 0 otherwise.
  **/
-void
-lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *phba,
-                              struct lpfc_sli_ring *pring,
-                              struct lpfc_iocbq *piocbq)
+int
+lpfc_ct_handle_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf)
 {
-       IOCB_t *icmd = &piocbq->iocb;
-       struct lpfc_dmabuf *bdeBuf;
-       uint32_t size;
+       int handled;
 
-       /* Forward abort event to any process registered to receive ct event */
-       if (lpfc_bsg_ct_unsol_event(phba, pring, piocbq) == 0)
-               return;
+       /* CT upper level goes through BSG */
+       handled = lpfc_bsg_ct_unsol_abort(phba, dmabuf);
 
-       /* If there is no BDE associated with IOCB, there is nothing to do */
-       if (icmd->ulpBdeCount == 0)
-               return;
-       bdeBuf = piocbq->context2;
-       piocbq->context2 = NULL;
-       size  = icmd->un.cont64[0].tus.f.bdeSize;
-       lpfc_ct_unsol_buffer(phba, piocbq, bdeBuf, size);
-       lpfc_in_buf_free(phba, bdeBuf);
+       return handled;
 }
 
 static void
index b9440deaad459a59bb9cc912dfb9f2e5e5ef5db4..08d156a9094feaf9c285eee47822483bc8e48633 100644 (file)
@@ -3122,6 +3122,13 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
                case IOERR_SEQUENCE_TIMEOUT:
                case IOERR_INVALID_RPI:
+                       if (cmd == ELS_CMD_PLOGI &&
+                           did == NameServer_DID) {
+                               /* Continue forever if plogi to */
+                               /* the nameserver fails */
+                               maxretry = 0;
+                               delay = 100;
+                       }
                        retry = 1;
                        break;
                }
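Editorial note: the new IOERR_SEQUENCE_TIMEOUT / IOERR_INVALID_RPI case above relies on the driver's existing retry convention, where maxretry == 0 means the command is retried indefinitely and delay sets the wait before the next attempt, so a PLOGI to the NameServer is never abandoned. The fragment below is a minimal, self-contained model of that convention; the issue callback, the millisecond unit and the loop itself are illustrative assumptions, not lpfc code.

#include <stdio.h>

/* Toy model of the retry convention: maxretry == 0 means "retry
 * forever", delay_ms is the wait before the next attempt.  The driver
 * schedules delayed retries through its own state machine rather than
 * looping like this. */
static int issue_cmd_with_retry(int (*issue)(void), int maxretry, int delay_ms)
{
	int attempts = 0;

	for (;;) {
		if (issue() == 0)
			return 0;		/* success */
		attempts++;
		if (maxretry && attempts >= maxretry)
			return -1;		/* retries exhausted */
		printf("retry %d in %d ms\n", attempts, delay_ms);
	}
}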
@@ -6517,7 +6524,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
        struct lpfc_nodelist *ndlp;
        struct ls_rjt stat;
        uint32_t *payload;
-       uint32_t cmd, did, newnode, rjt_err = 0;
+       uint32_t cmd, did, newnode;
+       uint8_t rjt_exp, rjt_err = 0;
        IOCB_t *icmd = &elsiocb->iocb;
 
        if (!vport || !(elsiocb->context2))
@@ -6606,12 +6614,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                /* If Nport discovery is delayed, reject PLOGIs */
                if (vport->fc_flag & FC_DISC_DELAYED) {
                        rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_NOTHING_MORE;
                        break;
                }
                if (vport->port_state < LPFC_DISC_AUTH) {
                        if (!(phba->pport->fc_flag & FC_PT2PT) ||
                                (phba->pport->fc_flag & FC_PT2PT_PLOGI)) {
                                rjt_err = LSRJT_UNABLE_TPC;
+                               rjt_exp = LSEXP_NOTHING_MORE;
                                break;
                        }
                        /* We get here, and drop thru, if we are PT2PT with
@@ -6648,6 +6658,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                lpfc_send_els_event(vport, ndlp, payload);
                if (vport->port_state < LPFC_DISC_AUTH) {
                        rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_NOTHING_MORE;
                        break;
                }
                lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
@@ -6661,6 +6672,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                lpfc_send_els_event(vport, ndlp, payload);
                if (vport->port_state < LPFC_DISC_AUTH) {
                        rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_NOTHING_MORE;
                        break;
                }
                lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO);
@@ -6680,6 +6692,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                phba->fc_stat.elsRcvADISC++;
                if (vport->port_state < LPFC_DISC_AUTH) {
                        rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_NOTHING_MORE;
                        break;
                }
                lpfc_disc_state_machine(vport, ndlp, elsiocb,
@@ -6693,6 +6706,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                phba->fc_stat.elsRcvPDISC++;
                if (vport->port_state < LPFC_DISC_AUTH) {
                        rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_NOTHING_MORE;
                        break;
                }
                lpfc_disc_state_machine(vport, ndlp, elsiocb,
@@ -6730,6 +6744,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                phba->fc_stat.elsRcvPRLI++;
                if (vport->port_state < LPFC_DISC_AUTH) {
                        rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_NOTHING_MORE;
                        break;
                }
                lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI);
@@ -6813,6 +6828,11 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                if (newnode)
                        lpfc_nlp_put(ndlp);
                break;
+       case ELS_CMD_REC:
+                       /* receive this due to exchange closed */
+                       rjt_err = LSRJT_UNABLE_TPC;
+                       rjt_exp = LSEXP_INVALID_OX_RX;
+               break;
        default:
                lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
                        "RCV ELS cmd:     cmd:x%x did:x%x/ste:x%x",
@@ -6820,6 +6840,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 
                /* Unsupported ELS command, reject */
                rjt_err = LSRJT_CMD_UNSUPPORTED;
+               rjt_exp = LSEXP_NOTHING_MORE;
 
                /* Unknown ELS command <elsCmd> received from NPORT <did> */
                lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
@@ -6834,7 +6855,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
        if (rjt_err) {
                memset(&stat, 0, sizeof(stat));
                stat.un.b.lsRjtRsnCode = rjt_err;
-               stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+               stat.un.b.lsRjtRsnCodeExp = rjt_exp;
                lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp,
                        NULL);
        }
index 7398ca862e9750be3b943b4b0f14f02973648331..e8c47603170370ca0d2418bb3e4d759b377d60e3 100644 (file)
@@ -538,6 +538,7 @@ struct fc_vft_header {
 #define ELS_CMD_ECHO      0x10000000
 #define ELS_CMD_TEST      0x11000000
 #define ELS_CMD_RRQ       0x12000000
+#define ELS_CMD_REC       0x13000000
 #define ELS_CMD_PRLI      0x20100014
 #define ELS_CMD_PRLO      0x21100014
 #define ELS_CMD_PRLO_ACC  0x02100014
@@ -574,6 +575,7 @@ struct fc_vft_header {
 #define ELS_CMD_ECHO      0x10
 #define ELS_CMD_TEST      0x11
 #define ELS_CMD_RRQ       0x12
+#define ELS_CMD_REC       0x13
 #define ELS_CMD_PRLI      0x14001020
 #define ELS_CMD_PRLO      0x14001021
 #define ELS_CMD_PRLO_ACC  0x14001002
index a47cfbdd05f28d4497ea6eab982cd13d86dde803..6e93b886cd4d3f15c327f155d904d0dd162cf8da 100644 (file)
@@ -106,6 +106,7 @@ struct lpfc_sli_intf {
 
 #define LPFC_SLI4_MB_WORD_COUNT                64
 #define LPFC_MAX_MQ_PAGE               8
+#define LPFC_MAX_WQ_PAGE_V0            4
 #define LPFC_MAX_WQ_PAGE               8
 #define LPFC_MAX_CQ_PAGE               4
 #define LPFC_MAX_EQ_PAGE               8
@@ -703,24 +704,41 @@ struct lpfc_register {
  * BAR0.  The offsets are the same so the driver must account for
  * any base address difference.
  */
-#define LPFC_RQ_DOORBELL               0x00A0
-#define lpfc_rq_doorbell_num_posted_SHIFT      16
-#define lpfc_rq_doorbell_num_posted_MASK       0x3FFF
-#define lpfc_rq_doorbell_num_posted_WORD       word0
-#define lpfc_rq_doorbell_id_SHIFT              0
-#define lpfc_rq_doorbell_id_MASK               0xFFFF
-#define lpfc_rq_doorbell_id_WORD               word0
-
-#define LPFC_WQ_DOORBELL               0x0040
-#define lpfc_wq_doorbell_num_posted_SHIFT      24
-#define lpfc_wq_doorbell_num_posted_MASK       0x00FF
-#define lpfc_wq_doorbell_num_posted_WORD       word0
-#define lpfc_wq_doorbell_index_SHIFT           16
-#define lpfc_wq_doorbell_index_MASK            0x00FF
-#define lpfc_wq_doorbell_index_WORD            word0
-#define lpfc_wq_doorbell_id_SHIFT              0
-#define lpfc_wq_doorbell_id_MASK               0xFFFF
-#define lpfc_wq_doorbell_id_WORD               word0
+#define LPFC_ULP0_RQ_DOORBELL          0x00A0
+#define LPFC_ULP1_RQ_DOORBELL          0x00C0
+#define lpfc_rq_db_list_fm_num_posted_SHIFT    24
+#define lpfc_rq_db_list_fm_num_posted_MASK     0x00FF
+#define lpfc_rq_db_list_fm_num_posted_WORD     word0
+#define lpfc_rq_db_list_fm_index_SHIFT         16
+#define lpfc_rq_db_list_fm_index_MASK          0x00FF
+#define lpfc_rq_db_list_fm_index_WORD          word0
+#define lpfc_rq_db_list_fm_id_SHIFT            0
+#define lpfc_rq_db_list_fm_id_MASK             0xFFFF
+#define lpfc_rq_db_list_fm_id_WORD             word0
+#define lpfc_rq_db_ring_fm_num_posted_SHIFT    16
+#define lpfc_rq_db_ring_fm_num_posted_MASK     0x3FFF
+#define lpfc_rq_db_ring_fm_num_posted_WORD     word0
+#define lpfc_rq_db_ring_fm_id_SHIFT            0
+#define lpfc_rq_db_ring_fm_id_MASK             0xFFFF
+#define lpfc_rq_db_ring_fm_id_WORD             word0
+
+#define LPFC_ULP0_WQ_DOORBELL          0x0040
+#define LPFC_ULP1_WQ_DOORBELL          0x0060
+#define lpfc_wq_db_list_fm_num_posted_SHIFT    24
+#define lpfc_wq_db_list_fm_num_posted_MASK     0x00FF
+#define lpfc_wq_db_list_fm_num_posted_WORD     word0
+#define lpfc_wq_db_list_fm_index_SHIFT         16
+#define lpfc_wq_db_list_fm_index_MASK          0x00FF
+#define lpfc_wq_db_list_fm_index_WORD          word0
+#define lpfc_wq_db_list_fm_id_SHIFT            0
+#define lpfc_wq_db_list_fm_id_MASK             0xFFFF
+#define lpfc_wq_db_list_fm_id_WORD             word0
+#define lpfc_wq_db_ring_fm_num_posted_SHIFT     16
+#define lpfc_wq_db_ring_fm_num_posted_MASK      0x3FFF
+#define lpfc_wq_db_ring_fm_num_posted_WORD      word0
+#define lpfc_wq_db_ring_fm_id_SHIFT             0
+#define lpfc_wq_db_ring_fm_id_MASK              0xFFFF
+#define lpfc_wq_db_ring_fm_id_WORD              word0
 
 #define LPFC_EQCQ_DOORBELL             0x0120
 #define lpfc_eqcq_doorbell_se_SHIFT            31
@@ -1131,12 +1149,22 @@ struct lpfc_mbx_wq_create {
                struct {        /* Version 0 Request */
                        uint32_t word0;
 #define lpfc_mbx_wq_create_num_pages_SHIFT     0
-#define lpfc_mbx_wq_create_num_pages_MASK      0x0000FFFF
+#define lpfc_mbx_wq_create_num_pages_MASK      0x000000FF
 #define lpfc_mbx_wq_create_num_pages_WORD      word0
+#define lpfc_mbx_wq_create_dua_SHIFT           8
+#define lpfc_mbx_wq_create_dua_MASK            0x00000001
+#define lpfc_mbx_wq_create_dua_WORD            word0
 #define lpfc_mbx_wq_create_cq_id_SHIFT         16
 #define lpfc_mbx_wq_create_cq_id_MASK          0x0000FFFF
 #define lpfc_mbx_wq_create_cq_id_WORD          word0
-                       struct dma_address page[LPFC_MAX_WQ_PAGE];
+                       struct dma_address page[LPFC_MAX_WQ_PAGE_V0];
+                       uint32_t word9;
+#define lpfc_mbx_wq_create_bua_SHIFT           0
+#define lpfc_mbx_wq_create_bua_MASK            0x00000001
+#define lpfc_mbx_wq_create_bua_WORD            word9
+#define lpfc_mbx_wq_create_ulp_num_SHIFT       8
+#define lpfc_mbx_wq_create_ulp_num_MASK                0x000000FF
+#define lpfc_mbx_wq_create_ulp_num_WORD                word9
                } request;
                struct {        /* Version 1 Request */
                        uint32_t word0; /* Word 0 is the same as in v0 */
@@ -1160,6 +1188,17 @@ struct lpfc_mbx_wq_create {
 #define lpfc_mbx_wq_create_q_id_SHIFT  0
 #define lpfc_mbx_wq_create_q_id_MASK   0x0000FFFF
 #define lpfc_mbx_wq_create_q_id_WORD   word0
+                       uint32_t doorbell_offset;
+                       uint32_t word2;
+#define lpfc_mbx_wq_create_bar_set_SHIFT       0
+#define lpfc_mbx_wq_create_bar_set_MASK                0x0000FFFF
+#define lpfc_mbx_wq_create_bar_set_WORD                word2
+#define WQ_PCI_BAR_0_AND_1     0x00
+#define WQ_PCI_BAR_2_AND_3     0x01
+#define WQ_PCI_BAR_4_AND_5     0x02
+#define lpfc_mbx_wq_create_db_format_SHIFT     16
+#define lpfc_mbx_wq_create_db_format_MASK      0x0000FFFF
+#define lpfc_mbx_wq_create_db_format_WORD      word2
                } response;
        } u;
 };
@@ -1223,14 +1262,31 @@ struct lpfc_mbx_rq_create {
 #define lpfc_mbx_rq_create_num_pages_SHIFT     0
 #define lpfc_mbx_rq_create_num_pages_MASK      0x0000FFFF
 #define lpfc_mbx_rq_create_num_pages_WORD      word0
+#define lpfc_mbx_rq_create_dua_SHIFT           16
+#define lpfc_mbx_rq_create_dua_MASK            0x00000001
+#define lpfc_mbx_rq_create_dua_WORD            word0
+#define lpfc_mbx_rq_create_bqu_SHIFT           17
+#define lpfc_mbx_rq_create_bqu_MASK            0x00000001
+#define lpfc_mbx_rq_create_bqu_WORD            word0
+#define lpfc_mbx_rq_create_ulp_num_SHIFT       24
+#define lpfc_mbx_rq_create_ulp_num_MASK                0x000000FF
+#define lpfc_mbx_rq_create_ulp_num_WORD                word0
                        struct rq_context context;
                        struct dma_address page[LPFC_MAX_WQ_PAGE];
                } request;
                struct {
                        uint32_t word0;
-#define lpfc_mbx_rq_create_q_id_SHIFT  0
-#define lpfc_mbx_rq_create_q_id_MASK   0x0000FFFF
-#define lpfc_mbx_rq_create_q_id_WORD   word0
+#define lpfc_mbx_rq_create_q_id_SHIFT          0
+#define lpfc_mbx_rq_create_q_id_MASK           0x0000FFFF
+#define lpfc_mbx_rq_create_q_id_WORD           word0
+                       uint32_t doorbell_offset;
+                       uint32_t word2;
+#define lpfc_mbx_rq_create_bar_set_SHIFT       0
+#define lpfc_mbx_rq_create_bar_set_MASK                0x0000FFFF
+#define lpfc_mbx_rq_create_bar_set_WORD                word2
+#define lpfc_mbx_rq_create_db_format_SHIFT     16
+#define lpfc_mbx_rq_create_db_format_MASK      0x0000FFFF
+#define lpfc_mbx_rq_create_db_format_WORD      word2
                } response;
        } u;
 };
@@ -1388,6 +1444,33 @@ struct lpfc_mbx_get_rsrc_extent_info {
        } u;
 };
 
+struct lpfc_mbx_query_fw_config {
+       struct mbox_header header;
+       struct {
+               uint32_t config_number;
+#define        LPFC_FC_FCOE            0x00000007
+               uint32_t asic_revision;
+               uint32_t physical_port;
+               uint32_t function_mode;
+#define LPFC_FCOE_INI_MODE     0x00000040
+#define LPFC_FCOE_TGT_MODE     0x00000080
+#define LPFC_DUA_MODE          0x00000800
+               uint32_t ulp0_mode;
+#define LPFC_ULP_FCOE_INIT_MODE        0x00000040
+#define LPFC_ULP_FCOE_TGT_MODE 0x00000080
+               uint32_t ulp0_nap_words[12];
+               uint32_t ulp1_mode;
+               uint32_t ulp1_nap_words[12];
+               uint32_t function_capabilities;
+               uint32_t cqid_base;
+               uint32_t cqid_tot;
+               uint32_t eqid_base;
+               uint32_t eqid_tot;
+               uint32_t ulp0_nap2_words[2];
+               uint32_t ulp1_nap2_words[2];
+       } rsp;
+};
+
 struct lpfc_id_range {
        uint32_t word5;
 #define lpfc_mbx_rsrc_id_word4_0_SHIFT 0
@@ -1803,51 +1886,6 @@ struct lpfc_mbx_redisc_fcf_tbl {
 #define lpfc_mbx_redisc_fcf_index_WORD         word12
 };
 
-struct lpfc_mbx_query_fw_cfg {
-       struct mbox_header header;
-       uint32_t config_number;
-       uint32_t asic_rev;
-       uint32_t phys_port;
-       uint32_t function_mode;
-/* firmware Function Mode */
-#define lpfc_function_mode_toe_SHIFT           0
-#define lpfc_function_mode_toe_MASK            0x00000001
-#define lpfc_function_mode_toe_WORD            function_mode
-#define lpfc_function_mode_nic_SHIFT           1
-#define lpfc_function_mode_nic_MASK            0x00000001
-#define lpfc_function_mode_nic_WORD            function_mode
-#define lpfc_function_mode_rdma_SHIFT          2
-#define lpfc_function_mode_rdma_MASK           0x00000001
-#define lpfc_function_mode_rdma_WORD           function_mode
-#define lpfc_function_mode_vm_SHIFT            3
-#define lpfc_function_mode_vm_MASK             0x00000001
-#define lpfc_function_mode_vm_WORD             function_mode
-#define lpfc_function_mode_iscsi_i_SHIFT       4
-#define lpfc_function_mode_iscsi_i_MASK                0x00000001
-#define lpfc_function_mode_iscsi_i_WORD                function_mode
-#define lpfc_function_mode_iscsi_t_SHIFT       5
-#define lpfc_function_mode_iscsi_t_MASK                0x00000001
-#define lpfc_function_mode_iscsi_t_WORD                function_mode
-#define lpfc_function_mode_fcoe_i_SHIFT                6
-#define lpfc_function_mode_fcoe_i_MASK         0x00000001
-#define lpfc_function_mode_fcoe_i_WORD         function_mode
-#define lpfc_function_mode_fcoe_t_SHIFT                7
-#define lpfc_function_mode_fcoe_t_MASK         0x00000001
-#define lpfc_function_mode_fcoe_t_WORD         function_mode
-#define lpfc_function_mode_dal_SHIFT           8
-#define lpfc_function_mode_dal_MASK            0x00000001
-#define lpfc_function_mode_dal_WORD            function_mode
-#define lpfc_function_mode_lro_SHIFT           9
-#define lpfc_function_mode_lro_MASK            0x00000001
-#define lpfc_function_mode_lro_WORD            function_mode
-#define lpfc_function_mode_flex10_SHIFT                10
-#define lpfc_function_mode_flex10_MASK         0x00000001
-#define lpfc_function_mode_flex10_WORD         function_mode
-#define lpfc_function_mode_ncsi_SHIFT          11
-#define lpfc_function_mode_ncsi_MASK           0x00000001
-#define lpfc_function_mode_ncsi_WORD           function_mode
-};
-
 /* Status field for embedded SLI_CONFIG mailbox command */
 #define STATUS_SUCCESS                                 0x0
 #define STATUS_FAILED                                  0x1
@@ -2965,7 +3003,7 @@ struct lpfc_mqe {
                struct lpfc_mbx_read_config rd_config;
                struct lpfc_mbx_request_features req_ftrs;
                struct lpfc_mbx_post_hdr_tmpl hdr_tmpl;
-               struct lpfc_mbx_query_fw_cfg query_fw_cfg;
+               struct lpfc_mbx_query_fw_config query_fw_cfg;
                struct lpfc_mbx_supp_pages supp_pages;
                struct lpfc_mbx_pc_sli4_params sli4_params;
                struct lpfc_mbx_get_sli4_parameters get_sli4_parameters;
index 7de4ef14698f37dc05cab98b80845390c0318c59..314b4f61b9e3fc73035fbdf48995e34bb31ed4dc 100644 (file)
@@ -6229,9 +6229,11 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type)
                        phba->sli4_hba.conf_regs_memmap_p +
                                                LPFC_CTL_PORT_SEM_OFFSET;
                phba->sli4_hba.RQDBregaddr =
-                       phba->sli4_hba.conf_regs_memmap_p + LPFC_RQ_DOORBELL;
+                       phba->sli4_hba.conf_regs_memmap_p +
+                                               LPFC_ULP0_RQ_DOORBELL;
                phba->sli4_hba.WQDBregaddr =
-                       phba->sli4_hba.conf_regs_memmap_p + LPFC_WQ_DOORBELL;
+                       phba->sli4_hba.conf_regs_memmap_p +
+                                               LPFC_ULP0_WQ_DOORBELL;
                phba->sli4_hba.EQCQDBregaddr =
                        phba->sli4_hba.conf_regs_memmap_p + LPFC_EQCQ_DOORBELL;
                phba->sli4_hba.MQDBregaddr =
@@ -6285,9 +6287,11 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf)
                return -ENODEV;
 
        phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
-                               vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL);
+                               vf * LPFC_VFR_PAGE_SIZE +
+                                       LPFC_ULP0_RQ_DOORBELL);
        phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
-                               vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL);
+                               vf * LPFC_VFR_PAGE_SIZE +
+                                       LPFC_ULP0_WQ_DOORBELL);
        phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
                                vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL);
        phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p +
@@ -6983,6 +6987,19 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
                phba->sli4_hba.fcp_wq = NULL;
        }
 
+       if (phba->pci_bar0_memmap_p) {
+               iounmap(phba->pci_bar0_memmap_p);
+               phba->pci_bar0_memmap_p = NULL;
+       }
+       if (phba->pci_bar2_memmap_p) {
+               iounmap(phba->pci_bar2_memmap_p);
+               phba->pci_bar2_memmap_p = NULL;
+       }
+       if (phba->pci_bar4_memmap_p) {
+               iounmap(phba->pci_bar4_memmap_p);
+               phba->pci_bar4_memmap_p = NULL;
+       }
+
        /* Release FCP CQ mapping array */
        if (phba->sli4_hba.fcp_cq_map != NULL) {
                kfree(phba->sli4_hba.fcp_cq_map);
@@ -7046,6 +7063,53 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba)
        int rc = -ENOMEM;
        int fcp_eqidx, fcp_cqidx, fcp_wqidx;
        int fcp_cq_index = 0;
+       uint32_t shdr_status, shdr_add_status;
+       union lpfc_sli4_cfg_shdr *shdr;
+       LPFC_MBOXQ_t *mboxq;
+       uint32_t length;
+
+       /* Check for dual-ULP support */
+       mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+       if (!mboxq) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                               "3249 Unable to allocate memory for "
+                               "QUERY_FW_CFG mailbox command\n");
+               return -ENOMEM;
+       }
+       length = (sizeof(struct lpfc_mbx_query_fw_config) -
+                 sizeof(struct lpfc_sli4_cfg_mhdr));
+       lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
+                        LPFC_MBOX_OPCODE_QUERY_FW_CFG,
+                        length, LPFC_SLI4_MBX_EMBED);
+
+       rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
+
+       shdr = (union lpfc_sli4_cfg_shdr *)
+                       &mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
+       shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
+       shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
+       if (shdr_status || shdr_add_status || rc) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                               "3250 QUERY_FW_CFG mailbox failed with status "
+                               "x%x add_status x%x, mbx status x%x\n",
+                               shdr_status, shdr_add_status, rc);
+               if (rc != MBX_TIMEOUT)
+                       mempool_free(mboxq, phba->mbox_mem_pool);
+               rc = -ENXIO;
+               goto out_error;
+       }
+
+       phba->sli4_hba.fw_func_mode =
+                       mboxq->u.mqe.un.query_fw_cfg.rsp.function_mode;
+       phba->sli4_hba.ulp0_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp0_mode;
+       phba->sli4_hba.ulp1_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp1_mode;
+       lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                       "3251 QUERY_FW_CFG: func_mode:x%x, ulp0_mode:x%x, "
+                       "ulp1_mode:x%x\n", phba->sli4_hba.fw_func_mode,
+                       phba->sli4_hba.ulp0_mode, phba->sli4_hba.ulp1_mode);
+
+       if (rc != MBX_TIMEOUT)
+               mempool_free(mboxq, phba->mbox_mem_pool);
 
        /*
         * Set up HBA Event Queues (EQs)
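Editorial note: the QUERY_FW_CFG block added above follows the driver's usual embedded-mailbox pattern: issue the command, read the status and additional-status words from the response header, treat any non-zero value (or a bad issue return code) as failure, and free the mailbox buffer only when the command did not time out, since a timed-out buffer may still be owned by the port. The sketch below models just that completion check; MBX_TIMEOUT, the header layout and the free() call are placeholders for the real mailbox structures.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Placeholder status values and response-header layout; the real ones
 * come from the SLI4 config header definitions in the lpfc headers. */
#define MBX_TIMEOUT  (-2)

struct cfg_shdr_rsp {
	uint32_t status;
	uint32_t add_status;
};

/* Fail the command if either status word or the issue return code is
 * non-zero, and free the request buffer only when the command did not
 * time out (a timed-out buffer may still be owned by the port). */
static int check_mbox_completion(int rc, const struct cfg_shdr_rsp *rsp,
				 void **cmd_buf)
{
	if (rsp->status || rsp->add_status || rc) {
		fprintf(stderr, "mailbox failed: status x%x add_status x%x rc %d\n",
			(unsigned)rsp->status, (unsigned)rsp->add_status, rc);
		if (rc != MBX_TIMEOUT) {
			free(*cmd_buf);
			*cmd_buf = NULL;
		}
		return -1;
	}
	return 0;
}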
@@ -7659,78 +7723,6 @@ out:
        return rc;
 }
 
-/**
- * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands
- * @phba: pointer to lpfc hba data structure.
- * @cnt: number of nop mailbox commands to send.
- *
- * This routine is invoked to send a number @cnt of NOP mailbox command and
- * wait for each command to complete.
- *
- * Return: the number of NOP mailbox command completed.
- **/
-static int
-lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt)
-{
-       LPFC_MBOXQ_t *mboxq;
-       int length, cmdsent;
-       uint32_t mbox_tmo;
-       uint32_t rc = 0;
-       uint32_t shdr_status, shdr_add_status;
-       union lpfc_sli4_cfg_shdr *shdr;
-
-       if (cnt == 0) {
-               lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-                               "2518 Requested to send 0 NOP mailbox cmd\n");
-               return cnt;
-       }
-
-       mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-       if (!mboxq) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                               "2519 Unable to allocate memory for issuing "
-                               "NOP mailbox command\n");
-               return 0;
-       }
-
-       /* Set up NOP SLI4_CONFIG mailbox-ioctl command */
-       length = (sizeof(struct lpfc_mbx_nop) -
-                 sizeof(struct lpfc_sli4_cfg_mhdr));
-
-       for (cmdsent = 0; cmdsent < cnt; cmdsent++) {
-               lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON,
-                                LPFC_MBOX_OPCODE_NOP, length,
-                                LPFC_SLI4_MBX_EMBED);
-               if (!phba->sli4_hba.intr_enable)
-                       rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL);
-               else {
-                       mbox_tmo = lpfc_mbox_tmo_val(phba, mboxq);
-                       rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo);
-               }
-               if (rc == MBX_TIMEOUT)
-                       break;
-               /* Check return status */
-               shdr = (union lpfc_sli4_cfg_shdr *)
-                       &mboxq->u.mqe.un.sli4_config.header.cfg_shdr;
-               shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
-               shdr_add_status = bf_get(lpfc_mbox_hdr_add_status,
-                                        &shdr->response);
-               if (shdr_status || shdr_add_status || rc) {
-                       lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
-                                       "2520 NOP mailbox command failed "
-                                       "status x%x add_status x%x mbx "
-                                       "status x%x\n", shdr_status,
-                                       shdr_add_status, rc);
-                       break;
-               }
-       }
-
-       if (rc != MBX_TIMEOUT)
-               mempool_free(mboxq, phba->mbox_mem_pool);
-
-       return cmdsent;
-}
-
 /**
  * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space.
  * @phba: pointer to lpfc hba data structure.
@@ -8498,37 +8490,6 @@ lpfc_unset_hba(struct lpfc_hba *phba)
        return;
 }
 
-/**
- * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization.
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked to unset the HBA device initialization steps to
- * a device with SLI-4 interface spec.
- **/
-static void
-lpfc_sli4_unset_hba(struct lpfc_hba *phba)
-{
-       struct lpfc_vport *vport = phba->pport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
-
-       spin_lock_irq(shost->host_lock);
-       vport->load_flag |= FC_UNLOADING;
-       spin_unlock_irq(shost->host_lock);
-
-       phba->pport->work_port_events = 0;
-
-       /* Stop the SLI4 device port */
-       lpfc_stop_port(phba);
-
-       lpfc_sli4_disable_intr(phba);
-
-       /* Reset SLI4 HBA FCoE function */
-       lpfc_pci_function_reset(phba);
-       lpfc_sli4_queue_destroy(phba);
-
-       return;
-}
-
 /**
  * lpfc_sli4_xri_exchange_busy_wait - Wait for device XRI exchange busy
  * @phba: Pointer to HBA context object.
@@ -9591,7 +9552,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
        struct Scsi_Host  *shost = NULL;
        int error, ret;
        uint32_t cfg_mode, intr_mode;
-       int mcnt;
        int adjusted_fcp_io_channel;
 
        /* Allocate memory for HBA structure */
@@ -9680,58 +9640,35 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid)
        shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */
        /* Now, trying to enable interrupt and bring up the device */
        cfg_mode = phba->cfg_use_msi;
-       while (true) {
-               /* Put device to a known state before enabling interrupt */
-               lpfc_stop_port(phba);
-               /* Configure and enable interrupt */
-               intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode);
-               if (intr_mode == LPFC_INTR_ERROR) {
-                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                       "0426 Failed to enable interrupt.\n");
-                       error = -ENODEV;
-                       goto out_free_sysfs_attr;
-               }
-               /* Default to single EQ for non-MSI-X */
-               if (phba->intr_type != MSIX)
-                       adjusted_fcp_io_channel = 1;
-               else
-                       adjusted_fcp_io_channel = phba->cfg_fcp_io_channel;
-               phba->cfg_fcp_io_channel = adjusted_fcp_io_channel;
-               /* Set up SLI-4 HBA */
-               if (lpfc_sli4_hba_setup(phba)) {
-                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-                                       "1421 Failed to set up hba\n");
-                       error = -ENODEV;
-                       goto out_disable_intr;
-               }
 
-               /* Send NOP mbx cmds for non-INTx mode active interrupt test */
-               if (intr_mode != 0)
-                       mcnt = lpfc_sli4_send_nop_mbox_cmds(phba,
-                                                           LPFC_ACT_INTR_CNT);
-
-               /* Check active interrupts received only for MSI/MSI-X */
-               if (intr_mode == 0 ||
-                   phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) {
-                       /* Log the current active interrupt mode */
-                       phba->intr_mode = intr_mode;
-                       lpfc_log_intr_mode(phba, intr_mode);
-                       break;
-               }
-               lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-                               "0451 Configure interrupt mode (%d) "
-                               "failed active interrupt test.\n",
-                               intr_mode);
-               /* Unset the previous SLI-4 HBA setup. */
-               /*
-                * TODO:  Is this operation compatible with IF TYPE 2
-                * devices?  All port state is deleted and cleared.
-                */
-               lpfc_sli4_unset_hba(phba);
-               /* Try next level of interrupt mode */
-               cfg_mode = --intr_mode;
+       /* Put device to a known state before enabling interrupt */
+       lpfc_stop_port(phba);
+       /* Configure and enable interrupt */
+       intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode);
+       if (intr_mode == LPFC_INTR_ERROR) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                               "0426 Failed to enable interrupt.\n");
+               error = -ENODEV;
+               goto out_free_sysfs_attr;
+       }
+       /* Default to single EQ for non-MSI-X */
+       if (phba->intr_type != MSIX)
+               adjusted_fcp_io_channel = 1;
+       else
+               adjusted_fcp_io_channel = phba->cfg_fcp_io_channel;
+       phba->cfg_fcp_io_channel = adjusted_fcp_io_channel;
+       /* Set up SLI-4 HBA */
+       if (lpfc_sli4_hba_setup(phba)) {
+               lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                               "1421 Failed to set up hba\n");
+               error = -ENODEV;
+               goto out_disable_intr;
        }
 
+       /* Log the current active interrupt mode */
+       phba->intr_mode = intr_mode;
+       lpfc_log_intr_mode(phba, intr_mode);
+
        /* Perform post initialization setup */
        lpfc_post_init_setup(phba);
 
index d8fadcb2db73d1794b1ef9d481a6c65e35ff971d..46128c679202199537be8053089f08d1f8835a24 100644 (file)
@@ -1115,6 +1115,13 @@ out:
                                 "0261 Cannot Register NameServer login\n");
        }
 
+       /*
+       ** In case the node reference counter does not go to zero, ensure that
+       ** the stale state for the node is not processed.
+       */
+
+       ndlp->nlp_prev_state = ndlp->nlp_state;
+       lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
        spin_lock_irq(shost->host_lock);
        ndlp->nlp_flag |= NLP_DEFER_RM;
        spin_unlock_irq(shost->host_lock);
@@ -2159,13 +2166,16 @@ lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 {
        struct lpfc_iocbq *cmdiocb, *rspiocb;
        IOCB_t *irsp;
+       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        cmdiocb = (struct lpfc_iocbq *) arg;
        rspiocb = cmdiocb->context_un.rsp_iocb;
 
        irsp = &rspiocb->iocb;
        if (irsp->ulpStatus) {
+               spin_lock_irq(shost->host_lock);
                ndlp->nlp_flag |= NLP_DEFER_RM;
+               spin_unlock_irq(shost->host_lock);
                return NLP_STE_FREED_NODE;
        }
        return ndlp->nlp_state;
index 60e5a177644ce4215e84d417dc27df2e5937b1d0..98af07c6e300bd4aff4a64d5bbb844fcc6ff1ddb 100644 (file)
@@ -287,6 +287,26 @@ lpfc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
        return sdev->queue_depth;
 }
 
+/**
+ * lpfc_change_queue_type() - Change a device's scsi tag queuing type
+ * @sdev: Pointer to the scsi device whose queue type is to be changed
+ * @tag_type: Identifier for queue tag type
+ */
+static int
+lpfc_change_queue_type(struct scsi_device *sdev, int tag_type)
+{
+       if (sdev->tagged_supported) {
+               scsi_set_tag_type(sdev, tag_type);
+               if (tag_type)
+                       scsi_activate_tcq(sdev, sdev->queue_depth);
+               else
+                       scsi_deactivate_tcq(sdev, sdev->queue_depth);
+       } else
+               tag_type = 0;
+
+       return tag_type;
+}
+
 /**
  * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread
  * @phba: The Hba for which this call is being executed.
@@ -3972,7 +3992,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
                        break;
                }
        } else
-               fcp_cmnd->fcpCntl1 = 0;
+               fcp_cmnd->fcpCntl1 = SIMPLE_Q;
 
        sli4 = (phba->sli_rev == LPFC_SLI_REV4);
 
@@ -5150,6 +5170,7 @@ struct scsi_host_template lpfc_template = {
        .max_sectors            = 0xFFFF,
        .vendor_id              = LPFC_NL_VENDOR_ID,
        .change_queue_depth     = lpfc_change_queue_depth,
+       .change_queue_type      = lpfc_change_queue_type,
 };
 
 struct scsi_host_template lpfc_vport_template = {
@@ -5172,4 +5193,5 @@ struct scsi_host_template lpfc_vport_template = {
        .shost_attrs            = lpfc_vport_attrs,
        .max_sectors            = 0xFFFF,
        .change_queue_depth     = lpfc_change_queue_depth,
+       .change_queue_type      = lpfc_change_queue_type,
 };
index 624eab370396d8bfc0bf8dea421f5370646d6087..55b6fc83ad714780fe89316e1eab5354cb0784db 100644 (file)
@@ -124,10 +124,17 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe)
 
        /* Ring Doorbell */
        doorbell.word0 = 0;
-       bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1);
-       bf_set(lpfc_wq_doorbell_index, &doorbell, host_index);
-       bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id);
-       writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr);
+       if (q->db_format == LPFC_DB_LIST_FORMAT) {
+               bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1);
+               bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index);
+               bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id);
+       } else if (q->db_format == LPFC_DB_RING_FORMAT) {
+               bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1);
+               bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id);
+       } else {
+               return -EINVAL;
+       }
+       writel(doorbell.word0, q->db_regaddr);
 
        return 0;
 }
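Editorial note: with the dual-ULP change, each work queue carries its own db_format and db_regaddr, and lpfc_sli4_wq_put() above now builds the doorbell word in one of two layouts before writing it. The helper below restates just the bit packing, using the shift/mask values from the new lpfc_wq_db_* register definitions (list format: num_posted[31:24], index[23:16], id[15:0]; ring format: num_posted[29:16], id[15:0]); the enum and function names are invented for the example and are not driver API.

#include <stdint.h>

enum db_format { DB_LIST_FORMAT, DB_RING_FORMAT };

/* Compose the 32-bit doorbell word for either format; the driver then
 * writel()s the result to the queue's db_regaddr. */
static uint32_t wq_doorbell_word(enum db_format fmt, uint32_t num_posted,
				 uint32_t host_index, uint32_t queue_id)
{
	uint32_t word0 = 0;

	if (fmt == DB_LIST_FORMAT) {
		word0 |= (num_posted & 0x00FF) << 24;	/* posted[31:24] */
		word0 |= (host_index & 0x00FF) << 16;	/* index[23:16]  */
		word0 |= (queue_id   & 0xFFFF);		/* id[15:0]      */
	} else {
		word0 |= (num_posted & 0x3FFF) << 16;	/* posted[29:16] */
		word0 |= (queue_id   & 0xFFFF);		/* id[15:0]      */
	}
	return word0;
}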
@@ -456,10 +463,20 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
        /* Ring The Header Receive Queue Doorbell */
        if (!(hq->host_index % hq->entry_repost)) {
                doorbell.word0 = 0;
-               bf_set(lpfc_rq_doorbell_num_posted, &doorbell,
-                      hq->entry_repost);
-               bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id);
-               writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr);
+               if (hq->db_format == LPFC_DB_RING_FORMAT) {
+                       bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell,
+                              hq->entry_repost);
+                       bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id);
+               } else if (hq->db_format == LPFC_DB_LIST_FORMAT) {
+                       bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell,
+                              hq->entry_repost);
+                       bf_set(lpfc_rq_db_list_fm_index, &doorbell,
+                              hq->host_index);
+                       bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id);
+               } else {
+                       return -EINVAL;
+               }
+               writel(doorbell.word0, hq->db_regaddr);
        }
        return put_index;
 }
@@ -4939,7 +4956,7 @@ out_free_mboxq:
 static void
 lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
 {
-       uint8_t fcp_eqidx;
+       int fcp_eqidx;
 
        lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM);
        lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM);
@@ -5622,6 +5639,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
                }
                /* RPIs. */
                count = phba->sli4_hba.max_cfg_param.max_rpi;
+               if (count <= 0) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                                       "3279 Invalid provisioning of "
+                                       "rpi:%d\n", count);
+                       rc = -EINVAL;
+                       goto err_exit;
+               }
                base = phba->sli4_hba.max_cfg_param.rpi_base;
                longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
                phba->sli4_hba.rpi_bmask = kzalloc(longs *
@@ -5644,6 +5668,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
 
                /* VPIs. */
                count = phba->sli4_hba.max_cfg_param.max_vpi;
+               if (count <= 0) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                                       "3280 Invalid provisioning of "
+                                       "vpi:%d\n", count);
+                       rc = -EINVAL;
+                       goto free_rpi_ids;
+               }
                base = phba->sli4_hba.max_cfg_param.vpi_base;
                longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
                phba->vpi_bmask = kzalloc(longs *
@@ -5666,6 +5697,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
 
                /* XRIs. */
                count = phba->sli4_hba.max_cfg_param.max_xri;
+               if (count <= 0) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                                       "3281 Invalid provisioning of "
+                                       "xri:%d\n", count);
+                       rc = -EINVAL;
+                       goto free_vpi_ids;
+               }
                base = phba->sli4_hba.max_cfg_param.xri_base;
                longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
                phba->sli4_hba.xri_bmask = kzalloc(longs *
@@ -5689,6 +5727,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba)
 
                /* VFIs. */
                count = phba->sli4_hba.max_cfg_param.max_vfi;
+               if (count <= 0) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+                                       "3282 Invalid provisioning of "
+                                       "vfi:%d\n", count);
+                       rc = -EINVAL;
+                       goto free_xri_ids;
+               }
                base = phba->sli4_hba.max_cfg_param.vfi_base;
                longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG;
                phba->sli4_hba.vfi_bmask = kzalloc(longs *
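Editorial note: each of the count checks added above guards the same pattern: reject a zero or negative provisioning value before sizing the ID bitmap with a ceiling division over BITS_PER_LONG. A compact user-space sketch of that pattern follows, with calloc() standing in for kzalloc().

#include <limits.h>
#include <stdlib.h>

#define BITS_PER_LONG (CHAR_BIT * sizeof(long))

/* Reject zero/negative provisioning, then size the ID bitmap with the
 * same ceiling division the driver uses. */
static unsigned long *alloc_id_bitmap(int count)
{
	size_t longs;

	if (count <= 0)
		return NULL;	/* invalid provisioning */
	longs = ((size_t)count + BITS_PER_LONG - 1) / BITS_PER_LONG;
	return calloc(longs, sizeof(unsigned long));
}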
@@ -8370,7 +8415,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
                 * This is a continuation of a commandi,(CX) so this
                 * sglq is on the active list
                 */
-               sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag);
+               sglq = __lpfc_get_active_sglq(phba, piocb->sli4_lxritag);
                if (!sglq)
                        return IOCB_ERROR;
        }
@@ -8855,12 +8900,6 @@ lpfc_sli_setup(struct lpfc_hba *phba)
                        pring->prt[3].type = FC_TYPE_CT;
                        pring->prt[3].lpfc_sli_rcv_unsol_event =
                            lpfc_ct_unsol_event;
-                       /* abort unsolicited sequence */
-                       pring->prt[4].profile = 0;      /* Mask 4 */
-                       pring->prt[4].rctl = FC_RCTL_BA_ABTS;
-                       pring->prt[4].type = FC_TYPE_BLS;
-                       pring->prt[4].lpfc_sli_rcv_unsol_event =
-                           lpfc_sli4_ct_abort_unsol_event;
                        break;
                }
                totiocbsize += (pring->sli.sli3.numCiocb *
@@ -11873,7 +11912,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
        struct lpfc_eqe *eqe;
        unsigned long iflag;
        int ecount = 0;
-       uint32_t fcp_eqidx;
+       int fcp_eqidx;
 
        /* Get the driver's phba structure from the dev_id */
        fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id;
@@ -11975,7 +12014,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
        struct lpfc_hba  *phba;
        irqreturn_t hba_irq_rc;
        bool hba_handled = false;
-       uint32_t fcp_eqidx;
+       int fcp_eqidx;
 
        /* Get the driver's phba structure from the dev_id */
        phba = (struct lpfc_hba *)dev_id;
@@ -12096,6 +12135,54 @@ out_fail:
        return NULL;
 }
 
+/**
+ * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory
+ * @phba: HBA structure that indicates port to create a queue on.
+ * @pci_barset: PCI BAR set flag.
+ *
+ * This function maps the specified PCI BAR set into host memory, if it has
+ * not been mapped already, and returns the mapping. The returned host
+ * memory address can be NULL.
+ */
+static void __iomem *
+lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
+{
+       struct pci_dev *pdev;
+       unsigned long bar_map, bar_map_len;
+
+       if (!phba->pcidev)
+               return NULL;
+       else
+               pdev = phba->pcidev;
+
+       switch (pci_barset) {
+       case WQ_PCI_BAR_0_AND_1:
+               if (!phba->pci_bar0_memmap_p) {
+                       bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0);
+                       bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0);
+                       phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len);
+               }
+               return phba->pci_bar0_memmap_p;
+       case WQ_PCI_BAR_2_AND_3:
+               if (!phba->pci_bar2_memmap_p) {
+                       bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2);
+                       bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2);
+                       phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len);
+               }
+               return phba->pci_bar2_memmap_p;
+       case WQ_PCI_BAR_4_AND_5:
+               if (!phba->pci_bar4_memmap_p) {
+                       bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4);
+                       bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4);
+                       phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len);
+               }
+               return phba->pci_bar4_memmap_p;
+       default:
+               break;
+       }
+       return NULL;
+}
+
 /**
  * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs
  * @phba: HBA structure that indicates port to create a queue on.
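Editorial note: the new lpfc_dual_chute_pci_bar_map() above lazily ioremap()s whichever BAR set the queue-create response names and caches the mapping so later queues reuse it. The fragment below models that memoized lookup in isolation; map_resource(), BAR_SETS and the cache array are placeholders for ioremap()/pci_resource_start() and the pci_bar*_memmap_p fields, not the real driver interfaces.

#include <stddef.h>

#define BAR_SETS 3

/* Cached mappings, one slot per BAR set. */
static void *bar_cache[BAR_SETS];

/* Stand-in for ioremap(pci_resource_start(...), pci_resource_len(...)). */
static void *map_resource(int bar_set)
{
	static char fake_regs[BAR_SETS][4096];

	return fake_regs[bar_set];
}

/* Map the requested BAR set on first use and hand back the cached
 * pointer on every later call; unknown sets return NULL. */
static void *get_bar_mapping(int bar_set)
{
	if (bar_set < 0 || bar_set >= BAR_SETS)
		return NULL;
	if (!bar_cache[bar_set])
		bar_cache[bar_set] = map_resource(bar_set);
	return bar_cache[bar_set];
}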
@@ -12673,6 +12760,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
        union lpfc_sli4_cfg_shdr *shdr;
        uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
        struct dma_address *page;
+       void __iomem *bar_memmap_p;
+       uint32_t db_offset;
+       uint16_t pci_barset;
 
        /* sanity check on queue memory */
        if (!wq || !cq)
@@ -12696,6 +12786,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
                    cq->queue_id);
        bf_set(lpfc_mbox_hdr_version, &shdr->request,
               phba->sli4_hba.pc_sli4_params.wqv);
+
        if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) {
                bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1,
                       wq->entry_count);
@@ -12723,6 +12814,10 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
                page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys);
                page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys);
        }
+
+       if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE)
+               bf_set(lpfc_mbx_wq_create_dua, &wq_create->u.request, 1);
+
        rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
        /* The IOCTL status is embedded in the mailbox subheader. */
        shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
@@ -12740,6 +12835,47 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
                status = -ENXIO;
                goto out;
        }
+       if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) {
+               wq->db_format = bf_get(lpfc_mbx_wq_create_db_format,
+                                      &wq_create->u.response);
+               if ((wq->db_format != LPFC_DB_LIST_FORMAT) &&
+                   (wq->db_format != LPFC_DB_RING_FORMAT)) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "3265 WQ[%d] doorbell format not "
+                                       "supported: x%x\n", wq->queue_id,
+                                       wq->db_format);
+                       status = -EINVAL;
+                       goto out;
+               }
+               pci_barset = bf_get(lpfc_mbx_wq_create_bar_set,
+                                   &wq_create->u.response);
+               bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset);
+               if (!bar_memmap_p) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "3263 WQ[%d] failed to memmap pci "
+                                       "barset:x%x\n", wq->queue_id,
+                                       pci_barset);
+                       status = -ENOMEM;
+                       goto out;
+               }
+               db_offset = wq_create->u.response.doorbell_offset;
+               if ((db_offset != LPFC_ULP0_WQ_DOORBELL) &&
+                   (db_offset != LPFC_ULP1_WQ_DOORBELL)) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "3252 WQ[%d] doorbell offset not "
+                                       "supported: x%x\n", wq->queue_id,
+                                       db_offset);
+                       status = -EINVAL;
+                       goto out;
+               }
+               wq->db_regaddr = bar_memmap_p + db_offset;
+               lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                               "3264 WQ[%d]: barset:x%x, offset:x%x\n",
+                               wq->queue_id, pci_barset, db_offset);
+       } else {
+               wq->db_format = LPFC_DB_LIST_FORMAT;
+               wq->db_regaddr = phba->sli4_hba.WQDBregaddr;
+       }
        wq->type = LPFC_WQ;
        wq->assoc_qid = cq->queue_id;
        wq->subtype = subtype;
@@ -12816,6 +12952,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
        uint32_t shdr_status, shdr_add_status;
        union lpfc_sli4_cfg_shdr *shdr;
        uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz;
+       void __iomem *bar_memmap_p;
+       uint32_t db_offset;
+       uint16_t pci_barset;
 
        /* sanity check on queue memory */
        if (!hrq || !drq || !cq)
@@ -12894,6 +13033,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
                rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
                                        putPaddrHigh(dmabuf->phys);
        }
+       if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE)
+               bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1);
+
        rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
        /* The IOCTL status is embedded in the mailbox subheader. */
        shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
@@ -12911,6 +13053,50 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
                status = -ENXIO;
                goto out;
        }
+
+       if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) {
+               hrq->db_format = bf_get(lpfc_mbx_rq_create_db_format,
+                                       &rq_create->u.response);
+               if ((hrq->db_format != LPFC_DB_LIST_FORMAT) &&
+                   (hrq->db_format != LPFC_DB_RING_FORMAT)) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "3262 RQ [%d] doorbell format not "
+                                       "supported: x%x\n", hrq->queue_id,
+                                       hrq->db_format);
+                       status = -EINVAL;
+                       goto out;
+               }
+
+               pci_barset = bf_get(lpfc_mbx_rq_create_bar_set,
+                                   &rq_create->u.response);
+               bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset);
+               if (!bar_memmap_p) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "3269 RQ[%d] failed to memmap pci "
+                                       "barset:x%x\n", hrq->queue_id,
+                                       pci_barset);
+                       status = -ENOMEM;
+                       goto out;
+               }
+
+               db_offset = rq_create->u.response.doorbell_offset;
+               if ((db_offset != LPFC_ULP0_RQ_DOORBELL) &&
+                   (db_offset != LPFC_ULP1_RQ_DOORBELL)) {
+                       lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+                                       "3270 RQ[%d] doorbell offset not "
+                                       "supported: x%x\n", hrq->queue_id,
+                                       db_offset);
+                       status = -EINVAL;
+                       goto out;
+               }
+               hrq->db_regaddr = bar_memmap_p + db_offset;
+               lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+                               "3266 RQ[qid:%d]: barset:x%x, offset:x%x\n",
+                               hrq->queue_id, pci_barset, db_offset);
+       } else {
+               hrq->db_format = LPFC_DB_RING_FORMAT;
+               hrq->db_regaddr = phba->sli4_hba.RQDBregaddr;
+       }
        hrq->type = LPFC_HRQ;
        hrq->assoc_qid = cq->queue_id;
        hrq->subtype = subtype;
@@ -12976,6 +13162,8 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
                rq_create->u.request.page[dmabuf->buffer_tag].addr_hi =
                                        putPaddrHigh(dmabuf->phys);
        }
+       if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE)
+               bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1);
        rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
        /* The IOCTL status is embedded in the mailbox subheader. */
        shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr;
@@ -14062,6 +14250,40 @@ lpfc_sli4_abort_partial_seq(struct lpfc_vport *vport,
        return false;
 }
 
+/**
+ * lpfc_sli4_abort_ulp_seq - Abort assembled unsol sequence from ulp
+ * @vport: pointer to a virtual port
+ * @dmabuf: pointer to a dmabuf that describes the FC sequence
+ *
+ * This function tries to abort the assembled sequence at the upper level
+ * protocol, described by the information in the basic abort @dmabuf. It
+ * checks whether such a pending context exists at the upper level protocol
+ * and, if so, cleans up that pending context.
+ *
+ * Return
+ * true  -- a matching pending context for the sequence was found and
+ *          cleaned at the ulp;
+ * false -- no matching pending context for the sequence is present at
+ *          the ulp.
+ **/
+static bool
+lpfc_sli4_abort_ulp_seq(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf)
+{
+       struct lpfc_hba *phba = vport->phba;
+       int handled;
+
+       /* Accepting abort at ulp with SLI4 only */
+       if (phba->sli_rev < LPFC_SLI_REV4)
+               return false;
+
+       /* Let each interested upper level protocol handle the abort */
+       handled = lpfc_ct_handle_unsol_abort(phba, dmabuf);
+       if (handled)
+               return true;
+
+       return false;
+}
+
 /**
  * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler
  * @phba: Pointer to HBA context object.
@@ -14077,8 +14299,14 @@ lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba,
                             struct lpfc_iocbq *cmd_iocbq,
                             struct lpfc_iocbq *rsp_iocbq)
 {
-       if (cmd_iocbq)
+       struct lpfc_nodelist *ndlp;
+
+       if (cmd_iocbq) {
+               ndlp = (struct lpfc_nodelist *)cmd_iocbq->context1;
+               lpfc_nlp_put(ndlp);
+               lpfc_nlp_not_used(ndlp);
                lpfc_sli_release_iocbq(phba, cmd_iocbq);
+       }
 
        /* Failure means BLS ABORT RSP did not get delivered to remote node*/
        if (rsp_iocbq && rsp_iocbq->iocb.ulpStatus)
@@ -14118,9 +14346,10 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba,
  * event after aborting the sequence handling.
  **/
 static void
-lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
-                       struct fc_frame_header *fc_hdr)
+lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
+                       struct fc_frame_header *fc_hdr, bool aborted)
 {
+       struct lpfc_hba *phba = vport->phba;
        struct lpfc_iocbq *ctiocb = NULL;
        struct lpfc_nodelist *ndlp;
        uint16_t oxid, rxid, xri, lxri;
@@ -14135,12 +14364,27 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
        oxid = be16_to_cpu(fc_hdr->fh_ox_id);
        rxid = be16_to_cpu(fc_hdr->fh_rx_id);
 
-       ndlp = lpfc_findnode_did(phba->pport, sid);
+       ndlp = lpfc_findnode_did(vport, sid);
        if (!ndlp) {
-               lpfc_printf_log(phba, KERN_WARNING, LOG_ELS,
-                               "1268 Find ndlp returned NULL for oxid:x%x "
-                               "SID:x%x\n", oxid, sid);
-               return;
+               ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+               if (!ndlp) {
+                       lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
+                                        "1268 Failed to allocate ndlp for "
+                                        "oxid:x%x SID:x%x\n", oxid, sid);
+                       return;
+               }
+               lpfc_nlp_init(vport, ndlp, sid);
+               /* Put ndlp onto pport node list */
+               lpfc_enqueue_node(vport, ndlp);
+       } else if (!NLP_CHK_NODE_ACT(ndlp)) {
+               /* re-setup ndlp without removing from node list */
+               ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE);
+               if (!ndlp) {
+                       lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
+                                        "3275 Failed to activate ndlp found "
+                                        "for oxid:x%x SID:x%x\n", oxid, sid);
+                       return;
+               }
        }
 
        /* Allocate buffer for rsp iocb */
@@ -14164,7 +14408,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
        icmd->ulpLe = 1;
        icmd->ulpClass = CLASS3;
        icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi];
-       ctiocb->context1 = ndlp;
+       ctiocb->context1 = lpfc_nlp_get(ndlp);
 
        ctiocb->iocb_cmpl = NULL;
        ctiocb->vport = phba->pport;
@@ -14183,14 +14427,24 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
        if (lxri != NO_XRI)
                lpfc_set_rrq_active(phba, ndlp, lxri,
                        (xri == oxid) ? rxid : oxid, 0);
-       /* If the oxid maps to the FCP XRI range or if it is out of range,
-        * send a BLS_RJT.  The driver no longer has that exchange.
-        * Override the IOCB for a BA_RJT.
+       /* For a BA_ABTS from the exchange responder, if the logical xri
+        * associated with the oxid maps to the FCP XRI range, the port no
+        * longer has that exchange context; send a BLS_RJT. Override the
+        * IOCB for a BA_RJT.
         */
-       if (xri > (phba->sli4_hba.max_cfg_param.max_xri +
-                   phba->sli4_hba.max_cfg_param.xri_base) ||
-           xri > (lpfc_sli4_get_els_iocb_cnt(phba) +
-                   phba->sli4_hba.max_cfg_param.xri_base)) {
+       if ((fctl & FC_FC_EX_CTX) &&
+           (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) {
+               icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT;
+               bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0);
+               bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID);
+               bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE);
+       }
+
+       /* If the BA_ABTS failed to abort a partially assembled receive
+        * sequence, the driver no longer has that exchange; send a BLS_RJT.
+        * Override the IOCB for a BA_RJT.
+        */
+       if (aborted == false) {
                icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT;
                bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0);
                bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID);
@@ -14214,17 +14468,19 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba,
        bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid);
 
        /* Xmit CT abts response on exchange <xid> */
-       lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
-                       "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n",
-                       icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state);
+       lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
+                        "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n",
+                        icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state);
 
        rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0);
        if (rc == IOCB_ERROR) {
-               lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
-                               "2925 Failed to issue CT ABTS RSP x%x on "
-                               "xri x%x, Data x%x\n",
-                               icmd->un.xseq64.w5.hcsw.Rctl, oxid,
-                               phba->link_state);
+               lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+                                "2925 Failed to issue CT ABTS RSP x%x on "
+                                "xri x%x, Data x%x\n",
+                                icmd->un.xseq64.w5.hcsw.Rctl, oxid,
+                                phba->link_state);
+               lpfc_nlp_put(ndlp);
+               ctiocb->context1 = NULL;
                lpfc_sli_release_iocbq(phba, ctiocb);
        }
 }
@@ -14249,32 +14505,25 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport,
        struct lpfc_hba *phba = vport->phba;
        struct fc_frame_header fc_hdr;
        uint32_t fctl;
-       bool abts_par;
+       bool aborted;
 
        /* Make a copy of fc_hdr before the dmabuf being released */
        memcpy(&fc_hdr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header));
        fctl = sli4_fctl_from_fc_hdr(&fc_hdr);
 
        if (fctl & FC_FC_EX_CTX) {
-               /*
-                * ABTS sent by responder to exchange, just free the buffer
-                */
-               lpfc_in_buf_free(phba, &dmabuf->dbuf);
+               /* ABTS by responder to exchange, no cleanup needed */
+               aborted = true;
        } else {
-               /*
-                * ABTS sent by initiator to exchange, need to do cleanup
-                */
-               /* Try to abort partially assembled seq */
-               abts_par = lpfc_sli4_abort_partial_seq(vport, dmabuf);
-
-               /* Send abort to ULP if partially seq abort failed */
-               if (abts_par == false)
-                       lpfc_sli4_send_seq_to_ulp(vport, dmabuf);
-               else
-                       lpfc_in_buf_free(phba, &dmabuf->dbuf);
+               /* ABTS by initiator to exchange, need to do cleanup */
+               aborted = lpfc_sli4_abort_partial_seq(vport, dmabuf);
+               if (aborted == false)
+                       aborted = lpfc_sli4_abort_ulp_seq(vport, dmabuf);
        }
-       /* Send basic accept (BA_ACC) to the abort requester */
-       lpfc_sli4_seq_abort_rsp(phba, &fc_hdr);
+       lpfc_in_buf_free(phba, &dmabuf->dbuf);
+
+       /* Respond with BA_ACC or BA_RJT accordingly */
+       lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted);
 }
 
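Taken together, the hunks above restructure the unsolicited ABTS path so that the receive buffer is freed in exactly one place and the BA_ACC/BA_RJT choice is carried by a single "aborted" flag. A minimal stand-alone sketch of the resulting control flow; the frame struct, the stub helpers and main() are invented purely for illustration and are not the driver's real types or signatures:

#include <stdbool.h>
#include <stdio.h>

struct frame { bool from_responder; };	/* stand-in for the FC header fctl check */

/* Stubs standing in for lpfc_sli4_abort_partial_seq()/lpfc_sli4_abort_ulp_seq(). */
static bool abort_partial_seq(const struct frame *f) { (void)f; return false; }
static bool abort_ulp_seq(const struct frame *f)     { (void)f; return false; }

static void handle_unsol_abort(const struct frame *f)
{
	bool aborted;

	if (f->from_responder) {
		/* ABTS from the exchange responder: nothing to clean up. */
		aborted = true;
	} else {
		/* ABTS from the initiator: try the partially assembled
		 * sequence first, then hand the abort to the upper level
		 * protocol, mirroring the new lpfc_sli4_abort_ulp_seq() call.
		 */
		aborted = abort_partial_seq(f);
		if (!aborted)
			aborted = abort_ulp_seq(f);
	}

	/* The real driver frees the dmabuf unconditionally at this point. */
	printf("respond with %s\n", aborted ? "BA_ACC" : "BA_RJT");
}

int main(void)
{
	struct frame f = { .from_responder = false };

	handle_unsol_abort(&f);
	return 0;
}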
 /**
@@ -15307,10 +15556,13 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba)
 {
        uint16_t next_fcf_index;
 
+initial_priority:
        /* Search start from next bit of currently registered FCF index */
+       next_fcf_index = phba->fcf.current_rec.fcf_indx;
+
 next_priority:
-       next_fcf_index = (phba->fcf.current_rec.fcf_indx + 1) %
-                                       LPFC_SLI4_FCF_TBL_INDX_MAX;
+       /* Determine the next fcf index to check */
+       next_fcf_index = (next_fcf_index + 1) % LPFC_SLI4_FCF_TBL_INDX_MAX;
        next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask,
                                       LPFC_SLI4_FCF_TBL_INDX_MAX,
                                       next_fcf_index);
@@ -15337,7 +15589,7 @@ next_priority:
                 * at that level and continue the selection process.
                 */
                if (lpfc_check_next_fcf_pri_level(phba))
-                       goto next_priority;
+                       goto initial_priority;
                lpfc_printf_log(phba, KERN_WARNING, LOG_FIP,
                                "2844 No roundrobin failover FCF available\n");
                if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX)
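The last lpfc_sli.c hunk is easier to read as a loop. The new initial_priority label restarts the scan from the currently registered FCF index whenever a lower priority level is unlocked, while next_priority now advances from the last index tried instead of recomputing the same starting point on every pass. A rough stand-alone model of the selection follows; the table size, the eligibility array and the priority stub are simplified stand-ins, and the two labels are compressed into one loop:

#include <stdbool.h>

#define FCF_TBL_MAX 64			/* stand-in for LPFC_SLI4_FCF_TBL_INDX_MAX */

static bool fcf_eligible[FCF_TBL_MAX];	/* stand-in for phba->fcf.fcf_rr_bmask */
static int current_fcf;			/* stand-in for fcf.current_rec.fcf_indx */

/* Stub for lpfc_check_next_fcf_pri_level(): pretend one lower level exists. */
static bool lower_priority_level(void)
{
	static int tries;

	return tries++ < 1;
}

static int next_fcf_rr_index(void)
{
	int start = current_fcf;

	for (;;) {
		int i;

		/* Round-robin: scan the whole table once, starting after 'start'. */
		for (i = 1; i <= FCF_TBL_MAX; i++) {
			int idx = (start + i) % FCF_TBL_MAX;

			if (fcf_eligible[idx])
				return idx;
		}

		/* Nothing eligible at this priority level: unlock the next
		 * level and restart from the registered FCF index, which is
		 * what the new initial_priority label achieves.
		 */
		if (!lower_priority_level())
			return -1;	/* no roundrobin failover FCF available */
		start = current_fcf;
	}
}

int main(void)
{
	fcf_eligible[12] = true;	/* pretend FCF index 12 is usable */
	return next_fcf_rr_index() == 12 ? 0 : 1;
}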
index 44c427a45d664fce9089ca60cf5e03386707fbf5..be02b59ea2797a53972a1795dcd4b0b1c156a846 100644 (file)
@@ -139,6 +139,10 @@ struct lpfc_queue {
 
        struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */
 
+       uint16_t db_format;
+#define LPFC_DB_RING_FORMAT    0x01
+#define LPFC_DB_LIST_FORMAT    0x02
+       void __iomem *db_regaddr;
        /* For q stats */
        uint32_t q_cnt_1;
        uint32_t q_cnt_2;
@@ -508,6 +512,10 @@ struct lpfc_sli4_hba {
        struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */
        struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */
 
+       uint8_t fw_func_mode;   /* FW function protocol mode */
+       uint32_t ulp0_mode;     /* ULP0 protocol mode */
+       uint32_t ulp1_mode;     /* ULP1 protocol mode */
+
        /* Setup information for various queue parameters */
        int eq_esize;
        int eq_ecount;
index ba596e854bbc6827b7d755ac9dbd78ca0d8dfa92..f3b7795a296b433632609f7c41225b3a0f529c92 100644 (file)
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.36"
+#define LPFC_DRIVER_VERSION "8.3.37"
 #define LPFC_DRIVER_NAME               "lpfc"
 
 /* Used for SLI 2/3 */
index ffd85c511c8e2925314d27a96f268f922d1db3aa..5e24e7e7371404e8b6f723b2e8d762a3b4511659 100644 (file)
@@ -155,7 +155,7 @@ _base_fault_reset_work(struct work_struct *work)
        struct task_struct *p;
 
        spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
-       if (ioc->shost_recovery)
+       if (ioc->shost_recovery || ioc->pci_error_recovery)
                goto rearm_timer;
        spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags);
 
@@ -164,6 +164,20 @@ _base_fault_reset_work(struct work_struct *work)
                printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n",
                        ioc->name, __func__);
 
+               /* It may be possible that EEH recovery can resolve some pci
+                * bus failure issues rather than removing the dead ioc
+                * function by treating the controller as non-operational. So
+                * priority is given to EEH recovery here. If it does not
+                * resolve the issue, the mpt2sas driver will consider the
+                * controller to be in a non-operational state and remove the
+                * dead ioc function.
+                */
+               if (ioc->non_operational_loop++ < 5) {
+                       spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock,
+                                                        flags);
+                       goto rearm_timer;
+               }
+
                /*
                 * Call _scsih_flush_pending_cmds callback so that we flush all
                 * pending commands back to OS. This call is required to avoid
@@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work)
                return; /* don't rearm timer */
        }
 
+       ioc->non_operational_loop = 0;
+
        if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
                rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP,
                    FORCE_BIG_HAMMER);
@@ -4386,6 +4402,7 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc)
        if (missing_delay[0] != -1 && missing_delay[1] != -1)
                _base_update_missing_delay(ioc, missing_delay[0],
                    missing_delay[1]);
+       ioc->non_operational_loop = 0;
 
        return 0;
 
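The comment added to _base_fault_reset_work() describes a simple deferral: when the IOC looks non-operational, the work handler re-arms itself a few times so that EEH (PCI error recovery) gets a chance to repair the bus before the driver removes the dead IOC. Reduced to a stand-alone sketch, with the retry count of 5 taken from the hunk above and everything else stubbed for illustration:

#include <stdbool.h>
#include <stdio.h>

#define NON_OPERATIONAL_RETRIES 5	/* matches the "< 5" test in the hunk */

static int non_operational_loop;	/* models ioc->non_operational_loop */

static bool ioc_is_operational(void)
{
	return false;			/* stub: pretend the IOC stays dead */
}

static void rearm_timer(void)		{ puts("re-arm fault reset timer"); }
static void remove_dead_ioc(void)	{ puts("schedule dead-ioc removal"); }

/* Called periodically, like _base_fault_reset_work(). */
static void fault_reset_poll(void)
{
	if (ioc_is_operational()) {
		non_operational_loop = 0;	/* healthy: reset the counter */
		rearm_timer();
		return;
	}

	/* Defer for a few polls so EEH/PCI error recovery can run first. */
	if (non_operational_loop++ < NON_OPERATIONAL_RETRIES) {
		rearm_timer();
		return;
	}

	remove_dead_ioc();
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)
		fault_reset_poll();
	return 0;
}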
index 543d8d637479d4a983a80174412fe0f2f05e990a..c6ee7aad7501cd274b02071cdba69d70fafe6281 100644 (file)
@@ -835,6 +835,7 @@ struct MPT2SAS_ADAPTER {
        u16             cpu_msix_table_sz;
        u32             ioc_reset_count;
        MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds;
+       u32             non_operational_loop;
 
        /* internal commands, callback index */
        u8              scsi_io_cb_idx;
index 04f8010f0770948dcc09cb6e12916932efb2d434..18360032a520b5d7e5d424558c0f438a7fffe35d 100644 (file)
@@ -42,7 +42,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -1310,7 +1309,6 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc,
        void *sg_local, *chain;
        u32 chain_offset;
        u32 chain_length;
-       u32 chain_flags;
        int sges_left;
        u32 sges_in_segment;
        u8 simple_sgl_flags;
@@ -1356,8 +1354,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc,
                sges_in_segment--;
        }
 
-       /* initializing the chain flags and pointers */
-       chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT;
+       /* initializing the pointers */
        chain_req = _base_get_chain_buffer_tracker(ioc, smid);
        if (!chain_req)
                return -1;
index ce7e59b2fc08d8aaa0408ecb9ecbae6c20ecd707..1df9ed4f371d31233c8d9edacc2f42e861c387d5 100644 (file)
@@ -41,7 +41,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
index 8af944d7d13da258d5ddff2c0f82aa2bb02ff33c..054d5231c974771e5af6a47fe4f8e62e074d2578 100644 (file)
@@ -42,7 +42,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
@@ -3136,7 +3135,7 @@ _ctl_diag_trigger_mpi_store(struct device *cdev,
        spin_lock_irqsave(&ioc->diag_trigger_lock, flags);
        sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count);
        memset(&ioc->diag_trigger_mpi, 0,
-           sizeof(struct SL_WH_EVENT_TRIGGERS_T));
+           sizeof(ioc->diag_trigger_mpi));
        memcpy(&ioc->diag_trigger_mpi, buf, sz);
        if (ioc->diag_trigger_mpi.ValidEntries > NUM_VALID_ENTRIES)
                ioc->diag_trigger_mpi.ValidEntries = NUM_VALID_ENTRIES;
index 6421a06c4ce20dac97de5f4ada0e431ed33a24f0..dcbf7c880cb282502b4e0b55f77d317f3f0b4496 100644 (file)
@@ -41,7 +41,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -2755,13 +2754,11 @@ _scsih_block_io_to_children_attached_directly(struct MPT3SAS_ADAPTER *ioc,
        int i;
        u16 handle;
        u16 reason_code;
-       u8 phy_number;
 
        for (i = 0; i < event_data->NumEntries; i++) {
                handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle);
                if (!handle)
                        continue;
-               phy_number = event_data->StartPhyNum + i;
                reason_code = event_data->PHY[i].PhyStatus &
                    MPI2_EVENT_SAS_TOPO_RC_MASK;
                if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING)
index da6c5f25749cd6f03e8d645c1c40fd07f97cb4c6..6f8d6213040bc7b327cfd55f5ab4bed74a844bef 100644 (file)
@@ -42,7 +42,6 @@
  * USA.
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
index 6e9af20be12f8a720b06279a1800c579f2fc713b..5d8fe4f7565043a70ed623120c48bd0111f93840 100644 (file)
@@ -538,7 +538,7 @@ struct device_info {
        int port_num;
 };
 
-static int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha)
+int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha)
 {
        uint32_t drv_active;
        uint32_t dev_part, dev_part1, dev_part2;
@@ -1351,31 +1351,58 @@ exit_start_fw:
 
 /*----------------------Interrupt Related functions ---------------------*/
 
-void qla4_83xx_disable_intrs(struct scsi_qla_host *ha)
+static void qla4_83xx_disable_iocb_intrs(struct scsi_qla_host *ha)
+{
+       if (test_and_clear_bit(AF_83XX_IOCB_INTR_ON, &ha->flags))
+               qla4_8xxx_intr_disable(ha);
+}
+
+static void qla4_83xx_disable_mbox_intrs(struct scsi_qla_host *ha)
 {
        uint32_t mb_int, ret;
 
-       if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags))
-               qla4_8xxx_mbx_intr_disable(ha);
+       if (test_and_clear_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) {
+               ret = readl(&ha->qla4_83xx_reg->mbox_int);
+               mb_int = ret & ~INT_ENABLE_FW_MB;
+               writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
+               writel(1, &ha->qla4_83xx_reg->leg_int_mask);
+       }
+}
 
-       ret = readl(&ha->qla4_83xx_reg->mbox_int);
-       mb_int = ret & ~INT_ENABLE_FW_MB;
-       writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
-       writel(1, &ha->qla4_83xx_reg->leg_int_mask);
+void qla4_83xx_disable_intrs(struct scsi_qla_host *ha)
+{
+       qla4_83xx_disable_mbox_intrs(ha);
+       qla4_83xx_disable_iocb_intrs(ha);
 }
 
-void qla4_83xx_enable_intrs(struct scsi_qla_host *ha)
+static void qla4_83xx_enable_iocb_intrs(struct scsi_qla_host *ha)
+{
+       if (!test_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) {
+               qla4_8xxx_intr_enable(ha);
+               set_bit(AF_83XX_IOCB_INTR_ON, &ha->flags);
+       }
+}
+
+void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha)
 {
        uint32_t mb_int;
 
-       qla4_8xxx_mbx_intr_enable(ha);
-       mb_int = INT_ENABLE_FW_MB;
-       writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
-       writel(0, &ha->qla4_83xx_reg->leg_int_mask);
+       if (!test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) {
+               mb_int = INT_ENABLE_FW_MB;
+               writel(mb_int, &ha->qla4_83xx_reg->mbox_int);
+               writel(0, &ha->qla4_83xx_reg->leg_int_mask);
+               set_bit(AF_83XX_MBOX_INTR_ON, &ha->flags);
+       }
+}
 
-       set_bit(AF_INTERRUPTS_ON, &ha->flags);
+
+void qla4_83xx_enable_intrs(struct scsi_qla_host *ha)
+{
+       qla4_83xx_enable_mbox_intrs(ha);
+       qla4_83xx_enable_iocb_intrs(ha);
 }
 
+
 void qla4_83xx_queue_mbox_cmd(struct scsi_qla_host *ha, uint32_t *mbx_cmd,
                              int incount)
 {
index 76819b71ada761f200a9182d20cb746e41b5d6be..19ee55a6226cb5950226692a2435f35badb0b120 100644 (file)
@@ -74,16 +74,22 @@ qla4_8xxx_sysfs_write_fw_dump(struct file *filep, struct kobject *kobj,
                }
                break;
        case 2:
-               /* Reset HBA */
+               /* Reset HBA and collect FW dump */
                ha->isp_ops->idc_lock(ha);
                dev_state = qla4_8xxx_rd_direct(ha, QLA8XXX_CRB_DEV_STATE);
                if (dev_state == QLA8XXX_DEV_READY) {
-                       ql4_printk(KERN_INFO, ha,
-                                  "%s: Setting Need reset, reset_owner is 0x%x.\n",
-                                  __func__, ha->func_num);
+                       ql4_printk(KERN_INFO, ha, "%s: Setting Need reset\n",
+                                  __func__);
                        qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DEV_STATE,
                                            QLA8XXX_DEV_NEED_RESET);
-                       set_bit(AF_8XXX_RST_OWNER, &ha->flags);
+                       if (is_qla8022(ha) ||
+                           (is_qla8032(ha) &&
+                            qla4_83xx_can_perform_reset(ha))) {
+                               set_bit(AF_8XXX_RST_OWNER, &ha->flags);
+                               set_bit(AF_FW_RECOVERY, &ha->flags);
+                               ql4_printk(KERN_INFO, ha, "%s: Reset owner is 0x%x\n",
+                                          __func__, ha->func_num);
+                       }
                } else
                        ql4_printk(KERN_INFO, ha,
                                   "%s: Reset not performed as device state is 0x%x\n",
index 329d553eae943d9acc524af42652721df8ad3fa1..129f5dd02822149af1cce4a7309241d87d841e2a 100644 (file)
 #define RESPONSE_QUEUE_DEPTH           64
 #define QUEUE_SIZE                     64
 #define DMA_BUFFER_SIZE                        512
+#define IOCB_HIWAT_CUSHION             4
 
 /*
  * Misc
 #define DISABLE_ACB_TOV                        30
 #define IP_CONFIG_TOV                  30
 #define LOGIN_TOV                      12
+#define BOOT_LOGIN_RESP_TOV            60
 
 #define MAX_RESET_HA_RETRIES           2
 #define FW_ALIVE_WAIT_TOV              3
@@ -314,6 +316,7 @@ struct ql4_tuple_ddb {
  * DDB flags.
  */
 #define DF_RELOGIN             0       /* Relogin to device */
+#define DF_BOOT_TGT            1       /* Boot target entry */
 #define DF_ISNS_DISCOVERED     2       /* Device was discovered via iSNS */
 #define DF_FO_MASKED           3
 
@@ -501,6 +504,7 @@ struct scsi_qla_host {
 #define AF_INTERRUPTS_ON               6 /* 0x00000040 */
 #define AF_GET_CRASH_RECORD            7 /* 0x00000080 */
 #define AF_LINK_UP                     8 /* 0x00000100 */
+#define AF_LOOPBACK                    9 /* 0x00000200 */
 #define AF_IRQ_ATTACHED                        10 /* 0x00000400 */
 #define AF_DISABLE_ACB_COMPLETE                11 /* 0x00000800 */
 #define AF_HA_REMOVAL                  12 /* 0x00001000 */
@@ -516,6 +520,8 @@ struct scsi_qla_host {
 #define AF_8XXX_RST_OWNER              25 /* 0x02000000 */
 #define AF_82XX_DUMP_READING           26 /* 0x04000000 */
 #define AF_83XX_NO_FW_DUMP             27 /* 0x08000000 */
+#define AF_83XX_IOCB_INTR_ON           28 /* 0x10000000 */
+#define AF_83XX_MBOX_INTR_ON           29 /* 0x20000000 */
 
        unsigned long dpc_flags;
 
@@ -537,6 +543,7 @@ struct scsi_qla_host {
        uint32_t tot_ddbs;
 
        uint16_t iocb_cnt;
+       uint16_t iocb_hiwat;
 
        /* SRB cache. */
 #define SRB_MIN_REQ    128
@@ -838,7 +845,8 @@ static inline int is_aer_supported(struct scsi_qla_host *ha)
 static inline int adapter_up(struct scsi_qla_host *ha)
 {
        return (test_bit(AF_ONLINE, &ha->flags) != 0) &&
-               (test_bit(AF_LINK_UP, &ha->flags) != 0);
+              (test_bit(AF_LINK_UP, &ha->flags) != 0) &&
+              (!test_bit(AF_LOOPBACK, &ha->flags));
 }
 
 static inline struct scsi_qla_host* to_qla_host(struct Scsi_Host *shost)
index 1c47950203573e096768d371dd1c131f2aa35e7e..ad9d2e2d370f016f93af0a8e1f5852b8d0c0c7d4 100644 (file)
@@ -495,7 +495,7 @@ struct qla_flt_region {
 #define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED      0x802D
 #define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD                0x802E
 #define MBOX_ASTS_IDC_COMPLETE                 0x8100
-#define MBOX_ASTS_IDC_NOTIFY                   0x8101
+#define MBOX_ASTS_IDC_REQUEST_NOTIFICATION     0x8101
 #define MBOX_ASTS_TXSCVR_INSERTED              0x8130
 #define MBOX_ASTS_TXSCVR_REMOVED               0x8131
 
@@ -522,6 +522,10 @@ struct qla_flt_region {
 #define FLASH_OPT_COMMIT       2
 #define FLASH_OPT_RMW_COMMIT   3
 
+/* Loopback type */
+#define ENABLE_INTERNAL_LOOPBACK       0x04
+#define ENABLE_EXTERNAL_LOOPBACK       0x08
+
 /*************************************************************************/
 
 /* Host Adapter Initialization Control Block (from host) */
index 57a5a3cf5770d5e402d88db55feead1b417a3175..982293edf02cb36c703daa5294af347f73070b37 100644 (file)
@@ -253,12 +253,14 @@ void qla4_8xxx_set_rst_ready(struct scsi_qla_host *ha);
 void qla4_8xxx_clear_rst_ready(struct scsi_qla_host *ha);
 int qla4_8xxx_device_bootstrap(struct scsi_qla_host *ha);
 void qla4_8xxx_get_minidump(struct scsi_qla_host *ha);
-int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha);
-int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha);
+int qla4_8xxx_intr_disable(struct scsi_qla_host *ha);
+int qla4_8xxx_intr_enable(struct scsi_qla_host *ha);
 int qla4_8xxx_set_param(struct scsi_qla_host *ha, int param);
 int qla4_8xxx_update_idc_reg(struct scsi_qla_host *ha);
 int qla4_83xx_post_idc_ack(struct scsi_qla_host *ha);
 void qla4_83xx_disable_pause(struct scsi_qla_host *ha);
+void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha);
+int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha);
 
 extern int ql4xextended_error_logging;
 extern int ql4xdontresethba;
index 1aca1b4f70b820edd867ba566d883e4ec1b6793c..8fc8548ba4baf1171fb0d707d63a7463ba8a1766 100644 (file)
@@ -195,12 +195,10 @@ exit_get_sys_info_no_free:
  * @ha: pointer to host adapter structure.
  *
  **/
-static int qla4xxx_init_local_data(struct scsi_qla_host *ha)
+static void qla4xxx_init_local_data(struct scsi_qla_host *ha)
 {
        /* Initialize aen queue */
        ha->aen_q_count = MAX_AEN_ENTRIES;
-
-       return qla4xxx_get_firmware_status(ha);
 }
 
 static uint8_t
@@ -935,14 +933,23 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset)
        if (ha->isp_ops->start_firmware(ha) == QLA_ERROR)
                goto exit_init_hba;
 
+       /*
+        * For ISP83XX, mailbox and IOCB interrupts are enabled separately.
+        * Mailbox interrupts must be enabled prior to issuing any mailbox
+        * command in order to prevent the possibility of losing interrupts
+        * while switching from polling to interrupt mode. IOCB interrupts are
+        * enabled via isp_ops->enable_intrs.
+        */
+       if (is_qla8032(ha))
+               qla4_83xx_enable_mbox_intrs(ha);
+
        if (qla4xxx_about_firmware(ha) == QLA_ERROR)
                goto exit_init_hba;
 
        if (ha->isp_ops->get_sys_info(ha) == QLA_ERROR)
                goto exit_init_hba;
 
-       if (qla4xxx_init_local_data(ha) == QLA_ERROR)
-               goto exit_init_hba;
+       qla4xxx_init_local_data(ha);
 
        status = qla4xxx_init_firmware(ha);
        if (status == QLA_ERROR)
index f48f37a281d185c32dcc23790413109ffc158289..14fec976f634e1c4804855617c65aae7fcd4a566 100644 (file)
@@ -316,7 +316,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb)
                goto queuing_error;
 
        /* total iocbs active */
-       if ((ha->iocb_cnt + req_cnt) >= REQUEST_QUEUE_DEPTH)
+       if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat)
                goto queuing_error;
 
        /* Build command packet */
index 15ea81465ce4eaf7facddd6ecce4d682b36b6b5b..1b83dc283d2e7a568ec0fe46ef08fc579742492a 100644 (file)
@@ -581,6 +581,33 @@ exit_prq_error:
        set_bit(DPC_RESET_HA, &ha->dpc_flags);
 }
 
+/**
+ * qla4_83xx_loopback_in_progress: Is loopback in progress?
+ * @ha: Pointer to host adapter structure.
+ * @ret: 1 = loopback in progress, 0 = loopback not in progress
+ **/
+static int qla4_83xx_loopback_in_progress(struct scsi_qla_host *ha)
+{
+       int rval = 1;
+
+       if (is_qla8032(ha)) {
+               if ((ha->idc_info.info2 & ENABLE_INTERNAL_LOOPBACK) ||
+                   (ha->idc_info.info2 & ENABLE_EXTERNAL_LOOPBACK)) {
+                       DEBUG2(ql4_printk(KERN_INFO, ha,
+                                         "%s: Loopback diagnostics in progress\n",
+                                         __func__));
+                       rval = 1;
+               } else {
+                       DEBUG2(ql4_printk(KERN_INFO, ha,
+                                         "%s: Loopback diagnostics not in progress\n",
+                                         __func__));
+                       rval = 0;
+               }
+       }
+
+       return rval;
+}
+
 /**
  * qla4xxx_isr_decode_mailbox - decodes mailbox status
  * @ha: Pointer to host adapter structure.
@@ -676,8 +703,10 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha,
 
                case MBOX_ASTS_LINK_DOWN:
                        clear_bit(AF_LINK_UP, &ha->flags);
-                       if (test_bit(AF_INIT_DONE, &ha->flags))
+                       if (test_bit(AF_INIT_DONE, &ha->flags)) {
                                set_bit(DPC_LINK_CHANGED, &ha->dpc_flags);
+                               qla4xxx_wake_dpc(ha);
+                       }
 
                        ql4_printk(KERN_INFO, ha, "%s: LINK DOWN\n", __func__);
                        qla4xxx_post_aen_work(ha, ISCSI_EVENT_LINKDOWN,
@@ -806,7 +835,7 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha,
                            " removed\n",  ha->host_no, mbox_sts[0]));
                        break;
 
-               case MBOX_ASTS_IDC_NOTIFY:
+               case MBOX_ASTS_IDC_REQUEST_NOTIFICATION:
                {
                        uint32_t opcode;
                        if (is_qla8032(ha)) {
@@ -840,6 +869,11 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha,
                                DEBUG2(ql4_printk(KERN_INFO, ha,
                                                  "scsi:%ld: AEN %04x IDC Complete notification\n",
                                                  ha->host_no, mbox_sts[0]));
+
+                               if (qla4_83xx_loopback_in_progress(ha))
+                                       set_bit(AF_LOOPBACK, &ha->flags);
+                               else
+                                       clear_bit(AF_LOOPBACK, &ha->flags);
                        }
                        break;
 
@@ -1124,17 +1158,18 @@ irqreturn_t qla4_83xx_intr_handler(int irq, void *dev_id)
 
        /* Legacy interrupt is valid if bit31 of leg_int_ptr is set */
        if (!(leg_int_ptr & LEG_INT_PTR_B31)) {
-               ql4_printk(KERN_ERR, ha,
-                          "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n",
-                          __func__);
+               DEBUG2(ql4_printk(KERN_ERR, ha,
+                                 "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n",
+                                 __func__));
                return IRQ_NONE;
        }
 
        /* Validate the PCIE function ID set in leg_int_ptr bits [19..16] */
        if ((leg_int_ptr & PF_BITS_MASK) != ha->pf_bit) {
-               ql4_printk(KERN_ERR, ha,
-                          "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n",
-                          __func__, (leg_int_ptr & PF_BITS_MASK), ha->pf_bit);
+               DEBUG2(ql4_printk(KERN_ERR, ha,
+                                 "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n",
+                                 __func__, (leg_int_ptr & PF_BITS_MASK),
+                                 ha->pf_bit));
                return IRQ_NONE;
        }
 
@@ -1437,11 +1472,14 @@ irq_not_attached:
 
 void qla4xxx_free_irqs(struct scsi_qla_host *ha)
 {
-       if (test_bit(AF_MSIX_ENABLED, &ha->flags))
-               qla4_8xxx_disable_msix(ha);
-       else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) {
-               free_irq(ha->pdev->irq, ha);
-               pci_disable_msi(ha->pdev);
-       } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags))
-               free_irq(ha->pdev->irq, ha);
+       if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) {
+               if (test_bit(AF_MSIX_ENABLED, &ha->flags)) {
+                       qla4_8xxx_disable_msix(ha);
+               } else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) {
+                       free_irq(ha->pdev->irq, ha);
+                       pci_disable_msi(ha->pdev);
+               } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) {
+                       free_irq(ha->pdev->irq, ha);
+               }
+       }
 }
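Across ql4_isr.c, ql4_def.h and ql4_os.c these hunks introduce a single AF_LOOPBACK flag: it is set when the IDC-complete AEN reports that internal or external loopback diagnostics are enabled, and both adapter_up() and the qla4xxx_queuecommand() path refuse new I/O while it is set. A compressed stand-alone sketch of that gating, with the loopback bit values copied from the ql4_fw.h hunk and the adapter struct reduced to the few flags involved:

#include <stdbool.h>

/* Bits reported in idc_info.info2, as defined in ql4_fw.h by this patch. */
#define ENABLE_INTERNAL_LOOPBACK 0x04
#define ENABLE_EXTERNAL_LOOPBACK 0x08

struct adapter {
	unsigned int idc_info2;
	bool af_online, af_link_up, af_loopback;
};

/* Called from the IDC-complete AEN path. */
static void idc_complete(struct adapter *ha)
{
	ha->af_loopback = ha->idc_info2 &
		(ENABLE_INTERNAL_LOOPBACK | ENABLE_EXTERNAL_LOOPBACK);
}

/* Mirrors the updated adapter_up(): loopback keeps the adapter "down". */
static bool adapter_up(const struct adapter *ha)
{
	return ha->af_online && ha->af_link_up && !ha->af_loopback;
}

int main(void)
{
	struct adapter ha = { .af_online = true, .af_link_up = true };

	ha.idc_info2 = ENABLE_INTERNAL_LOOPBACK;
	idc_complete(&ha);
	return adapter_up(&ha) ? 0 : 1;	/* 1: I/O blocked during loopback */
}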
index 3d41034191f02588b6c94cc4ec7c4d6d53fb0650..81e738d61ec02073580981ef85d12e2c016402ac 100644 (file)
@@ -43,6 +43,30 @@ void qla4xxx_process_mbox_intr(struct scsi_qla_host *ha, int out_count)
        }
 }
 
+/**
+ * qla4xxx_is_intr_poll_mode - Are we allowed to poll for interrupts?
+ * @ha: Pointer to host adapter structure.
+ * @ret: 1=polling mode, 0=non-polling mode
+ **/
+static int qla4xxx_is_intr_poll_mode(struct scsi_qla_host *ha)
+{
+       int rval = 1;
+
+       if (is_qla8032(ha)) {
+               if (test_bit(AF_IRQ_ATTACHED, &ha->flags) &&
+                   test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags))
+                       rval = 0;
+       } else {
+               if (test_bit(AF_IRQ_ATTACHED, &ha->flags) &&
+                   test_bit(AF_INTERRUPTS_ON, &ha->flags) &&
+                   test_bit(AF_ONLINE, &ha->flags) &&
+                   !test_bit(AF_HA_REMOVAL, &ha->flags))
+                       rval = 0;
+       }
+
+       return rval;
+}
+
 /**
  * qla4xxx_mailbox_command - issues mailbox commands
  * @ha: Pointer to host adapter structure.
@@ -153,33 +177,28 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount,
        /*
         * Wait for completion: Poll or completion queue
         */
-       if (test_bit(AF_IRQ_ATTACHED, &ha->flags) &&
-           test_bit(AF_INTERRUPTS_ON, &ha->flags) &&
-           test_bit(AF_ONLINE, &ha->flags) &&
-           !test_bit(AF_HA_REMOVAL, &ha->flags)) {
-               /* Do not poll for completion. Use completion queue */
-               set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
-               wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ);
-               clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
-       } else {
+       if (qla4xxx_is_intr_poll_mode(ha)) {
                /* Poll for command to complete */
                wait_count = jiffies + MBOX_TOV * HZ;
                while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) {
                        if (time_after_eq(jiffies, wait_count))
                                break;
-
                        /*
                         * Service the interrupt.
                         * The ISR will save the mailbox status registers
                         * to a temporary storage location in the adapter
                         * structure.
                         */
-
                        spin_lock_irqsave(&ha->hardware_lock, flags);
                        ha->isp_ops->process_mailbox_interrupt(ha, outCount);
                        spin_unlock_irqrestore(&ha->hardware_lock, flags);
                        msleep(10);
                }
+       } else {
+               /* Do not poll for completion. Use completion queue */
+               set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
+               wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ);
+               clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags);
        }
 
        /* Check for mailbox timeout. */
@@ -678,8 +697,24 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha)
                return QLA_ERROR;
        }
 
-       ql4_printk(KERN_INFO, ha, "%ld firmware IOCBs available (%d).\n",
-           ha->host_no, mbox_sts[2]);
+       /* High-water mark of IOCBs */
+       ha->iocb_hiwat = mbox_sts[2];
+       DEBUG2(ql4_printk(KERN_INFO, ha,
+                         "%s: firmware IOCBs available = %d\n", __func__,
+                         ha->iocb_hiwat));
+
+       if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION)
+               ha->iocb_hiwat -= IOCB_HIWAT_CUSHION;
+
+       /* Ideally, we should not enter this code, as the # of firmware
+        * IOCBs is hard-coded in the firmware. We set a default
+        * iocb_hiwat here just in case */
+       if (ha->iocb_hiwat == 0) {
+               ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4;
+               DEBUG2(ql4_printk(KERN_WARNING, ha,
+                                 "%s: Setting IOCB's to = %d\n", __func__,
+                                 ha->iocb_hiwat));
+       }
 
        return QLA_SUCCESS;
 }
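Two of the qla4xxx changes work as a pair: qla4xxx_get_firmware_status() now caches the firmware's IOCB high-water mark (minus a small cushion, with a fallback when the firmware reports zero), and qla4xxx_send_command_to_isp() throttles against that cached value instead of the fixed REQUEST_QUEUE_DEPTH. A stand-alone sketch of the arithmetic, with the two constants copied from the hunks and the adapter struct trimmed to the fields involved:

#include <stdbool.h>
#include <stdint.h>

#define REQUEST_QUEUE_DEPTH	64	/* from ql4_def.h */
#define IOCB_HIWAT_CUSHION	4	/* added by this patch */

struct adapter {
	uint16_t iocb_cnt;	/* IOCBs currently outstanding */
	uint16_t iocb_hiwat;	/* firmware-reported limit, minus cushion */
};

/* Models the new code in qla4xxx_get_firmware_status(). */
static void cache_iocb_hiwat(struct adapter *ha, uint16_t fw_iocbs_available)
{
	ha->iocb_hiwat = fw_iocbs_available;

	if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION)
		ha->iocb_hiwat -= IOCB_HIWAT_CUSHION;

	/* Fallback in case the firmware reports nothing usable. */
	if (ha->iocb_hiwat == 0)
		ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4;
}

/* Models the updated check in qla4xxx_send_command_to_isp(). */
static bool can_queue(const struct adapter *ha, uint16_t req_cnt)
{
	return (ha->iocb_cnt + req_cnt) < ha->iocb_hiwat;
}

int main(void)
{
	struct adapter ha = { .iocb_cnt = 10 };

	cache_iocb_hiwat(&ha, 0);		/* firmware reported nothing */
	return can_queue(&ha, 4) ? 0 : 1;	/* 10 + 4 < 16, so OK */
}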
index 499a92db1cf64d3b1d44201592b289aefd38cd71..71d3d234f5265863f847d9de2694e4ca814d1a78 100644 (file)
@@ -2986,7 +2986,7 @@ int qla4_8xxx_load_risc(struct scsi_qla_host *ha)
 
        retval = qla4_8xxx_device_state_handler(ha);
 
-       if (retval == QLA_SUCCESS && !test_bit(AF_INIT_DONE, &ha->flags))
+       if (retval == QLA_SUCCESS && !test_bit(AF_IRQ_ATTACHED, &ha->flags))
                retval = qla4xxx_request_irqs(ha);
 
        return retval;
@@ -3427,11 +3427,11 @@ int qla4_8xxx_get_sys_info(struct scsi_qla_host *ha)
        }
 
        /* Make sure we receive the minimum required data to cache internally */
-       if (mbox_sts[4] < offsetof(struct mbx_sys_info, reserved)) {
+       if ((is_qla8032(ha) ? mbox_sts[3] : mbox_sts[4]) <
+           offsetof(struct mbx_sys_info, reserved)) {
                DEBUG2(printk("scsi%ld: %s: GET_SYS_INFO data receive"
                    " error (%x)\n", ha->host_no, __func__, mbox_sts[4]));
                goto exit_validate_mac82;
-
        }
 
        /* Save M.A.C. address & serial_number */
@@ -3463,7 +3463,7 @@ exit_validate_mac82:
 
 /* Interrupt handling helpers. */
 
-int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha)
+int qla4_8xxx_intr_enable(struct scsi_qla_host *ha)
 {
        uint32_t mbox_cmd[MBOX_REG_COUNT];
        uint32_t mbox_sts[MBOX_REG_COUNT];
@@ -3484,7 +3484,7 @@ int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha)
        return QLA_SUCCESS;
 }
 
-int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha)
+int qla4_8xxx_intr_disable(struct scsi_qla_host *ha)
 {
        uint32_t mbox_cmd[MBOX_REG_COUNT];
        uint32_t mbox_sts[MBOX_REG_COUNT];
@@ -3509,7 +3509,7 @@ int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha)
 void
 qla4_82xx_enable_intrs(struct scsi_qla_host *ha)
 {
-       qla4_8xxx_mbx_intr_enable(ha);
+       qla4_8xxx_intr_enable(ha);
 
        spin_lock_irq(&ha->hardware_lock);
        /* BIT 10 - reset */
@@ -3522,7 +3522,7 @@ void
 qla4_82xx_disable_intrs(struct scsi_qla_host *ha)
 {
        if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags))
-               qla4_8xxx_mbx_intr_disable(ha);
+               qla4_8xxx_intr_disable(ha);
 
        spin_lock_irq(&ha->hardware_lock);
        /* BIT 10 - set */
index 4cec123a6a6a5f02503bb3ebc4175479cbe10030..6142729167f47ab47d75c5ae35646c7527e606f0 100644 (file)
@@ -1337,18 +1337,18 @@ static int qla4xxx_session_get_param(struct iscsi_cls_session *cls_sess,
                                              sess->password_in, BIDI_CHAP,
                                              &idx);
                if (rval)
-                       return -EINVAL;
-
-               len = sprintf(buf, "%hu\n", idx);
+                       len = sprintf(buf, "\n");
+               else
+                       len = sprintf(buf, "%hu\n", idx);
                break;
        case ISCSI_PARAM_CHAP_OUT_IDX:
                rval = qla4xxx_get_chap_index(ha, sess->username,
                                              sess->password, LOCAL_CHAP,
                                              &idx);
                if (rval)
-                       return -EINVAL;
-
-               len = sprintf(buf, "%hu\n", idx);
+                       len = sprintf(buf, "\n");
+               else
+                       len = sprintf(buf, "%hu\n", idx);
                break;
        default:
                return iscsi_session_get_param(cls_sess, param, buf);
@@ -2242,6 +2242,7 @@ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
            test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) ||
            !test_bit(AF_ONLINE, &ha->flags) ||
            !test_bit(AF_LINK_UP, &ha->flags) ||
+           test_bit(AF_LOOPBACK, &ha->flags) ||
            test_bit(DPC_RESET_HA_FW_CONTEXT, &ha->dpc_flags))
                goto qc_host_busy;
 
@@ -2978,6 +2979,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha)
                if (status == QLA_SUCCESS) {
                        if (!test_bit(AF_FW_RECOVERY, &ha->flags))
                                qla4xxx_cmd_wait(ha);
+
                        ha->isp_ops->disable_intrs(ha);
                        qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
                        qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
@@ -3479,7 +3481,8 @@ dpc_post_reset_ha:
        }
 
        /* ---- link change? --- */
-       if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) {
+       if (!test_bit(AF_LOOPBACK, &ha->flags) &&
+           test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) {
                if (!test_bit(AF_LINK_UP, &ha->flags)) {
                        /* ---- link down? --- */
                        qla4xxx_mark_all_devices_missing(ha);
@@ -3508,10 +3511,8 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha)
 {
        qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16);
 
-       if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) {
-               /* Turn-off interrupts on the card. */
-               ha->isp_ops->disable_intrs(ha);
-       }
+       /* Turn-off interrupts on the card. */
+       ha->isp_ops->disable_intrs(ha);
 
        if (is_qla40XX(ha)) {
                writel(set_rmask(CSR_SCSI_PROCESSOR_INTR),
@@ -3547,8 +3548,7 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha)
        }
 
        /* Detach interrupts */
-       if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags))
-               qla4xxx_free_irqs(ha);
+       qla4xxx_free_irqs(ha);
 
        /* free extra memory */
        qla4xxx_mem_free(ha);
@@ -4687,7 +4687,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha,
        struct iscsi_endpoint *ep;
        struct sockaddr_in *addr;
        struct sockaddr_in6 *addr6;
-       struct sockaddr *dst_addr;
+       struct sockaddr *t_addr;
+       struct sockaddr_storage *dst_addr;
        char *ip;
 
        /* TODO: need to destroy on unload iscsi_endpoint*/
@@ -4696,21 +4697,23 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha,
                return NULL;
 
        if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) {
-               dst_addr->sa_family = AF_INET6;
+               t_addr = (struct sockaddr *)dst_addr;
+               t_addr->sa_family = AF_INET6;
                addr6 = (struct sockaddr_in6 *)dst_addr;
                ip = (char *)&addr6->sin6_addr;
                memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN);
                addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port));
 
        } else {
-               dst_addr->sa_family = AF_INET;
+               t_addr = (struct sockaddr *)dst_addr;
+               t_addr->sa_family = AF_INET;
                addr = (struct sockaddr_in *)dst_addr;
                ip = (char *)&addr->sin_addr;
                memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN);
                addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port));
        }
 
-       ep = qla4xxx_ep_connect(ha->host, dst_addr, 0);
+       ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0);
        vfree(dst_addr);
        return ep;
 }
@@ -4725,7 +4728,8 @@ static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx)
 }
 
 static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha,
-                                         struct ddb_entry *ddb_entry)
+                                         struct ddb_entry *ddb_entry,
+                                         uint16_t idx)
 {
        uint16_t def_timeout;
 
@@ -4745,6 +4749,10 @@ static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha,
                def_timeout : LOGIN_TOV;
        ddb_entry->default_time2wait =
                le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait);
+
+       if (ql4xdisablesysfsboot &&
+           (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx))
+               set_bit(DF_BOOT_TGT, &ddb_entry->flags);
 }
 
 static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha)
@@ -4881,7 +4889,7 @@ static void qla4xxx_remove_failed_ddb(struct scsi_qla_host *ha,
 
 static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha,
                                   struct dev_db_entry *fw_ddb_entry,
-                                  int is_reset)
+                                  int is_reset, uint16_t idx)
 {
        struct iscsi_cls_session *cls_sess;
        struct iscsi_session *sess;
@@ -4919,7 +4927,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha,
        memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry,
               sizeof(struct dev_db_entry));
 
-       qla4xxx_setup_flash_ddb_entry(ha, ddb_entry);
+       qla4xxx_setup_flash_ddb_entry(ha, ddb_entry, idx);
 
        cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), conn_id);
 
@@ -5036,7 +5044,7 @@ static void qla4xxx_build_nt_list(struct scsi_qla_host *ha,
                                goto continue_next_nt;
                }
 
-               ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset);
+               ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset, idx);
                if (ret == QLA_ERROR)
                        goto exit_nt_list;
 
@@ -5115,6 +5123,78 @@ void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset)
        qla4xxx_free_ddb_index(ha);
 }
 
+/**
+ * qla4xxx_wait_login_resp_boot_tgt -  Wait for iSCSI boot target login
+ * response.
+ * @ha: pointer to adapter structure
+ *
+ * When the boot entry is a normal iSCSI target, the DF_BOOT_TGT flag will be
+ * set in the DDB and we will wait for the login response of the boot targets
+ * during probe.
+ **/
+static void qla4xxx_wait_login_resp_boot_tgt(struct scsi_qla_host *ha)
+{
+       struct ddb_entry *ddb_entry;
+       struct dev_db_entry *fw_ddb_entry = NULL;
+       dma_addr_t fw_ddb_entry_dma;
+       unsigned long wtime;
+       uint32_t ddb_state;
+       int max_ddbs, idx, ret;
+
+       max_ddbs =  is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX :
+                                    MAX_DEV_DB_ENTRIES;
+
+       fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
+                                         &fw_ddb_entry_dma, GFP_KERNEL);
+       if (!fw_ddb_entry) {
+               ql4_printk(KERN_ERR, ha,
+                          "%s: Unable to allocate dma buffer\n", __func__);
+               goto exit_login_resp;
+       }
+
+       wtime = jiffies + (HZ * BOOT_LOGIN_RESP_TOV);
+
+       for (idx = 0; idx < max_ddbs; idx++) {
+               ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx);
+               if (ddb_entry == NULL)
+                       continue;
+
+               if (test_bit(DF_BOOT_TGT, &ddb_entry->flags)) {
+                       DEBUG2(ql4_printk(KERN_INFO, ha,
+                                         "%s: DDB index [%d]\n", __func__,
+                                         ddb_entry->fw_ddb_index));
+                       do {
+                               ret = qla4xxx_get_fwddb_entry(ha,
+                                               ddb_entry->fw_ddb_index,
+                                               fw_ddb_entry, fw_ddb_entry_dma,
+                                               NULL, NULL, &ddb_state, NULL,
+                                               NULL, NULL);
+                               if (ret == QLA_ERROR)
+                                       goto exit_login_resp;
+
+                               if ((ddb_state == DDB_DS_SESSION_ACTIVE) ||
+                                   (ddb_state == DDB_DS_SESSION_FAILED))
+                                       break;
+
+                               schedule_timeout_uninterruptible(HZ);
+
+                       } while ((time_after(wtime, jiffies)));
+
+                       if (!time_after(wtime, jiffies)) {
+                               DEBUG2(ql4_printk(KERN_INFO, ha,
+                                                 "%s: Login response wait timer expired\n",
+                                                 __func__));
+                                goto exit_login_resp;
+                       }
+               }
+       }
+
+exit_login_resp:
+       if (fw_ddb_entry)
+               dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry),
+                                 fw_ddb_entry, fw_ddb_entry_dma);
+}
+
 /**
  * qla4xxx_probe_adapter - callback function to probe HBA
  * @pdev: pointer to pci_dev structure
@@ -5270,7 +5350,7 @@ static int qla4xxx_probe_adapter(struct pci_dev *pdev,
                if (is_qla80XX(ha)) {
                        ha->isp_ops->idc_lock(ha);
                        dev_state = qla4_8xxx_rd_direct(ha,
-                                                       QLA82XX_CRB_DEV_STATE);
+                                                       QLA8XXX_CRB_DEV_STATE);
                        ha->isp_ops->idc_unlock(ha);
                        if (dev_state == QLA8XXX_DEV_FAILED) {
                                ql4_printk(KERN_WARNING, ha, "%s: don't retry "
@@ -5368,6 +5448,7 @@ skip_retry_init:
                /* Perform the build ddb list and login to each */
        qla4xxx_build_ddb_list(ha, INIT_ADAPTER);
        iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb);
+       qla4xxx_wait_login_resp_boot_tgt(ha);
 
        qla4xxx_create_chap_list(ha);
 
@@ -6008,14 +6089,6 @@ static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type)
                goto exit_host_reset;
        }
 
-       rval = qla4xxx_wait_for_hba_online(ha);
-       if (rval != QLA_SUCCESS) {
-               DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unable to reset host "
-                                 "adapter\n", __func__));
-               rval = -EIO;
-               goto exit_host_reset;
-       }
-
        if (test_bit(DPC_RESET_HA, &ha->dpc_flags))
                goto recover_adapter;
 
@@ -6115,7 +6188,6 @@ qla4xxx_pci_mmio_enabled(struct pci_dev *pdev)
 static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
 {
        uint32_t rval = QLA_ERROR;
-       uint32_t ret = 0;
        int fn;
        struct pci_dev *other_pdev = NULL;
 
@@ -6201,16 +6273,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
                        qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DRV_STATE, 0);
                        qla4_8xxx_set_drv_active(ha);
                        ha->isp_ops->idc_unlock(ha);
-                       ret = qla4xxx_request_irqs(ha);
-                       if (ret) {
-                               ql4_printk(KERN_WARNING, ha, "Failed to "
-                                   "reserve interrupt %d already in use.\n",
-                                   ha->pdev->irq);
-                               rval = QLA_ERROR;
-                       } else {
-                               ha->isp_ops->enable_intrs(ha);
-                               rval = QLA_SUCCESS;
-                       }
+                       ha->isp_ops->enable_intrs(ha);
                }
        } else {
                ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not "
@@ -6220,18 +6283,9 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
                     QLA8XXX_DEV_READY)) {
                        clear_bit(AF_FW_RECOVERY, &ha->flags);
                        rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER);
-                       if (rval == QLA_SUCCESS) {
-                               ret = qla4xxx_request_irqs(ha);
-                               if (ret) {
-                                       ql4_printk(KERN_WARNING, ha, "Failed to"
-                                           " reserve interrupt %d already in"
-                                           " use.\n", ha->pdev->irq);
-                                       rval = QLA_ERROR;
-                               } else {
-                                       ha->isp_ops->enable_intrs(ha);
-                                       rval = QLA_SUCCESS;
-                               }
-                       }
+                       if (rval == QLA_SUCCESS)
+                               ha->isp_ops->enable_intrs(ha);
+
                        ha->isp_ops->idc_lock(ha);
                        qla4_8xxx_set_drv_active(ha);
                        ha->isp_ops->idc_unlock(ha);
index f6df2ea91ab5e84772adfc5b73853176a2073ad1..6775a45af3159ec61781d846bb1b0ee22017ba95 100644 (file)
@@ -5,4 +5,4 @@
  * See LICENSE.qla4xxx for copyright and licensing details.
  */
 
-#define QLA4XXX_DRIVER_VERSION "5.03.00-k1"
+#define QLA4XXX_DRIVER_VERSION "5.03.00-k4"
index 59d427bf08e20e8ac379afe51ddc0cb86001f2cc..0a74b975efdf798a8149a72c767112e35e0c9531 100644 (file)
@@ -2503,6 +2503,15 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr,
 }
 static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator,
                        NULL);
+static ssize_t
+show_priv_session_target_id(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
+       return sprintf(buf, "%d\n", session->target_id);
+}
+static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO,
+                       show_priv_session_target_id, NULL);
 
 #define iscsi_priv_session_attr_show(field, format)                    \
 static ssize_t                                                         \
@@ -2575,6 +2584,7 @@ static struct attribute *iscsi_session_attrs[] = {
        &dev_attr_priv_sess_creator.attr,
        &dev_attr_sess_chap_out_idx.attr,
        &dev_attr_sess_chap_in_idx.attr,
+       &dev_attr_priv_sess_target_id.attr,
        NULL,
 };
 
@@ -2638,6 +2648,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj,
                return S_IRUGO;
        else if (attr == &dev_attr_priv_sess_creator.attr)
                return S_IRUGO;
+       else if (attr == &dev_attr_priv_sess_target_id.attr)
+               return S_IRUGO;
        else {
                WARN_ONCE(1, "Invalid session attr");
                return 0;
index c2c77d1ac499d3ccb4d4c141d66986e88e980bf6..a764f165b58930da5044c208693a86a817bab93f 100644 (file)
@@ -29,14 +29,14 @@ choice
 
 config THERMAL_DEFAULT_GOV_STEP_WISE
        bool "step_wise"
-       select STEP_WISE
+       select THERMAL_GOV_STEP_WISE
        help
          Use the step_wise governor as default. This throttles the
          devices one step at a time.
 
 config THERMAL_DEFAULT_GOV_FAIR_SHARE
        bool "fair_share"
-       select FAIR_SHARE
+       select THERMAL_GOV_FAIR_SHARE
        help
          Use the fair_share governor as default. This throttles the
          devices based on their 'contribution' to a zone. The
@@ -44,24 +44,24 @@ config THERMAL_DEFAULT_GOV_FAIR_SHARE
 
 config THERMAL_DEFAULT_GOV_USER_SPACE
        bool "user_space"
-       select USER_SPACE
+       select THERMAL_GOV_USER_SPACE
        help
          Select this if you want to let the user space manage the
          platform thermals.
 
 endchoice
 
-config FAIR_SHARE
+config THERMAL_GOV_FAIR_SHARE
        bool "Fair-share thermal governor"
        help
          Enable this to manage platform thermals using fair-share governor.
 
-config STEP_WISE
+config THERMAL_GOV_STEP_WISE
        bool "Step_wise thermal governor"
        help
          Enable this to manage platform thermals using a simple linear governor.
 
-config USER_SPACE
+config THERMAL_GOV_USER_SPACE
        bool "User_space thermal governor"
        help
          Enable this to let the user space manage the platform thermals.
@@ -78,6 +78,14 @@ config CPU_THERMAL
          and not the ACPI interface.
          If you want this support, you should say Y here.
 
+config THERMAL_EMULATION
+       bool "Thermal emulation mode support"
+       help
+         Enable this option to create an emul_temp sysfs node in the thermal
+         zone directory to support temperature emulation. With the emulation
+         sysfs node, users can manually input a temperature and test the
+         different trip threshold behaviour for simulation purposes.
+
 config SPEAR_THERMAL
        bool "SPEAr thermal sensor driver"
        depends on PLAT_SPEAR
@@ -93,6 +101,14 @@ config RCAR_THERMAL
          Enable this to plug the R-Car thermal sensor driver into the Linux
          thermal framework
 
+config KIRKWOOD_THERMAL
+       tristate "Temperature sensor on Marvell Kirkwood SoCs"
+       depends on ARCH_KIRKWOOD
+       depends on OF
+       help
+         Support for the Kirkwood thermal sensor driver in the Linux thermal
+         framework. Only Kirkwood 88F6282 and 88F6283 have this sensor.
+
 config EXYNOS_THERMAL
        tristate "Temperature sensor on Samsung EXYNOS"
        depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5)
@@ -101,6 +117,23 @@ config EXYNOS_THERMAL
          If you say yes here you get support for TMU (Thermal Management
          Unit) on SAMSUNG EXYNOS series of SoC.
 
+config EXYNOS_THERMAL_EMUL
+       bool "EXYNOS TMU emulation mode support"
+       depends on EXYNOS_THERMAL
+       help
+         The Exynos 4412, 4414 and 5 series have an emulation mode on the TMU.
+         Enabling this option will create a sysfs node in the exynos thermal
+         platform device directory to support emulation mode. With the emulation
+         mode sysfs node, you can manually input a temperature to the TMU for
+         simulation purposes.
+
+config DOVE_THERMAL
+       tristate "Temperature sensor on Marvell Dove SoCs"
+       depends on ARCH_DOVE
+       depends on OF
+       help
+         Support for the Dove thermal sensor driver in the Linux thermal
+         framework.
+
 config DB8500_THERMAL
        bool "DB8500 thermal management"
        depends on ARCH_U8500
@@ -122,4 +155,14 @@ config DB8500_CPUFREQ_COOLING
          bound cpufreq cooling device turns active to set CPU frequency low to
          cool down the CPU.
 
+config INTEL_POWERCLAMP
+       tristate "Intel PowerClamp idle injection driver"
+       depends on THERMAL
+       depends on X86
+       depends on CPU_SUP_INTEL
+       help
+         Enable this to enable the Intel PowerClamp idle injection driver. This
+         enforces idle time, which results in more package C-state residency. The
+         user interface is exposed via the generic thermal framework.
+
 endif
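As a rough illustration of the new THERMAL_EMULATION option: the help text above implies an emul_temp node under each thermal zone directory. The minimal userspace sketch below assumes the conventional sysfs path /sys/class/thermal/thermal_zone0/emul_temp and a temperature given in millicelsius; it also assumes that writing 0 disables emulation again. The path and semantics are assumptions for illustration, not taken verbatim from this patch.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path; the zone index depends on the platform. */
	const char *path = "/sys/class/thermal/thermal_zone0/emul_temp";
	const char *temp = "60000";	/* 60.000 C, in millicelsius */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open emul_temp");
		return 1;
	}
	if (write(fd, temp, strlen(temp)) < 0)
		perror("write emul_temp");
	close(fd);
	return 0;
}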
index d8da683245fce70065aa3b707dc5d2a8b14447b9..d3a2b38c31e86b694df71a4ccbb2b366025fb317 100644 (file)
@@ -5,9 +5,9 @@
 obj-$(CONFIG_THERMAL)          += thermal_sys.o
 
 # governors
-obj-$(CONFIG_FAIR_SHARE)       += fair_share.o
-obj-$(CONFIG_STEP_WISE)                += step_wise.o
-obj-$(CONFIG_USER_SPACE)       += user_space.o
+obj-$(CONFIG_THERMAL_GOV_FAIR_SHARE)   += fair_share.o
+obj-$(CONFIG_THERMAL_GOV_STEP_WISE)    += step_wise.o
+obj-$(CONFIG_THERMAL_GOV_USER_SPACE)   += user_space.o
 
 # cpufreq cooling
 obj-$(CONFIG_CPU_THERMAL)      += cpu_cooling.o
@@ -15,6 +15,10 @@ obj-$(CONFIG_CPU_THERMAL)    += cpu_cooling.o
 # platform thermal drivers
 obj-$(CONFIG_SPEAR_THERMAL)    += spear_thermal.o
 obj-$(CONFIG_RCAR_THERMAL)     += rcar_thermal.o
+obj-$(CONFIG_KIRKWOOD_THERMAL)  += kirkwood_thermal.o
 obj-$(CONFIG_EXYNOS_THERMAL)   += exynos_thermal.o
+obj-$(CONFIG_DOVE_THERMAL)     += dove_thermal.o
 obj-$(CONFIG_DB8500_THERMAL)   += db8500_thermal.o
 obj-$(CONFIG_DB8500_CPUFREQ_COOLING)   += db8500_cpufreq_cooling.o
+obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o
+
index c33fa5315d6bd8edb8d3652ba80895a117ed0279..8dc44cbb3e09fe253546347e376c0834957efc1e 100644 (file)
@@ -111,8 +111,8 @@ static int is_cpufreq_valid(int cpu)
 /**
  * get_cpu_frequency - get the absolute value of frequency from level.
  * @cpu: cpu for which frequency is fetched.
- * @level: level of frequency of the CPU
- *     e.g level=1 --> 1st MAX FREQ, LEVEL=2 ---> 2nd MAX FREQ, .... etc
+ * @level: level of frequency, equals cooling state of cpu cooling device
+ *     e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
  */
 static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level)
 {
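The updated kerneldoc above says the level argument equals the cooling state, with level 0 mapping to the highest available frequency. A minimal standalone sketch of that mapping, using a hypothetical descending frequency table rather than the driver's actual cpufreq table walk:

#include <stdio.h>

/* Hypothetical descending table of available frequencies, in kHz. */
static const unsigned int freqs_khz[] = { 1200000, 1000000, 800000, 600000 };

/* level 0 -> 1st (highest) frequency, level 1 -> 2nd highest, and so on. */
static unsigned int level_to_freq(unsigned long level)
{
	if (level >= sizeof(freqs_khz) / sizeof(freqs_khz[0]))
		return 0;	/* invalid cooling state */
	return freqs_khz[level];
}

int main(void)
{
	printf("level 1 -> %u kHz\n", level_to_freq(1));	/* 1000000 */
	return 0;
}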
index 4cf8e72af90a351687da093d7b80bc6678745153..21419851fc0284a5447649580edb007ff079b471 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/cpufreq.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -73,15 +74,13 @@ static const struct of_device_id db8500_cpufreq_cooling_match[] = {
        { .compatible = "stericsson,db8500-cpufreq-cooling" },
        {},
 };
-#else
-#define db8500_cpufreq_cooling_match NULL
 #endif
 
 static struct platform_driver db8500_cpufreq_cooling_driver = {
        .driver = {
                .owner = THIS_MODULE,
                .name = "db8500-cpufreq-cooling",
-               .of_match_table = db8500_cpufreq_cooling_match,
+               .of_match_table = of_match_ptr(db8500_cpufreq_cooling_match),
        },
        .probe = db8500_cpufreq_cooling_probe,
        .suspend = db8500_cpufreq_cooling_suspend,
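The change above (and the matching one in the db8500 thermal driver below) drops the open-coded NULL fallback because of_match_ptr() already degrades to NULL on non-OF builds. Roughly, the helper in <linux/of.h> looks like this simplified sketch, not a verbatim copy of the header:

/* Simplified form of the helper from <linux/of.h>. */
#ifdef CONFIG_OF
#define of_match_ptr(_ptr)	(_ptr)
#else
#define of_match_ptr(_ptr)	NULL
#endif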
index ec71ade3e3174491df515a33bd0c3b25e7f0c818..61ce60a35921d8ca543bcc275a5b7d479b98fa4f 100644 (file)
@@ -508,15 +508,13 @@ static const struct of_device_id db8500_thermal_match[] = {
        { .compatible = "stericsson,db8500-thermal" },
        {},
 };
-#else
-#define db8500_thermal_match NULL
 #endif
 
 static struct platform_driver db8500_thermal_driver = {
        .driver = {
                .owner = THIS_MODULE,
                .name = "db8500-thermal",
-               .of_match_table = db8500_thermal_match,
+               .of_match_table = of_match_ptr(db8500_thermal_match),
        },
        .probe = db8500_thermal_probe,
        .suspend = db8500_thermal_suspend,
diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c
new file mode 100644 (file)
index 0000000..7b0bfa0
--- /dev/null
@@ -0,0 +1,209 @@
+/*
+ * Dove thermal sensor driver
+ *
+ * Copyright (C) 2013 Andrew Lunn <andrew@lunn.ch>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define DOVE_THERMAL_TEMP_OFFSET       1
+#define DOVE_THERMAL_TEMP_MASK         0x1FF
+
+/* Dove Thermal Manager Control and Status Register */
+#define PMU_TM_DISABLE_OFFS            0
+#define PMU_TM_DISABLE_MASK            (0x1 << PMU_TM_DISABLE_OFFS)
+
+/* Dove Thermal Diode Control 0 Register */
+#define PMU_TDC0_SW_RST_MASK           (0x1 << 1)
+#define PMU_TDC0_SEL_VCAL_OFFS         5
+#define PMU_TDC0_SEL_VCAL_MASK         (0x3 << PMU_TDC0_SEL_VCAL_OFFS)
+#define PMU_TDC0_REF_CAL_CNT_OFFS      11
+#define PMU_TDC0_REF_CAL_CNT_MASK      (0x1FF << PMU_TDC0_REF_CAL_CNT_OFFS)
+#define PMU_TDC0_AVG_NUM_OFFS          25
+#define PMU_TDC0_AVG_NUM_MASK          (0x7 << PMU_TDC0_AVG_NUM_OFFS)
+
+/* Dove Thermal Diode Control 1 Register */
+#define PMU_TEMP_DIOD_CTRL1_REG                0x04
+#define PMU_TDC1_TEMP_VALID_MASK       (0x1 << 10)
+
+/* Dove Thermal Sensor Dev Structure */
+struct dove_thermal_priv {
+       void __iomem *sensor;
+       void __iomem *control;
+};
+
+static int dove_init_sensor(const struct dove_thermal_priv *priv)
+{
+       u32 reg;
+       u32 i;
+
+       /* Configure the Diode Control Register #0 */
+       reg = readl_relaxed(priv->control);
+
+       /* Use average of 2 */
+       reg &= ~PMU_TDC0_AVG_NUM_MASK;
+       reg |= (0x1 << PMU_TDC0_AVG_NUM_OFFS);
+
+       /* Reference calibration value */
+       reg &= ~PMU_TDC0_REF_CAL_CNT_MASK;
+       reg |= (0x0F1 << PMU_TDC0_REF_CAL_CNT_OFFS);
+
+       /* Set the high level reference for calibration */
+       reg &= ~PMU_TDC0_SEL_VCAL_MASK;
+       reg |= (0x2 << PMU_TDC0_SEL_VCAL_OFFS);
+       writel(reg, priv->control);
+
+       /* Reset the sensor */
+       reg = readl_relaxed(priv->control);
+       writel((reg | PMU_TDC0_SW_RST_MASK), priv->control);
+       writel(reg, priv->control);
+
+       /* Enable the sensor */
+       reg = readl_relaxed(priv->sensor);
+       reg &= ~PMU_TM_DISABLE_MASK;
+       writel(reg, priv->sensor);
+
+       /* Poll the sensor for the first reading */
+       for (i = 0; i < 1000000; i++) {
+               reg = readl_relaxed(priv->sensor);
+               if (reg & DOVE_THERMAL_TEMP_MASK)
+                       break;
+       }
+
+       if (i == 1000000)
+               return -EIO;
+
+       return 0;
+}
+
+static int dove_get_temp(struct thermal_zone_device *thermal,
+                         unsigned long *temp)
+{
+       unsigned long reg;
+       struct dove_thermal_priv *priv = thermal->devdata;
+
+       /* Valid check */
+       reg = readl_relaxed(priv->control + PMU_TEMP_DIOD_CTRL1_REG);
+       if ((reg & PMU_TDC1_TEMP_VALID_MASK) == 0x0) {
+               dev_err(&thermal->device,
+                       "Temperature sensor reading not valid\n");
+               return -EIO;
+       }
+
+       /*
+        * Calculate temperature. See Section 8.10.1 of the 88AP510 datasheet;
+        * see also Documentation/arm/Marvell/README.
+        */
+       reg = readl_relaxed(priv->sensor);
+       reg = (reg >> DOVE_THERMAL_TEMP_OFFSET) & DOVE_THERMAL_TEMP_MASK;
+       *temp = ((2281638UL - (7298*reg)) / 10);
+
+       return 0;
+}
+
+static struct thermal_zone_device_ops ops = {
+       .get_temp = dove_get_temp,
+};
+
+static const struct of_device_id dove_thermal_id_table[] = {
+       { .compatible = "marvell,dove-thermal" },
+       {}
+};
+
+static int dove_thermal_probe(struct platform_device *pdev)
+{
+       struct thermal_zone_device *thermal = NULL;
+       struct dove_thermal_priv *priv;
+       struct resource *res;
+       int ret;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get platform resource\n");
+               return -ENODEV;
+       }
+
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->sensor = devm_request_and_ioremap(&pdev->dev, res);
+       if (!priv->sensor) {
+               dev_err(&pdev->dev, "Failed to request_ioremap memory\n");
+               return -EADDRNOTAVAIL;
+       }
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get platform resource\n");
+               return -ENODEV;
+       }
+       priv->control = devm_request_and_ioremap(&pdev->dev, res);
+       if (!priv->control) {
+               dev_err(&pdev->dev, "Failed to request_ioremap memory\n");
+               return -EADDRNOTAVAIL;
+       }
+
+       ret = dove_init_sensor(priv);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to initialize sensor\n");
+               return ret;
+       }
+
+       thermal = thermal_zone_device_register("dove_thermal", 0, 0,
+                                              priv, &ops, NULL, 0, 0);
+       if (IS_ERR(thermal)) {
+               dev_err(&pdev->dev,
+                       "Failed to register thermal zone device\n");
+               return PTR_ERR(thermal);
+       }
+
+       platform_set_drvdata(pdev, thermal);
+
+       return 0;
+}
+
+static int dove_thermal_exit(struct platform_device *pdev)
+{
+       struct thermal_zone_device *dove_thermal =
+               platform_get_drvdata(pdev);
+
+       thermal_zone_device_unregister(dove_thermal);
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+MODULE_DEVICE_TABLE(of, dove_thermal_id_table);
+
+static struct platform_driver dove_thermal_driver = {
+       .probe = dove_thermal_probe,
+       .remove = dove_thermal_exit,
+       .driver = {
+               .name = "dove_thermal",
+               .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(dove_thermal_id_table),
+       },
+};
+
+module_platform_driver(dove_thermal_driver);
+
+MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>");
+MODULE_DESCRIPTION("Dove thermal driver");
+MODULE_LICENSE("GPL");
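As a quick sanity check of the conversion in dove_get_temp() above (the Kirkwood driver further down uses the same formula), here is a standalone sketch with an arbitrary raw reading; the constants are taken from the driver, the sample value of 290 is chosen only for illustration:

#include <stdio.h>

/* Same conversion as dove_get_temp(); the constants come from the driver. */
static unsigned long raw_to_millicelsius(unsigned long reg)
{
	return (2281638UL - (7298 * reg)) / 10;
}

int main(void)
{
	/* Arbitrary 9-bit raw value chosen for illustration. */
	unsigned long reg = 290;

	/* (2281638 - 7298 * 290) / 10 = 16521, i.e. roughly 16.5 C. */
	printf("%lu millicelsius\n", raw_to_millicelsius(reg));
	return 0;
}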
index bada1308318bc0cd31b94cb8433508b04a898b8d..e04ebd8671aca27dc390c32d6ec00d6033c7d7ae 100644 (file)
@@ -82,7 +82,7 @@
 
 #define EXYNOS_TRIMINFO_RELOAD         0x1
 #define EXYNOS_TMU_CLEAR_RISE_INT      0x111
-#define EXYNOS_TMU_CLEAR_FALL_INT      (0x111 << 16)
+#define EXYNOS_TMU_CLEAR_FALL_INT      (0x111 << 12)
 #define EXYNOS_MUX_ADDR_VALUE          6
 #define EXYNOS_MUX_ADDR_SHIFT          20
 #define EXYNOS_TMU_TRIP_MODE_SHIFT     13
 #define SENSOR_NAME_LEN        16
 #define MAX_TRIP_COUNT 8
 #define MAX_COOLING_DEVICE 4
+#define MAX_THRESHOLD_LEVS 4
 
 #define ACTIVE_INTERVAL 500
 #define IDLE_INTERVAL 10000
 #define MCELSIUS       1000
 
+#ifdef CONFIG_EXYNOS_THERMAL_EMUL
+#define EXYNOS_EMUL_TIME       0x57F0
+#define EXYNOS_EMUL_TIME_SHIFT 16
+#define EXYNOS_EMUL_DATA_SHIFT 8
+#define EXYNOS_EMUL_DATA_MASK  0xFF
+#define EXYNOS_EMUL_ENABLE     0x1
+#endif /* CONFIG_EXYNOS_THERMAL_EMUL */
+
 /* CPU Zone information */
 #define PANIC_ZONE      4
 #define WARN_ZONE       3
@@ -125,6 +134,7 @@ struct exynos_tmu_data {
 struct thermal_trip_point_conf {
        int trip_val[MAX_TRIP_COUNT];
        int trip_count;
+       u8 trigger_falling;
 };
 
 struct thermal_cooling_conf {
@@ -174,7 +184,8 @@ static int exynos_set_mode(struct thermal_zone_device *thermal,
 
        mutex_lock(&th_zone->therm_dev->lock);
 
-       if (mode == THERMAL_DEVICE_ENABLED)
+       if (mode == THERMAL_DEVICE_ENABLED &&
+               !th_zone->sensor_conf->trip_data.trigger_falling)
                th_zone->therm_dev->polling_delay = IDLE_INTERVAL;
        else
                th_zone->therm_dev->polling_delay = 0;
@@ -284,7 +295,7 @@ static int exynos_bind(struct thermal_zone_device *thermal,
                case MONITOR_ZONE:
                case WARN_ZONE:
                        if (thermal_zone_bind_cooling_device(thermal, i, cdev,
-                                                               level, level)) {
+                                                               level, 0)) {
                                pr_err("error binding cdev inst %d\n", i);
                                ret = -EINVAL;
                        }
@@ -362,10 +373,17 @@ static int exynos_get_temp(struct thermal_zone_device *thermal,
 static int exynos_get_trend(struct thermal_zone_device *thermal,
                        int trip, enum thermal_trend *trend)
 {
-       if (thermal->temperature >= trip)
-               *trend = THERMAL_TREND_RAISING;
+       int ret;
+       unsigned long trip_temp;
+
+       ret = exynos_get_trip_temp(thermal, trip, &trip_temp);
+       if (ret < 0)
+               return ret;
+
+       if (thermal->temperature >= trip_temp)
+               *trend = THERMAL_TREND_RAISE_FULL;
        else
-               *trend = THERMAL_TREND_DROPPING;
+               *trend = THERMAL_TREND_DROP_FULL;
 
        return 0;
 }
@@ -413,7 +431,8 @@ static void exynos_report_trigger(void)
                        break;
        }
 
-       if (th_zone->mode == THERMAL_DEVICE_ENABLED) {
+       if (th_zone->mode == THERMAL_DEVICE_ENABLED &&
+               !th_zone->sensor_conf->trip_data.trigger_falling) {
                if (i > 0)
                        th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL;
                else
@@ -452,7 +471,8 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
 
        th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name,
                        EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0,
-                       IDLE_INTERVAL);
+                       sensor_conf->trip_data.trigger_falling ?
+                       0 : IDLE_INTERVAL);
 
        if (IS_ERR(th_zone->therm_dev)) {
                pr_err("Failed to register thermal zone device\n");
@@ -559,8 +579,9 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 {
        struct exynos_tmu_data *data = platform_get_drvdata(pdev);
        struct exynos_tmu_platform_data *pdata = data->pdata;
-       unsigned int status, trim_info, rising_threshold;
-       int ret = 0, threshold_code;
+       unsigned int status, trim_info;
+       unsigned int rising_threshold = 0, falling_threshold = 0;
+       int ret = 0, threshold_code, i, trigger_levs = 0;
 
        mutex_lock(&data->lock);
        clk_enable(data->clk);
@@ -585,6 +606,11 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
                        (data->temp_error2 != 0))
                data->temp_error1 = pdata->efuse_value;
 
+       /* Count trigger levels to be enabled */
+       for (i = 0; i < MAX_THRESHOLD_LEVS; i++)
+               if (pdata->trigger_levels[i])
+                       trigger_levs++;
+
        if (data->soc == SOC_ARCH_EXYNOS4210) {
                /* Write temperature code for threshold */
                threshold_code = temp_to_code(data, pdata->threshold);
@@ -594,44 +620,38 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
                }
                writeb(threshold_code,
                        data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP);
-
-               writeb(pdata->trigger_levels[0],
-                       data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0);
-               writeb(pdata->trigger_levels[1],
-                       data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL1);
-               writeb(pdata->trigger_levels[2],
-                       data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL2);
-               writeb(pdata->trigger_levels[3],
-                       data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL3);
+               for (i = 0; i < trigger_levs; i++)
+                       writeb(pdata->trigger_levels[i],
+                       data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4);
 
                writel(EXYNOS4210_TMU_INTCLEAR_VAL,
                        data->base + EXYNOS_TMU_REG_INTCLEAR);
        } else if (data->soc == SOC_ARCH_EXYNOS) {
-               /* Write temperature code for threshold */
-               threshold_code = temp_to_code(data, pdata->trigger_levels[0]);
-               if (threshold_code < 0) {
-                       ret = threshold_code;
-                       goto out;
-               }
-               rising_threshold = threshold_code;
-               threshold_code = temp_to_code(data, pdata->trigger_levels[1]);
-               if (threshold_code < 0) {
-                       ret = threshold_code;
-                       goto out;
-               }
-               rising_threshold |= (threshold_code << 8);
-               threshold_code = temp_to_code(data, pdata->trigger_levels[2]);
-               if (threshold_code < 0) {
-                       ret = threshold_code;
-                       goto out;
+               /* Write temperature code for rising and falling threshold */
+               for (i = 0; i < trigger_levs; i++) {
+                       threshold_code = temp_to_code(data,
+                                               pdata->trigger_levels[i]);
+                       if (threshold_code < 0) {
+                               ret = threshold_code;
+                               goto out;
+                       }
+                       rising_threshold |= threshold_code << 8 * i;
+                       if (pdata->threshold_falling) {
+                               threshold_code = temp_to_code(data,
+                                               pdata->trigger_levels[i] -
+                                               pdata->threshold_falling);
+                               if (threshold_code > 0)
+                                       falling_threshold |=
+                                               threshold_code << 8 * i;
+                       }
                }
-               rising_threshold |= (threshold_code << 16);
 
                writel(rising_threshold,
                                data->base + EXYNOS_THD_TEMP_RISE);
-               writel(0, data->base + EXYNOS_THD_TEMP_FALL);
+               writel(falling_threshold,
+                               data->base + EXYNOS_THD_TEMP_FALL);
 
-               writel(EXYNOS_TMU_CLEAR_RISE_INT|EXYNOS_TMU_CLEAR_FALL_INT,
+               writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT,
                                data->base + EXYNOS_TMU_REG_INTCLEAR);
        }
 out:
@@ -664,6 +684,8 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on)
                        pdata->trigger_level2_en << 8 |
                        pdata->trigger_level1_en << 4 |
                        pdata->trigger_level0_en;
+               if (pdata->threshold_falling)
+                       interrupt_en |= interrupt_en << 16;
        } else {
                con |= EXYNOS_TMU_CORE_OFF;
                interrupt_en = 0; /* Disable all interrupts */
@@ -697,20 +719,19 @@ static void exynos_tmu_work(struct work_struct *work)
        struct exynos_tmu_data *data = container_of(work,
                        struct exynos_tmu_data, irq_work);
 
+       exynos_report_trigger();
        mutex_lock(&data->lock);
        clk_enable(data->clk);
-
-
        if (data->soc == SOC_ARCH_EXYNOS)
-               writel(EXYNOS_TMU_CLEAR_RISE_INT,
+               writel(EXYNOS_TMU_CLEAR_RISE_INT |
+                               EXYNOS_TMU_CLEAR_FALL_INT,
                                data->base + EXYNOS_TMU_REG_INTCLEAR);
        else
                writel(EXYNOS4210_TMU_INTCLEAR_VAL,
                                data->base + EXYNOS_TMU_REG_INTCLEAR);
-
        clk_disable(data->clk);
        mutex_unlock(&data->lock);
-       exynos_report_trigger();
+
        enable_irq(data->irq);
 }
 
@@ -759,6 +780,7 @@ static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = {
 
 #if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412)
 static struct exynos_tmu_platform_data const exynos_default_tmu_data = {
+       .threshold_falling = 10,
        .trigger_levels[0] = 85,
        .trigger_levels[1] = 103,
        .trigger_levels[2] = 110,
@@ -800,8 +822,6 @@ static const struct of_device_id exynos_tmu_match[] = {
        {},
 };
 MODULE_DEVICE_TABLE(of, exynos_tmu_match);
-#else
-#define  exynos_tmu_match NULL
 #endif
 
 static struct platform_device_id exynos_tmu_driver_ids[] = {
@@ -832,6 +852,94 @@ static inline struct  exynos_tmu_platform_data *exynos_get_driver_data(
        return (struct exynos_tmu_platform_data *)
                        platform_get_device_id(pdev)->driver_data;
 }
+
+#ifdef CONFIG_EXYNOS_THERMAL_EMUL
+static ssize_t exynos_tmu_emulation_show(struct device *dev,
+                                        struct device_attribute *attr,
+                                        char *buf)
+{
+       struct platform_device *pdev = container_of(dev,
+                                       struct platform_device, dev);
+       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+       unsigned int reg;
+       u8 temp_code;
+       int temp = 0;
+
+       if (data->soc == SOC_ARCH_EXYNOS4210)
+               goto out;
+
+       mutex_lock(&data->lock);
+       clk_enable(data->clk);
+       reg = readl(data->base + EXYNOS_EMUL_CON);
+       clk_disable(data->clk);
+       mutex_unlock(&data->lock);
+
+       if (reg & EXYNOS_EMUL_ENABLE) {
+               reg >>= EXYNOS_EMUL_DATA_SHIFT;
+               temp_code = reg & EXYNOS_EMUL_DATA_MASK;
+               temp = code_to_temp(data, temp_code);
+       }
+out:
+       return sprintf(buf, "%d\n", temp * MCELSIUS);
+}
+
+static ssize_t exynos_tmu_emulation_store(struct device *dev,
+                                       struct device_attribute *attr,
+                                       const char *buf, size_t count)
+{
+       struct platform_device *pdev = container_of(dev,
+                                       struct platform_device, dev);
+       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+       unsigned int reg;
+       int temp;
+
+       if (data->soc == SOC_ARCH_EXYNOS4210)
+               goto out;
+
+       if (!sscanf(buf, "%d\n", &temp) || temp < 0)
+               return -EINVAL;
+
+       mutex_lock(&data->lock);
+       clk_enable(data->clk);
+
+       reg = readl(data->base + EXYNOS_EMUL_CON);
+
+       if (temp) {
+               /* Both CELSIUS and MCELSIUS type are available for input */
+               if (temp > MCELSIUS)
+                       temp /= MCELSIUS;
+
+               reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) |
+                       (temp_to_code(data, (temp / MCELSIUS))
+                        << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE;
+       } else {
+               reg &= ~EXYNOS_EMUL_ENABLE;
+       }
+
+       writel(reg, data->base + EXYNOS_EMUL_CON);
+
+       clk_disable(data->clk);
+       mutex_unlock(&data->lock);
+
+out:
+       return count;
+}
+
+static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show,
+                                       exynos_tmu_emulation_store);
+static int create_emulation_sysfs(struct device *dev)
+{
+       return device_create_file(dev, &dev_attr_emulation);
+}
+static void remove_emulation_sysfs(struct device *dev)
+{
+       device_remove_file(dev, &dev_attr_emulation);
+}
+#else
+static inline int create_emulation_sysfs(struct device *dev) { return 0; }
+static inline void remove_emulation_sysfs(struct device *dev) {}
+#endif
+
 static int exynos_tmu_probe(struct platform_device *pdev)
 {
        struct exynos_tmu_data *data;
@@ -914,6 +1022,8 @@ static int exynos_tmu_probe(struct platform_device *pdev)
                exynos_sensor_conf.trip_data.trip_val[i] =
                        pdata->threshold + pdata->trigger_levels[i];
 
+       exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling;
+
        exynos_sensor_conf.cooling_data.freq_clip_count =
                                                pdata->freq_tab_count;
        for (i = 0; i < pdata->freq_tab_count; i++) {
@@ -928,6 +1038,11 @@ static int exynos_tmu_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "Failed to register thermal interface\n");
                goto err_clk;
        }
+
+       ret = create_emulation_sysfs(&pdev->dev);
+       if (ret)
+               dev_err(&pdev->dev, "Failed to create emulation mode sysfs node\n");
+
        return 0;
 err_clk:
        platform_set_drvdata(pdev, NULL);
@@ -939,6 +1054,8 @@ static int exynos_tmu_remove(struct platform_device *pdev)
 {
        struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 
+       remove_emulation_sysfs(&pdev->dev);
+
        exynos_tmu_control(pdev, false);
 
        exynos_unregister_thermal();
@@ -980,7 +1097,7 @@ static struct platform_driver exynos_tmu_driver = {
                .name   = "exynos-tmu",
                .owner  = THIS_MODULE,
                .pm     = EXYNOS_TMU_PM,
-               .of_match_table = exynos_tmu_match,
+               .of_match_table = of_match_ptr(exynos_tmu_match),
        },
        .probe = exynos_tmu_probe,
        .remove = exynos_tmu_remove,
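To make the new rising/falling threshold loop above easier to follow, here is a hedged standalone sketch of how the trigger levels end up packed one byte per trip point. The trigger levels 85/103/110 and the falling hysteresis of 10 come from the default platform data in this patch; temp_to_code() depends on per-chip trim data, so a simple stub stands in for it here, and the driver's per-code validity checks are omitted:

#include <stdio.h>

/* Stand-in for temp_to_code(); the real conversion uses per-chip trim data. */
static int temp_to_code_stub(int temp)
{
	return temp & 0xff;
}

int main(void)
{
	/* Default trigger levels and falling hysteresis from the driver data. */
	int trigger_levels[] = { 85, 103, 110 };
	int threshold_falling = 10;
	unsigned int rising = 0, falling = 0;
	int i;

	for (i = 0; i < 3; i++) {
		/* One byte per trip point, lowest trip in the lowest byte. */
		rising |= temp_to_code_stub(trigger_levels[i]) << (8 * i);
		falling |= temp_to_code_stub(trigger_levels[i] -
					     threshold_falling) << (8 * i);
	}
	/* Prints rising=0x6e6755 falling=0x645d4b for these inputs. */
	printf("rising=0x%x falling=0x%x\n", rising, falling);
	return 0;
}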
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
new file mode 100644 (file)
index 0000000..b40b37c
--- /dev/null
@@ -0,0 +1,795 @@
+/*
+ * intel_powerclamp.c - package c-state idle injection
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ *
+ * Authors:
+ *     Arjan van de Ven <arjan@linux.intel.com>
+ *     Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *
+ *     TODO:
+ *           1. better handle wakeup from external interrupts, currently a fixed
+ *              compensation is added to clamping duration when excessive amount
+ *              of wakeups are observed during idle time. the reason is that in
+ *              case of external interrupts without need for ack, clamping down
+ *              cpu in non-irq context does not reduce irq. for majority of the
+ *              cases, clamping down cpu does help reduce irq as well, we should
+ *              be able to differentiate the two cases and give a quantitative
+ *              solution for the irqs that we can control. perhaps based on
+ *              get_cpu_iowait_time_us()
+ *
+ *          2. synchronization with other hw blocks
+ *
+ *
+ */
+
+#define pr_fmt(fmt)    KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/cpu.h>
+#include <linux/thermal.h>
+#include <linux/slab.h>
+#include <linux/tick.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/sched/rt.h>
+
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/mwait.h>
+#include <asm/cpu_device_id.h>
+#include <asm/idle.h>
+#include <asm/hardirq.h>
+
+#define MAX_TARGET_RATIO (50U)
+/* For each undisturbed clamping period (no extra wake ups during idle time),
+ * we increment the confidence counter for the given target ratio.
+ * CONFIDENCE_OK defines the level where runtime calibration results are
+ * valid.
+ */
+#define CONFIDENCE_OK (3)
+/* Default idle injection duration; the driver adjusts the sleep time to meet
+ * the target idle ratio. Similar to frequency modulation.
+ */
+#define DEFAULT_DURATION_JIFFIES (6)
+
+static unsigned int target_mwait;
+static struct dentry *debug_dir;
+
+/* user selected target */
+static unsigned int set_target_ratio;
+static unsigned int current_ratio;
+static bool should_skip;
+static bool reduce_irq;
+static atomic_t idle_wakeup_counter;
+static unsigned int control_cpu; /* The cpu assigned to collect stat and update
+                                 * control parameters. default to BSP but BSP
+                                 * can be offlined.
+                                 */
+static bool clamping;
+
+
+static struct task_struct * __percpu *powerclamp_thread;
+static struct thermal_cooling_device *cooling_dev;
+static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
+                                          * clamping thread
+                                          */
+
+static unsigned int duration;
+static unsigned int pkg_cstate_ratio_cur;
+static unsigned int window_size;
+
+static int duration_set(const char *arg, const struct kernel_param *kp)
+{
+       int ret = 0;
+       unsigned long new_duration;
+
+       ret = kstrtoul(arg, 10, &new_duration);
+       if (ret)
+               goto exit;
+       if (new_duration > 25 || new_duration < 6) {
+               pr_err("Out of recommended range %lu, between 6-25ms\n",
+                       new_duration);
+               ret = -EINVAL;
+       }
+
+       duration = clamp(new_duration, 6ul, 25ul);
+       smp_mb();
+
+exit:
+
+       return ret;
+}
+
+static struct kernel_param_ops duration_ops = {
+       .set = duration_set,
+       .get = param_get_int,
+};
+
+
+module_param_cb(duration, &duration_ops, &duration, 0644);
+MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
+
+struct powerclamp_calibration_data {
+       unsigned long confidence;  /* used for calibration: a counter that
+                                   * gets incremented each time a clamping
+                                   * period is completed without extra wakeups.
+                                   * Once that counter reaches a given level,
+                                   * compensation is deemed usable.
+                                   */
+       unsigned long steady_comp; /* steady state compensation used when
+                                   * no extra wakeups occurred.
+                                   */
+       unsigned long dynamic_comp; /* compensate excessive wakeup from idle
+                                    * mostly from external interrupts.
+                                    */
+};
+
+static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
+
+static int window_size_set(const char *arg, const struct kernel_param *kp)
+{
+       int ret = 0;
+       unsigned long new_window_size;
+
+       ret = kstrtoul(arg, 10, &new_window_size);
+       if (ret)
+               goto exit_win;
+       if (new_window_size > 10 || new_window_size < 2) {
+               pr_err("Out of recommended window size %lu, between 2-10\n",
+                       new_window_size);
+               ret = -EINVAL;
+       }
+
+       window_size = clamp(new_window_size, 2ul, 10ul);
+       smp_mb();
+
+exit_win:
+
+       return ret;
+}
+
+static struct kernel_param_ops window_size_ops = {
+       .set = window_size_set,
+       .get = param_get_int,
+};
+
+module_param_cb(window_size, &window_size_ops, &window_size, 0644);
+MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
+       "\tpowerclamp controls idle ratio within this window. larger\n"
+       "\twindow size results in slower response time but smoother\n"
+       "\tclamping results. defaults to 2.");
+
+static void find_target_mwait(void)
+{
+       unsigned int eax, ebx, ecx, edx;
+       unsigned int highest_cstate = 0;
+       unsigned int highest_subcstate = 0;
+       int i;
+
+       if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+               return;
+
+       cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+       if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+           !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+               return;
+
+       edx >>= MWAIT_SUBSTATE_SIZE;
+       for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+               if (edx & MWAIT_SUBSTATE_MASK) {
+                       highest_cstate = i;
+                       highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+               }
+       }
+       target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+               (highest_subcstate - 1);
+
+}
+
+static u64 pkg_state_counter(void)
+{
+       u64 val;
+       u64 count = 0;
+
+       static bool skip_c2;
+       static bool skip_c3;
+       static bool skip_c6;
+       static bool skip_c7;
+
+       if (!skip_c2) {
+               if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val))
+                       count += val;
+               else
+                       skip_c2 = true;
+       }
+
+       if (!skip_c3) {
+               if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val))
+                       count += val;
+               else
+                       skip_c3 = true;
+       }
+
+       if (!skip_c6) {
+               if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val))
+                       count += val;
+               else
+                       skip_c6 = true;
+       }
+
+       if (!skip_c7) {
+               if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val))
+                       count += val;
+               else
+                       skip_c7 = true;
+       }
+
+       return count;
+}
+
+static void noop_timer(unsigned long foo)
+{
+       /* empty... just the fact that we get the interrupt wakes us up */
+}
+
+static unsigned int get_compensation(int ratio)
+{
+       unsigned int comp = 0;
+
+       /* we only use compensation if all adjacent ones are good */
+       if (ratio == 1 &&
+               cal_data[ratio].confidence >= CONFIDENCE_OK &&
+               cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
+               cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
+               comp = (cal_data[ratio].steady_comp +
+                       cal_data[ratio + 1].steady_comp +
+                       cal_data[ratio + 2].steady_comp) / 3;
+       } else if (ratio == MAX_TARGET_RATIO - 1 &&
+               cal_data[ratio].confidence >= CONFIDENCE_OK &&
+               cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
+               cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
+               comp = (cal_data[ratio].steady_comp +
+                       cal_data[ratio - 1].steady_comp +
+                       cal_data[ratio - 2].steady_comp) / 3;
+       } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
+               cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
+               cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
+               comp = (cal_data[ratio].steady_comp +
+                       cal_data[ratio - 1].steady_comp +
+                       cal_data[ratio + 1].steady_comp) / 3;
+       }
+
+       /* REVISIT: simple penalty of double idle injection */
+       if (reduce_irq)
+               comp = ratio;
+       /* do not exceed limit */
+       if (comp + ratio >= MAX_TARGET_RATIO)
+               comp = MAX_TARGET_RATIO - ratio - 1;
+
+       return comp;
+}
+
+static void adjust_compensation(int target_ratio, unsigned int win)
+{
+       int delta;
+       struct powerclamp_calibration_data *d = &cal_data[target_ratio];
+
+       /*
+        * only adjust compensations if the confidence level has not been
+        * reached yet; if there were too many wakeups during the last idle
+        * injection period, we cannot trust the data for compensation.
+        */
+       if (d->confidence >= CONFIDENCE_OK ||
+               atomic_read(&idle_wakeup_counter) >
+               win * num_online_cpus())
+               return;
+
+       delta = set_target_ratio - current_ratio;
+       /* filter out bad data */
+       if (delta >= 0 && delta <= (1+target_ratio/10)) {
+               if (d->steady_comp)
+                       d->steady_comp =
+                               roundup(delta+d->steady_comp, 2)/2;
+               else
+                       d->steady_comp = delta;
+               d->confidence++;
+       }
+}
+
+static bool powerclamp_adjust_controls(unsigned int target_ratio,
+                               unsigned int guard, unsigned int win)
+{
+       static u64 msr_last, tsc_last;
+       u64 msr_now, tsc_now;
+       u64 val64;
+
+       /* check result for the last window */
+       msr_now = pkg_state_counter();
+       rdtscll(tsc_now);
+
+       /* calculate pkg cstate vs tsc ratio */
+       if (!msr_last || !tsc_last)
+               current_ratio = 1;
+       else if (tsc_now-tsc_last) {
+               val64 = 100*(msr_now-msr_last);
+               do_div(val64, (tsc_now-tsc_last));
+               current_ratio = val64;
+       }
+
+       /* update record */
+       msr_last = msr_now;
+       tsc_last = tsc_now;
+
+       adjust_compensation(target_ratio, win);
+       /*
+        * too many external interrupts, set flag such
+        * that we can take measure later.
+        */
+       reduce_irq = atomic_read(&idle_wakeup_counter) >=
+               2 * win * num_online_cpus();
+
+       atomic_set(&idle_wakeup_counter, 0);
+       /* if we are above target+guard, skip */
+       return set_target_ratio + guard <= current_ratio;
+}
+
+static int clamp_thread(void *arg)
+{
+       int cpunr = (unsigned long)arg;
+       DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
+       static const struct sched_param param = {
+               .sched_priority = MAX_USER_RT_PRIO/2,
+       };
+       unsigned int count = 0;
+       unsigned int target_ratio;
+
+       set_bit(cpunr, cpu_clamping_mask);
+       set_freezable();
+       init_timer_on_stack(&wakeup_timer);
+       sched_setscheduler(current, SCHED_FIFO, &param);
+
+       while (true == clamping && !kthread_should_stop() &&
+               cpu_online(cpunr)) {
+               int sleeptime;
+               unsigned long target_jiffies;
+               unsigned int guard;
+               unsigned int compensation = 0;
+               int interval; /* jiffies to sleep for each attempt */
+               unsigned int duration_jiffies = msecs_to_jiffies(duration);
+               unsigned int window_size_now;
+
+               try_to_freeze();
+               /*
+                * make sure user selected ratio does not take effect until
+                * the next round. adjust target_ratio if user has changed
+                * target such that we can converge quickly.
+                */
+               target_ratio = set_target_ratio;
+               guard = 1 + target_ratio/20;
+               window_size_now = window_size;
+               count++;
+
+               /*
+                * systems may have different ability to enter package level
+                * c-states, thus we need to compensate the injected idle ratio
+                * to achieve the actual target reported by the HW.
+                */
+               compensation = get_compensation(target_ratio);
+               interval = duration_jiffies*100/(target_ratio+compensation);
+
+               /* align idle time */
+               target_jiffies = roundup(jiffies, interval);
+               sleeptime = target_jiffies - jiffies;
+               if (sleeptime <= 0)
+                       sleeptime = 1;
+               schedule_timeout_interruptible(sleeptime);
+               /*
+                * only elected controlling cpu can collect stats and update
+                * control parameters.
+                */
+               if (cpunr == control_cpu && !(count%window_size_now)) {
+                       should_skip =
+                               powerclamp_adjust_controls(target_ratio,
+                                                       guard, window_size_now);
+                       smp_mb();
+               }
+
+               if (should_skip)
+                       continue;
+
+               target_jiffies = jiffies + duration_jiffies;
+               mod_timer(&wakeup_timer, target_jiffies);
+               if (unlikely(local_softirq_pending()))
+                       continue;
+               /*
+                * stop tick sched during idle time, interrupts are still
+                * allowed. thus jiffies are updated properly.
+                */
+               preempt_disable();
+               tick_nohz_idle_enter();
+               /* mwait until target jiffies is reached */
+               while (time_before(jiffies, target_jiffies)) {
+                       unsigned long ecx = 1;
+                       unsigned long eax = target_mwait;
+
+                       /*
+                        * REVISIT: may call enter_idle() to notify drivers who
+                        * can save power during cpu idle. same for exit_idle()
+                        */
+                       local_touch_nmi();
+                       stop_critical_timings();
+                       __monitor((void *)&current_thread_info()->flags, 0, 0);
+                       cpu_relax(); /* allow HT sibling to run */
+                       __mwait(eax, ecx);
+                       start_critical_timings();
+                       atomic_inc(&idle_wakeup_counter);
+               }
+               tick_nohz_idle_exit();
+               preempt_enable_no_resched();
+       }
+       del_timer_sync(&wakeup_timer);
+       clear_bit(cpunr, cpu_clamping_mask);
+
+       return 0;
+}
+
+/*
+ * 1 HZ polling while clamping is active, useful for userspace
+ * to monitor actual idle ratio.
+ */
+static void poll_pkg_cstate(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
+static void poll_pkg_cstate(struct work_struct *dummy)
+{
+       static u64 msr_last;
+       static u64 tsc_last;
+       static unsigned long jiffies_last;
+
+       u64 msr_now;
+       unsigned long jiffies_now;
+       u64 tsc_now;
+       u64 val64;
+
+       msr_now = pkg_state_counter();
+       rdtscll(tsc_now);
+       jiffies_now = jiffies;
+
+       /* calculate pkg cstate vs tsc ratio */
+       if (!msr_last || !tsc_last)
+               pkg_cstate_ratio_cur = 1;
+       else {
+               if (tsc_now - tsc_last) {
+                       val64 = 100 * (msr_now - msr_last);
+                       do_div(val64, (tsc_now - tsc_last));
+                       pkg_cstate_ratio_cur = val64;
+               }
+       }
+
+       /* update record */
+       msr_last = msr_now;
+       jiffies_last = jiffies_now;
+       tsc_last = tsc_now;
+
+       if (true == clamping)
+               schedule_delayed_work(&poll_pkg_cstate_work, HZ);
+}
+
+static int start_power_clamp(void)
+{
+       unsigned long cpu;
+       struct task_struct *thread;
+
+       /* check if pkg cstate counter is completely 0, abort in this case */
+       if (!pkg_state_counter()) {
+               pr_err("pkg cstate counter not functional, abort\n");
+               return -EINVAL;
+       }
+
+       set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
+       /* prevent cpu hotplug */
+       get_online_cpus();
+
+       /* prefer BSP */
+       control_cpu = 0;
+       if (!cpu_online(control_cpu))
+               control_cpu = smp_processor_id();
+
+       clamping = true;
+       schedule_delayed_work(&poll_pkg_cstate_work, 0);
+
+       /* start one thread per online cpu */
+       for_each_online_cpu(cpu) {
+               struct task_struct **p =
+                       per_cpu_ptr(powerclamp_thread, cpu);
+
+               thread = kthread_create_on_node(clamp_thread,
+                                               (void *) cpu,
+                                               cpu_to_node(cpu),
+                                               "kidle_inject/%ld", cpu);
+               /* bind to cpu here */
+               if (likely(!IS_ERR(thread))) {
+                       kthread_bind(thread, cpu);
+                       wake_up_process(thread);
+                       *p = thread;
+               }
+
+       }
+       put_online_cpus();
+
+       return 0;
+}
+
+static void end_power_clamp(void)
+{
+       int i;
+       struct task_struct *thread;
+
+       clamping = false;
+       /*
+        * make clamping visible to other cpus and give the per cpu clamping
+        * threads some time to exit, or they get killed later.
+        */
+       smp_mb();
+       msleep(20);
+       if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
+               for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
+                       pr_debug("clamping thread for cpu %d alive, kill\n", i);
+                       thread = *per_cpu_ptr(powerclamp_thread, i);
+                       kthread_stop(thread);
+               }
+       }
+}
+
+static int powerclamp_cpu_callback(struct notifier_block *nfb,
+                               unsigned long action, void *hcpu)
+{
+       unsigned long cpu = (unsigned long)hcpu;
+       struct task_struct *thread;
+       struct task_struct **percpu_thread =
+               per_cpu_ptr(powerclamp_thread, cpu);
+
+       if (false == clamping)
+               goto exit_ok;
+
+       switch (action) {
+       case CPU_ONLINE:
+               thread = kthread_create_on_node(clamp_thread,
+                                               (void *) cpu,
+                                               cpu_to_node(cpu),
+                                               "kidle_inject/%lu", cpu);
+               if (likely(!IS_ERR(thread))) {
+                       kthread_bind(thread, cpu);
+                       wake_up_process(thread);
+                       *percpu_thread = thread;
+               }
+               /* prefer BSP as controlling CPU */
+               if (cpu == 0) {
+                       control_cpu = 0;
+                       smp_mb();
+               }
+               break;
+       case CPU_DEAD:
+               if (test_bit(cpu, cpu_clamping_mask)) {
+                       pr_err("cpu %lu dead but powerclamping thread is not\n",
+                               cpu);
+                       kthread_stop(*percpu_thread);
+               }
+               if (cpu == control_cpu) {
+                       control_cpu = smp_processor_id();
+                       smp_mb();
+               }
+       }
+
+exit_ok:
+       return NOTIFY_OK;
+}
+
+static struct notifier_block powerclamp_cpu_notifier = {
+       .notifier_call = powerclamp_cpu_callback,
+};
+
+static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
+                                unsigned long *state)
+{
+       *state = MAX_TARGET_RATIO;
+
+       return 0;
+}
+
+static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
+                                unsigned long *state)
+{
+       if (true == clamping)
+               *state = pkg_cstate_ratio_cur;
+       else
+               /* to save power, do not poll idle ratio while not clamping */
+               *state = -1; /* indicates invalid state */
+
+       return 0;
+}
+
+static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
+                                unsigned long new_target_ratio)
+{
+       int ret = 0;
+
+       new_target_ratio = clamp(new_target_ratio, 0UL,
+                               (unsigned long) (MAX_TARGET_RATIO-1));
+       if (set_target_ratio == 0 && new_target_ratio > 0) {
+               pr_info("Start idle injection to reduce power\n");
+               set_target_ratio = new_target_ratio;
+               ret = start_power_clamp();
+               goto exit_set;
+       } else  if (set_target_ratio > 0 && new_target_ratio == 0) {
+               pr_info("Stop forced idle injection\n");
+               set_target_ratio = 0;
+               end_power_clamp();
+       } else  /* adjust currently running */ {
+               set_target_ratio = new_target_ratio;
+               /* make new set_target_ratio visible to other cpus */
+               smp_mb();
+       }
+
+exit_set:
+       return ret;
+}
+
+/* bind to generic thermal layer as cooling device*/
+static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
+       .get_max_state = powerclamp_get_max_state,
+       .get_cur_state = powerclamp_get_cur_state,
+       .set_cur_state = powerclamp_set_cur_state,
+};
+
+/* runs on Nehalem and later */
+static const struct x86_cpu_id intel_powerclamp_ids[] = {
+       { X86_VENDOR_INTEL, 6, 0x1a},
+       { X86_VENDOR_INTEL, 6, 0x1c},
+       { X86_VENDOR_INTEL, 6, 0x1e},
+       { X86_VENDOR_INTEL, 6, 0x1f},
+       { X86_VENDOR_INTEL, 6, 0x25},
+       { X86_VENDOR_INTEL, 6, 0x26},
+       { X86_VENDOR_INTEL, 6, 0x2a},
+       { X86_VENDOR_INTEL, 6, 0x2c},
+       { X86_VENDOR_INTEL, 6, 0x2d},
+       { X86_VENDOR_INTEL, 6, 0x2e},
+       { X86_VENDOR_INTEL, 6, 0x2f},
+       { X86_VENDOR_INTEL, 6, 0x3a},
+       {}
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
+
+static int powerclamp_probe(void)
+{
+       if (!x86_match_cpu(intel_powerclamp_ids)) {
+               pr_err("Intel powerclamp does not run on family %d model %d\n",
+                               boot_cpu_data.x86, boot_cpu_data.x86_model);
+               return -ENODEV;
+       }
+       if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
+               !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
+               !boot_cpu_has(X86_FEATURE_MWAIT) ||
+               !boot_cpu_has(X86_FEATURE_ARAT))
+               return -ENODEV;
+
+       /* find the deepest mwait value */
+       find_target_mwait();
+
+       return 0;
+}
+
+static int powerclamp_debug_show(struct seq_file *m, void *unused)
+{
+       int i = 0;
+
+       seq_printf(m, "controlling cpu: %d\n", control_cpu);
+       seq_printf(m, "pct confidence steady dynamic (compensation)\n");
+       for (i = 0; i < MAX_TARGET_RATIO; i++) {
+               seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
+                       i,
+                       cal_data[i].confidence,
+                       cal_data[i].steady_comp,
+                       cal_data[i].dynamic_comp);
+       }
+
+       return 0;
+}
+
+static int powerclamp_debug_open(struct inode *inode,
+                       struct file *file)
+{
+       return single_open(file, powerclamp_debug_show, inode->i_private);
+}
+
+static const struct file_operations powerclamp_debug_fops = {
+       .open           = powerclamp_debug_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+       .owner          = THIS_MODULE,
+};
+
+static inline void powerclamp_create_debug_files(void)
+{
+       debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
+       if (!debug_dir)
+               return;
+
+       if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir,
+                                       cal_data, &powerclamp_debug_fops))
+               goto file_error;
+
+       return;
+
+file_error:
+       debugfs_remove_recursive(debug_dir);
+}
+
+static int powerclamp_init(void)
+{
+       int retval;
+       int bitmap_size;
+
+       bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
+       cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
+       if (!cpu_clamping_mask)
+               return -ENOMEM;
+
+       /* probe cpu features and ids here */
+       retval = powerclamp_probe();
+       if (retval)
+               return retval;
+       /* set default limit, maybe adjusted during runtime based on feedback */
+       window_size = 2;
+       register_hotcpu_notifier(&powerclamp_cpu_notifier);
+       powerclamp_thread = alloc_percpu(struct task_struct *);
+       cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
+                                               &powerclamp_cooling_ops);
+       if (IS_ERR(cooling_dev))
+               return -ENODEV;
+
+       if (!duration)
+               duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);
+       powerclamp_create_debug_files();
+
+       return 0;
+}
+module_init(powerclamp_init);
+
+static void powerclamp_exit(void)
+{
+       unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
+       end_power_clamp();
+       free_percpu(powerclamp_thread);
+       thermal_cooling_device_unregister(cooling_dev);
+       kfree(cpu_clamping_mask);
+
+       cancel_delayed_work_sync(&poll_pkg_cstate_work);
+       debugfs_remove_recursive(debug_dir);
+}
+module_exit(powerclamp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
+MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");
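A small worked example of the scheduling arithmetic in clamp_thread() above: with the default 6-jiffy injection duration, a 25% target ratio and an assumed compensation of 2, the thread injects one duration of idle roughly every interval jiffies. A hedged standalone sketch of that calculation (the compensation value is an assumption; in the driver it comes from runtime calibration):

#include <stdio.h>

int main(void)
{
	/* Default duration in jiffies and an example target ratio. */
	unsigned int duration_jiffies = 6;
	unsigned int target_ratio = 25;	/* percent */
	unsigned int compensation = 2;	/* assumed calibration result */

	/* Same formula as clamp_thread(): spread the idle time so that
	 * duration_jiffies out of every interval jiffies are injected. */
	int interval = duration_jiffies * 100 / (target_ratio + compensation);

	/* Prints: inject 6 jiffies of idle every 22 jiffies */
	printf("inject %u jiffies of idle every %d jiffies\n",
	       duration_jiffies, interval);
	return 0;
}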
diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c
new file mode 100644 (file)
index 0000000..65cb4f0
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Kirkwood thermal sensor driver
+ *
+ * Copyright (C) 2012 Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#define KIRKWOOD_THERMAL_VALID_OFFSET  9
+#define KIRKWOOD_THERMAL_VALID_MASK    0x1
+#define KIRKWOOD_THERMAL_TEMP_OFFSET   10
+#define KIRKWOOD_THERMAL_TEMP_MASK     0x1FF
+
+/* Kirkwood Thermal Sensor Dev Structure */
+struct kirkwood_thermal_priv {
+       void __iomem *sensor;
+};
+
+static int kirkwood_get_temp(struct thermal_zone_device *thermal,
+                         unsigned long *temp)
+{
+       unsigned long reg;
+       struct kirkwood_thermal_priv *priv = thermal->devdata;
+
+       reg = readl_relaxed(priv->sensor);
+
+       /* Valid check */
+       if (!(reg >> KIRKWOOD_THERMAL_VALID_OFFSET) &
+           KIRKWOOD_THERMAL_VALID_MASK) {
+               dev_err(&thermal->device,
+                       "Temperature sensor reading not valid\n");
+               return -EIO;
+       }
+
+       /*
+        * Calculate temperature. See Section 8.10.1 of the 88AP510
+        * datasheet, which has the same sensor;
+        * see also Documentation/arm/Marvell/README.
+        */
+       reg = (reg >> KIRKWOOD_THERMAL_TEMP_OFFSET) &
+               KIRKWOOD_THERMAL_TEMP_MASK;
+       *temp = ((2281638UL - (7298*reg)) / 10);
+
+       return 0;
+}
+
+static struct thermal_zone_device_ops ops = {
+       .get_temp = kirkwood_get_temp,
+};
+
+static const struct of_device_id kirkwood_thermal_id_table[] = {
+       { .compatible = "marvell,kirkwood-thermal" },
+       {}
+};
+
+static int kirkwood_thermal_probe(struct platform_device *pdev)
+{
+       struct thermal_zone_device *thermal = NULL;
+       struct kirkwood_thermal_priv *priv;
+       struct resource *res;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get platform resource\n");
+               return -ENODEV;
+       }
+
+       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->sensor = devm_request_and_ioremap(&pdev->dev, res);
+       if (!priv->sensor) {
+               dev_err(&pdev->dev, "Failed to request_ioremap memory\n");
+               return -EADDRNOTAVAIL;
+       }
+
+       thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0,
+                                              priv, &ops, NULL, 0, 0);
+       if (IS_ERR(thermal)) {
+               dev_err(&pdev->dev,
+                       "Failed to register thermal zone device\n");
+               return PTR_ERR(thermal);
+       }
+
+       platform_set_drvdata(pdev, thermal);
+
+       return 0;
+}
+
+static int kirkwood_thermal_exit(struct platform_device *pdev)
+{
+       struct thermal_zone_device *kirkwood_thermal =
+               platform_get_drvdata(pdev);
+
+       thermal_zone_device_unregister(kirkwood_thermal);
+       platform_set_drvdata(pdev, NULL);
+
+       return 0;
+}
+
+MODULE_DEVICE_TABLE(of, kirkwood_thermal_id_table);
+
+static struct platform_driver kirkwood_thermal_driver = {
+       .probe = kirkwood_thermal_probe,
+       .remove = kirkwood_thermal_exit,
+       .driver = {
+               .name = "kirkwood_thermal",
+               .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(kirkwood_thermal_id_table),
+       },
+};
+
+module_platform_driver(kirkwood_thermal_driver);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu@nigauri.org>");
+MODULE_DESCRIPTION("kirkwood thermal driver");
+MODULE_LICENSE("GPL");
index 90db951725da46a7663264d94e2fa928a058e2dc..28f0919940137dcaf57fce04df69ddcdecfc173f 100644 (file)
  */
 #include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/reboot.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/thermal.h>
 
-#define THSCR  0x2c
-#define THSSR  0x30
+#define IDLE_INTERVAL  5000
+
+#define COMMON_STR     0x00
+#define COMMON_ENR     0x04
+#define COMMON_INTMSK  0x0c
+
+#define REG_POSNEG     0x20
+#define REG_FILONOFF   0x28
+#define REG_THSCR      0x2c
+#define REG_THSSR      0x30
+#define REG_INTCTRL    0x34
 
 /* THSCR */
-#define CPTAP  0xf
+#define CPCTL  (1 << 12)
 
 /* THSSR */
 #define CTEMP  0x3f
 
-
-struct rcar_thermal_priv {
+struct rcar_thermal_common {
        void __iomem *base;
        struct device *dev;
+       struct list_head head;
        spinlock_t lock;
-       u32 comp;
 };
 
+struct rcar_thermal_priv {
+       void __iomem *base;
+       struct rcar_thermal_common *common;
+       struct thermal_zone_device *zone;
+       struct delayed_work work;
+       struct mutex lock;
+       struct list_head list;
+       int id;
+       int ctemp;
+};
+
+#define rcar_thermal_for_each_priv(pos, common)        \
+       list_for_each_entry(pos, &common->head, list)
+
 #define MCELSIUS(temp)                 ((temp) * 1000)
-#define rcar_zone_to_priv(zone)                (zone->devdata)
+#define rcar_zone_to_priv(zone)                ((zone)->devdata)
+#define rcar_priv_to_dev(priv)         ((priv)->common->dev)
+#define rcar_has_irq_support(priv)     ((priv)->common->base)
+#define rcar_id_to_shift(priv)         ((priv)->id * 8)
+
+#ifdef DEBUG
+# define rcar_force_update_temp(priv)  1
+#else
+# define rcar_force_update_temp(priv)  0
+#endif
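+/* with DEBUG set, get_temp() always re-reads the hardware */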
 
 /*
  *             basic functions
  */
-static u32 rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg)
+#define rcar_thermal_common_read(c, r) \
+       _rcar_thermal_common_read(c, COMMON_ ##r)
+static u32 _rcar_thermal_common_read(struct rcar_thermal_common *common,
+                                    u32 reg)
 {
-       unsigned long flags;
-       u32 ret;
-
-       spin_lock_irqsave(&priv->lock, flags);
+       return ioread32(common->base + reg);
+}
 
-       ret = ioread32(priv->base + reg);
+#define rcar_thermal_common_write(c, r, d) \
+       _rcar_thermal_common_write(c, COMMON_ ##r, d)
+static void _rcar_thermal_common_write(struct rcar_thermal_common *common,
+                                      u32 reg, u32 data)
+{
+       iowrite32(data, common->base + reg);
+}
 
-       spin_unlock_irqrestore(&priv->lock, flags);
+#define rcar_thermal_common_bset(c, r, m, d) \
+       _rcar_thermal_common_bset(c, COMMON_ ##r, m, d)
+static void _rcar_thermal_common_bset(struct rcar_thermal_common *common,
+                                     u32 reg, u32 mask, u32 data)
+{
+       u32 val;
 
-       return ret;
+       val = ioread32(common->base + reg);
+       val &= ~mask;
+       val |= (data & mask);
+       iowrite32(val, common->base + reg);
 }
 
-#if 0 /* no user at this point */
-static void rcar_thermal_write(struct rcar_thermal_priv *priv,
-                              u32 reg, u32 data)
+#define rcar_thermal_read(p, r) _rcar_thermal_read(p, REG_ ##r)
+static u32 _rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&priv->lock, flags);
+       return ioread32(priv->base + reg);
+}
 
+#define rcar_thermal_write(p, r, d) _rcar_thermal_write(p, REG_ ##r, d)
+static void _rcar_thermal_write(struct rcar_thermal_priv *priv,
+                               u32 reg, u32 data)
+{
        iowrite32(data, priv->base + reg);
-
-       spin_unlock_irqrestore(&priv->lock, flags);
 }
-#endif
 
-static void rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg,
-                             u32 mask, u32 data)
+#define rcar_thermal_bset(p, r, m, d) _rcar_thermal_bset(p, REG_ ##r, m, d)
+static void _rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg,
+                              u32 mask, u32 data)
 {
-       unsigned long flags;
        u32 val;
 
-       spin_lock_irqsave(&priv->lock, flags);
-
        val = ioread32(priv->base + reg);
        val &= ~mask;
        val |= (data & mask);
        iowrite32(val, priv->base + reg);
-
-       spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 /*
  *             zone device functions
  */
-static int rcar_thermal_get_temp(struct thermal_zone_device *zone,
-                          unsigned long *temp)
+static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv)
 {
-       struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
-       int val, min, max, tmp;
-
-       tmp = -200; /* default */
-       while (1) {
-               if (priv->comp < 1 || priv->comp > 12) {
-                       dev_err(priv->dev,
-                               "THSSR invalid data (%d)\n", priv->comp);
-                       priv->comp = 4; /* for next thermal */
-                       return -EINVAL;
-               }
+       struct device *dev = rcar_priv_to_dev(priv);
+       int i;
+       int ctemp, old, new;
 
-               /*
-                * THS comparator offset and the reference temperature
-                *
-                * Comparator   | reference     | Temperature field
-                * offset       | temperature   | measurement
-                *              | (degrees C)   | (degrees C)
-                * -------------+---------------+-------------------
-                *  1           |  -45          |  -45 to  -30
-                *  2           |  -30          |  -30 to  -15
-                *  3           |  -15          |  -15 to    0
-                *  4           |    0          |    0 to  +15
-                *  5           |  +15          |  +15 to  +30
-                *  6           |  +30          |  +30 to  +45
-                *  7           |  +45          |  +45 to  +60
-                *  8           |  +60          |  +60 to  +75
-                *  9           |  +75          |  +75 to  +90
-                * 10           |  +90          |  +90 to +105
-                * 11           | +105          | +105 to +120
-                * 12           | +120          | +120 to +135
-                */
+       mutex_lock(&priv->lock);
 
-               /* calculate thermal limitation */
-               min = (priv->comp * 15) - 60;
-               max = min + 15;
+       /*
+        * The TSC decides the CPTAP value automatically;
+        * this is also the condition that makes the interrupt valid.
+        */
+       rcar_thermal_bset(priv, THSCR, CPCTL, CPCTL);
 
+       ctemp = 0;
+       old = ~0;
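+       /* poll until two consecutive readings match (at most 128 tries) */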
+       for (i = 0; i < 128; i++) {
                /*
                 * we need to wait 300us after changing comparator offset
                 * to get stable temperature.
                 * see "Usage Notes" on datasheet
                 */
-               rcar_thermal_bset(priv, THSCR, CPTAP, priv->comp);
                udelay(300);
 
-               /* calculate current temperature */
-               val = rcar_thermal_read(priv, THSSR) & CTEMP;
-               val = (val * 5) - 65;
+               new = rcar_thermal_read(priv, THSSR) & CTEMP;
+               if (new == old) {
+                       ctemp = new;
+                       break;
+               }
+               old = new;
+       }
 
-               dev_dbg(priv->dev, "comp/min/max/val = %d/%d/%d/%d\n",
-                       priv->comp, min, max, val);
+       if (!ctemp) {
+               dev_err(dev, "thermal sensor was broken\n");
+               return -EINVAL;
+       }
 
-               /*
-                * If val is same as min/max, then,
-                * it should try again on next comparator.
-                * But the val might be correct temperature.
-                * Keep it on "tmp" and compare with next val.
-                */
-               if (tmp == val)
-                       break;
+       /*
+        * enable IRQ
+        */
+       if (rcar_has_irq_support(priv)) {
+               rcar_thermal_write(priv, FILONOFF, 0);
 
-               if (val <= min) {
-                       tmp = min;
-                       priv->comp--; /* try again */
-               } else if (val >= max) {
-                       tmp = max;
-                       priv->comp++; /* try again */
-               } else {
-                       tmp = val;
-                       break;
-               }
+               /* enable Rising/Falling edge interrupt */
+               rcar_thermal_write(priv, POSNEG,  0x1);
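+               /* thresholds: the current code and one step below it */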
+               rcar_thermal_write(priv, INTCTRL, (((ctemp - 0) << 8) |
+                                                  ((ctemp - 1) << 0)));
+       }
+
+       dev_dbg(dev, "thermal%d  %d -> %d\n", priv->id, priv->ctemp, ctemp);
+
+       priv->ctemp = ctemp;
+
+       mutex_unlock(&priv->lock);
+
+       return 0;
+}
+
+static int rcar_thermal_get_temp(struct thermal_zone_device *zone,
+                                unsigned long *temp)
+{
+       struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+
+       if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
+               rcar_thermal_update_temp(priv);
+
+       mutex_lock(&priv->lock);
+       *temp =  MCELSIUS((priv->ctemp * 5) - 65);
+       mutex_unlock(&priv->lock);
+
+       return 0;
+}
+
+static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone,
+                                     int trip, enum thermal_trip_type *type)
+{
+       struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+       struct device *dev = rcar_priv_to_dev(priv);
+
+       /* see rcar_thermal_get_temp() */
+       switch (trip) {
+       case 0: /* +90 <= temp */
+               *type = THERMAL_TRIP_CRITICAL;
+               break;
+       default:
+               dev_err(dev, "rcar driver trip error\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone,
+                                     int trip, unsigned long *temp)
+{
+       struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+       struct device *dev = rcar_priv_to_dev(priv);
+
+       /* see rcar_thermal_get_temp() */
+       switch (trip) {
+       case 0: /* +90 <= temp */
+               *temp = MCELSIUS(90);
+               break;
+       default:
+               dev_err(dev, "rcar driver trip error\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int rcar_thermal_notify(struct thermal_zone_device *zone,
+                              int trip, enum thermal_trip_type type)
+{
+       struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+       struct device *dev = rcar_priv_to_dev(priv);
+
+       switch (type) {
+       case THERMAL_TRIP_CRITICAL:
+               /* FIXME */
+               dev_warn(dev, "Thermal reached to critical temperature\n");
+               break;
+       default:
+               break;
        }
 
-       *temp = MCELSIUS(tmp);
        return 0;
 }
 
 static struct thermal_zone_device_ops rcar_thermal_zone_ops = {
-       .get_temp = rcar_thermal_get_temp,
+       .get_temp       = rcar_thermal_get_temp,
+       .get_trip_type  = rcar_thermal_get_trip_type,
+       .get_trip_temp  = rcar_thermal_get_trip_temp,
+       .notify         = rcar_thermal_notify,
 };
 
 /*
- *             platform functions
+ *             interrupt
  */
-static int rcar_thermal_probe(struct platform_device *pdev)
+#define rcar_thermal_irq_enable(p)     _rcar_thermal_irq_ctrl(p, 1)
+#define rcar_thermal_irq_disable(p)    _rcar_thermal_irq_ctrl(p, 0)
+static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable)
+{
+       struct rcar_thermal_common *common = priv->common;
+       unsigned long flags;
+       u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
+
+       spin_lock_irqsave(&common->lock, flags);
+
+       rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
+
+       spin_unlock_irqrestore(&common->lock, flags);
+}
+
+static void rcar_thermal_work(struct work_struct *work)
 {
-       struct thermal_zone_device *zone;
        struct rcar_thermal_priv *priv;
-       struct resource *res;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "Could not get platform resource\n");
-               return -ENODEV;
+       priv = container_of(work, struct rcar_thermal_priv, work.work);
+
+       rcar_thermal_update_temp(priv);
+       rcar_thermal_irq_enable(priv);
+       thermal_zone_device_update(priv->zone);
+}
+
+static u32 rcar_thermal_had_changed(struct rcar_thermal_priv *priv, u32 status)
+{
+       struct device *dev = rcar_priv_to_dev(priv);
+
+       status = (status >> rcar_id_to_shift(priv)) & 0x3;
+
+       if (status & 0x3) {
+               dev_dbg(dev, "thermal%d %s%s\n",
+                       priv->id,
+                       (status & 0x2) ? "Rising " : "",
+                       (status & 0x1) ? "Falling" : "");
        }
 
-       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-       if (!priv) {
-               dev_err(&pdev->dev, "Could not allocate priv\n");
-               return -ENOMEM;
+       return status;
+}
+
+static irqreturn_t rcar_thermal_irq(int irq, void *data)
+{
+       struct rcar_thermal_common *common = data;
+       struct rcar_thermal_priv *priv;
+       unsigned long flags;
+       u32 status, mask;
+
+       spin_lock_irqsave(&common->lock, flags);
+
+       mask    = rcar_thermal_common_read(common, INTMSK);
+       status  = rcar_thermal_common_read(common, STR);
+       rcar_thermal_common_write(common, STR, 0x000F0F0F & mask);
+
+       spin_unlock_irqrestore(&common->lock, flags);
+
+       status = status & ~mask;
+
+       /*
+        * check the status
+        */
+       rcar_thermal_for_each_priv(priv, common) {
+               if (rcar_thermal_had_changed(priv, status)) {
+                       rcar_thermal_irq_disable(priv);
+                       schedule_delayed_work(&priv->work,
+                                             msecs_to_jiffies(300));
+               }
        }
 
-       priv->comp = 4; /* basic setup */
-       priv->dev = &pdev->dev;
-       spin_lock_init(&priv->lock);
-       priv->base = devm_ioremap_nocache(&pdev->dev,
-                                         res->start, resource_size(res));
-       if (!priv->base) {
-               dev_err(&pdev->dev, "Unable to ioremap thermal register\n");
+       return IRQ_HANDLED;
+}
+
+/*
+ *             platform functions
+ */
+static int rcar_thermal_probe(struct platform_device *pdev)
+{
+       struct rcar_thermal_common *common;
+       struct rcar_thermal_priv *priv;
+       struct device *dev = &pdev->dev;
+       struct resource *res, *irq;
+       int mres = 0;
+       int i;
+       int idle = IDLE_INTERVAL;
+
+       common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL);
+       if (!common) {
+               dev_err(dev, "Could not allocate common\n");
                return -ENOMEM;
        }
 
-       zone = thermal_zone_device_register("rcar_thermal", 0, 0, priv,
-                                   &rcar_thermal_zone_ops, NULL, 0, 0);
-       if (IS_ERR(zone)) {
-               dev_err(&pdev->dev, "thermal zone device is NULL\n");
-               return PTR_ERR(zone);
+       INIT_LIST_HEAD(&common->head);
+       spin_lock_init(&common->lock);
+       common->dev = dev;
+
+       irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (irq) {
+               int ret;
+
+               /*
+                * The platform has IRQ support,
+                * so the driver uses the common registers.
+                */
+               res = platform_get_resource(pdev, IORESOURCE_MEM, mres++);
+               if (!res) {
+                       dev_err(dev, "Could not get platform resource\n");
+                       return -ENODEV;
+               }
+
+               ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, 0,
+                                      dev_name(dev), common);
+               if (ret) {
+                       dev_err(dev, "irq request failed\n ");
+                       return ret;
+               }
+
+               /*
+                * mapping common->base makes rcar_has_irq_support() return true
+                */
+               common->base = devm_request_and_ioremap(dev, res);
+               if (!common->base) {
+                       dev_err(dev, "Unable to ioremap thermal register\n");
+                       return -ENOMEM;
+               }
+
+               /* enable temperature comparison */
+               rcar_thermal_common_write(common, ENR, 0x00030303);
+
+               idle = 0; /* no polling delay is needed */
        }
 
-       platform_set_drvdata(pdev, zone);
+       for (i = 0;; i++) {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, mres++);
+               if (!res)
+                       break;
+
+               priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+               if (!priv) {
+                       dev_err(dev, "Could not allocate priv\n");
+                       return -ENOMEM;
+               }
+
+               priv->base = devm_request_and_ioremap(dev, res);
+               if (!priv->base) {
+                       dev_err(dev, "Unable to ioremap priv register\n");
+                       return -ENOMEM;
+               }
 
-       dev_info(&pdev->dev, "proved\n");
+               priv->common = common;
+               priv->id = i;
+               mutex_init(&priv->lock);
+               INIT_LIST_HEAD(&priv->list);
+               INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
+               rcar_thermal_update_temp(priv);
+
+               priv->zone = thermal_zone_device_register("rcar_thermal",
+                                               1, 0, priv,
+                                               &rcar_thermal_zone_ops, NULL, 0,
+                                               idle);
+               if (IS_ERR(priv->zone)) {
+                       dev_err(dev, "can't register thermal zone\n");
+                       goto error_unregister;
+               }
+
+               list_move_tail(&priv->list, &common->head);
+
+               if (rcar_has_irq_support(priv))
+                       rcar_thermal_irq_enable(priv);
+       }
+
+       platform_set_drvdata(pdev, common);
+
+       dev_info(dev, "%d sensor proved\n", i);
 
        return 0;
+
+error_unregister:
+       rcar_thermal_for_each_priv(priv, common)
+               thermal_zone_device_unregister(priv->zone);
+
+       return -ENODEV;
 }
 
 static int rcar_thermal_remove(struct platform_device *pdev)
 {
-       struct thermal_zone_device *zone = platform_get_drvdata(pdev);
+       struct rcar_thermal_common *common = platform_get_drvdata(pdev);
+       struct rcar_thermal_priv *priv;
+
+       rcar_thermal_for_each_priv(priv, common)
+               thermal_zone_device_unregister(priv->zone);
 
-       thermal_zone_device_unregister(zone);
        platform_set_drvdata(pdev, NULL);
 
        return 0;
 }
 
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+       { .compatible = "renesas,rcar-thermal", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
+
 static struct platform_driver rcar_thermal_driver = {
        .driver = {
                .name   = "rcar_thermal",
+               .of_match_table = rcar_thermal_dt_ids,
        },
        .probe          = rcar_thermal_probe,
        .remove         = rcar_thermal_remove,
index 6b2d8b21aaee03b5a723249cea6773f96c5adf19..3c5ee5607977c562dca9209d44c1b2627a2b760f 100644 (file)
@@ -131,7 +131,7 @@ static int spear_thermal_probe(struct platform_device *pdev)
                return -ENOMEM;
        }
 
-       stdev->clk = clk_get(&pdev->dev, NULL);
+       stdev->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(stdev->clk)) {
                dev_err(&pdev->dev, "Can't get clock\n");
                return PTR_ERR(stdev->clk);
@@ -140,7 +140,7 @@ static int spear_thermal_probe(struct platform_device *pdev)
        ret = clk_enable(stdev->clk);
        if (ret) {
                dev_err(&pdev->dev, "Can't enable clock\n");
-               goto put_clk;
+               return ret;
        }
 
        stdev->flags = val;
@@ -163,8 +163,6 @@ static int spear_thermal_probe(struct platform_device *pdev)
 
 disable_clk:
        clk_disable(stdev->clk);
-put_clk:
-       clk_put(stdev->clk);
 
        return ret;
 }
@@ -183,7 +181,6 @@ static int spear_thermal_exit(struct platform_device *pdev)
        writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base);
 
        clk_disable(stdev->clk);
-       clk_put(stdev->clk);
 
        return 0;
 }
index 0cd5e9fbab1c96ea7393db980aeafc26c87bb577..407cde3211c1bccfcf1e43515fb0a47a5a99e67c 100644 (file)
  *       state for this trip point
  *    b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
  *       state for this trip point
+ *    c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit
+ *       for this trip point
+ *    d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit
+ *       for this trip point
+ * If the temperature is lower than a trip point,
+ *    a. if the trend is THERMAL_TREND_RAISING, do nothing
+ *    b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
+ *       state for this trip point, if the cooling state already
+ *       equals lower limit, deactivate the thermal instance
+ *    c. if the trend is THERMAL_TREND_RAISE_FULL, do nothing
+ *    d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit,
+ *       if the cooling state already equals lower limit,
+ *       deactivate the thermal instance
  */
 static unsigned long get_target_state(struct thermal_instance *instance,
-                                       enum thermal_trend trend)
+                               enum thermal_trend trend, bool throttle)
 {
        struct thermal_cooling_device *cdev = instance->cdev;
        unsigned long cur_state;
 
        cdev->ops->get_cur_state(cdev, &cur_state);
 
-       if (trend == THERMAL_TREND_RAISING) {
-               cur_state = cur_state < instance->upper ?
-                           (cur_state + 1) : instance->upper;
-       } else if (trend == THERMAL_TREND_DROPPING) {
-               cur_state = cur_state > instance->lower ?
-                           (cur_state - 1) : instance->lower;
+       switch (trend) {
+       case THERMAL_TREND_RAISING:
+               if (throttle)
+                       cur_state = cur_state < instance->upper ?
+                                   (cur_state + 1) : instance->upper;
+               break;
+       case THERMAL_TREND_RAISE_FULL:
+               if (throttle)
+                       cur_state = instance->upper;
+               break;
+       case THERMAL_TREND_DROPPING:
+               if (cur_state == instance->lower) {
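+                       /* -1UL is THERMAL_NO_TARGET: deactivate this instance */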
+                       if (!throttle)
+                               cur_state = -1;
+               } else
+                       cur_state -= 1;
+               break;
+       case THERMAL_TREND_DROP_FULL:
+               if (cur_state == instance->lower) {
+                       if (!throttle)
+                               cur_state = -1;
+               } else
+                       cur_state = instance->lower;
+               break;
+       default:
+               break;
        }
 
        return cur_state;
@@ -66,57 +99,14 @@ static void update_passive_instance(struct thermal_zone_device *tz,
                tz->passive += value;
 }
 
-static void update_instance_for_throttle(struct thermal_zone_device *tz,
-                               int trip, enum thermal_trip_type trip_type,
-                               enum thermal_trend trend)
-{
-       struct thermal_instance *instance;
-
-       list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
-               if (instance->trip != trip)
-                       continue;
-
-               instance->target = get_target_state(instance, trend);
-
-               /* Activate a passive thermal instance */
-               if (instance->target == THERMAL_NO_TARGET)
-                       update_passive_instance(tz, trip_type, 1);
-
-               instance->cdev->updated = false; /* cdev needs update */
-       }
-}
-
-static void update_instance_for_dethrottle(struct thermal_zone_device *tz,
-                               int trip, enum thermal_trip_type trip_type)
-{
-       struct thermal_instance *instance;
-       struct thermal_cooling_device *cdev;
-       unsigned long cur_state;
-
-       list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
-               if (instance->trip != trip ||
-                       instance->target == THERMAL_NO_TARGET)
-                       continue;
-
-               cdev = instance->cdev;
-               cdev->ops->get_cur_state(cdev, &cur_state);
-
-               instance->target = cur_state > instance->lower ?
-                           (cur_state - 1) : THERMAL_NO_TARGET;
-
-               /* Deactivate a passive thermal instance */
-               if (instance->target == THERMAL_NO_TARGET)
-                       update_passive_instance(tz, trip_type, -1);
-
-               cdev->updated = false; /* cdev needs update */
-       }
-}
-
 static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 {
        long trip_temp;
        enum thermal_trip_type trip_type;
        enum thermal_trend trend;
+       struct thermal_instance *instance;
+       bool throttle = false;
+       int old_target;
 
        if (trip == THERMAL_TRIPS_NONE) {
                trip_temp = tz->forced_passive;
@@ -128,12 +118,30 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
 
        trend = get_tz_trend(tz, trip);
 
+       if (tz->temperature >= trip_temp)
+               throttle = true;
+
        mutex_lock(&tz->lock);
 
-       if (tz->temperature >= trip_temp)
-               update_instance_for_throttle(tz, trip, trip_type, trend);
-       else
-               update_instance_for_dethrottle(tz, trip, trip_type);
+       list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+               if (instance->trip != trip)
+                       continue;
+
+               old_target = instance->target;
+               instance->target = get_target_state(instance, trend, throttle);
+
+               /* Activate a passive thermal instance */
+               if (old_target == THERMAL_NO_TARGET &&
+                       instance->target != THERMAL_NO_TARGET)
+                       update_passive_instance(tz, trip_type, 1);
+               /* Deactivate a passive thermal instance */
+               else if (old_target != THERMAL_NO_TARGET &&
+                       instance->target == THERMAL_NO_TARGET)
+                       update_passive_instance(tz, trip_type, -1);
+
+
+       }
 
        mutex_unlock(&tz->lock);
 }
index 84e95f32cdb69458c4e1a1218c51c0813558d536..5b7863a03f98a59d1e3d7bf107a1f434f0082869 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/kdev_t.h>
 #include <linux/idr.h>
 #include <linux/thermal.h>
-#include <linux/spinlock.h>
 #include <linux/reboot.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -348,8 +347,9 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
                tz->ops->notify(tz, trip, trip_type);
 
        if (trip_type == THERMAL_TRIP_CRITICAL) {
-               pr_emerg("Critical temperature reached(%d C),shutting down\n",
-                        tz->temperature / 1000);
+               dev_emerg(&tz->device,
+                         "critical temperature reached(%d C),shutting down\n",
+                         tz->temperature / 1000);
                orderly_poweroff(true);
        }
 }
@@ -371,23 +371,57 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
        monitor_thermal_zone(tz);
 }
 
+static int thermal_zone_get_temp(struct thermal_zone_device *tz,
+                               unsigned long *temp)
+{
+       int ret = 0;
+#ifdef CONFIG_THERMAL_EMULATION
+       int count;
+       unsigned long crit_temp = -1UL;
+       enum thermal_trip_type type;
+#endif
+
+       mutex_lock(&tz->lock);
+
+       ret = tz->ops->get_temp(tz, temp);
+#ifdef CONFIG_THERMAL_EMULATION
+       if (!tz->emul_temperature)
+               goto skip_emul;
+
+       for (count = 0; count < tz->trips; count++) {
+               ret = tz->ops->get_trip_type(tz, count, &type);
+               if (!ret && type == THERMAL_TRIP_CRITICAL) {
+                       ret = tz->ops->get_trip_temp(tz, count, &crit_temp);
+                       break;
+               }
+       }
+
+       if (ret)
+               goto skip_emul;
+
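+       /* use the emulated value only while the real temp is below the critical trip */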
+       if (*temp < crit_temp)
+               *temp = tz->emul_temperature;
+skip_emul:
+#endif
+       mutex_unlock(&tz->lock);
+       return ret;
+}
+
 static void update_temperature(struct thermal_zone_device *tz)
 {
        long temp;
        int ret;
 
-       mutex_lock(&tz->lock);
-
-       ret = tz->ops->get_temp(tz, &temp);
+       ret = thermal_zone_get_temp(tz, &temp);
        if (ret) {
-               pr_warn("failed to read out thermal zone %d\n", tz->id);
-               goto exit;
+               dev_warn(&tz->device, "failed to read out thermal zone %d\n",
+                        tz->id);
+               return;
        }
 
+       mutex_lock(&tz->lock);
        tz->last_temperature = tz->temperature;
        tz->temperature = temp;
-
-exit:
        mutex_unlock(&tz->lock);
 }
 
@@ -430,10 +464,7 @@ temp_show(struct device *dev, struct device_attribute *attr, char *buf)
        long temperature;
        int ret;
 
-       if (!tz->ops->get_temp)
-               return -EPERM;
-
-       ret = tz->ops->get_temp(tz, &temperature);
+       ret = thermal_zone_get_temp(tz, &temperature);
 
        if (ret)
                return ret;
@@ -693,6 +724,31 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf)
        return sprintf(buf, "%s\n", tz->governor->name);
 }
 
+#ifdef CONFIG_THERMAL_EMULATION
+static ssize_t
+emul_temp_store(struct device *dev, struct device_attribute *attr,
+                    const char *buf, size_t count)
+{
+       struct thermal_zone_device *tz = to_thermal_zone(dev);
+       int ret = 0;
+       unsigned long temperature;
+
+       if (kstrtoul(buf, 10, &temperature))
+               return -EINVAL;
+
+       if (!tz->ops->set_emul_temp) {
+               mutex_lock(&tz->lock);
+               tz->emul_temperature = temperature;
+               mutex_unlock(&tz->lock);
+       } else {
+               ret = tz->ops->set_emul_temp(tz, temperature);
+       }
+
+       return ret ? ret : count;
+}
+static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store);
+#endif /* CONFIG_THERMAL_EMULATION */
+
 static DEVICE_ATTR(type, 0444, type_show, NULL);
 static DEVICE_ATTR(temp, 0444, temp_show, NULL);
 static DEVICE_ATTR(mode, 0644, mode_show, mode_store);
@@ -835,7 +891,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
                                       temp_input);
        struct thermal_zone_device *tz = temp->tz;
 
-       ret = tz->ops->get_temp(tz, &temperature);
+       ret = thermal_zone_get_temp(tz, &temperature);
 
        if (ret)
                return ret;
@@ -1522,6 +1578,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
        if (!ops || !ops->get_temp)
                return ERR_PTR(-EINVAL);
 
+       if (trips > 0 && !ops->get_trip_type)
+               return ERR_PTR(-EINVAL);
+
        tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL);
        if (!tz)
                return ERR_PTR(-ENOMEM);
@@ -1585,6 +1644,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
                        goto unregister;
        }
 
+#ifdef CONFIG_THERMAL_EMULATION
+       result = device_create_file(&tz->device, &dev_attr_emul_temp);
+       if (result)
+               goto unregister;
+#endif
        /* Create policy attribute */
        result = device_create_file(&tz->device, &dev_attr_policy);
        if (result)
@@ -1704,7 +1768,8 @@ static struct genl_multicast_group thermal_event_mcgrp = {
        .name = THERMAL_GENL_MCAST_GROUP_NAME,
 };
 
-int thermal_generate_netlink_event(u32 orig, enum events event)
+int thermal_generate_netlink_event(struct thermal_zone_device *tz,
+                                       enum events event)
 {
        struct sk_buff *skb;
        struct nlattr *attr;
@@ -1714,6 +1779,9 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
        int result;
        static unsigned int thermal_event_seqnum;
 
+       if (!tz)
+               return -EINVAL;
+
        /* allocate memory */
        size = nla_total_size(sizeof(struct thermal_genl_event)) +
               nla_total_size(0);
@@ -1748,7 +1816,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
 
        memset(thermal_event, 0, sizeof(struct thermal_genl_event));
 
-       thermal_event->orig = orig;
+       thermal_event->orig = tz->id;
        thermal_event->event = event;
 
        /* send multicast genetlink message */
@@ -1760,7 +1828,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event)
 
        result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC);
        if (result)
-               pr_info("failed to send netlink event:%d\n", result);
+               dev_err(&tz->device, "Failed to send netlink event:%d", result);
 
        return result;
 }
@@ -1800,6 +1868,7 @@ static int __init thermal_init(void)
                idr_destroy(&thermal_cdev_idr);
                mutex_destroy(&thermal_idr_lock);
                mutex_destroy(&thermal_list_lock);
+               return result;
        }
        result = genetlink_init();
        return result;
index 372c8c0d54a0fa7372bda74da48c7f697e490128..950d354d50e2dd593d8666078ff39ae4c5b3247f 100644 (file)
@@ -157,9 +157,16 @@ static int mxc_w1_remove(struct platform_device *pdev)
        return 0;
 }
 
+static struct of_device_id mxc_w1_dt_ids[] = {
+       { .compatible = "fsl,imx21-owire" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mxc_w1_dt_ids);
+
 static struct platform_driver mxc_w1_driver = {
        .driver = {
-                  .name = "mxc_w1",
+               .name = "mxc_w1",
+               .of_match_table = mxc_w1_dt_ids,
        },
        .probe = mxc_w1_probe,
        .remove = mxc_w1_remove,
index b96fc6ce485595f0179bc909c807ae197258e671..bb5768f59b32e22fdcde250f0ae8b01eb6b69b48 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1428,6 +1428,8 @@ void bio_endio(struct bio *bio, int error)
        else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = -EIO;
 
+       trace_block_bio_complete(bio, error);
+
        if (bio->bi_end_io)
                bio->bi_end_io(bio, error);
 }
index 53f5fae5cfbe8e5f86a8c0785982f4f7b2e38df5..aea605c98ba6b4eb920a022acaf6c4587ed653f9 100644 (file)
@@ -1033,7 +1033,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 {
        unsigned bsize = bdev_logical_block_size(bdev);
 
-       bdev->bd_inode->i_size = size;
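+       /* i_size_write() requires serialization against readers; hold i_mutex here */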
+       mutex_lock(&bdev->bd_inode->i_mutex);
+       i_size_write(bdev->bd_inode, size);
+       mutex_unlock(&bdev->bd_inode->i_mutex);
        while (bsize < PAGE_CACHE_SIZE) {
                if (size & bsize)
                        break;
@@ -1118,7 +1120,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                }
                        }
 
-                       if (!ret && !bdev->bd_openers) {
+                       if (!ret) {
                                bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
                                bdi = blk_get_backing_dev_info(bdev);
                                if (bdi == NULL)
index 1e59ed575cc991819a980b1ea6facd9d8a683995..cf54bdfee334287383e7b63badc5a3683e2edf4f 100644 (file)
@@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root,
        return 0;
 }
 
-static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
-                                              unsigned long nr_pages,
-                                              enum wb_reason reason)
-{
-       if (!writeback_in_progress(sb->s_bdi) &&
-           down_read_trylock(&sb->s_umount)) {
-               writeback_inodes_sb_nr(sb, nr_pages, reason);
-               up_read(&sb->s_umount);
-               return 1;
-       }
-
-       return 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
        while (delalloc_bytes && loops < 3) {
                max_reclaim = min(delalloc_bytes, to_reclaim);
                nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-               writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
-                                                   nr_pages,
-                                                   WB_REASON_FS_FREE_SPACE);
+               try_to_writeback_inodes_sb_nr(root->fs_info->sb,
+                                             nr_pages,
+                                             WB_REASON_FS_FREE_SPACE);
 
                /*
                 * We need to wait for the async pages to actually start before
index 8e18281b4077027e763575065bdf510b3406606f..b4dcb34c9635ae61b747bb9d4816477a49a8d90a 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <trace/events/block.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -53,6 +54,13 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 }
 EXPORT_SYMBOL(init_buffer);
 
+inline void touch_buffer(struct buffer_head *bh)
+{
+       trace_block_touch_buffer(bh);
+       mark_page_accessed(bh->b_page);
+}
+EXPORT_SYMBOL(touch_buffer);
+
 static int sleep_on_buffer(void *word)
 {
        io_schedule();
@@ -1113,6 +1121,8 @@ void mark_buffer_dirty(struct buffer_head *bh)
 {
        WARN_ON_ONCE(!buffer_uptodate(bh));
 
+       trace_block_dirty_buffer(bh);
+
        /*
         * Very *carefully* optimize the it-is-already-dirty case.
         *
index d4f81edd9a5d8d4d57152411b1a896528b333446..a60ea977af6fa4c4ac65cea398dd04964e7d15e7 100644 (file)
@@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page)
 static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
 {
        struct inode *inode = req->r_inode;
-       struct ceph_osd_reply_head *replyhead;
-       int rc, bytes;
+       int rc = req->r_result;
+       int bytes = le32_to_cpu(msg->hdr.data_len);
        int i;
 
-       /* parse reply */
-       replyhead = msg->front.iov_base;
-       WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-       rc = le32_to_cpu(replyhead->result);
-       bytes = le32_to_cpu(msg->hdr.data_len);
-
        dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
 
        /* unlock all pages, zeroing any data we didn't read */
@@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
                                    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
                                    NULL, 0,
                                    ci->i_truncate_seq, ci->i_truncate_size,
-                                   NULL, false, 1, 0);
+                                   NULL, false, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
                                   &ci->i_layout, snapc,
                                   page_off, len,
                                   ci->i_truncate_seq, ci->i_truncate_size,
-                                  &inode->i_mtime,
-                                  &page, 1, 0, 0, true);
+                                  &inode->i_mtime, &page, 1);
        if (err < 0) {
                dout("writepage setting page/mapping error %d %p\n", err, page);
                SetPageError(page);
@@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req,
                              struct ceph_msg *msg)
 {
        struct inode *inode = req->r_inode;
-       struct ceph_osd_reply_head *replyhead;
-       struct ceph_osd_op *op;
        struct ceph_inode_info *ci = ceph_inode(inode);
        unsigned wrote;
        struct page *page;
        int i;
        struct ceph_snap_context *snapc = req->r_snapc;
        struct address_space *mapping = inode->i_mapping;
-       __s32 rc = -EIO;
-       u64 bytes = 0;
+       int rc = req->r_result;
+       u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        long writeback_stat;
        unsigned issued = ceph_caps_issued(ci);
 
-       /* parse reply */
-       replyhead = msg->front.iov_base;
-       WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
-       op = (void *)(replyhead + 1);
-       rc = le32_to_cpu(replyhead->result);
-       bytes = le64_to_cpu(op->extent.length);
-
        if (rc >= 0) {
                /*
                 * Assume we wrote the pages we originally sent.  The
@@ -741,8 +725,6 @@ retry:
                struct page *page;
                int want;
                u64 offset, len;
-               struct ceph_osd_request_head *reqhead;
-               struct ceph_osd_op *op;
                long writeback_stat;
 
                next = 0;
@@ -838,7 +820,7 @@ get_more_pages:
                                            snapc, do_sync,
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
-                                           &inode->i_mtime, true, 1, 0);
+                                           &inode->i_mtime, true, 0);
 
                                if (IS_ERR(req)) {
                                        rc = PTR_ERR(req);
@@ -906,10 +888,8 @@ get_more_pages:
 
                /* revise final length, page count */
                req->r_num_pages = locked_pages;
-               reqhead = req->r_request->front.iov_base;
-               op = (void *)(reqhead + 1);
-               op->extent.length = cpu_to_le64(len);
-               op->payload_len = cpu_to_le32(len);
+               req->r_request_ops[0].extent.length = cpu_to_le64(len);
+               req->r_request_ops[0].payload_len = cpu_to_le32(len);
                req->r_request->hdr.data_len = cpu_to_le32(len);
 
                rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
index ae2be696eb5b32e5e2cbad2a591582c3d802aab5..78e2f575247dce25ebbfdef971ac8f458531442c 100644 (file)
@@ -611,8 +611,16 @@ retry:
 
        if (flags & CEPH_CAP_FLAG_AUTH)
                ci->i_auth_cap = cap;
-       else if (ci->i_auth_cap == cap)
+       else if (ci->i_auth_cap == cap) {
                ci->i_auth_cap = NULL;
+               spin_lock(&mdsc->cap_dirty_lock);
+               if (!list_empty(&ci->i_dirty_item)) {
+                       dout(" moving %p to cap_dirty_migrating\n", inode);
+                       list_move(&ci->i_dirty_item,
+                                 &mdsc->cap_dirty_migrating);
+               }
+               spin_unlock(&mdsc->cap_dirty_lock);
+       }
 
        dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
             inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct inode *inode = &ci->vfs_inode;
        struct ceph_cap *cap;
-       int file_wanted, used;
+       int file_wanted, used, cap_used;
        int took_snap_rwsem = 0;             /* true if mdsc->snap_rwsem held */
        int issued, implemented, want, retain, revoking, flushing = 0;
        int mds = -1;   /* keep track of how far we've gone through i_caps list
@@ -1563,9 +1571,14 @@ retry_locked:
 
                /* NOTE: no side-effects allowed, until we take s_mutex */
 
+               cap_used = used;
+               if (ci->i_auth_cap && cap != ci->i_auth_cap)
+                       cap_used &= ~ci->i_auth_cap->issued;
+
                revoking = cap->implemented & ~cap->issued;
-               dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
+               dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
                     cap->mds, cap, ceph_cap_string(cap->issued),
+                    ceph_cap_string(cap_used),
                     ceph_cap_string(cap->implemented),
                     ceph_cap_string(revoking));
 
@@ -1593,7 +1606,7 @@ retry_locked:
                }
 
                /* completed revocation? going down and there are no caps? */
-               if (revoking && (revoking & used) == 0) {
+               if (revoking && (revoking & cap_used) == 0) {
                        dout("completed revocation of %s\n",
                             ceph_cap_string(cap->implemented & ~cap->issued));
                        goto ack;
@@ -1670,8 +1683,8 @@ ack:
                sent++;
 
                /* __send_cap drops i_ceph_lock */
-               delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
-                                     retain, flushing, NULL);
+               delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
+                                     want, retain, flushing, NULL);
                goto retry; /* retake i_ceph_lock and restart our cap scan. */
        }
 
@@ -2417,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
                dout("mds wanted %s -> %s\n",
                     ceph_cap_string(le32_to_cpu(grant->wanted)),
                     ceph_cap_string(wanted));
-               grant->wanted = cpu_to_le32(wanted);
+               /* imported cap may not have correct mds_wanted */
+               if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
+                       check_caps = 1;
        }
 
        cap->seq = seq;
@@ -2821,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
             (unsigned)seq);
 
+       if (op == CEPH_CAP_OP_IMPORT)
+               ceph_add_cap_releases(mdsc, session);
+
        /* lookup ino */
        inode = ceph_find_inode(sb, vino);
        ci = ceph_inode(inode);
index 11b57c2c8f154871da4f9e51c90083ae000e46c1..bf338d9b67e30b04927814016f58fa9334e12ac9 100644 (file)
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
        err = ceph_mdsc_do_request(mdsc,
                                   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
                                   req);
+       if (err)
+               goto out_err;
+
        err = ceph_handle_snapdir(req, dentry, err);
        if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
                err = ceph_handle_notrace_create(dir, dentry);
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                err = finish_no_open(file, dn);
        } else {
                dout("atomic_open finish_open on dn %p\n", dn);
+               if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
+                       *opened |= FILE_CREATED;
+               }
                err = finish_open(file, dentry, ceph_open, opened);
        }
 
@@ -535,7 +541,7 @@ more:
                                    ci->i_snap_realm->cached_context,
                                    do_sync,
                                    ci->i_truncate_seq, ci->i_truncate_size,
-                                   &mtime, false, 2, page_align);
+                                   &mtime, false, page_align);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
index f5ed767806df2ea39082ffa5b892025dc6dbc218..4a989345b37bd368e62ede9ddc0032ff8e5b8fa4 100644 (file)
@@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
                &ceph_sb_to_client(inode->i_sb)->client->osdc;
        u64 len = 1, olen;
        u64 tmp;
-       struct ceph_object_layout ol;
        struct ceph_pg pgid;
        int r;
 
@@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
                return -EFAULT;
 
        down_read(&osdc->map_sem);
-       r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len,
+       r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
                                          &dl.object_no, &dl.object_offset,
                                          &olen);
        if (r < 0)
@@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 
        snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
                 ceph_ino(inode), dl.object_no);
-       ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
+       ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout,
                                osdc->osdmap);
 
-       pgid = ol.ol_pgid;
        dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
        if (dl.osd >= 0) {
                struct ceph_entity_addr *a =
index 7a3dfe0a9a80e551f905a4d46d1248ea9e6c26f5..442880d099c939480583b3366eee9142e2d63d89 100644 (file)
@@ -232,6 +232,30 @@ bad:
        return -EIO;
 }
 
+/*
+ * parse create results
+ */
+static int parse_reply_info_create(void **p, void *end,
+                                 struct ceph_mds_reply_info_parsed *info,
+                                 int features)
+{
+       if (features & CEPH_FEATURE_REPLY_CREATE_INODE) {
+               if (*p == end) {
+                       info->has_create_ino = false;
+               } else {
+                       info->has_create_ino = true;
+                       info->ino = ceph_decode_64(p);
+               }
+       }
+
+       if (unlikely(*p != end))
+               goto bad;
+       return 0;
+
+bad:
+       return -EIO;
+}
+
 /*
  * parse extra results
  */
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end,
 {
        if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
                return parse_reply_info_filelock(p, end, info, features);
-       else
+       else if (info->head->op == CEPH_MDS_OP_READDIR)
                return parse_reply_info_dir(p, end, info, features);
+       else if (info->head->op == CEPH_MDS_OP_CREATE)
+               return parse_reply_info_create(p, end, info, features);
+       else
+               return -EIO;
 }
 
 /*
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        mutex_lock(&req->r_fill_mutex);
        err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
        if (err == 0) {
-               if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
+               if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
+                                   req->r_op == CEPH_MDS_OP_LSSNAP) &&
                    rinfo->dir_nr)
                        ceph_readdir_prepopulate(req, req->r_session);
                ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
index ff4188bf6199b5988d9ba7c111160b3f0aadbd67..c2a19fbbe5177b619b7a3d7e6132b626df8c8508 100644 (file)
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed {
                        struct ceph_mds_reply_info_in *dir_in;
                        u8                            dir_complete, dir_end;
                };
+
+               /* for create results */
+               struct {
+                       bool has_create_ino;
+                       u64 ino;
+               };
        };
 
        /* encoded blob describing snapshot contexts for certain
index 73b7d44e8a354264e3f08f66e8cb788851328029..0d3c9240c61bc80031f85a94ba462809b1d7249a 100644 (file)
@@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
                return ERR_PTR(-ENOMEM);
 
        ceph_decode_16_safe(p, end, version, bad);
+       if (version > 3) {
+               pr_warning("got mdsmap version %d > 3, failing", version);
+               goto bad;
+       }
 
        ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad);
        m->m_epoch = ceph_decode_32(p);
@@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
        /* pg_pools */
        ceph_decode_32_safe(p, end, n, bad);
        m->m_num_data_pg_pools = n;
-       m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS);
+       m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
        if (!m->m_data_pg_pools)
                goto badmem;
-       ceph_decode_need(p, end, sizeof(u32)*(n+1), bad);
+       ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
        for (i = 0; i < n; i++)
-               m->m_data_pg_pools[i] = ceph_decode_32(p);
-       m->m_cas_pg_pool = ceph_decode_32(p);
+               m->m_data_pg_pools[i] = ceph_decode_64(p);
+       m->m_cas_pg_pool = ceph_decode_64(p);
 
        /* ok, we don't care about the rest. */
        dout("mdsmap_decode success epoch %u\n", m->m_epoch);
index cd5097d7c804e5897eeed60716c962d1bbd59b5c..89fa4a940a0ffff7b9cea94bfebb1bb62f8721df 100644 (file)
@@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s)
        case CEPH_MDS_STATE_BOOT:       return "up:boot";
        case CEPH_MDS_STATE_STANDBY:    return "up:standby";
        case CEPH_MDS_STATE_STANDBY_REPLAY:    return "up:standby-replay";
+       case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay";
        case CEPH_MDS_STATE_CREATING:   return "up:creating";
        case CEPH_MDS_STATE_STARTING:   return "up:starting";
                /* up and in */
@@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op)
        case CEPH_MDS_OP_LOOKUP:  return "lookup";
        case CEPH_MDS_OP_LOOKUPHASH:  return "lookuphash";
        case CEPH_MDS_OP_LOOKUPPARENT:  return "lookupparent";
+       case CEPH_MDS_OP_LOOKUPINO:  return "lookupino";
        case CEPH_MDS_OP_GETATTR:  return "getattr";
        case CEPH_MDS_OP_SETXATTR: return "setxattr";
        case CEPH_MDS_OP_SETATTR: return "setattr";
        case CEPH_MDS_OP_RMXATTR: return "rmxattr";
+       case CEPH_MDS_OP_SETLAYOUT: return "setlayou";
+       case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout";
        case CEPH_MDS_OP_READDIR: return "readdir";
        case CEPH_MDS_OP_MKNOD: return "mknod";
        case CEPH_MDS_OP_LINK: return "link";
index e86aa9948124b3601e1734515e4fb629627855de..9fe17c6c2876c0cbec11b57b6e5de76349320244 100644 (file)
@@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
        /*
         * express utilization in terms of large blocks to avoid
         * overflow on 32-bit machines.
+        *
+        * NOTE: for the time being, we make bsize == frsize to humor
+        * not-yet-ancient versions of glibc that are broken.
+        * Someday, we will probably want to report a real block
+        * size...  whatever that may mean for a network file system!
         */
        buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
+       buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
        buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
        buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
        buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
@@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_files = le64_to_cpu(st.num_objects);
        buf->f_ffree = -1;
        buf->f_namelen = NAME_MAX;
-       buf->f_frsize = PAGE_CACHE_SIZE;
 
        /* leave fsid little-endian, regardless of host endianness */
        fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
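A quick standalone illustration of the block accounting above: utilization is reported in 4 MB units so the kilobyte counters shrink into ranges that remain safe for 32-bit statfs consumers, and f_frsize is kept equal to f_bsize for older glibc. The sample figures below are hypothetical, not taken from the commit:

/* Standalone sketch of the statfs block accounting above; the kb
 * figures are made up for illustration. */
#include <stdint.h>
#include <stdio.h>

#define CEPH_BLOCK_SHIFT 22            /* 4 MB blocks, as in the hunk */

int main(void)
{
        uint64_t kb = 3ULL << 30;              /* hypothetical: 3 TB of space, in kB */
        uint64_t kb_avail = 1ULL << 30;        /* hypothetical: 1 TB free, in kB */

        /* kb is in 1 kB units, so shifting by (CEPH_BLOCK_SHIFT - 10)
         * converts it to a count of 4 MB blocks, which fits comfortably
         * in the statfs fields even on 32-bit hosts. */
        uint64_t f_bsize  = 1ULL << CEPH_BLOCK_SHIFT;
        uint64_t f_frsize = f_bsize;           /* kept equal, per the comment above */
        uint64_t f_blocks = kb >> (CEPH_BLOCK_SHIFT - 10);
        uint64_t f_bavail = kb_avail >> (CEPH_BLOCK_SHIFT - 10);

        printf("bsize=%llu frsize=%llu blocks=%llu bavail=%llu\n",
               (unsigned long long)f_bsize, (unsigned long long)f_frsize,
               (unsigned long long)f_blocks, (unsigned long long)f_bavail);
        return 0;
}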
index f053bbd1886f215b2e2c64dab12f0a1812b5808e..c7b309723dcc2ee1db835db6395b42d36b461fe6 100644 (file)
@@ -21,7 +21,7 @@
 
 /* large granularity for statfs utilization stats to facilitate
  * large volume sizes on 32-bit machines. */
-#define CEPH_BLOCK_SHIFT   20  /* 1 MB */
+#define CEPH_BLOCK_SHIFT   22  /* 4 MB */
 #define CEPH_BLOCK         (1 << CEPH_BLOCK_SHIFT)
 
 #define CEPH_MOUNT_OPT_DIRSTAT         (1<<4) /* `cat dirname` for stats */
@@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 /* file.c */
 extern const struct file_operations ceph_file_fops;
 extern const struct address_space_operations ceph_aops;
-extern int ceph_copy_to_page_vector(struct page **pages,
-                                   const char *data,
-                                   loff_t off, size_t len);
-extern int ceph_copy_from_page_vector(struct page **pages,
-                                   char *data,
-                                   loff_t off, size_t len);
-extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
+
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                            struct file *file, unsigned flags, umode_t mode,
index 2c2ae5be99027af909d30a65cb73706a7c0d193a..9b6b2b6dd164c5fd047f691a68aac03fd8de69d5 100644 (file)
@@ -29,9 +29,94 @@ struct ceph_vxattr {
        size_t name_size;       /* strlen(name) + 1 (for '\0') */
        size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
                              size_t size);
-       bool readonly;
+       bool readonly, hidden;
+       bool (*exists_cb)(struct ceph_inode_info *ci);
 };
 
+/* layouts */
+
+static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
+{
+       size_t s;
+       char *p = (char *)&ci->i_layout;
+
+       for (s = 0; s < sizeof(ci->i_layout); s++, p++)
+               if (*p)
+                       return true;
+       return false;
+}
+
+static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
+                                       size_t size)
+{
+       int ret;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+       struct ceph_osd_client *osdc = &fsc->client->osdc;
+       s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+       const char *pool_name;
+
+       dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
+       down_read(&osdc->map_sem);
+       pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+       if (pool_name)
+               ret = snprintf(val, size,
+               "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
+               (unsigned long long)ceph_file_layout_su(ci->i_layout),
+               (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+               (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
+               pool_name);
+       else
+               ret = snprintf(val, size,
+               "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
+               (unsigned long long)ceph_file_layout_su(ci->i_layout),
+               (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
+               (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
+               (unsigned long long)pool);
+
+       up_read(&osdc->map_sem);
+       return ret;
+}
+
+static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
+                                              char *val, size_t size)
+{
+       return snprintf(val, size, "%lld",
+                       (unsigned long long)ceph_file_layout_su(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
+                                               char *val, size_t size)
+{
+       return snprintf(val, size, "%lld",
+              (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
+                                              char *val, size_t size)
+{
+       return snprintf(val, size, "%lld",
+              (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
+}
+
+static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
+                                       char *val, size_t size)
+{
+       int ret;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+       struct ceph_osd_client *osdc = &fsc->client->osdc;
+       s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
+       const char *pool_name;
+
+       down_read(&osdc->map_sem);
+       pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
+       if (pool_name)
+               ret = snprintf(val, size, "%s", pool_name);
+       else
+               ret = snprintf(val, size, "%lld", (unsigned long long)pool);
+       up_read(&osdc->map_sem);
+       return ret;
+}
+
 /* directories */
 
 static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
@@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
                        (long)ci->i_rctime.tv_nsec);
 }
 
-#define CEPH_XATTR_NAME(_type, _name)  XATTR_CEPH_PREFIX #_type "." #_name
 
-#define XATTR_NAME_CEPH(_type, _name) \
-               { \
-                       .name = CEPH_XATTR_NAME(_type, _name), \
-                       .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
-                       .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
-                       .readonly = true, \
-               }
+#define CEPH_XATTR_NAME(_type, _name)  XATTR_CEPH_PREFIX #_type "." #_name
+#define CEPH_XATTR_NAME2(_type, _name, _name2) \
+       XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
+
+#define XATTR_NAME_CEPH(_type, _name)                                  \
+       {                                                               \
+               .name = CEPH_XATTR_NAME(_type, _name),                  \
+               .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
+               .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
+               .readonly = true,                               \
+               .hidden = false,                                \
+               .exists_cb = NULL,                      \
+       }
+#define XATTR_LAYOUT_FIELD(_type, _name, _field)                       \
+       {                                                               \
+               .name = CEPH_XATTR_NAME2(_type, _name, _field), \
+               .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
+               .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
+               .readonly = false,                              \
+               .hidden = true,                 \
+               .exists_cb = ceph_vxattrcb_layout_exists,       \
+       }
 
 static struct ceph_vxattr ceph_dir_vxattrs[] = {
+       {
+               .name = "ceph.dir.layout",
+               .name_size = sizeof("ceph.dir.layout"),
+               .getxattr_cb = ceph_vxattrcb_layout,
+               .readonly = false,
+               .hidden = false,
+               .exists_cb = ceph_vxattrcb_layout_exists,
+       },
+       XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
+       XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
+       XATTR_LAYOUT_FIELD(dir, layout, object_size),
+       XATTR_LAYOUT_FIELD(dir, layout, pool),
        XATTR_NAME_CEPH(dir, entries),
        XATTR_NAME_CEPH(dir, files),
        XATTR_NAME_CEPH(dir, subdirs),
@@ -102,35 +213,26 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
        XATTR_NAME_CEPH(dir, rsubdirs),
        XATTR_NAME_CEPH(dir, rbytes),
        XATTR_NAME_CEPH(dir, rctime),
-       { 0 }   /* Required table terminator */
+       { .name = NULL, 0 }     /* Required table terminator */
 };
 static size_t ceph_dir_vxattrs_name_size;      /* total size of all names */
 
 /* files */
 
-static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val,
-                                  size_t size)
-{
-       int ret;
-
-       ret = snprintf(val, size,
-               "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n",
-               (unsigned long long)ceph_file_layout_su(ci->i_layout),
-               (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
-               (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
-       return ret;
-}
-
 static struct ceph_vxattr ceph_file_vxattrs[] = {
-       XATTR_NAME_CEPH(file, layout),
-       /* The following extended attribute name is deprecated */
        {
-               .name = XATTR_CEPH_PREFIX "layout",
-               .name_size = sizeof (XATTR_CEPH_PREFIX "layout"),
-               .getxattr_cb = ceph_vxattrcb_file_layout,
-               .readonly = true,
+               .name = "ceph.file.layout",
+               .name_size = sizeof("ceph.file.layout"),
+               .getxattr_cb = ceph_vxattrcb_layout,
+               .readonly = false,
+               .hidden = false,
+               .exists_cb = ceph_vxattrcb_layout_exists,
        },
-       { 0 }   /* Required table terminator */
+       XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
+       XATTR_LAYOUT_FIELD(file, layout, stripe_count),
+       XATTR_LAYOUT_FIELD(file, layout, object_size),
+       XATTR_LAYOUT_FIELD(file, layout, pool),
+       { .name = NULL, 0 }     /* Required table terminator */
 };
 static size_t ceph_file_vxattrs_name_size;     /* total size of all names */
 
@@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
        size_t size = 0;
 
        for (vxattr = vxattrs; vxattr->name; vxattr++)
-               size += vxattr->name_size;
+               if (!vxattr->hidden)
+                       size += vxattr->name_size;
 
        return size;
 }
@@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
        if (!ceph_is_valid_xattr(name))
                return -ENODATA;
 
-       /* let's see if a virtual xattr was requested */
-       vxattr = ceph_match_vxattr(inode, name);
-
        spin_lock(&ci->i_ceph_lock);
        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
             ci->i_xattrs.version, ci->i_xattrs.index_version);
 
+       /* let's see if a virtual xattr was requested */
+       vxattr = ceph_match_vxattr(inode, name);
+       if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
+               err = vxattr->getxattr_cb(ci, value, size);
+               goto out;
+       }
+
        if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
                goto get_xattr;
@@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
 
        spin_lock(&ci->i_ceph_lock);
 
-       if (vxattr && vxattr->readonly) {
-               err = vxattr->getxattr_cb(ci, value, size);
-               goto out;
-       }
-
        err = __build_xattrs(inode);
        if (err < 0)
                goto out;
@@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
 get_xattr:
        err = -ENODATA;  /* == ENOATTR */
        xattr = __get_xattr(ci, name);
-       if (!xattr) {
-               if (vxattr)
-                       err = vxattr->getxattr_cb(ci, value, size);
+       if (!xattr)
                goto out;
-       }
 
        err = -ERANGE;
        if (size && size < xattr->val_len)
@@ -664,23 +763,30 @@ list_xattr:
        vir_namelen = ceph_vxattrs_name_size(vxattrs);
 
        /* adding 1 byte per each variable due to the null termination */
-       namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count;
+       namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
        err = -ERANGE;
-       if (size && namelen > size)
+       if (size && vir_namelen + namelen > size)
                goto out;
 
-       err = namelen;
+       err = namelen + vir_namelen;
        if (size == 0)
                goto out;
 
        names = __copy_xattr_names(ci, names);
 
        /* virtual xattr names, too */
-       if (vxattrs)
+       err = namelen;
+       if (vxattrs) {
                for (i = 0; vxattrs[i].name; i++) {
-                       len = sprintf(names, "%s", vxattrs[i].name);
-                       names += len + 1;
+                       if (!vxattrs[i].hidden &&
+                           !(vxattrs[i].exists_cb &&
+                             !vxattrs[i].exists_cb(ci))) {
+                               len = sprintf(names, "%s", vxattrs[i].name);
+                               names += len + 1;
+                               err += len + 1;
+                       }
                }
+       }
 
 out:
        spin_unlock(&ci->i_ceph_lock);
@@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
        if (vxattr && vxattr->readonly)
                return -EOPNOTSUPP;
 
+       /* pass any unhandled ceph.* xattrs through to the MDS */
+       if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+               goto do_sync_unlocked;
+
        /* preallocate memory for xattr name, value, index node */
        err = -ENOMEM;
        newname = kmemdup(name, name_len + 1, GFP_NOFS);
@@ -838,6 +948,7 @@ retry:
 
 do_sync:
        spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
        err = ceph_sync_setxattr(dentry, name, value, size, flags);
 out:
        kfree(newname);
@@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
        if (vxattr && vxattr->readonly)
                return -EOPNOTSUPP;
 
+       /* pass any unhandled ceph.* xattrs through to the MDS */
+       if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
+               goto do_sync_unlocked;
+
        err = -ENOMEM;
        spin_lock(&ci->i_ceph_lock);
 retry:
@@ -931,6 +1046,7 @@ retry:
        return err;
 do_sync:
        spin_unlock(&ci->i_ceph_lock);
+do_sync_unlocked:
        err = ceph_send_removexattr(dentry, name);
 out:
        return err;
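From user space the new layout vxattrs are read through the ordinary xattr syscalls; the per-field names are hidden from listxattr() but still resolvable by name, and unhandled ceph.* names now pass through to the MDS. A minimal sketch, assuming a hypothetical mount point and file name:

/* Minimal userspace sketch: reading the layout vxattrs introduced above
 * via getxattr(2). The path "/mnt/ceph/somefile" is hypothetical. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
        char buf[256];
        ssize_t len;

        /* Whole layout in one string: "stripe_unit=... pool=..." */
        len = getxattr("/mnt/ceph/somefile", "ceph.file.layout",
                       buf, sizeof(buf) - 1);
        if (len >= 0) {
                buf[len] = '\0';
                printf("layout: %s\n", buf);
        }

        /* Individual field: hidden from listxattr(), but readable by name */
        len = getxattr("/mnt/ceph/somefile", "ceph.file.layout.pool",
                       buf, sizeof(buf) - 1);
        if (len >= 0) {
                buf[len] = '\0';
                printf("pool: %s\n", buf);
        }
        return 0;
}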
index 9c4f4b1c97f84aad654dd8f70cf5231c78a6fc52..9ea0cde3fa9e0ffe7aebc28940293c422ae75a63 100644 (file)
@@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb)
        /*
         * Start pushing delalloc when 1/2 of free blocks are dirty.
         */
-       if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
-           !writeback_in_progress(sb->s_bdi) &&
-           down_read_trylock(&sb->s_umount)) {
-               writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
-               up_read(&sb->s_umount);
-       }
+       if (dirty_blocks && (free_blocks < 2 * dirty_blocks))
+               try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
 
        if (2 * free_blocks < 3 * dirty_blocks ||
                free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
index 310972b72a6654c1597fc11f717954606561c47e..21f46fb3a10193a966c18879c5bea794fd9a494e 100644 (file)
@@ -318,8 +318,14 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
 
 static int write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-       if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
-               return inode->i_sb->s_op->write_inode(inode, wbc);
+       int ret;
+
+       if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
+               trace_writeback_write_inode_start(inode, wbc);
+               ret = inode->i_sb->s_op->write_inode(inode, wbc);
+               trace_writeback_write_inode(inode, wbc);
+               return ret;
+       }
        return 0;
 }
 
@@ -450,6 +456,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
        WARN_ON(!(inode->i_state & I_SYNC));
 
+       trace_writeback_single_inode_start(inode, wbc, nr_to_write);
+
        ret = do_writepages(mapping, wbc);
 
        /*
@@ -1150,8 +1158,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
         * dirty the inode itself
         */
        if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+               trace_writeback_dirty_inode_start(inode, flags);
+
                if (sb->s_op->dirty_inode)
                        sb->s_op->dirty_inode(inode, flags);
+
+               trace_writeback_dirty_inode(inode, flags);
        }
 
        /*
@@ -1332,47 +1344,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
- * writeback_inodes_sb_if_idle -       start writeback if none underway
+ * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
  * @sb: the superblock
- * @reason: reason why some writeback work was initiated
+ * @nr: the number of pages to write
+ * @reason: reason why writeback was initiated
  *
- * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
+int try_to_writeback_inodes_sb_nr(struct super_block *sb,
+                                 unsigned long nr,
+                                 enum wb_reason reason)
 {
-       if (!writeback_in_progress(sb->s_bdi)) {
-               down_read(&sb->s_umount);
-               writeback_inodes_sb(sb, reason);
-               up_read(&sb->s_umount);
+       if (writeback_in_progress(sb->s_bdi))
                return 1;
-       } else
+
+       if (!down_read_trylock(&sb->s_umount))
                return 0;
+
+       writeback_inodes_sb_nr(sb, nr, reason);
+       up_read(&sb->s_umount);
+       return 1;
 }
-EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);
 
 /**
- * writeback_inodes_sb_nr_if_idle      -       start writeback if none underway
+ * try_to_writeback_inodes_sb - try to start writeback if none underway
  * @sb: the superblock
- * @nr: the number of pages to write
  * @reason: reason why some writeback work was initiated
  *
- * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Implemented by try_to_writeback_inodes_sb_nr().
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-                                  unsigned long nr,
-                                  enum wb_reason reason)
+int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 {
-       if (!writeback_in_progress(sb->s_bdi)) {
-               down_read(&sb->s_umount);
-               writeback_inodes_sb_nr(sb, nr, reason);
-               up_read(&sb->s_umount);
-               return 1;
-       } else
-               return 0;
+       return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
 }
-EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+EXPORT_SYMBOL(try_to_writeback_inodes_sb);
 
 /**
  * sync_inodes_sb      -       sync sb inode pages
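The conversion above replaces the open-coded "if idle, take s_umount, write back" pattern with a single helper that uses a trylock, so callers can no longer race or block against unmount. A hedged sketch of a converted caller, in the spirit of the ext4 hunk earlier (my_fs_low_space() is an illustrative name, not part of this series):

/* Sketch of a filesystem caller using the new helper; this is not
 * verbatim code from the commit. */
static void my_fs_low_space(struct super_block *sb)
{
        /*
         * try_to_writeback_inodes_sb() returns 1 if writeback is already
         * underway or was started, and 0 only if s_umount could not be
         * taken with a trylock.
         */
        if (!try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE))
                pr_debug("writeback not started: s_umount contended\n");
}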
index a2717408c4781a5cbe5fb6c607e3aed1a03a1a8e..0796c45d0d4d0795420743f104dc4e76e0cebd14 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/nfs_fs.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 #include <linux/kthread.h>
@@ -220,10 +220,19 @@ reclaimer(void *ptr)
 {
        struct nlm_host   *host = (struct nlm_host *) ptr;
        struct nlm_wait   *block;
+       struct nlm_rqst   *req;
        struct file_lock *fl, *next;
        u32 nsmstate;
        struct net *net = host->net;
 
+       req = kmalloc(sizeof(*req), GFP_KERNEL);
+       if (!req) {
+               printk(KERN_ERR "lockd: reclaimer unable to alloc memory."
+                               " Locks for %s won't be reclaimed!\n",
+                               host->h_name);
+               return 0;
+       }
+
        allow_signal(SIGKILL);
 
        down_write(&host->h_rwsem);
@@ -253,7 +262,7 @@ restart:
                 */
                if (signalled())
                        continue;
-               if (nlmclnt_reclaim(host, fl) != 0)
+               if (nlmclnt_reclaim(host, fl, req) != 0)
                        continue;
                list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
                if (host->h_nsmstate != nsmstate) {
@@ -279,5 +288,6 @@ restart:
        /* Release host handle after use */
        nlmclnt_release_host(host);
        lockd_down(net);
+       kfree(req);
        return 0;
 }
index 366277190b820c50cbb4c00523fc69cfae6b5a7f..7e529c3c45c0566fe358ba1975a1d2159a0e21a6 100644 (file)
@@ -618,17 +618,15 @@ out_unlock:
  * RECLAIM: Try to reclaim a lock
  */
 int
-nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
+nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl,
+               struct nlm_rqst *req)
 {
-       struct nlm_rqst reqst, *req;
        int             status;
 
-       req = &reqst;
        memset(req, 0, sizeof(*req));
        locks_init_lock(&req->a_args.lock.fl);
        locks_init_lock(&req->a_res.lock.fl);
        req->a_host  = host;
-       req->a_flags = 0;
 
        /* Set up the argument struct */
        nlmclnt_setlockargs(req, fl);
index abdd75d44dd46c3ff5b468d355eec03594d31bc6..969d589c848df8e066754e5ca5f3b02729eed919 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
 #include <linux/mutex.h>
index 3c2cfc6836315c288e9a0a367d03f235235ebbee..1812f026960c4229dd4c3d198b129f9554eb6fc4 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
index d17bb62b06d696f332875e646d2ed278bcd1f099..97e87415b145f7a6a0ed0e1a37c40a657209bf89 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
index 862a2f16db64b7335453ef88ed7bea6e153cfa91..5f7b053720eed9741250193784f0c33ac378df69 100644 (file)
@@ -128,10 +128,13 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd)
        struct super_block *pipefs_sb;
        int ret = 0;
 
+       sunrpc_init_cache_detail(cd);
        pipefs_sb = rpc_get_sb_net(net);
        if (pipefs_sb) {
                ret = nfs_cache_register_sb(pipefs_sb, cd);
                rpc_put_sb_net(net);
+               if (ret)
+                       sunrpc_destroy_cache_detail(cd);
        }
        return ret;
 }
@@ -151,14 +154,5 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd)
                nfs_cache_unregister_sb(pipefs_sb, cd);
                rpc_put_sb_net(net);
        }
-}
-
-void nfs_cache_init(struct cache_detail *cd)
-{
-       sunrpc_init_cache_detail(cd);
-}
-
-void nfs_cache_destroy(struct cache_detail *cd)
-{
        sunrpc_destroy_cache_detail(cd);
 }
index 317db95e37f80375b371130afd58cb31f39161ed..4116d2c3f52f130ca887d55682f1a8a24ba878cb 100644 (file)
@@ -23,8 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
 extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
 extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
 
-extern void nfs_cache_init(struct cache_detail *cd);
-extern void nfs_cache_destroy(struct cache_detail *cd);
 extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd);
 extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd);
 extern int nfs_cache_register_sb(struct super_block *sb,
index ca4b11ec87a292e92e5d9496f8ac194d97a5bf9f..9455270922958f420a7f31101cac1b6ac7b14bfb 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/dns_resolver.h>
 #include "dns_resolve.h"
 
@@ -42,6 +43,7 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
 #include <linux/seq_file.h>
 #include <linux/inet.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/svcauth.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
@@ -142,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd,
 
        ret = nfs_cache_upcall(cd, key->hostname);
        if (ret)
-               ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request);
+               ret = sunrpc_cache_pipe_upcall(cd, ch);
        return ret;
 }
 
@@ -351,60 +353,47 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name,
 }
 EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
 
+static struct cache_detail nfs_dns_resolve_template = {
+       .owner          = THIS_MODULE,
+       .hash_size      = NFS_DNS_HASHTBL_SIZE,
+       .name           = "dns_resolve",
+       .cache_put      = nfs_dns_ent_put,
+       .cache_upcall   = nfs_dns_upcall,
+       .cache_request  = nfs_dns_request,
+       .cache_parse    = nfs_dns_parse,
+       .cache_show     = nfs_dns_show,
+       .match          = nfs_dns_match,
+       .init           = nfs_dns_ent_init,
+       .update         = nfs_dns_ent_update,
+       .alloc          = nfs_dns_ent_alloc,
+};
+
+
 int nfs_dns_resolver_cache_init(struct net *net)
 {
-       int err = -ENOMEM;
+       int err;
        struct nfs_net *nn = net_generic(net, nfs_net_id);
-       struct cache_detail *cd;
-       struct cache_head **tbl;
 
-       cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL);
-       if (cd == NULL)
-               goto err_cd;
-
-       tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *),
-                       GFP_KERNEL);
-       if (tbl == NULL)
-               goto err_tbl;
-
-       cd->owner = THIS_MODULE,
-       cd->hash_size = NFS_DNS_HASHTBL_SIZE,
-       cd->hash_table = tbl,
-       cd->name = "dns_resolve",
-       cd->cache_put = nfs_dns_ent_put,
-       cd->cache_upcall = nfs_dns_upcall,
-       cd->cache_parse = nfs_dns_parse,
-       cd->cache_show = nfs_dns_show,
-       cd->match = nfs_dns_match,
-       cd->init = nfs_dns_ent_init,
-       cd->update = nfs_dns_ent_update,
-       cd->alloc = nfs_dns_ent_alloc,
-
-       nfs_cache_init(cd);
-       err = nfs_cache_register_net(net, cd);
+       nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net);
+       if (IS_ERR(nn->nfs_dns_resolve))
+               return PTR_ERR(nn->nfs_dns_resolve);
+
+       err = nfs_cache_register_net(net, nn->nfs_dns_resolve);
        if (err)
                goto err_reg;
-       nn->nfs_dns_resolve = cd;
        return 0;
 
 err_reg:
-       nfs_cache_destroy(cd);
-       kfree(cd->hash_table);
-err_tbl:
-       kfree(cd);
-err_cd:
+       cache_destroy_net(nn->nfs_dns_resolve, net);
        return err;
 }
 
 void nfs_dns_resolver_cache_destroy(struct net *net)
 {
        struct nfs_net *nn = net_generic(net, nfs_net_id);
-       struct cache_detail *cd = nn->nfs_dns_resolve;
 
-       nfs_cache_unregister_net(net, cd);
-       nfs_cache_destroy(cd);
-       kfree(cd->hash_table);
-       kfree(cd);
+       nfs_cache_unregister_net(net, nn->nfs_dns_resolve);
+       cache_destroy_net(nn->nfs_dns_resolve, net);
 }
 
 static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
index 47d100872390157ac0d180d43b4f6d3ef7b2e2fd..ac4fc9a8fdbc6c6133b186ddd5de476db9f03930 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
 #include <linux/nfs_mount.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/bc_xprt.h>
index b720064bcd7ff15435ce4b7a4dcb5a2d1065b29f..1fe284f01f8b6419b3c7a9a5728ea5fc33cad583 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
+#include <linux/sunrpc/addr.h>
 
 #include "internal.h"
 #include "nfs4session.h"
index 1e09eb78543b2d50f9da025c733e52a99957ba64..0dd766079e1ca34feb26ba2da2dafafeb1e24060 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include "internal.h"
index a9dc5fc299550fe00858524e48f721f493804e3d..17b32b7224574207fba2ee5384474b69d6abcc99 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/errno.h>
 #include <linux/unistd.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/xprtsock.h>
index 93cc9d34c459cbf2e7b3b51332bbc2ad9123ff5f..87fd1410b737e19093157452e42f10f12073676f 100644 (file)
 
 /*
  * Representation of a reply cache entry.
+ *
+ * Note that we use a sockaddr_in6 to hold the address instead of the more
+ * typical sockaddr_storage. This is for space reasons, since sockaddr_storage
+ * is much larger than a sockaddr_in6.
  */
 struct svc_cacherep {
        struct hlist_node       c_hash;
@@ -20,11 +24,13 @@ struct svc_cacherep {
        unsigned char           c_state,        /* unused, inprog, done */
                                c_type,         /* status, buffer */
                                c_secure : 1;   /* req came from port < 1024 */
-       struct sockaddr_in      c_addr;
+       struct sockaddr_in6     c_addr;
        __be32                  c_xid;
        u32                     c_prot;
        u32                     c_proc;
        u32                     c_vers;
+       unsigned int            c_len;
+       __wsum                  c_csum;
        unsigned long           c_timestamp;
        union {
                struct kvec     u_vec;
@@ -46,8 +52,7 @@ enum {
 enum {
        RC_DROPIT,
        RC_REPLY,
-       RC_DOIT,
-       RC_INTR
+       RC_DOIT
 };
 
 /*
@@ -67,6 +72,12 @@ enum {
  */
 #define RC_DELAY               (HZ/5)
 
+/* Cache entries expire after this time period */
+#define RC_EXPIRE              (120 * HZ)
+
+/* Checksum this amount of the request */
+#define RC_CSUMLEN             (256U)
+
 int    nfsd_reply_cache_init(void);
 void   nfsd_reply_cache_shutdown(void);
 int    nfsd_cache_lookup(struct svc_rqst *);
index 5681c5906f088a49d0925ab9c70efc85c2ddc0b0..5f38ea36e266751f700b806bd4b4dfb7c6ed0dcd 100644 (file)
@@ -67,11 +67,6 @@ static void expkey_request(struct cache_detail *cd,
        (*bpp)[-1] = '\n';
 }
 
-static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
-{
-       return sunrpc_cache_pipe_upcall(cd, h, expkey_request);
-}
-
 static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new,
                                            struct svc_expkey *old);
 static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *);
@@ -245,7 +240,7 @@ static struct cache_detail svc_expkey_cache_template = {
        .hash_size      = EXPKEY_HASHMAX,
        .name           = "nfsd.fh",
        .cache_put      = expkey_put,
-       .cache_upcall   = expkey_upcall,
+       .cache_request  = expkey_request,
        .cache_parse    = expkey_parse,
        .cache_show     = expkey_show,
        .match          = expkey_match,
@@ -315,6 +310,7 @@ static void svc_export_put(struct kref *ref)
        path_put(&exp->ex_path);
        auth_domain_put(exp->ex_client);
        nfsd4_fslocs_free(&exp->ex_fslocs);
+       kfree(exp->ex_uuid);
        kfree(exp);
 }
 
@@ -337,11 +333,6 @@ static void svc_export_request(struct cache_detail *cd,
        (*bpp)[-1] = '\n';
 }
 
-static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
-{
-       return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
-}
-
 static struct svc_export *svc_export_update(struct svc_export *new,
                                            struct svc_export *old);
 static struct svc_export *svc_export_lookup(struct svc_export *);
@@ -674,6 +665,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
        new->ex_fslocs.locations = NULL;
        new->ex_fslocs.locations_count = 0;
        new->ex_fslocs.migrated = 0;
+       new->ex_uuid = NULL;
        new->cd = item->cd;
 }
 
@@ -715,7 +707,7 @@ static struct cache_detail svc_export_cache_template = {
        .hash_size      = EXPORT_HASHMAX,
        .name           = "nfsd.export",
        .cache_put      = svc_export_put,
-       .cache_upcall   = svc_export_upcall,
+       .cache_request  = svc_export_request,
        .cache_parse    = svc_export_parse,
        .cache_show     = svc_export_show,
        .match          = svc_export_match,
index 497584c7036666254c261a6b606e5103e6b580a6..d620e7f8142903feebabcb2c3cefdf6a5ea2f1de 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/module.h>
 #include <linux/nsproxy.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <asm/uaccess.h>
 
 #include "state.h"
index 0ce12346df9c8613bd4cac8b6389a454413f893f..4832fd819f884f4a6b436a70369fea4ea9bfcf93 100644 (file)
@@ -139,12 +139,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
        (*bpp)[-1] = '\n';
 }
 
-static int
-idtoname_upcall(struct cache_detail *cd, struct cache_head *ch)
-{
-       return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request);
-}
-
 static int
 idtoname_match(struct cache_head *ca, struct cache_head *cb)
 {
@@ -192,7 +186,7 @@ static struct cache_detail idtoname_cache_template = {
        .hash_size      = ENT_HASHMAX,
        .name           = "nfs4.idtoname",
        .cache_put      = ent_put,
-       .cache_upcall   = idtoname_upcall,
+       .cache_request  = idtoname_request,
        .cache_parse    = idtoname_parse,
        .cache_show     = idtoname_show,
        .warn_no_listener = warn_no_idmapd,
@@ -320,12 +314,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
        (*bpp)[-1] = '\n';
 }
 
-static int
-nametoid_upcall(struct cache_detail *cd, struct cache_head *ch)
-{
-       return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request);
-}
-
 static int
 nametoid_match(struct cache_head *ca, struct cache_head *cb)
 {
@@ -365,7 +353,7 @@ static struct cache_detail nametoid_cache_template = {
        .hash_size      = ENT_HASHMAX,
        .name           = "nfs4.nametoid",
        .cache_put      = ent_put,
-       .cache_upcall   = nametoid_upcall,
+       .cache_request  = nametoid_request,
        .cache_parse    = nametoid_parse,
        .cache_show     = nametoid_show,
        .warn_no_listener = warn_no_idmapd,
index 9d1c5dba2bbb6625df90cebca48a364a29b9fe97..ae73175e6e6854fda740bd5a0dd88d17f14d556f 100644 (file)
@@ -993,14 +993,15 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (!buf)
                return nfserr_jukebox;
 
+       p = buf;
        status = nfsd4_encode_fattr(&cstate->current_fh,
                                    cstate->current_fh.fh_export,
-                                   cstate->current_fh.fh_dentry, buf,
-                                   &count, verify->ve_bmval,
+                                   cstate->current_fh.fh_dentry, &p,
+                                   count, verify->ve_bmval,
                                    rqstp, 0);
 
        /* this means that nfsd4_encode_fattr() ran out of space */
-       if (status == nfserr_resource && count == 0)
+       if (status == nfserr_resource)
                status = nfserr_not_same;
        if (status)
                goto out_kfree;
index 4914af4a817e34a9aaceb1cbeb865645bf117e30..899ca26dd194d73f43234ba08447f1533428eff6 100644 (file)
@@ -1185,6 +1185,12 @@ bin_to_hex_dup(const unsigned char *src, int srclen)
 static int
 nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net)
 {
+       /* XXX: The usermode helper is not working in a container yet. */
+       if (net != &init_net) {
+               WARN(1, KERN_ERR "NFSD: attempt to initialize umh client "
+                       "tracking in a container!\n");
+               return -EINVAL;
+       }
        return nfsd4_umh_cltrack_upcall("init", NULL, NULL);
 }
 
index 9e7103b6e0ad5edbb611ee8f76367bc8c2f95b62..16d39c6c4fbb56b11c25ac8927b8913b1a928a15 100644 (file)
@@ -40,7 +40,7 @@
 #include <linux/pagemap.h>
 #include <linux/ratelimit.h>
 #include <linux/sunrpc/svcauth_gss.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include "xdr4.h"
 #include "vfs.h"
 #include "current_stateid.h"
@@ -261,33 +261,46 @@ static inline int get_new_stid(struct nfs4_stid *stid)
        return new_stid;
 }
 
-static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type)
+static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
+kmem_cache *slab)
 {
-       stateid_t *s = &stid->sc_stateid;
+       struct idr *stateids = &cl->cl_stateids;
+       static int min_stateid = 0;
+       struct nfs4_stid *stid;
        int new_id;
 
-       stid->sc_type = type;
+       stid = kmem_cache_alloc(slab, GFP_KERNEL);
+       if (!stid)
+               return NULL;
+
+       if (!idr_pre_get(stateids, GFP_KERNEL))
+               goto out_free;
+       if (idr_get_new_above(stateids, stid, min_stateid, &new_id))
+               goto out_free;
        stid->sc_client = cl;
-       s->si_opaque.so_clid = cl->cl_clientid;
-       new_id = get_new_stid(stid);
-       s->si_opaque.so_id = (u32)new_id;
+       stid->sc_type = 0;
+       stid->sc_stateid.si_opaque.so_id = new_id;
+       stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
        /* Will be incremented before return to client: */
-       s->si_generation = 0;
-}
-
-static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab)
-{
-       struct idr *stateids = &cl->cl_stateids;
+       stid->sc_stateid.si_generation = 0;
 
-       if (!idr_pre_get(stateids, GFP_KERNEL))
-               return NULL;
        /*
-        * Note: if we fail here (or any time between now and the time
-        * we actually get the new idr), we won't need to undo the idr
-        * preallocation, since the idr code caps the number of
-        * preallocated entries.
+        * It shouldn't be a problem to reuse an opaque stateid value.
+        * I don't think it is for 4.1.  But with 4.0 I worry that, for
+        * example, a stray write retransmission could be accepted by
+        * the server when it should have been rejected.  Therefore,
+        * adopt a trick from the sctp code to attempt to maximize the
+        * amount of time until an id is reused, by ensuring they always
+        * "increase" (mod INT_MAX):
         */
-       return kmem_cache_alloc(slab, GFP_KERNEL);
+
+       min_stateid = new_id+1;
+       if (min_stateid == INT_MAX)
+               min_stateid = 0;
+       return stid;
+out_free:
+       kfree(stid);
+       return NULL;
 }
 
 static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
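The comment above motivates handing out stateid ids that only increase (mod INT_MAX), so a stray NFSv4.0 retransmission is unlikely to match a recycled id. A standalone sketch of just that wrap-around bookkeeping, with made-up starting values and a trivial stand-in for the idr allocation:

/* Standalone sketch of the "always increase, mod INT_MAX" trick
 * described above; values are illustrative only. */
#include <limits.h>
#include <stdio.h>

static int min_stateid;                /* next allocation starts here */

static int alloc_id(void)
{
        /* Stand-in for idr_get_new_above(): hand out min_stateid. */
        int new_id = min_stateid;

        min_stateid = new_id + 1;
        if (min_stateid == INT_MAX)    /* wrap before overflowing */
                min_stateid = 0;
        return new_id;
}

int main(void)
{
        min_stateid = INT_MAX - 2;     /* pretend we are near the wrap point */
        for (int i = 0; i < 4; i++)
                printf("id=%d\n", alloc_id());
        /* prints 2147483645, 2147483646, 0, 1 */
        return 0;
}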
@@ -316,7 +329,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
        dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
        if (dp == NULL)
                return dp;
-       init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID);
+       dp->dl_stid.sc_type = NFS4_DELEG_STID;
        /*
         * delegation seqid's are never incremented.  The 4.1 special
         * meaning of seqid 0 isn't meaningful, really, but let's avoid
@@ -337,13 +350,21 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
        return dp;
 }
 
+static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab)
+{
+       struct idr *stateids = &s->sc_client->cl_stateids;
+
+       idr_remove(stateids, s->sc_stateid.si_opaque.so_id);
+       kmem_cache_free(slab, s);
+}
+
 void
 nfs4_put_delegation(struct nfs4_delegation *dp)
 {
        if (atomic_dec_and_test(&dp->dl_count)) {
                dprintk("NFSD: freeing dp %p\n",dp);
                put_nfs4_file(dp->dl_file);
-               kmem_cache_free(deleg_slab, dp);
+               free_stid(&dp->dl_stid, deleg_slab);
                num_delegations--;
        }
 }
@@ -360,9 +381,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
 
 static void unhash_stid(struct nfs4_stid *s)
 {
-       struct idr *stateids = &s->sc_client->cl_stateids;
-
-       idr_remove(stateids, s->sc_stateid.si_opaque.so_id);
+       s->sc_type = 0;
 }
 
 /* Called under the state lock. */
@@ -519,7 +538,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp)
 
 static void free_generic_stateid(struct nfs4_ol_stateid *stp)
 {
-       kmem_cache_free(stateid_slab, stp);
+       free_stid(&stp->st_stid, stateid_slab);
 }
 
 static void release_lock_stateid(struct nfs4_ol_stateid *stp)
@@ -905,7 +924,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan,
 
        new = __alloc_session(slotsize, numslots);
        if (!new) {
-               nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
+               nfsd4_put_drc_mem(slotsize, numslots);
                return NULL;
        }
        init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn);
@@ -1048,7 +1067,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 static inline void
 free_client(struct nfs4_client *clp)
 {
-       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id);
 
        lockdep_assert_held(&nn->client_lock);
        while (!list_empty(&clp->cl_sessions)) {
@@ -1060,6 +1079,7 @@ free_client(struct nfs4_client *clp)
        }
        free_svc_cred(&clp->cl_cred);
        kfree(clp->cl_name.data);
+       idr_destroy(&clp->cl_stateids);
        kfree(clp);
 }
 
@@ -1258,7 +1278,12 @@ static void gen_confirm(struct nfs4_client *clp)
 
 static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
 {
-       return idr_find(&cl->cl_stateids, t->si_opaque.so_id);
+       struct nfs4_stid *ret;
+
+       ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id);
+       if (!ret || !ret->sc_type)
+               return NULL;
+       return ret;
 }
 
 static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
@@ -1844,11 +1869,12 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 
        /* cache solo and embedded create sessions under the state lock */
        nfsd4_cache_create_session(cr_ses, cs_slot, status);
-out:
        nfs4_unlock_state();
+out:
        dprintk("%s returns %d\n", __func__, ntohl(status));
        return status;
 out_free_conn:
+       nfs4_unlock_state();
        free_conn(conn);
 out_free_session:
        __free_session(new);
@@ -2443,9 +2469,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
 
 static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
        struct nfs4_openowner *oo = open->op_openowner;
-       struct nfs4_client *clp = oo->oo_owner.so_client;
 
-       init_stid(&stp->st_stid, clp, NFS4_OPEN_STID);
+       stp->st_stid.sc_type = NFS4_OPEN_STID;
        INIT_LIST_HEAD(&stp->st_lockowners);
        list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
        list_add(&stp->st_perfile, &fp->fi_stateids);
@@ -4031,7 +4056,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct
        stp = nfs4_alloc_stateid(clp);
        if (stp == NULL)
                return NULL;
-       init_stid(&stp->st_stid, clp, NFS4_LOCK_STID);
+       stp->st_stid.sc_type = NFS4_LOCK_STID;
        list_add(&stp->st_perfile, &fp->fi_stateids);
        list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
        stp->st_stateowner = &lo->lo_owner;
@@ -4913,16 +4938,6 @@ nfs4_state_start_net(struct net *net)
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
        int ret;
 
-       /*
-        * FIXME: For now, we hang most of the pernet global stuff off of
-        * init_net until nfsd is fully containerized. Eventually, we'll
-        * need to pass a net pointer into this function, take a reference
-        * to that instead and then do most of the rest of this on a per-net
-        * basis.
-        */
-       if (net != &init_net)
-               return -EINVAL;
-
        ret = nfs4_state_create_net(net);
        if (ret)
                return ret;
index 8ca6d17f6cf3dc7505ce3e8754073fc9caa32a78..01168865dd37395a047cbeeb0175d7c8a5b80f88 100644 (file)
@@ -2024,12 +2024,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
  * ourselves.
  *
- * @countp is the buffer size in _words_; upon successful return this becomes
- * replaced with the number of words written.
+ * @count is the buffer size in _words_
  */
 __be32
 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
-               struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval,
+               struct dentry *dentry, __be32 **buffer, int count, u32 *bmval,
                struct svc_rqst *rqstp, int ignore_crossmnt)
 {
        u32 bmval0 = bmval[0];
@@ -2038,12 +2037,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
        struct kstat stat;
        struct svc_fh tempfh;
        struct kstatfs statfs;
-       int buflen = *countp << 2;
+       int buflen = count << 2;
        __be32 *attrlenp;
        u32 dummy;
        u64 dummy64;
        u32 rdattr_err = 0;
-       __be32 *p = buffer;
+       __be32 *p = *buffer;
        __be32 status;
        int err;
        int aclsupport = 0;
@@ -2447,7 +2446,7 @@ out_acl:
        }
 
        *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
-       *countp = p - buffer;
+       *buffer = p;
        status = nfs_ok;
 
 out:
@@ -2459,7 +2458,6 @@ out_nfserr:
        status = nfserrno(err);
        goto out;
 out_resource:
-       *countp = 0;
        status = nfserr_resource;
        goto out;
 out_serverfault:
@@ -2478,7 +2476,7 @@ static inline int attributes_need_mount(u32 *bmval)
 
 static __be32
 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
-               const char *name, int namlen, __be32 *p, int *buflen)
+               const char *name, int namlen, __be32 **p, int buflen)
 {
        struct svc_export *exp = cd->rd_fhp->fh_export;
        struct dentry *dentry;
@@ -2584,10 +2582,9 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
        p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
        p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
-       nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen);
+       nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
        switch (nfserr) {
        case nfs_ok:
-               p += buflen;
                break;
        case nfserr_resource:
                nfserr = nfserr_toosmall;
@@ -2714,10 +2711,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 
        buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
        nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
-                                   resp->p, &buflen, getattr->ga_bmval,
+                                   &resp->p, buflen, getattr->ga_bmval,
                                    resp->rqstp, 0);
-       if (!nfserr)
-               resp->p += buflen;
        return nfserr;
 }
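With this change the encoder advances the caller's output pointer itself instead of reporting a word count for the caller to apply. A rough sketch of the new calling convention, modelled on the getattr hunk above (not verbatim kernel code):

/* Sketch of a getattr-style caller under the new convention. */
buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
                            &resp->p,   /* encoder advances this itself */
                            buflen,     /* space available, in words */
                            getattr->ga_bmval, resp->rqstp, 0);
/* no "resp->p += buflen" afterwards: on success resp->p already points
 * just past the encoded attributes */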
 
index da3dbd0f8979a78f6dac4982c8efcab5351ee134..62c1ee128aebafbd82e70863e57caa74de804e13 100644 (file)
@@ -9,22 +9,22 @@
  */
 
 #include <linux/slab.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/highmem.h>
+#include <net/checksum.h>
 
 #include "nfsd.h"
 #include "cache.h"
 
-/* Size of reply cache. Common values are:
- * 4.3BSD:     128
- * 4.4BSD:     256
- * Solaris2:   1024
- * DEC Unix:   512-4096
- */
-#define CACHESIZE              1024
+#define NFSDDBG_FACILITY       NFSDDBG_REPCACHE
+
 #define HASHSIZE               64
 
 static struct hlist_head *     cache_hash;
 static struct list_head        lru_head;
-static int                     cache_disabled = 1;
+static struct kmem_cache       *drc_slab;
+static unsigned int            num_drc_entries;
+static unsigned int            max_drc_entries;
 
 /*
  * Calculate the hash index from an XID.
@@ -37,6 +37,14 @@ static inline u32 request_hash(u32 xid)
 }
 
 static int     nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
+static void    cache_cleaner_func(struct work_struct *unused);
+static int     nfsd_reply_cache_shrink(struct shrinker *shrink,
+                                       struct shrink_control *sc);
+
+struct shrinker nfsd_reply_cache_shrinker = {
+       .shrink = nfsd_reply_cache_shrink,
+       .seeks  = 1,
+};
 
 /*
  * locking for the reply cache:
@@ -44,30 +52,86 @@ static int  nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
  * Otherwise, when accessing _prev or _next, the lock must be held.
  */
 static DEFINE_SPINLOCK(cache_lock);
+static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
 
-int nfsd_reply_cache_init(void)
+/*
+ * Put a cap on the size of the DRC based on the amount of available
+ * low memory in the machine.
+ *
+ *  64MB:    8192
+ * 128MB:   11585
+ * 256MB:   16384
+ * 512MB:   23170
+ *   1GB:   32768
+ *   2GB:   46340
+ *   4GB:   65536
+ *   8GB:   92681
+ *  16GB:  131072
+ *
+ * ...with a hard cap of 256k entries. In the worst case, each entry will be
+ * ~1k, so the above numbers should give a rough max of the amount of memory
+ * used in k.
+ */
+static unsigned int
+nfsd_cache_size_limit(void)
+{
+       unsigned int limit;
+       unsigned long low_pages = totalram_pages - totalhigh_pages;
+
+       limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
+       return min_t(unsigned int, limit, 256*1024);
+}
+
+static struct svc_cacherep *
+nfsd_reply_cache_alloc(void)
 {
        struct svc_cacherep     *rp;
-       int                     i;
 
-       INIT_LIST_HEAD(&lru_head);
-       i = CACHESIZE;
-       while (i) {
-               rp = kmalloc(sizeof(*rp), GFP_KERNEL);
-               if (!rp)
-                       goto out_nomem;
-               list_add(&rp->c_lru, &lru_head);
+       rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
+       if (rp) {
                rp->c_state = RC_UNUSED;
                rp->c_type = RC_NOCACHE;
+               INIT_LIST_HEAD(&rp->c_lru);
                INIT_HLIST_NODE(&rp->c_hash);
-               i--;
        }
+       return rp;
+}
 
-       cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+static void
+nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
+{
+       if (rp->c_type == RC_REPLBUFF)
+               kfree(rp->c_replvec.iov_base);
+       hlist_del(&rp->c_hash);
+       list_del(&rp->c_lru);
+       --num_drc_entries;
+       kmem_cache_free(drc_slab, rp);
+}
+
+static void
+nfsd_reply_cache_free(struct svc_cacherep *rp)
+{
+       spin_lock(&cache_lock);
+       nfsd_reply_cache_free_locked(rp);
+       spin_unlock(&cache_lock);
+}
+
+int nfsd_reply_cache_init(void)
+{
+       register_shrinker(&nfsd_reply_cache_shrinker);
+       drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
+                                       0, 0, NULL);
+       if (!drc_slab)
+               goto out_nomem;
+
+       cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
        if (!cache_hash)
                goto out_nomem;
 
-       cache_disabled = 0;
+       INIT_LIST_HEAD(&lru_head);
+       max_drc_entries = nfsd_cache_size_limit();
+       num_drc_entries = 0;
+
        return 0;
 out_nomem:
        printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
@@ -79,27 +143,33 @@ void nfsd_reply_cache_shutdown(void)
 {
        struct svc_cacherep     *rp;
 
+       unregister_shrinker(&nfsd_reply_cache_shrinker);
+       cancel_delayed_work_sync(&cache_cleaner);
+
        while (!list_empty(&lru_head)) {
                rp = list_entry(lru_head.next, struct svc_cacherep, c_lru);
-               if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
-                       kfree(rp->c_replvec.iov_base);
-               list_del(&rp->c_lru);
-               kfree(rp);
+               nfsd_reply_cache_free_locked(rp);
        }
 
-       cache_disabled = 1;
-
        kfree (cache_hash);
        cache_hash = NULL;
+
+       if (drc_slab) {
+               kmem_cache_destroy(drc_slab);
+               drc_slab = NULL;
+       }
 }
 
 /*
- * Move cache entry to end of LRU list
+ * Move cache entry to end of LRU list, and queue the cleaner to run if it's
+ * not already scheduled.
  */
 static void
 lru_put_end(struct svc_cacherep *rp)
 {
+       rp->c_timestamp = jiffies;
        list_move_tail(&rp->c_lru, &lru_head);
+       schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
 }
 
 /*
@@ -112,82 +182,214 @@ hash_refile(struct svc_cacherep *rp)
        hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
 }
 
+static inline bool
+nfsd_cache_entry_expired(struct svc_cacherep *rp)
+{
+       return rp->c_state != RC_INPROG &&
+              time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
+}
+
+/*
+ * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
+ * Also prune the oldest ones when the total exceeds the max number of entries.
+ */
+static void
+prune_cache_entries(void)
+{
+       struct svc_cacherep *rp, *tmp;
+
+       list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
+               if (!nfsd_cache_entry_expired(rp) &&
+                   num_drc_entries <= max_drc_entries)
+                       break;
+               nfsd_reply_cache_free_locked(rp);
+       }
+
+       /*
+        * Conditionally rearm the job. If we cleaned out the list, then
+        * cancel any pending run (since there won't be any work to do).
+        * Otherwise, we rearm the job or modify the existing one to run in
+        * RC_EXPIRE since we just ran the pruner.
+        */
+       if (list_empty(&lru_head))
+               cancel_delayed_work(&cache_cleaner);
+       else
+               mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
+}
+
+static void
+cache_cleaner_func(struct work_struct *unused)
+{
+       spin_lock(&cache_lock);
+       prune_cache_entries();
+       spin_unlock(&cache_lock);
+}
+
+static int
+nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+       unsigned int num;
+
+       spin_lock(&cache_lock);
+       if (sc->nr_to_scan)
+               prune_cache_entries();
+       num = num_drc_entries;
+       spin_unlock(&cache_lock);
+
+       return num;
+}
+
+/*
+ * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
+ */
+static __wsum
+nfsd_cache_csum(struct svc_rqst *rqstp)
+{
+       int idx;
+       unsigned int base;
+       __wsum csum;
+       struct xdr_buf *buf = &rqstp->rq_arg;
+       const unsigned char *p = buf->head[0].iov_base;
+       size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
+                               RC_CSUMLEN);
+       size_t len = min(buf->head[0].iov_len, csum_len);
+
+       /* rq_arg.head first */
+       csum = csum_partial(p, len, 0);
+       csum_len -= len;
+
+       /* Continue into page array */
+       idx = buf->page_base / PAGE_SIZE;
+       base = buf->page_base & ~PAGE_MASK;
+       while (csum_len) {
+               p = page_address(buf->pages[idx]) + base;
+               len = min_t(size_t, PAGE_SIZE - base, csum_len);
+               csum = csum_partial(p, len, csum);
+               csum_len -= len;
+               base = 0;
+               ++idx;
+       }
+       return csum;
+}
+
+/*
+ * Search the request hash for an entry that matches the given rqstp.
+ * Must be called with cache_lock held. Returns the found entry or
+ * NULL on failure.
+ */
+static struct svc_cacherep *
+nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
+{
+       struct svc_cacherep     *rp;
+       struct hlist_head       *rh;
+       __be32                  xid = rqstp->rq_xid;
+       u32                     proto =  rqstp->rq_prot,
+                               vers = rqstp->rq_vers,
+                               proc = rqstp->rq_proc;
+
+       rh = &cache_hash[request_hash(xid)];
+       hlist_for_each_entry(rp, rh, c_hash) {
+               if (xid == rp->c_xid && proc == rp->c_proc &&
+                   proto == rp->c_prot && vers == rp->c_vers &&
+                   rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum &&
+                   rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) &&
+                   rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr))
+                       return rp;
+       }
+       return NULL;
+}
+
 /*
  * Try to find an entry matching the current call in the cache. When none
- * is found, we grab the oldest unlocked entry off the LRU list.
- * Note that no operation within the loop may sleep.
+ * is found, we try to grab the oldest expired entry off the LRU list. If
+ * a suitable one isn't there, then drop the cache_lock and allocate a
+ * new one, then search again in case one got inserted while this thread
+ * didn't hold the lock.
  */
 int
 nfsd_cache_lookup(struct svc_rqst *rqstp)
 {
-       struct hlist_head       *rh;
-       struct svc_cacherep     *rp;
+       struct svc_cacherep     *rp, *found;
        __be32                  xid = rqstp->rq_xid;
        u32                     proto =  rqstp->rq_prot,
                                vers = rqstp->rq_vers,
                                proc = rqstp->rq_proc;
+       __wsum                  csum;
        unsigned long           age;
        int type = rqstp->rq_cachetype;
        int rtn;
 
        rqstp->rq_cacherep = NULL;
-       if (cache_disabled || type == RC_NOCACHE) {
+       if (type == RC_NOCACHE) {
                nfsdstats.rcnocache++;
                return RC_DOIT;
        }
 
+       csum = nfsd_cache_csum(rqstp);
+
        spin_lock(&cache_lock);
        rtn = RC_DOIT;
 
-       rh = &cache_hash[request_hash(xid)];
-       hlist_for_each_entry(rp, rh, c_hash) {
-               if (rp->c_state != RC_UNUSED &&
-                   xid == rp->c_xid && proc == rp->c_proc &&
-                   proto == rp->c_prot && vers == rp->c_vers &&
-                   time_before(jiffies, rp->c_timestamp + 120*HZ) &&
-                   memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) {
-                       nfsdstats.rchits++;
-                       goto found_entry;
+       rp = nfsd_cache_search(rqstp, csum);
+       if (rp)
+               goto found_entry;
+
+       /* Try to use the first entry on the LRU */
+       if (!list_empty(&lru_head)) {
+               rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
+               if (nfsd_cache_entry_expired(rp) ||
+                   num_drc_entries >= max_drc_entries) {
+                       lru_put_end(rp);
+                       prune_cache_entries();
+                       goto setup_entry;
                }
        }
-       nfsdstats.rcmisses++;
 
-       /* This loop shouldn't take more than a few iterations normally */
-       {
-       int     safe = 0;
-       list_for_each_entry(rp, &lru_head, c_lru) {
-               if (rp->c_state != RC_INPROG)
-                       break;
-               if (safe++ > CACHESIZE) {
-                       printk("nfsd: loop in repcache LRU list\n");
-                       cache_disabled = 1;
-                       goto out;
-               }
+       /* Drop the lock and allocate a new entry */
+       spin_unlock(&cache_lock);
+       rp = nfsd_reply_cache_alloc();
+       if (!rp) {
+               dprintk("nfsd: unable to allocate DRC entry!\n");
+               return RC_DOIT;
        }
+       spin_lock(&cache_lock);
+       ++num_drc_entries;
+
+       /*
+        * Must search again just in case someone inserted one
+        * after we dropped the lock above.
+        */
+       found = nfsd_cache_search(rqstp, csum);
+       if (found) {
+               nfsd_reply_cache_free_locked(rp);
+               rp = found;
+               goto found_entry;
        }
 
-       /* All entries on the LRU are in-progress. This should not happen */
-       if (&rp->c_lru == &lru_head) {
-               static int      complaints;
-
-               printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
-               if (++complaints > 5) {
-                       printk(KERN_WARNING "nfsd: disabling repcache.\n");
-                       cache_disabled = 1;
-               }
-               goto out;
-       }
+       /*
+        * We're keeping the one we just allocated. Are we now over the
+        * limit? Prune one off the tip of the LRU in trade for the one we
+        * just allocated if so.
+        */
+       if (num_drc_entries >= max_drc_entries)
+               nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
+                                               struct svc_cacherep, c_lru));
 
+setup_entry:
+       nfsdstats.rcmisses++;
        rqstp->rq_cacherep = rp;
        rp->c_state = RC_INPROG;
        rp->c_xid = xid;
        rp->c_proc = proc;
-       memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr));
+       rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp));
+       rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp)));
        rp->c_prot = proto;
        rp->c_vers = vers;
-       rp->c_timestamp = jiffies;
+       rp->c_len = rqstp->rq_arg.len;
+       rp->c_csum = csum;
 
        hash_refile(rp);
+       lru_put_end(rp);
 
        /* release any buffer */
        if (rp->c_type == RC_REPLBUFF) {
@@ -200,9 +402,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
        return rtn;
 
 found_entry:
+       nfsdstats.rchits++;
        /* We found a matching entry which is either in progress or done. */
        age = jiffies - rp->c_timestamp;
-       rp->c_timestamp = jiffies;
        lru_put_end(rp);
 
        rtn = RC_DROPIT;
@@ -231,7 +433,7 @@ found_entry:
                break;
        default:
                printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
-               rp->c_state = RC_UNUSED;
+               nfsd_reply_cache_free_locked(rp);
        }
 
        goto out;
@@ -256,11 +458,11 @@ found_entry:
 void
 nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 {
-       struct svc_cacherep *rp;
+       struct svc_cacherep *rp = rqstp->rq_cacherep;
        struct kvec     *resv = &rqstp->rq_res.head[0], *cachv;
        int             len;
 
-       if (!(rp = rqstp->rq_cacherep) || cache_disabled)
+       if (!rp)
                return;
 
        len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
@@ -268,7 +470,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
 
        /* Don't cache excessive amounts of data and XDR failures */
        if (!statp || len > (256 >> 2)) {
-               rp->c_state = RC_UNUSED;
+               nfsd_reply_cache_free(rp);
                return;
        }
 
@@ -282,21 +484,21 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
                cachv = &rp->c_replvec;
                cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
                if (!cachv->iov_base) {
-                       spin_lock(&cache_lock);
-                       rp->c_state = RC_UNUSED;
-                       spin_unlock(&cache_lock);
+                       nfsd_reply_cache_free(rp);
                        return;
                }
                cachv->iov_len = len << 2;
                memcpy(cachv->iov_base, statp, len << 2);
                break;
+       case RC_NOCACHE:
+               nfsd_reply_cache_free(rp);
+               return;
        }
        spin_lock(&cache_lock);
        lru_put_end(rp);
        rp->c_secure = rqstp->rq_secure;
        rp->c_type = cachetype;
        rp->c_state = RC_DONE;
-       rp->c_timestamp = jiffies;
        spin_unlock(&cache_lock);
        return;
 }
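
The reply-cache rework above keys duplicate detection on a checksum of the start of the request in addition to the XID, address, and procedure, so a retransmission can be told apart from a new request that happens to reuse an XID. The user-space sketch below illustrates that prefix-checksum idea over a segmented buffer (a head segment followed by data pages); struct seg, req_csum() and SEG_CSUM_LEN are illustrative stand-ins rather than kernel symbols, and the hash is a toy, not csum_partial().

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define SEG_CSUM_LEN 256        /* plays the role of RC_CSUMLEN */

/* One segment of a request; the kernel walks rq_arg.head and then the
 * page array in the same way. */
struct seg {
        const unsigned char *data;
        size_t len;
};

/* Fold at most SEG_CSUM_LEN bytes from the segment list into a checksum. */
static uint32_t req_csum(const struct seg *segs, size_t nsegs)
{
        uint32_t csum = 0;
        size_t remaining = SEG_CSUM_LEN;

        for (size_t i = 0; i < nsegs && remaining > 0; i++) {
                size_t len = segs[i].len < remaining ? segs[i].len : remaining;

                for (size_t j = 0; j < len; j++)
                        csum = csum * 31 + segs[i].data[j];
                remaining -= len;
        }
        return csum;
}

int main(void)
{
        unsigned char head[]  = "xid+rpc header";
        unsigned char page0[] = "payload page 0";
        unsigned char page1[] = "payload page 1";
        struct seg req[] = {
                { head,  sizeof(head)  },
                { page0, sizeof(page0) },
                { page1, sizeof(page1) },
        };

        /* A retransmission produces the same checksum; a different
         * payload behind the same XID does not. */
        printf("csum=%08x\n", (unsigned)req_csum(req, 3));
        page1[0] ^= 0xff;
        printf("csum=%08x\n", (unsigned)req_csum(req, 3));
        return 0;
}
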
index 2db7021b01ae3f771f77a7e1d7fd0a693d4d0551..13a21c8fca490884e05e832d1a657daf9d1af58f 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <linux/sunrpc/svcsock.h>
 #include <linux/lockd/lockd.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/gss_api.h>
 #include <linux/sunrpc/gss_krb5_enctypes.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
@@ -125,11 +125,11 @@ static const struct file_operations transaction_ops = {
        .llseek         = default_llseek,
 };
 
-static int exports_open(struct inode *inode, struct file *file)
+static int exports_net_open(struct net *net, struct file *file)
 {
        int err;
        struct seq_file *seq;
-       struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
        err = seq_open(file, &nfs_exports_op);
        if (err)
@@ -140,8 +140,26 @@ static int exports_open(struct inode *inode, struct file *file)
        return 0;
 }
 
-static const struct file_operations exports_operations = {
-       .open           = exports_open,
+static int exports_proc_open(struct inode *inode, struct file *file)
+{
+       return exports_net_open(current->nsproxy->net_ns, file);
+}
+
+static const struct file_operations exports_proc_operations = {
+       .open           = exports_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+       .owner          = THIS_MODULE,
+};
+
+static int exports_nfsd_open(struct inode *inode, struct file *file)
+{
+       return exports_net_open(inode->i_sb->s_fs_info, file);
+}
+
+static const struct file_operations exports_nfsd_operations = {
+       .open           = exports_nfsd_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
@@ -220,6 +238,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
        struct sockaddr *sap = (struct sockaddr *)&address;
        size_t salen = sizeof(address);
        char *fo_path;
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
 
        /* sanity check */
        if (size == 0)
@@ -232,7 +251,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
        if (qword_get(&buf, fo_path, size) < 0)
                return -EINVAL;
 
-       if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0)
+       if (rpc_pton(net, fo_path, size, sap, salen) == 0)
                return -EINVAL;
 
        return nlmsvc_unlock_all_by_ip(sap);
@@ -317,6 +336,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
        int len;
        struct auth_domain *dom;
        struct knfsd_fh fh;
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
 
        if (size == 0)
                return -EINVAL;
@@ -352,7 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
        if (!dom)
                return -ENOMEM;
 
-       len = exp_rootfh(&init_net, dom, path, &fh,  maxsize);
+       len = exp_rootfh(net, dom, path, &fh,  maxsize);
        auth_domain_put(dom);
        if (len)
                return len;
@@ -396,7 +416,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
 {
        char *mesg = buf;
        int rv;
-       struct net *net = &init_net;
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
 
        if (size > 0) {
                int newthreads;
@@ -447,7 +467,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
        int len;
        int npools;
        int *nthreads;
-       struct net *net = &init_net;
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
 
        mutex_lock(&nfsd_mutex);
        npools = nfsd_nrpools(net);
@@ -510,7 +530,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
        unsigned minor;
        ssize_t tlen = 0;
        char *sep;
-       struct net *net = &init_net;
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
        if (size>0) {
@@ -534,7 +554,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                        else
                                num = simple_strtol(vers, &minorp, 0);
                        if (*minorp == '.') {
-                               if (num < 4)
+                               if (num != 4)
                                        return -EINVAL;
                                minor = simple_strtoul(minorp+1, NULL, 0);
                                if (minor == 0)
@@ -792,7 +812,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size,
 static ssize_t write_ports(struct file *file, char *buf, size_t size)
 {
        ssize_t rv;
-       struct net *net = &init_net;
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
 
        mutex_lock(&nfsd_mutex);
        rv = __write_ports(file, buf, size, net);
@@ -827,7 +847,7 @@ int nfsd_max_blksize;
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 {
        char *mesg = buf;
-       struct net *net = &init_net;
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
        if (size > 0) {
@@ -923,7 +943,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
  */
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 {
-       struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
+       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
        return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn);
 }
 
@@ -939,7 +960,8 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
  */
 static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
 {
-       struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
+       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
        return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn);
 }
 
@@ -995,7 +1017,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
 {
        ssize_t rv;
-       struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
+       struct net *net = file->f_dentry->d_sb->s_fs_info;
+       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
        mutex_lock(&nfsd_mutex);
        rv = __write_recoverydir(file, buf, size, nn);
@@ -1013,7 +1036,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
 static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 {
        static struct tree_descr nfsd_files[] = {
-               [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+               [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
                [NFSD_Export_features] = {"export_features",
                                        &export_features_operations, S_IRUGO},
                [NFSD_FO_UnlockIP] = {"unlock_ip",
@@ -1037,20 +1060,35 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 #endif
                /* last one */ {""}
        };
-       return simple_fill_super(sb, 0x6e667364, nfsd_files);
+       struct net *net = data;
+       int ret;
+
+       ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
+       if (ret)
+               return ret;
+       sb->s_fs_info = get_net(net);
+       return 0;
 }
 
 static struct dentry *nfsd_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
 {
-       return mount_single(fs_type, flags, data, nfsd_fill_super);
+       return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super);
+}
+
+static void nfsd_umount(struct super_block *sb)
+{
+       struct net *net = sb->s_fs_info;
+
+       kill_litter_super(sb);
+       put_net(net);
 }
 
 static struct file_system_type nfsd_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "nfsd",
        .mount          = nfsd_mount,
-       .kill_sb        = kill_litter_super,
+       .kill_sb        = nfsd_umount,
 };
 
 #ifdef CONFIG_PROC_FS
@@ -1061,7 +1099,8 @@ static int create_proc_exports_entry(void)
        entry = proc_mkdir("fs/nfs", NULL);
        if (!entry)
                return -ENOMEM;
-       entry = proc_create("exports", 0, entry, &exports_operations);
+       entry = proc_create("exports", 0, entry,
+                                &exports_proc_operations);
        if (!entry)
                return -ENOMEM;
        return 0;
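
The nfsctl changes above stop hard-coding init_net and instead use the network namespace captured in the superblock's s_fs_info at mount time. A minimal user-space analogue of that pattern is sketched below; struct netns, struct mount and the helper names are invented for illustration only.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct net: a little per-namespace state. */
struct netns {
        const char *name;
        int nfsd_threads;
};

/* Stand-in for the superblock: the owning namespace is captured at mount
 * time, like sb->s_fs_info = get_net(net) above. */
struct mount {
        struct netns *fs_info;
};

/* The old behaviour: every write silently targeted this global. */
static struct netns init_net = { "init_net", 0 };

/* The new behaviour: helpers take their context from the mount they were
 * opened on rather than from a global. */
static void write_threads(struct mount *m, int n)
{
        m->fs_info->nfsd_threads = n;
}

static struct mount *do_mount(struct netns *net)
{
        struct mount *m = malloc(sizeof(*m));

        m->fs_info = net;
        return m;
}

int main(void)
{
        struct netns container_net = { "container", 0 };
        struct mount *m1 = do_mount(&init_net);
        struct mount *m2 = do_mount(&container_net);

        write_threads(m1, 8);
        write_threads(m2, 2);
        printf("%s=%d %s=%d\n", init_net.name, init_net.nfsd_threads,
               container_net.name, container_net.nfsd_threads);
        free(m1);
        free(m2);
        return 0;
}
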
index be7af509930cb95fbbef903e45d33a6b69c42697..262df5ccbf59db0c4fd516a30fa721945b55f600 100644 (file)
@@ -652,7 +652,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 
        /* Check whether we have this call in the cache. */
        switch (nfsd_cache_lookup(rqstp)) {
-       case RC_INTR:
        case RC_DROPIT:
                return 0;
        case RC_REPLY:
@@ -703,8 +702,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 {
        int ret;
-       struct net *net = &init_net;
-       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+       struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
 
        mutex_lock(&nfsd_mutex);
        if (nn->nfsd_serv == NULL) {
@@ -721,7 +719,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
 int nfsd_pool_stats_release(struct inode *inode, struct file *file)
 {
        int ret = seq_release(inode, file);
-       struct net *net = &init_net;
+       struct net *net = inode->i_sb->s_fs_info;
 
        mutex_lock(&nfsd_mutex);
        /* this function really, really should have been called svc_put() */
index 0889bfb43dc9ac83bcf6d05c47c5752dbec5e280..546f8983ecf144faa4c8c8b2bacbef821e12a875 100644 (file)
@@ -563,7 +563,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
 __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
-                      struct dentry *dentry, __be32 *buffer, int *countp,
+                      struct dentry *dentry, __be32 **buffer, int countp,
                       u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
 extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *,
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
new file mode 100644 (file)
index 0000000..720446c
--- /dev/null
@@ -0,0 +1,72 @@
+#include <acpi/apei.h>
+#include <acpi/hed.h>
+
+/*
+ * One struct ghes is created for each generic hardware error source.
+ * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
+ * handler.
+ *
+ * estatus: memory buffer for error status block, allocated during
+ * HEST parsing.
+ */
+#define GHES_TO_CLEAR          0x0001
+#define GHES_EXITING           0x0002
+
+struct ghes {
+       struct acpi_hest_generic *generic;
+       struct acpi_hest_generic_status *estatus;
+       u64 buffer_paddr;
+       unsigned long flags;
+       union {
+               struct list_head list;
+               struct timer_list timer;
+               unsigned int irq;
+       };
+};
+
+struct ghes_estatus_node {
+       struct llist_node llnode;
+       struct acpi_hest_generic *generic;
+       struct ghes *ghes;
+};
+
+struct ghes_estatus_cache {
+       u32 estatus_len;
+       atomic_t count;
+       struct acpi_hest_generic *generic;
+       unsigned long long time_in;
+       struct rcu_head rcu;
+};
+
+enum {
+       GHES_SEV_NO = 0x0,
+       GHES_SEV_CORRECTED = 0x1,
+       GHES_SEV_RECOVERABLE = 0x2,
+       GHES_SEV_PANIC = 0x3,
+};
+
+/* From drivers/edac/ghes_edac.c */
+
+#ifdef CONFIG_EDAC_GHES
+void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+                               struct cper_sec_mem_err *mem_err);
+
+int ghes_edac_register(struct ghes *ghes, struct device *dev);
+
+void ghes_edac_unregister(struct ghes *ghes);
+
+#else
+static inline void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+                                      struct cper_sec_mem_err *mem_err)
+{
+}
+
+static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
+{
+       return 0;
+}
+
+static inline void ghes_edac_unregister(struct ghes *ghes)
+{
+}
+#endif
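
The #ifdef CONFIG_EDAC_GHES block above pairs the real declarations with empty static inline stubs so callers compile unchanged when the feature is disabled. A small stand-alone illustration of that pattern follows, using an invented CONFIG_FEATURE_REPORT switch.

#include <stdio.h>

/* Toggle at build time, e.g.: cc -DCONFIG_FEATURE_REPORT demo.c */
#ifdef CONFIG_FEATURE_REPORT
static void report_error(int severity)
{
        printf("reporting error, severity %d\n", severity);
}
#else
/* Empty stub: callers keep compiling when the feature is configured out,
 * exactly the role the ghes_edac_* inline stubs play above. */
static inline void report_error(int severity)
{
        (void)severity;
}
#endif

int main(void)
{
        report_error(2);
        return 0;
}
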
index f94bc83011ed5c7a3c96f0adabefa1246f86cb47..78feda9bbae2632b2c0e61493e31b909af55f522 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/gfp.h>
 #include <linux/bsg.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 
 #include <asm/scatterlist.h>
 
@@ -437,6 +438,7 @@ struct request_queue {
        /* Throttle data */
        struct throtl_data *td;
 #endif
+       struct rcu_head         rcu_head;
 };
 
 #define QUEUE_FLAG_QUEUED      1       /* uses generic tag queueing */
@@ -974,7 +976,6 @@ struct blk_plug {
        unsigned long magic; /* detect uninitialized use-cases */
        struct list_head list; /* requests */
        struct list_head cb_list; /* md requires an unplug callback */
-       unsigned int should_sort; /* list to be sorted before flushing? */
 };
 #define BLK_MAX_REQUEST_COUNT 16
 
index 7c2e030e72f10542612ba34ce79b18f6b1b8ad06..0ea61e07a91c03053833faf3a4250e1f6023a10d 100644 (file)
@@ -12,6 +12,7 @@
 
 struct blk_trace {
        int trace_state;
+       bool rq_based;
        struct rchan *rchan;
        unsigned long __percpu *sequence;
        unsigned char __percpu *msg_data;
index 458f497738a445ec96bf2153f2e0f99ee49e3e8c..5afc4f94d110f67590e8a7924c09b006f1e711c8 100644 (file)
@@ -126,7 +126,6 @@ BUFFER_FNS(Write_EIO, write_io_error)
 BUFFER_FNS(Unwritten, unwritten)
 
 #define bh_offset(bh)          ((unsigned long)(bh)->b_data & ~PAGE_MASK)
-#define touch_buffer(bh)       mark_page_accessed(bh->b_page)
 
 /* If we *know* page->private refers to buffer_heads */
 #define page_buffers(page)                                     \
@@ -142,6 +141,7 @@ BUFFER_FNS(Unwritten, unwritten)
 
 void mark_buffer_dirty(struct buffer_head *bh);
 void init_buffer(struct buffer_head *, bh_end_io_t *, void *);
+void touch_buffer(struct buffer_head *bh);
 void set_bh_page(struct buffer_head *bh,
                struct page *page, unsigned long offset);
 int try_to_free_buffers(struct page *);
index dad579b0c0e65981ea42c7a820f71e0480154b17..76554cecaab24f71fd327db30e3af51143928008 100644 (file)
 #define CEPH_FEATURE_MONNAMES       (1<<5)
 #define CEPH_FEATURE_RECONNECT_SEQ  (1<<6)
 #define CEPH_FEATURE_DIRLAYOUTHASH  (1<<7)
-/* bits 8-17 defined by user-space; not supported yet here */
+#define CEPH_FEATURE_OBJECTLOCATOR  (1<<8)
+#define CEPH_FEATURE_PGID64         (1<<9)
+#define CEPH_FEATURE_INCSUBOSDMAP   (1<<10)
+#define CEPH_FEATURE_PGPOOL3        (1<<11)
+#define CEPH_FEATURE_OSDREPLYMUX    (1<<12)
+#define CEPH_FEATURE_OSDENC         (1<<13)
+#define CEPH_FEATURE_OMAP           (1<<14)
+#define CEPH_FEATURE_MONENC         (1<<15)
+#define CEPH_FEATURE_QUERY_T        (1<<16)
+#define CEPH_FEATURE_INDEP_PG_MAP   (1<<17)
 #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
+#define CEPH_FEATURE_CHUNKY_SCRUB   (1<<19)
+#define CEPH_FEATURE_MON_NULLROUTE  (1<<20)
+#define CEPH_FEATURE_MON_GV         (1<<21)
+#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22)
+#define CEPH_FEATURE_MSG_AUTH      (1<<23)
+#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24)
+#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
+#define CEPH_FEATURE_CREATEPOOLID   (1<<26)
+#define CEPH_FEATURE_REPLY_CREATE_INODE   (1<<27)
+#define CEPH_FEATURE_OSD_HBMSGS     (1<<28)
+#define CEPH_FEATURE_MDSENC         (1<<29)
+#define CEPH_FEATURE_OSDHASHPSPOOL  (1<<30)
 
 /*
  * Features supported.
  */
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  \
-       (CEPH_FEATURE_NOSRCADDR |        \
-        CEPH_FEATURE_CRUSH_TUNABLES)
+       (CEPH_FEATURE_NOSRCADDR |               \
+        CEPH_FEATURE_PGID64 |                  \
+        CEPH_FEATURE_PGPOOL3 |                 \
+        CEPH_FEATURE_OSDENC |                  \
+        CEPH_FEATURE_CRUSH_TUNABLES |          \
+        CEPH_FEATURE_CRUSH_TUNABLES2 |         \
+        CEPH_FEATURE_REPLY_CREATE_INODE |      \
+        CEPH_FEATURE_OSDHASHPSPOOL)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
-       (CEPH_FEATURE_NOSRCADDR)
+       (CEPH_FEATURE_NOSRCADDR |        \
+        CEPH_FEATURE_PGID64 |           \
+        CEPH_FEATURE_PGPOOL3 |          \
+        CEPH_FEATURE_OSDENC)
 #endif
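
Feature negotiation with these bits amounts to checking that a peer advertises every bit in the required mask before accepting the session. A stand-alone sketch of that check, using invented FEAT_* names in place of the CEPH_FEATURE_* values above:

#include <stdio.h>
#include <stdint.h>

/* Invented feature bits in the style of the CEPH_FEATURE_* defines. */
#define FEAT_NOSRCADDR  (1u << 1)
#define FEAT_PGID64     (1u << 9)
#define FEAT_PGPOOL3    (1u << 11)
#define FEAT_OSDENC     (1u << 13)

#define FEAT_SUPPORTED  (FEAT_NOSRCADDR | FEAT_PGID64 | FEAT_PGPOOL3 | FEAT_OSDENC)
#define FEAT_REQUIRED   (FEAT_NOSRCADDR | FEAT_PGID64)

/* A peer is acceptable only if it advertises every bit we require. */
static int peer_ok(uint32_t peer_features)
{
        uint32_t missing = FEAT_REQUIRED & ~peer_features;

        if (missing)
                printf("peer is missing required features: 0x%x\n", missing);
        return missing == 0;
}

int main(void)
{
        printf("old peer ok: %d\n", peer_ok(FEAT_NOSRCADDR));
        printf("new peer ok: %d\n", peer_ok(FEAT_SUPPORTED));
        return 0;
}
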
index cf6f4d998a7642567bb9198d90b16f07c6996fa8..2ad7b860f06232d76ae5a4f208d081a17825a9ae 100644 (file)
  * internal cluster protocols separately from the public,
  * client-facing protocol.
  */
-#define CEPH_OSD_PROTOCOL     8 /* cluster internal */
-#define CEPH_MDS_PROTOCOL    12 /* cluster internal */
-#define CEPH_MON_PROTOCOL     5 /* cluster internal */
 #define CEPH_OSDC_PROTOCOL   24 /* server/client */
 #define CEPH_MDSC_PROTOCOL   32 /* server/client */
 #define CEPH_MONC_PROTOCOL   15 /* server/client */
 
 
-#define CEPH_INO_ROOT  1
-#define CEPH_INO_CEPH  2        /* hidden .ceph dir */
+#define CEPH_INO_ROOT   1
+#define CEPH_INO_CEPH   2       /* hidden .ceph dir */
+#define CEPH_INO_DOTDOT 3      /* used by ceph fuse for parent (..) */
 
 /* arbitrary limit on max # of monitors (cluster of 3 is typical) */
 #define CEPH_MAX_MON   31
@@ -51,7 +49,7 @@ struct ceph_file_layout {
        __le32 fl_object_stripe_unit;  /* UNUSED.  for per-object parity, if any */
 
        /* object -> pg layout */
-       __le32 fl_unused;       /* unused; used to be preferred primary (-1) */
+       __le32 fl_unused;       /* unused; used to be preferred primary for pg (-1 for none) */
        __le32 fl_pg_pool;      /* namespace, crush ruleset, rep level */
 } __attribute__ ((packed));
 
@@ -101,6 +99,8 @@ struct ceph_dir_layout {
 #define CEPH_MSG_MON_SUBSCRIBE_ACK      16
 #define CEPH_MSG_AUTH                  17
 #define CEPH_MSG_AUTH_REPLY            18
+#define CEPH_MSG_MON_GET_VERSION        19
+#define CEPH_MSG_MON_GET_VERSION_REPLY  20
 
 /* client <-> mds */
 #define CEPH_MSG_MDS_MAP                21
@@ -220,6 +220,11 @@ struct ceph_mon_subscribe_ack {
        struct ceph_fsid fsid;
 } __attribute__ ((packed));
 
+/*
+ * mdsmap flags
+ */
+#define CEPH_MDSMAP_DOWN    (1<<0)  /* cluster deliberately down */
+
 /*
  * mds states
  *   > 0 -> in
@@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack {
 #define CEPH_MDS_STATE_CREATING    -6  /* up, creating MDS instance. */
 #define CEPH_MDS_STATE_STARTING    -7  /* up, starting previously stopped mds */
 #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
+#define CEPH_MDS_STATE_REPLAYONCE   -9 /* up, replaying an active node's journal */
 
 #define CEPH_MDS_STATE_REPLAY       8  /* up, replaying journal. */
 #define CEPH_MDS_STATE_RESOLVE      9  /* up, disambiguating distributed
@@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s);
 #define CEPH_LOCK_IXATTR      2048
 #define CEPH_LOCK_IFLOCK      4096  /* advisory file locks */
 #define CEPH_LOCK_INO         8192  /* immutable inode bits; not a lock */
+#define CEPH_LOCK_IPOLICY     16384 /* policy lock on dirs. MDS internal */
 
 /* client_session ops */
 enum {
@@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op);
 #define CEPH_SETATTR_SIZE  32
 #define CEPH_SETATTR_CTIME 64
 
+/*
+ * Ceph setxattr request flags.
+ */
+#define CEPH_XATTR_CREATE  1
+#define CEPH_XATTR_REPLACE 2
+
 union ceph_mds_request_args {
        struct {
                __le32 mask;                 /* CEPH_CAP_* */
@@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags);
 #define CEPH_CAP_GWREXTEND  64  /* (file) client can extend EOF */
 #define CEPH_CAP_GLAZYIO   128  /* (file) client can perform lazy io */
 
+#define CEPH_CAP_SIMPLE_BITS  2
+#define CEPH_CAP_FILE_BITS    8
+
 /* per-lock shift */
 #define CEPH_CAP_SAUTH      2
 #define CEPH_CAP_SLINK      4
 #define CEPH_CAP_SXATTR     6
 #define CEPH_CAP_SFILE      8
-#define CEPH_CAP_SFLOCK    20 
+#define CEPH_CAP_SFLOCK    20
 
-#define CEPH_CAP_BITS       22
+#define CEPH_CAP_BITS      22
 
 /* composed values */
 #define CEPH_CAP_AUTH_SHARED  (CEPH_CAP_GSHARED  << CEPH_CAP_SAUTH)
index 63d092822bad0b07064b869244ed394dbad1b818..360d9d08ca9e2a12a56e85b5625a938d8a2f6b3a 100644 (file)
@@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
        return end >= *p && n <= end - *p;
 }
 
-#define ceph_decode_need(p, end, n, bad)               \
-       do {                                            \
-               if (!likely(ceph_has_room(p, end, n)))  \
-                       goto bad;                       \
+#define ceph_decode_need(p, end, n, bad)                       \
+       do {                                                    \
+               if (!likely(ceph_has_room(p, end, n)))          \
+                       goto bad;                               \
        } while (0)
 
 #define ceph_decode_64_safe(p, end, v, bad)                    \
@@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n)
  *
  * There are two possible failures:
  *   - converting the string would require accessing memory at or
- *     beyond the "end" pointer provided (-E
- *   - memory could not be allocated for the result
+ *     beyond the "end" pointer provided (-ERANGE)
+ *   - memory could not be allocated for the result (-ENOMEM)
  */
 static inline char *ceph_extract_encoded_string(void **p, void *end,
                                                size_t *lenp, gfp_t gfp)
@@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end,
        *p += len;
 }
 
-#define ceph_encode_need(p, end, n, bad)               \
-       do {                                            \
-               if (!likely(ceph_has_room(p, end, n)))  \
-                       goto bad;                       \
+#define ceph_encode_need(p, end, n, bad)                       \
+       do {                                                    \
+               if (!likely(ceph_has_room(p, end, n)))          \
+                       goto bad;                               \
        } while (0)
 
 #define ceph_encode_64_safe(p, end, v, bad)                    \
@@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end,
 #define ceph_encode_32_safe(p, end, v, bad)                    \
        do {                                                    \
                ceph_encode_need(p, end, sizeof(u32), bad);     \
-               ceph_encode_32(p, v);                   \
+               ceph_encode_32(p, v);                           \
        } while (0)
 #define ceph_encode_16_safe(p, end, v, bad)                    \
        do {                                                    \
                ceph_encode_need(p, end, sizeof(u16), bad);     \
-               ceph_encode_16(p, v);                   \
+               ceph_encode_16(p, v);                           \
+       } while (0)
+#define ceph_encode_8_safe(p, end, v, bad)                     \
+       do {                                                    \
+               ceph_encode_need(p, end, sizeof(u8), bad);      \
+               ceph_encode_8(p, v);                            \
        } while (0)
 
 #define ceph_encode_copy_safe(p, end, pv, n, bad)              \
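
The *_safe macros above bound every encode and decode against the end pointer and bail out through a caller-supplied error label instead of reading or writing past the buffer. The self-contained sketch below shows the same shape in plain C; has_room() and decode_32_safe() are simplified stand-ins that work in host byte order rather than decoding the ceph wire format.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Same shape as ceph_has_room(): is there space for n more bytes before end? */
static int has_room(const void *p, const void *end, size_t n)
{
        return end >= p && n <= (size_t)((const char *)end - (const char *)p);
}

/* Bounds-checked 32-bit decode in the spirit of ceph_decode_32_safe():
 * jump to the caller-supplied label when the buffer is too short. */
#define decode_32_safe(p, end, v, bad)                                  \
        do {                                                            \
                if (!has_room(*(p), (end), sizeof(uint32_t)))           \
                        goto bad;                                       \
                memcpy(&(v), *(p), sizeof(uint32_t));                   \
                *(p) = (const char *)*(p) + sizeof(uint32_t);           \
        } while (0)

int main(void)
{
        unsigned char buf[6] = { 1, 0, 0, 0, 2, 0 };
        const void *p = buf;
        const void *end = buf + sizeof(buf);
        uint32_t a, b;

        decode_32_safe(&p, end, a, bad);
        decode_32_safe(&p, end, b, bad);  /* only two bytes left: takes the bad path */
        printf("a=%u b=%u\n", a, b);
        return 0;
bad:
        printf("truncated buffer after a=%u\n", a);
        return 1;
}
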
index 084d3c622b12a8ac58ad626da2e8f325d6d3d1c9..29818fc3fa493ea084e19f8f57b88d6e7aa2cd00 100644 (file)
@@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len)
 }
 
 /* ceph_common.c */
+extern bool libceph_compatible(void *data);
+
 extern const char *ceph_msg_type_name(int type);
 extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
 extern struct kmem_cache *ceph_inode_cachep;
@@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client);
 /* pagevec.c */
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 
-extern struct page **ceph_get_direct_page_vector(const char __user *data,
+extern struct page **ceph_get_direct_page_vector(const void __user *data,
                                                 int num_pages,
                                                 bool write_page);
 extern void ceph_put_page_vector(struct page **pages, int num_pages,
@@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages,
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_copy_user_to_page_vector(struct page **pages,
-                                        const char __user *data,
+                                        const void __user *data,
                                         loff_t off, size_t len);
-extern int ceph_copy_to_page_vector(struct page **pages,
-                                   const char *data,
+extern void ceph_copy_to_page_vector(struct page **pages,
+                                   const void *data,
                                    loff_t off, size_t len);
-extern int ceph_copy_from_page_vector(struct page **pages,
-                                   char *data,
+extern void ceph_copy_from_page_vector(struct page **pages,
+                                   void *data,
                                    loff_t off, size_t len);
-extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
+extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
                                    loff_t off, size_t len);
 extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
 
index cb15b5d867c75b3c1ab20e883b04e2ac43f02b52..87ed09f548007eb5ce6e8e50651dfc0484387091 100644 (file)
@@ -29,8 +29,8 @@ struct ceph_mdsmap {
 
        /* which object pools file data can be stored in */
        int m_num_data_pg_pools;
-       u32 *m_data_pg_pools;
-       u32 m_cas_pg_pool;
+       u64 *m_data_pg_pools;
+       u64 m_cas_pg_pool;
 };
 
 static inline struct ceph_entity_addr *
index 14ba5ee738a90bbf6f27c0b12c846f1dfd93c13a..60903e0f665cb62ea9bf2db0cba04c984255e493 100644 (file)
@@ -83,9 +83,11 @@ struct ceph_msg {
        struct list_head list_head;
 
        struct kref kref;
+#ifdef CONFIG_BLOCK
        struct bio  *bio;               /* instead of pages/pagelist */
        struct bio  *bio_iter;          /* bio iterator */
        int bio_seg;                    /* current bio segment */
+#endif /* CONFIG_BLOCK */
        struct ceph_pagelist *trail;    /* the trailing part of the data */
        bool front_is_vmalloc;
        bool more_to_follow;
index d9b880e977e62fa5dcf853baf8dbffedf8c38cda..1dd5d466b6f9f01980c8e3efb7d905395f6b3fde 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/ceph/osdmap.h>
 #include <linux/ceph/messenger.h>
 #include <linux/ceph/auth.h>
+#include <linux/ceph/pagelist.h>
 
 /* 
  * Maximum object name size 
@@ -22,7 +23,6 @@ struct ceph_snap_context;
 struct ceph_osd_request;
 struct ceph_osd_client;
 struct ceph_authorizer;
-struct ceph_pagelist;
 
 /*
  * completion callback for async writepages
@@ -47,6 +47,9 @@ struct ceph_osd {
        struct list_head o_keepalive_item;
 };
 
+
+#define CEPH_OSD_MAX_OP 10
+
 /* an in-flight request */
 struct ceph_osd_request {
        u64             r_tid;              /* unique for this client */
@@ -63,9 +66,23 @@ struct ceph_osd_request {
        struct ceph_connection *r_con_filling_msg;
 
        struct ceph_msg  *r_request, *r_reply;
-       int               r_result;
        int               r_flags;     /* any additional flags for the osd */
        u32               r_sent;      /* >0 if r_request is sending/sent */
+       int               r_num_ops;
+
+       /* encoded message content */
+       struct ceph_osd_op *r_request_ops;
+       /* these are updated on each send */
+       __le32           *r_request_osdmap_epoch;
+       __le32           *r_request_flags;
+       __le64           *r_request_pool;
+       void             *r_request_pgid;
+       __le32           *r_request_attempts;
+       struct ceph_eversion *r_request_reassert_version;
+
+       int               r_result;
+       int               r_reply_op_len[CEPH_OSD_MAX_OP];
+       s32               r_reply_op_result[CEPH_OSD_MAX_OP];
        int               r_got_reply;
        int               r_linger;
 
@@ -82,6 +99,7 @@ struct ceph_osd_request {
 
        char              r_oid[MAX_OBJ_NAME_SIZE];          /* object name */
        int               r_oid_len;
+       u64               r_snapid;
        unsigned long     r_stamp;            /* send OR check time */
 
        struct ceph_file_layout r_file_layout;
@@ -95,7 +113,7 @@ struct ceph_osd_request {
        struct bio       *r_bio;              /* instead of pages */
 #endif
 
-       struct ceph_pagelist *r_trail;        /* trailing part of the data */
+       struct ceph_pagelist r_trail;         /* trailing part of the data */
 };
 
 struct ceph_osd_event {
@@ -107,7 +125,6 @@ struct ceph_osd_event {
        struct rb_node node;
        struct list_head osd_node;
        struct kref kref;
-       struct completion completion;
 };
 
 struct ceph_osd_event_work {
@@ -157,7 +174,7 @@ struct ceph_osd_client {
 
 struct ceph_osd_req_op {
        u16 op;           /* CEPH_OSD_OP_* */
-       u32 flags;        /* CEPH_OSD_FLAG_* */
+       u32 payload_len;
        union {
                struct {
                        u64 offset, length;
@@ -166,23 +183,24 @@ struct ceph_osd_req_op {
                } extent;
                struct {
                        const char *name;
-                       u32 name_len;
                        const char  *val;
+                       u32 name_len;
                        u32 value_len;
                        __u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
                        __u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
                } xattr;
                struct {
                        const char *class_name;
-                       __u8 class_len;
                        const char *method_name;
-                       __u8 method_len;
-                       __u8 argc;
                        const char *indata;
                        u32 indata_len;
+                       __u8 class_len;
+                       __u8 method_len;
+                       __u8 argc;
                } cls;
                struct {
-                       u64 cookie, count;
+                       u64 cookie;
+                       u64 count;
                } pgls;
                struct {
                        u64 snapid;
@@ -190,12 +208,11 @@ struct ceph_osd_req_op {
                struct {
                        u64 cookie;
                        u64 ver;
-                       __u8 flag;
                        u32 prot_ver;
                        u32 timeout;
+                       __u8 flag;
                } watch;
        };
-       u32 payload_len;
 };
 
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
@@ -207,29 +224,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
                                 struct ceph_msg *msg);
 
-extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
-                       struct ceph_file_layout *layout,
-                       u64 snapid,
-                       u64 off, u64 *plen, u64 *bno,
-                       struct ceph_osd_request *req,
-                       struct ceph_osd_req_op *op);
-
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
-                                              int flags,
                                               struct ceph_snap_context *snapc,
-                                              struct ceph_osd_req_op *ops,
+                                              unsigned int num_op,
                                               bool use_mempool,
-                                              gfp_t gfp_flags,
-                                              struct page **pages,
-                                              struct bio *bio);
+                                              gfp_t gfp_flags);
 
 extern void ceph_osdc_build_request(struct ceph_osd_request *req,
-                                   u64 off, u64 *plen,
+                                   u64 off, u64 len,
+                                   unsigned int num_op,
                                    struct ceph_osd_req_op *src_ops,
                                    struct ceph_snap_context *snapc,
-                                   struct timespec *mtime,
-                                   const char *oid,
-                                   int oid_len);
+                                   u64 snap_id,
+                                   struct timespec *mtime);
 
 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
                                      struct ceph_file_layout *layout,
@@ -239,8 +246,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
                                      int do_sync, u32 truncate_seq,
                                      u64 truncate_size,
                                      struct timespec *mtime,
-                                     bool use_mempool, int num_reply,
-                                     int page_align);
+                                     bool use_mempool, int page_align);
 
 extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
                                         struct ceph_osd_request *req);
@@ -279,17 +285,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
                                u64 off, u64 len,
                                u32 truncate_seq, u64 truncate_size,
                                struct timespec *mtime,
-                               struct page **pages, int nr_pages,
-                               int flags, int do_sync, bool nofail);
+                               struct page **pages, int nr_pages);
 
 /* watch/notify events */
 extern int ceph_osdc_create_event(struct ceph_osd_client *osdc,
                                  void (*event_cb)(u64, u64, u8, void *),
-                                 int one_shot, void *data,
-                                 struct ceph_osd_event **pevent);
+                                 void *data, struct ceph_osd_event **pevent);
 extern void ceph_osdc_cancel_event(struct ceph_osd_event *event);
-extern int ceph_osdc_wait_event(struct ceph_osd_event *event,
-                               unsigned long timeout);
 extern void ceph_osdc_put_event(struct ceph_osd_event *event);
 #endif
 
index 10a417f9f76fa9ccd2e2fffbc27950dbc355df59..c819190d16423c4b82a619871a1e68babbd765ba 100644 (file)
  * The map can be updated either via an incremental map (diff) describing
  * the change between two successive epochs, or as a fully encoded map.
  */
+struct ceph_pg {
+       uint64_t pool;
+       uint32_t seed;
+};
+
+#define CEPH_POOL_FLAG_HASHPSPOOL  1
+
 struct ceph_pg_pool_info {
        struct rb_node node;
-       int id;
-       struct ceph_pg_pool v;
-       int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
+       s64 id;
+       u8 type;
+       u8 size;
+       u8 crush_ruleset;
+       u8 object_hash;
+       u32 pg_num, pgp_num;
+       int pg_num_mask, pgp_num_mask;
+       u64 flags;
        char *name;
 };
 
+struct ceph_object_locator {
+       uint64_t pool;
+       char *key;
+};
+
 struct ceph_pg_mapping {
        struct rb_node node;
        struct ceph_pg pgid;
@@ -110,15 +127,16 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
 
 /* calculate mapping of a file extent to an object */
 extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-                                        u64 off, u64 *plen,
+                                        u64 off, u64 len,
                                         u64 *bno, u64 *oxoff, u64 *oxlen);
 
 /* calculate mapping of object to a placement group */
-extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
+extern int ceph_calc_object_layout(struct ceph_pg *pg,
                                   const char *oid,
                                   struct ceph_file_layout *fl,
                                   struct ceph_osdmap *osdmap);
-extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
+                              struct ceph_pg pgid,
                               int *acting);
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
                                struct ceph_pg pgid);
index 2c04afeead1cb3d776feea90e51f93731417b1a3..68c96a508ac20f8ff3aff08b7e29ff9bdf86bccf 100644 (file)
@@ -8,14 +8,6 @@
 
 #include <linux/ceph/msgr.h>
 
-/*
- * osdmap encoding versions
- */
-#define CEPH_OSDMAP_INC_VERSION     5
-#define CEPH_OSDMAP_INC_VERSION_EXT 6
-#define CEPH_OSDMAP_VERSION         5
-#define CEPH_OSDMAP_VERSION_EXT     6
-
 /*
  * fs id
  */
@@ -64,7 +56,7 @@ struct ceph_timespec {
  * placement group.
  * we encode this into one __le64.
  */
-struct ceph_pg {
+struct ceph_pg_v1 {
        __le16 preferred; /* preferred primary osd */
        __le16 ps;        /* placement seed */
        __le32 pool;      /* object pool */
@@ -91,21 +83,6 @@ struct ceph_pg {
 
 #define CEPH_PG_TYPE_REP     1
 #define CEPH_PG_TYPE_RAID4   2
-#define CEPH_PG_POOL_VERSION 2
-struct ceph_pg_pool {
-       __u8 type;                /* CEPH_PG_TYPE_* */
-       __u8 size;                /* number of osds in each pg */
-       __u8 crush_ruleset;       /* crush placement rule */
-       __u8 object_hash;         /* hash mapping object name to ps */
-       __le32 pg_num, pgp_num;   /* number of pg's */
-       __le32 lpg_num, lpgp_num; /* number of localized pg's */
-       __le32 last_change;       /* most recent epoch changed */
-       __le64 snap_seq;          /* seq for per-pool snapshot */
-       __le32 snap_epoch;        /* epoch of last snap */
-       __le32 num_snaps;
-       __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */
-       __le64 auid;               /* who owns the pg */
-} __attribute__ ((packed));
 
 /*
  * stable_mod func is used to control number of placement groups.
@@ -128,7 +105,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask)
  * object layout - how a given object should be stored.
  */
 struct ceph_object_layout {
-       struct ceph_pg ol_pgid;   /* raw pg, with _full_ ps precision. */
+       struct ceph_pg_v1 ol_pgid;   /* raw pg, with _full_ ps precision. */
        __le32 ol_stripe_unit;    /* for per-object parity, if any */
 } __attribute__ ((packed));
 
@@ -145,8 +122,12 @@ struct ceph_eversion {
  */
 
 /* status bits */
-#define CEPH_OSD_EXISTS 1
-#define CEPH_OSD_UP     2
+#define CEPH_OSD_EXISTS  (1<<0)
+#define CEPH_OSD_UP      (1<<1)
+#define CEPH_OSD_AUTOOUT (1<<2)  /* osd was automatically marked out */
+#define CEPH_OSD_NEW     (1<<3)  /* osd is new, never marked in */
+
+extern const char *ceph_osd_state_name(int s);
 
 /* osd weights.  fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
 #define CEPH_OSD_IN  0x10000
@@ -161,9 +142,25 @@ struct ceph_eversion {
 #define CEPH_OSDMAP_PAUSERD  (1<<2)  /* pause all reads */
 #define CEPH_OSDMAP_PAUSEWR  (1<<3)  /* pause all writes */
 #define CEPH_OSDMAP_PAUSEREC (1<<4)  /* pause recovery */
+#define CEPH_OSDMAP_NOUP     (1<<5)  /* block osd boot */
+#define CEPH_OSDMAP_NODOWN   (1<<6)  /* block osd mark-down/failure */
+#define CEPH_OSDMAP_NOOUT    (1<<7)  /* block osd auto mark-out */
+#define CEPH_OSDMAP_NOIN     (1<<8)  /* block osd auto mark-in */
+#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
+#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
+
+/*
+ * The error code to return when an OSD can't handle a write
+ * because it is too large.
+ */
+#define OSD_WRITETOOBIG EMSGSIZE
 
 /*
  * osd ops
+ *
+ * WARNING: do not use these op codes directly.  Use the helpers
+ * defined below instead.  In certain cases, op code behavior was
+ * redefined, resulting in special-cases in the helpers.
  */
 #define CEPH_OSD_OP_MODE       0xf000
 #define CEPH_OSD_OP_MODE_RD    0x1000
@@ -177,6 +174,7 @@ struct ceph_eversion {
 #define CEPH_OSD_OP_TYPE_ATTR  0x0300
 #define CEPH_OSD_OP_TYPE_EXEC  0x0400
 #define CEPH_OSD_OP_TYPE_PG    0x0500
+#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */
 
 enum {
        /** data **/
@@ -217,6 +215,23 @@ enum {
 
        CEPH_OSD_OP_WATCH   = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15,
 
+       /* omap */
+       CEPH_OSD_OP_OMAPGETKEYS   = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17,
+       CEPH_OSD_OP_OMAPGETVALS   = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18,
+       CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19,
+       CEPH_OSD_OP_OMAPGETVALSBYKEYS  =
+         CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20,
+       CEPH_OSD_OP_OMAPSETVALS   = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21,
+       CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22,
+       CEPH_OSD_OP_OMAPCLEAR     = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23,
+       CEPH_OSD_OP_OMAPRMKEYS    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
+       CEPH_OSD_OP_OMAP_CMP      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
+
+       /** multi **/
+       CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
+       CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
+       CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3,
+
        /** attrs **/
        /* read */
        CEPH_OSD_OP_GETXATTR  = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
@@ -238,6 +253,7 @@ enum {
        CEPH_OSD_OP_SCRUB_RESERVE   = CEPH_OSD_OP_MODE_SUB | 6,
        CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7,
        CEPH_OSD_OP_SCRUB_STOP      = CEPH_OSD_OP_MODE_SUB | 8,
+       CEPH_OSD_OP_SCRUB_MAP     = CEPH_OSD_OP_MODE_SUB | 9,
 
        /** lock **/
        CEPH_OSD_OP_WRLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
@@ -248,10 +264,12 @@ enum {
        CEPH_OSD_OP_DNLOCK    = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
 
        /** exec **/
+       /* note: the RD bit here is wrong; see special-case below in helper */
        CEPH_OSD_OP_CALL    = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
 
        /** pg **/
        CEPH_OSD_OP_PGLS      = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
+       CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2,
 };
 
 static inline int ceph_osd_op_type_lock(int op)
@@ -274,6 +292,10 @@ static inline int ceph_osd_op_type_pg(int op)
 {
        return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
 }
+static inline int ceph_osd_op_type_multi(int op)
+{
+       return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI;
+}
 
 static inline int ceph_osd_op_mode_subop(int op)
 {
@@ -281,11 +303,12 @@ static inline int ceph_osd_op_mode_subop(int op)
 }
 static inline int ceph_osd_op_mode_read(int op)
 {
-       return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD;
+       return (op & CEPH_OSD_OP_MODE_RD) &&
+               op != CEPH_OSD_OP_CALL;
 }
 static inline int ceph_osd_op_mode_modify(int op)
 {
-       return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR;
+       return op & CEPH_OSD_OP_MODE_WR;
 }
 
 /*
@@ -294,34 +317,38 @@ static inline int ceph_osd_op_mode_modify(int op)
  */
 #define CEPH_OSD_TMAP_HDR 'h'
 #define CEPH_OSD_TMAP_SET 's'
+#define CEPH_OSD_TMAP_CREATE 'c' /* create key */
 #define CEPH_OSD_TMAP_RM  'r'
+#define CEPH_OSD_TMAP_RMSLOPPY 'R'
 
 extern const char *ceph_osd_op_name(int op);
 
-
 /*
  * osd op flags
  *
  * An op may be READ, WRITE, or READ|WRITE.
  */
 enum {
-       CEPH_OSD_FLAG_ACK = 1,          /* want (or is) "ack" ack */
-       CEPH_OSD_FLAG_ONNVRAM = 2,      /* want (or is) "onnvram" ack */
-       CEPH_OSD_FLAG_ONDISK = 4,       /* want (or is) "ondisk" ack */
-       CEPH_OSD_FLAG_RETRY = 8,        /* resend attempt */
-       CEPH_OSD_FLAG_READ = 16,        /* op may read */
-       CEPH_OSD_FLAG_WRITE = 32,       /* op may write */
-       CEPH_OSD_FLAG_ORDERSNAP = 64,   /* EOLDSNAP if snapc is out of order */
-       CEPH_OSD_FLAG_PEERSTAT = 128,   /* msg includes osd_peer_stat */
-       CEPH_OSD_FLAG_BALANCE_READS = 256,
-       CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */
-       CEPH_OSD_FLAG_PGOP = 1024,      /* pg op, no object */
-       CEPH_OSD_FLAG_EXEC = 2048,      /* op may exec */
-       CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */
+       CEPH_OSD_FLAG_ACK =            0x0001,  /* want (or is) "ack" ack */
+       CEPH_OSD_FLAG_ONNVRAM =        0x0002,  /* want (or is) "onnvram" ack */
+       CEPH_OSD_FLAG_ONDISK =         0x0004,  /* want (or is) "ondisk" ack */
+       CEPH_OSD_FLAG_RETRY =          0x0008,  /* resend attempt */
+       CEPH_OSD_FLAG_READ =           0x0010,  /* op may read */
+       CEPH_OSD_FLAG_WRITE =          0x0020,  /* op may write */
+       CEPH_OSD_FLAG_ORDERSNAP =      0x0040,  /* EOLDSNAP if snapc is out of order */
+       CEPH_OSD_FLAG_PEERSTAT_OLD =   0x0080,  /* DEPRECATED msg includes osd_peer_stat */
+       CEPH_OSD_FLAG_BALANCE_READS =  0x0100,
+       CEPH_OSD_FLAG_PARALLELEXEC =   0x0200,  /* execute op in parallel */
+       CEPH_OSD_FLAG_PGOP =           0x0400,  /* pg op, no object */
+       CEPH_OSD_FLAG_EXEC =           0x0800,  /* op may exec */
+       CEPH_OSD_FLAG_EXEC_PUBLIC =    0x1000,  /* DEPRECATED op may exec (public) */
+       CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000,  /* read from nearby replica, if any */
+       CEPH_OSD_FLAG_RWORDERED =      0x4000,  /* order wrt concurrent reads */
 };
 
 enum {
        CEPH_OSD_OP_FLAG_EXCL = 1,      /* EXCL object create */
+       CEPH_OSD_OP_FLAG_FAILOK = 2,    /* continue despite failure */
 };
 
 #define EOLDSNAPC    ERESTART  /* ORDERSNAP flag set; writer has old snapc*/
@@ -381,48 +408,13 @@ struct ceph_osd_op {
                        __le64 ver;
                        __u8 flag;      /* 0 = unwatch, 1 = watch */
                } __attribute__ ((packed)) watch;
-};
+               struct {
+                       __le64 offset, length;
+                       __le64 src_offset;
+               } __attribute__ ((packed)) clonerange;
+       };
        __le32 payload_len;
 } __attribute__ ((packed));
 
-/*
- * osd request message header.  each request may include multiple
- * ceph_osd_op object operations.
- */
-struct ceph_osd_request_head {
-       __le32 client_inc;                 /* client incarnation */
-       struct ceph_object_layout layout;  /* pgid */
-       __le32 osdmap_epoch;               /* client's osdmap epoch */
-
-       __le32 flags;
-
-       struct ceph_timespec mtime;        /* for mutations only */
-       struct ceph_eversion reassert_version; /* if we are replaying op */
-
-       __le32 object_len;     /* length of object name */
-
-       __le64 snapid;         /* snapid to read */
-       __le64 snap_seq;       /* writer's snap context */
-       __le32 num_snaps;
-
-       __le16 num_ops;
-       struct ceph_osd_op ops[];  /* followed by ops[], obj, ticket, snaps */
-} __attribute__ ((packed));
-
-struct ceph_osd_reply_head {
-       __le32 client_inc;                /* client incarnation */
-       __le32 flags;
-       struct ceph_object_layout layout;
-       __le32 osdmap_epoch;
-       struct ceph_eversion reassert_version; /* for replaying uncommitted */
-
-       __le32 result;                    /* result code */
-
-       __le32 object_len;                /* length of object name */
-       __le32 num_ops;
-       struct ceph_osd_op ops[0];  /* ops[], object */
-} __attribute__ ((packed));
-
-
 
 #endif
index 51494e6b55487f30496c8870165dd75f8ba4c7b1..33f0280fd533574fe7739bfe4413bb85791bafcd 100644 (file)
@@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x)
 }
 
 extern void wait_for_completion(struct completion *);
+extern void wait_for_completion_io(struct completion *);
 extern int wait_for_completion_interruptible(struct completion *x);
 extern int wait_for_completion_killable(struct completion *x);
 extern unsigned long wait_for_completion_timeout(struct completion *x,
                                                   unsigned long timeout);
+extern unsigned long wait_for_completion_io_timeout(struct completion *x,
+                                                   unsigned long timeout);
 extern long wait_for_completion_interruptible_timeout(
        struct completion *x, unsigned long timeout);
 extern long wait_for_completion_killable_timeout(
index 25baa287cff7287d8e6cc6f5b94255ef4cf86f9e..6a1101f24cfba84eaf6210f96802984009462dbd 100644 (file)
@@ -162,6 +162,8 @@ struct crush_map {
        __u32 choose_local_fallback_tries;
        /* choose attempts before giving up */ 
        __u32 choose_total_tries;
+       /* attempt chooseleaf inner descent once; on failure retry outer descent */
+       __u32 chooseleaf_descend_once;
 };
 
 
index 3d754a394e921a6f9e1e0cdc7b62c28df80f29c0..9978b614a1aac8627a24e6b42b3a0ff0761e941a 100644 (file)
@@ -119,8 +119,10 @@ struct dma_buf {
        struct file *file;
        struct list_head attachments;
        const struct dma_buf_ops *ops;
-       /* mutex to serialize list manipulation and attach/detach */
+       /* mutex to serialize list manipulation, attach/detach and vmap/unmap */
        struct mutex lock;
+       unsigned vmapping_counter;
+       void *vmap_ptr;
        void *priv;
 };
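
The two new fields allow the dma-buf core to reference-count vmap/vunmap under the existing mutex and hand back a cached mapping on nested calls. A hedged sketch of that pattern (illustrative only, not the actual core implementation):

	void *example_dma_buf_vmap(struct dma_buf *dmabuf)
	{
		void *ptr = NULL;

		mutex_lock(&dmabuf->lock);
		if (dmabuf->vmapping_counter) {
			dmabuf->vmapping_counter++;
			ptr = dmabuf->vmap_ptr;		/* reuse the cached mapping */
		} else if (dmabuf->ops->vmap) {
			ptr = dmabuf->ops->vmap(dmabuf);
			if (!IS_ERR_OR_NULL(ptr)) {
				dmabuf->vmap_ptr = ptr;
				dmabuf->vmapping_counter = 1;
			}
		}
		mutex_unlock(&dmabuf->lock);
		return ptr;
	}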
 
index 1b8c02b36f76f5ed7023fda4c4f6c7e9392a4f6b..4fd4999ccb5bf3386db3c27fec3f0f1d4a048b42 100644 (file)
@@ -14,7 +14,6 @@
 
 #include <linux/atomic.h>
 #include <linux/device.h>
-#include <linux/kobject.h>
 #include <linux/completion.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
@@ -48,8 +47,17 @@ static inline void opstate_init(void)
        return;
 }
 
+/* Max length of a DIMM label */
 #define EDAC_MC_LABEL_LEN      31
-#define MC_PROC_NAME_MAX_LEN   7
+
+/* Maximum size of the location string */
+#define LOCATION_SIZE 80
+
+/* Defines the maximum number of labels that can be reported */
+#define EDAC_MAX_LABELS                8
+
+/* String used to join two or more labels */
+#define OTHER_LABEL " or "
 
 /**
  * enum dev_type - describe the type of memory DRAM chips used at the stick
@@ -101,8 +109,24 @@ enum hw_event_mc_err_type {
        HW_EVENT_ERR_CORRECTED,
        HW_EVENT_ERR_UNCORRECTED,
        HW_EVENT_ERR_FATAL,
+       HW_EVENT_ERR_INFO,
 };
 
+static inline char *mc_event_error_type(const unsigned int err_type)
+{
+       switch (err_type) {
+       case HW_EVENT_ERR_CORRECTED:
+               return "Corrected";
+       case HW_EVENT_ERR_UNCORRECTED:
+               return "Uncorrected";
+       case HW_EVENT_ERR_FATAL:
+               return "Fatal";
+       default:
+       case HW_EVENT_ERR_INFO:
+               return "Info";
+       }
+}
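
The helper gives drivers and tracepoints one place to turn an error type into a printable string (the mc_event tracepoint later in this series is converted to use it). A hypothetical caller, for illustration only:

	pr_warn("EDAC: %s error reported\n",
		mc_event_error_type(HW_EVENT_ERR_CORRECTED));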
+
 /**
  * enum mem_type - memory types. For a more detailed reference, please see
  *                     http://en.wikipedia.org/wiki/DRAM
@@ -376,6 +400,9 @@ enum scrub_type {
  * @EDAC_MC_LAYER_CHANNEL:     memory layer is named "channel"
  * @EDAC_MC_LAYER_SLOT:                memory layer is named "slot"
  * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select"
+ * @EDAC_MC_LAYER_ALL_MEM:     memory layout is unknown. All memory is mapped
+ *                             as a single memory area. This is used when
+ *                             retrieving errors from a firmware-driven driver.
  *
  * This enum is used by the drivers to tell edac_mc_sysfs what name should
  * be used when describing a memory stick location.
@@ -385,6 +412,7 @@ enum edac_mc_layer_type {
        EDAC_MC_LAYER_CHANNEL,
        EDAC_MC_LAYER_SLOT,
        EDAC_MC_LAYER_CHIP_SELECT,
+       EDAC_MC_LAYER_ALL_MEM,
 };
 
 /**
@@ -551,6 +579,46 @@ struct errcount_attribute_data {
        int layer0, layer1, layer2;
 };
 
+/**
+ * edac_raw_error_desc - Raw error report structure
+ * @grain:                     minimum granularity for an error report, in bytes
+ * @error_count:               number of errors of the same type
+ * @top_layer:                 top layer of the error (layer[0])
+ * @mid_layer:                 middle layer of the error (layer[1])
+ * @low_layer:                 low layer of the error (layer[2])
+ * @page_frame_number:         page where the error happened
+ * @offset_in_page:            page offset
+ * @syndrome:                  syndrome of the error (or 0 if unknown or if
+ *                             the syndrome is not applicable)
+ * @msg:                       error message
+ * @location:                  location of the error
+ * @label:                     label of the affected DIMM(s)
+ * @other_detail:              other driver-specific detail about the error
+ * @enable_per_layer_report:   if false, the error affects all layers
+ *                             (typically, a memory controller error)
+ */
+struct edac_raw_error_desc {
+       /*
+        * NOTE: everything before grain won't be cleaned by
+        * edac_raw_error_desc_clean()
+        */
+       char location[LOCATION_SIZE];
+       char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
+       long grain;
+
+       /* the vars below and grain will be cleaned on every new error report */
+       u16 error_count;
+       int top_layer;
+       int mid_layer;
+       int low_layer;
+       unsigned long page_frame_number;
+       unsigned long offset_in_page;
+       unsigned long syndrome;
+       const char *msg;
+       const char *other_detail;
+       bool enable_per_layer_report;
+};
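
The field order is significant because of the cleaning rule noted inside the struct: everything from @grain onwards is reset before each new report, while the preformatted location/label buffers are kept. A hedged illustration of that rule ('desc' is a hypothetical pointer; this is not the actual EDAC core code):

	memset(&desc->grain, 0,
	       sizeof(*desc) - offsetof(struct edac_raw_error_desc, grain));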
+
 /* MEMORY controller information structure
  */
 struct mem_ctl_info {
@@ -630,7 +698,6 @@ struct mem_ctl_info {
        const char *mod_ver;
        const char *ctl_name;
        const char *dev_name;
-       char proc_name[MC_PROC_NAME_MAX_LEN + 1];
        void *pvt_info;
        unsigned long start_time;       /* mci load start time (in jiffies) */
 
@@ -659,6 +726,12 @@ struct mem_ctl_info {
        /* work struct for this MC */
        struct delayed_work work;
 
+       /*
+        * Used to report an error; being part of the global struct
+        * means its memory is allocated by the EDAC core
+        */
+       struct edac_raw_error_desc error_desc;
+
        /* the internal state of this controller instance */
        int op_state;
 
index 18662063175044e4641610799d48422e3cd583f0..acd0312d46fb13d0a2752ea49add626a1a9411d8 100644 (file)
@@ -2,6 +2,7 @@
 #define _LINUX_ELEVATOR_H
 
 #include <linux/percpu.h>
+#include <linux/hashtable.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -96,6 +97,8 @@ struct elevator_type
        struct list_head list;
 };
 
+#define ELV_HASH_BITS 6
+
 /*
  * each queue has an elevator_queue associated with it
  */
@@ -105,8 +108,8 @@ struct elevator_queue
        void *elevator_data;
        struct kobject kobj;
        struct mutex sysfs_lock;
-       struct hlist_head *hash;
        unsigned int registered:1;
+       DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
 };
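
DECLARE_HASHTABLE() embeds a fixed array of 1 << ELV_HASH_BITS hlist heads directly in the elevator_queue, removing the separately allocated 'hash' pointer. The table is then driven by the generic <linux/hashtable.h> helpers; a hedged sketch (the 'key' expression is illustrative):

	hash_init(eq->hash);			/* at elevator allocation time */
	hash_add(eq->hash, &rq->hash, key);	/* index a request by its merge-hash key */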
 
 /*
index d0ab98f73d380c67f2572a0b9536719e17325ca9..a5199f6d0e82592dde0e4ba7908b1d1b70323483 100644 (file)
@@ -124,31 +124,6 @@ static inline void init_llist_head(struct llist_head *list)
             &(pos)->member != NULL;                                    \
             (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member))
 
-/**
- * llist_for_each_entry_safe - iterate safely against remove over some entries
- * of lock-less list of given type.
- * @pos:       the type * to use as a loop cursor.
- * @n:         another type * to use as a temporary storage.
- * @node:      the fist entry of deleted list entries.
- * @member:    the name of the llist_node with the struct.
- *
- * In general, some entries of the lock-less list can be traversed
- * safely only after being removed from list, so start with an entry
- * instead of list head. This variant allows removal of entries
- * as we iterate.
- *
- * If being used on entries deleted from lock-less list directly, the
- * traverse order is from the newest to the oldest added entry.  If
- * you want to traverse from the oldest to the newest, you must
- * reverse the order by yourself before traversing.
- */
-#define llist_for_each_entry_safe(pos, n, node, member)                \
-       for ((pos) = llist_entry((node), typeof(*(pos)), member),       \
-            (n) = (pos)->member.next;                                  \
-            &(pos)->member != NULL;                                    \
-            (pos) = llist_entry(n, typeof(*(pos)), member),            \
-            (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL)
-
 /**
  * llist_empty - tests whether a lock-less list is empty
  * @head:      the list to test
index 0e62d84f9f7ffcaaa8c472aa5da3e373705d76e5..dcaad79f54ed5e74985c165585a354c6d04e0bc0 100644 (file)
@@ -212,7 +212,8 @@ int           nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 __be32           nlmclnt_grant(const struct sockaddr *addr,
                                const struct nlm_lock *lock);
 void             nlmclnt_recovery(struct nlm_host *);
-int              nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
+int              nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
+                                 struct nlm_rqst *);
 void             nlmclnt_next_cookie(struct nlm_cookie *);
 
 /*
index 31717bd287fd9f8f1e2515d8947aa5c5896b21f1..f11c1c2609d53ded1135b6289845f2ddd5e701da 100644 (file)
 #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX      0x3ce0
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB   0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16    0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_19    0x65f3
 #define PCI_DEVICE_ID_INTEL_5100_21    0x65f5
 #define PCI_DEVICE_ID_INTEL_5100_22    0x65f6
 #define PCI_DEVICE_ID_INTEL_5400_ERR   0x4030
index a7bdb2f63b73599119de3b975b17ed3979df306c..da7e6274b175c7befc37f1d0b54b78598dec9c12 100644 (file)
@@ -53,6 +53,8 @@ struct freq_clip_table {
  * struct exynos_tmu_platform_data
  * @threshold: basic temperature for generating interrupt
  *            25 <= threshold <= 125 [unit: degree Celsius]
+ * @threshold_falling: differential value for setting the threshold
+ *                    of the temperature falling interrupt.
  * @trigger_levels: array for each interrupt levels
  *     [unit: degree Celsius]
  *     0: temperature for trigger_level0 interrupt
@@ -97,6 +99,7 @@ struct freq_clip_table {
  */
 struct exynos_tmu_platform_data {
        u8 threshold;
+       u8 threshold_falling;
        u8 trigger_levels[4];
        bool trigger_level0_en;
        bool trigger_level1_en;
diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h
new file mode 100644 (file)
index 0000000..07d8e53
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ * linux/include/linux/sunrpc/addr.h
+ *
+ * Various routines for copying and comparing sockaddrs and for
+ * converting them to and from presentation format.
+ */
+#ifndef _LINUX_SUNRPC_ADDR_H
+#define _LINUX_SUNRPC_ADDR_H
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+
+size_t         rpc_ntop(const struct sockaddr *, char *, const size_t);
+size_t         rpc_pton(struct net *, const char *, const size_t,
+                        struct sockaddr *, const size_t);
+char *         rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t);
+size_t         rpc_uaddr2sockaddr(struct net *, const char *, const size_t,
+                                  struct sockaddr *, const size_t);
+
+static inline unsigned short rpc_get_port(const struct sockaddr *sap)
+{
+       switch (sap->sa_family) {
+       case AF_INET:
+               return ntohs(((struct sockaddr_in *)sap)->sin_port);
+       case AF_INET6:
+               return ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
+       }
+       return 0;
+}
+
+static inline void rpc_set_port(struct sockaddr *sap,
+                               const unsigned short port)
+{
+       switch (sap->sa_family) {
+       case AF_INET:
+               ((struct sockaddr_in *)sap)->sin_port = htons(port);
+               break;
+       case AF_INET6:
+               ((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
+               break;
+       }
+}
+
+#define IPV6_SCOPE_DELIMITER           '%'
+#define IPV6_SCOPE_ID_LEN              sizeof("%nnnnnnnnnn")
+
+static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
+                                  const struct sockaddr *sap2)
+{
+       const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
+       const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
+
+       return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
+}
+
+static inline bool __rpc_copy_addr4(struct sockaddr *dst,
+                                   const struct sockaddr *src)
+{
+       const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
+       struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
+
+       dsin->sin_family = ssin->sin_family;
+       dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
+       return true;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
+                                  const struct sockaddr *sap2)
+{
+       const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
+       const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
+
+       if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
+               return false;
+       else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+               return sin1->sin6_scope_id == sin2->sin6_scope_id;
+
+       return true;
+}
+
+static inline bool __rpc_copy_addr6(struct sockaddr *dst,
+                                   const struct sockaddr *src)
+{
+       const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
+       struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
+
+       dsin6->sin6_family = ssin6->sin6_family;
+       dsin6->sin6_addr = ssin6->sin6_addr;
+       dsin6->sin6_scope_id = ssin6->sin6_scope_id;
+       return true;
+}
+#else  /* !(IS_ENABLED(CONFIG_IPV6) */
+static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
+                                  const struct sockaddr *sap2)
+{
+       return false;
+}
+
+static inline bool __rpc_copy_addr6(struct sockaddr *dst,
+                                   const struct sockaddr *src)
+{
+       return false;
+}
+#endif /* !(IS_ENABLED(CONFIG_IPV6) */
+
+/**
+ * rpc_cmp_addr - compare the address portion of two sockaddrs.
+ * @sap1: first sockaddr
+ * @sap2: second sockaddr
+ *
+ * Just compares the family and address portion. Ignores port, but
+ * compares the scope if it's a link-local address.
+ *
+ * Returns true if the addrs are equal, false if they aren't.
+ */
+static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
+                               const struct sockaddr *sap2)
+{
+       if (sap1->sa_family == sap2->sa_family) {
+               switch (sap1->sa_family) {
+               case AF_INET:
+                       return __rpc_cmp_addr4(sap1, sap2);
+               case AF_INET6:
+                       return __rpc_cmp_addr6(sap1, sap2);
+               }
+       }
+       return false;
+}
+
+/**
+ * rpc_copy_addr - copy the address portion of one sockaddr to another
+ * @dst: destination sockaddr
+ * @src: source sockaddr
+ *
+ * Just copies the address portion and family. Ignores port, scope, etc.
+ * Caller is responsible for making certain that dst is large enough to hold
+ * the address in src. Returns true if address family is supported. Returns
+ * false otherwise.
+ */
+static inline bool rpc_copy_addr(struct sockaddr *dst,
+                                const struct sockaddr *src)
+{
+       switch (src->sa_family) {
+       case AF_INET:
+               return __rpc_copy_addr4(dst, src);
+       case AF_INET6:
+               return __rpc_copy_addr6(dst, src);
+       }
+       return false;
+}
+
+/**
+ * rpc_get_scope_id - return scopeid for a given sockaddr
+ * @sa: sockaddr to get scopeid from
+ *
+ * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
+ * not an AF_INET6 address.
+ */
+static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
+{
+       if (sa->sa_family != AF_INET6)
+               return 0;
+
+       return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
+}
+
+#endif /* _LINUX_SUNRPC_ADDR_H */
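
These helpers move out of sunrpc/clnt.h into their own header (the old copies are deleted further down). A hedged sketch of a typical caller comparing two endpoints (the function name is illustrative):

	#include <linux/sunrpc/addr.h>

	static bool example_same_endpoint(const struct sockaddr *a,
					  const struct sockaddr *b)
	{
		/* same family/address (and scope for link-local IPv6), same port */
		return rpc_cmp_addr(a, b) && rpc_get_port(a) == rpc_get_port(b);
	}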
index 5dc9ee4d616e6f966e1b4abe9fe69021f48645ea..303399b1ba5954a6de412114b15df8fc0def1514 100644 (file)
@@ -83,6 +83,10 @@ struct cache_detail {
        int                     (*cache_upcall)(struct cache_detail *,
                                                struct cache_head *);
 
+       void                    (*cache_request)(struct cache_detail *cd,
+                                                struct cache_head *ch,
+                                                char **bpp, int *blen);
+
        int                     (*cache_parse)(struct cache_detail *,
                                               char *buf, int len);
 
@@ -157,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail,
                    struct cache_head *new, struct cache_head *old, int hash);
 
 extern int
-sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
-               void (*cache_request)(struct cache_detail *,
-                                     struct cache_head *,
-                                     char **,
-                                     int *));
+sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h);
 
 
 extern void cache_clean_deferred(void *owner);
index 34206b84d8dac9208401c48e92713c1d8ca11a1f..4a4abde000cb9674221c596eb9696112c053015b 100644 (file)
@@ -165,157 +165,5 @@ size_t            rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 const char     *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 int            rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t);
 
-size_t         rpc_ntop(const struct sockaddr *, char *, const size_t);
-size_t         rpc_pton(struct net *, const char *, const size_t,
-                        struct sockaddr *, const size_t);
-char *         rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t);
-size_t         rpc_uaddr2sockaddr(struct net *, const char *, const size_t,
-                                  struct sockaddr *, const size_t);
-
-static inline unsigned short rpc_get_port(const struct sockaddr *sap)
-{
-       switch (sap->sa_family) {
-       case AF_INET:
-               return ntohs(((struct sockaddr_in *)sap)->sin_port);
-       case AF_INET6:
-               return ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
-       }
-       return 0;
-}
-
-static inline void rpc_set_port(struct sockaddr *sap,
-                               const unsigned short port)
-{
-       switch (sap->sa_family) {
-       case AF_INET:
-               ((struct sockaddr_in *)sap)->sin_port = htons(port);
-               break;
-       case AF_INET6:
-               ((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
-               break;
-       }
-}
-
-#define IPV6_SCOPE_DELIMITER           '%'
-#define IPV6_SCOPE_ID_LEN              sizeof("%nnnnnnnnnn")
-
-static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1,
-                                  const struct sockaddr *sap2)
-{
-       const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1;
-       const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2;
-
-       return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
-}
-
-static inline bool __rpc_copy_addr4(struct sockaddr *dst,
-                                   const struct sockaddr *src)
-{
-       const struct sockaddr_in *ssin = (struct sockaddr_in *) src;
-       struct sockaddr_in *dsin = (struct sockaddr_in *) dst;
-
-       dsin->sin_family = ssin->sin_family;
-       dsin->sin_addr.s_addr = ssin->sin_addr.s_addr;
-       return true;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
-                                  const struct sockaddr *sap2)
-{
-       const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1;
-       const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
-
-       if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
-               return false;
-       else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-               return sin1->sin6_scope_id == sin2->sin6_scope_id;
-
-       return true;
-}
-
-static inline bool __rpc_copy_addr6(struct sockaddr *dst,
-                                   const struct sockaddr *src)
-{
-       const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src;
-       struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
-
-       dsin6->sin6_family = ssin6->sin6_family;
-       dsin6->sin6_addr = ssin6->sin6_addr;
-       return true;
-}
-#else  /* !(IS_ENABLED(CONFIG_IPV6) */
-static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
-                                  const struct sockaddr *sap2)
-{
-       return false;
-}
-
-static inline bool __rpc_copy_addr6(struct sockaddr *dst,
-                                   const struct sockaddr *src)
-{
-       return false;
-}
-#endif /* !(IS_ENABLED(CONFIG_IPV6) */
-
-/**
- * rpc_cmp_addr - compare the address portion of two sockaddrs.
- * @sap1: first sockaddr
- * @sap2: second sockaddr
- *
- * Just compares the family and address portion. Ignores port, scope, etc.
- * Returns true if the addrs are equal, false if they aren't.
- */
-static inline bool rpc_cmp_addr(const struct sockaddr *sap1,
-                               const struct sockaddr *sap2)
-{
-       if (sap1->sa_family == sap2->sa_family) {
-               switch (sap1->sa_family) {
-               case AF_INET:
-                       return __rpc_cmp_addr4(sap1, sap2);
-               case AF_INET6:
-                       return __rpc_cmp_addr6(sap1, sap2);
-               }
-       }
-       return false;
-}
-
-/**
- * rpc_copy_addr - copy the address portion of one sockaddr to another
- * @dst: destination sockaddr
- * @src: source sockaddr
- *
- * Just copies the address portion and family. Ignores port, scope, etc.
- * Caller is responsible for making certain that dst is large enough to hold
- * the address in src. Returns true if address family is supported. Returns
- * false otherwise.
- */
-static inline bool rpc_copy_addr(struct sockaddr *dst,
-                                const struct sockaddr *src)
-{
-       switch (src->sa_family) {
-       case AF_INET:
-               return __rpc_copy_addr4(dst, src);
-       case AF_INET6:
-               return __rpc_copy_addr6(dst, src);
-       }
-       return false;
-}
-
-/**
- * rpc_get_scope_id - return scopeid for a given sockaddr
- * @sa: sockaddr to get scopeid from
- *
- * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if
- * not an AF_INET6 address.
- */
-static inline u32 rpc_get_scope_id(const struct sockaddr *sa)
-{
-       if (sa->sa_family != AF_INET6)
-               return 0;
-
-       return ((struct sockaddr_in6 *) sa)->sin6_scope_id;
-}
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
index 676ddf53b3eee063b3c6b433de057c1da6894023..1f0216b9a6c9d0cbd556cfa501ec7739997c2361 100644 (file)
@@ -50,6 +50,7 @@ struct svc_pool {
        unsigned int            sp_nrthreads;   /* # of threads in pool */
        struct list_head        sp_all_threads; /* all server threads */
        struct svc_pool_stats   sp_stats;       /* statistics on pool operation */
+       int                     sp_task_pending;/* has pending task */
 } ____cacheline_aligned_in_smp;
 
 /*
index 63988990bd36d5c81e8537cd1ef1ef2961c30ed6..15f9204ee70bba6a61c27658a323e97d4fa264ba 100644 (file)
@@ -56,7 +56,7 @@ struct xdr_buf {
        struct kvec     head[1],        /* RPC header + non-page data */
                        tail[1];        /* Appended after page data */
 
-       struct page **  pages;          /* Array of contiguous pages */
+       struct page **  pages;          /* Array of pages */
        unsigned int    page_base,      /* Start of page data */
                        page_len,       /* Length of page data */
                        flags;          /* Flags for data disposition */
@@ -152,6 +152,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
+extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
index fe82022478e7f7ad20affd6c5ce4706c69205375..f0bd7f90a90d45d3aeeb3aed8bc2d2f8295c8be0 100644 (file)
@@ -74,6 +74,8 @@ enum thermal_trend {
        THERMAL_TREND_STABLE, /* temperature is stable */
        THERMAL_TREND_RAISING, /* temperature is raising */
        THERMAL_TREND_DROPPING, /* temperature is dropping */
+       THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */
+       THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */
 };
 
 /* Events supported by Thermal Netlink */
@@ -121,6 +123,7 @@ struct thermal_zone_device_ops {
        int (*set_trip_hyst) (struct thermal_zone_device *, int,
                              unsigned long);
        int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *);
+       int (*set_emul_temp) (struct thermal_zone_device *, unsigned long);
        int (*get_trend) (struct thermal_zone_device *, int,
                          enum thermal_trend *);
        int (*notify) (struct thermal_zone_device *, int,
@@ -163,6 +166,7 @@ struct thermal_zone_device {
        int polling_delay;
        int temperature;
        int last_temperature;
+       int emul_temperature;
        int passive;
        unsigned int forced_passive;
        const struct thermal_zone_device_ops *ops;
@@ -244,9 +248,11 @@ int thermal_register_governor(struct thermal_governor *);
 void thermal_unregister_governor(struct thermal_governor *);
 
 #ifdef CONFIG_NET
-extern int thermal_generate_netlink_event(u32 orig, enum events event);
+extern int thermal_generate_netlink_event(struct thermal_zone_device *tz,
+                                               enum events event);
 #else
-static inline int thermal_generate_netlink_event(u32 orig, enum events event)
+static int thermal_generate_netlink_event(struct thermal_zone_device *tz,
+                                               enum events event)
 {
        return 0;
 }
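
THERMAL_TREND_RAISE_FULL and THERMAL_TREND_DROP_FULL let a zone driver ask the governor to jump straight to the highest or lowest cooling state rather than stepping. A hedged sketch of a get_trend() callback using them (the 5-degree threshold and function name are assumptions, not from this series):

	static int example_get_trend(struct thermal_zone_device *tz, int trip,
				     enum thermal_trend *trend)
	{
		int delta = tz->temperature - tz->last_temperature;

		if (delta > 5000)		/* jumped more than 5 C: max cooling */
			*trend = THERMAL_TREND_RAISE_FULL;
		else if (delta > 0)
			*trend = THERMAL_TREND_RAISING;
		else if (delta < 0)
			*trend = THERMAL_TREND_DROPPING;
		else
			*trend = THERMAL_TREND_STABLE;
		return 0;
	}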
index b82a83aba31185870b132d4e8d7c99bb975fc0b6..9a9367c0c0768bff86bfa9ddde5ff85574497292 100644 (file)
@@ -87,9 +87,9 @@ int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
                                                        enum wb_reason reason);
-int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason);
-int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr,
-                                                       enum wb_reason reason);
+int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
+int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
+                                 enum wb_reason reason);
 void sync_inodes_sb(struct super_block *);
 long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
                                enum wb_reason reason);
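
The *_if_idle variants are renamed to make clear they only try to start writeback and return without blocking if a flush is already in progress. A hedged one-line caller sketch (a filesystem low-space path, for illustration):

	try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);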
index 260470e72483071ee1843345e7969817e5b2571a..21cdb0b7b0fb471f9b99052a305d48a8e3b851a3 100644 (file)
@@ -78,9 +78,7 @@ TRACE_EVENT(mc_event,
 
        TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)",
                  __entry->error_count,
-                 (__entry->error_type == HW_EVENT_ERR_CORRECTED) ? "Corrected" :
-                       ((__entry->error_type == HW_EVENT_ERR_FATAL) ?
-                       "Fatal" : "Uncorrected"),
+                 mc_event_error_type(__entry->error_type),
                  __entry->error_count > 1 ? "s" : "",
                  ((char *)__get_str(msg))[0] ? " " : "",
                  __get_str(msg),
diff --git a/include/sound/aess.h b/include/sound/aess.h
new file mode 100644 (file)
index 0000000..cee0d09
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * AESS IP block reset
+ *
+ * Copyright (C) 2012 Texas Instruments, Inc.
+ * Paul Walmsley
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#ifndef __SOUND_AESS_H__
+#define __SOUND_AESS_H__
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+
+/*
+ * AESS_AUTO_GATING_ENABLE_OFFSET: offset in bytes of the AESS IP
+ *     block's AESS_AUTO_GATING_ENABLE__1 register from the IP block's
+ *     base address
+ */
+#define AESS_AUTO_GATING_ENABLE_OFFSET                 0x07c
+
+/* Register bitfields in the AESS_AUTO_GATING_ENABLE__1 register */
+#define AESS_AUTO_GATING_ENABLE_SHIFT                  0
+
+/**
+ * aess_enable_autogating - enable AESS internal autogating
+ * @base: AESS IP block base address, mapped into kernel virtual memory
+ *
+ * Enable internal autogating on the AESS.  This allows the AESS to
+ * indicate that it is idle to the OMAP PRCM.  No return value.
+ */
+static inline void aess_enable_autogating(void __iomem *base)
+{
+       u32 v;
+
+       /* Set AESS_AUTO_GATING_ENABLE__1.ENABLE to allow idle entry */
+       v = 1 << AESS_AUTO_GATING_ENABLE_SHIFT;
+       writel(v, base + AESS_AUTO_GATING_ENABLE_OFFSET);
+}
+
+#endif /* __SOUND_AESS_H__ */
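
A hedged usage sketch for the helper above; AESS_PA_BASE and the 4 KiB mapping size are placeholders, not values defined by this header:

	void __iomem *base = ioremap(AESS_PA_BASE, SZ_4K);

	if (base) {
		aess_enable_autogating(base);
		iounmap(base);
	}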
index 05c5e61f0a7ca23b03a6a965a375323ea2b37036..9961726523d01251957b2f8c661422c31d1ac78c 100644 (file)
@@ -6,10 +6,61 @@
 
 #include <linux/blktrace_api.h>
 #include <linux/blkdev.h>
+#include <linux/buffer_head.h>
 #include <linux/tracepoint.h>
 
 #define RWBS_LEN       8
 
+DECLARE_EVENT_CLASS(block_buffer,
+
+       TP_PROTO(struct buffer_head *bh),
+
+       TP_ARGS(bh),
+
+       TP_STRUCT__entry (
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  size_t,       size                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bh->b_bdev->bd_dev;
+               __entry->sector         = bh->b_blocknr;
+               __entry->size           = bh->b_size;
+       ),
+
+       TP_printk("%d,%d sector=%llu size=%zu",
+               MAJOR(__entry->dev), MINOR(__entry->dev),
+               (unsigned long long)__entry->sector, __entry->size
+       )
+);
+
+/**
+ * block_touch_buffer - mark a buffer accessed
+ * @bh: buffer_head being touched
+ *
+ * Called from touch_buffer().
+ */
+DEFINE_EVENT(block_buffer, block_touch_buffer,
+
+       TP_PROTO(struct buffer_head *bh),
+
+       TP_ARGS(bh)
+);
+
+/**
+ * block_dirty_buffer - mark a buffer dirty
+ * @bh: buffer_head being dirtied
+ *
+ * Called from mark_buffer_dirty().
+ */
+DEFINE_EVENT(block_buffer, block_dirty_buffer,
+
+       TP_PROTO(struct buffer_head *bh),
+
+       TP_ARGS(bh)
+);
+
 DECLARE_EVENT_CLASS(block_rq_with_error,
 
        TP_PROTO(struct request_queue *q, struct request *rq),
@@ -206,7 +257,6 @@ TRACE_EVENT(block_bio_bounce,
 
 /**
  * block_bio_complete - completed all work on the block operation
- * @q: queue holding the block operation
  * @bio: block operation completed
  * @error: io error value
  *
@@ -215,9 +265,9 @@ TRACE_EVENT(block_bio_bounce,
  */
 TRACE_EVENT(block_bio_complete,
 
-       TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+       TP_PROTO(struct bio *bio, int error),
 
-       TP_ARGS(q, bio, error),
+       TP_ARGS(bio, error),
 
        TP_STRUCT__entry(
                __field( dev_t,         dev             )
@@ -228,7 +278,8 @@ TRACE_EVENT(block_bio_complete,
        ),
 
        TP_fast_assign(
-               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->dev            = bio->bi_bdev ?
+                                         bio->bi_bdev->bd_dev : 0;
                __entry->sector         = bio->bi_sector;
                __entry->nr_sector      = bio->bi_size >> 9;
                __entry->error          = error;
@@ -241,11 +292,11 @@ TRACE_EVENT(block_bio_complete,
                  __entry->nr_sector, __entry->error)
 );
 
-DECLARE_EVENT_CLASS(block_bio,
+DECLARE_EVENT_CLASS(block_bio_merge,
 
-       TP_PROTO(struct request_queue *q, struct bio *bio),
+       TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio),
 
-       TP_ARGS(q, bio),
+       TP_ARGS(q, rq, bio),
 
        TP_STRUCT__entry(
                __field( dev_t,         dev                     )
@@ -272,31 +323,33 @@ DECLARE_EVENT_CLASS(block_bio,
 /**
  * block_bio_backmerge - merging block operation to the end of an existing operation
  * @q: queue holding operation
+ * @rq: request bio is being merged into
  * @bio: new block operation to merge
  *
  * Merging block request @bio to the end of an existing block request
  * in queue @q.
  */
-DEFINE_EVENT(block_bio, block_bio_backmerge,
+DEFINE_EVENT(block_bio_merge, block_bio_backmerge,
 
-       TP_PROTO(struct request_queue *q, struct bio *bio),
+       TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio),
 
-       TP_ARGS(q, bio)
+       TP_ARGS(q, rq, bio)
 );
 
 /**
  * block_bio_frontmerge - merging block operation to the beginning of an existing operation
  * @q: queue holding operation
+ * @rq: request bio is being merged into
  * @bio: new block operation to merge
  *
  * Merging block IO operation @bio to the beginning of an existing block
  * operation in queue @q.
  */
-DEFINE_EVENT(block_bio, block_bio_frontmerge,
+DEFINE_EVENT(block_bio_merge, block_bio_frontmerge,
 
-       TP_PROTO(struct request_queue *q, struct bio *bio),
+       TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio),
 
-       TP_ARGS(q, bio)
+       TP_ARGS(q, rq, bio)
 );
 
 /**
@@ -306,11 +359,32 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge,
  *
  * About to place the block IO operation @bio into queue @q.
  */
-DEFINE_EVENT(block_bio, block_bio_queue,
+TRACE_EVENT(block_bio_queue,
 
        TP_PROTO(struct request_queue *q, struct bio *bio),
 
-       TP_ARGS(q, bio)
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   RWBS_LEN        )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
 );
 
 DECLARE_EVENT_CLASS(block_get_rq,
index b453d92c225347f95605db68e33dd50b628fb0f0..6a16fd2e70ed27741ed13f80156d713d38127fb1 100644 (file)
 
 struct wb_writeback_work;
 
+TRACE_EVENT(writeback_dirty_page,
+
+       TP_PROTO(struct page *page, struct address_space *mapping),
+
+       TP_ARGS(page, mapping),
+
+       TP_STRUCT__entry (
+               __array(char, name, 32)
+               __field(unsigned long, ino)
+               __field(pgoff_t, index)
+       ),
+
+       TP_fast_assign(
+               strncpy(__entry->name,
+                       mapping ? dev_name(mapping->backing_dev_info->dev) : "(unknown)", 32);
+               __entry->ino = mapping ? mapping->host->i_ino : 0;
+               __entry->index = page->index;
+       ),
+
+       TP_printk("bdi %s: ino=%lu index=%lu",
+               __entry->name,
+               __entry->ino,
+               __entry->index
+       )
+);
+
+DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
+
+       TP_PROTO(struct inode *inode, int flags),
+
+       TP_ARGS(inode, flags),
+
+       TP_STRUCT__entry (
+               __array(char, name, 32)
+               __field(unsigned long, ino)
+               __field(unsigned long, flags)
+       ),
+
+       TP_fast_assign(
+               struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+
+               /* may be called for files on pseudo FSes w/ unregistered bdi */
+               strncpy(__entry->name,
+                       bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+               __entry->ino            = inode->i_ino;
+               __entry->flags          = flags;
+       ),
+
+       TP_printk("bdi %s: ino=%lu flags=%s",
+               __entry->name,
+               __entry->ino,
+               show_inode_state(__entry->flags)
+       )
+);
+
+DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start,
+
+       TP_PROTO(struct inode *inode, int flags),
+
+       TP_ARGS(inode, flags)
+);
+
+DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
+
+       TP_PROTO(struct inode *inode, int flags),
+
+       TP_ARGS(inode, flags)
+);
+
+DECLARE_EVENT_CLASS(writeback_write_inode_template,
+
+       TP_PROTO(struct inode *inode, struct writeback_control *wbc),
+
+       TP_ARGS(inode, wbc),
+
+       TP_STRUCT__entry (
+               __array(char, name, 32)
+               __field(unsigned long, ino)
+               __field(int, sync_mode)
+       ),
+
+       TP_fast_assign(
+               strncpy(__entry->name,
+                       dev_name(inode->i_mapping->backing_dev_info->dev), 32);
+               __entry->ino            = inode->i_ino;
+               __entry->sync_mode      = wbc->sync_mode;
+       ),
+
+       TP_printk("bdi %s: ino=%lu sync_mode=%d",
+               __entry->name,
+               __entry->ino,
+               __entry->sync_mode
+       )
+);
+
+DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start,
+
+       TP_PROTO(struct inode *inode, struct writeback_control *wbc),
+
+       TP_ARGS(inode, wbc)
+);
+
+DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode,
+
+       TP_PROTO(struct inode *inode, struct writeback_control *wbc),
+
+       TP_ARGS(inode, wbc)
+);
+
 DECLARE_EVENT_CLASS(writeback_work_class,
        TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
        TP_ARGS(bdi, work),
@@ -479,6 +588,13 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
        )
 );
 
+DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start,
+       TP_PROTO(struct inode *inode,
+                struct writeback_control *wbc,
+                unsigned long nr_to_write),
+       TP_ARGS(inode, wbc, nr_to_write)
+);
+
 DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
        TP_PROTO(struct inode *inode,
                 struct writeback_control *wbc,
index 12af4270c9c15df6d2ba8e79e7fd4d5ab48c4985..7f12624a393c3100506e5a3eaf588b5f463332ee 100644 (file)
@@ -3258,7 +3258,8 @@ void complete_all(struct completion *x)
 EXPORT_SYMBOL(complete_all);
 
 static inline long __sched
-do_wait_for_common(struct completion *x, long timeout, int state)
+do_wait_for_common(struct completion *x,
+                  long (*action)(long), long timeout, int state)
 {
        if (!x->done) {
                DECLARE_WAITQUEUE(wait, current);
@@ -3271,7 +3272,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
                        }
                        __set_current_state(state);
                        spin_unlock_irq(&x->wait.lock);
-                       timeout = schedule_timeout(timeout);
+                       timeout = action(timeout);
                        spin_lock_irq(&x->wait.lock);
                } while (!x->done && timeout);
                __remove_wait_queue(&x->wait, &wait);
@@ -3282,17 +3283,30 @@ do_wait_for_common(struct completion *x, long timeout, int state)
        return timeout ?: 1;
 }
 
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
+static inline long __sched
+__wait_for_common(struct completion *x,
+                 long (*action)(long), long timeout, int state)
 {
        might_sleep();
 
        spin_lock_irq(&x->wait.lock);
-       timeout = do_wait_for_common(x, timeout, state);
+       timeout = do_wait_for_common(x, action, timeout, state);
        spin_unlock_irq(&x->wait.lock);
        return timeout;
 }
 
+static long __sched
+wait_for_common(struct completion *x, long timeout, int state)
+{
+       return __wait_for_common(x, schedule_timeout, timeout, state);
+}
+
+static long __sched
+wait_for_common_io(struct completion *x, long timeout, int state)
+{
+       return __wait_for_common(x, io_schedule_timeout, timeout, state);
+}
+
 /**
  * wait_for_completion: - waits for completion of a task
  * @x:  holds the state of this particular completion
@@ -3328,6 +3342,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 }
 EXPORT_SYMBOL(wait_for_completion_timeout);
 
+/**
+ * wait_for_completion_io: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout. The caller is accounted as waiting
+ * for IO.
+ */
+void __sched wait_for_completion_io(struct completion *x)
+{
+       wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io);
+
+/**
+ * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible. The caller is accounted as waiting for IO.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
+ */
+unsigned long __sched
+wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
+{
+       return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io_timeout);
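
A hedged sketch of the kind of block-layer caller these variants target, so the sleep is charged to iowait; submit_example_io() is hypothetical and is assumed to complete 'done' from its IRQ path:

	DECLARE_COMPLETION_ONSTACK(done);

	submit_example_io(&done);
	if (!wait_for_completion_io_timeout(&done, msecs_to_jiffies(5000)))
		pr_warn("example: IO timed out\n");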
+
 /**
  * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
  * @x:  holds the state of this particular completion
index 314b9ee07edf076dcba956f064075ef9768e0422..a19a39952c1b24d3dd0ab0e9bd354fa9c5a7cb46 100644 (file)
@@ -554,6 +554,7 @@ void tick_nohz_idle_enter(void)
 
        local_irq_enable();
 }
+EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
 
 /**
  * tick_nohz_irq_exit - update next tick event from interrupt exit
@@ -685,6 +686,7 @@ void tick_nohz_idle_exit(void)
 
        local_irq_enable();
 }
+EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
 
 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
 {
index 71259e2b6b6167985fe0550e8c47de33a770e603..9e5b8c272eecc9121e8d3e064f30220c71c7e957 100644 (file)
@@ -739,6 +739,12 @@ static void blk_add_trace_rq_complete(void *ignore,
                                      struct request_queue *q,
                                      struct request *rq)
 {
+       struct blk_trace *bt = q->blk_trace;
+
+       /* if control ever passes through here, it's a request based driver */
+       if (unlikely(bt && !bt->rq_based))
+               bt->rq_based = true;
+
        blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
 }
 
@@ -774,15 +780,30 @@ static void blk_add_trace_bio_bounce(void *ignore,
        blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
 }
 
-static void blk_add_trace_bio_complete(void *ignore,
-                                      struct request_queue *q, struct bio *bio,
-                                      int error)
+static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error)
 {
+       struct request_queue *q;
+       struct blk_trace *bt;
+
+       if (!bio->bi_bdev)
+               return;
+
+       q = bdev_get_queue(bio->bi_bdev);
+       bt = q->blk_trace;
+
+       /*
+        * Request based drivers will generate both rq and bio completions.
+        * Ignore bio ones.
+        */
+       if (likely(!bt) || bt->rq_based)
+               return;
+
        blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
 }
 
 static void blk_add_trace_bio_backmerge(void *ignore,
                                        struct request_queue *q,
+                                       struct request *rq,
                                        struct bio *bio)
 {
        blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
@@ -790,6 +811,7 @@ static void blk_add_trace_bio_backmerge(void *ignore,
 
 static void blk_add_trace_bio_frontmerge(void *ignore,
                                         struct request_queue *q,
+                                        struct request *rq,
                                         struct bio *bio)
 {
        blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
index cdc377c456c0314ecdf8e20b2cdd881a2d88c814..efe68148f621959beb28987dd9430fff289f583f 100644 (file)
@@ -696,7 +696,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
         *     => fast response on large errors; small oscillation near setpoint
         */
        setpoint = (freerun + limit) / 2;
-       x = div_s64((setpoint - dirty) << RATELIMIT_CALC_SHIFT,
+       x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
                    limit - setpoint + 1);
        pos_ratio = x;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -1986,6 +1986,8 @@ int __set_page_dirty_no_writeback(struct page *page)
  */
 void account_page_dirtied(struct page *page, struct address_space *mapping)
 {
+       trace_writeback_dirty_page(page, mapping);
+
        if (mapping_cap_account_dirty(mapping)) {
                __inc_zone_page_state(page, NR_FILE_DIRTY);
                __inc_zone_page_state(page, NR_DIRTIED);
index 1deb29af82fd2b10a2b578c4a11c362fa83d4ecf..e65e6e4be38be72cff529e7d5019c908dd8f3961 100644 (file)
 #include "crypto.h"
 
 
+/*
+ * Module compatibility interface.  For now it doesn't do anything,
+ * but its existence signals a certain level of functionality.
+ *
+ * The data buffer is used to pass information both to and from
+ * libceph.  The return value indicates whether libceph determines
+ * it is compatible with the caller (from another kernel module),
+ * given the provided data.
+ *
+ * The data pointer can be null.
+ */
+bool libceph_compatible(void *data)
+{
+       return true;
+}
+EXPORT_SYMBOL(libceph_compatible);
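
A hedged sketch of how a dependent module (rbd or cephfs, for instance) might gate its own init on this check:

	if (!libceph_compatible(NULL)) {
		pr_err("libceph incompatibility, cannot continue\n");
		return -EINVAL;
	}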
 
 /*
  * find filename portion of a path (/foo/bar/baz -> baz)
@@ -590,10 +606,8 @@ static int __init init_ceph_lib(void)
        if (ret < 0)
                goto out_crypto;
 
-       pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
-               CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
-               CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
-               CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
+       pr_info("loaded (mon/osd proto %d/%d)\n",
+               CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
 
        return 0;
 
index 3fbda04de29cdaa18b8f8b624d0ef3c15891bf40..1348df96fe15190be48a5922a33e99ad77efa388 100644 (file)
@@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op)
        switch (op) {
        case CEPH_OSD_OP_READ: return "read";
        case CEPH_OSD_OP_STAT: return "stat";
+       case CEPH_OSD_OP_MAPEXT: return "mapext";
+       case CEPH_OSD_OP_SPARSE_READ: return "sparse-read";
+       case CEPH_OSD_OP_NOTIFY: return "notify";
+       case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack";
+       case CEPH_OSD_OP_ASSERT_VER: return "assert-version";
 
        case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
 
+       case CEPH_OSD_OP_CREATE: return "create";
        case CEPH_OSD_OP_WRITE: return "write";
        case CEPH_OSD_OP_DELETE: return "delete";
        case CEPH_OSD_OP_TRUNCATE: return "truncate";
@@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op)
        case CEPH_OSD_OP_TMAPUP: return "tmapup";
        case CEPH_OSD_OP_TMAPGET: return "tmapget";
        case CEPH_OSD_OP_TMAPPUT: return "tmapput";
+       case CEPH_OSD_OP_WATCH: return "watch";
+
+       case CEPH_OSD_OP_CLONERANGE: return "clonerange";
+       case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version";
+       case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr";
 
        case CEPH_OSD_OP_GETXATTR: return "getxattr";
        case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
@@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op)
        case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
        case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
        case CEPH_OSD_OP_SCRUB: return "scrub";
+       case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve";
+       case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve";
+       case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop";
+       case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map";
 
        case CEPH_OSD_OP_WRLOCK: return "wrlock";
        case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
@@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op)
        case CEPH_OSD_OP_CALL: return "call";
 
        case CEPH_OSD_OP_PGLS: return "pgls";
+       case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter";
+       case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys";
+       case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals";
+       case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header";
+       case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys";
+       case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals";
+       case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header";
+       case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear";
+       case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys";
        }
        return "???";
 }
 
+const char *ceph_osd_state_name(int s)
+{
+       switch (s) {
+       case CEPH_OSD_EXISTS:
+               return "exists";
+       case CEPH_OSD_UP:
+               return "up";
+       case CEPH_OSD_AUTOOUT:
+               return "autoout";
+       case CEPH_OSD_NEW:
+               return "new";
+       default:
+               return "???";
+       }
+}
 
 const char *ceph_pool_op_name(int op)
 {
index 35fce755ce103528071d422ca6e9ca2e0faf6a27..cbd06a91941c15f3e88b9dd6671694e2fe76dbe2 100644 (file)
@@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in
  * @outpos: our position in that vector
  * @firstn: true if choosing "first n" items, false if choosing "indep"
  * @recurse_to_leaf: true if we want one device under each item of given type
+ * @descend_once: true if we should only try one descent before giving up
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
  */
 static int crush_choose(const struct crush_map *map,
@@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map,
                        int x, int numrep, int type,
                        int *out, int outpos,
                        int firstn, int recurse_to_leaf,
-                       int *out2)
+                       int descend_once, int *out2)
 {
        int rep;
        unsigned int ftotal, flocal;
@@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map,
                                }
 
                                reject = 0;
-                               if (recurse_to_leaf) {
+                               if (!collide && recurse_to_leaf) {
                                        if (item < 0) {
                                                if (crush_choose(map,
                                                         map->buckets[-1-item],
@@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map,
                                                         x, outpos+1, 0,
                                                         out2, outpos,
                                                         firstn, 0,
+                                                        map->chooseleaf_descend_once,
                                                         NULL) <= outpos)
                                                        /* didn't get leaf */
                                                        reject = 1;
@@ -422,7 +424,10 @@ reject:
                                        ftotal++;
                                        flocal++;
 
-                                       if (collide && flocal <= map->choose_local_tries)
+                                       if (reject && descend_once)
+                                               /* let outer call try again */
+                                               skip_rep = 1;
+                                       else if (collide && flocal <= map->choose_local_tries)
                                                /* retry locally a few times */
                                                retry_bucket = 1;
                                        else if (map->choose_local_fallback_tries > 0 &&
@@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map,
        int i, j;
        int numrep;
        int firstn;
+       const int descend_once = 0;
 
        if ((__u32)ruleno >= map->max_rules) {
                dprintk(" bad ruleno %d\n", ruleno);
@@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map,
                                                      curstep->arg2,
                                                      o+osize, j,
                                                      firstn,
-                                                     recurse_to_leaf, c+osize);
+                                                     recurse_to_leaf,
+                                                     descend_once, c+osize);
                        }
 
                        if (recurse_to_leaf)
index af14cb425164d068d8a6272e1d9da1491ccd5f74..6e7a236525b6ff92d9cd2e44770cb6f7334b0b02 100644 (file)
@@ -423,7 +423,8 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
        }
 }
 
-int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
+static int ceph_key_instantiate(struct key *key,
+                               struct key_preparsed_payload *prep)
 {
        struct ceph_crypto_key *ckey;
        size_t datalen = prep->datalen;
@@ -458,12 +459,12 @@ err:
        return ret;
 }
 
-int ceph_key_match(const struct key *key, const void *description)
+static int ceph_key_match(const struct key *key, const void *description)
 {
        return strcmp(key->description, description) == 0;
 }
 
-void ceph_key_destroy(struct key *key) {
+static void ceph_key_destroy(struct key *key) {
        struct ceph_crypto_key *ckey = key->payload.data;
 
        ceph_crypto_key_destroy(ckey);
index 38b5dc1823d44961e6bbf7d52d36991920f5ddc1..00d051f4894e2e0c690fcb8420263ab1aa505ca8 100644 (file)
@@ -66,9 +66,9 @@ static int osdmap_show(struct seq_file *s, void *p)
        for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
                struct ceph_pg_pool_info *pool =
                        rb_entry(n, struct ceph_pg_pool_info, node);
-               seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
-                          pool->id, pool->v.pg_num, pool->pg_num_mask,
-                          pool->v.lpg_num, pool->lpg_num_mask);
+               seq_printf(s, "pg_pool %llu pg_num %d / %d\n",
+                          (unsigned long long)pool->id, pool->pg_num,
+                          pool->pg_num_mask);
        }
        for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
                struct ceph_entity_addr *addr =
@@ -123,26 +123,16 @@ static int osdc_show(struct seq_file *s, void *pp)
        mutex_lock(&osdc->request_mutex);
        for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
                struct ceph_osd_request *req;
-               struct ceph_osd_request_head *head;
-               struct ceph_osd_op *op;
-               int num_ops;
-               int opcode, olen;
+               int opcode;
                int i;
 
                req = rb_entry(p, struct ceph_osd_request, r_node);
 
-               seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
+               seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid,
                           req->r_osd ? req->r_osd->o_osd : -1,
-                          le32_to_cpu(req->r_pgid.pool),
-                          le16_to_cpu(req->r_pgid.ps));
+                          req->r_pgid.pool, req->r_pgid.seed);
 
-               head = req->r_request->front.iov_base;
-               op = (void *)(head + 1);
-
-               num_ops = le16_to_cpu(head->num_ops);
-               olen = le32_to_cpu(head->object_len);
-               seq_printf(s, "%.*s", olen,
-                          (const char *)(head->ops + num_ops));
+               seq_printf(s, "%.*s", req->r_oid_len, req->r_oid);
 
                if (req->r_reassert_version.epoch)
                        seq_printf(s, "\t%u'%llu",
@@ -151,10 +141,9 @@ static int osdc_show(struct seq_file *s, void *pp)
                else
                        seq_printf(s, "\t");
 
-               for (i = 0; i < num_ops; i++) {
-                       opcode = le16_to_cpu(op->op);
+               for (i = 0; i < req->r_num_ops; i++) {
+                       opcode = le16_to_cpu(req->r_request_ops[i].op);
                        seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
-                       op++;
                }
 
                seq_printf(s, "\n");
index 5ccf87ed8d688820a23ba1267439abe149398ec2..2c0669fb54e33181f2834ea6725dd15291ec5a64 100644 (file)
@@ -9,8 +9,9 @@
 #include <linux/slab.h>
 #include <linux/socket.h>
 #include <linux/string.h>
+#ifdef CONFIG_BLOCK
 #include <linux/bio.h>
-#include <linux/blkdev.h>
+#endif /* CONFIG_BLOCK */
 #include <linux/dns_resolver.h>
 #include <net/tcp.h>
 
 #define CON_FLAG_SOCK_CLOSED      3  /* socket state changed to closed */
 #define CON_FLAG_BACKOFF           4  /* need to retry queuing delayed work */
 
+static bool con_flag_valid(unsigned long con_flag)
+{
+       switch (con_flag) {
+       case CON_FLAG_LOSSYTX:
+       case CON_FLAG_KEEPALIVE_PENDING:
+       case CON_FLAG_WRITE_PENDING:
+       case CON_FLAG_SOCK_CLOSED:
+       case CON_FLAG_BACKOFF:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static void con_flag_clear(struct ceph_connection *con, unsigned long con_flag)
+{
+       BUG_ON(!con_flag_valid(con_flag));
+
+       clear_bit(con_flag, &con->flags);
+}
+
+static void con_flag_set(struct ceph_connection *con, unsigned long con_flag)
+{
+       BUG_ON(!con_flag_valid(con_flag));
+
+       set_bit(con_flag, &con->flags);
+}
+
+static bool con_flag_test(struct ceph_connection *con, unsigned long con_flag)
+{
+       BUG_ON(!con_flag_valid(con_flag));
+
+       return test_bit(con_flag, &con->flags);
+}
+
+static bool con_flag_test_and_clear(struct ceph_connection *con,
+                                       unsigned long con_flag)
+{
+       BUG_ON(!con_flag_valid(con_flag));
+
+       return test_and_clear_bit(con_flag, &con->flags);
+}
+
+static bool con_flag_test_and_set(struct ceph_connection *con,
+                                       unsigned long con_flag)
+{
+       BUG_ON(!con_flag_valid(con_flag));
+
+       return test_and_set_bit(con_flag, &con->flags);
+}
+
 /* static tag bytes (protocol control messages) */
 static char tag_msg = CEPH_MSGR_TAG_MSG;
 static char tag_ack = CEPH_MSGR_TAG_ACK;
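
The block above replaces open-coded set_bit()/clear_bit()/test_bit() calls on con->flags with con_flag_* accessors that BUG_ON() any value outside the known CON_FLAG_* set, so a bad flag number fails loudly instead of silently touching a neighbouring bit; the conversion in ceph_con_close() further down also drops a duplicated clear of CON_FLAG_KEEPALIVE_PENDING. A stand-alone user-space sketch of the same validate-then-forward pattern (assert() stands in for BUG_ON(); unlike the kernel helpers, which wrap atomic bitops, this sketch is not atomic):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    enum { FLAG_LOSSYTX, FLAG_KEEPALIVE_PENDING, FLAG_WRITE_PENDING,
           FLAG_SOCK_CLOSED, FLAG_BACKOFF, FLAG__MAX };

    struct conn { unsigned long flags; };

    static bool flag_valid(unsigned long f)
    {
        return f < FLAG__MAX;               /* reject unknown flag numbers */
    }

    static void flag_set(struct conn *c, unsigned long f)
    {
        assert(flag_valid(f));
        c->flags |= 1UL << f;
    }

    static bool flag_test_and_clear(struct conn *c, unsigned long f)
    {
        assert(flag_valid(f));
        bool was = c->flags & (1UL << f);
        c->flags &= ~(1UL << f);
        return was;
    }

    int main(void)
    {
        struct conn c = { 0 };
        flag_set(&c, FLAG_WRITE_PENDING);
        printf("%d %d\n", flag_test_and_clear(&c, FLAG_WRITE_PENDING),
               flag_test_and_clear(&c, FLAG_WRITE_PENDING));   /* prints: 1 0 */
        return 0;
    }
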
@@ -114,7 +166,7 @@ static struct lock_class_key socket_class;
 
 static void queue_con(struct ceph_connection *con);
 static void con_work(struct work_struct *);
-static void ceph_fault(struct ceph_connection *con);
+static void con_fault(struct ceph_connection *con);
 
 /*
  * Nicely render a sockaddr as a string.  An array of formatted
@@ -171,7 +223,7 @@ static void encode_my_addr(struct ceph_messenger *msgr)
  */
 static struct workqueue_struct *ceph_msgr_wq;
 
-void _ceph_msgr_exit(void)
+static void _ceph_msgr_exit(void)
 {
        if (ceph_msgr_wq) {
                destroy_workqueue(ceph_msgr_wq);
@@ -308,7 +360,7 @@ static void ceph_sock_write_space(struct sock *sk)
         * buffer. See net/ipv4/tcp_input.c:tcp_check_space()
         * and net/core/stream.c:sk_stream_write_space().
         */
-       if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
+       if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) {
                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                        dout("%s %p queueing write work\n", __func__, con);
                        clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
@@ -333,7 +385,7 @@ static void ceph_sock_state_change(struct sock *sk)
        case TCP_CLOSE_WAIT:
                dout("%s TCP_CLOSE_WAIT\n", __func__);
                con_sock_state_closing(con);
-               set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+               con_flag_set(con, CON_FLAG_SOCK_CLOSED);
                queue_con(con);
                break;
        case TCP_ESTABLISHED:
@@ -474,7 +526,7 @@ static int con_close_socket(struct ceph_connection *con)
         * received a socket close event before we had the chance to
         * shut the socket down.
         */
-       clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+       con_flag_clear(con, CON_FLAG_SOCK_CLOSED);
 
        con_sock_state_closed(con);
        return rc;
@@ -538,11 +590,10 @@ void ceph_con_close(struct ceph_connection *con)
             ceph_pr_addr(&con->peer_addr.in_addr));
        con->state = CON_STATE_CLOSED;
 
-       clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
-       clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
-       clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
-       clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
-       clear_bit(CON_FLAG_BACKOFF, &con->flags);
+       con_flag_clear(con, CON_FLAG_LOSSYTX);  /* so we retry next connect */
+       con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING);
+       con_flag_clear(con, CON_FLAG_WRITE_PENDING);
+       con_flag_clear(con, CON_FLAG_BACKOFF);
 
        reset_connection(con);
        con->peer_global_seq = 0;
@@ -798,7 +849,7 @@ static void prepare_write_message(struct ceph_connection *con)
                /* no, queue up footer too and be done */
                prepare_write_message_footer(con);
 
-       set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+       con_flag_set(con, CON_FLAG_WRITE_PENDING);
 }
 
 /*
@@ -819,7 +870,7 @@ static void prepare_write_ack(struct ceph_connection *con)
                                &con->out_temp_ack);
 
        con->out_more = 1;  /* more will follow.. eventually.. */
-       set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+       con_flag_set(con, CON_FLAG_WRITE_PENDING);
 }
 
 /*
@@ -830,7 +881,7 @@ static void prepare_write_keepalive(struct ceph_connection *con)
        dout("prepare_write_keepalive %p\n", con);
        con_out_kvec_reset(con);
        con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
-       set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+       con_flag_set(con, CON_FLAG_WRITE_PENDING);
 }
 
 /*
@@ -873,7 +924,7 @@ static void prepare_write_banner(struct ceph_connection *con)
                                        &con->msgr->my_enc_addr);
 
        con->out_more = 0;
-       set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+       con_flag_set(con, CON_FLAG_WRITE_PENDING);
 }
 
 static int prepare_write_connect(struct ceph_connection *con)
@@ -923,7 +974,7 @@ static int prepare_write_connect(struct ceph_connection *con)
                                        auth->authorizer_buf);
 
        con->out_more = 0;
-       set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+       con_flag_set(con, CON_FLAG_WRITE_PENDING);
 
        return 0;
 }
@@ -1643,7 +1694,7 @@ static int process_connect(struct ceph_connection *con)
                        le32_to_cpu(con->in_reply.connect_seq));
 
                if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
-                       set_bit(CON_FLAG_LOSSYTX, &con->flags);
+                       con_flag_set(con, CON_FLAG_LOSSYTX);
 
                con->delay = 0;      /* reset backoff memory */
 
@@ -2080,15 +2131,14 @@ do_next:
                        prepare_write_ack(con);
                        goto more;
                }
-               if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
-                                      &con->flags)) {
+               if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) {
                        prepare_write_keepalive(con);
                        goto more;
                }
        }
 
        /* Nothing to do! */
-       clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+       con_flag_clear(con, CON_FLAG_WRITE_PENDING);
        dout("try_write nothing else to write.\n");
        ret = 0;
 out:
@@ -2268,7 +2318,7 @@ static void queue_con(struct ceph_connection *con)
 
 static bool con_sock_closed(struct ceph_connection *con)
 {
-       if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags))
+       if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED))
                return false;
 
 #define CASE(x)                                                                \
@@ -2295,6 +2345,41 @@ static bool con_sock_closed(struct ceph_connection *con)
        return true;
 }
 
+static bool con_backoff(struct ceph_connection *con)
+{
+       int ret;
+
+       if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF))
+               return false;
+
+       ret = queue_con_delay(con, round_jiffies_relative(con->delay));
+       if (ret) {
+               dout("%s: con %p FAILED to back off %lu\n", __func__,
+                       con, con->delay);
+               BUG_ON(ret == -ENOENT);
+               con_flag_set(con, CON_FLAG_BACKOFF);
+       }
+
+       return true;
+}
+
+/* Finish fault handling; con->mutex must *not* be held here */
+
+static void con_fault_finish(struct ceph_connection *con)
+{
+       /*
+        * in case we faulted due to authentication, invalidate our
+        * current tickets so that we can get new ones.
+        */
+       if (con->auth_retry && con->ops->invalidate_authorizer) {
+               dout("calling invalidate_authorizer()\n");
+               con->ops->invalidate_authorizer(con);
+       }
+
+       if (con->ops->fault)
+               con->ops->fault(con);
+}
+
 /*
  * Do some work on a connection.  Drop a connection ref when we're done.
  */
@@ -2302,73 +2387,68 @@ static void con_work(struct work_struct *work)
 {
        struct ceph_connection *con = container_of(work, struct ceph_connection,
                                                   work.work);
-       int ret;
+       bool fault;
 
        mutex_lock(&con->mutex);
-restart:
-       if (con_sock_closed(con))
-               goto fault;
+       while (true) {
+               int ret;
 
-       if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
-               dout("con_work %p backing off\n", con);
-               ret = queue_con_delay(con, round_jiffies_relative(con->delay));
-               if (ret) {
-                       dout("con_work %p FAILED to back off %lu\n", con,
-                            con->delay);
-                       BUG_ON(ret == -ENOENT);
-                       set_bit(CON_FLAG_BACKOFF, &con->flags);
+               if ((fault = con_sock_closed(con))) {
+                       dout("%s: con %p SOCK_CLOSED\n", __func__, con);
+                       break;
+               }
+               if (con_backoff(con)) {
+                       dout("%s: con %p BACKOFF\n", __func__, con);
+                       break;
+               }
+               if (con->state == CON_STATE_STANDBY) {
+                       dout("%s: con %p STANDBY\n", __func__, con);
+                       break;
+               }
+               if (con->state == CON_STATE_CLOSED) {
+                       dout("%s: con %p CLOSED\n", __func__, con);
+                       BUG_ON(con->sock);
+                       break;
+               }
+               if (con->state == CON_STATE_PREOPEN) {
+                       dout("%s: con %p PREOPEN\n", __func__, con);
+                       BUG_ON(con->sock);
                }
-               goto done;
-       }
 
-       if (con->state == CON_STATE_STANDBY) {
-               dout("con_work %p STANDBY\n", con);
-               goto done;
-       }
-       if (con->state == CON_STATE_CLOSED) {
-               dout("con_work %p CLOSED\n", con);
-               BUG_ON(con->sock);
-               goto done;
-       }
-       if (con->state == CON_STATE_PREOPEN) {
-               dout("con_work OPENING\n");
-               BUG_ON(con->sock);
-       }
+               ret = try_read(con);
+               if (ret < 0) {
+                       if (ret == -EAGAIN)
+                               continue;
+                       con->error_msg = "socket error on read";
+                       fault = true;
+                       break;
+               }
 
-       ret = try_read(con);
-       if (ret == -EAGAIN)
-               goto restart;
-       if (ret < 0) {
-               con->error_msg = "socket error on read";
-               goto fault;
-       }
+               ret = try_write(con);
+               if (ret < 0) {
+                       if (ret == -EAGAIN)
+                               continue;
+                       con->error_msg = "socket error on write";
+                       fault = true;
+               }
 
-       ret = try_write(con);
-       if (ret == -EAGAIN)
-               goto restart;
-       if (ret < 0) {
-               con->error_msg = "socket error on write";
-               goto fault;
+               break;  /* If we make it to here, we're done */
        }
-
-done:
+       if (fault)
+               con_fault(con);
        mutex_unlock(&con->mutex);
-done_unlocked:
-       con->ops->put(con);
-       return;
 
-fault:
-       ceph_fault(con);     /* error/fault path */
-       goto done_unlocked;
-}
+       if (fault)
+               con_fault_finish(con);
 
+       con->ops->put(con);
+}
 
 /*
  * Generic error/fault handler.  A retry mechanism is used with
  * exponential backoff
  */
-static void ceph_fault(struct ceph_connection *con)
-       __releases(con->mutex)
+static void con_fault(struct ceph_connection *con)
 {
        pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
               ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
@@ -2381,10 +2461,10 @@ static void ceph_fault(struct ceph_connection *con)
 
        con_close_socket(con);
 
-       if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
+       if (con_flag_test(con, CON_FLAG_LOSSYTX)) {
                dout("fault on LOSSYTX channel, marking CLOSED\n");
                con->state = CON_STATE_CLOSED;
-               goto out_unlock;
+               return;
        }
 
        if (con->in_msg) {
@@ -2401,9 +2481,9 @@ static void ceph_fault(struct ceph_connection *con)
        /* If there are no messages queued or keepalive pending, place
         * the connection in a STANDBY state */
        if (list_empty(&con->out_queue) &&
-           !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
+           !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) {
                dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
-               clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+               con_flag_clear(con, CON_FLAG_WRITE_PENDING);
                con->state = CON_STATE_STANDBY;
        } else {
                /* retry after a delay. */
@@ -2412,23 +2492,9 @@ static void ceph_fault(struct ceph_connection *con)
                        con->delay = BASE_DELAY_INTERVAL;
                else if (con->delay < MAX_DELAY_INTERVAL)
                        con->delay *= 2;
-               set_bit(CON_FLAG_BACKOFF, &con->flags);
+               con_flag_set(con, CON_FLAG_BACKOFF);
                queue_con(con);
        }
-
-out_unlock:
-       mutex_unlock(&con->mutex);
-       /*
-        * in case we faulted due to authentication, invalidate our
-        * current tickets so that we can get new ones.
-        */
-       if (con->auth_retry && con->ops->invalidate_authorizer) {
-               dout("calling invalidate_authorizer()\n");
-               con->ops->invalidate_authorizer(con);
-       }
-
-       if (con->ops->fault)
-               con->ops->fault(con);
 }
 
 
@@ -2469,8 +2535,8 @@ static void clear_standby(struct ceph_connection *con)
                dout("clear_standby %p and ++connect_seq\n", con);
                con->state = CON_STATE_PREOPEN;
                con->connect_seq++;
-               WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
-               WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
+               WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING));
+               WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING));
        }
 }
 
@@ -2511,7 +2577,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
 
        /* if there wasn't anything waiting to send before, queue
         * new work */
-       if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
+       if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
                queue_con(con);
 }
 EXPORT_SYMBOL(ceph_con_send);
@@ -2600,8 +2666,8 @@ void ceph_con_keepalive(struct ceph_connection *con)
        mutex_lock(&con->mutex);
        clear_standby(con);
        mutex_unlock(&con->mutex);
-       if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
-           test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
+       if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 &&
+           con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0)
                queue_con(con);
 }
 EXPORT_SYMBOL(ceph_con_keepalive);
@@ -2651,9 +2717,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
        m->page_alignment = 0;
        m->pages = NULL;
        m->pagelist = NULL;
+#ifdef CONFIG_BLOCK
        m->bio = NULL;
        m->bio_iter = NULL;
        m->bio_seg = 0;
+#endif /* CONFIG_BLOCK */
        m->trail = NULL;
 
        /* front */
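
The con_work()/con_fault() rework above folds the old restart/done/fault labels into a single loop under con->mutex with one fault flag: con_fault() now runs with the mutex still held, and the work that must run unlocked (invalidating the authorizer, calling ops->fault) moves into con_fault_finish(), which con_work() calls only after dropping the mutex. A compilable user-space skeleton of that ordering, with a pthread mutex standing in for con->mutex and everything else stubbed:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct conn { pthread_mutex_t mutex; };

    static bool do_io(struct conn *c)          { (void)c; return false; } /* pretend the socket failed */
    static void fault_locked(struct conn *c)   { (void)c; puts("con_fault: mutex held"); }
    static void fault_unlocked(struct conn *c) { (void)c; puts("con_fault_finish: mutex dropped"); }

    static void work(struct conn *c)
    {
        bool fault = false;

        pthread_mutex_lock(&c->mutex);
        while (true) {
            if (!do_io(c)) {        /* read/write failed */
                fault = true;
                break;
            }
            break;                  /* if we make it to here, we're done */
        }
        if (fault)
            fault_locked(c);        /* connection state fixup, still under the mutex */
        pthread_mutex_unlock(&c->mutex);

        if (fault)
            fault_unlocked(c);      /* callbacks that must not hold the mutex */
    }

    int main(void)
    {
        struct conn c = { PTHREAD_MUTEX_INITIALIZER };
        work(&c);                   /* build with -pthread */
        return 0;
    }
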
index 812eb3b46c1f94d188079274457108cad4c5b54f..aef5b1062beec7c0d9e45fbe4c77b6ec05b10d5a 100644 (file)
@@ -697,7 +697,7 @@ int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
                            u32 pool, u64 snapid)
 {
        return do_poolop(monc,  POOL_OP_CREATE_UNMANAGED_SNAP,
-                                  pool, snapid, 0, 0);
+                                  pool, snapid, NULL, 0);
 
 }
 
index eb9a4447876481e9a4110a1e68ea351ce3ec86d9..d730dd4d8eb28bc421babb138f187605ee7c0ec6 100644 (file)
@@ -23,7 +23,7 @@
 
 static const struct ceph_connection_operations osd_con_ops;
 
-static void send_queued(struct ceph_osd_client *osdc);
+static void __send_queued(struct ceph_osd_client *osdc);
 static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd);
 static void __register_request(struct ceph_osd_client *osdc,
                               struct ceph_osd_request *req);
@@ -32,64 +32,12 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
 static void __send_request(struct ceph_osd_client *osdc,
                           struct ceph_osd_request *req);
 
-static int op_needs_trail(int op)
-{
-       switch (op) {
-       case CEPH_OSD_OP_GETXATTR:
-       case CEPH_OSD_OP_SETXATTR:
-       case CEPH_OSD_OP_CMPXATTR:
-       case CEPH_OSD_OP_CALL:
-       case CEPH_OSD_OP_NOTIFY:
-               return 1;
-       default:
-               return 0;
-       }
-}
-
 static int op_has_extent(int op)
 {
        return (op == CEPH_OSD_OP_READ ||
                op == CEPH_OSD_OP_WRITE);
 }
 
-int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
-                       struct ceph_file_layout *layout,
-                       u64 snapid,
-                       u64 off, u64 *plen, u64 *bno,
-                       struct ceph_osd_request *req,
-                       struct ceph_osd_req_op *op)
-{
-       struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
-       u64 orig_len = *plen;
-       u64 objoff, objlen;    /* extent in object */
-       int r;
-
-       reqhead->snapid = cpu_to_le64(snapid);
-
-       /* object extent? */
-       r = ceph_calc_file_object_mapping(layout, off, plen, bno,
-                                         &objoff, &objlen);
-       if (r < 0)
-               return r;
-       if (*plen < orig_len)
-               dout(" skipping last %llu, final file extent %llu~%llu\n",
-                    orig_len - *plen, off, *plen);
-
-       if (op_has_extent(op->op)) {
-               op->extent.offset = objoff;
-               op->extent.length = objlen;
-       }
-       req->r_num_pages = calc_pages_for(off, *plen);
-       req->r_page_alignment = off & ~PAGE_MASK;
-       if (op->op == CEPH_OSD_OP_WRITE)
-               op->payload_len = *plen;
-
-       dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
-            *bno, objoff, objlen, req->r_num_pages);
-       return 0;
-}
-EXPORT_SYMBOL(ceph_calc_raw_layout);
-
 /*
  * Implement client access to distributed object storage cluster.
  *
@@ -115,20 +63,48 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
  *
  * fill osd op in request message.
  */
-static int calc_layout(struct ceph_osd_client *osdc,
-                      struct ceph_vino vino,
+static int calc_layout(struct ceph_vino vino,
                       struct ceph_file_layout *layout,
                       u64 off, u64 *plen,
                       struct ceph_osd_request *req,
                       struct ceph_osd_req_op *op)
 {
-       u64 bno;
+       u64 orig_len = *plen;
+       u64 bno = 0;
+       u64 objoff = 0;
+       u64 objlen = 0;
        int r;
 
-       r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
-                                plen, &bno, req, op);
+       /* object extent? */
+       r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno,
+                                         &objoff, &objlen);
        if (r < 0)
                return r;
+       if (objlen < orig_len) {
+               *plen = objlen;
+               dout(" skipping last %llu, final file extent %llu~%llu\n",
+                    orig_len - *plen, off, *plen);
+       }
+
+       if (op_has_extent(op->op)) {
+               u32 osize = le32_to_cpu(layout->fl_object_size);
+               op->extent.offset = objoff;
+               op->extent.length = objlen;
+               if (op->extent.truncate_size <= off - objoff) {
+                       op->extent.truncate_size = 0;
+               } else {
+                       op->extent.truncate_size -= off - objoff;
+                       if (op->extent.truncate_size > osize)
+                               op->extent.truncate_size = osize;
+               }
+       }
+       req->r_num_pages = calc_pages_for(off, *plen);
+       req->r_page_alignment = off & ~PAGE_MASK;
+       if (op->op == CEPH_OSD_OP_WRITE)
+               op->payload_len = *plen;
+
+       dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
+            bno, objoff, objlen, req->r_num_pages);
 
        snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
        req->r_oid_len = strlen(req->r_oid);
@@ -148,25 +124,19 @@ void ceph_osdc_release_request(struct kref *kref)
        if (req->r_request)
                ceph_msg_put(req->r_request);
        if (req->r_con_filling_msg) {
-               dout("%s revoking pages %p from con %p\n", __func__,
-                    req->r_pages, req->r_con_filling_msg);
+               dout("%s revoking msg %p from con %p\n", __func__,
+                    req->r_reply, req->r_con_filling_msg);
                ceph_msg_revoke_incoming(req->r_reply);
                req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
+               req->r_con_filling_msg = NULL;
        }
        if (req->r_reply)
                ceph_msg_put(req->r_reply);
        if (req->r_own_pages)
                ceph_release_page_vector(req->r_pages,
                                         req->r_num_pages);
-#ifdef CONFIG_BLOCK
-       if (req->r_bio)
-               bio_put(req->r_bio);
-#endif
        ceph_put_snap_context(req->r_snapc);
-       if (req->r_trail) {
-               ceph_pagelist_release(req->r_trail);
-               kfree(req->r_trail);
-       }
+       ceph_pagelist_release(&req->r_trail);
        if (req->r_mempool)
                mempool_free(req, req->r_osdc->req_mempool);
        else
@@ -174,37 +144,25 @@ void ceph_osdc_release_request(struct kref *kref)
 }
 EXPORT_SYMBOL(ceph_osdc_release_request);
 
-static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail)
-{
-       int i = 0;
-
-       if (needs_trail)
-               *needs_trail = 0;
-       while (ops[i].op) {
-               if (needs_trail && op_needs_trail(ops[i].op))
-                       *needs_trail = 1;
-               i++;
-       }
-
-       return i;
-}
-
 struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
-                                              int flags,
                                               struct ceph_snap_context *snapc,
-                                              struct ceph_osd_req_op *ops,
+                                              unsigned int num_ops,
                                               bool use_mempool,
-                                              gfp_t gfp_flags,
-                                              struct page **pages,
-                                              struct bio *bio)
+                                              gfp_t gfp_flags)
 {
        struct ceph_osd_request *req;
        struct ceph_msg *msg;
-       int needs_trail;
-       int num_op = get_num_ops(ops, &needs_trail);
-       size_t msg_size = sizeof(struct ceph_osd_request_head);
-
-       msg_size += num_op*sizeof(struct ceph_osd_op);
+       size_t msg_size;
+
+       msg_size = 4 + 4 + 8 + 8 + 4+8;
+       msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
+       msg_size += 1 + 8 + 4 + 4;     /* pg_t */
+       msg_size += 4 + MAX_OBJ_NAME_SIZE;
+       msg_size += 2 + num_ops*sizeof(struct ceph_osd_op);
+       msg_size += 8;  /* snapid */
+       msg_size += 8;  /* snap_seq */
+       msg_size += 8 * (snapc ? snapc->num_snaps : 0);  /* snaps */
+       msg_size += 4;
 
        if (use_mempool) {
                req = mempool_alloc(osdc->req_mempool, gfp_flags);
@@ -228,10 +186,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
        INIT_LIST_HEAD(&req->r_req_lru_item);
        INIT_LIST_HEAD(&req->r_osd_item);
 
-       req->r_flags = flags;
-
-       WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
-
        /* create reply message */
        if (use_mempool)
                msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0);
@@ -244,20 +198,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
        }
        req->r_reply = msg;
 
-       /* allocate space for the trailing data */
-       if (needs_trail) {
-               req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags);
-               if (!req->r_trail) {
-                       ceph_osdc_put_request(req);
-                       return NULL;
-               }
-               ceph_pagelist_init(req->r_trail);
-       }
+       ceph_pagelist_init(&req->r_trail);
 
        /* create request message; allow space for oid */
-       msg_size += MAX_OBJ_NAME_SIZE;
-       if (snapc)
-               msg_size += sizeof(u64) * snapc->num_snaps;
        if (use_mempool)
                msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
        else
@@ -270,13 +213,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
        memset(msg->front.iov_base, 0, msg->front.iov_len);
 
        req->r_request = msg;
-       req->r_pages = pages;
-#ifdef CONFIG_BLOCK
-       if (bio) {
-               req->r_bio = bio;
-               bio_get(req->r_bio);
-       }
-#endif
 
        return req;
 }
@@ -289,6 +225,8 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
        dst->op = cpu_to_le16(src->op);
 
        switch (src->op) {
+       case CEPH_OSD_OP_STAT:
+               break;
        case CEPH_OSD_OP_READ:
        case CEPH_OSD_OP_WRITE:
                dst->extent.offset =
@@ -300,52 +238,20 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
                dst->extent.truncate_seq =
                        cpu_to_le32(src->extent.truncate_seq);
                break;
-
-       case CEPH_OSD_OP_GETXATTR:
-       case CEPH_OSD_OP_SETXATTR:
-       case CEPH_OSD_OP_CMPXATTR:
-               BUG_ON(!req->r_trail);
-
-               dst->xattr.name_len = cpu_to_le32(src->xattr.name_len);
-               dst->xattr.value_len = cpu_to_le32(src->xattr.value_len);
-               dst->xattr.cmp_op = src->xattr.cmp_op;
-               dst->xattr.cmp_mode = src->xattr.cmp_mode;
-               ceph_pagelist_append(req->r_trail, src->xattr.name,
-                                    src->xattr.name_len);
-               ceph_pagelist_append(req->r_trail, src->xattr.val,
-                                    src->xattr.value_len);
-               break;
        case CEPH_OSD_OP_CALL:
-               BUG_ON(!req->r_trail);
-
                dst->cls.class_len = src->cls.class_len;
                dst->cls.method_len = src->cls.method_len;
                dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
 
-               ceph_pagelist_append(req->r_trail, src->cls.class_name,
+               ceph_pagelist_append(&req->r_trail, src->cls.class_name,
                                     src->cls.class_len);
-               ceph_pagelist_append(req->r_trail, src->cls.method_name,
+               ceph_pagelist_append(&req->r_trail, src->cls.method_name,
                                     src->cls.method_len);
-               ceph_pagelist_append(req->r_trail, src->cls.indata,
+               ceph_pagelist_append(&req->r_trail, src->cls.indata,
                                     src->cls.indata_len);
                break;
-       case CEPH_OSD_OP_ROLLBACK:
-               dst->snap.snapid = cpu_to_le64(src->snap.snapid);
-               break;
        case CEPH_OSD_OP_STARTSYNC:
                break;
-       case CEPH_OSD_OP_NOTIFY:
-               {
-                       __le32 prot_ver = cpu_to_le32(src->watch.prot_ver);
-                       __le32 timeout = cpu_to_le32(src->watch.timeout);
-
-                       BUG_ON(!req->r_trail);
-
-                       ceph_pagelist_append(req->r_trail,
-                                               &prot_ver, sizeof(prot_ver));
-                       ceph_pagelist_append(req->r_trail,
-                                               &timeout, sizeof(timeout));
-               }
        case CEPH_OSD_OP_NOTIFY_ACK:
        case CEPH_OSD_OP_WATCH:
                dst->watch.cookie = cpu_to_le64(src->watch.cookie);
@@ -356,6 +262,64 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
                pr_err("unrecognized osd opcode %d\n", dst->op);
                WARN_ON(1);
                break;
+       case CEPH_OSD_OP_MAPEXT:
+       case CEPH_OSD_OP_MASKTRUNC:
+       case CEPH_OSD_OP_SPARSE_READ:
+       case CEPH_OSD_OP_NOTIFY:
+       case CEPH_OSD_OP_ASSERT_VER:
+       case CEPH_OSD_OP_WRITEFULL:
+       case CEPH_OSD_OP_TRUNCATE:
+       case CEPH_OSD_OP_ZERO:
+       case CEPH_OSD_OP_DELETE:
+       case CEPH_OSD_OP_APPEND:
+       case CEPH_OSD_OP_SETTRUNC:
+       case CEPH_OSD_OP_TRIMTRUNC:
+       case CEPH_OSD_OP_TMAPUP:
+       case CEPH_OSD_OP_TMAPPUT:
+       case CEPH_OSD_OP_TMAPGET:
+       case CEPH_OSD_OP_CREATE:
+       case CEPH_OSD_OP_ROLLBACK:
+       case CEPH_OSD_OP_OMAPGETKEYS:
+       case CEPH_OSD_OP_OMAPGETVALS:
+       case CEPH_OSD_OP_OMAPGETHEADER:
+       case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
+       case CEPH_OSD_OP_MODE_RD:
+       case CEPH_OSD_OP_OMAPSETVALS:
+       case CEPH_OSD_OP_OMAPSETHEADER:
+       case CEPH_OSD_OP_OMAPCLEAR:
+       case CEPH_OSD_OP_OMAPRMKEYS:
+       case CEPH_OSD_OP_OMAP_CMP:
+       case CEPH_OSD_OP_CLONERANGE:
+       case CEPH_OSD_OP_ASSERT_SRC_VERSION:
+       case CEPH_OSD_OP_SRC_CMPXATTR:
+       case CEPH_OSD_OP_GETXATTR:
+       case CEPH_OSD_OP_GETXATTRS:
+       case CEPH_OSD_OP_CMPXATTR:
+       case CEPH_OSD_OP_SETXATTR:
+       case CEPH_OSD_OP_SETXATTRS:
+       case CEPH_OSD_OP_RESETXATTRS:
+       case CEPH_OSD_OP_RMXATTR:
+       case CEPH_OSD_OP_PULL:
+       case CEPH_OSD_OP_PUSH:
+       case CEPH_OSD_OP_BALANCEREADS:
+       case CEPH_OSD_OP_UNBALANCEREADS:
+       case CEPH_OSD_OP_SCRUB:
+       case CEPH_OSD_OP_SCRUB_RESERVE:
+       case CEPH_OSD_OP_SCRUB_UNRESERVE:
+       case CEPH_OSD_OP_SCRUB_STOP:
+       case CEPH_OSD_OP_SCRUB_MAP:
+       case CEPH_OSD_OP_WRLOCK:
+       case CEPH_OSD_OP_WRUNLOCK:
+       case CEPH_OSD_OP_RDLOCK:
+       case CEPH_OSD_OP_RDUNLOCK:
+       case CEPH_OSD_OP_UPLOCK:
+       case CEPH_OSD_OP_DNLOCK:
+       case CEPH_OSD_OP_PGLS:
+       case CEPH_OSD_OP_PGLS_FILTER:
+               pr_err("unsupported osd opcode %s\n",
+                       ceph_osd_op_name(dst->op));
+               WARN_ON(1);
+               break;
        }
        dst->payload_len = cpu_to_le32(src->payload_len);
 }
@@ -365,75 +329,95 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
  *
  */
 void ceph_osdc_build_request(struct ceph_osd_request *req,
-                            u64 off, u64 *plen,
+                            u64 off, u64 len, unsigned int num_ops,
                             struct ceph_osd_req_op *src_ops,
-                            struct ceph_snap_context *snapc,
-                            struct timespec *mtime,
-                            const char *oid,
-                            int oid_len)
+                            struct ceph_snap_context *snapc, u64 snap_id,
+                            struct timespec *mtime)
 {
        struct ceph_msg *msg = req->r_request;
-       struct ceph_osd_request_head *head;
        struct ceph_osd_req_op *src_op;
-       struct ceph_osd_op *op;
        void *p;
-       int num_op = get_num_ops(src_ops, NULL);
-       size_t msg_size = sizeof(*head) + num_op*sizeof(*op);
+       size_t msg_size;
        int flags = req->r_flags;
-       u64 data_len = 0;
+       u64 data_len;
        int i;
 
-       head = msg->front.iov_base;
-       op = (void *)(head + 1);
-       p = (void *)(op + num_op);
-
+       req->r_num_ops = num_ops;
+       req->r_snapid = snap_id;
        req->r_snapc = ceph_get_snap_context(snapc);
 
-       head->client_inc = cpu_to_le32(1); /* always, for now. */
-       head->flags = cpu_to_le32(flags);
-       if (flags & CEPH_OSD_FLAG_WRITE)
-               ceph_encode_timespec(&head->mtime, mtime);
-       head->num_ops = cpu_to_le16(num_op);
-
-
-       /* fill in oid */
-       head->object_len = cpu_to_le32(oid_len);
-       memcpy(p, oid, oid_len);
-       p += oid_len;
+       /* encode request */
+       msg->hdr.version = cpu_to_le16(4);
 
+       p = msg->front.iov_base;
+       ceph_encode_32(&p, 1);   /* client_inc  is always 1 */
+       req->r_request_osdmap_epoch = p;
+       p += 4;
+       req->r_request_flags = p;
+       p += 4;
+       if (req->r_flags & CEPH_OSD_FLAG_WRITE)
+               ceph_encode_timespec(p, mtime);
+       p += sizeof(struct ceph_timespec);
+       req->r_request_reassert_version = p;
+       p += sizeof(struct ceph_eversion); /* will get filled in */
+
+       /* oloc */
+       ceph_encode_8(&p, 4);
+       ceph_encode_8(&p, 4);
+       ceph_encode_32(&p, 8 + 4 + 4);
+       req->r_request_pool = p;
+       p += 8;
+       ceph_encode_32(&p, -1);  /* preferred */
+       ceph_encode_32(&p, 0);   /* key len */
+
+       ceph_encode_8(&p, 1);
+       req->r_request_pgid = p;
+       p += 8 + 4;
+       ceph_encode_32(&p, -1);  /* preferred */
+
+       /* oid */
+       ceph_encode_32(&p, req->r_oid_len);
+       memcpy(p, req->r_oid, req->r_oid_len);
+       dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
+       p += req->r_oid_len;
+
+       /* ops */
+       ceph_encode_16(&p, num_ops);
        src_op = src_ops;
-       while (src_op->op) {
-               osd_req_encode_op(req, op, src_op);
-               src_op++;
-               op++;
+       req->r_request_ops = p;
+       for (i = 0; i < num_ops; i++, src_op++) {
+               osd_req_encode_op(req, p, src_op);
+               p += sizeof(struct ceph_osd_op);
        }
 
-       if (req->r_trail)
-               data_len += req->r_trail->length;
-
-       if (snapc) {
-               head->snap_seq = cpu_to_le64(snapc->seq);
-               head->num_snaps = cpu_to_le32(snapc->num_snaps);
+       /* snaps */
+       ceph_encode_64(&p, req->r_snapid);
+       ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
+       ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
+       if (req->r_snapc) {
                for (i = 0; i < snapc->num_snaps; i++) {
-                       put_unaligned_le64(snapc->snaps[i], p);
-                       p += sizeof(u64);
+                       ceph_encode_64(&p, req->r_snapc->snaps[i]);
                }
        }
 
+       req->r_request_attempts = p;
+       p += 4;
+
+       data_len = req->r_trail.length;
        if (flags & CEPH_OSD_FLAG_WRITE) {
                req->r_request->hdr.data_off = cpu_to_le16(off);
-               req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len);
-       } else if (data_len) {
-               req->r_request->hdr.data_off = 0;
-               req->r_request->hdr.data_len = cpu_to_le32(data_len);
+               data_len += len;
        }
-
+       req->r_request->hdr.data_len = cpu_to_le32(data_len);
        req->r_request->page_alignment = req->r_page_alignment;
 
        BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
        msg_size = p - msg->front.iov_base;
        msg->front.iov_len = msg_size;
        msg->hdr.front_len = cpu_to_le32(msg_size);
+
+       dout("build_request msg_size was %d num_ops %d\n", (int)msg_size,
+            num_ops);
        return;
 }
 EXPORT_SYMBOL(ceph_osdc_build_request);
@@ -459,34 +443,33 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
                                               u32 truncate_seq,
                                               u64 truncate_size,
                                               struct timespec *mtime,
-                                              bool use_mempool, int num_reply,
+                                              bool use_mempool,
                                               int page_align)
 {
-       struct ceph_osd_req_op ops[3];
+       struct ceph_osd_req_op ops[2];
        struct ceph_osd_request *req;
+       unsigned int num_op = 1;
        int r;
 
+       memset(&ops, 0, sizeof ops);
+
        ops[0].op = opcode;
        ops[0].extent.truncate_seq = truncate_seq;
        ops[0].extent.truncate_size = truncate_size;
-       ops[0].payload_len = 0;
 
        if (do_sync) {
                ops[1].op = CEPH_OSD_OP_STARTSYNC;
-               ops[1].payload_len = 0;
-               ops[2].op = 0;
-       } else
-               ops[1].op = 0;
-
-       req = ceph_osdc_alloc_request(osdc, flags,
-                                        snapc, ops,
-                                        use_mempool,
-                                        GFP_NOFS, NULL, NULL);
+               num_op++;
+       }
+
+       req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool,
+                                       GFP_NOFS);
        if (!req)
                return ERR_PTR(-ENOMEM);
+       req->r_flags = flags;
 
        /* calculate max write size */
-       r = calc_layout(osdc, vino, layout, off, plen, req, ops);
+       r = calc_layout(vino, layout, off, plen, req, ops);
        if (r < 0)
                return ERR_PTR(r);
        req->r_file_layout = *layout;  /* keep a copy */
@@ -496,10 +479,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
        req->r_num_pages = calc_pages_for(page_align, *plen);
        req->r_page_alignment = page_align;
 
-       ceph_osdc_build_request(req, off, plen, ops,
-                               snapc,
-                               mtime,
-                               req->r_oid, req->r_oid_len);
+       ceph_osdc_build_request(req, off, *plen, num_op, ops,
+                               snapc, vino.snap, mtime);
 
        return req;
 }
@@ -623,8 +604,8 @@ static void osd_reset(struct ceph_connection *con)
        down_read(&osdc->map_sem);
        mutex_lock(&osdc->request_mutex);
        __kick_osd_requests(osdc, osd);
+       __send_queued(osdc);
        mutex_unlock(&osdc->request_mutex);
-       send_queued(osdc);
        up_read(&osdc->map_sem);
 }
 
@@ -739,31 +720,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc)
  */
 static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
 {
-       struct ceph_osd_request *req;
-       int ret = 0;
+       struct ceph_entity_addr *peer_addr;
 
        dout("__reset_osd %p osd%d\n", osd, osd->o_osd);
        if (list_empty(&osd->o_requests) &&
            list_empty(&osd->o_linger_requests)) {
                __remove_osd(osdc, osd);
-               ret = -ENODEV;
-       } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
-                         &osd->o_con.peer_addr,
-                         sizeof(osd->o_con.peer_addr)) == 0 &&
-                  !ceph_con_opened(&osd->o_con)) {
+
+               return -ENODEV;
+       }
+
+       peer_addr = &osdc->osdmap->osd_addr[osd->o_osd];
+       if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) &&
+                       !ceph_con_opened(&osd->o_con)) {
+               struct ceph_osd_request *req;
+
                dout(" osd addr hasn't changed and connection never opened,"
                     " letting msgr retry");
                /* touch each r_stamp for handle_timeout()'s benfit */
                list_for_each_entry(req, &osd->o_requests, r_osd_item)
                        req->r_stamp = jiffies;
-               ret = -EAGAIN;
-       } else {
-               ceph_con_close(&osd->o_con);
-               ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
-                             &osdc->osdmap->osd_addr[osd->o_osd]);
-               osd->o_incarnation++;
+
+               return -EAGAIN;
        }
-       return ret;
+
+       ceph_con_close(&osd->o_con);
+       ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr);
+       osd->o_incarnation++;
+
+       return 0;
 }
 
 static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new)
@@ -961,20 +946,18 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger);
 static int __map_request(struct ceph_osd_client *osdc,
                         struct ceph_osd_request *req, int force_resend)
 {
-       struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
        struct ceph_pg pgid;
        int acting[CEPH_PG_MAX_SIZE];
        int o = -1, num = 0;
        int err;
 
        dout("map_request %p tid %lld\n", req, req->r_tid);
-       err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
+       err = ceph_calc_object_layout(&pgid, req->r_oid,
                                      &req->r_file_layout, osdc->osdmap);
        if (err) {
                list_move(&req->r_req_lru_item, &osdc->req_notarget);
                return err;
        }
-       pgid = reqhead->layout.ol_pgid;
        req->r_pgid = pgid;
 
        err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
@@ -991,8 +974,8 @@ static int __map_request(struct ceph_osd_client *osdc,
            (req->r_osd == NULL && o == -1))
                return 0;  /* no change */
 
-       dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n",
-            req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
+       dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
+            req->r_tid, pgid.pool, pgid.seed, o,
             req->r_osd ? req->r_osd->o_osd : -1);
 
        /* record full pg acting set */
@@ -1041,15 +1024,22 @@ out:
 static void __send_request(struct ceph_osd_client *osdc,
                           struct ceph_osd_request *req)
 {
-       struct ceph_osd_request_head *reqhead;
-
-       dout("send_request %p tid %llu to osd%d flags %d\n",
-            req, req->r_tid, req->r_osd->o_osd, req->r_flags);
+       void *p;
 
-       reqhead = req->r_request->front.iov_base;
-       reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch);
-       reqhead->flags |= cpu_to_le32(req->r_flags);  /* e.g., RETRY */
-       reqhead->reassert_version = req->r_reassert_version;
+       dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n",
+            req, req->r_tid, req->r_osd->o_osd, req->r_flags,
+            (unsigned long long)req->r_pgid.pool, req->r_pgid.seed);
+
+       /* fill in message content that changes each time we send it */
+       put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch);
+       put_unaligned_le32(req->r_flags, req->r_request_flags);
+       put_unaligned_le64(req->r_pgid.pool, req->r_request_pool);
+       p = req->r_request_pgid;
+       ceph_encode_64(&p, req->r_pgid.pool);
+       ceph_encode_32(&p, req->r_pgid.seed);
+       put_unaligned_le64(1, req->r_request_attempts);  /* FIXME */
+       memcpy(req->r_request_reassert_version, &req->r_reassert_version,
+              sizeof(req->r_reassert_version));
 
        req->r_stamp = jiffies;
        list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
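
Taken together, the ceph_osdc_build_request() and __send_request() hunks above split request encoding in two: build_request lays out the message front once and records pointers to the fields that change per transmission (r_request_osdmap_epoch, r_request_flags, r_request_pool, r_request_pgid, r_request_attempts, r_request_reassert_version), and __send_request() simply rewrites those slots in place before every (re)send instead of re-encoding the front. A small user-space sketch of that record-then-patch idea; the field layout here is invented for illustration and a little-endian host is assumed:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct req {
        uint8_t  front[64];
        size_t   front_len;
        uint8_t *osdmap_epoch;    /* analogous to r_request_osdmap_epoch */
        uint8_t *attempts;        /* analogous to r_request_attempts */
    };

    static void put_le32(uint8_t *p, uint32_t v)
    {
        memcpy(p, &v, sizeof(v)); /* little-endian host assumed */
    }

    static void build(struct req *r)
    {
        uint8_t *p = r->front;

        put_le32(p, 1); p += 4;   /* fixed at build time (like client_inc) */
        r->osdmap_epoch = p; p += 4;   /* left blank, patched on every send */
        r->attempts = p; p += 4;
        r->front_len = (size_t)(p - r->front);
    }

    static void send_once(struct req *r, uint32_t epoch, uint32_t attempt)
    {
        put_le32(r->osdmap_epoch, epoch);  /* patched in place, as __send_request() does */
        put_le32(r->attempts, attempt);
        printf("send %zu bytes, epoch %u, attempt %u\n", r->front_len, epoch, attempt);
    }

    int main(void)
    {
        struct req r;

        memset(&r, 0, sizeof(r));
        build(&r);
        send_once(&r, 42, 1);
        send_once(&r, 43, 2);     /* resend after an osdmap change */
        return 0;
    }
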
@@ -1062,16 +1052,13 @@ static void __send_request(struct ceph_osd_client *osdc,
 /*
  * Send any requests in the queue (req_unsent).
  */
-static void send_queued(struct ceph_osd_client *osdc)
+static void __send_queued(struct ceph_osd_client *osdc)
 {
        struct ceph_osd_request *req, *tmp;
 
-       dout("send_queued\n");
-       mutex_lock(&osdc->request_mutex);
-       list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) {
+       dout("__send_queued\n");
+       list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item)
                __send_request(osdc, req);
-       }
-       mutex_unlock(&osdc->request_mutex);
 }
 
 /*
@@ -1123,8 +1110,8 @@ static void handle_timeout(struct work_struct *work)
        }
 
        __schedule_osd_timeout(osdc);
+       __send_queued(osdc);
        mutex_unlock(&osdc->request_mutex);
-       send_queued(osdc);
        up_read(&osdc->map_sem);
 }
 
@@ -1152,6 +1139,26 @@ static void complete_request(struct ceph_osd_request *req)
        complete_all(&req->r_safe_completion);  /* fsync waiter */
 }
 
+static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid)
+{
+       __u8 v;
+
+       ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad);
+       v = ceph_decode_8(p);
+       if (v > 1) {
+               pr_warning("do not understand pg encoding %d > 1", v);
+               return -EINVAL;
+       }
+       pgid->pool = ceph_decode_64(p);
+       pgid->seed = ceph_decode_32(p);
+       *p += 4;
+       return 0;
+
+bad:
+       pr_warning("incomplete pg encoding");
+       return -EINVAL;
+}
+
 /*
  * handle osd op reply.  either call the callback if it is specified,
  * or do the completion to wake up the waiting thread.
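
The new __decode_pgid() helper above replaces the old fixed ceph_osd_reply_head layout with an explicit decode of the placement-group id from the reply front: one encoding-version byte (versions above 1 are rejected), a 64-bit pool, a 32-bit seed, and 4 bytes of legacy "preferred" data that are skipped. handle_reply() below then checks the decoded op count against r_num_ops and sums the per-op payload lengths against the header's data_len. A stand-alone user-space sketch of the same decode, with invented buffer contents and a little-endian host assumed:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct pg { uint64_t pool; uint32_t seed; };

    static int decode_pgid(const uint8_t **p, const uint8_t *end, struct pg *pgid)
    {
        if (end - *p < 1 + 8 + 4 + 4)
            return -1;                       /* incomplete encoding */
        uint8_t v = *(*p)++;
        if (v > 1)
            return -1;                       /* unknown encoding version */
        memcpy(&pgid->pool, *p, 8); *p += 8; /* little-endian host assumed */
        memcpy(&pgid->seed, *p, 4); *p += 4;
        *p += 4;                             /* skip legacy "preferred" field */
        return 0;
    }

    int main(void)
    {
        /* version 1, pool 2, seed 7, preferred -1 (ignored) */
        uint8_t buf[17] = { 1, 2,0,0,0,0,0,0,0, 7,0,0,0, 0xff,0xff,0xff,0xff };
        const uint8_t *p = buf;
        struct pg pg;

        if (decode_pgid(&p, buf + sizeof(buf), &pg) == 0)
            printf("pool %llu seed %u\n", (unsigned long long)pg.pool, pg.seed);
        return 0;
    }
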
@@ -1159,22 +1166,42 @@ static void complete_request(struct ceph_osd_request *req)
 static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
                         struct ceph_connection *con)
 {
-       struct ceph_osd_reply_head *rhead = msg->front.iov_base;
+       void *p, *end;
        struct ceph_osd_request *req;
        u64 tid;
-       int numops, object_len, flags;
+       int object_len;
+       int numops, payload_len, flags;
        s32 result;
+       s32 retry_attempt;
+       struct ceph_pg pg;
+       int err;
+       u32 reassert_epoch;
+       u64 reassert_version;
+       u32 osdmap_epoch;
+       int i;
 
        tid = le64_to_cpu(msg->hdr.tid);
-       if (msg->front.iov_len < sizeof(*rhead))
-               goto bad;
-       numops = le32_to_cpu(rhead->num_ops);
-       object_len = le32_to_cpu(rhead->object_len);
-       result = le32_to_cpu(rhead->result);
-       if (msg->front.iov_len != sizeof(*rhead) + object_len +
-           numops * sizeof(struct ceph_osd_op))
+       dout("handle_reply %p tid %llu\n", msg, tid);
+
+       p = msg->front.iov_base;
+       end = p + msg->front.iov_len;
+
+       ceph_decode_need(&p, end, 4, bad);
+       object_len = ceph_decode_32(&p);
+       ceph_decode_need(&p, end, object_len, bad);
+       p += object_len;
+
+       err = __decode_pgid(&p, end, &pg);
+       if (err)
                goto bad;
-       dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
+
+       ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad);
+       flags = ceph_decode_64(&p);
+       result = ceph_decode_32(&p);
+       reassert_epoch = ceph_decode_32(&p);
+       reassert_version = ceph_decode_64(&p);
+       osdmap_epoch = ceph_decode_32(&p);
+
        /* lookup */
        mutex_lock(&osdc->request_mutex);
        req = __lookup_request(osdc, tid);
@@ -1184,7 +1211,38 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
                return;
        }
        ceph_osdc_get_request(req);
-       flags = le32_to_cpu(rhead->flags);
+
+       dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
+            req, result);
+
+       ceph_decode_need(&p, end, 4, bad);
+       numops = ceph_decode_32(&p);
+       if (numops > CEPH_OSD_MAX_OP)
+               goto bad_put;
+       if (numops != req->r_num_ops)
+               goto bad_put;
+       payload_len = 0;
+       ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
+       for (i = 0; i < numops; i++) {
+               struct ceph_osd_op *op = p;
+               int len;
+
+               len = le32_to_cpu(op->payload_len);
+               req->r_reply_op_len[i] = len;
+               dout(" op %d has %d bytes\n", i, len);
+               payload_len += len;
+               p += sizeof(*op);
+       }
+       if (payload_len != le32_to_cpu(msg->hdr.data_len)) {
+               pr_warning("sum of op payload lens %d != data_len %d",
+                          payload_len, le32_to_cpu(msg->hdr.data_len));
+               goto bad_put;
+       }
+
+       ceph_decode_need(&p, end, 4 + numops * 4, bad);
+       retry_attempt = ceph_decode_32(&p);
+       for (i = 0; i < numops; i++)
+               req->r_reply_op_result[i] = ceph_decode_32(&p);
 
        /*
         * if this connection filled our message, drop our reference now, to
@@ -1199,7 +1257,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
        if (!req->r_got_reply) {
                unsigned int bytes;
 
-               req->r_result = le32_to_cpu(rhead->result);
+               req->r_result = result;
                bytes = le32_to_cpu(msg->hdr.data_len);
                dout("handle_reply result %d bytes %d\n", req->r_result,
                     bytes);
@@ -1207,7 +1265,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
                        req->r_result = bytes;
 
                /* in case this is a write and we need to replay, */
-               req->r_reassert_version = rhead->reassert_version;
+               req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch);
+               req->r_reassert_version.version = cpu_to_le64(reassert_version);
 
                req->r_got_reply = 1;
        } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
@@ -1242,10 +1301,11 @@ done:
        ceph_osdc_put_request(req);
        return;
 
+bad_put:
+       ceph_osdc_put_request(req);
 bad:
-       pr_err("corrupt osd_op_reply got %d %d expected %d\n",
-              (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len),
-              (int)sizeof(*rhead));
+       pr_err("corrupt osd_op_reply got %d %d\n",
+              (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
        ceph_msg_dump(msg);
 }
 
@@ -1462,7 +1522,9 @@ done:
        if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
                ceph_monc_request_next_osdmap(&osdc->client->monc);
 
-       send_queued(osdc);
+       mutex_lock(&osdc->request_mutex);
+       __send_queued(osdc);
+       mutex_unlock(&osdc->request_mutex);
        up_read(&osdc->map_sem);
        wake_up_all(&osdc->client->auth_wq);
        return;
@@ -1556,8 +1618,7 @@ static void __remove_event(struct ceph_osd_event *event)
 
 int ceph_osdc_create_event(struct ceph_osd_client *osdc,
                           void (*event_cb)(u64, u64, u8, void *),
-                          int one_shot, void *data,
-                          struct ceph_osd_event **pevent)
+                          void *data, struct ceph_osd_event **pevent)
 {
        struct ceph_osd_event *event;
 
@@ -1567,14 +1628,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc,
 
        dout("create_event %p\n", event);
        event->cb = event_cb;
-       event->one_shot = one_shot;
+       event->one_shot = 0;
        event->data = data;
        event->osdc = osdc;
        INIT_LIST_HEAD(&event->osd_node);
        RB_CLEAR_NODE(&event->node);
        kref_init(&event->kref);   /* one ref for us */
        kref_get(&event->kref);    /* one ref for the caller */
-       init_completion(&event->completion);
 
        spin_lock(&osdc->event_lock);
        event->cookie = ++osdc->event_count;
@@ -1610,7 +1670,6 @@ static void do_event_work(struct work_struct *work)
 
        dout("do_event_work completing %p\n", event);
        event->cb(ver, notify_id, opcode, event->data);
-       complete(&event->completion);
        dout("do_event_work completed %p\n", event);
        ceph_osdc_put_event(event);
        kfree(event_work);
@@ -1620,7 +1679,8 @@ static void do_event_work(struct work_struct *work)
 /*
  * Process osd watch notifications
  */
-void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+static void handle_watch_notify(struct ceph_osd_client *osdc,
+                               struct ceph_msg *msg)
 {
        void *p, *end;
        u8 proto_ver;
@@ -1641,9 +1701,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
        spin_lock(&osdc->event_lock);
        event = __find_event(osdc, cookie);
        if (event) {
+               BUG_ON(event->one_shot);
                get_event(event);
-               if (event->one_shot)
-                       __remove_event(event);
        }
        spin_unlock(&osdc->event_lock);
        dout("handle_watch_notify cookie %lld ver %lld event %p\n",
@@ -1668,7 +1727,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg)
        return;
 
 done_err:
-       complete(&event->completion);
        ceph_osdc_put_event(event);
        return;
 
@@ -1677,21 +1735,6 @@ bad:
        return;
 }
 
-int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout)
-{
-       int err;
-
-       dout("wait_event %p\n", event);
-       err = wait_for_completion_interruptible_timeout(&event->completion,
-                                                       timeout * HZ);
-       ceph_osdc_put_event(event);
-       if (err > 0)
-               err = 0;
-       dout("wait_event %p returns %d\n", event, err);
-       return err;
-}
-EXPORT_SYMBOL(ceph_osdc_wait_event);
-
 /*
  * Register request, send initial attempt.
  */
@@ -1706,7 +1749,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 #ifdef CONFIG_BLOCK
        req->r_request->bio = req->r_bio;
 #endif
-       req->r_request->trail = req->r_trail;
+       req->r_request->trail = &req->r_trail;
 
        register_request(osdc, req);
 
@@ -1865,7 +1908,6 @@ out_mempool:
 out:
        return err;
 }
-EXPORT_SYMBOL(ceph_osdc_init);
 
 void ceph_osdc_stop(struct ceph_osd_client *osdc)
 {
@@ -1882,7 +1924,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
        ceph_msgpool_destroy(&osdc->msgpool_op);
        ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 }
-EXPORT_SYMBOL(ceph_osdc_stop);
 
 /*
  * Read some contiguous pages.  If we cross a stripe boundary, shorten
@@ -1902,7 +1943,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
        req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
                                    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
                                    NULL, 0, truncate_seq, truncate_size, NULL,
-                                   false, 1, page_align);
+                                   false, page_align);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -1931,8 +1972,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
                         u64 off, u64 len,
                         u32 truncate_seq, u64 truncate_size,
                         struct timespec *mtime,
-                        struct page **pages, int num_pages,
-                        int flags, int do_sync, bool nofail)
+                        struct page **pages, int num_pages)
 {
        struct ceph_osd_request *req;
        int rc = 0;
@@ -1941,11 +1981,10 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
        BUG_ON(vino.snap != CEPH_NOSNAP);
        req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
                                    CEPH_OSD_OP_WRITE,
-                                   flags | CEPH_OSD_FLAG_ONDISK |
-                                           CEPH_OSD_FLAG_WRITE,
-                                   snapc, do_sync,
+                                   CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+                                   snapc, 0,
                                    truncate_seq, truncate_size, mtime,
-                                   nofail, 1, page_align);
+                                   true, page_align);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
@@ -1954,7 +1993,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
        dout("writepages %llu~%llu (%d pages)\n", off, len,
             req->r_num_pages);
 
-       rc = ceph_osdc_start_request(osdc, req, nofail);
+       rc = ceph_osdc_start_request(osdc, req, true);
        if (!rc)
                rc = ceph_osdc_wait_request(osdc, req);
 
@@ -2047,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
        if (data_len > 0) {
                int want = calc_pages_for(req->r_page_alignment, data_len);
 
-               if (unlikely(req->r_num_pages < want)) {
+               if (req->r_pages && unlikely(req->r_num_pages < want)) {
                        pr_warning("tid %lld reply has %d bytes %d pages, we"
                                   " had only %d pages ready\n", tid, data_len,
                                   want, req->r_num_pages);
index de73214b5d26c04989905bafc62bd9c056f2131c..69bc4bf89e3e79bb47eddcac63367913a8645ef0 100644 (file)
 
 char *ceph_osdmap_state_str(char *str, int len, int state)
 {
-       int flag = 0;
-
        if (!len)
-               goto done;
-
-       *str = '\0';
-       if (state) {
-               if (state & CEPH_OSD_EXISTS) {
-                       snprintf(str, len, "exists");
-                       flag = 1;
-               }
-               if (state & CEPH_OSD_UP) {
-                       snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""),
-                                "up");
-                       flag = 1;
-               }
-       } else {
+               return str;
+
+       if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
+               snprintf(str, len, "exists, up");
+       else if (state & CEPH_OSD_EXISTS)
+               snprintf(str, len, "exists");
+       else if (state & CEPH_OSD_UP)
+               snprintf(str, len, "up");
+       else
                snprintf(str, len, "doesn't exist");
-       }
-done:
+
        return str;
 }
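
Editorial note: the rewritten state-string helper above replaces the old flag/snprintf chaining with one snprintf per state combination. A minimal userspace sketch of the same branching, with made-up EXISTS/UP bit values standing in for the real ceph constants:

```c
#include <stdio.h>

#define OSD_EXISTS (1 << 0)   /* assumed bit values, for illustration only */
#define OSD_UP     (1 << 1)

static const char *state_str(char *str, int len, int state)
{
        if (!len)
                return str;

        if ((state & OSD_EXISTS) && (state & OSD_UP))
                snprintf(str, len, "exists, up");
        else if (state & OSD_EXISTS)
                snprintf(str, len, "exists");
        else if (state & OSD_UP)
                snprintf(str, len, "up");
        else
                snprintf(str, len, "doesn't exist");
        return str;
}

int main(void)
{
        char buf[32];

        printf("%s\n", state_str(buf, sizeof(buf), OSD_EXISTS | OSD_UP));
        return 0;
}
```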
 
@@ -53,13 +45,8 @@ static int calc_bits_of(unsigned int t)
  */
 static void calc_pg_masks(struct ceph_pg_pool_info *pi)
 {
-       pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1;
-       pi->pgp_num_mask =
-               (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1;
-       pi->lpg_num_mask =
-               (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1;
-       pi->lpgp_num_mask =
-               (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1;
+       pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
+       pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
 }
 
 /*
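
Editorial note: calc_pg_masks() now derives power-of-two masks directly from the host-order pg_num/pgp_num fields. A standalone sketch of the mask arithmetic; calc_bits_of() is re-implemented here for illustration and simply counts significant bits:

```c
#include <stdio.h>

/* number of bits needed to represent t (0 -> 0 bits), mirroring calc_bits_of() */
static int calc_bits_of(unsigned int t)
{
        int b = 0;

        while (t) {
                t >>= 1;
                b++;
        }
        return b;
}

int main(void)
{
        unsigned int pg_num = 12;                          /* example pool size */
        unsigned int mask = (1 << calc_bits_of(pg_num - 1)) - 1;

        /* pg_num = 12 -> mask = 0xf: seeds fold into 0..15, and
         * ceph_stable_mod() later maps the unused tail back into 0..11 */
        printf("pg_num=%u pg_num_mask=0x%x\n", pg_num, mask);
        return 0;
}
```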
@@ -170,6 +157,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         c->choose_local_tries = 2;
         c->choose_local_fallback_tries = 5;
         c->choose_total_tries = 19;
+       c->chooseleaf_descend_once = 0;
 
        ceph_decode_need(p, end, 4*sizeof(u32), bad);
        magic = ceph_decode_32(p);
@@ -336,6 +324,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
         dout("crush decode tunable choose_total_tries = %d",
              c->choose_total_tries);
 
+       ceph_decode_need(p, end, sizeof(u32), done);
+       c->chooseleaf_descend_once = ceph_decode_32(p);
+       dout("crush decode tunable chooseleaf_descend_once = %d",
+            c->chooseleaf_descend_once);
+
 done:
        dout("crush_decode success\n");
        return c;
@@ -354,12 +347,13 @@ bad:
  */
 static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
 {
-       u64 a = *(u64 *)&l;
-       u64 b = *(u64 *)&r;
-
-       if (a < b)
+       if (l.pool < r.pool)
+               return -1;
+       if (l.pool > r.pool)
+               return 1;
+       if (l.seed < r.seed)
                return -1;
-       if (a > b)
+       if (l.seed > r.seed)
                return 1;
        return 0;
 }
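
Editorial note: with struct ceph_pg now carrying a 64-bit pool and a 32-bit seed, the old trick of comparing the struct as a raw u64 no longer works, so pgid_cmp() compares the fields explicitly. A small sketch of the same ordering, with field widths assumed from the surrounding hunks:

```c
#include <stdint.h>
#include <stdio.h>

struct pg {
        uint64_t pool;
        uint32_t seed;
};

/* total order: by pool first, then by seed, as in the reworked pgid_cmp() */
static int pg_cmp(struct pg l, struct pg r)
{
        if (l.pool != r.pool)
                return l.pool < r.pool ? -1 : 1;
        if (l.seed != r.seed)
                return l.seed < r.seed ? -1 : 1;
        return 0;
}

int main(void)
{
        struct pg a = { .pool = 1, .seed = 0x23 };
        struct pg b = { .pool = 2, .seed = 0x01 };

        printf("cmp=%d\n", pg_cmp(a, b));   /* -1: the pool id dominates */
        return 0;
}
```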
@@ -405,8 +399,8 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
                } else if (c > 0) {
                        n = n->rb_right;
                } else {
-                       dout("__lookup_pg_mapping %llx got %p\n",
-                            *(u64 *)&pgid, pg);
+                       dout("__lookup_pg_mapping %lld.%x got %p\n",
+                            pgid.pool, pgid.seed, pg);
                        return pg;
                }
        }
@@ -418,12 +412,13 @@ static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
        struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
 
        if (pg) {
-               dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg);
+               dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
+                    pg);
                rb_erase(&pg->node, root);
                kfree(pg);
                return 0;
        }
-       dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid);
+       dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
        return -ENOENT;
 }
 
@@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
        return 0;
 }
 
-static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
+static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
 {
        struct ceph_pg_pool_info *pi;
        struct rb_node *n = root->rb_node;
@@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 
 static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
 {
-       unsigned int n, m;
+       u8 ev, cv;
+       unsigned len, num;
+       void *pool_end;
+
+       ceph_decode_need(p, end, 2 + 4, bad);
+       ev = ceph_decode_8(p);  /* encoding version */
+       cv = ceph_decode_8(p); /* compat version */
+       if (ev < 5) {
+               pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
+               return -EINVAL;
+       }
+       if (cv > 7) {
+               pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv);
+               return -EINVAL;
+       }
+       len = ceph_decode_32(p);
+       ceph_decode_need(p, end, len, bad);
+       pool_end = *p + len;
 
-       ceph_decode_copy(p, &pi->v, sizeof(pi->v));
-       calc_pg_masks(pi);
+       pi->type = ceph_decode_8(p);
+       pi->size = ceph_decode_8(p);
+       pi->crush_ruleset = ceph_decode_8(p);
+       pi->object_hash = ceph_decode_8(p);
+
+       pi->pg_num = ceph_decode_32(p);
+       pi->pgp_num = ceph_decode_32(p);
+
+       *p += 4 + 4;  /* skip lpg* */
+       *p += 4;      /* skip last_change */
+       *p += 8 + 4;  /* skip snap_seq, snap_epoch */
 
-       /* num_snaps * snap_info_t */
-       n = le32_to_cpu(pi->v.num_snaps);
-       while (n--) {
-               ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) +
-                                sizeof(struct ceph_timespec), bad);
-               *p += sizeof(u64) +       /* key */
-                       1 + sizeof(u64) + /* u8, snapid */
-                       sizeof(struct ceph_timespec);
-               m = ceph_decode_32(p);    /* snap name */
-               *p += m;
+       /* skip snaps */
+       num = ceph_decode_32(p);
+       while (num--) {
+               *p += 8;  /* snapid key */
+               *p += 1 + 1; /* versions */
+               len = ceph_decode_32(p);
+               *p += len;
        }
 
-       *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2;
+       /* skip removed snaps */
+       num = ceph_decode_32(p);
+       *p += num * (8 + 8);
+
+       *p += 8;  /* skip auid */
+       pi->flags = ceph_decode_64(p);
+
+       /* ignore the rest */
+
+       *p = pool_end;
+       calc_pg_masks(pi);
        return 0;
 
 bad:
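
Editorial note: __decode_pool() now reads an explicit encoding/compat version pair plus a byte length, and uses that length to jump to pool_end regardless of how much of the body it understood. A minimal sketch of the length-prefixed decode pattern; the record layout and version window are simplified stand-ins, not the real pool encoding:

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* decode one length-prefixed record: [ver][compat][len32][len bytes...] */
static int decode_record(const uint8_t **p, const uint8_t *end)
{
        uint8_t ver, compat;
        uint32_t len;
        const uint8_t *rec_end;

        if (end - *p < 2 + 4)
                return -1;
        ver = *(*p)++;
        compat = *(*p)++;
        memcpy(&len, *p, 4);            /* assumes little-endian host for brevity */
        *p += 4;
        if ((uint32_t)(end - *p) < len)
                return -1;
        rec_end = *p + len;

        if (ver < 5 || compat > 7)      /* mirror the version window in the hunk */
                return -1;

        /* ... pick out the fields we care about, ignore the rest ... */

        *p = rec_end;                   /* skip anything we did not parse */
        return 0;
}

int main(void)
{
        uint8_t buf[] = { 5, 0, 1, 0, 0, 0, 0x42 };
        const uint8_t *p = buf;

        printf("rc=%d consumed=%zu\n", decode_record(&p, buf + sizeof(buf)),
               (size_t)(p - buf));
        return 0;
}
```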
@@ -535,14 +563,15 @@ bad:
 static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
 {
        struct ceph_pg_pool_info *pi;
-       u32 num, len, pool;
+       u32 num, len;
+       u64 pool;
 
        ceph_decode_32_safe(p, end, num, bad);
        dout(" %d pool names\n", num);
        while (num--) {
-               ceph_decode_32_safe(p, end, pool, bad);
+               ceph_decode_64_safe(p, end, pool, bad);
                ceph_decode_32_safe(p, end, len, bad);
-               dout("  pool %d len %d\n", pool, len);
+               dout("  pool %llu len %d\n", pool, len);
                ceph_decode_need(p, end, len, bad);
                pi = __lookup_pg_pool(&map->pg_pools, pool);
                if (pi) {
@@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
        struct ceph_osdmap *map;
        u16 version;
        u32 len, max, i;
-       u8 ev;
        int err = -EINVAL;
        void *start = *p;
        struct ceph_pg_pool_info *pi;
@@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
        map->pg_temp = RB_ROOT;
 
        ceph_decode_16_safe(p, end, version, bad);
-       if (version > CEPH_OSDMAP_VERSION) {
-               pr_warning("got unknown v %d > %d of osdmap\n", version,
-                          CEPH_OSDMAP_VERSION);
+       if (version > 6) {
+               pr_warning("got unknown v %d > 6 of osdmap\n", version);
+               goto bad;
+       }
+       if (version < 6) {
+               pr_warning("got old v %d < 6 of osdmap\n", version);
                goto bad;
        }
 
@@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
 
        ceph_decode_32_safe(p, end, max, bad);
        while (max--) {
-               ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
+               ceph_decode_need(p, end, 8 + 2, bad);
                err = -ENOMEM;
                pi = kzalloc(sizeof(*pi), GFP_NOFS);
                if (!pi)
                        goto bad;
-               pi->id = ceph_decode_32(p);
-               err = -EINVAL;
-               ev = ceph_decode_8(p); /* encoding version */
-               if (ev > CEPH_PG_POOL_VERSION) {
-                       pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
-                                  ev, CEPH_PG_POOL_VERSION);
-                       kfree(pi);
-                       goto bad;
-               }
+               pi->id = ceph_decode_64(p);
                err = __decode_pool(p, end, pi);
                if (err < 0) {
                        kfree(pi);
@@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
                __insert_pg_pool(&map->pg_pools, pi);
        }
 
-       if (version >= 5) {
-               err = __decode_pool_names(p, end, map);
-               if (err < 0) {
-                       dout("fail to decode pool names");
-                       goto bad;
-               }
+       err = __decode_pool_names(p, end, map);
+       if (err < 0) {
+               dout("fail to decode pool names");
+               goto bad;
        }
 
        ceph_decode_32_safe(p, end, map->pool_max, bad);
@@ -724,10 +745,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
        for (i = 0; i < len; i++) {
                int n, j;
                struct ceph_pg pgid;
+               struct ceph_pg_v1 pgid_v1;
                struct ceph_pg_mapping *pg;
 
                ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
-               ceph_decode_copy(p, &pgid, sizeof(pgid));
+               ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
+               pgid.pool = le32_to_cpu(pgid_v1.pool);
+               pgid.seed = le16_to_cpu(pgid_v1.ps);
                n = ceph_decode_32(p);
                err = -EINVAL;
                if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
@@ -745,7 +769,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
                err = __insert_pg_mapping(pg, &map->pg_temp);
                if (err)
                        goto bad;
-               dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len);
+               dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed,
+                    len);
        }
 
        /* crush */
@@ -784,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
        struct ceph_fsid fsid;
        u32 epoch = 0;
        struct ceph_timespec modified;
-       u32 len, pool;
-       __s32 new_pool_max, new_flags, max;
+       s32 len;
+       u64 pool;
+       __s64 new_pool_max;
+       __s32 new_flags, max;
        void *start = *p;
        int err = -EINVAL;
        u16 version;
 
        ceph_decode_16_safe(p, end, version, bad);
-       if (version > CEPH_OSDMAP_INC_VERSION) {
-               pr_warning("got unknown v %d > %d of inc osdmap\n", version,
-                          CEPH_OSDMAP_INC_VERSION);
+       if (version > 6) {
+               pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6);
                goto bad;
        }
 
@@ -803,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
        epoch = ceph_decode_32(p);
        BUG_ON(epoch != map->epoch+1);
        ceph_decode_copy(p, &modified, sizeof(modified));
-       new_pool_max = ceph_decode_32(p);
+       new_pool_max = ceph_decode_64(p);
        new_flags = ceph_decode_32(p);
 
        /* full map? */
@@ -853,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
        /* new_pool */
        ceph_decode_32_safe(p, end, len, bad);
        while (len--) {
-               __u8 ev;
                struct ceph_pg_pool_info *pi;
 
-               ceph_decode_32_safe(p, end, pool, bad);
-               ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
-               ev = ceph_decode_8(p);  /* encoding version */
-               if (ev > CEPH_PG_POOL_VERSION) {
-                       pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
-                                  ev, CEPH_PG_POOL_VERSION);
-                       err = -EINVAL;
-                       goto bad;
-               }
+               ceph_decode_64_safe(p, end, pool, bad);
                pi = __lookup_pg_pool(&map->pg_pools, pool);
                if (!pi) {
                        pi = kzalloc(sizeof(*pi), GFP_NOFS);
@@ -890,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
        while (len--) {
                struct ceph_pg_pool_info *pi;
 
-               ceph_decode_32_safe(p, end, pool, bad);
+               ceph_decode_64_safe(p, end, pool, bad);
                pi = __lookup_pg_pool(&map->pg_pools, pool);
                if (pi)
                        __remove_pg_pool(&map->pg_pools, pi);
@@ -946,10 +963,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
        while (len--) {
                struct ceph_pg_mapping *pg;
                int j;
+               struct ceph_pg_v1 pgid_v1;
                struct ceph_pg pgid;
                u32 pglen;
                ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
-               ceph_decode_copy(p, &pgid, sizeof(pgid));
+               ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
+               pgid.pool = le32_to_cpu(pgid_v1.pool);
+               pgid.seed = le16_to_cpu(pgid_v1.ps);
                pglen = ceph_decode_32(p);
 
                if (pglen) {
@@ -975,8 +995,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
                                kfree(pg);
                                goto bad;
                        }
-                       dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
-                            pglen);
+                       dout(" added pg_temp %lld.%x len %d\n", pgid.pool,
+                            pgid.seed, pglen);
                } else {
                        /* remove */
                        __remove_pg_mapping(&map->pg_temp, pgid);
@@ -1010,7 +1030,7 @@ bad:
  * pass a stride back to the caller.
  */
 int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-                                  u64 off, u64 *plen,
+                                  u64 off, u64 len,
                                   u64 *ono,
                                   u64 *oxoff, u64 *oxlen)
 {
@@ -1021,7 +1041,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
        u32 su_per_object;
        u64 t, su_offset;
 
-       dout("mapping %llu~%llu  osize %u fl_su %u\n", off, *plen,
+       dout("mapping %llu~%llu  osize %u fl_su %u\n", off, len,
             osize, su);
        if (su == 0 || sc == 0)
                goto invalid;
@@ -1054,11 +1074,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 
        /*
         * Calculate the length of the extent being written to the selected
-        * object. This is the minimum of the full length requested (plen) or
+        * object. This is the minimum of the full length requested (len) or
         * the remainder of the current stripe being written to.
         */
-       *oxlen = min_t(u64, *plen, su - su_offset);
-       *plen = *oxlen;
+       *oxlen = min_t(u64, len, su - su_offset);
 
        dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
        return 0;
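
Editorial note: ceph_calc_file_object_mapping() now takes the requested length by value and only reports the extent length it can satisfy within the current stripe unit, instead of clamping the caller's length through a plen pointer. A rough illustration of the clamping step; the stripe parameters are made up, and the object-number/offset derivation done by the real function is omitted:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t su = 65536;            /* stripe unit size (assumed) */
        uint64_t su_offset = 60000;     /* offset within the current stripe unit */
        uint64_t len = 20000;           /* bytes the caller asked for */

        /* extent in this object is capped by what is left in the stripe unit */
        uint64_t oxlen = len < su - su_offset ? len : su - su_offset;

        printf("oxlen=%llu\n", (unsigned long long)oxlen);   /* prints 5536 */
        return 0;
}
```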
@@ -1076,33 +1095,24 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
  * calculate an object layout (i.e. pgid) from an oid,
  * file_layout, and osdmap
  */
-int ceph_calc_object_layout(struct ceph_object_layout *ol,
+int ceph_calc_object_layout(struct ceph_pg *pg,
                            const char *oid,
                            struct ceph_file_layout *fl,
                            struct ceph_osdmap *osdmap)
 {
        unsigned int num, num_mask;
-       struct ceph_pg pgid;
-       int poolid = le32_to_cpu(fl->fl_pg_pool);
        struct ceph_pg_pool_info *pool;
-       unsigned int ps;
 
        BUG_ON(!osdmap);
-
-       pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
+       pg->pool = le32_to_cpu(fl->fl_pg_pool);
+       pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool);
        if (!pool)
                return -EIO;
-       ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
-       num = le32_to_cpu(pool->v.pg_num);
+       pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
+       num = pool->pg_num;
        num_mask = pool->pg_num_mask;
 
-       pgid.ps = cpu_to_le16(ps);
-       pgid.preferred = cpu_to_le16(-1);
-       pgid.pool = fl->fl_pg_pool;
-       dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
-
-       ol->ol_pgid = pgid;
-       ol->ol_stripe_unit = fl->fl_object_stripe_unit;
+       dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed);
        return 0;
 }
 EXPORT_SYMBOL(ceph_calc_object_layout);
@@ -1117,19 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
        struct ceph_pg_mapping *pg;
        struct ceph_pg_pool_info *pool;
        int ruleno;
-       unsigned int poolid, ps, pps, t, r;
-
-       poolid = le32_to_cpu(pgid.pool);
-       ps = le16_to_cpu(pgid.ps);
+       int r;
+       u32 pps;
 
-       pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
+       pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
        if (!pool)
                return NULL;
 
        /* pg_temp? */
-       t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
-                           pool->pgp_num_mask);
-       pgid.ps = cpu_to_le16(t);
+       pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
+                                   pool->pgp_num_mask);
        pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
        if (pg) {
                *num = pg->len;
@@ -1137,26 +1144,39 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
        }
 
        /* crush */
-       ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
-                                pool->v.type, pool->v.size);
+       ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
+                                pool->type, pool->size);
        if (ruleno < 0) {
-               pr_err("no crush rule pool %d ruleset %d type %d size %d\n",
-                      poolid, pool->v.crush_ruleset, pool->v.type,
-                      pool->v.size);
+               pr_err("no crush rule pool %lld ruleset %d type %d size %d\n",
+                      pgid.pool, pool->crush_ruleset, pool->type,
+                      pool->size);
                return NULL;
        }
 
-       pps = ceph_stable_mod(ps,
-                             le32_to_cpu(pool->v.pgp_num),
-                             pool->pgp_num_mask);
-       pps += poolid;
+       if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
+               /* hash pool id and seed so that pool PGs do not overlap */
+               pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
+                                    ceph_stable_mod(pgid.seed, pool->pgp_num,
+                                                    pool->pgp_num_mask),
+                                    pgid.pool);
+       } else {
+               /*
+                * legacy behavior: add ps and pool together.  this is
+                * not a great approach because the PGs from each pool
+                * will overlap on top of each other: 0.5 == 1.4 ==
+                * 2.3 == ...
+                */
+               pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
+                                     pool->pgp_num_mask) +
+                       (unsigned)pgid.pool;
+       }
        r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
-                         min_t(int, pool->v.size, *num),
+                         min_t(int, pool->size, *num),
                          osdmap->osd_weight);
        if (r < 0) {
-               pr_err("error %d from crush rule: pool %d ruleset %d type %d"
-                      " size %d\n", r, poolid, pool->v.crush_ruleset,
-                      pool->v.type, pool->v.size);
+               pr_err("error %d from crush rule: pool %lld ruleset %d type %d"
+                      " size %d\n", r, pgid.pool, pool->crush_ruleset,
+                      pool->type, pool->size);
                return NULL;
        }
        *num = r;
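
Editorial note: the HASHPSPOOL branch folds the pool id into the placement seed with a 2-input hash, so PGs from different pools no longer collide on identical CRUSH inputs, whereas the legacy pps = stable_mod(seed) + pool makes pool 0 PG 5 coincide with pool 1 PG 4, and so on. A sketch of the two behaviours; ceph_stable_mod() is copied from its kernel definition, while mix32() below is only a stand-in for crush_hash32_2(), not the real Jenkins hash:

```c
#include <stdint.h>
#include <stdio.h>

/* stable, mask-based modulo used throughout the osdmap code */
static int ceph_stable_mod(int x, int b, int bmask)
{
        return (x & bmask) < b ? x & bmask : x & (bmask >> 1);
}

/* placeholder for crush_hash32_2(CRUSH_HASH_RJENKINS1, a, b) */
static uint32_t mix32(uint32_t a, uint32_t b)
{
        uint32_t h = a * 2654435761u ^ b * 2246822519u;

        h ^= h >> 16;
        return h;
}

int main(void)
{
        int pgp_num = 8, mask = 7;
        uint32_t seed = 5;
        uint64_t pool = 3;

        uint32_t legacy = ceph_stable_mod(seed, pgp_num, mask) + (unsigned)pool;
        uint32_t hashed = mix32(ceph_stable_mod(seed, pgp_num, mask), (uint32_t)pool);

        printf("legacy pps=%u  hashpspool pps=%u\n", legacy, hashed);
        return 0;
}
```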
index cd9c21df87d172fa0c7bfe1af7ee289ab5dd0494..815a2249cfa9371f1f9505c7016b509b83d50100 100644 (file)
@@ -12,7 +12,7 @@
 /*
  * build a vector of user pages
  */
-struct page **ceph_get_direct_page_vector(const char __user *data,
+struct page **ceph_get_direct_page_vector(const void __user *data,
                                          int num_pages, bool write_page)
 {
        struct page **pages;
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector);
  * copy user data into a page vector
  */
 int ceph_copy_user_to_page_vector(struct page **pages,
-                                        const char __user *data,
+                                        const void __user *data,
                                         loff_t off, size_t len)
 {
        int i = 0;
@@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages,
 }
 EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
 
-int ceph_copy_to_page_vector(struct page **pages,
-                                   const char *data,
+void ceph_copy_to_page_vector(struct page **pages,
+                                   const void *data,
                                    loff_t off, size_t len)
 {
        int i = 0;
        size_t po = off & ~PAGE_CACHE_MASK;
        size_t left = len;
-       size_t l;
 
        while (left > 0) {
-               l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+               size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
                memcpy(page_address(pages[i]) + po, data, l);
                data += l;
                left -= l;
@@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages,
                        i++;
                }
        }
-       return len;
 }
 EXPORT_SYMBOL(ceph_copy_to_page_vector);
 
-int ceph_copy_from_page_vector(struct page **pages,
-                                   char *data,
+void ceph_copy_from_page_vector(struct page **pages,
+                                   void *data,
                                    loff_t off, size_t len)
 {
        int i = 0;
        size_t po = off & ~PAGE_CACHE_MASK;
        size_t left = len;
-       size_t l;
 
        while (left > 0) {
-               l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+               size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
+
                memcpy(data, page_address(pages[i]) + po, l);
                data += l;
                left -= l;
@@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages,
                        i++;
                }
        }
-       return len;
 }
 EXPORT_SYMBOL(ceph_copy_from_page_vector);
 
@@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector);
  * copy user data from a page vector into a user pointer
  */
 int ceph_copy_page_vector_to_user(struct page **pages,
-                                        char __user *data,
+                                        void __user *data,
                                         loff_t off, size_t len)
 {
        int i = 0;
index d11418f97f1fa58cb04b3badb1a03d21b91af7a5..a622ad64acd8686a9baa3ad11ffea8ea91bb6731 100644 (file)
@@ -17,7 +17,8 @@
  */
 
 #include <net/ipv6.h>
-#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/msg_prot.h>
 #include <linux/slab.h>
 #include <linux/export.h>
 
index 107c4528654fd5867b8363ccdf66c648e9202a34..88edec929d7331b1f15b9c9fb45e6aa5c4bdc6b3 100644 (file)
@@ -574,6 +574,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
        buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
        buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 
+       /* Trim off the checksum blob */
+       xdr_buf_trim(buf, GSS_KRB5_TOK_HDR_LEN + tailskip);
        return GSS_S_COMPLETE;
 }
 
index ecd1d58bf611389ad283e77fd86977d97b2180fc..f7d34e7b6f818ba52bec46795b83062cbdf5a71e 100644 (file)
@@ -182,12 +182,6 @@ static void rsi_request(struct cache_detail *cd,
        (*bpp)[-1] = '\n';
 }
 
-static int rsi_upcall(struct cache_detail *cd, struct cache_head *h)
-{
-       return sunrpc_cache_pipe_upcall(cd, h, rsi_request);
-}
-
-
 static int rsi_parse(struct cache_detail *cd,
                    char *mesg, int mlen)
 {
@@ -275,7 +269,7 @@ static struct cache_detail rsi_cache_template = {
        .hash_size      = RSI_HASHMAX,
        .name           = "auth.rpcsec.init",
        .cache_put      = rsi_put,
-       .cache_upcall   = rsi_upcall,
+       .cache_request  = rsi_request,
        .cache_parse    = rsi_parse,
        .match          = rsi_match,
        .init           = rsi_init,
@@ -825,13 +819,17 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
  *     The server uses base of head iovec as read pointer, while the
  *     client uses separate pointer. */
 static int
-unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
 {
        int stat = -EINVAL;
        u32 integ_len, maj_stat;
        struct xdr_netobj mic;
        struct xdr_buf integ_buf;
 
+       /* Did we already verify the signature on the original pass through? */
+       if (rqstp->rq_deferred)
+               return 0;
+
        integ_len = svc_getnl(&buf->head[0]);
        if (integ_len & 3)
                return stat;
@@ -854,6 +852,8 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
                goto out;
        if (svc_getnl(&buf->head[0]) != seq)
                goto out;
+       /* trim off the mic at the end before returning */
+       xdr_buf_trim(buf, mic.len + 4);
        stat = 0;
 out:
        kfree(mic.data);
@@ -1198,7 +1198,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
                        /* placeholders for length and seq. number: */
                        svc_putnl(resv, 0);
                        svc_putnl(resv, 0);
-                       if (unwrap_integ_data(&rqstp->rq_arg,
+                       if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
                                        gc->gc_seq, rsci->mechctx))
                                goto garbage_args;
                        break;
index 39a4112faf54b9b31d18b8aaf14e3ddcf5880884..25d58e766014bf751ba404cac60893b2cfc5ecf9 100644 (file)
@@ -196,9 +196,9 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_update);
 
 static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
 {
-       if (!cd->cache_upcall)
-               return -EINVAL;
-       return cd->cache_upcall(cd, h);
+       if (cd->cache_upcall)
+               return cd->cache_upcall(cd, h);
+       return sunrpc_cache_pipe_upcall(cd, h);
 }
 
 static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
@@ -750,6 +750,18 @@ struct cache_reader {
        int                     offset; /* if non-0, we have a refcnt on next request */
 };
 
+static int cache_request(struct cache_detail *detail,
+                              struct cache_request *crq)
+{
+       char *bp = crq->buf;
+       int len = PAGE_SIZE;
+
+       detail->cache_request(detail, crq->item, &bp, &len);
+       if (len < 0)
+               return -EAGAIN;
+       return PAGE_SIZE - len;
+}
+
 static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
                          loff_t *ppos, struct cache_detail *cd)
 {
@@ -784,6 +796,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
                rq->readers++;
        spin_unlock(&queue_lock);
 
+       if (rq->len == 0) {
+               err = cache_request(cd, rq);
+               if (err < 0)
+                       goto out;
+               rq->len = err;
+       }
+
        if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
                err = -EAGAIN;
                spin_lock(&queue_lock);
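
Editorial note: taken together, the cache.c hunks above move the upcall formatting from enqueue time to the first read: the queued request starts with len == 0 and cache_request() fills the page lazily, so no formatting work is done if no daemon ever reads the channel. A toy sketch of that lazy-format pattern; the names are illustrative, not the sunrpc API:

```c
#include <stdio.h>

struct upcall {
        char buf[64];
        int len;        /* 0 means "not formatted yet" */
};

static int format_upcall(struct upcall *rq, const char *payload)
{
        int n = snprintf(rq->buf, sizeof(rq->buf), "upcall %s\n", payload);

        return (n < 0 || n >= (int)sizeof(rq->buf)) ? -1 : n;
}

/* reader side: format on first read, reuse the cached result afterwards */
static int read_upcall(struct upcall *rq, const char *payload)
{
        if (rq->len == 0) {
                int n = format_upcall(rq, payload);

                if (n < 0)
                        return -1;
                rq->len = n;
        }
        return rq->len;
}

int main(void)
{
        struct upcall rq = { .len = 0 };

        printf("first read: %d bytes\n", read_upcall(&rq, "auth.unix.ip"));
        printf("second read: %d bytes (cached)\n", read_upcall(&rq, "auth.unix.ip"));
        return 0;
}
```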
@@ -1140,17 +1159,14 @@ static bool cache_listeners_exist(struct cache_detail *detail)
  *
  * Each request is at most one page long.
  */
-int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
-               void (*cache_request)(struct cache_detail *,
-                                     struct cache_head *,
-                                     char **,
-                                     int *))
+int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 {
 
        char *buf;
        struct cache_request *crq;
-       char *bp;
-       int len;
+
+       if (!detail->cache_request)
+               return -EINVAL;
 
        if (!cache_listeners_exist(detail)) {
                warn_no_listener(detail);
@@ -1167,19 +1183,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h,
                return -EAGAIN;
        }
 
-       bp = buf; len = PAGE_SIZE;
-
-       cache_request(detail, h, &bp, &len);
-
-       if (len < 0) {
-               kfree(buf);
-               kfree(crq);
-               return -EAGAIN;
-       }
        crq->q.reader = 0;
        crq->item = cache_get(h);
        crq->buf = buf;
-       crq->len = PAGE_SIZE - len;
+       crq->len = 0;
        crq->readers = 0;
        spin_lock(&queue_lock);
        list_add_tail(&crq->q.list, &detail->queue);
@@ -1605,7 +1612,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
        if (p == NULL)
                goto out_nomem;
 
-       if (cd->cache_upcall || cd->cache_parse) {
+       if (cd->cache_request || cd->cache_parse) {
                p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
                                     cd->u.procfs.proc_ent,
                                     &cache_file_operations_procfs, cd);
@@ -1614,7 +1621,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
                        goto out_nomem;
        }
        if (cd->cache_show) {
-               p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR,
+               p = proc_create_data("content", S_IFREG|S_IRUSR,
                                cd->u.procfs.proc_ent,
                                &content_file_operations_procfs, cd);
                cd->u.procfs.content_ent = p;
index a9f7906c1a6ad251d4881bb731632d695449a720..d7a369e61085119f2897aea276ab28fc8dc12fb5 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/rcupdate.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/metrics.h>
 #include <linux/sunrpc/bc_xprt.h>
index 795a0f4e920bf8b4e52e78458b210fea02df8132..3df764dc330cbdb398e7dbbd5c6558d894bfc386 100644 (file)
@@ -26,6 +26,7 @@
 #include <net/ipv6.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xprtsock.h>
 
index dbf12ac5ecb7f611fac1a06d5db643a7a5a39ac4..89a588b4478bda057fe6e4d66d0041545de952c5 100644 (file)
@@ -515,15 +515,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled);
 
 void svc_shutdown_net(struct svc_serv *serv, struct net *net)
 {
-       /*
-        * The set of xprts (contained in the sv_tempsocks and
-        * sv_permsocks lists) is now constant, since it is modified
-        * only by accepting new sockets (done by service threads in
-        * svc_recv) or aging old ones (done by sv_temptimer), or
-        * configuration changes (excluded by whatever locking the
-        * caller is using--nfsd_mutex in the case of nfsd).  So it's
-        * safe to traverse those lists and shut everything down:
-        */
        svc_close_net(serv, net);
 
        if (serv->sv_shutdown)
@@ -1042,6 +1033,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
 /*
  * dprintk the given error with the address of the client that caused it.
  */
+#ifdef RPC_DEBUG
 static __printf(2, 3)
 void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 {
@@ -1058,6 +1050,9 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 
        va_end(args);
 }
+#else
+static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
+#endif
 
 /*
  * Common routine for processing the RPC request.
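
Editorial note: with RPC_DEBUG disabled, svc_printk() above collapses to an empty stub so call sites compile unconditionally and the compiler discards the calls. The same pattern in standalone form; DEBUG here is a generic macro, not the kernel's RPC_DEBUG option:

```c
#include <stdarg.h>
#include <stdio.h>

#ifdef DEBUG
static void dbg_printf(const char *fmt, ...)
{
        va_list args;

        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
}
#else
/* stub: callers stay unconditional, the call is optimized away */
static void dbg_printf(const char *fmt, ...) { (void)fmt; }
#endif

int main(void)
{
        dbg_printf("processing request %d\n", 42);
        return 0;
}
```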
index b8e47fac731557bdde5bc5d96250e30a822f6c6c..80a6640f329bab991859e032fda658a871e65ca5 100644 (file)
@@ -499,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv)
                        rqstp->rq_xprt = NULL;
                         */
                        wake_up(&rqstp->rq_wait);
-               }
+               } else
+                       pool->sp_task_pending = 1;
                spin_unlock_bh(&pool->sp_lock);
        }
 }
@@ -634,7 +635,13 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
                 * long for cache updates.
                 */
                rqstp->rq_chandle.thread_wait = 1*HZ;
+               pool->sp_task_pending = 0;
        } else {
+               if (pool->sp_task_pending) {
+                       pool->sp_task_pending = 0;
+                       spin_unlock_bh(&pool->sp_lock);
+                       return ERR_PTR(-EAGAIN);
+               }
                /* No data pending. Go to sleep */
                svc_thread_enqueue(pool, rqstp);
 
@@ -856,7 +863,6 @@ static void svc_age_temp_xprts(unsigned long closure)
        struct svc_serv *serv = (struct svc_serv *)closure;
        struct svc_xprt *xprt;
        struct list_head *le, *next;
-       LIST_HEAD(to_be_aged);
 
        dprintk("svc_age_temp_xprts\n");
 
@@ -877,25 +883,15 @@ static void svc_age_temp_xprts(unsigned long closure)
                if (atomic_read(&xprt->xpt_ref.refcount) > 1 ||
                    test_bit(XPT_BUSY, &xprt->xpt_flags))
                        continue;
-               svc_xprt_get(xprt);
-               list_move(le, &to_be_aged);
+               list_del_init(le);
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
                set_bit(XPT_DETACHED, &xprt->xpt_flags);
-       }
-       spin_unlock_bh(&serv->sv_lock);
-
-       while (!list_empty(&to_be_aged)) {
-               le = to_be_aged.next;
-               /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
-               list_del_init(le);
-               xprt = list_entry(le, struct svc_xprt, xpt_list);
-
                dprintk("queuing xprt %p for closing\n", xprt);
 
                /* a thread will dequeue and close it soon */
                svc_xprt_enqueue(xprt);
-               svc_xprt_put(xprt);
        }
+       spin_unlock_bh(&serv->sv_lock);
 
        mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
 }
@@ -959,21 +955,24 @@ void svc_close_xprt(struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
+static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
 {
        struct svc_xprt *xprt;
+       int ret = 0;
 
        spin_lock(&serv->sv_lock);
        list_for_each_entry(xprt, xprt_list, xpt_list) {
                if (xprt->xpt_net != net)
                        continue;
+               ret++;
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
-               set_bit(XPT_BUSY, &xprt->xpt_flags);
+               svc_xprt_enqueue(xprt);
        }
        spin_unlock(&serv->sv_lock);
+       return ret;
 }
 
-static void svc_clear_pools(struct svc_serv *serv, struct net *net)
+static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net)
 {
        struct svc_pool *pool;
        struct svc_xprt *xprt;
@@ -988,42 +987,46 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net)
                        if (xprt->xpt_net != net)
                                continue;
                        list_del_init(&xprt->xpt_ready);
+                       spin_unlock_bh(&pool->sp_lock);
+                       return xprt;
                }
                spin_unlock_bh(&pool->sp_lock);
        }
+       return NULL;
 }
 
-static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
+static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
 {
        struct svc_xprt *xprt;
-       struct svc_xprt *tmp;
-       LIST_HEAD(victims);
 
-       spin_lock(&serv->sv_lock);
-       list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
-               if (xprt->xpt_net != net)
-                       continue;
-               list_move(&xprt->xpt_list, &victims);
-       }
-       spin_unlock(&serv->sv_lock);
-
-       list_for_each_entry_safe(xprt, tmp, &victims, xpt_list)
+       while ((xprt = svc_dequeue_net(serv, net))) {
+               set_bit(XPT_CLOSE, &xprt->xpt_flags);
                svc_delete_xprt(xprt);
+       }
 }
 
+/*
+ * Server threads may still be running (especially in the case where the
+ * service is still running in other network namespaces).
+ *
+ * So we shut down sockets the same way we would on a running server, by
+ * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do
+ * the close.  In the case there are no such other threads,
+ * svc_clean_up_xprts() does a simple version of a
+ * server's main event loop, and in the case where there are other
+ * threads, we may need to wait a little while and then check again to
+ * see if they're done.
+ */
 void svc_close_net(struct svc_serv *serv, struct net *net)
 {
-       svc_close_list(serv, &serv->sv_tempsocks, net);
-       svc_close_list(serv, &serv->sv_permsocks, net);
+       int delay = 0;
 
-       svc_clear_pools(serv, net);
-       /*
-        * At this point the sp_sockets lists will stay empty, since
-        * svc_xprt_enqueue will not add new entries without taking the
-        * sp_lock and checking XPT_BUSY.
-        */
-       svc_clear_list(serv, &serv->sv_tempsocks, net);
-       svc_clear_list(serv, &serv->sv_permsocks, net);
+       while (svc_close_list(serv, &serv->sv_permsocks, net) +
+              svc_close_list(serv, &serv->sv_tempsocks, net)) {
+
+               svc_clean_up_xprts(serv, net);
+               msleep(delay++);
+       }
 }
 
 /*
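
Editorial note: the new svc_close_net() keeps re-scanning the socket lists and sleeping with a growing delay until svc_close_list() reports nothing left, rather than assuming a single pass suffices while threads in other namespaces may still be running. A toy model of that retry loop, using a simple counter in place of the transport lists:

```c
#include <stdio.h>
#include <unistd.h>

static int remaining = 5;      /* stand-in for sockets still on the lists */

/* analogue of svc_close_list(): mark what is left and report how many */
static int mark_for_close(void)
{
        return remaining;
}

/* analogue of svc_clean_up_xprts(): each pass retires some transports */
static void clean_up_some(void)
{
        remaining -= 2;
        if (remaining < 0)
                remaining = 0;
}

int main(void)
{
        int delay = 0;

        while (mark_for_close()) {
                clean_up_some();
                usleep(delay * 1000);        /* msleep(delay++) in the kernel */
                printf("pass with delay=%dms, remaining=%d\n", delay, remaining);
                delay++;
        }
        return 0;
}
```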
index a1852e19ed0ced5297189dc5b5171217cc4c33e9..c3f9e1ef7f531857f432993896d13d0ae8442876 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/svcauth.h>
 #include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/err.h>
 #include <linux/seq_file.h>
 #include <linux/hash.h>
@@ -17,7 +18,6 @@
 #include <linux/user_namespace.h>
 #define RPCDBG_FACILITY        RPCDBG_AUTH
 
-#include <linux/sunrpc/clnt.h>
 
 #include "netns.h"
 
@@ -157,11 +157,6 @@ static void ip_map_request(struct cache_detail *cd,
        (*bpp)[-1] = '\n';
 }
 
-static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h)
-{
-       return sunrpc_cache_pipe_upcall(cd, h, ip_map_request);
-}
-
 static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr);
 static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
 
@@ -475,11 +470,6 @@ static void unix_gid_request(struct cache_detail *cd,
        (*bpp)[-1] = '\n';
 }
 
-static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h)
-{
-       return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request);
-}
-
 static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid);
 
 static int unix_gid_parse(struct cache_detail *cd,
@@ -586,7 +576,7 @@ static struct cache_detail unix_gid_cache_template = {
        .hash_size      = GID_HASHMAX,
        .name           = "auth.unix.gid",
        .cache_put      = unix_gid_put,
-       .cache_upcall   = unix_gid_upcall,
+       .cache_request  = unix_gid_request,
        .cache_parse    = unix_gid_parse,
        .cache_show     = unix_gid_show,
        .match          = unix_gid_match,
@@ -885,7 +875,7 @@ static struct cache_detail ip_map_cache_template = {
        .hash_size      = IP_HASHMAX,
        .name           = "auth.unix.ip",
        .cache_put      = ip_map_put,
-       .cache_upcall   = ip_map_upcall,
+       .cache_request  = ip_map_request,
        .cache_parse    = ip_map_parse,
        .cache_show     = ip_map_show,
        .match          = ip_map_match,
index 56055632f1518323f68be9e8bcd3e59d7441be2c..75edcfad6e264f299fac566f16bb1cfd82615d2a 100644 (file)
@@ -879,6 +879,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
+/**
+ * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
+ * @buf: buf to be trimmed
+ * @len: number of bytes to reduce "buf" by
+ *
+ * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
+ * that it's possible that we'll trim less than that amount if the xdr_buf is
+ * too small, or if (for instance) it's all in the head and the parser has
+ * already read too far into it.
+ */
+void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
+{
+       size_t cur;
+       unsigned int trim = len;
+
+       if (buf->tail[0].iov_len) {
+               cur = min_t(size_t, buf->tail[0].iov_len, trim);
+               buf->tail[0].iov_len -= cur;
+               trim -= cur;
+               if (!trim)
+                       goto fix_len;
+       }
+
+       if (buf->page_len) {
+               cur = min_t(unsigned int, buf->page_len, trim);
+               buf->page_len -= cur;
+               trim -= cur;
+               if (!trim)
+                       goto fix_len;
+       }
+
+       if (buf->head[0].iov_len) {
+               cur = min_t(size_t, buf->head[0].iov_len, trim);
+               buf->head[0].iov_len -= cur;
+               trim -= cur;
+       }
+fix_len:
+       buf->len -= (len - trim);
+}
+EXPORT_SYMBOL_GPL(xdr_buf_trim);
+
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
        unsigned int this_len;
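
Editorial note: xdr_buf_trim() shortens the buffer from the end by walking the tail, then the page data, then the head, and finally adjusts the total length by however much it actually trimmed. A self-contained sketch of the same walk over three plain length fields; there are no iovecs or pages here, only the accounting:

```c
#include <stdio.h>

struct buf {
        unsigned int head_len, page_len, tail_len, len;
};

static unsigned int min_u(unsigned int a, unsigned int b)
{
        return a < b ? a : b;
}

/* trim at most "len" bytes off the logical end: tail first, then pages, then head */
static void buf_trim(struct buf *b, unsigned int len)
{
        unsigned int trim = len, cur;

        cur = min_u(b->tail_len, trim);
        b->tail_len -= cur;
        trim -= cur;

        if (trim) {
                cur = min_u(b->page_len, trim);
                b->page_len -= cur;
                trim -= cur;
        }
        if (trim) {
                cur = min_u(b->head_len, trim);
                b->head_len -= cur;
                trim -= cur;
        }
        b->len -= len - trim;   /* only account for what was really trimmed */
}

int main(void)
{
        struct buf b = { .head_len = 10, .page_len = 100, .tail_len = 4, .len = 114 };

        buf_trim(&b, 20);       /* eats the 4-byte tail and 16 bytes of page data */
        printf("head=%u pages=%u tail=%u len=%u\n",
               b.head_len, b.page_len, b.tail_len, b.len);
        return 0;
}
```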
index d0074289708e6a38b161ed43820f33d7489d747f..794312f22b9badcb10f3e98d9563672f2b08e534 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
+#include <linux/sunrpc/addr.h>
 
 #include "xprt_rdma.h"
 
index 37cbda63f45c5bef857f249854dd04a95327c150..c1d8476b76929b4300d967dd795aa6c2ce38f7a0 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprtsock.h>
@@ -1867,13 +1868,9 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
  * @xprt: RPC transport to connect
  * @transport: socket transport to connect
  * @create_sock: function to create a socket of the correct type
- *
- * Invoked by a work queue tasklet.
  */
-static void xs_local_setup_socket(struct work_struct *work)
+static int xs_local_setup_socket(struct sock_xprt *transport)
 {
-       struct sock_xprt *transport =
-               container_of(work, struct sock_xprt, connect_worker.work);
        struct rpc_xprt *xprt = &transport->xprt;
        struct socket *sock;
        int status = -EIO;
@@ -1918,6 +1915,30 @@ out:
        xprt_clear_connecting(xprt);
        xprt_wake_pending_tasks(xprt, status);
        current->flags &= ~PF_FSTRANS;
+       return status;
+}
+
+static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       int ret;
+
+        if (RPC_IS_ASYNC(task)) {
+               /*
+                * We want the AF_LOCAL connect to be resolved in the
+                * filesystem namespace of the process making the rpc
+                * call.  Thus we connect synchronously.
+                *
+                * If we want to support asynchronous AF_LOCAL calls,
+                * we'll need to figure out how to pass a namespace to
+                * connect.
+                */
+               rpc_exit(task, -ENOTCONN);
+               return;
+       }
+       ret = xs_local_setup_socket(transport);
+       if (ret && !RPC_IS_SOFTCONN(task))
+               msleep_interruptible(15000);
 }
 
 #ifdef CONFIG_SUNRPC_SWAP
@@ -2455,7 +2476,7 @@ static struct rpc_xprt_ops xs_local_ops = {
        .alloc_slot             = xprt_alloc_slot,
        .rpcbind                = xs_local_rpcbind,
        .set_port               = xs_local_set_port,
-       .connect                = xs_connect,
+       .connect                = xs_local_connect,
        .buf_alloc              = rpc_malloc,
        .buf_free               = rpc_free,
        .send_request           = xs_local_send_request,
@@ -2628,8 +2649,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
                        goto out_err;
                }
                xprt_set_bound(xprt);
-               INIT_DELAYED_WORK(&transport->connect_worker,
-                                       xs_local_setup_socket);
                xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
                break;
        default: