git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge remote-tracking branch 'moduleh/for-sfr'
author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 1 Nov 2011 07:05:51 +0000 (18:05 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 1 Nov 2011 07:33:56 +0000 (18:33 +1100)
Conflicts:
drivers/media/dvb/frontends/dibx000_common.c
drivers/mfd/ab3550-core.c
include/linux/dmaengine.h

208 files changed:
arch/arm/kernel/setup.c
arch/arm/mach-davinci/board-dm644x-evm.c
arch/arm/mach-imx/mach-mx31lilly.c
arch/arm/mach-imx/mach-mx31lite.c
arch/arm/mach-imx/mach-mx31moboard.c
arch/arm/mach-omap1/board-ams-delta.c
arch/arm/mach-omap1/board-sx1.c
arch/arm/mach-omap1/board-voiceblue.c
arch/arm/mach-omap2/board-omap3evm.c
arch/arm/mach-omap2/clockdomain.c
arch/arm/mach-omap2/display.c
arch/arm/mach-omap2/pm.c
arch/arm/mach-omap2/prcm.c
arch/arm/mach-omap2/voltage.c
arch/arm/mach-s3c2410/mach-h1940.c
arch/arm/mach-tegra/pcie.c
arch/arm/plat-samsung/dma-ops.c
arch/arm/plat-samsung/platformdata.c
arch/arm/plat-samsung/s3c-dma-ops.c
arch/blackfin/mach-bf518/boards/ezbrd.c
arch/blackfin/mach-bf527/boards/ad7160eval.c
arch/blackfin/mach-bf527/boards/cm_bf527.c
arch/blackfin/mach-bf527/boards/ezbrd.c
arch/blackfin/mach-bf527/boards/ezkit.c
arch/blackfin/mach-bf527/boards/tll6527m.c
arch/blackfin/mach-bf537/boards/cm_bf537e.c
arch/blackfin/mach-bf537/boards/cm_bf537u.c
arch/blackfin/mach-bf537/boards/dnp5370.c
arch/blackfin/mach-bf537/boards/pnav10.c
arch/blackfin/mach-bf537/boards/stamp.c
arch/blackfin/mach-bf537/boards/tcm_bf537.c
arch/microblaze/kernel/dma.c
arch/mips/kernel/cpu-probe.c
arch/powerpc/include/asm/machdep.h
arch/powerpc/kernel/dma-iommu.c
arch/powerpc/kernel/dma.c
arch/powerpc/kernel/ibmebus.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/vio.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/mmu_context_hash64.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/tlb_nohash.c
arch/powerpc/platforms/ps3/system-bus.c
arch/powerpc/platforms/pseries/eeh.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/sysdev/cpm_common.c
arch/powerpc/xmon/xmon.c
arch/x86/crypto/aes_glue.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/vsyscall_64.c
block/ioctl.c
drivers/block/aoe/aoeblk.c
drivers/block/ps3vram.c
drivers/char/virtio_console.c
drivers/cpufreq/cpufreq_stats.c
drivers/dma/imx-dma.c
drivers/dma/imx-sdma.c
drivers/dma/intel_mid_dma.c
drivers/gpio/gpio-mxc.c
drivers/gpio/gpio-mxs.c
drivers/gpu/drm/i915/i915_drv.c
drivers/i2c/busses/i2c-sh7760.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/qib/qib_driver.c
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_fs.c
drivers/leds/leds-asic3.c
drivers/md/dm-bufio.c
drivers/md/dm-log-userspace-base.c
drivers/md/dm-raid.c
drivers/md/faulty.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/multipath.c
drivers/md/persistent-data/dm-btree-remove.c
drivers/md/persistent-data/dm-btree.c
drivers/md/persistent-data/dm-space-map-disk.c
drivers/md/persistent-data/dm-transaction-manager.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/media/common/saa7146_core.c
drivers/media/common/saa7146_fops.c
drivers/media/common/saa7146_hlp.c
drivers/media/common/saa7146_video.c
drivers/media/dvb/frontends/dibx000_common.c
drivers/media/radio/radio-wl1273.c
drivers/media/radio/wl128x/fmdrv_v4l2.c
drivers/media/rc/ir-lirc-codec.c
drivers/media/rc/keymaps/rc-pinnacle-pctv-hd.c
drivers/media/rc/rc-main.c
drivers/media/video/adp1653.c
drivers/media/video/cx25840/cx25840-ir.c
drivers/media/video/hexium_gemini.c
drivers/media/video/hexium_orion.c
drivers/media/video/m5mols/m5mols_core.c
drivers/media/video/mt9m111.c
drivers/media/video/mxb.c
drivers/media/video/noon010pc30.c
drivers/media/video/pvrusb2/pvrusb2-hdw.c
drivers/media/video/pvrusb2/pvrusb2-v4l2.c
drivers/media/video/sr030pc30.c
drivers/media/video/tvp7002.c
drivers/media/video/v4l2-ctrls.c
drivers/media/video/v4l2-subdev.c
drivers/mfd/ab3100-core.c
drivers/mfd/asic3.c
drivers/mfd/max8997.c
drivers/mfd/twl-core.c
drivers/mfd/twl6030-irq.c
drivers/mtd/ar7part.c
drivers/mtd/cmdlinepart.c
drivers/mtd/mtdsuper.c
drivers/mtd/nand/cafe_nand.c
drivers/mtd/nand/cmx270_nand.c
drivers/mtd/nand/diskonchip.c
drivers/mtd/nand/nand_bbt.c
drivers/mtd/nand/omap2.c
drivers/mtd/nand/sm_common.c
drivers/mtd/onenand/onenand_bbt.c
drivers/mtd/redboot.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/mr.c
drivers/net/ethernet/mellanox/mlx4/pd.c
drivers/net/ethernet/mellanox/mlx4/port.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx4/srq.c
drivers/platform/x86/wmi.c
drivers/power/max17042_battery.c
drivers/power/max8903_charger.c
drivers/power/max8997_charger.c
drivers/power/max8998_charger.c
drivers/power/power_supply_sysfs.c
drivers/regulator/88pm8607.c
drivers/regulator/core.c
drivers/regulator/db8500-prcmu.c
drivers/regulator/mc13783-regulator.c
drivers/regulator/tps6586x-regulator.c
drivers/s390/char/vmur.c
drivers/s390/cio/qdio_debug.c
drivers/s390/kvm/kvm_virtio.c
drivers/scsi/device_handler/scsi_dh.c
drivers/scsi/libfc/fc_exch.c
drivers/scsi/libfc/fc_lport.c
drivers/scsi/scsi_lib.c
drivers/target/target_core_tmr.c
drivers/target/target_core_transport.c
drivers/tty/hvc/hvc_opal.c
drivers/xen/xenbus/xenbus_client.c
fs/cifs/connect.c
fs/logfs/super.c
fs/nfs/nfs4filelayout.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfsd/nfssvc.c
fs/ocfs2/cluster/tcp.c
include/linux/blkdev.h
include/linux/crypto.h
include/linux/dmaengine.h
include/linux/mtd/mtd.h
include/linux/of.h
include/media/saa7146.h
include/net/bluetooth/hci_core.h
include/net/inet_timewait_sock.h
kernel/cpu.c
kernel/crash_dump.c
kernel/module.c
kernel/signal.c
kernel/sys.c
mm/bounce.c
mm/filemap.c
mm/kmemleak.c
mm/page-writeback.c
mm/swapfile.c
mm/truncate.c
net/8021q/vlan_core.c
net/bluetooth/hci_sysfs.c
net/bluetooth/l2cap_sock.c
net/bluetooth/mgmt.c
net/ipv6/addrconf.c
net/ipv6/route.c
sound/core/hwdep.c
sound/pci/intel8x0.c
sound/pci/rme9652/hdsp.c
sound/pci/rme9652/hdspm.c
sound/soc/samsung/ac97.c
sound/soc/samsung/dma.c

Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 64070ac1e761048caf85bf5fb4ebad2e06b85197,e964cfd3a3d0108fd5967a2d337fd5842eef258f..1f8fdf736e630976bc97fc781d58bfbf85704179
  
  #include <linux/delay.h>
  #include <linux/io.h>
 -#include <linux/clk.h>
  #include <linux/err.h>
+ #include <linux/export.h>
  #include <linux/debugfs.h>
  #include <linux/slab.h>
 +#include <linux/clk.h>
  
  #include <plat/common.h>
  
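The hunk above shows the fix repeated across many of the files listed in this merge: after the module.h split, a file that exports symbols (or otherwise relies on the module macros) must include <linux/export.h> or <linux/module.h> itself instead of inheriting it through other headers. A minimal, hypothetical fragment illustrating the shape of that fixup (not taken from this tree):

/* Hypothetical module.h-split fixup; illustration only, not part of this merge. */
#include <linux/kernel.h>
#include <linux/export.h>	/* now needed explicitly for EXPORT_SYMBOL*() */

int example_core_helper(void)
{
	return 0;
}
EXPORT_SYMBOL_GPL(example_core_helper);	/* would no longer build without export.h */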
Simple merge
Simple merge
index 6e3d9abc9e2e856470e304c78f31794a4f715833,0000000000000000000000000000000000000000..93a994a5dd8f546c53ea2d80af751336d0f84184
mode 100644,000000..100644
--- /dev/null
@@@ -1,131 -1,0 +1,132 @@@
 +/* linux/arch/arm/plat-samsung/dma-ops.c
 + *
 + * Copyright (c) 2011 Samsung Electronics Co., Ltd.
 + *            http://www.samsung.com
 + *
 + * Samsung DMA Operations
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + */
 +
 +#include <linux/kernel.h>
 +#include <linux/errno.h>
 +#include <linux/amba/pl330.h>
 +#include <linux/scatterlist.h>
++#include <linux/export.h>
 +
 +#include <mach/dma.h>
 +
 +static inline bool pl330_filter(struct dma_chan *chan, void *param)
 +{
 +      struct dma_pl330_peri *peri = chan->private;
 +      return peri->peri_id == (unsigned)param;
 +}
 +
 +static unsigned samsung_dmadev_request(enum dma_ch dma_ch,
 +                              struct samsung_dma_info *info)
 +{
 +      struct dma_chan *chan;
 +      dma_cap_mask_t mask;
 +      struct dma_slave_config slave_config;
 +
 +      dma_cap_zero(mask);
 +      dma_cap_set(info->cap, mask);
 +
 +      chan = dma_request_channel(mask, pl330_filter, (void *)dma_ch);
 +
 +      if (info->direction == DMA_FROM_DEVICE) {
 +              memset(&slave_config, 0, sizeof(struct dma_slave_config));
 +              slave_config.direction = info->direction;
 +              slave_config.src_addr = info->fifo;
 +              slave_config.src_addr_width = info->width;
 +              slave_config.src_maxburst = 1;
 +              dmaengine_slave_config(chan, &slave_config);
 +      } else if (info->direction == DMA_TO_DEVICE) {
 +              memset(&slave_config, 0, sizeof(struct dma_slave_config));
 +              slave_config.direction = info->direction;
 +              slave_config.dst_addr = info->fifo;
 +              slave_config.dst_addr_width = info->width;
 +              slave_config.dst_maxburst = 1;
 +              dmaengine_slave_config(chan, &slave_config);
 +      }
 +
 +      return (unsigned)chan;
 +}
 +
 +static int samsung_dmadev_release(unsigned ch,
 +                      struct s3c2410_dma_client *client)
 +{
 +      dma_release_channel((struct dma_chan *)ch);
 +
 +      return 0;
 +}
 +
 +static int samsung_dmadev_prepare(unsigned ch,
 +                      struct samsung_dma_prep_info *info)
 +{
 +      struct scatterlist sg;
 +      struct dma_chan *chan = (struct dma_chan *)ch;
 +      struct dma_async_tx_descriptor *desc;
 +
 +      switch (info->cap) {
 +      case DMA_SLAVE:
 +              sg_init_table(&sg, 1);
 +              sg_dma_len(&sg) = info->len;
 +              sg_set_page(&sg, pfn_to_page(PFN_DOWN(info->buf)),
 +                          info->len, offset_in_page(info->buf));
 +              sg_dma_address(&sg) = info->buf;
 +
 +              desc = chan->device->device_prep_slave_sg(chan,
 +                      &sg, 1, info->direction, DMA_PREP_INTERRUPT);
 +              break;
 +      case DMA_CYCLIC:
 +              desc = chan->device->device_prep_dma_cyclic(chan,
 +                      info->buf, info->len, info->period, info->direction);
 +              break;
 +      default:
 +              dev_err(&chan->dev->device, "unsupported format\n");
 +              return -EFAULT;
 +      }
 +
 +      if (!desc) {
 +              dev_err(&chan->dev->device, "cannot prepare cyclic dma\n");
 +              return -EFAULT;
 +      }
 +
 +      desc->callback = info->fp;
 +      desc->callback_param = info->fp_param;
 +
 +      dmaengine_submit((struct dma_async_tx_descriptor *)desc);
 +
 +      return 0;
 +}
 +
 +static inline int samsung_dmadev_trigger(unsigned ch)
 +{
 +      dma_async_issue_pending((struct dma_chan *)ch);
 +
 +      return 0;
 +}
 +
 +static inline int samsung_dmadev_flush(unsigned ch)
 +{
 +      return dmaengine_terminate_all((struct dma_chan *)ch);
 +}
 +
 +struct samsung_dma_ops dmadev_ops = {
 +      .request        = samsung_dmadev_request,
 +      .release        = samsung_dmadev_release,
 +      .prepare        = samsung_dmadev_prepare,
 +      .trigger        = samsung_dmadev_trigger,
 +      .started        = NULL,
 +      .flush          = samsung_dmadev_flush,
 +      .stop           = samsung_dmadev_flush,
 +};
 +
 +void *samsung_dmadev_get_ops(void)
 +{
 +      return &dmadev_ops;
 +}
 +EXPORT_SYMBOL(samsung_dmadev_get_ops);
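The new dma-ops.c above wraps the dmaengine slave API in a small samsung_dma_ops function-pointer table. A rough usage sketch for the dmaengine-backed ops, using only the struct samsung_dma_info / samsung_dma_prep_info fields visible above; the bus-width constant and the extern declaration are assumptions, and error handling is trimmed:

/* Hypothetical caller of the samsung_dma_ops table defined above; illustration only. */
#include <mach/dma.h>

extern void *samsung_dmadev_get_ops(void);	/* normally declared in a plat header (assumption) */

static void xfer_done(void *param)
{
	/* reached via samsung_dma_prep_info.fp when the transfer completes */
}

static int start_example_tx(enum dma_ch ch_id, dma_addr_t buf, int len, dma_addr_t fifo)
{
	struct samsung_dma_ops *ops = samsung_dmadev_get_ops();
	struct samsung_dma_info info = {
		.cap		= DMA_SLAVE,
		.direction	= DMA_TO_DEVICE,
		.fifo		= fifo,
		.width		= DMA_SLAVE_BUSWIDTH_4_BYTES,	/* assumed field type */
	};
	struct samsung_dma_prep_info prep = {
		.cap		= DMA_SLAVE,
		.direction	= DMA_TO_DEVICE,
		.buf		= buf,
		.len		= len,
		.fp		= xfer_done,
	};
	unsigned ch;

	ch = ops->request(ch_id, &info);	/* ends up in samsung_dmadev_request() */
	if (!ch)
		return -EBUSY;

	ops->prepare(ch, &prep);		/* builds and submits the slave descriptor */
	ops->trigger(ch);			/* dma_async_issue_pending() underneath */
	return 0;
}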
Simple merge
index 582333c70585222a7452a08e7684859ab554944c,0000000000000000000000000000000000000000..78149491282749adf6c71051c1321c35c9404077
mode 100644,000000..100644
--- /dev/null
@@@ -1,130 -1,0 +1,131 @@@
 +/* linux/arch/arm/plat-samsung/s3c-dma-ops.c
 + *
 + * Copyright (c) 2011 Samsung Electronics Co., Ltd.
 + *            http://www.samsung.com
 + *
 + * Samsung S3C-DMA Operations
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2 as
 + * published by the Free Software Foundation.
 + */
 +
 +#include <linux/kernel.h>
 +#include <linux/errno.h>
 +#include <linux/slab.h>
 +#include <linux/types.h>
++#include <linux/export.h>
 +
 +#include <mach/dma.h>
 +
 +struct cb_data {
 +      void (*fp) (void *);
 +      void *fp_param;
 +      unsigned ch;
 +      struct list_head node;
 +};
 +
 +static LIST_HEAD(dma_list);
 +
 +static void s3c_dma_cb(struct s3c2410_dma_chan *channel, void *param,
 +                     int size, enum s3c2410_dma_buffresult res)
 +{
 +      struct cb_data *data = param;
 +
 +      data->fp(data->fp_param);
 +}
 +
 +static unsigned s3c_dma_request(enum dma_ch dma_ch,
 +                               struct samsung_dma_info *info)
 +{
 +      struct cb_data *data;
 +
 +      if (s3c2410_dma_request(dma_ch, info->client, NULL) < 0) {
 +              s3c2410_dma_free(dma_ch, info->client);
 +              return 0;
 +      }
 +
 +      data = kzalloc(sizeof(struct cb_data), GFP_KERNEL);
 +      data->ch = dma_ch;
 +      list_add_tail(&data->node, &dma_list);
 +
 +      s3c2410_dma_devconfig(dma_ch, info->direction, info->fifo);
 +
 +      if (info->cap == DMA_CYCLIC)
 +              s3c2410_dma_setflags(dma_ch, S3C2410_DMAF_CIRCULAR);
 +
 +      s3c2410_dma_config(dma_ch, info->width);
 +
 +      return (unsigned)dma_ch;
 +}
 +
 +static int s3c_dma_release(unsigned ch, struct s3c2410_dma_client *client)
 +{
 +      struct cb_data *data;
 +
 +      list_for_each_entry(data, &dma_list, node)
 +              if (data->ch == ch)
 +                      break;
 +      list_del(&data->node);
 +
 +      s3c2410_dma_free(ch, client);
 +      kfree(data);
 +
 +      return 0;
 +}
 +
 +static int s3c_dma_prepare(unsigned ch, struct samsung_dma_prep_info *info)
 +{
 +      struct cb_data *data;
 +      int len = (info->cap == DMA_CYCLIC) ? info->period : info->len;
 +
 +      list_for_each_entry(data, &dma_list, node)
 +              if (data->ch == ch)
 +                      break;
 +
 +      if (!data->fp) {
 +              s3c2410_dma_set_buffdone_fn(ch, s3c_dma_cb);
 +              data->fp = info->fp;
 +              data->fp_param = info->fp_param;
 +      }
 +
 +      s3c2410_dma_enqueue(ch, (void *)data, info->buf, len);
 +
 +      return 0;
 +}
 +
 +static inline int s3c_dma_trigger(unsigned ch)
 +{
 +      return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_START);
 +}
 +
 +static inline int s3c_dma_started(unsigned ch)
 +{
 +      return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_STARTED);
 +}
 +
 +static inline int s3c_dma_flush(unsigned ch)
 +{
 +      return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_FLUSH);
 +}
 +
 +static inline int s3c_dma_stop(unsigned ch)
 +{
 +      return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_STOP);
 +}
 +
 +static struct samsung_dma_ops s3c_dma_ops = {
 +      .request        = s3c_dma_request,
 +      .release        = s3c_dma_release,
 +      .prepare        = s3c_dma_prepare,
 +      .trigger        = s3c_dma_trigger,
 +      .started        = s3c_dma_started,
 +      .flush          = s3c_dma_flush,
 +      .stop           = s3c_dma_stop,
 +};
 +
 +void *s3c_dma_get_ops(void)
 +{
 +      return &s3c_dma_ops;
 +}
 +EXPORT_SYMBOL(s3c_dma_get_ops);
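s3c-dma-ops.c provides the same ops table on top of the legacy s3c2410 DMA calls, so client drivers stay backend-agnostic and only platform code decides which implementation to hand out. A hedged sketch of that selection, built from the two accessors shown above (the Kconfig symbol and the header where this would live are assumptions, not shown in this diff):

/* Hypothetical backend selector; the real one would sit in a plat header (assumption). */
#ifdef CONFIG_SAMSUNG_DMADEV
#define samsung_dma_get_ops()	samsung_dmadev_get_ops()	/* dmaengine-backed, dma-ops.c */
#else
#define samsung_dma_get_ops()	s3c_dma_get_ops()		/* legacy API, s3c-dma-ops.c */
#endif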
index dc6416d265d616e1ac65382b5174c81c1b22d723,b159b8a847d69144d0a74753dca4d315bb80a7fd..65a4af4cbbbe864bb10eb75ff7fd1426c8021950
@@@ -10,7 -10,9 +10,8 @@@
  #include <linux/dma-mapping.h>
  #include <linux/gfp.h>
  #include <linux/dma-debug.h>
+ #include <linux/export.h>
  #include <asm/bug.h>
 -#include <asm/cacheflush.h>
  
  /*
   * Generic direct DMA implementation
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index b0b6950cc8c8850d68366b869ca39e4250827013,bdce3eeeaa37633ead43d56065892cfaa4a53c58..8efcf42a9d7e318b4cddb93a820cd01d1bd01f4e
@@@ -3,8 -3,8 +3,9 @@@
   *
   */
  
+ #include <linux/module.h>
  #include <crypto/aes.h>
 +#include <asm/aes.h>
  
  asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
  asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
index 864830e1dd655839dec78d5b45ed2e27442305c8,537c89e0009519900f51451095907f138576dfa5..362056aefeb474c86b165c4f9c77d213b40fdbfa
@@@ -36,7 -36,9 +36,8 @@@
  #include <linux/fs.h>
  #include <linux/mm.h>
  #include <linux/debugfs.h>
 -#include <linux/edac_mce.h>
  #include <linux/irq_work.h>
+ #include <linux/export.h>
  
  #include <asm/processor.h>
  #include <asm/mce.h>
Simple merge
diff --cc block/ioctl.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 42517500b223e98b0e9dd265e799233057dde9f4,a8923ffc64594eba2e00a83645bd4d58534d011b..602b1bd723a963aa49904fc0a907d12d965b009e
@@@ -38,8 -38,8 +38,9 @@@
  
  #include <linux/errno.h>
  #include <linux/err.h>
+ #include <linux/export.h>
  #include <linux/string.h>
 +#include <linux/slab.h>
  
  #include <rdma/ib_verbs.h>
  #include <rdma/ib_cache.h>
Simple merge
Simple merge
Simple merge
Simple merge
index cb246667dd52b6f7f71595db6db1138c16ea3aa0,0000000000000000000000000000000000000000..0a6806f80ab5cdf40d4dba56c41dc74ad3db772b
mode 100644,000000..100644
--- /dev/null
@@@ -1,1699 -1,0 +1,1700 @@@
 +/*
 + * Copyright (C) 2009-2011 Red Hat, Inc.
 + *
 + * Author: Mikulas Patocka <mpatocka@redhat.com>
 + *
 + * This file is released under the GPL.
 + */
 +
 +#include "dm-bufio.h"
 +
 +#include <linux/device-mapper.h>
 +#include <linux/dm-io.h>
 +#include <linux/slab.h>
 +#include <linux/vmalloc.h>
 +#include <linux/version.h>
 +#include <linux/shrinker.h>
++#include <linux/module.h>
 +
 +#define DM_MSG_PREFIX "bufio"
 +
 +/*
 + * Memory management policy:
 + *    Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory
 + *    or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
 + *    Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
 + *    Start background writeback when there are DM_BUFIO_WRITEBACK_PERCENT
 + *    dirty buffers.
 + */
 +#define DM_BUFIO_MIN_BUFFERS          8
 +
 +#define DM_BUFIO_MEMORY_PERCENT               2
 +#define DM_BUFIO_VMALLOC_PERCENT      25
 +#define DM_BUFIO_WRITEBACK_PERCENT    75
 +
 +/*
 + * Check buffer ages in this interval (seconds)
 + */
 +#define DM_BUFIO_WORK_TIMER_SECS      10
 +
 +/*
 + * Free buffers when they are older than this (seconds)
 + */
 +#define DM_BUFIO_DEFAULT_AGE_SECS     60
 +
 +/*
 + * The number of bvec entries that are embedded directly in the buffer.
 + * If the chunk size is larger, dm-io is used to do the io.
 + */
 +#define DM_BUFIO_INLINE_VECS          16
 +
 +/*
 + * Buffer hash
 + */
 +#define DM_BUFIO_HASH_BITS    20
 +#define DM_BUFIO_HASH(block) \
 +      ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \
 +       ((1 << DM_BUFIO_HASH_BITS) - 1))
 +
 +/*
 + * Don't try to use kmem_cache_alloc for blocks larger than this.
 + * For explanation, see alloc_buffer_data below.
 + */
 +#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT        (PAGE_SIZE >> 1)
 +#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1))
 +
 +/*
 + * dm_buffer->list_mode
 + */
 +#define LIST_CLEAN    0
 +#define LIST_DIRTY    1
 +#define LIST_SIZE     2
 +
 +/*
 + * Linking of buffers:
 + *    All buffers are linked to cache_hash with their hash_list field.
 + *
 + *    Clean buffers that are not being written (B_WRITING not set)
 + *    are linked to lru[LIST_CLEAN] with their lru_list field.
 + *
 + *    Dirty and clean buffers that are being written are linked to
 + *    lru[LIST_DIRTY] with their lru_list field. When the write
 + *    finishes, the buffer cannot be relinked immediately (because we
 + *    are in an interrupt context and relinking requires process
 + *    context), so some clean-not-writing buffers can be held on
 + *    dirty_lru too.  They are later added to lru in the process
 + *    context.
 + */
 +struct dm_bufio_client {
 +      struct mutex lock;
 +
 +      struct list_head lru[LIST_SIZE];
 +      unsigned long n_buffers[LIST_SIZE];
 +
 +      struct block_device *bdev;
 +      unsigned block_size;
 +      unsigned char sectors_per_block_bits;
 +      unsigned char pages_per_block_bits;
 +      unsigned char blocks_per_page_bits;
 +      unsigned aux_size;
 +      void (*alloc_callback)(struct dm_buffer *);
 +      void (*write_callback)(struct dm_buffer *);
 +
 +      struct dm_io_client *dm_io;
 +
 +      struct list_head reserved_buffers;
 +      unsigned need_reserved_buffers;
 +
 +      struct hlist_head *cache_hash;
 +      wait_queue_head_t free_buffer_wait;
 +
 +      int async_write_error;
 +
 +      struct list_head client_list;
 +      struct shrinker shrinker;
 +};
 +
 +/*
 + * Buffer state bits.
 + */
 +#define B_READING     0
 +#define B_WRITING     1
 +#define B_DIRTY               2
 +
 +/*
 + * Describes how the block was allocated:
 + * kmem_cache_alloc(), __get_free_pages() or vmalloc().
 + * See the comment at alloc_buffer_data.
 + */
 +enum data_mode {
 +      DATA_MODE_SLAB = 0,
 +      DATA_MODE_GET_FREE_PAGES = 1,
 +      DATA_MODE_VMALLOC = 2,
 +      DATA_MODE_LIMIT = 3
 +};
 +
 +struct dm_buffer {
 +      struct hlist_node hash_list;
 +      struct list_head lru_list;
 +      sector_t block;
 +      void *data;
 +      enum data_mode data_mode;
 +      unsigned char list_mode;                /* LIST_* */
 +      unsigned hold_count;
 +      int read_error;
 +      int write_error;
 +      unsigned long state;
 +      unsigned long last_accessed;
 +      struct dm_bufio_client *c;
 +      struct bio bio;
 +      struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
 +};
 +
 +/*----------------------------------------------------------------*/
 +
 +static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT];
 +static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT];
 +
 +static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
 +{
 +      unsigned ret = c->blocks_per_page_bits - 1;
 +
 +      BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches));
 +
 +      return ret;
 +}
 +
 +#define DM_BUFIO_CACHE(c)     (dm_bufio_caches[dm_bufio_cache_index(c)])
 +#define DM_BUFIO_CACHE_NAME(c)        (dm_bufio_cache_names[dm_bufio_cache_index(c)])
 +
 +#define dm_bufio_in_request() (!!current->bio_list)
 +
 +static void dm_bufio_lock(struct dm_bufio_client *c)
 +{
 +      mutex_lock_nested(&c->lock, dm_bufio_in_request());
 +}
 +
 +static int dm_bufio_trylock(struct dm_bufio_client *c)
 +{
 +      return mutex_trylock(&c->lock);
 +}
 +
 +static void dm_bufio_unlock(struct dm_bufio_client *c)
 +{
 +      mutex_unlock(&c->lock);
 +}
 +
 +/*
 + * FIXME Move to sched.h?
 + */
 +#ifdef CONFIG_PREEMPT_VOLUNTARY
 +#  define dm_bufio_cond_resched()             \
 +do {                                          \
 +      if (unlikely(need_resched()))           \
 +              _cond_resched();                \
 +} while (0)
 +#else
 +#  define dm_bufio_cond_resched()                do { } while (0)
 +#endif
 +
 +/*----------------------------------------------------------------*/
 +
 +/*
 + * Default cache size: available memory divided by the ratio.
 + */
 +static unsigned long dm_bufio_default_cache_size;
 +
 +/*
 + * Total cache size set by the user.
 + */
 +static unsigned long dm_bufio_cache_size;
 +
 +/*
 + * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
 + * at any time.  If it disagrees, the user has changed cache size.
 + */
 +static unsigned long dm_bufio_cache_size_latch;
 +
 +static DEFINE_SPINLOCK(param_spinlock);
 +
 +/*
 + * Buffers are freed after this timeout
 + */
 +static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
 +
 +static unsigned long dm_bufio_peak_allocated;
 +static unsigned long dm_bufio_allocated_kmem_cache;
 +static unsigned long dm_bufio_allocated_get_free_pages;
 +static unsigned long dm_bufio_allocated_vmalloc;
 +static unsigned long dm_bufio_current_allocated;
 +
 +/*----------------------------------------------------------------*/
 +
 +/*
 + * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count
 + */
 +static unsigned long dm_bufio_cache_size_per_client;
 +
 +/*
 + * The current number of clients.
 + */
 +static int dm_bufio_client_count;
 +
 +/*
 + * The list of all clients.
 + */
 +static LIST_HEAD(dm_bufio_all_clients);
 +
 +/*
 + * This mutex protects dm_bufio_cache_size_latch,
 + * dm_bufio_cache_size_per_client and dm_bufio_client_count
 + */
 +static DEFINE_MUTEX(dm_bufio_clients_lock);
 +
 +/*----------------------------------------------------------------*/
 +
 +static void adjust_total_allocated(enum data_mode data_mode, long diff)
 +{
 +      static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
 +              &dm_bufio_allocated_kmem_cache,
 +              &dm_bufio_allocated_get_free_pages,
 +              &dm_bufio_allocated_vmalloc,
 +      };
 +
 +      spin_lock(&param_spinlock);
 +
 +      *class_ptr[data_mode] += diff;
 +
 +      dm_bufio_current_allocated += diff;
 +
 +      if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
 +              dm_bufio_peak_allocated = dm_bufio_current_allocated;
 +
 +      spin_unlock(&param_spinlock);
 +}
 +
 +/*
 + * Change the number of clients and recalculate per-client limit.
 + */
 +static void __cache_size_refresh(void)
 +{
 +      BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock));
 +      BUG_ON(dm_bufio_client_count < 0);
 +
 +      dm_bufio_cache_size_latch = dm_bufio_cache_size;
 +
 +      barrier();
 +
 +      /*
 +       * Use default if set to 0 and report the actual cache size used.
 +       */
 +      if (!dm_bufio_cache_size_latch) {
 +              (void)cmpxchg(&dm_bufio_cache_size, 0,
 +                            dm_bufio_default_cache_size);
 +              dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
 +      }
 +
 +      dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch /
 +                                       (dm_bufio_client_count ? : 1);
 +}
 +
 +/*
 + * Allocating buffer data.
 + *
 + * Small buffers are allocated with kmem_cache, to use space optimally.
 + *
 + * For large buffers, we choose between get_free_pages and vmalloc.
 + * Each has advantages and disadvantages.
 + *
 + * __get_free_pages can randomly fail if the memory is fragmented.
 + * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
 + * as low as 128M) so using it for caching is not appropriate.
 + *
 + * If the allocation may fail we use __get_free_pages. Memory fragmentation
 + * won't have a fatal effect here, but it just causes flushes of some other
 + * buffers and more I/O will be performed. Don't use __get_free_pages if it
 + * always fails (i.e. order >= MAX_ORDER).
 + *
 + * If the allocation shouldn't fail we use __vmalloc. This is only for the
 + * initial reserve allocation, so there's no risk of wasting all vmalloc
 + * space.
 + */
 +static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 +                             enum data_mode *data_mode)
 +{
 +      if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
 +              *data_mode = DATA_MODE_SLAB;
 +              return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
 +      }
 +
 +      if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT &&
 +          gfp_mask & __GFP_NORETRY) {
 +              *data_mode = DATA_MODE_GET_FREE_PAGES;
 +              return (void *)__get_free_pages(gfp_mask,
 +                                              c->pages_per_block_bits);
 +      }
 +
 +      *data_mode = DATA_MODE_VMALLOC;
 +      return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
 +}
 +
 +/*
 + * Free buffer's data.
 + */
 +static void free_buffer_data(struct dm_bufio_client *c,
 +                           void *data, enum data_mode data_mode)
 +{
 +      switch (data_mode) {
 +      case DATA_MODE_SLAB:
 +              kmem_cache_free(DM_BUFIO_CACHE(c), data);
 +              break;
 +
 +      case DATA_MODE_GET_FREE_PAGES:
 +              free_pages((unsigned long)data, c->pages_per_block_bits);
 +              break;
 +
 +      case DATA_MODE_VMALLOC:
 +              vfree(data);
 +              break;
 +
 +      default:
 +              DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d",
 +                     data_mode);
 +              BUG();
 +      }
 +}
 +
 +/*
 + * Allocate buffer and its data.
 + */
 +static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
 +{
 +      struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size,
 +                                    gfp_mask);
 +
 +      if (!b)
 +              return NULL;
 +
 +      b->c = c;
 +
 +      b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
 +      if (!b->data) {
 +              kfree(b);
 +              return NULL;
 +      }
 +
 +      adjust_total_allocated(b->data_mode, (long)c->block_size);
 +
 +      return b;
 +}
 +
 +/*
 + * Free buffer and its data.
 + */
 +static void free_buffer(struct dm_buffer *b)
 +{
 +      struct dm_bufio_client *c = b->c;
 +
 +      adjust_total_allocated(b->data_mode, -(long)c->block_size);
 +
 +      free_buffer_data(c, b->data, b->data_mode);
 +      kfree(b);
 +}
 +
 +/*
 + * Link buffer to the hash list and clean or dirty queue.
 + */
 +static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
 +{
 +      struct dm_bufio_client *c = b->c;
 +
 +      c->n_buffers[dirty]++;
 +      b->block = block;
 +      b->list_mode = dirty;
 +      list_add(&b->lru_list, &c->lru[dirty]);
 +      hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]);
 +      b->last_accessed = jiffies;
 +}
 +
 +/*
 + * Unlink buffer from the hash list and dirty or clean queue.
 + */
 +static void __unlink_buffer(struct dm_buffer *b)
 +{
 +      struct dm_bufio_client *c = b->c;
 +
 +      BUG_ON(!c->n_buffers[b->list_mode]);
 +
 +      c->n_buffers[b->list_mode]--;
 +      hlist_del(&b->hash_list);
 +      list_del(&b->lru_list);
 +}
 +
 +/*
 + * Place the buffer to the head of dirty or clean LRU queue.
 + */
 +static void __relink_lru(struct dm_buffer *b, int dirty)
 +{
 +      struct dm_bufio_client *c = b->c;
 +
 +      BUG_ON(!c->n_buffers[b->list_mode]);
 +
 +      c->n_buffers[b->list_mode]--;
 +      c->n_buffers[dirty]++;
 +      b->list_mode = dirty;
 +      list_del(&b->lru_list);
 +      list_add(&b->lru_list, &c->lru[dirty]);
 +}
 +
 +/*----------------------------------------------------------------
 + * Submit I/O on the buffer.
 + *
 + * Bio interface is faster but it has some problems:
 + *    the vector list is limited (increasing this limit increases
 + *    memory-consumption per buffer, so it is not viable);
 + *
 + *    the memory must be direct-mapped, not vmalloced;
 + *
 + *    the I/O driver can reject requests spuriously if it thinks that
 + *    the requests are too big for the device or if they cross a
 + *    controller-defined memory boundary.
 + *
 + * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and
 + * it is not vmalloced, try using the bio interface.
 + *
 + * If the buffer is big, if it is vmalloced or if the underlying device
 + * rejects the bio because it is too large, use dm-io layer to do the I/O.
 + * The dm-io layer splits the I/O into multiple requests, avoiding the above
 + * shortcomings.
 + *--------------------------------------------------------------*/
 +
 +/*
 + * dm-io completion routine. It just calls b->bio.bi_end_io, pretending
 + * that the request was handled directly with bio interface.
 + */
 +static void dmio_complete(unsigned long error, void *context)
 +{
 +      struct dm_buffer *b = context;
 +
 +      b->bio.bi_end_io(&b->bio, error ? -EIO : 0);
 +}
 +
 +static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
 +                   bio_end_io_t *end_io)
 +{
 +      int r;
 +      struct dm_io_request io_req = {
 +              .bi_rw = rw,
 +              .notify.fn = dmio_complete,
 +              .notify.context = b,
 +              .client = b->c->dm_io,
 +      };
 +      struct dm_io_region region = {
 +              .bdev = b->c->bdev,
 +              .sector = block << b->c->sectors_per_block_bits,
 +              .count = b->c->block_size >> SECTOR_SHIFT,
 +      };
 +
 +      if (b->data_mode != DATA_MODE_VMALLOC) {
 +              io_req.mem.type = DM_IO_KMEM;
 +              io_req.mem.ptr.addr = b->data;
 +      } else {
 +              io_req.mem.type = DM_IO_VMA;
 +              io_req.mem.ptr.vma = b->data;
 +      }
 +
 +      b->bio.bi_end_io = end_io;
 +
 +      r = dm_io(&io_req, 1, &region, NULL);
 +      if (r)
 +              end_io(&b->bio, r);
 +}
 +
 +static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
 +                         bio_end_io_t *end_io)
 +{
 +      char *ptr;
 +      int len;
 +
 +      bio_init(&b->bio);
 +      b->bio.bi_io_vec = b->bio_vec;
 +      b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
 +      b->bio.bi_sector = block << b->c->sectors_per_block_bits;
 +      b->bio.bi_bdev = b->c->bdev;
 +      b->bio.bi_end_io = end_io;
 +
 +      /*
 +       * We assume that if len >= PAGE_SIZE ptr is page-aligned.
 +       * If len < PAGE_SIZE the buffer doesn't cross page boundary.
 +       */
 +      ptr = b->data;
 +      len = b->c->block_size;
 +
 +      if (len >= PAGE_SIZE)
 +              BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
 +      else
 +              BUG_ON((unsigned long)ptr & (len - 1));
 +
 +      do {
 +              if (!bio_add_page(&b->bio, virt_to_page(ptr),
 +                                len < PAGE_SIZE ? len : PAGE_SIZE,
 +                                virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
 +                      BUG_ON(b->c->block_size <= PAGE_SIZE);
 +                      use_dmio(b, rw, block, end_io);
 +                      return;
 +              }
 +
 +              len -= PAGE_SIZE;
 +              ptr += PAGE_SIZE;
 +      } while (len > 0);
 +
 +      submit_bio(rw, &b->bio);
 +}
 +
 +static void submit_io(struct dm_buffer *b, int rw, sector_t block,
 +                    bio_end_io_t *end_io)
 +{
 +      if (rw == WRITE && b->c->write_callback)
 +              b->c->write_callback(b);
 +
 +      if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
 +          b->data_mode != DATA_MODE_VMALLOC)
 +              use_inline_bio(b, rw, block, end_io);
 +      else
 +              use_dmio(b, rw, block, end_io);
 +}
 +
 +/*----------------------------------------------------------------
 + * Writing dirty buffers
 + *--------------------------------------------------------------*/
 +
 +/*
 + * The endio routine for write.
 + *
 + * Set the error, clear B_WRITING bit and wake anyone who was waiting on
 + * it.
 + */
 +static void write_endio(struct bio *bio, int error)
 +{
 +      struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 +
 +      b->write_error = error;
 +      if (error) {
 +              struct dm_bufio_client *c = b->c;
 +              (void)cmpxchg(&c->async_write_error, 0, error);
 +      }
 +
 +      BUG_ON(!test_bit(B_WRITING, &b->state));
 +
 +      smp_mb__before_clear_bit();
 +      clear_bit(B_WRITING, &b->state);
 +      smp_mb__after_clear_bit();
 +
 +      wake_up_bit(&b->state, B_WRITING);
 +}
 +
 +/*
 + * This function is called when wait_on_bit is actually waiting.
 + */
 +static int do_io_schedule(void *word)
 +{
 +      io_schedule();
 +
 +      return 0;
 +}
 +
 +/*
 + * Initiate a write on a dirty buffer, but don't wait for it.
 + *
 + * - If the buffer is not dirty, exit.
 + * - If there is some previous write going on, wait for it to finish (we can't
 + *   have two writes on the same buffer simultaneously).
 + * - Submit our write and don't wait on it. We set B_WRITING indicating
 + *   that there is a write in progress.
 + */
 +static void __write_dirty_buffer(struct dm_buffer *b)
 +{
 +      if (!test_bit(B_DIRTY, &b->state))
 +              return;
 +
 +      clear_bit(B_DIRTY, &b->state);
 +      wait_on_bit_lock(&b->state, B_WRITING,
 +                       do_io_schedule, TASK_UNINTERRUPTIBLE);
 +
 +      submit_io(b, WRITE, b->block, write_endio);
 +}
 +
 +/*
 + * Wait until any activity on the buffer finishes.  Possibly write the
 + * buffer if it is dirty.  When this function finishes, there is no I/O
 + * running on the buffer and the buffer is not dirty.
 + */
 +static void __make_buffer_clean(struct dm_buffer *b)
 +{
 +      BUG_ON(b->hold_count);
 +
 +      if (!b->state)  /* fast case */
 +              return;
 +
 +      wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
 +      __write_dirty_buffer(b);
 +      wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE);
 +}
 +
 +/*
 + * Find some buffer that is not held by anybody, clean it, unlink it and
 + * return it.
 + */
 +static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
 +{
 +      struct dm_buffer *b;
 +
 +      list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) {
 +              BUG_ON(test_bit(B_WRITING, &b->state));
 +              BUG_ON(test_bit(B_DIRTY, &b->state));
 +
 +              if (!b->hold_count) {
 +                      __make_buffer_clean(b);
 +                      __unlink_buffer(b);
 +                      return b;
 +              }
 +              dm_bufio_cond_resched();
 +      }
 +
 +      list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
 +              BUG_ON(test_bit(B_READING, &b->state));
 +
 +              if (!b->hold_count) {
 +                      __make_buffer_clean(b);
 +                      __unlink_buffer(b);
 +                      return b;
 +              }
 +              dm_bufio_cond_resched();
 +      }
 +
 +      return NULL;
 +}
 +
 +/*
 + * Wait until some other threads free some buffer or release hold count on
 + * some buffer.
 + *
 + * This function is entered with c->lock held, drops it and regains it
 + * before exiting.
 + */
 +static void __wait_for_free_buffer(struct dm_bufio_client *c)
 +{
 +      DECLARE_WAITQUEUE(wait, current);
 +
 +      add_wait_queue(&c->free_buffer_wait, &wait);
 +      set_task_state(current, TASK_UNINTERRUPTIBLE);
 +      dm_bufio_unlock(c);
 +
 +      io_schedule();
 +
 +      set_task_state(current, TASK_RUNNING);
 +      remove_wait_queue(&c->free_buffer_wait, &wait);
 +
 +      dm_bufio_lock(c);
 +}
 +
 +/*
 + * Allocate a new buffer. If the allocation is not possible, wait until
 + * some other thread frees a buffer.
 + *
 + * May drop the lock and regain it.
 + */
 +static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c)
 +{
 +      struct dm_buffer *b;
 +
 +      /*
 +       * dm-bufio is resistant to allocation failures (it just keeps
 +       * one buffer reserved in case all the allocations fail).
 +       * So set flags to not try too hard:
 +       *      GFP_NOIO: don't recurse into the I/O layer
 +       *      __GFP_NORETRY: don't retry and rather return failure
 +       *      __GFP_NOMEMALLOC: don't use emergency reserves
 +       *      __GFP_NOWARN: don't print a warning in case of failure
 +       *
 +       * For debugging, if we set the cache size to 1, no new buffers will
 +       * be allocated.
 +       */
 +      while (1) {
 +              if (dm_bufio_cache_size_latch != 1) {
 +                      b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
 +                      if (b)
 +                              return b;
 +              }
 +
 +              if (!list_empty(&c->reserved_buffers)) {
 +                      b = list_entry(c->reserved_buffers.next,
 +                                     struct dm_buffer, lru_list);
 +                      list_del(&b->lru_list);
 +                      c->need_reserved_buffers++;
 +
 +                      return b;
 +              }
 +
 +              b = __get_unclaimed_buffer(c);
 +              if (b)
 +                      return b;
 +
 +              __wait_for_free_buffer(c);
 +      }
 +}
 +
 +static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
 +{
 +      struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
 +
 +      if (c->alloc_callback)
 +              c->alloc_callback(b);
 +
 +      return b;
 +}
 +
 +/*
 + * Free a buffer and wake other threads waiting for free buffers.
 + */
 +static void __free_buffer_wake(struct dm_buffer *b)
 +{
 +      struct dm_bufio_client *c = b->c;
 +
 +      if (!c->need_reserved_buffers)
 +              free_buffer(b);
 +      else {
 +              list_add(&b->lru_list, &c->reserved_buffers);
 +              c->need_reserved_buffers--;
 +      }
 +
 +      wake_up(&c->free_buffer_wait);
 +}
 +
 +static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait)
 +{
 +      struct dm_buffer *b, *tmp;
 +
 +      list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
 +              BUG_ON(test_bit(B_READING, &b->state));
 +
 +              if (!test_bit(B_DIRTY, &b->state) &&
 +                  !test_bit(B_WRITING, &b->state)) {
 +                      __relink_lru(b, LIST_CLEAN);
 +                      continue;
 +              }
 +
 +              if (no_wait && test_bit(B_WRITING, &b->state))
 +                      return;
 +
 +              __write_dirty_buffer(b);
 +              dm_bufio_cond_resched();
 +      }
 +}
 +
 +/*
 + * Get writeback threshold and buffer limit for a given client.
 + */
 +static void __get_memory_limit(struct dm_bufio_client *c,
 +                             unsigned long *threshold_buffers,
 +                             unsigned long *limit_buffers)
 +{
 +      unsigned long buffers;
 +
 +      if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
 +              mutex_lock(&dm_bufio_clients_lock);
 +              __cache_size_refresh();
 +              mutex_unlock(&dm_bufio_clients_lock);
 +      }
 +
 +      buffers = dm_bufio_cache_size_per_client >>
 +                (c->sectors_per_block_bits + SECTOR_SHIFT);
 +
 +      if (buffers < DM_BUFIO_MIN_BUFFERS)
 +              buffers = DM_BUFIO_MIN_BUFFERS;
 +
 +      *limit_buffers = buffers;
 +      *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
 +}
 +
 +/*
 + * Check if we're over watermark.
 + * If we are over threshold_buffers, start freeing buffers.
 + * If we're over "limit_buffers", block until we get under the limit.
 + */
 +static void __check_watermark(struct dm_bufio_client *c)
 +{
 +      unsigned long threshold_buffers, limit_buffers;
 +
 +      __get_memory_limit(c, &threshold_buffers, &limit_buffers);
 +
 +      while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] >
 +             limit_buffers) {
 +
 +              struct dm_buffer *b = __get_unclaimed_buffer(c);
 +
 +              if (!b)
 +                      return;
 +
 +              __free_buffer_wake(b);
 +              dm_bufio_cond_resched();
 +      }
 +
 +      if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
 +              __write_dirty_buffers_async(c, 1);
 +}
 +
 +/*
 + * Find a buffer in the hash.
 + */
 +static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
 +{
 +      struct dm_buffer *b;
 +      struct hlist_node *hn;
 +
 +      hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)],
 +                           hash_list) {
 +              dm_bufio_cond_resched();
 +              if (b->block == block)
 +                      return b;
 +      }
 +
 +      return NULL;
 +}
 +
 +/*----------------------------------------------------------------
 + * Getting a buffer
 + *--------------------------------------------------------------*/
 +
 +enum new_flag {
 +      NF_FRESH = 0,
 +      NF_READ = 1,
 +      NF_GET = 2
 +};
 +
 +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
 +                                   enum new_flag nf, struct dm_buffer **bp,
 +                                   int *need_submit)
 +{
 +      struct dm_buffer *b, *new_b = NULL;
 +
 +      *need_submit = 0;
 +
 +      b = __find(c, block);
 +      if (b) {
 +              b->hold_count++;
 +              __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
 +                           test_bit(B_WRITING, &b->state));
 +              return b;
 +      }
 +
 +      if (nf == NF_GET)
 +              return NULL;
 +
 +      new_b = __alloc_buffer_wait(c);
 +
 +      /*
 +       * We've had a period where the mutex was unlocked, so need to
 +       * recheck the hash table.
 +       */
 +      b = __find(c, block);
 +      if (b) {
 +              __free_buffer_wake(new_b);
 +              b->hold_count++;
 +              __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
 +                           test_bit(B_WRITING, &b->state));
 +              return b;
 +      }
 +
 +      __check_watermark(c);
 +
 +      b = new_b;
 +      b->hold_count = 1;
 +      b->read_error = 0;
 +      b->write_error = 0;
 +      __link_buffer(b, block, LIST_CLEAN);
 +
 +      if (nf == NF_FRESH) {
 +              b->state = 0;
 +              return b;
 +      }
 +
 +      b->state = 1 << B_READING;
 +      *need_submit = 1;
 +
 +      return b;
 +}
 +
 +/*
 + * The endio routine for reading: set the error, clear the bit and wake up
 + * anyone waiting on the buffer.
 + */
 +static void read_endio(struct bio *bio, int error)
 +{
 +      struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
 +
 +      b->read_error = error;
 +
 +      BUG_ON(!test_bit(B_READING, &b->state));
 +
 +      smp_mb__before_clear_bit();
 +      clear_bit(B_READING, &b->state);
 +      smp_mb__after_clear_bit();
 +
 +      wake_up_bit(&b->state, B_READING);
 +}
 +
 +/*
 + * A common routine for dm_bufio_new and dm_bufio_read.  Operation of these
 + * functions is similar except that dm_bufio_new doesn't read the
 + * buffer from the disk (assuming that the caller overwrites all the data
 + * and uses dm_bufio_mark_buffer_dirty to write new data back).
 + */
 +static void *new_read(struct dm_bufio_client *c, sector_t block,
 +                    enum new_flag nf, struct dm_buffer **bp)
 +{
 +      int need_submit;
 +      struct dm_buffer *b;
 +
 +      dm_bufio_lock(c);
 +      b = __bufio_new(c, block, nf, bp, &need_submit);
 +      dm_bufio_unlock(c);
 +
 +      if (!b || IS_ERR(b))
 +              return b;
 +
 +      if (need_submit)
 +              submit_io(b, READ, b->block, read_endio);
 +
 +      wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
 +
 +      if (b->read_error) {
 +              int error = b->read_error;
 +
 +              dm_bufio_release(b);
 +
 +              return ERR_PTR(error);
 +      }
 +
 +      *bp = b;
 +
 +      return b->data;
 +}
 +
 +void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
 +                 struct dm_buffer **bp)
 +{
 +      return new_read(c, block, NF_GET, bp);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_get);
 +
 +void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
 +                  struct dm_buffer **bp)
 +{
 +      BUG_ON(dm_bufio_in_request());
 +
 +      return new_read(c, block, NF_READ, bp);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_read);
 +
 +void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
 +                 struct dm_buffer **bp)
 +{
 +      BUG_ON(dm_bufio_in_request());
 +
 +      return new_read(c, block, NF_FRESH, bp);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_new);
 +
 +void dm_bufio_release(struct dm_buffer *b)
 +{
 +      struct dm_bufio_client *c = b->c;
 +
 +      dm_bufio_lock(c);
 +
 +      BUG_ON(test_bit(B_READING, &b->state));
 +      BUG_ON(!b->hold_count);
 +
 +      b->hold_count--;
 +      if (!b->hold_count) {
 +              wake_up(&c->free_buffer_wait);
 +
 +              /*
 +               * If there were errors on the buffer, and the buffer is not
 +               * to be written, free the buffer. There is no point in caching
 +               * an invalid buffer.
 +               */
 +              if ((b->read_error || b->write_error) &&
 +                  !test_bit(B_WRITING, &b->state) &&
 +                  !test_bit(B_DIRTY, &b->state)) {
 +                      __unlink_buffer(b);
 +                      __free_buffer_wake(b);
 +              }
 +      }
 +
 +      dm_bufio_unlock(c);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_release);
 +
 +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
 +{
 +      struct dm_bufio_client *c = b->c;
 +
 +      dm_bufio_lock(c);
 +
 +      if (!test_and_set_bit(B_DIRTY, &b->state))
 +              __relink_lru(b, LIST_DIRTY);
 +
 +      dm_bufio_unlock(c);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
 +
 +void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
 +{
 +      BUG_ON(dm_bufio_in_request());
 +
 +      dm_bufio_lock(c);
 +      __write_dirty_buffers_async(c, 0);
 +      dm_bufio_unlock(c);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
 +
 +/*
 + * For performance, it is essential that the buffers are written asynchronously
 + * and simultaneously (so that the block layer can merge the writes) and then
 + * waited upon.
 + *
 + * Finally, we flush hardware disk cache.
 + */
 +int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
 +{
 +      int a, f;
 +      unsigned long buffers_processed = 0;
 +      struct dm_buffer *b, *tmp;
 +
 +      dm_bufio_lock(c);
 +      __write_dirty_buffers_async(c, 0);
 +
 +again:
 +      list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
 +              int dropped_lock = 0;
 +
 +              if (buffers_processed < c->n_buffers[LIST_DIRTY])
 +                      buffers_processed++;
 +
 +              BUG_ON(test_bit(B_READING, &b->state));
 +
 +              if (test_bit(B_WRITING, &b->state)) {
 +                      if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
 +                              dropped_lock = 1;
 +                              b->hold_count++;
 +                              dm_bufio_unlock(c);
 +                              wait_on_bit(&b->state, B_WRITING,
 +                                          do_io_schedule,
 +                                          TASK_UNINTERRUPTIBLE);
 +                              dm_bufio_lock(c);
 +                              b->hold_count--;
 +                      } else
 +                              wait_on_bit(&b->state, B_WRITING,
 +                                          do_io_schedule,
 +                                          TASK_UNINTERRUPTIBLE);
 +              }
 +
 +              if (!test_bit(B_DIRTY, &b->state) &&
 +                  !test_bit(B_WRITING, &b->state))
 +                      __relink_lru(b, LIST_CLEAN);
 +
 +              dm_bufio_cond_resched();
 +
 +              /*
 +               * If we dropped the lock, the list is no longer consistent,
 +               * so we must restart the search.
 +               *
 +               * In the most common case, the buffer just processed is
 +               * relinked to the clean list, so we won't loop scanning the
 +               * same buffer again and again.
 +               *
 +               * This may livelock if there is another thread simultaneously
 +               * dirtying buffers, so we count the number of buffers walked
 +               * and if it exceeds the total number of buffers, it means that
 +               * someone is doing some writes simultaneously with us.  In
 +               * this case, stop, dropping the lock.
 +               */
 +              if (dropped_lock)
 +                      goto again;
 +      }
 +      wake_up(&c->free_buffer_wait);
 +      dm_bufio_unlock(c);
 +
 +      a = xchg(&c->async_write_error, 0);
 +      f = dm_bufio_issue_flush(c);
 +      if (a)
 +              return a;
 +
 +      return f;
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
 +
 +/*
 + * Use dm-io to send an empty barrier to flush the device.
 + */
 +int dm_bufio_issue_flush(struct dm_bufio_client *c)
 +{
 +      struct dm_io_request io_req = {
 +              .bi_rw = REQ_FLUSH,
 +              .mem.type = DM_IO_KMEM,
 +              .mem.ptr.addr = NULL,
 +              .client = c->dm_io,
 +      };
 +      struct dm_io_region io_reg = {
 +              .bdev = c->bdev,
 +              .sector = 0,
 +              .count = 0,
 +      };
 +
 +      BUG_ON(dm_bufio_in_request());
 +
 +      return dm_io(&io_req, 1, &io_reg, NULL);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
 +
 +/*
 + * We first delete any other buffer that may be at that new location.
 + *
 + * Then, we write the buffer to the original location if it was dirty.
 + *
 + * Then, if we are the only one who is holding the buffer, relink the buffer
 + * in the hash queue for the new location.
 + *
 + * If there was someone else holding the buffer, we write it to the new
 + * location but not relink it, because that other user needs to have the buffer
 + * at the same place.
 + */
 +void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
 +{
 +      struct dm_bufio_client *c = b->c;
 +      struct dm_buffer *new;
 +
 +      BUG_ON(dm_bufio_in_request());
 +
 +      dm_bufio_lock(c);
 +
 +retry:
 +      new = __find(c, new_block);
 +      if (new) {
 +              if (new->hold_count) {
 +                      __wait_for_free_buffer(c);
 +                      goto retry;
 +              }
 +
 +              /*
 +               * FIXME: Is there any point waiting for a write that's going
 +               * to be overwritten in a bit?
 +               */
 +              __make_buffer_clean(new);
 +              __unlink_buffer(new);
 +              __free_buffer_wake(new);
 +      }
 +
 +      BUG_ON(!b->hold_count);
 +      BUG_ON(test_bit(B_READING, &b->state));
 +
 +      __write_dirty_buffer(b);
 +      if (b->hold_count == 1) {
 +              wait_on_bit(&b->state, B_WRITING,
 +                          do_io_schedule, TASK_UNINTERRUPTIBLE);
 +              set_bit(B_DIRTY, &b->state);
 +              __unlink_buffer(b);
 +              __link_buffer(b, new_block, LIST_DIRTY);
 +      } else {
 +              sector_t old_block;
 +              wait_on_bit_lock(&b->state, B_WRITING,
 +                               do_io_schedule, TASK_UNINTERRUPTIBLE);
 +              /*
 +               * Relink buffer to "new_block" so that write_callback
 +               * sees "new_block" as a block number.
 +               * After the write, link the buffer back to old_block.
 +               * All this must be done in bufio lock, so that block number
 +               * change isn't visible to other threads.
 +               */
 +              old_block = b->block;
 +              __unlink_buffer(b);
 +              __link_buffer(b, new_block, b->list_mode);
 +              submit_io(b, WRITE, new_block, write_endio);
 +              wait_on_bit(&b->state, B_WRITING,
 +                          do_io_schedule, TASK_UNINTERRUPTIBLE);
 +              __unlink_buffer(b);
 +              __link_buffer(b, old_block, b->list_mode);
 +      }
 +
 +      dm_bufio_unlock(c);
 +      dm_bufio_release(b);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_release_move);
 +
 +unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
 +{
 +      return c->block_size;
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
 +
 +sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
 +{
 +      return i_size_read(c->bdev->bd_inode) >>
 +                         (SECTOR_SHIFT + c->sectors_per_block_bits);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
 +
 +sector_t dm_bufio_get_block_number(struct dm_buffer *b)
 +{
 +      return b->block;
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);
 +
 +void *dm_bufio_get_block_data(struct dm_buffer *b)
 +{
 +      return b->data;
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);
 +
 +void *dm_bufio_get_aux_data(struct dm_buffer *b)
 +{
 +      return b + 1;
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);
 +
 +struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
 +{
 +      return b->c;
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_get_client);
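/*
 * Illustration only -- not part of dm-bufio.c or of this merge.  A rough
 * sketch of how a caller drives the interface exported above, assuming a
 * struct dm_bufio_client obtained from dm_bufio_client_create() (declared
 * in dm-bufio.h); the block number and the modification are made up.
 */
static int example_zero_block(struct dm_bufio_client *c, sector_t block)
{
	struct dm_buffer *b;
	void *data;

	/* Read the block (or find it in the cache); *b becomes the buffer handle. */
	data = dm_bufio_read(c, block, &b);
	if (IS_ERR(data))
		return PTR_ERR(data);

	memset(data, 0, dm_bufio_get_block_size(c));	/* modify the cached data */

	dm_bufio_mark_buffer_dirty(b);			/* queue it for writeback */
	dm_bufio_release(b);				/* drop our hold count */

	/* Write out all dirty buffers and flush the device cache. */
	return dm_bufio_write_dirty_buffers(c);
}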
 +
 +static void drop_buffers(struct dm_bufio_client *c)
 +{
 +      struct dm_buffer *b;
 +      int i;
 +
 +      BUG_ON(dm_bufio_in_request());
 +
 +      /*
 +       * An optimization so that the buffers are not written one-by-one.
 +       */
 +      dm_bufio_write_dirty_buffers_async(c);
 +
 +      dm_bufio_lock(c);
 +
 +      while ((b = __get_unclaimed_buffer(c)))
 +              __free_buffer_wake(b);
 +
 +      for (i = 0; i < LIST_SIZE; i++)
 +              list_for_each_entry(b, &c->lru[i], lru_list)
 +                      DMERR("leaked buffer %llx, hold count %u, list %d",
 +                            (unsigned long long)b->block, b->hold_count, i);
 +
 +      for (i = 0; i < LIST_SIZE; i++)
 +              BUG_ON(!list_empty(&c->lru[i]));
 +
 +      dm_bufio_unlock(c);
 +}
 +
 +/*
 + * Test if the buffer is unused and too old, and reclaim it.
 + * If __GFP_IO is not set, we must not do any I/O because we hold
 + * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
 + * rerouted to a different bufio client.
 + */
 +static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
 +                              unsigned long max_jiffies)
 +{
 +      if (jiffies - b->last_accessed < max_jiffies)
 +              return 1;
 +
 +      if (!(gfp & __GFP_IO)) {
 +              if (test_bit(B_READING, &b->state) ||
 +                  test_bit(B_WRITING, &b->state) ||
 +                  test_bit(B_DIRTY, &b->state))
 +                      return 1;
 +      }
 +
 +      if (b->hold_count)
 +              return 1;
 +
 +      __make_buffer_clean(b);
 +      __unlink_buffer(b);
 +      __free_buffer_wake(b);
 +
 +      return 0;
 +}
 +
 +static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
 +                 struct shrink_control *sc)
 +{
 +      int l;
 +      struct dm_buffer *b, *tmp;
 +
 +      for (l = 0; l < LIST_SIZE; l++) {
 +              list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list)
 +                      if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) &&
 +                          !--nr_to_scan)
 +                              return;
 +              dm_bufio_cond_resched();
 +      }
 +}
 +
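 +/*
 + * Shrinker callback: the VM calls this with sc->nr_to_scan == 0 to ask how
 + * many buffers could be reclaimed, and with a non-zero count to actually
 + * scan.  When the client lock can't be taken without blocking we report 0
 + * for a query and -1 for a scan, telling the VM to skip this shrinker for
 + * now.
 + */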
 +static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
 +{
 +      struct dm_bufio_client *c =
 +          container_of(shrinker, struct dm_bufio_client, shrinker);
 +      unsigned long r;
 +      unsigned long nr_to_scan = sc->nr_to_scan;
 +
 +      if (sc->gfp_mask & __GFP_IO)
 +              dm_bufio_lock(c);
 +      else if (!dm_bufio_trylock(c))
 +              return !nr_to_scan ? 0 : -1;
 +
 +      if (nr_to_scan)
 +              __scan(c, nr_to_scan, sc);
 +
 +      r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
 +      if (r > INT_MAX)
 +              r = INT_MAX;
 +
 +      dm_bufio_unlock(c);
 +
 +      return r;
 +}
 +
 +/*
 + * Create the buffering interface
 + */
 +struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
 +                                             unsigned reserved_buffers, unsigned aux_size,
 +                                             void (*alloc_callback)(struct dm_buffer *),
 +                                             void (*write_callback)(struct dm_buffer *))
 +{
 +      int r;
 +      struct dm_bufio_client *c;
 +      unsigned i;
 +
 +      BUG_ON(block_size < 1 << SECTOR_SHIFT ||
 +             (block_size & (block_size - 1)));
 +
 +      c = kmalloc(sizeof(*c), GFP_KERNEL);
 +      if (!c) {
 +              r = -ENOMEM;
 +              goto bad_client;
 +      }
 +      c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS);
 +      if (!c->cache_hash) {
 +              r = -ENOMEM;
 +              goto bad_hash;
 +      }
 +
 +      c->bdev = bdev;
 +      c->block_size = block_size;
 +      c->sectors_per_block_bits = ffs(block_size) - 1 - SECTOR_SHIFT;
 +      c->pages_per_block_bits = (ffs(block_size) - 1 >= PAGE_SHIFT) ?
 +                                ffs(block_size) - 1 - PAGE_SHIFT : 0;
 +      c->blocks_per_page_bits = (ffs(block_size) - 1 < PAGE_SHIFT ?
 +                                PAGE_SHIFT - (ffs(block_size) - 1) : 0);
 +
 +      c->aux_size = aux_size;
 +      c->alloc_callback = alloc_callback;
 +      c->write_callback = write_callback;
 +
 +      for (i = 0; i < LIST_SIZE; i++) {
 +              INIT_LIST_HEAD(&c->lru[i]);
 +              c->n_buffers[i] = 0;
 +      }
 +
 +      for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
 +              INIT_HLIST_HEAD(&c->cache_hash[i]);
 +
 +      mutex_init(&c->lock);
 +      INIT_LIST_HEAD(&c->reserved_buffers);
 +      c->need_reserved_buffers = reserved_buffers;
 +
 +      init_waitqueue_head(&c->free_buffer_wait);
 +      c->async_write_error = 0;
 +
 +      c->dm_io = dm_io_client_create();
 +      if (IS_ERR(c->dm_io)) {
 +              r = PTR_ERR(c->dm_io);
 +              goto bad_dm_io;
 +      }
 +
 +      mutex_lock(&dm_bufio_clients_lock);
 +      if (c->blocks_per_page_bits) {
 +              if (!DM_BUFIO_CACHE_NAME(c)) {
 +                      DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size);
 +                      if (!DM_BUFIO_CACHE_NAME(c)) {
 +                              r = -ENOMEM;
 +                              mutex_unlock(&dm_bufio_clients_lock);
 +                              goto bad_cache;
 +                      }
 +              }
 +
 +              if (!DM_BUFIO_CACHE(c)) {
 +                      DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c),
 +                                                            c->block_size,
 +                                                            c->block_size, 0, NULL);
 +                      if (!DM_BUFIO_CACHE(c)) {
 +                              r = -ENOMEM;
 +                              mutex_unlock(&dm_bufio_clients_lock);
 +                              goto bad_cache;
 +                      }
 +              }
 +      }
 +      mutex_unlock(&dm_bufio_clients_lock);
 +
 +      while (c->need_reserved_buffers) {
 +              struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
 +
 +              if (!b) {
 +                      r = -ENOMEM;
 +                      goto bad_buffer;
 +              }
 +              __free_buffer_wake(b);
 +      }
 +
 +      mutex_lock(&dm_bufio_clients_lock);
 +      dm_bufio_client_count++;
 +      list_add(&c->client_list, &dm_bufio_all_clients);
 +      __cache_size_refresh();
 +      mutex_unlock(&dm_bufio_clients_lock);
 +
 +      c->shrinker.shrink = shrink;
 +      c->shrinker.seeks = 1;
 +      c->shrinker.batch = 0;
 +      register_shrinker(&c->shrinker);
 +
 +      return c;
 +
 +bad_buffer:
 +bad_cache:
 +      while (!list_empty(&c->reserved_buffers)) {
 +              struct dm_buffer *b = list_entry(c->reserved_buffers.next,
 +                                               struct dm_buffer, lru_list);
 +              list_del(&b->lru_list);
 +              free_buffer(b);
 +      }
 +      dm_io_client_destroy(c->dm_io);
 +bad_dm_io:
 +      vfree(c->cache_hash);
 +bad_hash:
 +      kfree(c);
 +bad_client:
 +      return ERR_PTR(r);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_client_create);
 +
 +/*
 + * Free the buffering interface.
 + * It is required that there are no references on any buffers.
 + */
 +void dm_bufio_client_destroy(struct dm_bufio_client *c)
 +{
 +      unsigned i;
 +
 +      drop_buffers(c);
 +
 +      unregister_shrinker(&c->shrinker);
 +
 +      mutex_lock(&dm_bufio_clients_lock);
 +
 +      list_del(&c->client_list);
 +      dm_bufio_client_count--;
 +      __cache_size_refresh();
 +
 +      mutex_unlock(&dm_bufio_clients_lock);
 +
 +      for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
 +              BUG_ON(!hlist_empty(&c->cache_hash[i]));
 +
 +      BUG_ON(c->need_reserved_buffers);
 +
 +      while (!list_empty(&c->reserved_buffers)) {
 +              struct dm_buffer *b = list_entry(c->reserved_buffers.next,
 +                                               struct dm_buffer, lru_list);
 +              list_del(&b->lru_list);
 +              free_buffer(b);
 +      }
 +
 +      for (i = 0; i < LIST_SIZE; i++)
 +              if (c->n_buffers[i])
 +                      DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]);
 +
 +      for (i = 0; i < LIST_SIZE; i++)
 +              BUG_ON(c->n_buffers[i]);
 +
 +      dm_io_client_destroy(c->dm_io);
 +      vfree(c->cache_hash);
 +      kfree(c);
 +}
 +EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
 +
 +static void cleanup_old_buffers(void)
 +{
 +      unsigned long max_age = dm_bufio_max_age;
 +      struct dm_bufio_client *c;
 +
 +      barrier();
 +
 +      if (max_age > ULONG_MAX / HZ)
 +              max_age = ULONG_MAX / HZ;
 +
 +      mutex_lock(&dm_bufio_clients_lock);
 +      list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
 +              if (!dm_bufio_trylock(c))
 +                      continue;
 +
 +              while (!list_empty(&c->lru[LIST_CLEAN])) {
 +                      struct dm_buffer *b;
 +                      b = list_entry(c->lru[LIST_CLEAN].prev,
 +                                     struct dm_buffer, lru_list);
 +                      if (__cleanup_old_buffer(b, 0, max_age * HZ))
 +                              break;
 +                      dm_bufio_cond_resched();
 +              }
 +
 +              dm_bufio_unlock(c);
 +              dm_bufio_cond_resched();
 +      }
 +      mutex_unlock(&dm_bufio_clients_lock);
 +}
 +
 +static struct workqueue_struct *dm_bufio_wq;
 +static struct delayed_work dm_bufio_work;
 +
 +static void work_fn(struct work_struct *w)
 +{
 +      cleanup_old_buffers();
 +
 +      queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
 +                         DM_BUFIO_WORK_TIMER_SECS * HZ);
 +}
 +
 +/*----------------------------------------------------------------
 + * Module setup
 + *--------------------------------------------------------------*/
 +
 +/*
 + * This is called only once for the whole dm_bufio module.
 + * It initializes the memory limit.
 + */
 +static int __init dm_bufio_init(void)
 +{
 +      __u64 mem;
 +
 +      memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
 +      memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
 +
 +      mem = (__u64)((totalram_pages - totalhigh_pages) *
 +                    DM_BUFIO_MEMORY_PERCENT / 100) << PAGE_SHIFT;
 +
 +      if (mem > ULONG_MAX)
 +              mem = ULONG_MAX;
 +
 +#ifdef CONFIG_MMU
 +      /*
 +       * Get the size of vmalloc space the same way as VMALLOC_TOTAL
 +       * in fs/proc/internal.h
 +       */
 +      if (mem > (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100)
 +              mem = (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100;
 +#endif
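 +
 +      /*
 +       * Illustrative numbers only: with 4KiB pages, 2GiB of low memory and
 +       * DM_BUFIO_MEMORY_PERCENT set to 2, the calculation above yields a
 +       * default cache size of roughly 40MiB, unless the vmalloc cap is
 +       * lower.
 +       */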
 +
 +      dm_bufio_default_cache_size = mem;
 +
 +      mutex_lock(&dm_bufio_clients_lock);
 +      __cache_size_refresh();
 +      mutex_unlock(&dm_bufio_clients_lock);
 +
 +      dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache");
 +      if (!dm_bufio_wq)
 +              return -ENOMEM;
 +
 +      INIT_DELAYED_WORK(&dm_bufio_work, work_fn);
 +      queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
 +                         DM_BUFIO_WORK_TIMER_SECS * HZ);
 +
 +      return 0;
 +}
 +
 +/*
 + * This is called once when unloading the dm_bufio module.
 + */
 +static void __exit dm_bufio_exit(void)
 +{
 +      int bug = 0;
 +      int i;
 +
 +      cancel_delayed_work_sync(&dm_bufio_work);
 +      destroy_workqueue(dm_bufio_wq);
 +
 +      for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) {
 +              struct kmem_cache *kc = dm_bufio_caches[i];
 +
 +              if (kc)
 +                      kmem_cache_destroy(kc);
 +      }
 +
 +      for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
 +              kfree(dm_bufio_cache_names[i]);
 +
 +      if (dm_bufio_client_count) {
 +              DMCRIT("%s: dm_bufio_client_count leaked: %d",
 +                      __func__, dm_bufio_client_count);
 +              bug = 1;
 +      }
 +
 +      if (dm_bufio_current_allocated) {
 +              DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
 +                      __func__, dm_bufio_current_allocated);
 +              bug = 1;
 +      }
 +
 +      if (dm_bufio_allocated_get_free_pages) {
 +              DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
 +                     __func__, dm_bufio_allocated_get_free_pages);
 +              bug = 1;
 +      }
 +
 +      if (dm_bufio_allocated_vmalloc) {
 +              DMCRIT("%s: dm_bufio_allocated_vmalloc leaked: %lu",
 +                     __func__, dm_bufio_allocated_vmalloc);
 +              bug = 1;
 +      }
 +
 +      if (bug)
 +              BUG();
 +}
 +
 +module_init(dm_bufio_init)
 +module_exit(dm_bufio_exit)
 +
 +module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR);
 +MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
 +
 +module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
 +MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
 +
 +module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
 +MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
 +
 +module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO);
 +MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
 +
 +module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO);
 +MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
 +
 +module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO);
 +MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
 +
 +module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO);
 +MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
 +
 +MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
 +MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
 +MODULE_LICENSE("GPL");
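 +
 +/*
 + * The writable parameters above can also be tuned at runtime via sysfs,
 + * e.g. (values purely illustrative):
 + *
 + *    echo 67108864 > /sys/module/dm_bufio/parameters/max_cache_size_bytes
 + *    echo 60 > /sys/module/dm_bufio/parameters/max_age_seconds
 + */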
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc drivers/md/md.c
Simple merge
Simple merge
index 65fd85ec6514dabcb0d13720cb5f029b861e90a4,0000000000000000000000000000000000000000..023fbc2d389ee086e2edf53168e969a24dba5cad
mode 100644,000000..100644
--- /dev/null
@@@ -1,566 -1,0 +1,566 @@@
- #include <linux/module.h>
 +/*
 + * Copyright (C) 2011 Red Hat, Inc.
 + *
 + * This file is released under the GPL.
 + */
 +
 +#include "dm-btree.h"
 +#include "dm-btree-internal.h"
 +#include "dm-transaction-manager.h"
 +
++#include <linux/export.h>
 +
 +/*
 + * Removing an entry from a btree
 + * ==============================
 + *
 + * A very important constraint for our btree is that no node, except the
 + * root, may have fewer than a certain number of entries.
 + * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
 + *
 + * Ensuring this is complicated by the way we want to only ever hold the
 + * locks on 2 nodes concurrently, and only change nodes in a top to bottom
 + * fashion.
 + *
 + * Each node may have a left or right sibling.  When descending the spine,
 + * if a node contains only MIN_ENTRIES then we try to increase this to at
 + * least MIN_ENTRIES + 1.  We do this in the following ways:
 + *
 + * [A] No siblings => this can only happen if the node is the root, in which
 + *     case we copy the child's contents over the root.
 + *
 + * [B] No left sibling
 + *     ==> rebalance(node, right sibling)
 + *
 + * [C] No right sibling
 + *     ==> rebalance(left sibling, node)
 + *
 + * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
 + *     ==> delete node, adding its contents to left and right
 + *
 + * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
 + *     ==> rebalance(left, node, right)
 + *
 + * After these operations it's possible that our original node no
 + * longer contains the desired subtree.  For this reason this rebalancing
 + * is performed on the children of the current node.  This also avoids
 + * having a special case for the root.
 + *
 + * Once this rebalancing has occurred we can then step into the child node
 + * (for internal nodes), or delete the entry (for leaf nodes).
 + */
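 +
 +/*
 + * For example, case [A] corresponds to the nr_entries == 1 path in
 + * rebalance_children() below: the single child's contents are copied over
 + * the current node and the child block is dropped, reducing the height of
 + * the tree by one.
 + */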
 +
 +/*
 + * Some little utilities for moving node data around.
 + */
 +static void node_shift(struct node *n, int shift)
 +{
 +      uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
 +      uint32_t value_size = le32_to_cpu(n->header.value_size);
 +
 +      if (shift < 0) {
 +              shift = -shift;
 +              BUG_ON(shift > nr_entries);
 +              BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift, value_size));
 +              memmove(key_ptr(n, 0),
 +                      key_ptr(n, shift),
 +                      (nr_entries - shift) * sizeof(__le64));
 +              memmove(value_ptr(n, 0, value_size),
 +                      value_ptr(n, shift, value_size),
 +                      (nr_entries - shift) * value_size);
 +      } else {
 +              BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
 +              memmove(key_ptr(n, shift),
 +                      key_ptr(n, 0),
 +                      nr_entries * sizeof(__le64));
 +              memmove(value_ptr(n, shift, value_size),
 +                      value_ptr(n, 0, value_size),
 +                      nr_entries * value_size);
 +      }
 +}
 +
 +static void node_copy(struct node *left, struct node *right, int shift)
 +{
 +      uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 +      uint32_t value_size = le32_to_cpu(left->header.value_size);
 +      BUG_ON(value_size != le32_to_cpu(right->header.value_size));
 +
 +      if (shift < 0) {
 +              shift = -shift;
 +              BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries));
 +              memcpy(key_ptr(left, nr_left),
 +                     key_ptr(right, 0),
 +                     shift * sizeof(__le64));
 +              memcpy(value_ptr(left, nr_left, value_size),
 +                     value_ptr(right, 0, value_size),
 +                     shift * value_size);
 +      } else {
 +              BUG_ON(shift > le32_to_cpu(right->header.max_entries));
 +              memcpy(key_ptr(right, 0),
 +                     key_ptr(left, nr_left - shift),
 +                     shift * sizeof(__le64));
 +              memcpy(value_ptr(right, 0, value_size),
 +                     value_ptr(left, nr_left - shift, value_size),
 +                     shift * value_size);
 +      }
 +}
 +
 +/*
 + * Delete a specific entry from a leaf node.
 + */
 +static void delete_at(struct node *n, unsigned index)
 +{
 +      unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
 +      unsigned nr_to_copy = nr_entries - (index + 1);
 +      uint32_t value_size = le32_to_cpu(n->header.value_size);
 +      BUG_ON(index >= nr_entries);
 +
 +      if (nr_to_copy) {
 +              memmove(key_ptr(n, index),
 +                      key_ptr(n, index + 1),
 +                      nr_to_copy * sizeof(__le64));
 +
 +              memmove(value_ptr(n, index, value_size),
 +                      value_ptr(n, index + 1, value_size),
 +                      nr_to_copy * value_size);
 +      }
 +
 +      n->header.nr_entries = cpu_to_le32(nr_entries - 1);
 +}
 +
 +static unsigned del_threshold(struct node *n)
 +{
 +      return le32_to_cpu(n->header.max_entries) / 3;
 +}
 +
 +static unsigned merge_threshold(struct node *n)
 +{
 +      /*
 +       * The extra one is because we know we're potentially going to
 +       * delete an entry.
 +       */
 +      return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1;
 +}
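 +
 +/*
 + * Illustrative numbers: for a node with max_entries == 9, del_threshold()
 + * is 3 and merge_threshold() is 7.  So in __rebalance2() two siblings
 + * holding 3 + 3 = 6 entries (<= 7) are merged into one node, while
 + * 5 + 4 = 9 entries (> 7) are redistributed as roughly 4 and 5.
 + */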
 +
 +struct child {
 +      unsigned index;
 +      struct dm_block *block;
 +      struct node *n;
 +};
 +
 +static struct dm_btree_value_type le64_type = {
 +      .context = NULL,
 +      .size = sizeof(__le64),
 +      .inc = NULL,
 +      .dec = NULL,
 +      .equal = NULL
 +};
 +
 +static int init_child(struct dm_btree_info *info, struct node *parent,
 +                    unsigned index, struct child *result)
 +{
 +      int r, inc;
 +      dm_block_t root;
 +
 +      result->index = index;
 +      root = value64(parent, index);
 +
 +      r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
 +                             &result->block, &inc);
 +      if (r)
 +              return r;
 +
 +      result->n = dm_block_data(result->block);
 +
 +      if (inc)
 +              inc_children(info->tm, result->n, &le64_type);
 +
 +      *((__le64 *) value_ptr(parent, index, sizeof(__le64))) =
 +              cpu_to_le64(dm_block_location(result->block));
 +
 +      return 0;
 +}
 +
 +static int exit_child(struct dm_btree_info *info, struct child *c)
 +{
 +      return dm_tm_unlock(info->tm, c->block);
 +}
 +
 +static void shift(struct node *left, struct node *right, int count)
 +{
 +      if (!count)
 +              return;
 +
 +      if (count > 0) {
 +              node_shift(right, count);
 +              node_copy(left, right, count);
 +      } else {
 +              node_copy(left, right, count);
 +              node_shift(right, count);
 +      }
 +
 +      left->header.nr_entries =
 +              cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count);
 +      BUG_ON(le32_to_cpu(left->header.nr_entries) > le32_to_cpu(left->header.max_entries));
 +
 +      right->header.nr_entries =
 +              cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count);
 +      BUG_ON(le32_to_cpu(right->header.nr_entries) > le32_to_cpu(right->header.max_entries));
 +}
 +
 +static void __rebalance2(struct dm_btree_info *info, struct node *parent,
 +                       struct child *l, struct child *r)
 +{
 +      struct node *left = l->n;
 +      struct node *right = r->n;
 +      uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 +      uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
 +
 +      if (nr_left + nr_right <= merge_threshold(left)) {
 +              /*
 +               * Merge
 +               */
 +              node_copy(left, right, -nr_right);
 +              left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
 +              delete_at(parent, r->index);
 +
 +              /*
 +               * We need to decrement the right block, but not its
 +               * children, since they're still referenced by left.
 +               */
 +              dm_tm_dec(info->tm, dm_block_location(r->block));
 +      } else {
 +              /*
 +               * Rebalance.
 +               */
 +              unsigned target_left = (nr_left + nr_right) / 2;
 +              unsigned shift_ = nr_left - target_left;
 +              BUG_ON(le32_to_cpu(left->header.max_entries) <= nr_left - shift_);
 +              BUG_ON(le32_to_cpu(right->header.max_entries) <= nr_right + shift_);
 +              shift(left, right, nr_left - target_left);
 +              *key_ptr(parent, r->index) = right->keys[0];
 +      }
 +}
 +
 +static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
 +                    unsigned left_index)
 +{
 +      int r;
 +      struct node *parent;
 +      struct child left, right;
 +
 +      parent = dm_block_data(shadow_current(s));
 +
 +      r = init_child(info, parent, left_index, &left);
 +      if (r)
 +              return r;
 +
 +      r = init_child(info, parent, left_index + 1, &right);
 +      if (r) {
 +              exit_child(info, &left);
 +              return r;
 +      }
 +
 +      __rebalance2(info, parent, &left, &right);
 +
 +      r = exit_child(info, &left);
 +      if (r) {
 +              exit_child(info, &right);
 +              return r;
 +      }
 +
 +      return exit_child(info, &right);
 +}
 +
 +static void __rebalance3(struct dm_btree_info *info, struct node *parent,
 +                       struct child *l, struct child *c, struct child *r)
 +{
 +      struct node *left = l->n;
 +      struct node *center = c->n;
 +      struct node *right = r->n;
 +
 +      uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
 +      uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
 +      uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
 +      uint32_t max_entries = le32_to_cpu(left->header.max_entries);
 +
 +      unsigned target;
 +
 +      BUG_ON(left->header.max_entries != center->header.max_entries);
 +      BUG_ON(center->header.max_entries != right->header.max_entries);
 +
 +      if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) {
 +              /*
 +               * Delete center node:
 +               *
 +               * We dump as many entries from center as possible into
 +               * left, then the rest into right, then rebalance2.  This
 +               * wastes some CPU, but I want something simple for now.
 +               */
 +              unsigned shift = min(max_entries - nr_left, nr_center);
 +
 +              BUG_ON(nr_left + shift > max_entries);
 +              node_copy(left, center, -shift);
 +              left->header.nr_entries = cpu_to_le32(nr_left + shift);
 +
 +              if (shift != nr_center) {
 +                      shift = nr_center - shift;
 +                      BUG_ON((nr_right + shift) >= max_entries);
 +                      node_shift(right, shift);
 +                      node_copy(center, right, shift);
 +                      right->header.nr_entries = cpu_to_le32(nr_right + shift);
 +              }
 +              *key_ptr(parent, r->index) = right->keys[0];
 +
 +              delete_at(parent, c->index);
 +              r->index--;
 +
 +              dm_tm_dec(info->tm, dm_block_location(c->block));
 +              __rebalance2(info, parent, l, r);
 +
 +              return;
 +      }
 +
 +      /*
 +       * Rebalance
 +       */
 +      target = (nr_left + nr_center + nr_right) / 3;
 +      BUG_ON(target > max_entries);
 +
 +      /*
 +       * Adjust the left node
 +       */
 +      shift(left, center, nr_left - target);
 +
 +      /*
 +       * Adjust the right node
 +       */
 +      shift(center, right, target - nr_right);
 +      *key_ptr(parent, c->index) = center->keys[0];
 +      *key_ptr(parent, r->index) = right->keys[0];
 +}
 +
 +static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
 +                    unsigned left_index)
 +{
 +      int r;
 +      struct node *parent = dm_block_data(shadow_current(s));
 +      struct child left, center, right;
 +
 +      /*
 +       * FIXME: fill out an array?
 +       */
 +      r = init_child(info, parent, left_index, &left);
 +      if (r)
 +              return r;
 +
 +      r = init_child(info, parent, left_index + 1, &center);
 +      if (r) {
 +              exit_child(info, &left);
 +              return r;
 +      }
 +
 +      r = init_child(info, parent, left_index + 2, &right);
 +      if (r) {
 +              exit_child(info, &left);
 +              exit_child(info, &center);
 +              return r;
 +      }
 +
 +      __rebalance3(info, parent, &left, &center, &right);
 +
 +      r = exit_child(info, &left);
 +      if (r) {
 +              exit_child(info, &center);
 +              exit_child(info, &right);
 +              return r;
 +      }
 +
 +      r = exit_child(info, &center);
 +      if (r) {
 +              exit_child(info, &right);
 +              return r;
 +      }
 +
 +      r = exit_child(info, &right);
 +      if (r)
 +              return r;
 +
 +      return 0;
 +}
 +
 +static int get_nr_entries(struct dm_transaction_manager *tm,
 +                        dm_block_t b, uint32_t *result)
 +{
 +      int r;
 +      struct dm_block *block;
 +      struct node *n;
 +
 +      r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
 +      if (r)
 +              return r;
 +
 +      n = dm_block_data(block);
 +      *result = le32_to_cpu(n->header.nr_entries);
 +
 +      return dm_tm_unlock(tm, block);
 +}
 +
 +static int rebalance_children(struct shadow_spine *s,
 +                            struct dm_btree_info *info, uint64_t key)
 +{
 +      int i, r, has_left_sibling, has_right_sibling;
 +      uint32_t child_entries;
 +      struct node *n;
 +
 +      n = dm_block_data(shadow_current(s));
 +
 +      if (le32_to_cpu(n->header.nr_entries) == 1) {
 +              struct dm_block *child;
 +              dm_block_t b = value64(n, 0);
 +
 +              r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
 +              if (r)
 +                      return r;
 +
 +              memcpy(n, dm_block_data(child),
 +                     dm_bm_block_size(dm_tm_get_bm(info->tm)));
 +              r = dm_tm_unlock(info->tm, child);
 +              if (r)
 +                      return r;
 +
 +              dm_tm_dec(info->tm, dm_block_location(child));
 +              return 0;
 +      }
 +
 +      i = lower_bound(n, key);
 +      if (i < 0)
 +              return -ENODATA;
 +
 +      r = get_nr_entries(info->tm, value64(n, i), &child_entries);
 +      if (r)
 +              return r;
 +
 +      if (child_entries > del_threshold(n))
 +              return 0;
 +
 +      has_left_sibling = i > 0;
 +      has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
 +
 +      if (!has_left_sibling)
 +              r = rebalance2(s, info, i);
 +
 +      else if (!has_right_sibling)
 +              r = rebalance2(s, info, i - 1);
 +
 +      else
 +              r = rebalance3(s, info, i - 1);
 +
 +      return r;
 +}
 +
 +static int do_leaf(struct node *n, uint64_t key, unsigned *index)
 +{
 +      int i = lower_bound(n, key);
 +
 +      if ((i < 0) ||
 +          (i >= le32_to_cpu(n->header.nr_entries)) ||
 +          (le64_to_cpu(n->keys[i]) != key))
 +              return -ENODATA;
 +
 +      *index = i;
 +
 +      return 0;
 +}
 +
 +/*
 + * Prepares for removal from one level of the hierarchy.  The caller must
 + * call delete_at() to remove the entry at index.
 + */
 +static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
 +                    struct dm_btree_value_type *vt, dm_block_t root,
 +                    uint64_t key, unsigned *index)
 +{
 +      int i = *index, r;
 +      struct node *n;
 +
 +      for (;;) {
 +              r = shadow_step(s, root, vt);
 +              if (r < 0)
 +                      break;
 +
 +              /*
 +               * We have to patch up the parent node; it's ugly, but I don't
 +               * see a way to do this automatically as part of the spine
 +               * op.
 +               */
 +              if (shadow_has_parent(s)) {
 +                      __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
 +                      memcpy(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(__le64)),
 +                             &location, sizeof(__le64));
 +              }
 +
 +              n = dm_block_data(shadow_current(s));
 +
 +              if (le32_to_cpu(n->header.flags) & LEAF_NODE)
 +                      return do_leaf(n, key, index);
 +
 +              r = rebalance_children(s, info, key);
 +              if (r)
 +                      break;
 +
 +              n = dm_block_data(shadow_current(s));
 +              if (le32_to_cpu(n->header.flags) & LEAF_NODE)
 +                      return do_leaf(n, key, index);
 +
 +              i = lower_bound(n, key);
 +
 +              /*
 +               * We know the key is present, or else
 +               * rebalance_children would have returned
 +               * -ENODATA
 +               */
 +              root = value64(n, i);
 +      }
 +
 +      return r;
 +}
 +
 +int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 +                  uint64_t *keys, dm_block_t *new_root)
 +{
 +      unsigned level, last_level = info->levels - 1;
 +      int index = 0, r = 0;
 +      struct shadow_spine spine;
 +      struct node *n;
 +
 +      init_shadow_spine(&spine, info);
 +      for (level = 0; level < info->levels; level++) {
 +              r = remove_raw(&spine, info,
 +                             (level == last_level ?
 +                              &info->value_type : &le64_type),
 +                             root, keys[level], (unsigned *)&index);
 +              if (r < 0)
 +                      break;
 +
 +              n = dm_block_data(shadow_current(&spine));
 +              if (level != last_level) {
 +                      root = value64(n, index);
 +                      continue;
 +              }
 +
 +              BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
 +
 +              if (info->value_type.dec)
 +                      info->value_type.dec(info->value_type.context,
 +                                           value_ptr(n, index, info->value_type.size));
 +
 +              delete_at(n, index);
 +      }
 +
 +      *new_root = shadow_root(&spine);
 +      exit_shadow_spine(&spine);
 +
 +      return r;
 +}
 +EXPORT_SYMBOL_GPL(dm_btree_remove);
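 +
 +/*
 + * Sketch of a removal from a single-level tree (info->levels == 1):
 + *
 + *      uint64_t key = 123;
 + *      dm_block_t new_root;
 + *
 + *      r = dm_btree_remove(info, root, &key, &new_root);
 + *      if (r == -ENODATA)
 + *              the key was not present
 + */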
index e0638be53ea4302460d9a47df1cbd7f736d145a7,0000000000000000000000000000000000000000..bd1e7ffbe26c750a26a4b9341d925a98acf632b3
mode 100644,000000..100644
--- /dev/null
@@@ -1,805 -1,0 +1,805 @@@
- #include <linux/module.h>
 +/*
 + * Copyright (C) 2011 Red Hat, Inc.
 + *
 + * This file is released under the GPL.
 + */
 +
 +#include "dm-btree-internal.h"
 +#include "dm-space-map.h"
 +#include "dm-transaction-manager.h"
 +
++#include <linux/export.h>
 +#include <linux/device-mapper.h>
 +
 +#define DM_MSG_PREFIX "btree"
 +
 +/*----------------------------------------------------------------
 + * Array manipulation
 + *--------------------------------------------------------------*/
 +static void memcpy_disk(void *dest, const void *src, size_t len)
 +      __dm_written_to_disk(src)
 +{
 +      memcpy(dest, src, len);
 +      __dm_unbless_for_disk(src);
 +}
 +
 +static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
 +                       unsigned index, void *elt)
 +      __dm_written_to_disk(elt)
 +{
 +      if (index < nr_elts)
 +              memmove(base + (elt_size * (index + 1)),
 +                      base + (elt_size * index),
 +                      (nr_elts - index) * elt_size);
 +
 +      memcpy_disk(base + (elt_size * index), elt, elt_size);
 +}
 +
 +/*----------------------------------------------------------------*/
 +
 +/* makes the assumption that no two keys are the same. */
 +static int bsearch(struct node *n, uint64_t key, int want_hi)
 +{
 +      int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
 +
 +      while (hi - lo > 1) {
 +              int mid = lo + ((hi - lo) / 2);
 +              uint64_t mid_key = le64_to_cpu(n->keys[mid]);
 +
 +              if (mid_key == key)
 +                      return mid;
 +
 +              if (mid_key < key)
 +                      lo = mid;
 +              else
 +                      hi = mid;
 +      }
 +
 +      return want_hi ? hi : lo;
 +}
 +
 +int lower_bound(struct node *n, uint64_t key)
 +{
 +      return bsearch(n, key, 0);
 +}
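 +
 +/*
 + * Example: for keys { 10, 20, 30 } and key == 25, bsearch() converges on
 + * lo == 1 (the 20 entry) and hi == 2, so lower_bound() returns 1, the
 + * index of the largest key <= 25.
 + */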
 +
 +void inc_children(struct dm_transaction_manager *tm, struct node *n,
 +                struct dm_btree_value_type *vt)
 +{
 +      unsigned i;
 +      uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
 +
 +      if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
 +              for (i = 0; i < nr_entries; i++)
 +                      dm_tm_inc(tm, value64(n, i));
 +      else if (vt->inc)
 +              for (i = 0; i < nr_entries; i++)
 +                      vt->inc(vt->context,
 +                              value_ptr(n, i, vt->size));
 +}
 +
 +static int insert_at(size_t value_size, struct node *node, unsigned index,
 +                    uint64_t key, void *value)
 +                    __dm_written_to_disk(value)
 +{
 +      uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
 +      __le64 key_le = cpu_to_le64(key);
 +
 +      if (index > nr_entries ||
 +          index >= le32_to_cpu(node->header.max_entries)) {
 +              DMERR("too many entries in btree node for insert");
 +              __dm_unbless_for_disk(value);
 +              return -ENOMEM;
 +      }
 +
 +      __dm_bless_for_disk(&key_le);
 +
 +      array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
 +      array_insert(value_base(node), value_size, nr_entries, index, value);
 +      node->header.nr_entries = cpu_to_le32(nr_entries + 1);
 +
 +      return 0;
 +}
 +
 +/*----------------------------------------------------------------*/
 +
 +/*
 + * We want 3n entries (for some n).  This works more nicely for repeated
 + * insert/remove loops than (2n + 1).
 + */
 +static uint32_t calc_max_entries(size_t value_size, size_t block_size)
 +{
 +      uint32_t total, n;
 +      size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */
 +
 +      block_size -= sizeof(struct node_header);
 +      total = block_size / elt_size;
 +      n = total / 3;          /* rounds down */
 +
 +      return 3 * n;
 +}
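 +
 +/*
 + * Worked example (assuming a 4096 byte block and a 32 byte node_header):
 + * with 8 byte values each entry occupies 16 bytes, (4096 - 32) / 16 = 254,
 + * and rounding down to a multiple of 3 gives max_entries == 252.
 + */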
 +
 +int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
 +{
 +      int r;
 +      struct dm_block *b;
 +      struct node *n;
 +      size_t block_size;
 +      uint32_t max_entries;
 +
 +      r = new_block(info, &b);
 +      if (r < 0)
 +              return r;
 +
 +      block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
 +      max_entries = calc_max_entries(info->value_type.size, block_size);
 +
 +      n = dm_block_data(b);
 +      memset(n, 0, block_size);
 +      n->header.flags = cpu_to_le32(LEAF_NODE);
 +      n->header.nr_entries = cpu_to_le32(0);
 +      n->header.max_entries = cpu_to_le32(max_entries);
 +      n->header.value_size = cpu_to_le32(info->value_type.size);
 +
 +      *root = dm_block_location(b);
 +      return unlock_block(info, b);
 +}
 +EXPORT_SYMBOL_GPL(dm_btree_empty);
 +
 +/*----------------------------------------------------------------*/
 +
 +/*
 + * Deletion uses a recursive algorithm.  Since we have limited stack
 + * space, we explicitly manage our own stack on the heap.
 + */
 +#define MAX_SPINE_DEPTH 64
 +struct frame {
 +      struct dm_block *b;
 +      struct node *n;
 +      unsigned level;
 +      unsigned nr_children;
 +      unsigned current_child;
 +};
 +
 +struct del_stack {
 +      struct dm_transaction_manager *tm;
 +      int top;
 +      struct frame spine[MAX_SPINE_DEPTH];
 +};
 +
 +static int top_frame(struct del_stack *s, struct frame **f)
 +{
 +      if (s->top < 0) {
 +              DMERR("btree deletion stack empty");
 +              return -EINVAL;
 +      }
 +
 +      *f = s->spine + s->top;
 +
 +      return 0;
 +}
 +
 +static int unprocessed_frames(struct del_stack *s)
 +{
 +      return s->top >= 0;
 +}
 +
 +static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
 +{
 +      int r;
 +      uint32_t ref_count;
 +
 +      if (s->top >= MAX_SPINE_DEPTH - 1) {
 +              DMERR("btree deletion stack out of memory");
 +              return -ENOMEM;
 +      }
 +
 +      r = dm_tm_ref(s->tm, b, &ref_count);
 +      if (r)
 +              return r;
 +
 +      if (ref_count > 1)
 +              /*
 +               * This is a shared node, so we can just decrement its
 +               * reference counter and leave the children.
 +               */
 +              dm_tm_dec(s->tm, b);
 +
 +      else {
 +              struct frame *f = s->spine + ++s->top;
 +
 +              r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
 +              if (r) {
 +                      s->top--;
 +                      return r;
 +              }
 +
 +              f->n = dm_block_data(f->b);
 +              f->level = level;
 +              f->nr_children = le32_to_cpu(f->n->header.nr_entries);
 +              f->current_child = 0;
 +      }
 +
 +      return 0;
 +}
 +
 +static void pop_frame(struct del_stack *s)
 +{
 +      struct frame *f = s->spine + s->top--;
 +
 +      dm_tm_dec(s->tm, dm_block_location(f->b));
 +      dm_tm_unlock(s->tm, f->b);
 +}
 +
 +int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
 +{
 +      int r;
 +      struct del_stack *s;
 +
 +      s = kmalloc(sizeof(*s), GFP_KERNEL);
 +      if (!s)
 +              return -ENOMEM;
 +      s->tm = info->tm;
 +      s->top = -1;
 +
 +      r = push_frame(s, root, 1);
 +      if (r)
 +              goto out;
 +
 +      while (unprocessed_frames(s)) {
 +              uint32_t flags;
 +              struct frame *f;
 +              dm_block_t b;
 +
 +              r = top_frame(s, &f);
 +              if (r)
 +                      goto out;
 +
 +              if (f->current_child >= f->nr_children) {
 +                      pop_frame(s);
 +                      continue;
 +              }
 +
 +              flags = le32_to_cpu(f->n->header.flags);
 +              if (flags & INTERNAL_NODE) {
 +                      b = value64(f->n, f->current_child);
 +                      f->current_child++;
 +                      r = push_frame(s, b, f->level);
 +                      if (r)
 +                              goto out;
 +
 +              } else if (f->level != (info->levels - 1)) {
 +                      b = value64(f->n, f->current_child);
 +                      f->current_child++;
 +                      r = push_frame(s, b, f->level + 1);
 +                      if (r)
 +                              goto out;
 +
 +              } else {
 +                      if (info->value_type.dec) {
 +                              unsigned i;
 +
 +                              for (i = 0; i < f->nr_children; i++)
 +                                      info->value_type.dec(info->value_type.context,
 +                                                           value_ptr(f->n, i, info->value_type.size));
 +                      }
 +                      f->current_child = f->nr_children;
 +              }
 +      }
 +
 +out:
 +      kfree(s);
 +      return r;
 +}
 +EXPORT_SYMBOL_GPL(dm_btree_del);
 +
 +/*----------------------------------------------------------------*/
 +
 +static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
 +                          int (*search_fn)(struct node *, uint64_t),
 +                          uint64_t *result_key, void *v, size_t value_size)
 +{
 +      int i, r;
 +      uint32_t flags, nr_entries;
 +
 +      do {
 +              r = ro_step(s, block);
 +              if (r < 0)
 +                      return r;
 +
 +              i = search_fn(ro_node(s), key);
 +
 +              flags = le32_to_cpu(ro_node(s)->header.flags);
 +              nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
 +              if (i < 0 || i >= nr_entries)
 +                      return -ENODATA;
 +
 +              if (flags & INTERNAL_NODE)
 +                      block = value64(ro_node(s), i);
 +
 +      } while (!(flags & LEAF_NODE));
 +
 +      *result_key = le64_to_cpu(ro_node(s)->keys[i]);
 +      memcpy(v, value_ptr(ro_node(s), i, value_size), value_size);
 +
 +      return 0;
 +}
 +
 +int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
 +                  uint64_t *keys, void *value_le)
 +{
 +      unsigned level, last_level = info->levels - 1;
 +      int r = -ENODATA;
 +      uint64_t rkey;
 +      __le64 internal_value_le;
 +      struct ro_spine spine;
 +
 +      init_ro_spine(&spine, info);
 +      for (level = 0; level < info->levels; level++) {
 +              size_t size;
 +              void *value_p;
 +
 +              if (level == last_level) {
 +                      value_p = value_le;
 +                      size = info->value_type.size;
 +
 +              } else {
 +                      value_p = &internal_value_le;
 +                      size = sizeof(uint64_t);
 +              }
 +
 +              r = btree_lookup_raw(&spine, root, keys[level],
 +                                   lower_bound, &rkey,
 +                                   value_p, size);
 +
 +              if (!r) {
 +                      if (rkey != keys[level]) {
 +                              exit_ro_spine(&spine);
 +                              return -ENODATA;
 +                      }
 +              } else {
 +                      exit_ro_spine(&spine);
 +                      return r;
 +              }
 +
 +              root = le64_to_cpu(internal_value_le);
 +      }
 +      exit_ro_spine(&spine);
 +
 +      return r;
 +}
 +EXPORT_SYMBOL_GPL(dm_btree_lookup);
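 +
 +/*
 + * Sketch of a lookup on a single-level tree (info->levels == 1) whose
 + * values are __le64:
 + *
 + *      uint64_t key = 42;
 + *      __le64 value_le;
 + *
 + *      r = dm_btree_lookup(info, root, &key, &value_le);
 + *      if (r == -ENODATA)
 + *              the key is not present
 + */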
 +
 +/*
 + * Splits a node by creating a sibling node and shifting half the node's
 + * contents across.  Assumes there is a parent node, and it has room for
 + * another child.
 + *
 + * Before:
 + *      +--------+
 + *      | Parent |
 + *      +--------+
 + *         |
 + *         v
 + *    +----------+
 + *    | A ++++++ |
 + *    +----------+
 + *
 + *
 + * After:
 + *            +--------+
 + *            | Parent |
 + *            +--------+
 + *              |     |
 + *              v     +------+
 + *        +---------+        |
 + *        | A* +++  |        v
 + *        +---------+   +-------+
 + *                      | B +++ |
 + *                      +-------+
 + *
 + * Where A* is a shadow of A.
 + */
 +static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
 +                             unsigned parent_index, uint64_t key)
 +{
 +      int r;
 +      size_t size;
 +      unsigned nr_left, nr_right;
 +      struct dm_block *left, *right, *parent;
 +      struct node *ln, *rn, *pn;
 +      __le64 location;
 +
 +      left = shadow_current(s);
 +
 +      r = new_block(s->info, &right);
 +      if (r < 0)
 +              return r;
 +
 +      ln = dm_block_data(left);
 +      rn = dm_block_data(right);
 +
 +      nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
 +      nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
 +
 +      ln->header.nr_entries = cpu_to_le32(nr_left);
 +
 +      rn->header.flags = ln->header.flags;
 +      rn->header.nr_entries = cpu_to_le32(nr_right);
 +      rn->header.max_entries = ln->header.max_entries;
 +      rn->header.value_size = ln->header.value_size;
 +      memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
 +
 +      size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
 +              sizeof(uint64_t) : s->info->value_type.size;
 +      memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size),
 +             size * nr_right);
 +
 +      /*
 +       * Patch up the parent
 +       */
 +      parent = shadow_parent(s);
 +
 +      pn = dm_block_data(parent);
 +      location = cpu_to_le64(dm_block_location(left));
 +      __dm_bless_for_disk(&location);
 +      memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)),
 +                  &location, sizeof(__le64));
 +
 +      location = cpu_to_le64(dm_block_location(right));
 +      __dm_bless_for_disk(&location);
 +
 +      r = insert_at(sizeof(__le64), pn, parent_index + 1,
 +                    le64_to_cpu(rn->keys[0]), &location);
 +      if (r)
 +              return r;
 +
 +      if (key < le64_to_cpu(rn->keys[0])) {
 +              unlock_block(s->info, right);
 +              s->nodes[1] = left;
 +      } else {
 +              unlock_block(s->info, left);
 +              s->nodes[1] = right;
 +      }
 +
 +      return 0;
 +}
 +
 +/*
 + * Splits a node by creating two new children beneath the given node.
 + *
 + * Before:
 + *      +----------+
 + *      | A ++++++ |
 + *      +----------+
 + *
 + *
 + * After:
 + *    +------------+
 + *    | A (shadow) |
 + *    +------------+
 + *        |   |
 + *    +---+   +----+
 + *    |            |
 + *    v            v
 + * +-------+   +-------+
 + * | B +++ |   | C +++ |
 + * +-------+   +-------+
 + */
 +static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
 +{
 +      int r;
 +      size_t size;
 +      unsigned nr_left, nr_right;
 +      struct dm_block *left, *right, *new_parent;
 +      struct node *pn, *ln, *rn;
 +      __le64 val;
 +
 +      new_parent = shadow_current(s);
 +
 +      r = new_block(s->info, &left);
 +      if (r < 0)
 +              return r;
 +
 +      r = new_block(s->info, &right);
 +      if (r < 0) {
 +              /* FIXME: put left */
 +              return r;
 +      }
 +
 +      pn = dm_block_data(new_parent);
 +      ln = dm_block_data(left);
 +      rn = dm_block_data(right);
 +
 +      nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
 +      nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
 +
 +      ln->header.flags = pn->header.flags;
 +      ln->header.nr_entries = cpu_to_le32(nr_left);
 +      ln->header.max_entries = pn->header.max_entries;
 +      ln->header.value_size = pn->header.value_size;
 +
 +      rn->header.flags = pn->header.flags;
 +      rn->header.nr_entries = cpu_to_le32(nr_right);
 +      rn->header.max_entries = pn->header.max_entries;
 +      rn->header.value_size = pn->header.value_size;
 +
 +      memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
 +      memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
 +
 +      size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
 +              sizeof(__le64) : s->info->value_type.size;
 +      memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size);
 +      memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size),
 +             nr_right * size);
 +
 +      /* new_parent should just point to l and r now */
 +      pn->header.flags = cpu_to_le32(INTERNAL_NODE);
 +      pn->header.nr_entries = cpu_to_le32(2);
 +      pn->header.max_entries = cpu_to_le32(
 +              calc_max_entries(sizeof(__le64),
 +                               dm_bm_block_size(
 +                                       dm_tm_get_bm(s->info->tm))));
 +      pn->header.value_size = cpu_to_le32(sizeof(__le64));
 +
 +      val = cpu_to_le64(dm_block_location(left));
 +      __dm_bless_for_disk(&val);
 +      pn->keys[0] = ln->keys[0];
 +      memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64));
 +
 +      val = cpu_to_le64(dm_block_location(right));
 +      __dm_bless_for_disk(&val);
 +      pn->keys[1] = rn->keys[0];
 +      memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64));
 +
 +      /*
 +       * Rejig the spine.  This is ugly, since it knows too
 +       * much about the spine.
 +       */
 +      if (s->nodes[0] != new_parent) {
 +              unlock_block(s->info, s->nodes[0]);
 +              s->nodes[0] = new_parent;
 +      }
 +      if (key < le64_to_cpu(rn->keys[0])) {
 +              unlock_block(s->info, right);
 +              s->nodes[1] = left;
 +      } else {
 +              unlock_block(s->info, left);
 +              s->nodes[1] = right;
 +      }
 +      s->count = 2;
 +
 +      return 0;
 +}
 +
 +static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
 +                          struct dm_btree_value_type *vt,
 +                          uint64_t key, unsigned *index)
 +{
 +      int r, i = *index, top = 1;
 +      struct node *node;
 +
 +      for (;;) {
 +              r = shadow_step(s, root, vt);
 +              if (r < 0)
 +                      return r;
 +
 +              node = dm_block_data(shadow_current(s));
 +
 +              /*
 +               * We have to patch up the parent node; it's ugly, but I don't
 +               * see a way to do this automatically as part of the spine
 +               * op.
 +               */
 +              if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */
 +                      __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
 +
 +                      __dm_bless_for_disk(&location);
 +                      memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
 +                                  &location, sizeof(__le64));
 +              }
 +
 +              node = dm_block_data(shadow_current(s));
 +
 +              if (node->header.nr_entries == node->header.max_entries) {
 +                      if (top)
 +                              r = btree_split_beneath(s, key);
 +                      else
 +                              r = btree_split_sibling(s, root, i, key);
 +
 +                      if (r < 0)
 +                              return r;
 +              }
 +
 +              node = dm_block_data(shadow_current(s));
 +
 +              i = lower_bound(node, key);
 +
 +              if (le32_to_cpu(node->header.flags) & LEAF_NODE)
 +                      break;
 +
 +              if (i < 0) {
 +                      /* change the bounds on the lowest key */
 +                      node->keys[0] = cpu_to_le64(key);
 +                      i = 0;
 +              }
 +
 +              root = value64(node, i);
 +              top = 0;
 +      }
 +
 +      if (i < 0 || le64_to_cpu(node->keys[i]) != key)
 +              i++;
 +
 +      *index = i;
 +      return 0;
 +}
 +
 +static int insert(struct dm_btree_info *info, dm_block_t root,
 +                uint64_t *keys, void *value, dm_block_t *new_root,
 +                int *inserted)
 +                __dm_written_to_disk(value)
 +{
 +      int r, need_insert;
 +      unsigned level, index = -1, last_level = info->levels - 1;
 +      dm_block_t block = root;
 +      struct shadow_spine spine;
 +      struct node *n;
 +      struct dm_btree_value_type le64_type;
 +
 +      le64_type.context = NULL;
 +      le64_type.size = sizeof(__le64);
 +      le64_type.inc = NULL;
 +      le64_type.dec = NULL;
 +      le64_type.equal = NULL;
 +
 +      init_shadow_spine(&spine, info);
 +
 +      for (level = 0; level < (info->levels - 1); level++) {
 +              r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
 +              if (r < 0)
 +                      goto bad;
 +
 +              n = dm_block_data(shadow_current(&spine));
 +              need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
 +                             (le64_to_cpu(n->keys[index]) != keys[level]));
 +
 +              if (need_insert) {
 +                      dm_block_t new_tree;
 +                      __le64 new_le;
 +
 +                      r = dm_btree_empty(info, &new_tree);
 +                      if (r < 0)
 +                              goto bad;
 +
 +                      new_le = cpu_to_le64(new_tree);
 +                      __dm_bless_for_disk(&new_le);
 +
 +                      r = insert_at(sizeof(uint64_t), n, index,
 +                                    keys[level], &new_le);
 +                      if (r)
 +                              goto bad;
 +              }
 +
 +              if (level < last_level)
 +                      block = value64(n, index);
 +      }
 +
 +      r = btree_insert_raw(&spine, block, &info->value_type,
 +                           keys[level], &index);
 +      if (r < 0)
 +              goto bad;
 +
 +      n = dm_block_data(shadow_current(&spine));
 +      need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
 +                     (le64_to_cpu(n->keys[index]) != keys[level]));
 +
 +      if (need_insert) {
 +              if (inserted)
 +                      *inserted = 1;
 +
 +              r = insert_at(info->value_type.size, n, index,
 +                            keys[level], value);
 +              if (r)
 +                      goto bad_unblessed;
 +      } else {
 +              if (inserted)
 +                      *inserted = 0;
 +
 +              if (info->value_type.dec &&
 +                  (!info->value_type.equal ||
 +                   !info->value_type.equal(
 +                           info->value_type.context,
 +                           value_ptr(n, index, info->value_type.size),
 +                           value))) {
 +                      info->value_type.dec(info->value_type.context,
 +                                           value_ptr(n, index, info->value_type.size));
 +              }
 +              memcpy_disk(value_ptr(n, index, info->value_type.size),
 +                          value, info->value_type.size);
 +      }
 +
 +      *new_root = shadow_root(&spine);
 +      exit_shadow_spine(&spine);
 +
 +      return 0;
 +
 +bad:
 +      __dm_unbless_for_disk(value);
 +bad_unblessed:
 +      exit_shadow_spine(&spine);
 +      return r;
 +}
 +
 +int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
 +                  uint64_t *keys, void *value, dm_block_t *new_root)
 +                  __dm_written_to_disk(value)
 +{
 +      return insert(info, root, keys, value, new_root, NULL);
 +}
 +EXPORT_SYMBOL_GPL(dm_btree_insert);
 +
 +int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
 +                         uint64_t *keys, void *value, dm_block_t *new_root,
 +                         int *inserted)
 +                         __dm_written_to_disk(value)
 +{
 +      return insert(info, root, keys, value, new_root, inserted);
 +}
 +EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
 +
 +/*----------------------------------------------------------------*/
 +
 +static int find_highest_key(struct ro_spine *s, dm_block_t block,
 +                          uint64_t *result_key, dm_block_t *next_block)
 +{
 +      int i, r;
 +      uint32_t flags;
 +
 +      do {
 +              r = ro_step(s, block);
 +              if (r < 0)
 +                      return r;
 +
 +              flags = le32_to_cpu(ro_node(s)->header.flags);
 +              i = le32_to_cpu(ro_node(s)->header.nr_entries);
 +              if (!i)
 +                      return -ENODATA;
 +              else
 +                      i--;
 +
 +              *result_key = le64_to_cpu(ro_node(s)->keys[i]);
 +              if (next_block || flags & INTERNAL_NODE)
 +                      block = value64(ro_node(s), i);
 +
 +      } while (flags & INTERNAL_NODE);
 +
 +      if (next_block)
 +              *next_block = block;
 +      return 0;
 +}
 +
 +int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
 +                            uint64_t *result_keys)
 +{
 +      int r = 0, count = 0, level;
 +      struct ro_spine spine;
 +
 +      init_ro_spine(&spine, info);
 +      for (level = 0; level < info->levels; level++) {
 +              r = find_highest_key(&spine, root, result_keys + level,
 +                                   level == info->levels - 1 ? NULL : &root);
 +              if (r == -ENODATA) {
 +                      r = 0;
 +                      break;
 +
 +              } else if (r)
 +                      break;
 +
 +              count++;
 +      }
 +      exit_ro_spine(&spine);
 +
 +      return r ? r : count;
 +}
 +EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
index aeff7852cf79bf731ab69370fffe9ce0f4c38537,0000000000000000000000000000000000000000..fc469ba9f6277a7c701615a1b3d788a05fa52559
mode 100644,000000..100644
--- /dev/null
@@@ -1,335 -1,0 +1,335 @@@
- #include <linux/module.h>
 +/*
 + * Copyright (C) 2011 Red Hat, Inc.
 + *
 + * This file is released under the GPL.
 + */
 +
 +#include "dm-space-map-checker.h"
 +#include "dm-space-map-common.h"
 +#include "dm-space-map-disk.h"
 +#include "dm-space-map.h"
 +#include "dm-transaction-manager.h"
 +
 +#include <linux/list.h>
 +#include <linux/slab.h>
++#include <linux/export.h>
 +#include <linux/device-mapper.h>
 +
 +#define DM_MSG_PREFIX "space map disk"
 +
 +/*----------------------------------------------------------------*/
 +
 +/*
 + * An on-disk implementation of the dm_space_map interface.
 + */
 +struct sm_disk {
 +      struct dm_space_map sm;
 +
 +      struct ll_disk ll;
 +      struct ll_disk old_ll;
 +
 +      dm_block_t begin;
 +      dm_block_t nr_allocated_this_transaction;
 +};
 +
 +static void sm_disk_destroy(struct dm_space_map *sm)
 +{
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +
 +      kfree(smd);
 +}
 +
 +static int sm_disk_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 +{
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +
 +      return sm_ll_extend(&smd->ll, extra_blocks);
 +}
 +
 +static int sm_disk_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
 +{
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +      *count = smd->old_ll.nr_blocks;
 +
 +      return 0;
 +}
 +
 +static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
 +{
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +      *count = (smd->old_ll.nr_blocks - smd->old_ll.nr_allocated) - smd->nr_allocated_this_transaction;
 +
 +      return 0;
 +}
 +
 +static int sm_disk_get_count(struct dm_space_map *sm, dm_block_t b,
 +                           uint32_t *result)
 +{
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +      return sm_ll_lookup(&smd->ll, b, result);
 +}
 +
 +static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
 +                                        int *result)
 +{
 +      int r;
 +      uint32_t count;
 +
 +      r = sm_disk_get_count(sm, b, &count);
 +      if (r)
 +              return r;
 +
 +      *result = count > 1;
 +
 +      return 0;
 +}
 +
 +static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
 +                           uint32_t count)
 +{
 +      int r;
 +      uint32_t old_count;
 +      enum allocation_event ev;
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +
 +      r = sm_ll_insert(&smd->ll, b, count, &ev);
 +      if (!r) {
 +              switch (ev) {
 +              case SM_NONE:
 +                      break;
 +
 +              case SM_ALLOC:
 +                      /*
 +                       * This _must_ be free in the prior transaction
 +                       * otherwise we've lost atomicity.
 +                       */
 +                      smd->nr_allocated_this_transaction++;
 +                      break;
 +
 +              case SM_FREE:
 +                      /*
 +                       * It's only free if it's also free in the last
 +                       * transaction.
 +                       */
 +                      r = sm_ll_lookup(&smd->old_ll, b, &old_count);
 +                      if (r)
 +                              return r;
 +
 +                      if (!old_count)
 +                              smd->nr_allocated_this_transaction--;
 +                      break;
 +              }
 +      }
 +
 +      return r;
 +}
 +
 +static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
 +{
 +      int r;
 +      enum allocation_event ev;
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +
 +      r = sm_ll_inc(&smd->ll, b, &ev);
 +      if (!r && (ev == SM_ALLOC))
 +              /*
 +               * This _must_ be free in the prior transaction
 +               * otherwise we've lost atomicity.
 +               */
 +              smd->nr_allocated_this_transaction++;
 +
 +      return r;
 +}
 +
 +static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 +{
 +      int r;
 +      uint32_t old_count;
 +      enum allocation_event ev;
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +
 +      r = sm_ll_dec(&smd->ll, b, &ev);
 +      if (!r && (ev == SM_FREE)) {
 +              /*
 +               * It's only free if it's also free in the last
 +               * transaction.
 +               */
 +              r = sm_ll_lookup(&smd->old_ll, b, &old_count);
 +              if (r)
 +                      return r;
 +
 +              if (!old_count)
 +                      smd->nr_allocated_this_transaction--;
 +      }
 +
 +      return r;
 +}
 +
 +static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
 +{
 +      int r;
 +      enum allocation_event ev;
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +
 +      /* FIXME: we should loop round a couple of times */
 +      r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b);
 +      if (r)
 +              return r;
 +
 +      smd->begin = *b + 1;
 +      r = sm_ll_inc(&smd->ll, *b, &ev);
 +      if (!r) {
 +              BUG_ON(ev != SM_ALLOC);
 +              smd->nr_allocated_this_transaction++;
 +      }
 +
 +      return r;
 +}
 +
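 +/*
 + * On commit the current ll becomes old_ll, so allocations and frees in
 + * the next transaction are judged against the state just persisted.
 + */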
 +static int sm_disk_commit(struct dm_space_map *sm)
 +{
 +      int r;
 +      dm_block_t nr_free;
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +
 +      r = sm_disk_get_nr_free(sm, &nr_free);
 +      if (r)
 +              return r;
 +
 +      r = sm_ll_commit(&smd->ll);
 +      if (r)
 +              return r;
 +
 +      memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll));
 +      smd->begin = 0;
 +      smd->nr_allocated_this_transaction = 0;
 +
 +      r = sm_disk_get_nr_free(sm, &nr_free);
 +      if (r)
 +              return r;
 +
 +      return 0;
 +}
 +
 +static int sm_disk_root_size(struct dm_space_map *sm, size_t *result)
 +{
 +      *result = sizeof(struct disk_sm_root);
 +
 +      return 0;
 +}
 +
 +static int sm_disk_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
 +{
 +      struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 +      struct disk_sm_root root_le;
 +
 +      root_le.nr_blocks = cpu_to_le64(smd->ll.nr_blocks);
 +      root_le.nr_allocated = cpu_to_le64(smd->ll.nr_allocated);
 +      root_le.bitmap_root = cpu_to_le64(smd->ll.bitmap_root);
 +      root_le.ref_count_root = cpu_to_le64(smd->ll.ref_count_root);
 +
 +      if (max < sizeof(root_le))
 +              return -ENOSPC;
 +
 +      memcpy(where_le, &root_le, sizeof(root_le));
 +
 +      return 0;
 +}
 +
 +/*----------------------------------------------------------------*/
 +
 +static struct dm_space_map ops = {
 +      .destroy = sm_disk_destroy,
 +      .extend = sm_disk_extend,
 +      .get_nr_blocks = sm_disk_get_nr_blocks,
 +      .get_nr_free = sm_disk_get_nr_free,
 +      .get_count = sm_disk_get_count,
 +      .count_is_more_than_one = sm_disk_count_is_more_than_one,
 +      .set_count = sm_disk_set_count,
 +      .inc_block = sm_disk_inc_block,
 +      .dec_block = sm_disk_dec_block,
 +      .new_block = sm_disk_new_block,
 +      .commit = sm_disk_commit,
 +      .root_size = sm_disk_root_size,
 +      .copy_root = sm_disk_copy_root
 +};
 +
 +static struct dm_space_map *dm_sm_disk_create_real(
 +      struct dm_transaction_manager *tm,
 +      dm_block_t nr_blocks)
 +{
 +      int r;
 +      struct sm_disk *smd;
 +
 +      smd = kmalloc(sizeof(*smd), GFP_KERNEL);
 +      if (!smd)
 +              return ERR_PTR(-ENOMEM);
 +
 +      smd->begin = 0;
 +      smd->nr_allocated_this_transaction = 0;
 +      memcpy(&smd->sm, &ops, sizeof(smd->sm));
 +
 +      r = sm_ll_new_disk(&smd->ll, tm);
 +      if (r)
 +              goto bad;
 +
 +      r = sm_ll_extend(&smd->ll, nr_blocks);
 +      if (r)
 +              goto bad;
 +
 +      r = sm_disk_commit(&smd->sm);
 +      if (r)
 +              goto bad;
 +
 +      return &smd->sm;
 +
 +bad:
 +      kfree(smd);
 +      return ERR_PTR(r);
 +}
 +
 +struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
 +                                     dm_block_t nr_blocks)
 +{
 +      struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks);
 +      return dm_sm_checker_create_fresh(sm);
 +}
 +EXPORT_SYMBOL_GPL(dm_sm_disk_create);
 +
 +static struct dm_space_map *dm_sm_disk_open_real(
 +      struct dm_transaction_manager *tm,
 +      void *root_le, size_t len)
 +{
 +      int r;
 +      struct sm_disk *smd;
 +
 +      smd = kmalloc(sizeof(*smd), GFP_KERNEL);
 +      if (!smd)
 +              return ERR_PTR(-ENOMEM);
 +
 +      smd->begin = 0;
 +      smd->nr_allocated_this_transaction = 0;
 +      memcpy(&smd->sm, &ops, sizeof(smd->sm));
 +
 +      r = sm_ll_open_disk(&smd->ll, tm, root_le, len);
 +      if (r)
 +              goto bad;
 +
 +      r = sm_disk_commit(&smd->sm);
 +      if (r)
 +              goto bad;
 +
 +      return &smd->sm;
 +
 +bad:
 +      kfree(smd);
 +      return ERR_PTR(r);
 +}
 +
 +struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
 +                                   void *root_le, size_t len)
 +{
 +      return dm_sm_checker_create(
 +              dm_sm_disk_open_real(tm, root_le, len));
 +}
 +EXPORT_SYMBOL_GPL(dm_sm_disk_open);
 +
 +/*----------------------------------------------------------------*/
index 728e89a3f97830c3290e282a5daa7788c4fc309c,0000000000000000000000000000000000000000..6f8d38747d7f438294fca80e24a3e3a61441dda1
mode 100644,000000..100644
--- /dev/null
@@@ -1,400 -1,0 +1,400 @@@
- #include <linux/module.h>
 +/*
 + * Copyright (C) 2011 Red Hat, Inc.
 + *
 + * This file is released under the GPL.
 + */
 +#include "dm-transaction-manager.h"
 +#include "dm-space-map.h"
 +#include "dm-space-map-checker.h"
 +#include "dm-space-map-disk.h"
 +#include "dm-space-map-metadata.h"
 +#include "dm-persistent-data-internal.h"
 +
++#include <linux/export.h>
 +#include <linux/slab.h>
 +#include <linux/device-mapper.h>
 +
 +#define DM_MSG_PREFIX "transaction manager"
 +
 +/*----------------------------------------------------------------*/
 +
 +struct shadow_info {
 +      struct hlist_node hlist;
 +      dm_block_t where;
 +};
 +
 +/*
 + * It would be nice if we scaled with the size of the transaction.
 + */
 +#define HASH_SIZE 256
 +#define HASH_MASK (HASH_SIZE - 1)
 +
 +struct dm_transaction_manager {
 +      int is_clone;
 +      struct dm_transaction_manager *real;
 +
 +      struct dm_block_manager *bm;
 +      struct dm_space_map *sm;
 +
 +      spinlock_t lock;
 +      struct hlist_head buckets[HASH_SIZE];
 +};
 +
 +/*----------------------------------------------------------------*/
 +
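 +/*
 + * Has block @b already been shadowed within the current transaction?
 + */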
 +static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 +{
 +      int r = 0;
 +      unsigned bucket = dm_hash_block(b, HASH_MASK);
 +      struct shadow_info *si;
 +      struct hlist_node *n;
 +
 +      spin_lock(&tm->lock);
 +      hlist_for_each_entry(si, n, tm->buckets + bucket, hlist)
 +              if (si->where == b) {
 +                      r = 1;
 +                      break;
 +              }
 +      spin_unlock(&tm->lock);
 +
 +      return r;
 +}
 +
 +/*
 + * This can silently fail if there's no memory.  We're ok with this since
 + * creating redundant shadows causes no harm.
 + */
 +static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
 +{
 +      unsigned bucket;
 +      struct shadow_info *si;
 +
 +      si = kmalloc(sizeof(*si), GFP_NOIO);
 +      if (si) {
 +              si->where = b;
 +              bucket = dm_hash_block(b, HASH_MASK);
 +              spin_lock(&tm->lock);
 +              hlist_add_head(&si->hlist, tm->buckets + bucket);
 +              spin_unlock(&tm->lock);
 +      }
 +}
 +
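 +/*
 + * Empty the shadow table.  Done at commit time, since blocks belonging
 + * to the committed transaction must be shadowed again before they can
 + * be modified.
 + */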
 +static void wipe_shadow_table(struct dm_transaction_manager *tm)
 +{
 +      struct shadow_info *si;
 +      struct hlist_node *n, *tmp;
 +      struct hlist_head *bucket;
 +      int i;
 +
 +      spin_lock(&tm->lock);
 +      for (i = 0; i < HASH_SIZE; i++) {
 +              bucket = tm->buckets + i;
 +              hlist_for_each_entry_safe(si, n, tmp, bucket, hlist)
 +                      kfree(si);
 +
 +              INIT_HLIST_HEAD(bucket);
 +      }
 +
 +      spin_unlock(&tm->lock);
 +}
 +
 +/*----------------------------------------------------------------*/
 +
 +static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
 +                                                 struct dm_space_map *sm)
 +{
 +      int i;
 +      struct dm_transaction_manager *tm;
 +
 +      tm = kmalloc(sizeof(*tm), GFP_KERNEL);
 +      if (!tm)
 +              return ERR_PTR(-ENOMEM);
 +
 +      tm->is_clone = 0;
 +      tm->real = NULL;
 +      tm->bm = bm;
 +      tm->sm = sm;
 +
 +      spin_lock_init(&tm->lock);
 +      for (i = 0; i < HASH_SIZE; i++)
 +              INIT_HLIST_HEAD(tm->buckets + i);
 +
 +      return tm;
 +}
 +
 +struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transaction_manager *real)
 +{
 +      struct dm_transaction_manager *tm;
 +
 +      tm = kmalloc(sizeof(*tm), GFP_KERNEL);
 +      if (tm) {
 +              tm->is_clone = 1;
 +              tm->real = real;
 +      }
 +
 +      return tm;
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone);
 +
 +void dm_tm_destroy(struct dm_transaction_manager *tm)
 +{
 +      kfree(tm);
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_destroy);
 +
 +int dm_tm_pre_commit(struct dm_transaction_manager *tm)
 +{
 +      int r;
 +
 +      if (tm->is_clone)
 +              return -EWOULDBLOCK;
 +
 +      r = dm_sm_commit(tm->sm);
 +      if (r < 0)
 +              return r;
 +
 +      return 0;
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
 +
 +int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
 +{
 +      if (tm->is_clone)
 +              return -EWOULDBLOCK;
 +
 +      wipe_shadow_table(tm);
 +
 +      return dm_bm_flush_and_unlock(tm->bm, root);
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_commit);
 +
 +int dm_tm_new_block(struct dm_transaction_manager *tm,
 +                  struct dm_block_validator *v,
 +                  struct dm_block **result)
 +{
 +      int r;
 +      dm_block_t new_block;
 +
 +      if (tm->is_clone)
 +              return -EWOULDBLOCK;
 +
 +      r = dm_sm_new_block(tm->sm, &new_block);
 +      if (r < 0)
 +              return r;
 +
 +      r = dm_bm_write_lock_zero(tm->bm, new_block, v, result);
 +      if (r < 0) {
 +              dm_sm_dec_block(tm->sm, new_block);
 +              return r;
 +      }
 +
 +      /*
 +       * New blocks count as shadows in that they don't need to be
 +       * shadowed again.
 +       */
 +      insert_shadow(tm, new_block);
 +
 +      return 0;
 +}
 +
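 +/*
 + * Allocate a new block, move the contents of @orig into it and drop a
 + * reference on @orig.  On success *result is returned write locked.
 + */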
 +static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
 +                        struct dm_block_validator *v,
 +                        struct dm_block **result)
 +{
 +      int r;
 +      dm_block_t new;
 +      struct dm_block *orig_block;
 +
 +      r = dm_sm_new_block(tm->sm, &new);
 +      if (r < 0)
 +              return r;
 +
 +      r = dm_sm_dec_block(tm->sm, orig);
 +      if (r < 0)
 +              return r;
 +
 +      r = dm_bm_read_lock(tm->bm, orig, v, &orig_block);
 +      if (r < 0)
 +              return r;
 +
 +      r = dm_bm_unlock_move(orig_block, new);
 +      if (r < 0) {
 +              dm_bm_unlock(orig_block);
 +              return r;
 +      }
 +
 +      return dm_bm_write_lock(tm->bm, new, v, result);
 +}
 +
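 +/*
 + * Shadow @orig for writing: if it has already been shadowed this
 + * transaction and only has one reference, it is simply write locked;
 + * otherwise it is copied.  *inc_children tells the caller whether the
 + * block's children need their reference counts incrementing.
 + */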
 +int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
 +                     struct dm_block_validator *v, struct dm_block **result,
 +                     int *inc_children)
 +{
 +      int r;
 +
 +      if (tm->is_clone)
 +              return -EWOULDBLOCK;
 +
 +      r = dm_sm_count_is_more_than_one(tm->sm, orig, inc_children);
 +      if (r < 0)
 +              return r;
 +
 +      if (is_shadow(tm, orig) && !*inc_children)
 +              return dm_bm_write_lock(tm->bm, orig, v, result);
 +
 +      r = __shadow_block(tm, orig, v, result);
 +      if (r < 0)
 +              return r;
 +      insert_shadow(tm, dm_block_location(*result));
 +
 +      return r;
 +}
 +
 +int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
 +                  struct dm_block_validator *v,
 +                  struct dm_block **blk)
 +{
 +      if (tm->is_clone)
 +              return dm_bm_read_try_lock(tm->real->bm, b, v, blk);
 +
 +      return dm_bm_read_lock(tm->bm, b, v, blk);
 +}
 +
 +int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
 +{
 +      return dm_bm_unlock(b);
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_unlock);
 +
 +void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
 +{
 +      /*
 +       * The non-blocking clone doesn't support this.
 +       */
 +      BUG_ON(tm->is_clone);
 +
 +      dm_sm_inc_block(tm->sm, b);
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_inc);
 +
 +void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
 +{
 +      /*
 +       * The non-blocking clone doesn't support this.
 +       */
 +      BUG_ON(tm->is_clone);
 +
 +      dm_sm_dec_block(tm->sm, b);
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_dec);
 +
 +int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
 +            uint32_t *result)
 +{
 +      if (tm->is_clone)
 +              return -EWOULDBLOCK;
 +
 +      return dm_sm_get_count(tm->sm, b, result);
 +}
 +
 +struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm)
 +{
 +      return tm->bm;
 +}
 +
 +/*----------------------------------------------------------------*/
 +
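 +/*
 + * Common helper for dm_tm_create_with_sm() and dm_tm_open_with_sm():
 + * builds a transaction manager around a metadata space map and takes a
 + * write lock on the superblock.
 + */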
 +static int dm_tm_create_internal(struct dm_block_manager *bm,
 +                               dm_block_t sb_location,
 +                               struct dm_block_validator *sb_validator,
 +                               size_t root_offset, size_t root_max_len,
 +                               struct dm_transaction_manager **tm,
 +                               struct dm_space_map **sm,
 +                               struct dm_block **sblock,
 +                               int create)
 +{
 +      int r;
 +      struct dm_space_map *inner;
 +
 +      inner = dm_sm_metadata_init();
 +      if (IS_ERR(inner))
 +              return PTR_ERR(inner);
 +
 +      *tm = dm_tm_create(bm, inner);
 +      if (IS_ERR(*tm)) {
 +              dm_sm_destroy(inner);
 +              return PTR_ERR(*tm);
 +      }
 +
 +      if (create) {
 +              r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location,
 +                                        sb_validator, sblock);
 +              if (r < 0) {
 +                      DMERR("couldn't lock superblock");
 +                      goto bad1;
 +              }
 +
 +              r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm),
 +                                        sb_location);
 +              if (r) {
 +                      DMERR("couldn't create metadata space map");
 +                      goto bad2;
 +              }
 +
 +              *sm = dm_sm_checker_create(inner);
 +              if (!*sm)
 +                      goto bad2;
 +
 +      } else {
 +              r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location,
 +                                   sb_validator, sblock);
 +              if (r < 0) {
 +                      DMERR("couldn't lock superblock");
 +                      goto bad1;
 +              }
 +
 +              r = dm_sm_metadata_open(inner, *tm,
 +                                      dm_block_data(*sblock) + root_offset,
 +                                      root_max_len);
 +              if (r) {
 +                      DMERR("couldn't open metadata space map");
 +                      goto bad2;
 +              }
 +
 +              *sm = dm_sm_checker_create(inner);
 +              if (!*sm)
 +                      goto bad2;
 +      }
 +
 +      return 0;
 +
 +bad2:
 +      dm_tm_unlock(*tm, *sblock);
 +bad1:
 +      dm_tm_destroy(*tm);
 +      dm_sm_destroy(inner);
 +      return r;
 +}
 +
 +int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
 +                       struct dm_block_validator *sb_validator,
 +                       struct dm_transaction_manager **tm,
 +                       struct dm_space_map **sm, struct dm_block **sblock)
 +{
 +      return dm_tm_create_internal(bm, sb_location, sb_validator,
 +                                   0, 0, tm, sm, sblock, 1);
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_create_with_sm);
 +
 +int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
 +                     struct dm_block_validator *sb_validator,
 +                     size_t root_offset, size_t root_max_len,
 +                     struct dm_transaction_manager **tm,
 +                     struct dm_space_map **sm, struct dm_block **sblock)
 +{
 +      return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset,
 +                                   root_max_len, tm, sm, sblock, 0);
 +}
 +EXPORT_SYMBOL_GPL(dm_tm_open_with_sm);
 +
 +/*----------------------------------------------------------------*/
Simple merge
Simple merge
Simple merge
Simple merge
index f5d53a2023442fbfe1bb8c9bf77817e0fd051174,31e53b6a881aa2f7894d72f43bb3fe50e84495a6..d6b1cf66042d196b40a7b99c18241fe186840d1d
      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
  
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <media/saa7146.h>
+ #include <linux/module.h>
  
  LIST_HEAD(saa7146_devices);
  DEFINE_MUTEX(saa7146_devices_lock);
index a92546144eaa30dc33695104374841b73b5b2bda,e4547afcfa8809e41c9488b380c58939188ca719..71f8e018e564818225137d437ecbc369418a5519
@@@ -1,6 -1,5 +1,7 @@@
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <media/saa7146_vv.h>
+ #include <linux/module.h>
  
  /****************************************************************************/
  /* resource management functions, shamelessly stolen from saa7134 driver */
index 79ad73accb27426a058c03ef4481be174988eda0,c9c6e9a6c31d30e187ef38b480d0a46a226f1803..bc1f545c95cb2b669cae45a7e59bfe268e4d2844
@@@ -1,6 -1,5 +1,7 @@@
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <linux/kernel.h>
+ #include <linux/export.h>
  #include <media/saa7146_vv.h>
  
  static void calculate_output_format_register(struct saa7146_dev* saa, u32 palette, u32* clip_format)
index 384b358d30379dfcdef7a99428bea28c24f7b77f,3a00253fe1ee8bc6ab5e0e3fb89ef8cd451feef2..ce30533fd9724e1f802ecb6f67a6e6f42f998a65
@@@ -1,7 -1,6 +1,8 @@@
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <media/saa7146_vv.h>
  #include <media/v4l2-chip-ident.h>
+ #include <linux/module.h>
  
  static int max_memory = 32;
  
index 774d507b66cc7e841b412efe4c06ea4a6c8c1be5,977211fec137bdcf668ef7a022f23548e38ae11b..43be7238311ec513726e8ecaa13d0864c2b622dd
@@@ -1,5 -1,5 +1,6 @@@
  #include <linux/i2c.h>
 +#include <linux/mutex.h>
+ #include <linux/module.h>
  
  #include "dibx000_common.h"
  
Simple merge
Simple merge
Simple merge
index 5914390211ff2ac6f087ec5693c08234ec55bd7d,c2594948ca3fad7cbc7c4d52a327b1fa57a94904..12eedf4d515aa86dcf60107de69158895ae04795
@@@ -31,8 -31,8 +31,9 @@@
   */
  
  #include <linux/delay.h>
+ #include <linux/module.h>
  #include <linux/i2c.h>
 +#include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/version.h>
  #include <media/adp1653.h>
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index dc58750bb71bbfc52c45fd8afe93be1d5d288be7,50ad93bb49ddfbf4e4839f81102fd98299958dbf..5be53ae9b61cf2a6d6c318c881e43605b3dee526
@@@ -23,8 -23,8 +23,9 @@@
  
  #include <linux/slab.h>
  #include <linux/i2c.h>
 +#include <linux/interrupt.h>
  #include <linux/pm_runtime.h>
+ #include <linux/module.h>
  #include <linux/mutex.h>
  #include <linux/mfd/core.h>
  #include <linux/mfd/max8997.h>
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index d291a54acfad5f94ec127cfdbff6e37d5b8cf85d,b95cbdccc11a506128c5e8c237f3c7121fbed86e..85f4a9a5d12e5d00a7e445bd1e1e0a784eaf7cd0
  #define KMSG_COMPONENT "vmur"
  #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  
 -#include <linux/kernel_stat.h>
  #include <linux/cdev.h>
  #include <linux/slab.h>
+ #include <linux/module.h>
  
  #include <asm/uaccess.h>
  #include <asm/cio.h>
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 7b38512d6c419cd43690bca290ab4765220c409b,0000000000000000000000000000000000000000..ced26c8ccd573eb8e6757a30681901b7a0ac88eb
mode 100644,000000..100644
--- /dev/null
@@@ -1,424 -1,0 +1,425 @@@
 +/*
 + * opal driver interface to hvc_console.c
 + *
 + * Copyright 2011 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License as published by
 + * the Free Software Foundation; either version 2 of the License, or
 + * (at your option) any later version.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 + * GNU General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public License
 + * along with this program; if not, write to the Free Software
 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 + *
 + */
 +
 +#undef DEBUG
 +
 +#include <linux/types.h>
 +#include <linux/init.h>
 +#include <linux/delay.h>
 +#include <linux/slab.h>
 +#include <linux/console.h>
 +#include <linux/of.h>
 +#include <linux/of_platform.h>
++#include <linux/export.h>
 +
 +#include <asm/hvconsole.h>
 +#include <asm/prom.h>
 +#include <asm/firmware.h>
 +#include <asm/hvsi.h>
 +#include <asm/udbg.h>
 +#include <asm/opal.h>
 +
 +#include "hvc_console.h"
 +
 +static const char hvc_opal_name[] = "hvc_opal";
 +
 +static struct of_device_id hvc_opal_match[] __devinitdata = {
 +      { .name = "serial", .compatible = "ibm,opal-console-raw" },
 +      { .name = "serial", .compatible = "ibm,opal-console-hvsi" },
 +      { },
 +};
 +
 +typedef enum hv_protocol {
 +      HV_PROTOCOL_RAW,
 +      HV_PROTOCOL_HVSI
 +} hv_protocol_t;
 +
 +struct hvc_opal_priv {
 +      hv_protocol_t           proto;  /* Raw data or HVSI packets */
 +      struct hvsi_priv        hvsi;   /* HVSI specific data */
 +};
 +static struct hvc_opal_priv *hvc_opal_privs[MAX_NR_HVC_CONSOLES];
 +
 +/* For early boot console */
 +static struct hvc_opal_priv hvc_opal_boot_priv;
 +static u32 hvc_opal_boot_termno;
 +
 +static const struct hv_ops hvc_opal_raw_ops = {
 +      .get_chars = opal_get_chars,
 +      .put_chars = opal_put_chars,
 +      .notifier_add = notifier_add_irq,
 +      .notifier_del = notifier_del_irq,
 +      .notifier_hangup = notifier_hangup_irq,
 +};
 +
 +static int hvc_opal_hvsi_get_chars(uint32_t vtermno, char *buf, int count)
 +{
 +      struct hvc_opal_priv *pv = hvc_opal_privs[vtermno];
 +
 +      if (WARN_ON(!pv))
 +              return -ENODEV;
 +
 +      return hvsilib_get_chars(&pv->hvsi, buf, count);
 +}
 +
 +static int hvc_opal_hvsi_put_chars(uint32_t vtermno, const char *buf, int count)
 +{
 +      struct hvc_opal_priv *pv = hvc_opal_privs[vtermno];
 +
 +      if (WARN_ON(!pv))
 +              return -ENODEV;
 +
 +      return hvsilib_put_chars(&pv->hvsi, buf, count);
 +}
 +
 +static int hvc_opal_hvsi_open(struct hvc_struct *hp, int data)
 +{
 +      struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
 +      int rc;
 +
 +      pr_devel("HVSI@%x: do open !\n", hp->vtermno);
 +
 +      rc = notifier_add_irq(hp, data);
 +      if (rc)
 +              return rc;
 +
 +      return hvsilib_open(&pv->hvsi, hp);
 +}
 +
 +static void hvc_opal_hvsi_close(struct hvc_struct *hp, int data)
 +{
 +      struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
 +
 +      pr_devel("HVSI@%x: do close !\n", hp->vtermno);
 +
 +      hvsilib_close(&pv->hvsi, hp);
 +
 +      notifier_del_irq(hp, data);
 +}
 +
 +static void hvc_opal_hvsi_hangup(struct hvc_struct *hp, int data)
 +{
 +      struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
 +
 +      pr_devel("HVSI@%x: do hangup !\n", hp->vtermno);
 +
 +      hvsilib_close(&pv->hvsi, hp);
 +
 +      notifier_hangup_irq(hp, data);
 +}
 +
 +static int hvc_opal_hvsi_tiocmget(struct hvc_struct *hp)
 +{
 +      struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
 +
 +      if (!pv)
 +              return -EINVAL;
 +      return pv->hvsi.mctrl;
 +}
 +
 +static int hvc_opal_hvsi_tiocmset(struct hvc_struct *hp, unsigned int set,
 +                              unsigned int clear)
 +{
 +      struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
 +
 +      pr_devel("HVSI@%x: Set modem control, set=%x,clr=%x\n",
 +               hp->vtermno, set, clear);
 +
 +      if (set & TIOCM_DTR)
 +              hvsilib_write_mctrl(&pv->hvsi, 1);
 +      else if (clear & TIOCM_DTR)
 +              hvsilib_write_mctrl(&pv->hvsi, 0);
 +
 +      return 0;
 +}
 +
 +static const struct hv_ops hvc_opal_hvsi_ops = {
 +      .get_chars = hvc_opal_hvsi_get_chars,
 +      .put_chars = hvc_opal_hvsi_put_chars,
 +      .notifier_add = hvc_opal_hvsi_open,
 +      .notifier_del = hvc_opal_hvsi_close,
 +      .notifier_hangup = hvc_opal_hvsi_hangup,
 +      .tiocmget = hvc_opal_hvsi_tiocmget,
 +      .tiocmset = hvc_opal_hvsi_tiocmset,
 +};
 +
 +static int __devinit hvc_opal_probe(struct platform_device *dev)
 +{
 +      const struct hv_ops *ops;
 +      struct hvc_struct *hp;
 +      struct hvc_opal_priv *pv;
 +      hv_protocol_t proto;
 +      unsigned int termno, boot = 0;
 +      const __be32 *reg;
 +
 +      if (of_device_is_compatible(dev->dev.of_node, "ibm,opal-console-raw")) {
 +              proto = HV_PROTOCOL_RAW;
 +              ops = &hvc_opal_raw_ops;
 +      } else if (of_device_is_compatible(dev->dev.of_node,
 +                                         "ibm,opal-console-hvsi")) {
 +              proto = HV_PROTOCOL_HVSI;
 +              ops = &hvc_opal_hvsi_ops;
 +      } else {
 +              pr_err("hvc_opal: Unknown protocol for %s\n",
 +                     dev->dev.of_node->full_name);
 +              return -ENXIO;
 +      }
 +
 +      reg = of_get_property(dev->dev.of_node, "reg", NULL);
 +      termno = reg ? be32_to_cpup(reg) : 0;
 +
 +      /* Is this our boot console? */
 +      if (hvc_opal_privs[termno] == &hvc_opal_boot_priv) {
 +              pv = hvc_opal_privs[termno];
 +              boot = 1;
 +      } else if (hvc_opal_privs[termno] == NULL) {
 +              pv = kzalloc(sizeof(struct hvc_opal_priv), GFP_KERNEL);
 +              if (!pv)
 +                      return -ENOMEM;
 +              pv->proto = proto;
 +              hvc_opal_privs[termno] = pv;
 +              if (proto == HV_PROTOCOL_HVSI)
 +                      hvsilib_init(&pv->hvsi, opal_get_chars, opal_put_chars,
 +                                   termno, 0);
 +
 +              /* Instantiate now to establish a mapping index == vtermno */
 +              hvc_instantiate(termno, termno, ops);
 +      } else {
 +              pr_err("hvc_opal: Device %s has duplicate terminal number #%d\n",
 +                     dev->dev.of_node->full_name, termno);
 +              return -ENXIO;
 +      }
 +
 +      pr_info("hvc%d: %s protocol on %s%s\n", termno,
 +              proto == HV_PROTOCOL_RAW ? "raw" : "hvsi",
 +              dev->dev.of_node->full_name,
 +              boot ? " (boot console)" : "");
 +
 +      /* We don't do IRQ yet */
 +      hp = hvc_alloc(termno, 0, ops, MAX_VIO_PUT_CHARS);
 +      if (IS_ERR(hp))
 +              return PTR_ERR(hp);
 +      dev_set_drvdata(&dev->dev, hp);
 +
 +      return 0;
 +}
 +
 +static int __devexit hvc_opal_remove(struct platform_device *dev)
 +{
 +      struct hvc_struct *hp = dev_get_drvdata(&dev->dev);
 +      int rc, termno;
 +
 +      termno = hp->vtermno;
 +      rc = hvc_remove(hp);
 +      if (rc == 0) {
 +              if (hvc_opal_privs[termno] != &hvc_opal_boot_priv)
 +                      kfree(hvc_opal_privs[termno]);
 +              hvc_opal_privs[termno] = NULL;
 +      }
 +      return rc;
 +}
 +
 +static struct platform_driver hvc_opal_driver = {
 +      .probe          = hvc_opal_probe,
 +      .remove         = __devexit_p(hvc_opal_remove),
 +      .driver         = {
 +              .name   = hvc_opal_name,
 +              .owner  = THIS_MODULE,
 +              .of_match_table = hvc_opal_match,
 +      }
 +};
 +
 +static int __init hvc_opal_init(void)
 +{
 +      if (!firmware_has_feature(FW_FEATURE_OPAL))
 +              return -ENODEV;
 +
 +      /* Register as a vio device to receive callbacks */
 +      return platform_driver_register(&hvc_opal_driver);
 +}
 +module_init(hvc_opal_init);
 +
 +static void __exit hvc_opal_exit(void)
 +{
 +      platform_driver_unregister(&hvc_opal_driver);
 +}
 +module_exit(hvc_opal_exit);
 +
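 +/*
 + * Low level udbg routines that talk to the OPAL console directly,
 + * bypassing the hvc layer.
 + */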
 +static void udbg_opal_putc(char c)
 +{
 +      unsigned int termno = hvc_opal_boot_termno;
 +      int count = -1;
 +
 +      if (c == '\n')
 +              udbg_opal_putc('\r');
 +
 +      do {
 +              switch (hvc_opal_boot_priv.proto) {
 +              case HV_PROTOCOL_RAW:
 +                      count = opal_put_chars(termno, &c, 1);
 +                      break;
 +              case HV_PROTOCOL_HVSI:
 +                      count = hvc_opal_hvsi_put_chars(termno, &c, 1);
 +                      break;
 +              }
 +      } while (count == 0 || count == -EAGAIN);
 +}
 +
 +static int udbg_opal_getc_poll(void)
 +{
 +      unsigned int termno = hvc_opal_boot_termno;
 +      int rc = 0;
 +      char c;
 +
 +      switch (hvc_opal_boot_priv.proto) {
 +      case HV_PROTOCOL_RAW:
 +              rc = opal_get_chars(termno, &c, 1);
 +              break;
 +      case HV_PROTOCOL_HVSI:
 +              rc = hvc_opal_hvsi_get_chars(termno, &c, 1);
 +              break;
 +      }
 +      if (!rc)
 +              return -1;
 +      return c;
 +}
 +
 +static int udbg_opal_getc(void)
 +{
 +      int ch;
 +      for (;;) {
 +              ch = udbg_opal_getc_poll();
 +              if (ch == -1) {
 +                      /* This shouldn't be needed...but... */
 +                      volatile unsigned long delay;
 +                      for (delay = 0; delay < 2000000; delay++)
 +                              ;
 +              } else {
 +                      return ch;
 +              }
 +      }
 +}
 +
 +static void udbg_init_opal_common(void)
 +{
 +      udbg_putc = udbg_opal_putc;
 +      udbg_getc = udbg_opal_getc;
 +      udbg_getc_poll = udbg_opal_getc_poll;
 +      tb_ticks_per_usec = 0x200; /* Make udelay not suck */
 +}
 +
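 +/*
 + * Locate the firmware console from the device tree and wire it up for
 + * udbg and as the preferred hvc console early in boot, before the
 + * platform driver has probed.
 + */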
 +void __init hvc_opal_init_early(void)
 +{
 +      struct device_node *stdout_node = NULL;
 +      const u32 *termno;
 +      const char *name = NULL;
 +      const struct hv_ops *ops;
 +      u32 index;
 +
 +      /* find the boot console from /chosen/stdout */
 +      if (of_chosen)
 +              name = of_get_property(of_chosen, "linux,stdout-path", NULL);
 +      if (name) {
 +              stdout_node = of_find_node_by_path(name);
 +              if (!stdout_node) {
 +                      pr_err("hvc_opal: Failed to locate default console!\n");
 +                      return;
 +              }
 +      } else {
 +              struct device_node *opal, *np;
 +
 +              /* Current OPAL takeover doesn't provide the stdout
 +               * path, so we hard wire it
 +               */
 +              opal = of_find_node_by_path("/ibm,opal/consoles");
 +              if (opal)
 +                      pr_devel("hvc_opal: Found consoles in new location\n");
 +              if (!opal) {
 +                      opal = of_find_node_by_path("/ibm,opal");
 +                      if (opal)
 +                              pr_devel("hvc_opal: "
 +                                       "Found consoles in old location\n");
 +              }
 +              if (!opal)
 +                      return;
 +              for_each_child_of_node(opal, np) {
 +                      if (!strcmp(np->name, "serial")) {
 +                              stdout_node = np;
 +                              break;
 +                      }
 +              }
 +              of_node_put(opal);
 +      }
 +      if (!stdout_node)
 +              return;
 +      termno = of_get_property(stdout_node, "reg", NULL);
 +      index = termno ? *termno : 0;
 +      if (index >= MAX_NR_HVC_CONSOLES)
 +              return;
 +      hvc_opal_privs[index] = &hvc_opal_boot_priv;
 +
 +      /* Check the protocol */
 +      if (of_device_is_compatible(stdout_node, "ibm,opal-console-raw")) {
 +              hvc_opal_boot_priv.proto = HV_PROTOCOL_RAW;
 +              ops = &hvc_opal_raw_ops;
 +              pr_devel("hvc_opal: Found RAW console\n");
 +      } else if (of_device_is_compatible(stdout_node,
 +                                         "ibm,opal-console-hvsi")) {
 +              hvc_opal_boot_priv.proto = HV_PROTOCOL_HVSI;
 +              ops = &hvc_opal_hvsi_ops;
 +              hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars,
 +                           opal_put_chars, index, 1);
 +              /* HVSI, perform the handshake now */
 +              hvsilib_establish(&hvc_opal_boot_priv.hvsi);
 +              pr_devel("hvc_opal: Found HVSI console\n");
 +      } else
 +              goto out;
 +      hvc_opal_boot_termno = index;
 +      udbg_init_opal_common();
 +      add_preferred_console("hvc", index, NULL);
 +      hvc_instantiate(index, index, ops);
 +out:
 +      of_node_put(stdout_node);
 +}
 +
 +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL_RAW
 +void __init udbg_init_debug_opal(void)
 +{
 +      u32 index = CONFIG_PPC_EARLY_DEBUG_OPAL_VTERMNO;
 +      hvc_opal_privs[index] = &hvc_opal_boot_priv;
 +      hvc_opal_boot_priv.proto = HV_PROTOCOL_RAW;
 +      hvc_opal_boot_termno = index;
 +      udbg_init_opal_common();
 +}
 +#endif /* CONFIG_PPC_EARLY_DEBUG_OPAL_RAW */
 +
 +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI
 +void __init udbg_init_debug_opal_hvsi(void)
 +{
 +      u32 index = CONFIG_PPC_EARLY_DEBUG_OPAL_VTERMNO;
 +      hvc_opal_privs[index] = &hvc_opal_boot_priv;
 +      hvc_opal_boot_termno = index;
 +      udbg_init_opal_common();
 +      hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, opal_put_chars,
 +                   index, 1);
 +      hvsilib_establish(&hvc_opal_boot_priv.hvsi);
 +}
 +#endif /* CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI */
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc fs/nfs/pnfs.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index ace51af4369f668ea647f47c2d87f76d4478b121,1ceff5ae9d31c2df27b1e6bcc46df342a5b43125..75f53f874b24a0c0abb790f501f2f60ace48e17e
  #include <linux/device.h>
  #include <linux/uio.h>
  #include <linux/dma-direction.h>
 +#include <linux/scatterlist.h>
+ #include <linux/bitmap.h>
+ #include <asm/page.h>
  
 -struct scatterlist;
 -
  /**
   * typedef dma_cookie_t - an opaque DMA cookie
   *
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc kernel/cpu.c
Simple merge
Simple merge
diff --cc kernel/module.c
Simple merge
diff --cc kernel/signal.c
Simple merge
diff --cc kernel/sys.c
Simple merge
diff --cc mm/bounce.c
Simple merge
diff --cc mm/filemap.c
Simple merge
diff --cc mm/kmemleak.c
Simple merge
Simple merge
diff --cc mm/swapfile.c
Simple merge
diff --cc mm/truncate.c
Simple merge
index 163397f1fd5adefd655efcfb681544b1a56655bf,1f64cc9da1b09b75b40ccf532580ef20cdbe7c29..f5ffc02729d60396aac6abbdcbfc49bf1ebf20e1
@@@ -2,9 -2,10 +2,10 @@@
  #include <linux/netdevice.h>
  #include <linux/if_vlan.h>
  #include <linux/netpoll.h>
+ #include <linux/export.h>
  #include "vlan.h"
  
 -bool vlan_do_receive(struct sk_buff **skbp)
 +bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
  {
        struct sk_buff *skb = *skbp;
        u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge