#include <asm/bootinfo.h>
#include <asm/cacheflush.h>
#include <asm/r4kcache.h>
#include <asm/reboot.h>
+#include <asm/smp-ops.h>
#include <asm/time.h>
#include <msp_prom.h>
--- /dev/null
+/*
+ * BSG helper library
+ *
+ * Copyright (C) 2008 James Smart, Emulex Corporation
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2011 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+ put_device(job->dev); /* release reference for the request */
+
+ kfree(job->request_payload.sg_list);
+ kfree(job->reply_payload.sg_list);
+ kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+ unsigned int reply_payload_rcv_len)
+{
+ struct request *req = job->req;
+ struct request *rsp = req->next_rq;
+ int err;
+
+ err = job->req->errors = result;
+ if (err < 0)
+ /* we're only returning the result field in the reply */
+ job->req->sense_len = sizeof(u32);
+ else
+ job->req->sense_len = job->reply_len;
+ /* we assume all request payload was transferred, residual == 0 */
+ req->resid_len = 0;
+
+ if (rsp) {
+ WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+ /* set reply (bidi) residual */
+ rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+ }
+ blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
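+
+/*
+ * A rough sketch of how an LLD's hardware-completion path might hand a
+ * finished job back to this helper. The foo_io structure and its fields
+ * are hypothetical; only bsg_job_done() is real:
+ *
+ *	static void foo_hw_complete(struct foo_io *io)
+ *	{
+ *		struct bsg_job *job = io->job;
+ *
+ *		job->reply_len = io->reply_len;
+ *		bsg_job_done(job, io->status ? -EIO : 0, io->bytes_rcvd);
+ *	}
+ */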
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+ struct bsg_job *job = rq->special;
+
+ blk_end_request_all(rq, rq->errors);
+ bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+ size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+ BUG_ON(!req->nr_phys_segments);
+
+ buf->sg_list = kzalloc(sz, GFP_KERNEL);
+ if (!buf->sg_list)
+ return -ENOMEM;
+ sg_init_table(buf->sg_list, req->nr_phys_segments);
+ buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+ buf->payload_len = blk_rq_bytes(req);
+ return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+ struct request *rsp = req->next_rq;
+ struct request_queue *q = req->q;
+ struct bsg_job *job;
+ int ret;
+
+ BUG_ON(req->special);
+
+ job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ req->special = job;
+ job->req = req;
+ if (q->bsg_job_size)
+ job->dd_data = (void *)&job[1];
+ job->request = req->cmd;
+ job->request_len = req->cmd_len;
+ job->reply = req->sense;
+ job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
+ * allocated */
+ if (req->bio) {
+ ret = bsg_map_buffer(&job->request_payload, req);
+ if (ret)
+ goto failjob_rls_job;
+ }
+ if (rsp && rsp->bio) {
+ ret = bsg_map_buffer(&job->reply_payload, rsp);
+ if (ret)
+ goto failjob_rls_rqst_payload;
+ }
+ job->dev = dev;
+ /* take a reference for the request */
+ get_device(job->dev);
+ return 0;
+
+failjob_rls_rqst_payload:
+ kfree(job->request_payload.sg_list);
+failjob_rls_job:
+ kfree(job);
+ return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+ if (!q)
+ return;
+
+ blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error bsg_create_job() should return a -Exyz error value
+ * that will be set in req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+ struct device *dev = q->queuedata;
+ struct request *req;
+ struct bsg_job *job;
+ int ret;
+
+ if (!get_device(dev))
+ return;
+
+ while (1) {
+ req = blk_fetch_request(q);
+ if (!req)
+ break;
+ spin_unlock_irq(q->queue_lock);
+
+ ret = bsg_create_job(dev, req);
+ if (ret) {
+ req->errors = ret;
+ blk_end_request_all(req, ret);
+ spin_lock_irq(q->queue_lock);
+ continue;
+ }
+
+ job = req->special;
+ ret = q->bsg_job_fn(job);
+ spin_lock_irq(q->queue_lock);
+ if (ret)
+ break;
+ }
+
+ spin_unlock_irq(q->queue_lock);
+ put_device(dev);
+ spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have set up the request queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+ char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+ int ret;
+
+ q->queuedata = dev;
+ q->bsg_job_size = dd_job_size;
+ q->bsg_job_fn = job_fn;
+ queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ blk_queue_softirq_done(q, bsg_softirq_done);
+ blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+ ret = bsg_register_queue(q, dev, name, NULL);
+ if (ret) {
+ printk(KERN_ERR "%s: bsg interface failed to "
+ "initialize - register queue\n", dev->kobj.name);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
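+
+/*
+ * A rough sketch of how a driver might wire the two halves together.
+ * foo_bsg_job() and the "foo_bsg" name are placeholders; the queue must
+ * use bsg_request_fn as its request_fn, as noted above:
+ *
+ *	struct request_queue *q;
+ *
+ *	q = blk_init_queue(bsg_request_fn, NULL);
+ *	if (!q)
+ *		return -ENOMEM;
+ *	ret = bsg_setup_queue(dev, q, "foo_bsg", foo_bsg_job, dd_size);
+ *	if (ret)
+ *		blk_cleanup_queue(q);
+ */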
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q: the request_queue that is to be torn down.
+ *
+ * Notes:
+ * Before unregistering the queue, empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+ struct request *req; /* block request */
+ int counts; /* totals for request_list count and starved */
+
+ if (!q)
+ return;
+
+ /* Stop taking in new requests */
+ spin_lock_irq(q->queue_lock);
+ blk_stop_queue(q);
+
+ /* drain all requests in the queue */
+ while (1) {
+ /* need the lock to fetch a request
+ * this may fetch the same request as the previous pass
+ */
+ req = blk_fetch_request(q);
+ /* save requests in use and starved */
+ counts = q->rq.count[0] + q->rq.count[1] +
+ q->rq.starved[0] + q->rq.starved[1];
+ spin_unlock_irq(q->queue_lock);
+ /* any requests still outstanding? */
+ if (counts == 0)
+ break;
+
+ /* This may be the same req as the previous iteration;
+ * always call blk_end_request_all after a prefetch. It is
+ * not okay to leave the request unended, because the
+ * prefetch started the request.
+ */
+ if (req) {
+ /* return -ENXIO to indicate that this queue is
+ * going away
+ */
+ req->errors = -ENXIO;
+ blk_end_request_all(req, -ENXIO);
+ }
+
+ msleep(200); /* allow bsg to possibly finish */
+ spin_lock_irq(q->queue_lock);
+ }
+ bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/amba/bus.h>
#include <plat/ste_dma40.h>
*/
#include <linux/slab.h>
+#include <linux/module.h>
#include "md.h"
+#include "raid1.h"
#include "raid5.h"
-#include "dm.h"
#include "bitmap.h"
+#include <linux/device-mapper.h>
+
#define DM_MSG_PREFIX "raid"
/*
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+#include "dm-block-manager.h"
+#include "dm-persistent-data-internal.h"
+
+#include <linux/dm-io.h>
+#include <linux/slab.h>
+#include <linux/device-mapper.h>
+#include <linux/export.h>
+
+#define DM_MSG_PREFIX "block manager"
+
+/*----------------------------------------------------------------*/
+
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#define MAX_CACHE_SIZE 16U
+
+enum dm_block_state {
+ BS_EMPTY,
+ BS_CLEAN,
+ BS_READING,
+ BS_WRITING,
+ BS_READ_LOCKED,
+ BS_READ_LOCKED_DIRTY, /* Block was dirty before it was read locked. */
+ BS_WRITE_LOCKED,
+ BS_DIRTY,
+ BS_ERROR
+};
+
+struct dm_block {
+ struct list_head list;
+ struct hlist_node hlist;
+
+ dm_block_t where;
+ struct dm_block_validator *validator;
+ void *data;
+ wait_queue_head_t io_q;
+ unsigned read_lock_count;
+ unsigned write_lock_pending;
+ enum dm_block_state state;
+
+ /*
+ * Extra flags like REQ_FLUSH and REQ_FUA can be set here. This is
+ * mainly to avoid a race condition in flush_and_unlock() where
+ * the newly-unlocked superblock may have been submitted for a
+ * write before the write_all_dirty() call is made.
+ */
+ int io_flags;
+
+ /*
+ * Sadly we need an up pointer so we can get to the bm on io
+ * completion.
+ */
+ struct dm_block_manager *bm;
+};
+
+struct dm_block_manager {
+ struct block_device *bdev;
+ unsigned cache_size; /* In bytes */
+ unsigned block_size; /* In bytes */
+ dm_block_t nr_blocks;
+
+ /*
+ * This will trigger every time an io completes.
+ */
+ wait_queue_head_t io_q;
+
+ struct dm_io_client *io;
+
+ /*
+ * Protects all the lists and the hash table.
+ */
+ spinlock_t lock;
+
+ struct list_head empty_list; /* No block assigned */
+ struct list_head clean_list; /* Unlocked and clean */
+ struct list_head dirty_list; /* Unlocked and dirty */
+ struct list_head error_list;
+
+ unsigned available_count;
+ unsigned reading_count;
+ unsigned writing_count;
+
+ struct kmem_cache *block_cache; /* struct dm_block */
+ struct kmem_cache *buffer_cache; /* The buffers that store the raw data */
+
+ /*
+ * Hash table of cached blocks, holds everything that isn't in the
+ * BS_EMPTY state.
+ */
+ unsigned hash_size;
+ unsigned hash_mask;
+
+ struct hlist_head buckets[0]; /* Must be last member of struct. */
+};
+
+dm_block_t dm_block_location(struct dm_block *b)
+{
+ return b->where;
+}
+EXPORT_SYMBOL_GPL(dm_block_location);
+
+void *dm_block_data(struct dm_block *b)
+{
+ return b->data;
+}
+EXPORT_SYMBOL_GPL(dm_block_data);
+
+/*----------------------------------------------------------------
+ * Hash table
+ *--------------------------------------------------------------*/
+static struct dm_block *__find_block(struct dm_block_manager *bm, dm_block_t b)
+{
+ unsigned bucket = dm_hash_block(b, bm->hash_mask);
+ struct dm_block *blk;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(blk, n, bm->buckets + bucket, hlist)
+ if (blk->where == b)
+ return blk;
+
+ return NULL;
+}
+
+static void __insert_block(struct dm_block_manager *bm, struct dm_block *b)
+{
+ unsigned bucket = dm_hash_block(b->where, bm->hash_mask);
+
+ hlist_add_head(&b->hlist, bm->buckets + bucket);
+}
+
+/*----------------------------------------------------------------
+ * Block state:
+ * __transition() handles transition of a block between different states.
+ * Study this to understand the state machine.
+ *
+ * Alternatively install graphviz and run:
+ * grep DOT dm-block-manager.c | grep -v ' ' |
+ * sed -e 's/.*DOT: //' -e 's/\*\///' |
+ * dot -Tps -o states.ps
+ *
+ * Assumes bm->lock is held.
+ *--------------------------------------------------------------*/
+static void __transition(struct dm_block *b, enum dm_block_state new_state)
+{
+ /* DOT: digraph BlockStates { */
+ struct dm_block_manager *bm = b->bm;
+
+ switch (new_state) {
+ case BS_EMPTY:
+ /* DOT: error -> empty */
+ /* DOT: clean -> empty */
+ BUG_ON(!((b->state == BS_ERROR) ||
+ (b->state == BS_CLEAN)));
+ hlist_del(&b->hlist);
+ list_move(&b->list, &bm->empty_list);
+ b->write_lock_pending = 0;
+ b->read_lock_count = 0;
+ b->io_flags = 0;
+ b->validator = NULL;
+
+ if (b->state == BS_ERROR)
+ bm->available_count++;
+ break;
+
+ case BS_CLEAN:
+ /* DOT: reading -> clean */
+ /* DOT: writing -> clean */
+ /* DOT: read_locked -> clean */
+ BUG_ON(!((b->state == BS_READING) ||
+ (b->state == BS_WRITING) ||
+ (b->state == BS_READ_LOCKED)));
+ switch (b->state) {
+ case BS_READING:
+ BUG_ON(!bm->reading_count);
+ bm->reading_count--;
+ break;
+
+ case BS_WRITING:
+ BUG_ON(!bm->writing_count);
+ bm->writing_count--;
+ b->io_flags = 0;
+ break;
+
+ default:
+ break;
+ }
+ list_add_tail(&b->list, &bm->clean_list);
+ bm->available_count++;
+ break;
+
+ case BS_READING:
+ /* DOT: empty -> reading */
+ BUG_ON(!(b->state == BS_EMPTY));
+ __insert_block(bm, b);
+ list_del(&b->list);
+ bm->available_count--;
+ bm->reading_count++;
+ break;
+
+ case BS_WRITING:
+ /* DOT: dirty -> writing */
+ BUG_ON(!(b->state == BS_DIRTY));
+ list_del(&b->list);
+ bm->writing_count++;
+ break;
+
+ case BS_READ_LOCKED:
+ /* DOT: clean -> read_locked */
+ BUG_ON(!(b->state == BS_CLEAN));
+ list_del(&b->list);
+ bm->available_count--;
+ break;
+
+ case BS_READ_LOCKED_DIRTY:
+ /* DOT: dirty -> read_locked_dirty */
+ BUG_ON(!((b->state == BS_DIRTY)));
+ list_del(&b->list);
+ break;
+
+ case BS_WRITE_LOCKED:
+ /* DOT: dirty -> write_locked */
+ /* DOT: clean -> write_locked */
+ BUG_ON(!((b->state == BS_DIRTY) ||
+ (b->state == BS_CLEAN)));
+ list_del(&b->list);
+
+ if (b->state == BS_CLEAN)
+ bm->available_count--;
+ break;
+
+ case BS_DIRTY:
+ /* DOT: write_locked -> dirty */
+ /* DOT: read_locked_dirty -> dirty */
+ BUG_ON(!((b->state == BS_WRITE_LOCKED) ||
+ (b->state == BS_READ_LOCKED_DIRTY)));
+ list_add_tail(&b->list, &bm->dirty_list);
+ break;
+
+ case BS_ERROR:
+ /* DOT: writing -> error */
+ /* DOT: reading -> error */
+ BUG_ON(!((b->state == BS_WRITING) ||
+ (b->state == BS_READING)));
+ list_add_tail(&b->list, &bm->error_list);
+ break;
+ }
+
+ b->state = new_state;
+ /* DOT: } */
+}
+
+/*----------------------------------------------------------------
+ * Low-level io.
+ *--------------------------------------------------------------*/
+typedef void (completion_fn)(unsigned long error, struct dm_block *b);
+
+static void submit_io(struct dm_block *b, int rw,
+ completion_fn fn)
+{
+ struct dm_block_manager *bm = b->bm;
+ struct dm_io_request req;
+ struct dm_io_region region;
+ unsigned sectors_per_block = bm->block_size >> SECTOR_SHIFT;
+
+ region.bdev = bm->bdev;
+ region.sector = b->where * sectors_per_block;
+ region.count = sectors_per_block;
+
+ req.bi_rw = rw;
+ req.mem.type = DM_IO_KMEM;
+ req.mem.offset = 0;
+ req.mem.ptr.addr = b->data;
+ req.notify.fn = (void (*)(unsigned long, void *)) fn;
+ req.notify.context = b;
+ req.client = bm->io;
+
+ if (dm_io(&req, 1, &region, NULL) < 0)
+ fn(1, b);
+}
+
+/*----------------------------------------------------------------
+ * High-level io.
+ *--------------------------------------------------------------*/
+static void __complete_io(unsigned long error, struct dm_block *b)
+{
+ struct dm_block_manager *bm = b->bm;
+
+ if (error) {
+ DMERR("io error = %lu, block = %llu",
+ error, (unsigned long long)b->where);
+ __transition(b, BS_ERROR);
+ } else
+ __transition(b, BS_CLEAN);
+
+ wake_up(&b->io_q);
+ wake_up(&bm->io_q);
+}
+
+static void complete_io(unsigned long error, struct dm_block *b)
+{
+ struct dm_block_manager *bm = b->bm;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bm->lock, flags);
+ __complete_io(error, b);
+ spin_unlock_irqrestore(&bm->lock, flags);
+}
+
+static void read_block(struct dm_block *b)
+{
+ submit_io(b, READ, complete_io);
+}
+
+static void write_block(struct dm_block *b)
+{
+ if (b->validator)
+ b->validator->prepare_for_write(b->validator, b,
+ b->bm->block_size);
+
+ submit_io(b, WRITE | b->io_flags, complete_io);
+}
+
+static void write_dirty(struct dm_block_manager *bm, unsigned count)
+{
+ struct dm_block *b, *tmp;
+ struct list_head dirty;
+ unsigned long flags;
+
+ /*
+ * Grab the first @count entries from the dirty list
+ */
+ INIT_LIST_HEAD(&dirty);
+ spin_lock_irqsave(&bm->lock, flags);
+ list_for_each_entry_safe(b, tmp, &bm->dirty_list, list) {
+ if (!count--)
+ break;
+ __transition(b, BS_WRITING);
+ list_add_tail(&b->list, &dirty);
+ }
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ list_for_each_entry_safe(b, tmp, &dirty, list) {
+ list_del(&b->list);
+ write_block(b);
+ }
+}
+
+static void write_all_dirty(struct dm_block_manager *bm)
+{
+ write_dirty(bm, bm->cache_size);
+}
+
+static void __clear_errors(struct dm_block_manager *bm)
+{
+ struct dm_block *b, *tmp;
+ list_for_each_entry_safe(b, tmp, &bm->error_list, list)
+ __transition(b, BS_EMPTY);
+}
+
+/*----------------------------------------------------------------
+ * Waiting
+ *--------------------------------------------------------------*/
+#ifdef __CHECKER__
+# define __retains(x) __attribute__((context(x, 1, 1)))
+#else
+# define __retains(x)
+#endif
+
+#define __wait_block(wq, lock, flags, sched_fn, condition) \
+do { \
+ int r = 0; \
+ \
+ DEFINE_WAIT(wait); \
+ add_wait_queue(wq, &wait); \
+ \
+ for (;;) { \
+ prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ \
+ spin_unlock_irqrestore(lock, flags); \
+ if (signal_pending(current)) { \
+ r = -ERESTARTSYS; \
+ spin_lock_irqsave(lock, flags); \
+ break; \
+ } \
+ \
+ sched_fn(); \
+ spin_lock_irqsave(lock, flags); \
+ } \
+ \
+ finish_wait(wq, &wait); \
+ return r; \
+} while (0)
+
+static int __wait_io(struct dm_block *b, unsigned long *flags)
+ __retains(&b->bm->lock)
+{
+ __wait_block(&b->io_q, &b->bm->lock, *flags, io_schedule,
+ ((b->state != BS_READING) && (b->state != BS_WRITING)));
+}
+
+static int __wait_unlocked(struct dm_block *b, unsigned long *flags)
+ __retains(&b->bm->lock)
+{
+ __wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
+ ((b->state == BS_CLEAN) || (b->state == BS_DIRTY)));
+}
+
+static int __wait_read_lockable(struct dm_block *b, unsigned long *flags)
+ __retains(&b->bm->lock)
+{
+ __wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
+ (!b->write_lock_pending && (b->state == BS_CLEAN ||
+ b->state == BS_DIRTY ||
+ b->state == BS_READ_LOCKED)));
+}
+
+static int __wait_all_writes(struct dm_block_manager *bm, unsigned long *flags)
+ __retains(&bm->lock)
+{
+ __wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
+ !bm->writing_count);
+}
+
+static int __wait_all_io(struct dm_block_manager *bm, unsigned long *flags)
+ __retains(&bm->lock)
+{
+ __wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
+ !bm->writing_count && !bm->reading_count);
+}
+
+static int __wait_clean(struct dm_block_manager *bm, unsigned long *flags)
+ __retains(&bm->lock)
+{
+ __wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
+ (!list_empty(&bm->clean_list) ||
+ (!bm->writing_count)));
+}
+
+/*----------------------------------------------------------------
+ * Finding a free block to recycle
+ *--------------------------------------------------------------*/
+static int recycle_block(struct dm_block_manager *bm, dm_block_t where,
+ int need_read, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r = 0;
+ struct dm_block *b;
+ unsigned long flags, available;
+
+ /*
+ * Wait for a block to appear on the empty or clean lists.
+ */
+ spin_lock_irqsave(&bm->lock, flags);
+ while (1) {
+ /*
+ * Once we can lock and do io concurrently then we should
+ * probably flush at bm->cache_size / 2 and write _all_
+ * dirty blocks.
+ */
+ available = bm->available_count + bm->writing_count;
+ if (available < bm->cache_size / 4) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ write_dirty(bm, bm->cache_size / 4);
+ spin_lock_irqsave(&bm->lock, flags);
+ }
+
+ if (!list_empty(&bm->empty_list)) {
+ b = list_first_entry(&bm->empty_list, struct dm_block, list);
+ break;
+
+ } else if (!list_empty(&bm->clean_list)) {
+ b = list_first_entry(&bm->clean_list, struct dm_block, list);
+ __transition(b, BS_EMPTY);
+ break;
+ }
+
+ __wait_clean(bm, &flags);
+ }
+
+ b->where = where;
+ b->validator = v;
+ __transition(b, BS_READING);
+
+ if (!need_read) {
+ memset(b->data, 0, bm->block_size);
+ __transition(b, BS_CLEAN);
+ } else {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ read_block(b);
+ spin_lock_irqsave(&bm->lock, flags);
+ __wait_io(b, &flags);
+
+ /* FIXME: Can b have been recycled between io completion and here? */
+
+ /*
+ * Did the io succeed?
+ */
+ if (b->state == BS_ERROR) {
+ /*
+ * Since this is a read that has failed we can clear the error
+ * immediately. Failed writes are revealed during a commit.
+ */
+ __transition(b, BS_EMPTY);
+ r = -EIO;
+ }
+
+ if (b->validator) {
+ r = b->validator->check(b->validator, b, bm->block_size);
+ if (r) {
+ DMERR("%s validator check failed for block %llu",
+ b->validator->name, (unsigned long long)b->where);
+ __transition(b, BS_EMPTY);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ if (!r)
+ *result = b;
+
+ return r;
+}
+
+/*----------------------------------------------------------------
+ * Low level block management
+ *--------------------------------------------------------------*/
+
+static struct dm_block *alloc_block(struct dm_block_manager *bm)
+{
+ struct dm_block *b = kmem_cache_alloc(bm->block_cache, GFP_KERNEL);
+
+ if (!b)
+ return NULL;
+
+ INIT_LIST_HEAD(&b->list);
+ INIT_HLIST_NODE(&b->hlist);
+
+ b->data = kmem_cache_alloc(bm->buffer_cache, GFP_KERNEL);
+ if (!b->data) {
+ kmem_cache_free(bm->block_cache, b);
+ return NULL;
+ }
+
+ b->validator = NULL;
+ b->state = BS_EMPTY;
+ init_waitqueue_head(&b->io_q);
+ b->read_lock_count = 0;
+ b->write_lock_pending = 0;
+ b->io_flags = 0;
+ b->bm = bm;
+
+ return b;
+}
+
+static void free_block(struct dm_block *b)
+{
+ kmem_cache_free(b->bm->buffer_cache, b->data);
+ kmem_cache_free(b->bm->block_cache, b);
+}
+
+static int populate_bm(struct dm_block_manager *bm, unsigned count)
+{
+ int i;
+ LIST_HEAD(bs);
+
+ for (i = 0; i < count; i++) {
+ struct dm_block *b = alloc_block(bm);
+ if (!b) {
+ struct dm_block *tmp;
+ list_for_each_entry_safe(b, tmp, &bs, list)
+ free_block(b);
+ return -ENOMEM;
+ }
+
+ list_add(&b->list, &bs);
+ }
+
+ list_replace(&bs, &bm->empty_list);
+ bm->available_count = count;
+
+ return 0;
+}
+
+/*----------------------------------------------------------------
+ * Public interface
+ *--------------------------------------------------------------*/
+static unsigned calc_hash_size(unsigned cache_size)
+{
+ unsigned r = 32; /* Minimum size is 16 */
+
+ while (r < cache_size)
+ r <<= 1;
+
+ return r >> 1;
+}
+
+struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
+ unsigned block_size,
+ unsigned cache_size)
+{
+ unsigned i;
+ unsigned hash_size = calc_hash_size(cache_size);
+ size_t len = sizeof(struct dm_block_manager) +
+ sizeof(struct hlist_head) * hash_size;
+ struct dm_block_manager *bm;
+
+ bm = kmalloc(len, GFP_KERNEL);
+ if (!bm)
+ return NULL;
+
+ bm->bdev = bdev;
+ bm->cache_size = max(MAX_CACHE_SIZE, cache_size);
+ bm->block_size = block_size;
+ bm->nr_blocks = i_size_read(bdev->bd_inode);
+ do_div(bm->nr_blocks, block_size);
+ init_waitqueue_head(&bm->io_q);
+ spin_lock_init(&bm->lock);
+
+ INIT_LIST_HEAD(&bm->empty_list);
+ INIT_LIST_HEAD(&bm->clean_list);
+ INIT_LIST_HEAD(&bm->dirty_list);
+ INIT_LIST_HEAD(&bm->error_list);
+ bm->available_count = 0;
+ bm->reading_count = 0;
+ bm->writing_count = 0;
+
+ bm->block_cache = kmem_cache_create("dm-block-manager-blocks",
+ sizeof(struct dm_block),
+ __alignof__(struct dm_block),
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!bm->block_cache)
+ goto bad_bm;
+
+ bm->buffer_cache = kmem_cache_create("dm-block-manager-buffers",
+ block_size, SECTOR_SIZE,
+ 0, NULL);
+ if (!bm->buffer_cache)
+ goto bad_block_cache;
+
+ bm->hash_size = hash_size;
+ bm->hash_mask = hash_size - 1;
+ for (i = 0; i < hash_size; i++)
+ INIT_HLIST_HEAD(bm->buckets + i);
+
+ bm->io = dm_io_client_create();
+ if (!bm->io)
+ goto bad_buffer_cache;
+
+ if (populate_bm(bm, cache_size) < 0)
+ goto bad_io_client;
+
+ return bm;
+
+bad_io_client:
+ dm_io_client_destroy(bm->io);
+bad_buffer_cache:
+ kmem_cache_destroy(bm->buffer_cache);
+bad_block_cache:
+ kmem_cache_destroy(bm->block_cache);
+bad_bm:
+ kfree(bm);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dm_block_manager_create);
+
+void dm_block_manager_destroy(struct dm_block_manager *bm)
+{
+ int i;
+ struct dm_block *b, *btmp;
+ struct hlist_node *n, *tmp;
+
+ dm_io_client_destroy(bm->io);
+
+ for (i = 0; i < bm->hash_size; i++)
+ hlist_for_each_entry_safe(b, n, tmp, bm->buckets + i, hlist)
+ free_block(b);
+
+ list_for_each_entry_safe(b, btmp, &bm->empty_list, list)
+ free_block(b);
+
+ kmem_cache_destroy(bm->buffer_cache);
+ kmem_cache_destroy(bm->block_cache);
+
+ kfree(bm);
+}
+EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
+
+unsigned dm_bm_block_size(struct dm_block_manager *bm)
+{
+ return bm->block_size;
+}
+EXPORT_SYMBOL_GPL(dm_bm_block_size);
+
+dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
+{
+ return bm->nr_blocks;
+}
+
+static int lock_internal(struct dm_block_manager *bm, dm_block_t block,
+ int how, int need_read, int can_block,
+ struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r = 0;
+ struct dm_block *b;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bm->lock, flags);
+retry:
+ b = __find_block(bm, block);
+ if (b) {
+ if (!need_read)
+ b->validator = v;
+ else {
+ if (b->validator && (v != b->validator)) {
+ DMERR("validator mismatch (old=%s vs new=%s) for block %llu",
+ b->validator->name, v ? v->name : "NULL",
+ (unsigned long long)b->where);
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EINVAL;
+
+ }
+ if (!b->validator && v) {
+ b->validator = v;
+ r = b->validator->check(b->validator, b, bm->block_size);
+ if (r) {
+ DMERR("%s validator check failed for block %llu",
+ b->validator->name,
+ (unsigned long long)b->where);
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return r;
+ }
+ }
+ }
+
+ switch (how) {
+ case READ:
+ if (b->write_lock_pending || (b->state != BS_CLEAN &&
+ b->state != BS_DIRTY &&
+ b->state != BS_READ_LOCKED)) {
+ if (!can_block) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EWOULDBLOCK;
+ }
+
+ __wait_read_lockable(b, &flags);
+
+ if (b->where != block)
+ goto retry;
+ }
+ break;
+
+ case WRITE:
+ while (b->state != BS_CLEAN && b->state != BS_DIRTY) {
+ if (!can_block) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EWOULDBLOCK;
+ }
+
+ b->write_lock_pending++;
+ __wait_unlocked(b, &flags);
+ b->write_lock_pending--;
+ if (b->where != block)
+ goto retry;
+ }
+ break;
+ }
+
+ } else if (!can_block) {
+ r = -EWOULDBLOCK;
+ goto out;
+
+ } else {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ r = recycle_block(bm, block, need_read, v, &b);
+ spin_lock_irqsave(&bm->lock, flags);
+ }
+
+ if (!r) {
+ switch (how) {
+ case READ:
+ b->read_lock_count++;
+
+ if (b->state == BS_DIRTY)
+ __transition(b, BS_READ_LOCKED_DIRTY);
+ else if (b->state == BS_CLEAN)
+ __transition(b, BS_READ_LOCKED);
+ break;
+
+ case WRITE:
+ __transition(b, BS_WRITE_LOCKED);
+ break;
+ }
+
+ *result = b;
+ }
+
+out:
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ return r;
+}
+
+int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
+ struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ return lock_internal(bm, b, READ, 1, 1, v, result);
+}
+EXPORT_SYMBOL_GPL(dm_bm_read_lock);
+
+int dm_bm_write_lock(struct dm_block_manager *bm,
+ dm_block_t b, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ return lock_internal(bm, b, WRITE, 1, 1, v, result);
+}
+EXPORT_SYMBOL_GPL(dm_bm_write_lock);
+
+int dm_bm_read_try_lock(struct dm_block_manager *bm,
+ dm_block_t b, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ return lock_internal(bm, b, READ, 1, 0, v, result);
+}
+
+int dm_bm_write_lock_zero(struct dm_block_manager *bm,
+ dm_block_t b, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r = lock_internal(bm, b, WRITE, 0, 1, v, result);
+
+ if (!r)
+ memset((*result)->data, 0, bm->block_size);
+
+ return r;
+}
+
+int dm_bm_unlock(struct dm_block *b)
+{
+ int r = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&b->bm->lock, flags);
+ switch (b->state) {
+ case BS_WRITE_LOCKED:
+ __transition(b, BS_DIRTY);
+ wake_up(&b->io_q);
+ break;
+
+ case BS_READ_LOCKED:
+ if (!--b->read_lock_count) {
+ __transition(b, BS_CLEAN);
+ wake_up(&b->io_q);
+ }
+ break;
+
+ case BS_READ_LOCKED_DIRTY:
+ if (!--b->read_lock_count) {
+ __transition(b, BS_DIRTY);
+ wake_up(&b->io_q);
+ }
+ break;
+
+ default:
+ DMERR("block = %llu not locked",
+ (unsigned long long)b->where);
+ r = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&b->bm->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_bm_unlock);
+
+static int __wait_flush(struct dm_block_manager *bm)
+{
+ int r = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bm->lock, flags);
+ __wait_all_writes(bm, &flags);
+
+ if (!list_empty(&bm->error_list)) {
+ r = -EIO;
+ __clear_errors(bm);
+ }
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ return r;
+}
+
+int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
+ struct dm_block *superblock)
+{
+ int r;
+ unsigned long flags;
+
+ write_all_dirty(bm);
+ r = __wait_flush(bm);
+ if (r)
+ return r;
+
+ spin_lock_irqsave(&bm->lock, flags);
+ superblock->io_flags = REQ_FUA | REQ_FLUSH;
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ dm_bm_unlock(superblock);
+ write_all_dirty(bm);
+
+ return __wait_flush(bm);
+}
+
+int dm_bm_rebind_block_device(struct dm_block_manager *bm,
+ struct block_device *bdev)
+{
+ unsigned long flags;
+ dm_block_t nr_blocks = i_size_read(bdev->bd_inode);
+
+ do_div(nr_blocks, bm->block_size);
+
+ spin_lock_irqsave(&bm->lock, flags);
+ if (nr_blocks < bm->nr_blocks) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EINVAL;
+ }
+
+ bm->bdev = bdev;
+ bm->nr_blocks = nr_blocks;
+
+ /*
+ * Wait for any in-flight io that may be using the old bdev
+ */
+ __wait_all_io(bm, &flags);
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bm_rebind_block_device);
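+
+/*
+ * A rough sketch of the lock/unlock cycle a client of this block
+ * manager goes through. bdev is whatever block device the caller
+ * already has, and use_the_data() stands in for the caller's work on
+ * the buffer:
+ *
+ *	struct dm_block_manager *bm;
+ *	struct dm_block *b;
+ *
+ *	bm = dm_block_manager_create(bdev, 4096, 64);
+ *	if (!bm)
+ *		return -ENOMEM;
+ *	if (!dm_bm_read_lock(bm, 0, NULL, &b)) {
+ *		use_the_data(dm_block_data(b));
+ *		dm_bm_unlock(b);
+ *	}
+ *	dm_block_manager_destroy(bm);
+ */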
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree.h"
+#include "dm-btree-internal.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+
+/*
+ * Removing an entry from a btree
+ * ==============================
+ *
+ * A very important constraint for our btree is that no node, except the
+ * root, may have fewer than a certain number of entries.
+ * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
+ *
+ * Ensuring this is complicated by the way we want to only ever hold the
+ * locks on 2 nodes concurrently, and only change nodes in a top to bottom
+ * fashion.
+ *
+ * Each node may have a left or right sibling. When descending the spine,
+ * if a node contains only MIN_ENTRIES then we try and increase this to at
+ * least MIN_ENTRIES + 1. We do this in the following ways:
+ *
+ * [A] No siblings => this can only happen if the node is the root, in which
+ * case we copy the child's contents over the root.
+ *
+ * [B] No left sibling
+ * ==> rebalance(node, right sibling)
+ *
+ * [C] No right sibling
+ * ==> rebalance(left sibling, node)
+ *
+ * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
+ * ==> delete node, adding its contents to left and right
+ *
+ * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
+ * ==> rebalance(left, node, right)
+ *
+ * After these operations it's possible that our original node no
+ * longer contains the desired sub tree. For this reason this rebalancing
+ * is performed on the children of the current node. This also avoids
+ * having a special case for the root.
+ *
+ * Once this rebalancing has occurred we can then step into the child node
+ * for internal nodes. Or delete the entry for leaf nodes.
+ */
+
+/*
+ * Some little utilities for moving node data around.
+ */
+static void node_shift(struct node *n, int shift)
+{
+ uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+
+ if (shift < 0) {
+ shift = -shift;
+ memmove(key_ptr(n, 0),
+ key_ptr(n, shift),
+ (nr_entries - shift) * sizeof(__le64));
+ memmove(value_ptr(n, 0, sizeof(__le64)),
+ value_ptr(n, shift, sizeof(__le64)),
+ (nr_entries - shift) * sizeof(__le64));
+ } else {
+ memmove(key_ptr(n, shift),
+ key_ptr(n, 0),
+ nr_entries * sizeof(__le64));
+ memmove(value_ptr(n, shift, sizeof(__le64)),
+ value_ptr(n, 0, sizeof(__le64)),
+ nr_entries * sizeof(__le64));
+ }
+}
+
+static void node_copy(struct node *left, struct node *right, int shift)
+{
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+
+ if (shift < 0) {
+ shift = -shift;
+ memcpy(key_ptr(left, nr_left),
+ key_ptr(right, 0),
+ shift * sizeof(__le64));
+ memcpy(value_ptr(left, nr_left, sizeof(__le64)),
+ value_ptr(right, 0, sizeof(__le64)),
+ shift * sizeof(__le64));
+ } else {
+ memcpy(key_ptr(right, 0),
+ key_ptr(left, nr_left - shift),
+ shift * sizeof(__le64));
+ memcpy(value_ptr(right, 0, sizeof(__le64)),
+ value_ptr(left, nr_left - shift, sizeof(__le64)),
+ shift * sizeof(__le64));
+ }
+}
+
+/*
+ * Delete a specific entry from a leaf node.
+ */
+static void delete_at(struct node *n, unsigned index, size_t value_size)
+{
+ unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
+ unsigned nr_to_copy = nr_entries - (index + 1);
+
+ if (nr_to_copy) {
+ memmove(key_ptr(n, index),
+ key_ptr(n, index + 1),
+ nr_to_copy * sizeof(__le64));
+
+ memmove(value_ptr(n, index, value_size),
+ value_ptr(n, index + 1, value_size),
+ nr_to_copy * value_size);
+ }
+
+ n->header.nr_entries = cpu_to_le32(nr_entries - 1);
+}
+
+static unsigned del_threshold(struct node *n)
+{
+ return le32_to_cpu(n->header.max_entries) / 3;
+}
+
+static unsigned merge_threshold(struct node *n)
+{
+ /*
+ * The extra one is because we know we're potentially going to
+ * delete an entry.
+ */
+ return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1;
+}
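+
+/*
+ * For example, a node with max_entries = 126 (max_entries is always a
+ * multiple of 3, see calc_max_entries() in dm-btree.c) has
+ * del_threshold = 42 and merge_threshold = 2 * 42 + 1 = 85.
+ */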
+
+struct child {
+ unsigned index;
+ struct dm_block *block;
+ struct node *n;
+};
+
+static struct dm_btree_value_type le64_type = {
+ .context = NULL,
+ .size = sizeof(__le64),
+ .inc = NULL,
+ .dec = NULL,
+ .equal = NULL
+};
+
+static int init_child(struct dm_btree_info *info, struct node *parent,
+ unsigned index, struct child *result)
+{
+ int r, inc;
+ dm_block_t root;
+
+ result->index = index;
+ root = value64(parent, index);
+
+ r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
+ &result->block, &inc);
+ if (r)
+ return r;
+
+ result->n = dm_block_data(result->block);
+
+ if (inc)
+ inc_children(info->tm, result->n, &le64_type);
+
+ return 0;
+}
+
+static int exit_child(struct dm_btree_info *info, struct child *c)
+{
+ return dm_tm_unlock(info->tm, c->block);
+}
+
+static void shift(struct node *left, struct node *right, int count)
+{
+ if (!count)
+ return;
+
+ if (count > 0) {
+ node_shift(right, count);
+ node_copy(left, right, count);
+ } else {
+ node_copy(left, right, count);
+ node_shift(right, count);
+ }
+
+ left->header.nr_entries =
+ cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count);
+
+ right->header.nr_entries =
+ cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count);
+}
+
+static void __rebalance2(struct dm_btree_info *info, struct node *parent,
+ struct child *l, struct child *r)
+{
+ struct node *left = l->n;
+ struct node *right = r->n;
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+ uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+
+ if (nr_left + nr_right <= merge_threshold(left)) {
+ /*
+ * Merge
+ */
+ node_copy(left, right, -nr_right);
+ left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
+
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ delete_at(parent, r->index, sizeof(__le64));
+
+ /*
+ * We need to decrement the right block, but not its
+ * children, since they're still referenced by left.
+ */
+ dm_tm_dec(info->tm, dm_block_location(r->block));
+ } else {
+ /*
+ * Rebalance.
+ */
+ unsigned target_left = (nr_left + nr_right) / 2;
+
+ shift(left, right, nr_left - target_left);
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ *((__le64 *) value_ptr(parent, r->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(r->block));
+ *key_ptr(parent, r->index) = right->keys[0];
+ }
+}
+
+static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
+ unsigned left_index)
+{
+ int r;
+ struct node *parent;
+ struct child left, right;
+
+ parent = dm_block_data(shadow_current(s));
+
+ r = init_child(info, parent, left_index, &left);
+ if (r)
+ return r;
+
+ r = init_child(info, parent, left_index + 1, &right);
+ if (r) {
+ exit_child(info, &left);
+ return r;
+ }
+
+ __rebalance2(info, parent, &left, &right);
+
+ r = exit_child(info, &left);
+ if (r) {
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, &right);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void __rebalance3(struct dm_btree_info *info, struct node *parent,
+ struct child *l, struct child *c, struct child *r)
+{
+ struct node *left = l->n;
+ struct node *center = c->n;
+ struct node *right = r->n;
+
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+ uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
+ uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+ uint32_t max_entries = le32_to_cpu(left->header.max_entries);
+
+ unsigned target;
+
+ if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) {
+ /*
+ * Delete center node:
+ *
+ * We dump as many entries from center as possible into
+ * left, then the rest in right, then rebalance2. This
+ * wastes some cpu, but I want something simple atm.
+ */
+ unsigned shift = min(max_entries - nr_left, nr_center);
+
+ node_copy(left, center, -shift);
+ left->header.nr_entries = cpu_to_le32(nr_left + shift);
+
+ if (shift != nr_center) {
+ shift = nr_center - shift;
+ node_shift(right, shift);
+ node_copy(center, right, shift);
+ right->header.nr_entries = cpu_to_le32(nr_right + shift);
+ }
+
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ *((__le64 *) value_ptr(parent, r->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(r->block));
+ *key_ptr(parent, r->index) = right->keys[0];
+
+ delete_at(parent, c->index, sizeof(__le64));
+ r->index--;
+
+ dm_tm_dec(info->tm, dm_block_location(c->block));
+ __rebalance2(info, parent, l, r);
+
+ return;
+ }
+
+ /*
+ * Rebalance
+ */
+ target = (nr_left + nr_center + nr_right) / 3;
+ BUG_ON(target == nr_center);
+
+ /*
+ * Adjust the left node
+ */
+ shift(left, center, nr_left - target);
+
+ /*
+ * Adjust the right node
+ */
+ shift(center, right, target - nr_right);
+
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ *((__le64 *) value_ptr(parent, c->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(c->block));
+ *((__le64 *) value_ptr(parent, r->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(r->block));
+
+ *key_ptr(parent, c->index) = center->keys[0];
+ *key_ptr(parent, r->index) = right->keys[0];
+}
+
+static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
+ unsigned left_index)
+{
+ int r;
+ struct node *parent = dm_block_data(shadow_current(s));
+ struct child left, center, right;
+
+ /*
+ * FIXME: fill out an array?
+ */
+ r = init_child(info, parent, left_index, &left);
+ if (r)
+ return r;
+
+ r = init_child(info, parent, left_index + 1, &center);
+ if (r) {
+ exit_child(info, &left);
+ return r;
+ }
+
+ r = init_child(info, parent, left_index + 2, &right);
+ if (r) {
+ exit_child(info, &left);
+ exit_child(info, &center);
+ return r;
+ }
+
+ __rebalance3(info, parent, &left, &center, &right);
+
+ r = exit_child(info, &left);
+ if (r) {
+ exit_child(info, &center);
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, &center);
+ if (r) {
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, &right);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int get_nr_entries(struct dm_transaction_manager *tm,
+ dm_block_t b, uint32_t *result)
+{
+ int r;
+ struct dm_block *block;
+ struct node *n;
+
+ r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
+ if (r)
+ return r;
+
+ n = dm_block_data(block);
+ *result = le32_to_cpu(n->header.nr_entries);
+
+ return dm_tm_unlock(tm, block);
+}
+
+static int rebalance_children(struct shadow_spine *s,
+ struct dm_btree_info *info, uint64_t key)
+{
+ int i, r, has_left_sibling, has_right_sibling;
+ uint32_t child_entries;
+ struct node *n;
+
+ n = dm_block_data(shadow_current(s));
+
+ if (le32_to_cpu(n->header.nr_entries) == 1) {
+ struct dm_block *child;
+ dm_block_t b = value64(n, 0);
+
+ r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
+ if (r)
+ return r;
+
+ memcpy(n, dm_block_data(child),
+ dm_bm_block_size(dm_tm_get_bm(info->tm)));
+ r = dm_tm_unlock(info->tm, child);
+ dm_tm_dec(info->tm, dm_block_location(child));
+
+ return r;
+ }
+
+ i = lower_bound(n, key);
+ if (i < 0)
+ return -ENODATA;
+
+ r = get_nr_entries(info->tm, value64(n, i), &child_entries);
+ if (r)
+ return r;
+
+ if (child_entries > del_threshold(n))
+ return 0;
+
+ has_left_sibling = i > 0 ? 1 : 0;
+ has_right_sibling =
+ (i >= (le32_to_cpu(n->header.nr_entries) - 1)) ? 0 : 1;
+
+ if (!has_left_sibling)
+ r = rebalance2(s, info, i);
+
+ else if (!has_right_sibling)
+ r = rebalance2(s, info, i - 1);
+
+ else
+ r = rebalance3(s, info, i - 1);
+
+ return r;
+}
+
+static int do_leaf(struct node *n, uint64_t key, unsigned *index)
+{
+ int i = lower_bound(n, key);
+
+ if ((i < 0) ||
+ (i >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[i]) != key))
+ return -ENODATA;
+
+ *index = i;
+
+ return 0;
+}
+
+/*
+ * Prepares for removal from one level of the hierarchy. The caller must
+ * actually call delete_at() to remove the entry at index.
+ */
+static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
+ struct dm_btree_value_type *vt, dm_block_t root,
+ uint64_t key, unsigned *index)
+{
+ int i = *index, inc, r;
+ struct node *n;
+
+ for (;;) {
+ r = shadow_step(s, root, vt, &inc);
+ if (r < 0)
+ break;
+
+ /*
+ * We have to patch up the parent node, ugly, but I don't
+ * see a way to do this automatically as part of the spine
+ * op.
+ */
+ if (shadow_has_parent(s)) {
+ __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+ memcpy(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
+ &location, sizeof(__le64));
+ }
+
+ n = dm_block_data(shadow_current(s));
+ if (inc)
+ inc_children(info->tm, n, vt);
+
+ if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+ return do_leaf(n, key, index);
+
+ r = rebalance_children(s, info, key);
+ if (r)
+ break;
+
+ n = dm_block_data(shadow_current(s));
+ if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+ return do_leaf(n, key, index);
+
+ i = lower_bound(n, key);
+
+ /*
+ * We know the key is present, or else
+ * rebalance_children would have returned
+ * -ENODATA
+ */
+ root = value64(n, i);
+ }
+
+ return r;
+}
+
+int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, dm_block_t *new_root)
+{
+ unsigned level, last_level = info->levels - 1;
+ int index = 0, r = 0;
+ struct shadow_spine spine;
+ struct node *n;
+
+ init_shadow_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ r = remove_raw(&spine, info,
+ (level == last_level ?
+ &info->value_type : &le64_type),
+ root, keys[level], (unsigned *)&index);
+ if (r < 0)
+ break;
+
+ n = dm_block_data(shadow_current(&spine));
+ if (level != last_level) {
+ root = value64(n, index);
+ continue;
+ }
+
+ BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
+
+ if (info->value_type.dec)
+ info->value_type.dec(info->value_type.context,
+ value_ptr(n, index, info->value_type.size));
+
+ delete_at(n, index, info->value_type.size);
+
+ r = 0;
+ *new_root = shadow_root(&spine);
+ }
+
+ exit_shadow_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_remove);
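+
+/*
+ * A rough sketch of a single-level removal; info is a dm_btree_info the
+ * caller has already filled in, and root its current root block:
+ *
+ *	uint64_t key = 1234;
+ *	dm_block_t new_root;
+ *
+ *	if (!dm_btree_remove(&info, root, &key, &new_root))
+ *		root = new_root;
+ */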
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree-internal.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/device-mapper.h>
+#include <linux/export.h>
+
+#define DM_MSG_PREFIX "btree"
+
+/*----------------------------------------------------------------
+ * Array manipulation
+ *--------------------------------------------------------------*/
+static void memcpy_disk(void *dest, const void *src, size_t len)
+ __dm_written_to_disk(src)
+{
+ memcpy(dest, src, len);
+ __dm_unbless_for_disk(src);
+}
+
+static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
+ unsigned index, void *elt)
+ __dm_written_to_disk(elt)
+{
+ if (index < nr_elts)
+ memmove(base + (elt_size * (index + 1)),
+ base + (elt_size * index),
+ (nr_elts - index) * elt_size);
+
+ memcpy_disk(base + (elt_size * index), elt, elt_size);
+}
+
+/*----------------------------------------------------------------*/
+
+/* makes the assumption that no two keys are the same. */
+static int bsearch(struct node *n, uint64_t key, int want_hi)
+{
+ int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
+
+ while (hi - lo > 1) {
+ int mid = lo + ((hi - lo) / 2);
+ uint64_t mid_key = le64_to_cpu(n->keys[mid]);
+
+ if (mid_key == key)
+ return mid;
+
+ if (mid_key < key)
+ lo = mid;
+ else
+ hi = mid;
+ }
+
+ return want_hi ? hi : lo;
+}
+
+int lower_bound(struct node *n, uint64_t key)
+{
+ return bsearch(n, key, 0);
+}
+
+void inc_children(struct dm_transaction_manager *tm, struct node *n,
+ struct dm_btree_value_type *vt)
+{
+ unsigned i;
+ uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+
+ if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
+ for (i = 0; i < nr_entries; i++)
+ dm_tm_inc(tm, value64(n, i));
+ else if (vt->inc)
+ for (i = 0; i < nr_entries; i++)
+ vt->inc(vt->context,
+ value_ptr(n, i, vt->size));
+}
+
+static int insert_at(size_t value_size, struct node *node, unsigned index,
+ uint64_t key, void *value)
+ __dm_written_to_disk(value)
+{
+ uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
+ __le64 key_le = cpu_to_le64(key);
+
+ if (index > nr_entries ||
+ index >= le32_to_cpu(node->header.max_entries)) {
+ DMERR("too many entries in btree node for insert");
+ __dm_unbless_for_disk(value);
+ return -ENOMEM;
+ }
+
+ __dm_bless_for_disk(&key_le);
+
+ array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
+ array_insert(value_base(node), value_size, nr_entries, index, value);
+ node->header.nr_entries = cpu_to_le32(nr_entries + 1);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We want 3n entries (for some n). This works more nicely for repeated
+ * insert remove loops than (2n + 1).
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t block_size)
+{
+ uint32_t total, n;
+ size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */
+
+ block_size -= sizeof(struct node_header);
+ total = block_size / elt_size;
+ n = total / 3; /* rounds down */
+
+ return 3 * n;
+}
+
+int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
+{
+ int r;
+ struct dm_block *b;
+ struct node *n;
+ size_t block_size;
+ uint32_t max_entries;
+
+ r = new_block(info, &b);
+ if (r < 0)
+ return r;
+
+ block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
+ max_entries = calc_max_entries(info->value_type.size, block_size);
+
+ n = dm_block_data(b);
+ memset(n, 0, block_size);
+ n->header.flags = cpu_to_le32(LEAF_NODE);
+ n->header.nr_entries = cpu_to_le32(0);
+ n->header.max_entries = cpu_to_le32(max_entries);
+ n->header.value_size = cpu_to_le32(info->value_type.size);
+
+ *root = dm_block_location(b);
+ return unlock_block(info, b);
+}
+EXPORT_SYMBOL_GPL(dm_btree_empty);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Deletion uses a recursive algorithm; since we have limited stack space
+ * we explicitly manage our own stack on the heap.
+ */
+#define MAX_SPINE_DEPTH 64
+struct frame {
+ struct dm_block *b;
+ struct node *n;
+ unsigned level;
+ unsigned nr_children;
+ unsigned current_child;
+};
+
+struct del_stack {
+ struct dm_transaction_manager *tm;
+ int top;
+ struct frame spine[MAX_SPINE_DEPTH];
+};
+
+static int top_frame(struct del_stack *s, struct frame **f)
+{
+ if (s->top < 0) {
+ DMERR("btree deletion stack empty");
+ return -EINVAL;
+ }
+
+ *f = s->spine + s->top;
+
+ return 0;
+}
+
+static int unprocessed_frames(struct del_stack *s)
+{
+ return s->top >= 0;
+}
+
+static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
+{
+ int r;
+ uint32_t ref_count;
+
+ if (s->top >= MAX_SPINE_DEPTH - 1) {
+ DMERR("btree deletion stack out of memory");
+ return -ENOMEM;
+ }
+
+ r = dm_tm_ref(s->tm, b, &ref_count);
+ if (r)
+ return r;
+
+ if (ref_count > 1)
+ /*
+ * This is a shared node, so we can just decrement its
+ * reference counter and leave the children.
+ */
+ dm_tm_dec(s->tm, b);
+
+ else {
+ struct frame *f = s->spine + ++s->top;
+
+ r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
+ if (r) {
+ s->top--;
+ return r;
+ }
+
+ f->n = dm_block_data(f->b);
+ f->level = level;
+ f->nr_children = le32_to_cpu(f->n->header.nr_entries);
+ f->current_child = 0;
+ }
+
+ return 0;
+}
+
+static void pop_frame(struct del_stack *s)
+{
+ struct frame *f = s->spine + s->top--;
+
+ dm_tm_dec(s->tm, dm_block_location(f->b));
+ dm_tm_unlock(s->tm, f->b);
+}
+
+int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
+{
+ int r;
+ struct del_stack *s;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+ s->tm = info->tm;
+ s->top = -1;
+
+ r = push_frame(s, root, 1);
+ if (r)
+ goto out;
+
+ while (unprocessed_frames(s)) {
+ uint32_t flags;
+ struct frame *f;
+ dm_block_t b;
+
+ r = top_frame(s, &f);
+ if (r)
+ goto out;
+
+ if (f->current_child >= f->nr_children) {
+ pop_frame(s);
+ continue;
+ }
+
+ flags = le32_to_cpu(f->n->header.flags);
+ if (flags & INTERNAL_NODE) {
+ b = value64(f->n, f->current_child);
+ f->current_child++;
+ r = push_frame(s, b, f->level);
+ if (r)
+ goto out;
+
+ } else if (f->level != (info->levels - 1)) {
+ b = value64(f->n, f->current_child);
+ f->current_child++;
+ r = push_frame(s, b, f->level + 1);
+ if (r)
+ goto out;
+
+ } else {
+ if (info->value_type.dec) {
+ unsigned i;
+
+ for (i = 0; i < f->nr_children; i++)
+ info->value_type.dec(info->value_type.context,
+ value_ptr(f->n, i, info->value_type.size));
+ }
+ f->current_child = f->nr_children;
+ }
+ }
+
+out:
+ kfree(s);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_del);
+
+int dm_btree_del_gt(struct dm_btree_info *info, dm_block_t root, uint64_t *key,
+ dm_block_t *new_root)
+{
+ /* FIXME: implement */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_btree_del_gt);
+
+/*----------------------------------------------------------------*/
+
+static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
+ int (*search_fn)(struct node *, uint64_t),
+ uint64_t *result_key, void *v, size_t value_size)
+{
+ int i, r;
+ uint32_t flags, nr_entries;
+
+ do {
+ r = ro_step(s, block);
+ if (r < 0)
+ return r;
+
+ i = search_fn(ro_node(s), key);
+
+ flags = le32_to_cpu(ro_node(s)->header.flags);
+ nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
+ if (i < 0 || i >= nr_entries)
+ return -ENODATA;
+
+ if (flags & INTERNAL_NODE)
+ block = value64(ro_node(s), i);
+
+ } while (!(flags & LEAF_NODE));
+
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ memcpy(v, value_ptr(ro_node(s), i, value_size), value_size);
+
+ return 0;
+}
+
+int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value_le)
+{
+ unsigned level, last_level = info->levels - 1;
+ int r = -ENODATA;
+ uint64_t rkey;
+ __le64 internal_value_le;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ size_t size;
+ void *value_p;
+
+ if (level == last_level) {
+ value_p = value_le;
+ size = info->value_type.size;
+
+ } else {
+ value_p = &internal_value_le;
+ size = sizeof(uint64_t);
+ }
+
+ r = btree_lookup_raw(&spine, root, keys[level],
+ lower_bound, &rkey,
+ value_p, size);
+
+ if (!r) {
+ if (rkey != keys[level]) {
+ exit_ro_spine(&spine);
+ return -ENODATA;
+ }
+ } else {
+ exit_ro_spine(&spine);
+ return r;
+ }
+
+ root = le64_to_cpu(internal_value_le);
+ }
+ exit_ro_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_lookup);
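+
+/*
+ * A rough sketch of the calling convention: fill in a dm_btree_info,
+ * create an empty one-level tree, and (once entries have been inserted
+ * elsewhere) look a key up. tm is a transaction manager the caller
+ * already owns:
+ *
+ *	struct dm_btree_info info = {
+ *		.tm = tm,
+ *		.levels = 1,
+ *		.value_type = { .size = sizeof(__le64) },
+ *	};
+ *	dm_block_t root;
+ *	uint64_t key = 1234;
+ *	__le64 value_le;
+ *
+ *	if (!dm_btree_empty(&info, &root))
+ *		dm_btree_lookup(&info, root, &key, &value_le);
+ */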
+
+/*
+ * Splits a node by creating a sibling node and shifting half the node's
+ * contents across. Assumes there is a parent node, and it has room for
+ * another child.
+ *
+ * Before:
+ * +--------+
+ * | Parent |
+ * +--------+
+ * |
+ * v
+ * +----------+
+ * | A ++++++ |
+ * +----------+
+ *
+ *
+ * After:
+ * +--------+
+ * | Parent |
+ * +--------+
+ * | |
+ * v +------+
+ * +---------+ |
+ * | A* +++ | v
+ * +---------+ +-------+
+ * | B +++ |
+ * +-------+
+ *
+ * Where A* is a shadow of A.
+ */
+static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
+ unsigned parent_index, uint64_t key)
+{
+ int r;
+ size_t size;
+ unsigned nr_left, nr_right;
+ struct dm_block *left, *right, *parent;
+ struct node *ln, *rn, *pn;
+ __le64 location;
+
+ left = shadow_current(s);
+
+ r = new_block(s->info, &right);
+ if (r < 0)
+ return r;
+
+ ln = dm_block_data(left);
+ rn = dm_block_data(right);
+
+ nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
+ nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
+
+ ln->header.nr_entries = cpu_to_le32(nr_left);
+
+ rn->header.flags = ln->header.flags;
+ rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.max_entries = ln->header.max_entries;
+ rn->header.value_size = ln->header.value_size;
+ memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
+
+ size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
+ sizeof(uint64_t) : s->info->value_type.size;
+ memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size),
+ size * nr_right);
+
+ /*
+ * Patch up the parent
+ */
+ parent = shadow_parent(s);
+
+ pn = dm_block_data(parent);
+ location = cpu_to_le64(dm_block_location(left));
+ __dm_bless_for_disk(&location);
+ memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)),
+ &location, sizeof(__le64));
+
+ location = cpu_to_le64(dm_block_location(right));
+ __dm_bless_for_disk(&location);
+
+ r = insert_at(sizeof(__le64), pn, parent_index + 1,
+ le64_to_cpu(rn->keys[0]), &location);
+ if (r)
+ return r;
+
+ if (key < le64_to_cpu(rn->keys[0])) {
+ unlock_block(s->info, right);
+ s->nodes[1] = left;
+ } else {
+ unlock_block(s->info, left);
+ s->nodes[1] = right;
+ }
+
+ return 0;
+}
+
+/*
+ * Splits a node by creating two new children beneath the given node.
+ *
+ * Before:
+ * +----------+
+ * | A ++++++ |
+ * +----------+
+ *
+ *
+ * After:
+ * +------------+
+ * | A (shadow) |
+ * +------------+
+ * | |
+ * +------+ +----+
+ * | |
+ * v v
+ * +-------+ +-------+
+ * | B +++ | | C +++ |
+ * +-------+ +-------+
+ */
+static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
+{
+ int r;
+ size_t size;
+ unsigned nr_left, nr_right;
+ struct dm_block *left, *right, *new_parent;
+ struct node *pn, *ln, *rn;
+ __le64 val;
+
+ new_parent = shadow_current(s);
+
+ r = new_block(s->info, &left);
+ if (r < 0)
+ return r;
+
+ r = new_block(s->info, &right);
+ if (r < 0) {
+ /* FIXME: put left */
+ return r;
+ }
+
+ pn = dm_block_data(new_parent);
+ ln = dm_block_data(left);
+ rn = dm_block_data(right);
+
+ nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
+ nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
+
+ ln->header.flags = pn->header.flags;
+ ln->header.nr_entries = cpu_to_le32(nr_left);
+ ln->header.max_entries = pn->header.max_entries;
+ ln->header.value_size = pn->header.value_size;
+
+ rn->header.flags = pn->header.flags;
+ rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.max_entries = pn->header.max_entries;
+ rn->header.value_size = pn->header.value_size;
+
+ memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
+ memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
+
+ size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
+ sizeof(__le64) : s->info->value_type.size;
+ memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size);
+ memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size),
+ nr_right * size);
+
+ /* new_parent should just point to l and r now */
+ pn->header.flags = cpu_to_le32(INTERNAL_NODE);
+ pn->header.nr_entries = cpu_to_le32(2);
+ pn->header.max_entries = cpu_to_le32(
+ calc_max_entries(sizeof(__le64),
+ dm_bm_block_size(
+ dm_tm_get_bm(s->info->tm))));
+ pn->header.value_size = cpu_to_le32(sizeof(__le64));
+
+ val = cpu_to_le64(dm_block_location(left));
+ __dm_bless_for_disk(&val);
+ pn->keys[0] = ln->keys[0];
+ memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64));
+
+ val = cpu_to_le64(dm_block_location(right));
+ __dm_bless_for_disk(&val);
+ pn->keys[1] = rn->keys[0];
+ memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64));
+
+ /*
+ * Rejig the spine. This is ugly, since it knows too
+ * much about the spine.
+ */
+ if (s->nodes[0] != new_parent) {
+ unlock_block(s->info, s->nodes[0]);
+ s->nodes[0] = new_parent;
+ }
+ if (key < le64_to_cpu(rn->keys[0])) {
+ unlock_block(s->info, right);
+ s->nodes[1] = left;
+ } else {
+ unlock_block(s->info, left);
+ s->nodes[1] = right;
+ }
+ s->count = 2;
+
+ return 0;
+}
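A useful property of this variant is that the original block stays in place as the new parent, so the tree's root location never changes even though its contents are rewritten to hold just two (key, child) pairs. A toy continuation of the sketch above (names and block numbers invented) illustrates the resulting shape:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy node: in an internal node the values hold child block numbers. */
struct toy_node {
	unsigned nr_entries;
	uint64_t keys[4];
	uint64_t values[4];
};

/* Empty a full root into two children and leave it as a 2-entry parent. */
static void toy_split_root(struct toy_node *root,
			   struct toy_node *left, struct toy_node *right)
{
	unsigned nr_left = root->nr_entries / 2;
	unsigned nr_right = root->nr_entries - nr_left;

	memcpy(left->keys, root->keys, nr_left * sizeof(root->keys[0]));
	memcpy(left->values, root->values, nr_left * sizeof(root->values[0]));
	left->nr_entries = nr_left;

	memcpy(right->keys, root->keys + nr_left,
	       nr_right * sizeof(root->keys[0]));
	memcpy(right->values, root->values + nr_left,
	       nr_right * sizeof(root->values[0]));
	right->nr_entries = nr_right;

	root->keys[0] = left->keys[0];
	root->keys[1] = right->keys[0];
	root->values[0] = 100;	/* stand-in for left's block location */
	root->values[1] = 200;	/* stand-in for right's block location */
	root->nr_entries = 2;
}

int main(void)
{
	struct toy_node root = { 4, { 10, 20, 30, 40 }, { 1, 2, 3, 4 } };
	struct toy_node l = { 0 }, r = { 0 };

	toy_split_root(&root, &l, &r);
	printf("root now has %u entries; right subtree starts at key %llu\n",
	       root.nr_entries, (unsigned long long)root.keys[1]);
	return 0;
}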
+
+static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
+ struct dm_btree_value_type *vt,
+ uint64_t key, unsigned *index)
+{
+ int r, i = *index, inc, top = 1;
+ struct node *node;
+
+ for (;;) {
+ r = shadow_step(s, root, vt, &inc);
+ if (r < 0)
+ return r;
+
+ node = dm_block_data(shadow_current(s));
+ if (inc)
+ inc_children(s->info->tm, node, vt);
+
+ /*
+ * We have to patch up the parent node; it's ugly, but I don't
+ * see a way to do this automatically as part of the spine
+ * op.
+ */
+ if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unnecessary. */
+ __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+ __dm_bless_for_disk(&location);
+ memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
+ &location, sizeof(__le64));
+ }
+
+ node = dm_block_data(shadow_current(s));
+
+ if (node->header.nr_entries == node->header.max_entries) {
+ if (top)
+ r = btree_split_beneath(s, key);
+ else
+ r = btree_split_sibling(s, root, i, key);
+
+ if (r < 0)
+ return r;
+ }
+
+ node = dm_block_data(shadow_current(s));
+
+ i = lower_bound(node, key);
+
+ if (le32_to_cpu(node->header.flags) & LEAF_NODE)
+ break;
+
+ if (i < 0) {
+ /* change the bounds on the lowest key */
+ node->keys[0] = cpu_to_le64(key);
+ i = 0;
+ }
+
+ root = value64(node, i);
+ top = 0;
+ }
+
+ if (i < 0 || le64_to_cpu(node->keys[i]) != key)
+ i++;
+
+ /*
+ * We're about to overwrite this value, so undo the increment for it.
+ *
+ * FIXME: shame that inc information is leaking outside the spine.
+ * Plus inc is just plain wrong in the event of a split.
+ */
+ if (le64_to_cpu(node->keys[i]) == key && inc)
+ if (vt->dec)
+ vt->dec(vt->context, value_ptr(node, i, vt->size));
+
+ *index = i;
+ return 0;
+}
+
+static int insert(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root,
+ int *inserted)
+ __dm_written_to_disk(value)
+{
+ int r, need_insert;
+ unsigned level, index = -1, last_level = info->levels - 1;
+ dm_block_t block = root;
+ struct shadow_spine spine;
+ struct node *n;
+ struct dm_btree_value_type le64_type;
+
+ le64_type.context = NULL;
+ le64_type.size = sizeof(__le64);
+ le64_type.inc = NULL;
+ le64_type.dec = NULL;
+ le64_type.equal = NULL;
+
+ init_shadow_spine(&spine, info);
+
+ for (level = 0; level < (info->levels - 1); level++) {
+ r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
+ if (r < 0)
+ goto bad;
+
+ n = dm_block_data(shadow_current(&spine));
+ need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[index]) != keys[level]));
+
+ if (need_insert) {
+ dm_block_t new_tree;
+ __le64 new_le;
+
+ r = dm_btree_empty(info, &new_tree);
+ if (r < 0)
+ goto bad;
+
+ new_le = cpu_to_le64(new_tree);
+ __dm_bless_for_disk(&new_le);
+
+ r = insert_at(sizeof(uint64_t), n, index,
+ keys[level], &new_le);
+ if (r)
+ goto bad;
+ }
+
+ if (level < last_level)
+ block = value64(n, index);
+ }
+
+ r = btree_insert_raw(&spine, block, &info->value_type,
+ keys[level], &index);
+ if (r < 0)
+ goto bad;
+
+ n = dm_block_data(shadow_current(&spine));
+ need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[index]) != keys[level]));
+
+ if (need_insert) {
+ if (inserted)
+ *inserted = 1;
+
+ r = insert_at(info->value_type.size, n, index,
+ keys[level], value);
+ if (r)
+ goto bad_unblessed;
+ } else {
+ if (inserted)
+ *inserted = 0;
+
+ if (info->value_type.dec &&
+ (!info->value_type.equal ||
+ !info->value_type.equal(
+ info->value_type.context,
+ value_ptr(n, index, info->value_type.size),
+ value))) {
+ info->value_type.dec(info->value_type.context,
+ value_ptr(n, index, info->value_type.size));
+ }
+ memcpy_disk(value_ptr(n, index, info->value_type.size),
+ value, info->value_type.size);
+ }
+
+ *new_root = shadow_root(&spine);
+ exit_shadow_spine(&spine);
+
+ return 0;
+
+bad:
+ __dm_unbless_for_disk(value);
+bad_unblessed:
+ exit_shadow_spine(&spine);
+ return r;
+}
+
+int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value)
+{
+ return insert(info, root, keys, value, new_root, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert);
+
+int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root,
+ int *inserted)
+ __dm_written_to_disk(value)
+{
+ return insert(info, root, keys, value, new_root, inserted);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
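Taken together, dm_btree_insert() expects one key per tree level and a value that has already been blessed for disk. A minimal caller-side sketch for a one-level tree of __le64 values follows; the wrapper name is invented and the setup of 'info' (value_type, transaction manager, levels = 1) is assumed to happen elsewhere, so this is illustrative rather than compilable in isolation:

/* Hypothetical helper: map virt_block -> data_block in a one-level tree. */
static int example_map_block(struct dm_btree_info *info, dm_block_t *root,
			     uint64_t virt_block, dm_block_t data_block)
{
	uint64_t key = virt_block;
	__le64 value = cpu_to_le64(data_block);

	__dm_bless_for_disk(&value);

	/* On success *root is updated to the new, shadowed root. */
	return dm_btree_insert(info, *root, &key, &value, root);
}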
+
+/*----------------------------------------------------------------*/
+
+int dm_btree_clone(struct dm_btree_info *info, dm_block_t root,
+ dm_block_t *clone)
+{
+ int r;
+ struct dm_block *b, *orig_b;
+ struct node *b_node, *orig_node;
+
+ /* Copy the root node */
+ r = new_block(info, &b);
+ if (r < 0)
+ return r;
+
+ r = dm_tm_read_lock(info->tm, root, &btree_node_validator, &orig_b);
+ if (r < 0) {
+ dm_block_t location = dm_block_location(b);
+
+ unlock_block(info, b);
+ dm_tm_dec(info->tm, location);
+ return r;
+ }
+
+ *clone = dm_block_location(b);
+ b_node = dm_block_data(b);
+ orig_node = dm_block_data(orig_b);
+
+ memcpy(b_node, orig_node,
+ dm_bm_block_size(dm_tm_get_bm(info->tm)));
+ dm_tm_unlock(info->tm, orig_b);
+ inc_children(info->tm, b_node, &info->value_type);
+ dm_tm_unlock(info->tm, b);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_btree_clone);
+
+/*----------------------------------------------------------------*/
+
+static int find_highest_key(struct ro_spine *s, dm_block_t block,
+ uint64_t *result_key, dm_block_t *next_block)
+{
+ int i, r;
+ uint32_t flags;
+
+ do {
+ r = ro_step(s, block);
+ if (r < 0)
+ return r;
+
+ flags = le32_to_cpu(ro_node(s)->header.flags);
+ i = le32_to_cpu(ro_node(s)->header.nr_entries);
+ if (!i)
+ return -ENODATA;
+ else
+ i--;
+
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ if (next_block || flags & INTERNAL_NODE)
+ block = value64(ro_node(s), i);
+
+ } while (flags & INTERNAL_NODE);
+
+ if (next_block)
+ *next_block = block;
+ return 0;
+}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ int r = 0, count = 0, level;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ r = find_highest_key(&spine, root, result_keys + level,
+ level == info->levels - 1 ? NULL : &root);
+ if (r == -ENODATA) {
+ r = 0;
+ break;
+
+ } else if (r)
+ break;
+
+ count++;
+ }
+ exit_ro_spine(&spine);
+
+ return r ? r : count;
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
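Note that the function returns the number of levels for which a key was recovered (or a negative errno), so callers usually compare the result against info->levels rather than testing for zero. A hedged sketch, with the wrapper name invented for illustration:

/* Returns 0 with *highest filled in, -ENODATA if the tree is empty. */
static int example_highest_key(struct dm_btree_info *info, dm_block_t root,
			       uint64_t *highest)
{
	int r = dm_btree_find_highest_key(info, root, highest);

	if (r < 0)
		return r;

	return (unsigned)r == info->levels ? 0 : -ENODATA;
}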
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map-common.h"
+#include "dm-space-map-disk.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/device-mapper.h>
+#include <linux/export.h>
+
+#define DM_MSG_PREFIX "space map disk"
+
+/*
+ * Bitmap validator
+ */
+static void bitmap_prepare_for_write(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t block_size)
+{
+ struct disk_bitmap_header *disk_header = dm_block_data(b);
+
+ disk_header->blocknr = cpu_to_le64(dm_block_location(b));
+ disk_header->csum = cpu_to_le32(dm_block_csum_data(&disk_header->not_used, block_size - sizeof(__le32)));
+}
+
+static int bitmap_check(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t block_size)
+{
+ struct disk_bitmap_header *disk_header = dm_block_data(b);
+ __le32 csum_disk;
+
+ if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
+ DMERR("bitmap check failed blocknr %llu wanted %llu",
+ le64_to_cpu(disk_header->blocknr), dm_block_location(b));
+ return -ENOTBLK;
+ }
+
+ csum_disk = cpu_to_le32(dm_block_csum_data(&disk_header->not_used, block_size - sizeof(__le32)));
+ if (csum_disk != disk_header->csum) {
+ DMERR("bitmap check failed csum %u wanted %u",
+ le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+struct dm_block_validator dm_sm_bitmap_validator = {
+ .name = "sm_bitmap",
+ .prepare_for_write = bitmap_prepare_for_write,
+ .check = bitmap_check
+};
+
+/*----------------------------------------------------------------*/
+
+#define ENTRIES_PER_WORD 32
+#define ENTRIES_SHIFT 5
+
+void *dm_bitmap_data(struct dm_block *b)
+{
+ return dm_block_data(b) + sizeof(struct disk_bitmap_header);
+}
+
+#define WORD_MASK_LOW 0x5555555555555555ULL
+#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
+#define WORD_MASK_ALL 0xFFFFFFFFFFFFFFFFULL
+
+static unsigned bitmap_word_used(void *addr, unsigned b)
+{
+ __le64 *words_le = addr;
+ __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+ uint64_t bits = le64_to_cpu(*w_le);
+
+ return ((bits & WORD_MASK_LOW) == WORD_MASK_LOW ||
+ (bits & WORD_MASK_HIGH) == WORD_MASK_HIGH ||
+ (bits & WORD_MASK_ALL) == WORD_MASK_ALL);
+}
+
+unsigned sm_lookup_bitmap(void *addr, unsigned b)
+{
+ __le64 *words_le = addr;
+ __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+ b = (b & (ENTRIES_PER_WORD - 1)) << 1;
+
+ return (!!test_bit_le(b, (void *) w_le) << 1) |
+ (!!test_bit_le(b + 1, (void *) w_le));
+}
+
+void sm_set_bitmap(void *addr, unsigned b, unsigned val)
+{
+ __le64 *words_le = addr;
+ __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+ b = (b & (ENTRIES_PER_WORD - 1)) << 1;
+
+ if (val & 2)
+ __set_bit_le(b, (void *) w_le);
+ else
+ __clear_bit_le(b, (void *) w_le);
+
+ if (val & 1)
+ __set_bit_le(b + 1, (void *) w_le);
+ else
+ __clear_bit_le(b + 1, (void *) w_le);
+}
+
+int sm_find_free(void *addr, unsigned begin, unsigned end,
+ unsigned *result)
+{
+ while (begin < end) {
+ if (!(begin & (ENTRIES_PER_WORD - 1)) &&
+ bitmap_word_used(addr, begin)) {
+ begin += ENTRIES_PER_WORD;
+ continue;
+ }
+
+ if (!sm_lookup_bitmap(addr, begin)) {
+ *result = begin;
+ return 0;
+ }
+
+ begin++;
+ }
+
+ return -ENOSPC;
+}
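Each 64-bit bitmap word therefore packs 32 two-bit entries, with the value 3 reserved to mean "reference count continues in the ref-count btree". The standalone toy below shows the packing idea; note that it uses plain host-order shifts, whereas the code above goes through the little-endian bit helpers, so the exact on-disk bit positions differ:

#include <stdint.h>
#include <stdio.h>

#define TOY_ENTRIES_PER_WORD 32

/* Read the 2-bit entry for block 'b' (values 0-3). */
static unsigned toy_lookup(const uint64_t *words, unsigned b)
{
	uint64_t w = words[b / TOY_ENTRIES_PER_WORD];
	unsigned shift = (b % TOY_ENTRIES_PER_WORD) * 2;

	return (unsigned)((w >> shift) & 3);
}

/* Overwrite the 2-bit entry for block 'b' with 'val' (0-3). */
static void toy_set(uint64_t *words, unsigned b, unsigned val)
{
	uint64_t *w = &words[b / TOY_ENTRIES_PER_WORD];
	unsigned shift = (b % TOY_ENTRIES_PER_WORD) * 2;

	*w &= ~(3ULL << shift);
	*w |= (uint64_t)(val & 3) << shift;
}

int main(void)
{
	uint64_t words[2] = { 0, 0 };

	toy_set(words, 0, 2);	/* ref count 2, held in the bitmap */
	toy_set(words, 33, 3);	/* 3 == "see the ref-count btree" */
	printf("entry 0 = %u, entry 33 = %u\n",
	       toy_lookup(words, 0), toy_lookup(words, 33));
	return 0;
}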
+
+static int disk_ll_init(struct ll_disk *io, struct dm_transaction_manager *tm)
+{
+ io->tm = tm;
+ io->bitmap_info.tm = tm;
+ io->bitmap_info.levels = 1;
+
+ /*
+ * Because the new bitmap blocks are created via a shadow
+ * operation, the old entry has already had its reference count
+ * decremented and we don't need the btree to do any bookkeeping.
+ */
+ io->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
+ io->bitmap_info.value_type.inc = NULL;
+ io->bitmap_info.value_type.dec = NULL;
+ io->bitmap_info.value_type.equal = NULL;
+
+ io->ref_count_info.tm = tm;
+ io->ref_count_info.levels = 1;
+ io->ref_count_info.value_type.size = sizeof(uint32_t);
+ io->ref_count_info.value_type.inc = NULL;
+ io->ref_count_info.value_type.dec = NULL;
+ io->ref_count_info.value_type.equal = NULL;
+
+ io->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
+
+ if (io->block_size > (1 << 30)) {
+ DMERR("block size too big to hold bitmaps");
+ return -EINVAL;
+ }
+
+ io->entries_per_block = (io->block_size - sizeof(struct disk_bitmap_header)) *
+ ENTRIES_PER_BYTE;
+ io->nr_blocks = 0;
+ io->bitmap_root = 0;
+ io->ref_count_root = 0;
+
+ return 0;
+}
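To put entries_per_block into concrete terms: two bits per entry means four entries per byte, so, assuming 4 KiB metadata blocks and a 16-byte struct disk_bitmap_header (the csum, not_used and blocknr fields used above), each bitmap block indexes (4096 - 16) * 4 = 16320 data blocks. The exact header size is defined in dm-space-map-common.h and may differ; the arithmetic pattern is what matters here.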
+
+static int disk_ll_new(struct ll_disk *io, struct dm_transaction_manager *tm)
+{
+ int r;
+
+ r = disk_ll_init(io, tm);
+ if (r < 0)
+ return r;
+
+ io->nr_blocks = 0;
+ io->nr_allocated = 0;
+ r = dm_btree_empty(&io->bitmap_info, &io->bitmap_root);
+ if (r < 0)
+ return r;
+
+ r = dm_btree_empty(&io->ref_count_info, &io->ref_count_root);
+ if (r < 0) {
+ dm_btree_del(&io->bitmap_info, io->bitmap_root);
+ return r;
+ }
+
+ return 0;
+}
+
+static int disk_ll_extend(struct ll_disk *io, dm_block_t extra_blocks)
+{
+ int r;
+ dm_block_t i, nr_blocks;
+ unsigned old_blocks, blocks;
+
+ nr_blocks = io->nr_blocks + extra_blocks;
+ old_blocks = dm_sector_div_up(io->nr_blocks, io->entries_per_block);
+ blocks = dm_sector_div_up(nr_blocks, io->entries_per_block);
+
+ for (i = old_blocks; i < blocks; i++) {
+ struct dm_block *b;
+ struct disk_index_entry idx;
+
+ r = dm_tm_new_block(io->tm, &dm_sm_bitmap_validator, &b);
+ if (r < 0)
+ return r;
+ idx.blocknr = cpu_to_le64(dm_block_location(b));
+
+ r = dm_tm_unlock(io->tm, b);
+ if (r < 0)
+ return r;
+
+ idx.nr_free = cpu_to_le32(io->entries_per_block);
+ idx.none_free_before = 0;
+ __dm_bless_for_disk(&idx);
+
+ r = dm_btree_insert(&io->bitmap_info, io->bitmap_root,
+ &i, &idx, &io->bitmap_root);
+ if (r < 0)
+ return r;
+ }
+
+ io->nr_blocks = nr_blocks;
+ return 0;
+}
+
+static int disk_ll_open(struct ll_disk *ll, struct dm_transaction_manager *tm,
+ void *root_le, size_t len)
+{
+ int r;
+ struct disk_sm_root *smr = root_le;
+
+ if (len < sizeof(struct disk_sm_root)) {
+ DMERR("sm_disk root too small");
+ return -ENOMEM;
+ }
+
+ r = disk_ll_init(ll, tm);
+ if (r < 0)
+ return r;
+
+ ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
+ ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
+ ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
+ ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
+
+ return 0;
+}
+
+static int disk_ll_lookup_bitmap(struct ll_disk *io, dm_block_t b, uint32_t *result)
+{
+ int r;
+ dm_block_t index = b;
+ struct disk_index_entry ie_disk;
+ struct dm_block *blk;
+
+ do_div(index, io->entries_per_block);
+ r = dm_btree_lookup(&io->bitmap_info, io->bitmap_root, &index, &ie_disk);
+ if (r < 0)
+ return r;
+
+ r = dm_tm_read_lock(io->tm, le64_to_cpu(ie_disk.blocknr), &dm_sm_bitmap_validator, &blk);
+ if (r < 0)
+ return r;
+
+ *result = sm_lookup_bitmap(dm_bitmap_data(blk), do_div(b, io->entries_per_block));
+
+ return dm_tm_unlock(io->tm, blk);
+}
+
+static int disk_ll_lookup(struct ll_disk *io, dm_block_t b, uint32_t *result)
+{
+ __le32 rc_le;
+ int r = disk_ll_lookup_bitmap(io, b, result);
+
+ if (r)
+ return r;
+
+ if (*result != 3)
+ return r;
+
+ r = dm_btree_lookup(&io->ref_count_info, io->ref_count_root, &b, &rc_le);
+ if (r < 0)
+ return r;
+
+ *result = le32_to_cpu(rc_le);
+
+ return r;
+}
+
+static int disk_ll_find_free_block(struct ll_disk *io, dm_block_t begin,
+ dm_block_t end, dm_block_t *result)
+{
+ int r;
+ struct disk_index_entry ie_disk;
+ dm_block_t i, index_begin = begin;
+ dm_block_t index_end = dm_sector_div_up(end, io->entries_per_block);
+
+ begin = do_div(index_begin, io->entries_per_block);
+
+ for (i = index_begin; i < index_end; i++, begin = 0) {
+ struct dm_block *blk;
+ unsigned position;
+ uint32_t bit_end;
+
+ r = dm_btree_lookup(&io->bitmap_info, io->bitmap_root, &i, &ie_disk);
+ if (r < 0)
+ return r;
+
+ if (le32_to_cpu(ie_disk.nr_free) <= 0)
+ continue;
+
+ r = dm_tm_read_lock(io->tm, le64_to_cpu(ie_disk.blocknr),
+ &dm_sm_bitmap_validator, &blk);
+ if (r < 0)
+ return r;
+
+ bit_end = (i == index_end - 1) ?
+ do_div(end, io->entries_per_block) : io->entries_per_block;
+
+ r = sm_find_free(dm_bitmap_data(blk),
+ max((unsigned)begin, (unsigned)le32_to_cpu(ie_disk.none_free_before)),
+ bit_end, &position);
+ if (r < 0) {
+ dm_tm_unlock(io->tm, blk);
+ continue;
+ }
+
+ r = dm_tm_unlock(io->tm, blk);
+ if (r < 0)
+ return r;
+
+ *result = i * io->entries_per_block + (dm_block_t) position;
+
+ return 0;
+ }
+
+ return -ENOSPC;
+}
+
+static int disk_ll_insert(struct ll_disk *io, dm_block_t b, uint32_t ref_count)
+{
+ int r;
+ uint32_t bit, old;
+ struct dm_block *nb;
+ dm_block_t index = b;
+ struct disk_index_entry ie_disk;
+ void *bm_le;
+ int inc;
+
+ do_div(index, io->entries_per_block);
+ r = dm_btree_lookup(&io->bitmap_info, io->bitmap_root, &index, &ie_disk);
+ if (r < 0)
+ return r;
+
+ r = dm_tm_shadow_block(io->tm, le64_to_cpu(ie_disk.blocknr),
+ &dm_sm_bitmap_validator, &nb, &inc);
+ if (r < 0) {
+ DMERR("dm_tm_shadow_block() failed");
+ return r;
+ }
+ ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
+
+ bm_le = dm_bitmap_data(nb);
+ bit = do_div(b, io->entries_per_block);
+ old = sm_lookup_bitmap(bm_le, bit);
+
+ if (ref_count <= 2) {
+ sm_set_bitmap(bm_le, bit, ref_count);
+
+ if (old > 2) {
+ r = dm_btree_remove(&io->ref_count_info, io->ref_count_root,
+ &b, &io->ref_count_root);
+ if (r) {
+ dm_tm_unlock(io->tm, nb);
+ return r;
+ }
+ }
+ } else {
+ __le32 rc_le = cpu_to_le32(ref_count);
+
+ __dm_bless_for_disk(&rc_le);
+
+ sm_set_bitmap(bm_le, bit, 3);
+ r = dm_btree_insert(&io->ref_count_info, io->ref_count_root,
+ &b, &rc_le, &io->ref_count_root);
+ if (r < 0) {
+ dm_tm_unlock(io->tm, nb);
+ DMERR("ref count insert failed");
+ return r;
+ }
+ }
+
+ r = dm_tm_unlock(io->tm, nb);
+ if (r < 0)
+ return r;
+
+ if (ref_count && !old) {
+ io->nr_allocated++;
+ ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) - 1);
+ if (le32_to_cpu(ie_disk.none_free_before) == b)
+ ie_disk.none_free_before = cpu_to_le32(b + 1);
+
+ } else if (old && !ref_count) {
+ io->nr_allocated--;
+ ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) + 1);
+ ie_disk.none_free_before = cpu_to_le32(min((dm_block_t) le32_to_cpu(ie_disk.none_free_before), b));
+ }
+
+ __dm_bless_for_disk(&ie_disk);
+
+ r = dm_btree_insert(&io->bitmap_info, io->bitmap_root, &index, &ie_disk, &io->bitmap_root);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
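disk_ll_insert() is where the "counts 0-2 live in the bitmap, 3 means overflow" rule is enforced: small counts are written straight into the shadowed bitmap block (removing any stale overflow entry), while larger counts store the marker 3 and push the real value into the ref-count btree. A self-contained toy of just that decision, with a plain array standing in for the btree:

#include <stdint.h>
#include <stdio.h>

#define TOY_OVERFLOW 3u

struct toy_sm {
	uint8_t bitmap[16];	/* one byte per block, for clarity */
	uint32_t overflow[16];	/* stands in for the ref-count btree */
};

static void toy_set_count(struct toy_sm *sm, unsigned b, uint32_t count)
{
	if (count <= 2) {
		sm->bitmap[b] = (uint8_t)count;
		sm->overflow[b] = 0;		/* like dm_btree_remove() */
	} else {
		sm->bitmap[b] = TOY_OVERFLOW;
		sm->overflow[b] = count;	/* like dm_btree_insert() */
	}
}

static uint32_t toy_get_count(const struct toy_sm *sm, unsigned b)
{
	return sm->bitmap[b] == TOY_OVERFLOW ? sm->overflow[b] : sm->bitmap[b];
}

int main(void)
{
	struct toy_sm sm = { { 0 }, { 0 } };

	toy_set_count(&sm, 5, 2);
	toy_set_count(&sm, 7, 100);
	printf("block 5 -> %u, block 7 -> %u\n",
	       toy_get_count(&sm, 5), toy_get_count(&sm, 7));
	return 0;
}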
+
+static int disk_ll_inc(struct ll_disk *ll, dm_block_t b)
+{
+ int r;
+ uint32_t rc;
+
+ r = disk_ll_lookup(ll, b, &rc);
+ if (r)
+ return r;
+
+ return disk_ll_insert(ll, b, rc + 1);
+}
+
+static int disk_ll_dec(struct ll_disk *ll, dm_block_t b)
+{
+ int r;
+ uint32_t rc;
+
+ r = disk_ll_lookup(ll, b, &rc);
+ if (r)
+ return r;
+
+ if (!rc)
+ return -EINVAL;
+
+ return disk_ll_insert(ll, b, rc - 1);
+}
+
+/*--------------------------------------------------------------*/
+
+/*
+ * Space map interface.
+ */
+struct sm_disk {
+ struct dm_space_map sm;
+
+ struct ll_disk ll;
+};
+
+static void sm_disk_destroy(struct dm_space_map *sm)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ kfree(smd);
+}
+
+static int sm_disk_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_extend(&smd->ll, extra_blocks);
+}
+
+static int sm_disk_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ *count = smd->ll.nr_blocks;
+
+ return 0;
+}
+
+static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ *count = smd->ll.nr_blocks - smd->ll.nr_allocated;
+
+ return 0;
+}
+
+static int sm_disk_get_count(struct dm_space_map *sm, dm_block_t b,
+ uint32_t *result)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_lookup(&smd->ll, b, result);
+}
+
+static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
+ int *result)
+{
+ int r;
+ uint32_t count;
+
+ r = sm_disk_get_count(sm, b, &count);
+ if (r)
+ return r;
+
+ *result = count > 1;
+
+ return 0;
+}
+
+static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
+ uint32_t count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_insert(&smd->ll, b, count);
+}
+
+static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_inc(&smd->ll, b);
+}
+
+static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_dec(&smd->ll, b);
+}
+
+static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+ int r;
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ /*
+ * FIXME: We should start the search where we left off.
+ */
+ r = disk_ll_find_free_block(&smd->ll, 0, smd->ll.nr_blocks, b);
+ if (r)
+ return r;
+
+ return disk_ll_inc(&smd->ll, *b);
+}
+
+static int sm_disk_commit(struct dm_space_map *sm)
+{
+ return 0;
+}
+
+static int sm_disk_root_size(struct dm_space_map *sm, size_t *result)
+{
+ *result = sizeof(struct disk_sm_root);
+
+ return 0;
+}
+
+static int sm_disk_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+ struct disk_sm_root root_le;
+
+ root_le.nr_blocks = cpu_to_le64(smd->ll.nr_blocks);
+ root_le.nr_allocated = cpu_to_le64(smd->ll.nr_allocated);
+ root_le.bitmap_root = cpu_to_le64(smd->ll.bitmap_root);
+ root_le.ref_count_root = cpu_to_le64(smd->ll.ref_count_root);
+
+ if (max < sizeof(root_le))
+ return -ENOSPC;
+
+ memcpy(where_le, &root_le, sizeof(root_le));
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_space_map ops = {
+ .destroy = sm_disk_destroy,
+ .extend = sm_disk_extend,
+ .get_nr_blocks = sm_disk_get_nr_blocks,
+ .get_nr_free = sm_disk_get_nr_free,
+ .get_count = sm_disk_get_count,
+ .count_is_more_than_one = sm_disk_count_is_more_than_one,
+ .set_count = sm_disk_set_count,
+ .inc_block = sm_disk_inc_block,
+ .dec_block = sm_disk_dec_block,
+ .new_block = sm_disk_new_block,
+ .commit = sm_disk_commit,
+ .root_size = sm_disk_root_size,
+ .copy_root = sm_disk_copy_root
+};
+
+struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
+ dm_block_t nr_blocks)
+{
+ int r;
+ struct sm_disk *smd;
+
+ smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+ if (!smd)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+ r = disk_ll_new(&smd->ll, tm);
+ if (r)
+ goto bad;
+
+ r = disk_ll_extend(&smd->ll, nr_blocks);
+ if (r)
+ goto bad;
+
+ r = sm_disk_commit(&smd->sm);
+ if (r)
+ goto bad;
+
+ return &smd->sm;
+
+bad:
+ kfree(smd);
+ return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_create);
+
+struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
+ void *root_le, size_t len)
+{
+ int r;
+ struct sm_disk *smd;
+
+ smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+ if (!smd)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+ r = disk_ll_open(&smd->ll, tm, root_le, len);
+ if (r)
+ goto bad;
+
+ r = sm_disk_commit(&smd->sm);
+ if (r)
+ goto bad;
+
+ return &smd->sm;
+
+bad:
+ kfree(smd);
+ return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_open);
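From a caller's perspective the disk space map is created (or reopened from a saved root) and then driven entirely through the ops table. A hedged sketch of creation plus a single allocation; the block count of 128 and the helper name are invented, and error handling follows the ERR_PTR() convention used above:

/* Hypothetical helper: build a fresh space map and allocate one block. */
static struct dm_space_map *example_create_sm(struct dm_transaction_manager *tm)
{
	struct dm_space_map *sm;
	dm_block_t b;
	int r;

	sm = dm_sm_disk_create(tm, 128);
	if (IS_ERR(sm))
		return sm;

	r = sm->new_block(sm, &b);
	if (r) {
		sm->destroy(sm);
		return ERR_PTR(r);
	}

	return sm;
}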
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/gpio.h>
+#include <linux/module.h>
#include <linux/mmc/host.h>
#include <plat/sdhci.h>
#include <plat/regs-sdhci.h>
#include <linux/mod_devicetable.h>
#include <linux/power_supply.h>
#include <linux/power/max17042_battery.h>
+#include <linux/module.h>
-enum max17042_register {
- MAX17042_STATUS = 0x00,
- MAX17042_VALRT_Th = 0x01,
- MAX17042_TALRT_Th = 0x02,
- MAX17042_SALRT_Th = 0x03,
- MAX17042_AtRate = 0x04,
- MAX17042_RepCap = 0x05,
- MAX17042_RepSOC = 0x06,
- MAX17042_Age = 0x07,
- MAX17042_TEMP = 0x08,
- MAX17042_VCELL = 0x09,
- MAX17042_Current = 0x0A,
- MAX17042_AvgCurrent = 0x0B,
- MAX17042_Qresidual = 0x0C,
- MAX17042_SOC = 0x0D,
- MAX17042_AvSOC = 0x0E,
- MAX17042_RemCap = 0x0F,
- MAX17402_FullCAP = 0x10,
- MAX17042_TTE = 0x11,
- MAX17042_V_empty = 0x12,
-
- MAX17042_RSLOW = 0x14,
-
- MAX17042_AvgTA = 0x16,
- MAX17042_Cycles = 0x17,
- MAX17042_DesignCap = 0x18,
- MAX17042_AvgVCELL = 0x19,
- MAX17042_MinMaxTemp = 0x1A,
- MAX17042_MinMaxVolt = 0x1B,
- MAX17042_MinMaxCurr = 0x1C,
- MAX17042_CONFIG = 0x1D,
- MAX17042_ICHGTerm = 0x1E,
- MAX17042_AvCap = 0x1F,
- MAX17042_ManName = 0x20,
- MAX17042_DevName = 0x21,
- MAX17042_DevChem = 0x22,
-
- MAX17042_TempNom = 0x24,
- MAX17042_TempCold = 0x25,
- MAX17042_TempHot = 0x26,
- MAX17042_AIN = 0x27,
- MAX17042_LearnCFG = 0x28,
- MAX17042_SHFTCFG = 0x29,
- MAX17042_RelaxCFG = 0x2A,
- MAX17042_MiscCFG = 0x2B,
- MAX17042_TGAIN = 0x2C,
- MAx17042_TOFF = 0x2D,
- MAX17042_CGAIN = 0x2E,
- MAX17042_COFF = 0x2F,
-
- MAX17042_Q_empty = 0x33,
- MAX17042_T_empty = 0x34,
-
- MAX17042_RCOMP0 = 0x38,
- MAX17042_TempCo = 0x39,
- MAX17042_Rx = 0x3A,
- MAX17042_T_empty0 = 0x3B,
- MAX17042_TaskPeriod = 0x3C,
- MAX17042_FSTAT = 0x3D,
-
- MAX17042_SHDNTIMER = 0x3F,
-
- MAX17042_VFRemCap = 0x4A,
-
- MAX17042_QH = 0x4D,
- MAX17042_QL = 0x4E,
-};
-
struct max17042_chip {
struct i2c_client *client;
struct power_supply battery;
#include <linux/power_supply.h>
#include <linux/platform_device.h>
#include <linux/power/max8903_charger.h>
+#include <linux/module.h>
struct max8903_data {
- struct max8903_pdata *pdata;
+ struct max8903_pdata pdata;
struct device *dev;
struct power_supply psy;
bool fault;