#include <asm/bootinfo.h>
#include <asm/cacheflush.h>
#include <asm/r4kcache.h>
#include <asm/reboot.h>
+#include <asm/smp-ops.h>
#include <asm/time.h>
#include <msp_prom.h>
--- /dev/null
+/*
+ * BSG helper library
+ *
+ * Copyright (C) 2008 James Smart, Emulex Corporation
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2011 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+ put_device(job->dev); /* release reference for the request */
+
+ kfree(job->request_payload.sg_list);
+ kfree(job->reply_payload.sg_list);
+ kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+ unsigned int reply_payload_rcv_len)
+{
+ struct request *req = job->req;
+ struct request *rsp = req->next_rq;
+ int err;
+
+ err = job->req->errors = result;
+ if (err < 0)
+ /* we're only returning the result field in the reply */
+ job->req->sense_len = sizeof(u32);
+ else
+ job->req->sense_len = job->reply_len;
+ /* we assume all request payload was transferred, residual == 0 */
+ req->resid_len = 0;
+
+ if (rsp) {
+ WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+ /* set reply (bidi) residual */
+ rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+ }
+ blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
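+
+/*
+ * A rough sketch of how an LLD's hardware-completion path might hand a
+ * finished job back to this helper. The foo_io structure and its fields
+ * are hypothetical; only bsg_job_done() is real:
+ *
+ *	static void foo_hw_complete(struct foo_io *io)
+ *	{
+ *		struct bsg_job *job = io->job;
+ *
+ *		job->reply_len = io->reply_len;
+ *		bsg_job_done(job, io->status ? -EIO : 0, io->bytes_rcvd);
+ *	}
+ */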
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+ struct bsg_job *job = rq->special;
+
+ blk_end_request_all(rq, rq->errors);
+ bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+ size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+ BUG_ON(!req->nr_phys_segments);
+
+ buf->sg_list = kzalloc(sz, GFP_KERNEL);
+ if (!buf->sg_list)
+ return -ENOMEM;
+ sg_init_table(buf->sg_list, req->nr_phys_segments);
+ buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+ buf->payload_len = blk_rq_bytes(req);
+ return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+ struct request *rsp = req->next_rq;
+ struct request_queue *q = req->q;
+ struct bsg_job *job;
+ int ret;
+
+ BUG_ON(req->special);
+
+ job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ req->special = job;
+ job->req = req;
+ if (q->bsg_job_size)
+ job->dd_data = (void *)&job[1];
+ job->request = req->cmd;
+ job->request_len = req->cmd_len;
+ job->reply = req->sense;
+ job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
+ * allocated */
+ if (req->bio) {
+ ret = bsg_map_buffer(&job->request_payload, req);
+ if (ret)
+ goto failjob_rls_job;
+ }
+ if (rsp && rsp->bio) {
+ ret = bsg_map_buffer(&job->reply_payload, rsp);
+ if (ret)
+ goto failjob_rls_rqst_payload;
+ }
+ job->dev = dev;
+ /* take a reference for the request */
+ get_device(job->dev);
+ return 0;
+
+failjob_rls_rqst_payload:
+ kfree(job->request_payload.sg_list);
+failjob_rls_job:
+ kfree(job);
+ return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+ if (!q)
+ return;
+
+ blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error bsg_create_job() should return a -Exyz error value
+ * that will be set in req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+ struct device *dev = q->queuedata;
+ struct request *req;
+ struct bsg_job *job;
+ int ret;
+
+ if (!get_device(dev))
+ return;
+
+ while (1) {
+ req = blk_fetch_request(q);
+ if (!req)
+ break;
+ spin_unlock_irq(q->queue_lock);
+
+ ret = bsg_create_job(dev, req);
+ if (ret) {
+ req->errors = ret;
+ blk_end_request_all(req, ret);
+ spin_lock_irq(q->queue_lock);
+ continue;
+ }
+
+ job = req->special;
+ ret = q->bsg_job_fn(job);
+ spin_lock_irq(q->queue_lock);
+ if (ret)
+ break;
+ }
+
+ spin_unlock_irq(q->queue_lock);
+ put_device(dev);
+ spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have set up the request queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+ char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+ int ret;
+
+ q->queuedata = dev;
+ q->bsg_job_size = dd_job_size;
+ q->bsg_job_fn = job_fn;
+ queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ blk_queue_softirq_done(q, bsg_softirq_done);
+ blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+ ret = bsg_register_queue(q, dev, name, NULL);
+ if (ret) {
+ printk(KERN_ERR "%s: bsg interface failed to "
+ "initialize - register queue\n", dev->kobj.name);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
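+
+/*
+ * A rough sketch of how a driver might wire the two halves together.
+ * foo_bsg_job() and the "foo_bsg" name are placeholders; the queue must
+ * use bsg_request_fn as its request_fn, as noted above:
+ *
+ *	struct request_queue *q;
+ *
+ *	q = blk_init_queue(bsg_request_fn, NULL);
+ *	if (!q)
+ *		return -ENOMEM;
+ *	ret = bsg_setup_queue(dev, q, "foo_bsg", foo_bsg_job, dd_size);
+ *	if (ret)
+ *		blk_cleanup_queue(q);
+ */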
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q: the request_queue that is to be torn down.
+ *
+ * Notes:
+ * Before unregistering the queue, empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+ struct request *req; /* block request */
+ int counts; /* totals for request_list count and starved */
+
+ if (!q)
+ return;
+
+ /* Stop taking in new requests */
+ spin_lock_irq(q->queue_lock);
+ blk_stop_queue(q);
+
+ /* drain all requests in the queue */
+ while (1) {
+ /* need the lock to fetch a request
+ * this may fetch the same request as the previous pass
+ */
+ req = blk_fetch_request(q);
+ /* save requests in use and starved */
+ counts = q->rq.count[0] + q->rq.count[1] +
+ q->rq.starved[0] + q->rq.starved[1];
+ spin_unlock_irq(q->queue_lock);
+ /* any requests still outstanding? */
+ if (counts == 0)
+ break;
+
+ /* This may be the same req as the previous iteration;
+ * always call blk_end_request_all after a prefetch. It is
+ * not okay to leave the request unended, because the
+ * prefetch started the request.
+ */
+ if (req) {
+ /* return -ENXIO to indicate that this queue is
+ * going away
+ */
+ req->errors = -ENXIO;
+ blk_end_request_all(req, -ENXIO);
+ }
+
+ msleep(200); /* allow bsg to possibly finish */
+ spin_lock_irq(q->queue_lock);
+ }
+ bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/amba/bus.h>
#include <plat/ste_dma40.h>
*/
#include <linux/slab.h>
+#include <linux/module.h>
#include "md.h"
+#include "raid1.h"
#include "raid5.h"
-#include "dm.h"
#include "bitmap.h"
+#include <linux/device-mapper.h>
+
#define DM_MSG_PREFIX "raid"
/*
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+#include "dm-block-manager.h"
+#include "dm-persistent-data-internal.h"
+
+#include <linux/dm-io.h>
+#include <linux/slab.h>
+#include <linux/device-mapper.h>
+#include <linux/export.h>
+
+#define DM_MSG_PREFIX "block manager"
+
+/*----------------------------------------------------------------*/
+
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#define MAX_CACHE_SIZE 16U
+
+enum dm_block_state {
+ BS_EMPTY,
+ BS_CLEAN,
+ BS_READING,
+ BS_WRITING,
+ BS_READ_LOCKED,
+ BS_READ_LOCKED_DIRTY, /* Block was dirty before it was read locked. */
+ BS_WRITE_LOCKED,
+ BS_DIRTY,
+ BS_ERROR
+};
+
+struct dm_block {
+ struct list_head list;
+ struct hlist_node hlist;
+
+ dm_block_t where;
+ struct dm_block_validator *validator;
+ void *data;
+ wait_queue_head_t io_q;
+ unsigned read_lock_count;
+ unsigned write_lock_pending;
+ enum dm_block_state state;
+
+ /*
+ * Extra flags like REQ_FLUSH and REQ_FUA can be set here. This is
+ * mainly to avoid a race condition in flush_and_unlock() where
+ * the newly-unlocked superblock may have been submitted for a
+ * write before the write_all_dirty() call is made.
+ */
+ int io_flags;
+
+ /*
+ * Sadly we need an up pointer so we can get to the bm on io
+ * completion.
+ */
+ struct dm_block_manager *bm;
+};
+
+struct dm_block_manager {
+ struct block_device *bdev;
+ unsigned cache_size; /* In bytes */
+ unsigned block_size; /* In bytes */
+ dm_block_t nr_blocks;
+
+ /*
+ * This will trigger every time an io completes.
+ */
+ wait_queue_head_t io_q;
+
+ struct dm_io_client *io;
+
+ /*
+ * Protects all the lists and the hash table.
+ */
+ spinlock_t lock;
+
+ struct list_head empty_list; /* No block assigned */
+ struct list_head clean_list; /* Unlocked and clean */
+ struct list_head dirty_list; /* Unlocked and dirty */
+ struct list_head error_list;
+
+ unsigned available_count;
+ unsigned reading_count;
+ unsigned writing_count;
+
+ struct kmem_cache *block_cache; /* struct dm_block */
+ struct kmem_cache *buffer_cache; /* The buffers that store the raw data */
+
+ /*
+ * Hash table of cached blocks, holds everything that isn't in the
+ * BS_EMPTY state.
+ */
+ unsigned hash_size;
+ unsigned hash_mask;
+
+ struct hlist_head buckets[0]; /* Must be last member of struct. */
+};
+
+dm_block_t dm_block_location(struct dm_block *b)
+{
+ return b->where;
+}
+EXPORT_SYMBOL_GPL(dm_block_location);
+
+void *dm_block_data(struct dm_block *b)
+{
+ return b->data;
+}
+EXPORT_SYMBOL_GPL(dm_block_data);
+
+/*----------------------------------------------------------------
+ * Hash table
+ *--------------------------------------------------------------*/
+static struct dm_block *__find_block(struct dm_block_manager *bm, dm_block_t b)
+{
+ unsigned bucket = dm_hash_block(b, bm->hash_mask);
+ struct dm_block *blk;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(blk, n, bm->buckets + bucket, hlist)
+ if (blk->where == b)
+ return blk;
+
+ return NULL;
+}
+
+static void __insert_block(struct dm_block_manager *bm, struct dm_block *b)
+{
+ unsigned bucket = dm_hash_block(b->where, bm->hash_mask);
+
+ hlist_add_head(&b->hlist, bm->buckets + bucket);
+}
+
+/*----------------------------------------------------------------
+ * Block state:
+ * __transition() handles transition of a block between different states.
+ * Study this to understand the state machine.
+ *
+ * Alternatively install graphviz and run:
+ * grep DOT dm-block-manager.c | grep -v ' ' |
+ * sed -e 's/.*DOT: //' -e 's/\*\///' |
+ * dot -Tps -o states.ps
+ *
+ * Assumes bm->lock is held.
+ *--------------------------------------------------------------*/
+static void __transition(struct dm_block *b, enum dm_block_state new_state)
+{
+ /* DOT: digraph BlockStates { */
+ struct dm_block_manager *bm = b->bm;
+
+ switch (new_state) {
+ case BS_EMPTY:
+ /* DOT: error -> empty */
+ /* DOT: clean -> empty */
+ BUG_ON(!((b->state == BS_ERROR) ||
+ (b->state == BS_CLEAN)));
+ hlist_del(&b->hlist);
+ list_move(&b->list, &bm->empty_list);
+ b->write_lock_pending = 0;
+ b->read_lock_count = 0;
+ b->io_flags = 0;
+ b->validator = NULL;
+
+ if (b->state == BS_ERROR)
+ bm->available_count++;
+ break;
+
+ case BS_CLEAN:
+ /* DOT: reading -> clean */
+ /* DOT: writing -> clean */
+ /* DOT: read_locked -> clean */
+ BUG_ON(!((b->state == BS_READING) ||
+ (b->state == BS_WRITING) ||
+ (b->state == BS_READ_LOCKED)));
+ switch (b->state) {
+ case BS_READING:
+ BUG_ON(!bm->reading_count);
+ bm->reading_count--;
+ break;
+
+ case BS_WRITING:
+ BUG_ON(!bm->writing_count);
+ bm->writing_count--;
+ b->io_flags = 0;
+ break;
+
+ default:
+ break;
+ }
+ list_add_tail(&b->list, &bm->clean_list);
+ bm->available_count++;
+ break;
+
+ case BS_READING:
+ /* DOT: empty -> reading */
+ BUG_ON(!(b->state == BS_EMPTY));
+ __insert_block(bm, b);
+ list_del(&b->list);
+ bm->available_count--;
+ bm->reading_count++;
+ break;
+
+ case BS_WRITING:
+ /* DOT: dirty -> writing */
+ BUG_ON(!(b->state == BS_DIRTY));
+ list_del(&b->list);
+ bm->writing_count++;
+ break;
+
+ case BS_READ_LOCKED:
+ /* DOT: clean -> read_locked */
+ BUG_ON(!(b->state == BS_CLEAN));
+ list_del(&b->list);
+ bm->available_count--;
+ break;
+
+ case BS_READ_LOCKED_DIRTY:
+ /* DOT: dirty -> read_locked_dirty */
+ BUG_ON(!((b->state == BS_DIRTY)));
+ list_del(&b->list);
+ break;
+
+ case BS_WRITE_LOCKED:
+ /* DOT: dirty -> write_locked */
+ /* DOT: clean -> write_locked */
+ BUG_ON(!((b->state == BS_DIRTY) ||
+ (b->state == BS_CLEAN)));
+ list_del(&b->list);
+
+ if (b->state == BS_CLEAN)
+ bm->available_count--;
+ break;
+
+ case BS_DIRTY:
+ /* DOT: write_locked -> dirty */
+ /* DOT: read_locked_dirty -> dirty */
+ BUG_ON(!((b->state == BS_WRITE_LOCKED) ||
+ (b->state == BS_READ_LOCKED_DIRTY)));
+ list_add_tail(&b->list, &bm->dirty_list);
+ break;
+
+ case BS_ERROR:
+ /* DOT: writing -> error */
+ /* DOT: reading -> error */
+ BUG_ON(!((b->state == BS_WRITING) ||
+ (b->state == BS_READING)));
+ list_add_tail(&b->list, &bm->error_list);
+ break;
+ }
+
+ b->state = new_state;
+ /* DOT: } */
+}
+
+/*----------------------------------------------------------------
+ * Low-level io.
+ *--------------------------------------------------------------*/
+typedef void (completion_fn)(unsigned long error, struct dm_block *b);
+
+static void submit_io(struct dm_block *b, int rw,
+ completion_fn fn)
+{
+ struct dm_block_manager *bm = b->bm;
+ struct dm_io_request req;
+ struct dm_io_region region;
+ unsigned sectors_per_block = bm->block_size >> SECTOR_SHIFT;
+
+ region.bdev = bm->bdev;
+ region.sector = b->where * sectors_per_block;
+ region.count = sectors_per_block;
+
+ req.bi_rw = rw;
+ req.mem.type = DM_IO_KMEM;
+ req.mem.offset = 0;
+ req.mem.ptr.addr = b->data;
+ req.notify.fn = (void (*)(unsigned long, void *)) fn;
+ req.notify.context = b;
+ req.client = bm->io;
+
+ if (dm_io(&req, 1, &region, NULL) < 0)
+ fn(1, b);
+}
+
+/*----------------------------------------------------------------
+ * High-level io.
+ *--------------------------------------------------------------*/
+static void __complete_io(unsigned long error, struct dm_block *b)
+{
+ struct dm_block_manager *bm = b->bm;
+
+ if (error) {
+ DMERR("io error = %lu, block = %llu",
+ error, (unsigned long long)b->where);
+ __transition(b, BS_ERROR);
+ } else
+ __transition(b, BS_CLEAN);
+
+ wake_up(&b->io_q);
+ wake_up(&bm->io_q);
+}
+
+static void complete_io(unsigned long error, struct dm_block *b)
+{
+ struct dm_block_manager *bm = b->bm;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bm->lock, flags);
+ __complete_io(error, b);
+ spin_unlock_irqrestore(&bm->lock, flags);
+}
+
+static void read_block(struct dm_block *b)
+{
+ submit_io(b, READ, complete_io);
+}
+
+static void write_block(struct dm_block *b)
+{
+ if (b->validator)
+ b->validator->prepare_for_write(b->validator, b,
+ b->bm->block_size);
+
+ submit_io(b, WRITE | b->io_flags, complete_io);
+}
+
+static void write_dirty(struct dm_block_manager *bm, unsigned count)
+{
+ struct dm_block *b, *tmp;
+ struct list_head dirty;
+ unsigned long flags;
+
+ /*
+ * Grab the first @count entries from the dirty list
+ */
+ INIT_LIST_HEAD(&dirty);
+ spin_lock_irqsave(&bm->lock, flags);
+ list_for_each_entry_safe(b, tmp, &bm->dirty_list, list) {
+ if (!count--)
+ break;
+ __transition(b, BS_WRITING);
+ list_add_tail(&b->list, &dirty);
+ }
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ list_for_each_entry_safe(b, tmp, &dirty, list) {
+ list_del(&b->list);
+ write_block(b);
+ }
+}
+
+static void write_all_dirty(struct dm_block_manager *bm)
+{
+ write_dirty(bm, bm->cache_size);
+}
+
+static void __clear_errors(struct dm_block_manager *bm)
+{
+ struct dm_block *b, *tmp;
+ list_for_each_entry_safe(b, tmp, &bm->error_list, list)
+ __transition(b, BS_EMPTY);
+}
+
+/*----------------------------------------------------------------
+ * Waiting
+ *--------------------------------------------------------------*/
+#ifdef __CHECKER__
+# define __retains(x) __attribute__((context(x, 1, 1)))
+#else
+# define __retains(x)
+#endif
+
+#define __wait_block(wq, lock, flags, sched_fn, condition) \
+do { \
+ int r = 0; \
+ \
+ DEFINE_WAIT(wait); \
+ add_wait_queue(wq, &wait); \
+ \
+ for (;;) { \
+ prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ \
+ spin_unlock_irqrestore(lock, flags); \
+ if (signal_pending(current)) { \
+ r = -ERESTARTSYS; \
+ spin_lock_irqsave(lock, flags); \
+ break; \
+ } \
+ \
+ sched_fn(); \
+ spin_lock_irqsave(lock, flags); \
+ } \
+ \
+ finish_wait(wq, &wait); \
+ return r; \
+} while (0)
+
+static int __wait_io(struct dm_block *b, unsigned long *flags)
+ __retains(&b->bm->lock)
+{
+ __wait_block(&b->io_q, &b->bm->lock, *flags, io_schedule,
+ ((b->state != BS_READING) && (b->state != BS_WRITING)));
+}
+
+static int __wait_unlocked(struct dm_block *b, unsigned long *flags)
+ __retains(&b->bm->lock)
+{
+ __wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
+ ((b->state == BS_CLEAN) || (b->state == BS_DIRTY)));
+}
+
+static int __wait_read_lockable(struct dm_block *b, unsigned long *flags)
+ __retains(&b->bm->lock)
+{
+ __wait_block(&b->io_q, &b->bm->lock, *flags, schedule,
+ (!b->write_lock_pending && (b->state == BS_CLEAN ||
+ b->state == BS_DIRTY ||
+ b->state == BS_READ_LOCKED)));
+}
+
+static int __wait_all_writes(struct dm_block_manager *bm, unsigned long *flags)
+ __retains(&bm->lock)
+{
+ __wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
+ !bm->writing_count);
+}
+
+static int __wait_all_io(struct dm_block_manager *bm, unsigned long *flags)
+ __retains(&bm->lock)
+{
+ __wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
+ !bm->writing_count && !bm->reading_count);
+}
+
+static int __wait_clean(struct dm_block_manager *bm, unsigned long *flags)
+ __retains(&bm->lock)
+{
+ __wait_block(&bm->io_q, &bm->lock, *flags, io_schedule,
+ (!list_empty(&bm->clean_list) ||
+ (!bm->writing_count)));
+}
+
+/*----------------------------------------------------------------
+ * Finding a free block to recycle
+ *--------------------------------------------------------------*/
+static int recycle_block(struct dm_block_manager *bm, dm_block_t where,
+ int need_read, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r = 0;
+ struct dm_block *b;
+ unsigned long flags, available;
+
+ /*
+ * Wait for a block to appear on the empty or clean lists.
+ */
+ spin_lock_irqsave(&bm->lock, flags);
+ while (1) {
+ /*
+ * Once we can lock and do io concurrently then we should
+ * probably flush at bm->cache_size / 2 and write _all_
+ * dirty blocks.
+ */
+ available = bm->available_count + bm->writing_count;
+ if (available < bm->cache_size / 4) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ write_dirty(bm, bm->cache_size / 4);
+ spin_lock_irqsave(&bm->lock, flags);
+ }
+
+ if (!list_empty(&bm->empty_list)) {
+ b = list_first_entry(&bm->empty_list, struct dm_block, list);
+ break;
+
+ } else if (!list_empty(&bm->clean_list)) {
+ b = list_first_entry(&bm->clean_list, struct dm_block, list);
+ __transition(b, BS_EMPTY);
+ break;
+ }
+
+ __wait_clean(bm, &flags);
+ }
+
+ b->where = where;
+ b->validator = v;
+ __transition(b, BS_READING);
+
+ if (!need_read) {
+ memset(b->data, 0, bm->block_size);
+ __transition(b, BS_CLEAN);
+ } else {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ read_block(b);
+ spin_lock_irqsave(&bm->lock, flags);
+ __wait_io(b, &flags);
+
+ /* FIXME: Can b have been recycled between io completion and here? */
+
+ /*
+ * Did the io succeed?
+ */
+ if (b->state == BS_ERROR) {
+ /*
+ * Since this is a read that has failed we can clear the error
+ * immediately. Failed writes are revealed during a commit.
+ */
+ __transition(b, BS_EMPTY);
+ r = -EIO;
+ }
+
+ if (b->validator) {
+ r = b->validator->check(b->validator, b, bm->block_size);
+ if (r) {
+ DMERR("%s validator check failed for block %llu",
+ b->validator->name, (unsigned long long)b->where);
+ __transition(b, BS_EMPTY);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ if (!r)
+ *result = b;
+
+ return r;
+}
+
+/*----------------------------------------------------------------
+ * Low level block management
+ *--------------------------------------------------------------*/
+
+static struct dm_block *alloc_block(struct dm_block_manager *bm)
+{
+ struct dm_block *b = kmem_cache_alloc(bm->block_cache, GFP_KERNEL);
+
+ if (!b)
+ return NULL;
+
+ INIT_LIST_HEAD(&b->list);
+ INIT_HLIST_NODE(&b->hlist);
+
+ b->data = kmem_cache_alloc(bm->buffer_cache, GFP_KERNEL);
+ if (!b->data) {
+ kmem_cache_free(bm->block_cache, b);
+ return NULL;
+ }
+
+ b->validator = NULL;
+ b->state = BS_EMPTY;
+ init_waitqueue_head(&b->io_q);
+ b->read_lock_count = 0;
+ b->write_lock_pending = 0;
+ b->io_flags = 0;
+ b->bm = bm;
+
+ return b;
+}
+
+static void free_block(struct dm_block *b)
+{
+ kmem_cache_free(b->bm->buffer_cache, b->data);
+ kmem_cache_free(b->bm->block_cache, b);
+}
+
+static int populate_bm(struct dm_block_manager *bm, unsigned count)
+{
+ int i;
+ LIST_HEAD(bs);
+
+ for (i = 0; i < count; i++) {
+ struct dm_block *b = alloc_block(bm);
+ if (!b) {
+ struct dm_block *tmp;
+ list_for_each_entry_safe(b, tmp, &bs, list)
+ free_block(b);
+ return -ENOMEM;
+ }
+
+ list_add(&b->list, &bs);
+ }
+
+ list_replace(&bs, &bm->empty_list);
+ bm->available_count = count;
+
+ return 0;
+}
+
+/*----------------------------------------------------------------
+ * Public interface
+ *--------------------------------------------------------------*/
+static unsigned calc_hash_size(unsigned cache_size)
+{
+ unsigned r = 32; /* Minimum size is 16 */
+
+ while (r < cache_size)
+ r <<= 1;
+
+ return r >> 1;
+}
+
+struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
+ unsigned block_size,
+ unsigned cache_size)
+{
+ unsigned i;
+ unsigned hash_size = calc_hash_size(cache_size);
+ size_t len = sizeof(struct dm_block_manager) +
+ sizeof(struct hlist_head) * hash_size;
+ struct dm_block_manager *bm;
+
+ bm = kmalloc(len, GFP_KERNEL);
+ if (!bm)
+ return NULL;
+
+ bm->bdev = bdev;
+ bm->cache_size = max(MAX_CACHE_SIZE, cache_size);
+ bm->block_size = block_size;
+ bm->nr_blocks = i_size_read(bdev->bd_inode);
+ do_div(bm->nr_blocks, block_size);
+ init_waitqueue_head(&bm->io_q);
+ spin_lock_init(&bm->lock);
+
+ INIT_LIST_HEAD(&bm->empty_list);
+ INIT_LIST_HEAD(&bm->clean_list);
+ INIT_LIST_HEAD(&bm->dirty_list);
+ INIT_LIST_HEAD(&bm->error_list);
+ bm->available_count = 0;
+ bm->reading_count = 0;
+ bm->writing_count = 0;
+
+ bm->block_cache = kmem_cache_create("dm-block-manager-blocks",
+ sizeof(struct dm_block),
+ __alignof__(struct dm_block),
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!bm->block_cache)
+ goto bad_bm;
+
+ bm->buffer_cache = kmem_cache_create("dm-block-manager-buffers",
+ block_size, SECTOR_SIZE,
+ 0, NULL);
+ if (!bm->buffer_cache)
+ goto bad_block_cache;
+
+ bm->hash_size = hash_size;
+ bm->hash_mask = hash_size - 1;
+ for (i = 0; i < hash_size; i++)
+ INIT_HLIST_HEAD(bm->buckets + i);
+
+ bm->io = dm_io_client_create();
+ if (!bm->io)
+ goto bad_buffer_cache;
+
+ if (populate_bm(bm, cache_size) < 0)
+ goto bad_io_client;
+
+ return bm;
+
+bad_io_client:
+ dm_io_client_destroy(bm->io);
+bad_buffer_cache:
+ kmem_cache_destroy(bm->buffer_cache);
+bad_block_cache:
+ kmem_cache_destroy(bm->block_cache);
+bad_bm:
+ kfree(bm);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(dm_block_manager_create);
+
+void dm_block_manager_destroy(struct dm_block_manager *bm)
+{
+ int i;
+ struct dm_block *b, *btmp;
+ struct hlist_node *n, *tmp;
+
+ dm_io_client_destroy(bm->io);
+
+ for (i = 0; i < bm->hash_size; i++)
+ hlist_for_each_entry_safe(b, n, tmp, bm->buckets + i, hlist)
+ free_block(b);
+
+ list_for_each_entry_safe(b, btmp, &bm->empty_list, list)
+ free_block(b);
+
+ kmem_cache_destroy(bm->buffer_cache);
+ kmem_cache_destroy(bm->block_cache);
+
+ kfree(bm);
+}
+EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
+
+unsigned dm_bm_block_size(struct dm_block_manager *bm)
+{
+ return bm->block_size;
+}
+EXPORT_SYMBOL_GPL(dm_bm_block_size);
+
+dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
+{
+ return bm->nr_blocks;
+}
+
+static int lock_internal(struct dm_block_manager *bm, dm_block_t block,
+ int how, int need_read, int can_block,
+ struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r = 0;
+ struct dm_block *b;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bm->lock, flags);
+retry:
+ b = __find_block(bm, block);
+ if (b) {
+ if (!need_read)
+ b->validator = v;
+ else {
+ if (b->validator && (v != b->validator)) {
+ DMERR("validator mismatch (old=%s vs new=%s) for block %llu",
+ b->validator->name, v ? v->name : "NULL",
+ (unsigned long long)b->where);
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EINVAL;
+
+ }
+ if (!b->validator && v) {
+ b->validator = v;
+ r = b->validator->check(b->validator, b, bm->block_size);
+ if (r) {
+ DMERR("%s validator check failed for block %llu",
+ b->validator->name,
+ (unsigned long long)b->where);
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return r;
+ }
+ }
+ }
+
+ switch (how) {
+ case READ:
+ if (b->write_lock_pending || (b->state != BS_CLEAN &&
+ b->state != BS_DIRTY &&
+ b->state != BS_READ_LOCKED)) {
+ if (!can_block) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EWOULDBLOCK;
+ }
+
+ __wait_read_lockable(b, &flags);
+
+ if (b->where != block)
+ goto retry;
+ }
+ break;
+
+ case WRITE:
+ while (b->state != BS_CLEAN && b->state != BS_DIRTY) {
+ if (!can_block) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EWOULDBLOCK;
+ }
+
+ b->write_lock_pending++;
+ __wait_unlocked(b, &flags);
+ b->write_lock_pending--;
+ if (b->where != block)
+ goto retry;
+ }
+ break;
+ }
+
+ } else if (!can_block) {
+ r = -EWOULDBLOCK;
+ goto out;
+
+ } else {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ r = recycle_block(bm, block, need_read, v, &b);
+ spin_lock_irqsave(&bm->lock, flags);
+ }
+
+ if (!r) {
+ switch (how) {
+ case READ:
+ b->read_lock_count++;
+
+ if (b->state == BS_DIRTY)
+ __transition(b, BS_READ_LOCKED_DIRTY);
+ else if (b->state == BS_CLEAN)
+ __transition(b, BS_READ_LOCKED);
+ break;
+
+ case WRITE:
+ __transition(b, BS_WRITE_LOCKED);
+ break;
+ }
+
+ *result = b;
+ }
+
+out:
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ return r;
+}
+
+int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
+ struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ return lock_internal(bm, b, READ, 1, 1, v, result);
+}
+EXPORT_SYMBOL_GPL(dm_bm_read_lock);
+
+int dm_bm_write_lock(struct dm_block_manager *bm,
+ dm_block_t b, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ return lock_internal(bm, b, WRITE, 1, 1, v, result);
+}
+EXPORT_SYMBOL_GPL(dm_bm_write_lock);
+
+int dm_bm_read_try_lock(struct dm_block_manager *bm,
+ dm_block_t b, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ return lock_internal(bm, b, READ, 1, 0, v, result);
+}
+
+int dm_bm_write_lock_zero(struct dm_block_manager *bm,
+ dm_block_t b, struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r = lock_internal(bm, b, WRITE, 0, 1, v, result);
+
+ if (!r)
+ memset((*result)->data, 0, bm->block_size);
+
+ return r;
+}
+
+int dm_bm_unlock(struct dm_block *b)
+{
+ int r = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&b->bm->lock, flags);
+ switch (b->state) {
+ case BS_WRITE_LOCKED:
+ __transition(b, BS_DIRTY);
+ wake_up(&b->io_q);
+ break;
+
+ case BS_READ_LOCKED:
+ if (!--b->read_lock_count) {
+ __transition(b, BS_CLEAN);
+ wake_up(&b->io_q);
+ }
+ break;
+
+ case BS_READ_LOCKED_DIRTY:
+ if (!--b->read_lock_count) {
+ __transition(b, BS_DIRTY);
+ wake_up(&b->io_q);
+ }
+ break;
+
+ default:
+ DMERR("block = %llu not locked",
+ (unsigned long long)b->where);
+ r = -EINVAL;
+ break;
+ }
+ spin_unlock_irqrestore(&b->bm->lock, flags);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_bm_unlock);
+
+static int __wait_flush(struct dm_block_manager *bm)
+{
+ int r = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bm->lock, flags);
+ __wait_all_writes(bm, &flags);
+
+ if (!list_empty(&bm->error_list)) {
+ r = -EIO;
+ __clear_errors(bm);
+ }
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ return r;
+}
+
+int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
+ struct dm_block *superblock)
+{
+ int r;
+ unsigned long flags;
+
+ write_all_dirty(bm);
+ r = __wait_flush(bm);
+ if (r)
+ return r;
+
+ spin_lock_irqsave(&bm->lock, flags);
+ superblock->io_flags = REQ_FUA | REQ_FLUSH;
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ dm_bm_unlock(superblock);
+ write_all_dirty(bm);
+
+ return __wait_flush(bm);
+}
+
+int dm_bm_rebind_block_device(struct dm_block_manager *bm,
+ struct block_device *bdev)
+{
+ unsigned long flags;
+ dm_block_t nr_blocks = i_size_read(bdev->bd_inode);
+
+ do_div(nr_blocks, bm->block_size);
+
+ spin_lock_irqsave(&bm->lock, flags);
+ if (nr_blocks < bm->nr_blocks) {
+ spin_unlock_irqrestore(&bm->lock, flags);
+ return -EINVAL;
+ }
+
+ bm->bdev = bdev;
+ bm->nr_blocks = nr_blocks;
+
+ /*
+ * Wait for any in-flight io that may be using the old bdev
+ */
+ __wait_all_io(bm, &flags);
+ spin_unlock_irqrestore(&bm->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_bm_rebind_block_device);
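+
+/*
+ * A rough sketch of the lock/unlock cycle a client of this block
+ * manager goes through. bdev is whatever block device the caller
+ * already has, and use_the_data() stands in for the caller's work on
+ * the buffer:
+ *
+ *	struct dm_block_manager *bm;
+ *	struct dm_block *b;
+ *
+ *	bm = dm_block_manager_create(bdev, 4096, 64);
+ *	if (!bm)
+ *		return -ENOMEM;
+ *	if (!dm_bm_read_lock(bm, 0, NULL, &b)) {
+ *		use_the_data(dm_block_data(b));
+ *		dm_bm_unlock(b);
+ *	}
+ *	dm_block_manager_destroy(bm);
+ */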
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree.h"
+#include "dm-btree-internal.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/export.h>
+
+/*
+ * Removing an entry from a btree
+ * ==============================
+ *
+ * A very important constraint for our btree is that no node, except the
+ * root, may have fewer than a certain number of entries.
+ * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
+ *
+ * Ensuring this is complicated by the way we want to only ever hold the
+ * locks on 2 nodes concurrently, and only change nodes in a top to bottom
+ * fashion.
+ *
+ * Each node may have a left or right sibling. When descending the spine,
+ * if a node contains only MIN_ENTRIES then we try and increase this to at
+ * least MIN_ENTRIES + 1. We do this in the following ways:
+ *
+ * [A] No siblings => this can only happen if the node is the root, in which
+ * case we copy the child's contents over the root.
+ *
+ * [B] No left sibling
+ * ==> rebalance(node, right sibling)
+ *
+ * [C] No right sibling
+ * ==> rebalance(left sibling, node)
+ *
+ * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
+ * ==> delete node, adding its contents to left and right
+ *
+ * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
+ * ==> rebalance(left, node, right)
+ *
+ * After these operations it's possible that our original node no
+ * longer contains the desired sub tree. For this reason this rebalancing
+ * is performed on the children of the current node. This also avoids
+ * having a special case for the root.
+ *
+ * Once this rebalancing has occurred we can then step into the child node
+ * for internal nodes. Or delete the entry for leaf nodes.
+ */
+
+/*
+ * Some little utilities for moving node data around.
+ */
+static void node_shift(struct node *n, int shift)
+{
+ uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+
+ if (shift < 0) {
+ shift = -shift;
+ memmove(key_ptr(n, 0),
+ key_ptr(n, shift),
+ (nr_entries - shift) * sizeof(__le64));
+ memmove(value_ptr(n, 0, sizeof(__le64)),
+ value_ptr(n, shift, sizeof(__le64)),
+ (nr_entries - shift) * sizeof(__le64));
+ } else {
+ memmove(key_ptr(n, shift),
+ key_ptr(n, 0),
+ nr_entries * sizeof(__le64));
+ memmove(value_ptr(n, shift, sizeof(__le64)),
+ value_ptr(n, 0, sizeof(__le64)),
+ nr_entries * sizeof(__le64));
+ }
+}
+
+static void node_copy(struct node *left, struct node *right, int shift)
+{
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+
+ if (shift < 0) {
+ shift = -shift;
+ memcpy(key_ptr(left, nr_left),
+ key_ptr(right, 0),
+ shift * sizeof(__le64));
+ memcpy(value_ptr(left, nr_left, sizeof(__le64)),
+ value_ptr(right, 0, sizeof(__le64)),
+ shift * sizeof(__le64));
+ } else {
+ memcpy(key_ptr(right, 0),
+ key_ptr(left, nr_left - shift),
+ shift * sizeof(__le64));
+ memcpy(value_ptr(right, 0, sizeof(__le64)),
+ value_ptr(left, nr_left - shift, sizeof(__le64)),
+ shift * sizeof(__le64));
+ }
+}
+
+/*
+ * Delete a specific entry from a leaf node.
+ */
+static void delete_at(struct node *n, unsigned index, size_t value_size)
+{
+ unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
+ unsigned nr_to_copy = nr_entries - (index + 1);
+
+ if (nr_to_copy) {
+ memmove(key_ptr(n, index),
+ key_ptr(n, index + 1),
+ nr_to_copy * sizeof(__le64));
+
+ memmove(value_ptr(n, index, value_size),
+ value_ptr(n, index + 1, value_size),
+ nr_to_copy * value_size);
+ }
+
+ n->header.nr_entries = cpu_to_le32(nr_entries - 1);
+}
+
+static unsigned del_threshold(struct node *n)
+{
+ return le32_to_cpu(n->header.max_entries) / 3;
+}
+
+static unsigned merge_threshold(struct node *n)
+{
+ /*
+ * The extra one is because we know we're potentially going to
+ * delete an entry.
+ */
+ return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1;
+}
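+
+/*
+ * For example, a node with max_entries = 126 (max_entries is always a
+ * multiple of 3, see calc_max_entries() in dm-btree.c) has
+ * del_threshold = 42 and merge_threshold = 2 * 42 + 1 = 85.
+ */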
+
+struct child {
+ unsigned index;
+ struct dm_block *block;
+ struct node *n;
+};
+
+static struct dm_btree_value_type le64_type = {
+ .context = NULL,
+ .size = sizeof(__le64),
+ .inc = NULL,
+ .dec = NULL,
+ .equal = NULL
+};
+
+static int init_child(struct dm_btree_info *info, struct node *parent,
+ unsigned index, struct child *result)
+{
+ int r, inc;
+ dm_block_t root;
+
+ result->index = index;
+ root = value64(parent, index);
+
+ r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
+ &result->block, &inc);
+ if (r)
+ return r;
+
+ result->n = dm_block_data(result->block);
+
+ if (inc)
+ inc_children(info->tm, result->n, &le64_type);
+
+ return 0;
+}
+
+static int exit_child(struct dm_btree_info *info, struct child *c)
+{
+ return dm_tm_unlock(info->tm, c->block);
+}
+
+static void shift(struct node *left, struct node *right, int count)
+{
+ if (!count)
+ return;
+
+ if (count > 0) {
+ node_shift(right, count);
+ node_copy(left, right, count);
+ } else {
+ node_copy(left, right, count);
+ node_shift(right, count);
+ }
+
+ left->header.nr_entries =
+ cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count);
+
+ right->header.nr_entries =
+ cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count);
+}
+
+static void __rebalance2(struct dm_btree_info *info, struct node *parent,
+ struct child *l, struct child *r)
+{
+ struct node *left = l->n;
+ struct node *right = r->n;
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+ uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+
+ if (nr_left + nr_right <= merge_threshold(left)) {
+ /*
+ * Merge
+ */
+ node_copy(left, right, -nr_right);
+ left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
+
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ delete_at(parent, r->index, sizeof(__le64));
+
+ /*
+ * We need to decrement the right block, but not its
+ * children, since they're still referenced by left.
+ */
+ dm_tm_dec(info->tm, dm_block_location(r->block));
+ } else {
+ /*
+ * Rebalance.
+ */
+ unsigned target_left = (nr_left + nr_right) / 2;
+
+ shift(left, right, nr_left - target_left);
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ *((__le64 *) value_ptr(parent, r->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(r->block));
+ *key_ptr(parent, r->index) = right->keys[0];
+ }
+}
+
+static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
+ unsigned left_index)
+{
+ int r;
+ struct node *parent;
+ struct child left, right;
+
+ parent = dm_block_data(shadow_current(s));
+
+ r = init_child(info, parent, left_index, &left);
+ if (r)
+ return r;
+
+ r = init_child(info, parent, left_index + 1, &right);
+ if (r) {
+ exit_child(info, &left);
+ return r;
+ }
+
+ __rebalance2(info, parent, &left, &right);
+
+ r = exit_child(info, &left);
+ if (r) {
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, &right);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static void __rebalance3(struct dm_btree_info *info, struct node *parent,
+ struct child *l, struct child *c, struct child *r)
+{
+ struct node *left = l->n;
+ struct node *center = c->n;
+ struct node *right = r->n;
+
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+ uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
+ uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+ uint32_t max_entries = le32_to_cpu(left->header.max_entries);
+
+ unsigned target;
+
+ if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) {
+ /*
+ * Delete center node:
+ *
+ * We dump as many entries from center as possible into
+ * left, then the rest in right, then rebalance2. This
+ * wastes some cpu, but I want something simple atm.
+ */
+ unsigned shift = min(max_entries - nr_left, nr_center);
+
+ node_copy(left, center, -shift);
+ left->header.nr_entries = cpu_to_le32(nr_left + shift);
+
+ if (shift != nr_center) {
+ shift = nr_center - shift;
+ node_shift(right, shift);
+ node_copy(center, right, shift);
+ right->header.nr_entries = cpu_to_le32(nr_right + shift);
+ }
+
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ *((__le64 *) value_ptr(parent, r->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(r->block));
+ *key_ptr(parent, r->index) = right->keys[0];
+
+ delete_at(parent, c->index, sizeof(__le64));
+ r->index--;
+
+ dm_tm_dec(info->tm, dm_block_location(c->block));
+ __rebalance2(info, parent, l, r);
+
+ return;
+ }
+
+ /*
+ * Rebalance
+ */
+ target = (nr_left + nr_center + nr_right) / 3;
+ BUG_ON(target == nr_center);
+
+ /*
+ * Adjust the left node
+ */
+ shift(left, center, nr_left - target);
+
+ /*
+ * Adjust the right node
+ */
+ shift(center, right, target - nr_right);
+
+ *((__le64 *) value_ptr(parent, l->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(l->block));
+ *((__le64 *) value_ptr(parent, c->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(c->block));
+ *((__le64 *) value_ptr(parent, r->index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(r->block));
+
+ *key_ptr(parent, c->index) = center->keys[0];
+ *key_ptr(parent, r->index) = right->keys[0];
+}
+
+static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
+ unsigned left_index)
+{
+ int r;
+ struct node *parent = dm_block_data(shadow_current(s));
+ struct child left, center, right;
+
+ /*
+ * FIXME: fill out an array?
+ */
+ r = init_child(info, parent, left_index, &left);
+ if (r)
+ return r;
+
+ r = init_child(info, parent, left_index + 1, &center);
+ if (r) {
+ exit_child(info, &left);
+ return r;
+ }
+
+ r = init_child(info, parent, left_index + 2, &right);
+ if (r) {
+ exit_child(info, &left);
+ exit_child(info, &center);
+ return r;
+ }
+
+ __rebalance3(info, parent, &left, &center, &right);
+
+ r = exit_child(info, &left);
+ if (r) {
+ exit_child(info, &center);
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, &center);
+ if (r) {
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, &right);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int get_nr_entries(struct dm_transaction_manager *tm,
+ dm_block_t b, uint32_t *result)
+{
+ int r;
+ struct dm_block *block;
+ struct node *n;
+
+ r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
+ if (r)
+ return r;
+
+ n = dm_block_data(block);
+ *result = le32_to_cpu(n->header.nr_entries);
+
+ return dm_tm_unlock(tm, block);
+}
+
+static int rebalance_children(struct shadow_spine *s,
+ struct dm_btree_info *info, uint64_t key)
+{
+ int i, r, has_left_sibling, has_right_sibling;
+ uint32_t child_entries;
+ struct node *n;
+
+ n = dm_block_data(shadow_current(s));
+
+ if (le32_to_cpu(n->header.nr_entries) == 1) {
+ struct dm_block *child;
+ dm_block_t b = value64(n, 0);
+
+ r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
+ if (r)
+ return r;
+
+ memcpy(n, dm_block_data(child),
+ dm_bm_block_size(dm_tm_get_bm(info->tm)));
+ r = dm_tm_unlock(info->tm, child);
+ dm_tm_dec(info->tm, dm_block_location(child));
+
+ return r;
+ }
+
+ i = lower_bound(n, key);
+ if (i < 0)
+ return -ENODATA;
+
+ r = get_nr_entries(info->tm, value64(n, i), &child_entries);
+ if (r)
+ return r;
+
+ if (child_entries > del_threshold(n))
+ return 0;
+
+ has_left_sibling = i > 0 ? 1 : 0;
+ has_right_sibling =
+ (i >= (le32_to_cpu(n->header.nr_entries) - 1)) ? 0 : 1;
+
+ if (!has_left_sibling)
+ r = rebalance2(s, info, i);
+
+ else if (!has_right_sibling)
+ r = rebalance2(s, info, i - 1);
+
+ else
+ r = rebalance3(s, info, i - 1);
+
+ return r;
+}
+
+static int do_leaf(struct node *n, uint64_t key, unsigned *index)
+{
+ int i = lower_bound(n, key);
+
+ if ((i < 0) ||
+ (i >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[i]) != key))
+ return -ENODATA;
+
+ *index = i;
+
+ return 0;
+}
+
+/*
+ * Prepares for removal from one level of the hierarchy. The caller must
+ * actually call delete_at() to remove the entry at index.
+ */
+static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
+ struct dm_btree_value_type *vt, dm_block_t root,
+ uint64_t key, unsigned *index)
+{
+ int i = *index, inc, r;
+ struct node *n;
+
+ for (;;) {
+ r = shadow_step(s, root, vt, &inc);
+ if (r < 0)
+ break;
+
+ /*
+ * We have to patch up the parent node, ugly, but I don't
+ * see a way to do this automatically as part of the spine
+ * op.
+ */
+ if (shadow_has_parent(s)) {
+ __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+ memcpy(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
+ &location, sizeof(__le64));
+ }
+
+ n = dm_block_data(shadow_current(s));
+ if (inc)
+ inc_children(info->tm, n, vt);
+
+ if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+ return do_leaf(n, key, index);
+
+ r = rebalance_children(s, info, key);
+ if (r)
+ break;
+
+ n = dm_block_data(shadow_current(s));
+ if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+ return do_leaf(n, key, index);
+
+ i = lower_bound(n, key);
+
+ /*
+ * We know the key is present, or else
+ * rebalance_children would have returned
+ * -ENODATA
+ */
+ root = value64(n, i);
+ }
+
+ return r;
+}
+
+int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, dm_block_t *new_root)
+{
+ unsigned level, last_level = info->levels - 1;
+ int index = 0, r = 0;
+ struct shadow_spine spine;
+ struct node *n;
+
+ init_shadow_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ r = remove_raw(&spine, info,
+ (level == last_level ?
+ &info->value_type : &le64_type),
+ root, keys[level], (unsigned *)&index);
+ if (r < 0)
+ break;
+
+ n = dm_block_data(shadow_current(&spine));
+ if (level != last_level) {
+ root = value64(n, index);
+ continue;
+ }
+
+ BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
+
+ if (info->value_type.dec)
+ info->value_type.dec(info->value_type.context,
+ value_ptr(n, index, info->value_type.size));
+
+ delete_at(n, index, info->value_type.size);
+
+ r = 0;
+ *new_root = shadow_root(&spine);
+ }
+
+ exit_shadow_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_remove);
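+
+/*
+ * A rough sketch of a single-level removal; info is a dm_btree_info the
+ * caller has already filled in, and root its current root block:
+ *
+ *	uint64_t key = 1234;
+ *	dm_block_t new_root;
+ *
+ *	if (!dm_btree_remove(&info, root, &key, &new_root))
+ *		root = new_root;
+ */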
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree-internal.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/device-mapper.h>
+#include <linux/export.h>
+
+#define DM_MSG_PREFIX "btree"
+
+/*----------------------------------------------------------------
+ * Array manipulation
+ *--------------------------------------------------------------*/
+static void memcpy_disk(void *dest, const void *src, size_t len)
+ __dm_written_to_disk(src)
+{
+ memcpy(dest, src, len);
+ __dm_unbless_for_disk(src);
+}
+
+static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
+ unsigned index, void *elt)
+ __dm_written_to_disk(elt)
+{
+ if (index < nr_elts)
+ memmove(base + (elt_size * (index + 1)),
+ base + (elt_size * index),
+ (nr_elts - index) * elt_size);
+
+ memcpy_disk(base + (elt_size * index), elt, elt_size);
+}
+
+/*----------------------------------------------------------------*/
+
+/* makes the assumption that no two keys are the same. */
+static int bsearch(struct node *n, uint64_t key, int want_hi)
+{
+ int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
+
+ while (hi - lo > 1) {
+ int mid = lo + ((hi - lo) / 2);
+ uint64_t mid_key = le64_to_cpu(n->keys[mid]);
+
+ if (mid_key == key)
+ return mid;
+
+ if (mid_key < key)
+ lo = mid;
+ else
+ hi = mid;
+ }
+
+ return want_hi ? hi : lo;
+}
+
+int lower_bound(struct node *n, uint64_t key)
+{
+ return bsearch(n, key, 0);
+}
+
+void inc_children(struct dm_transaction_manager *tm, struct node *n,
+ struct dm_btree_value_type *vt)
+{
+ unsigned i;
+ uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+
+ if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
+ for (i = 0; i < nr_entries; i++)
+ dm_tm_inc(tm, value64(n, i));
+ else if (vt->inc)
+ for (i = 0; i < nr_entries; i++)
+ vt->inc(vt->context,
+ value_ptr(n, i, vt->size));
+}
+
+static int insert_at(size_t value_size, struct node *node, unsigned index,
+ uint64_t key, void *value)
+ __dm_written_to_disk(value)
+{
+ uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
+ __le64 key_le = cpu_to_le64(key);
+
+ if (index > nr_entries ||
+ index >= le32_to_cpu(node->header.max_entries)) {
+ DMERR("too many entries in btree node for insert");
+ __dm_unbless_for_disk(value);
+ return -ENOMEM;
+ }
+
+ __dm_bless_for_disk(&key_le);
+
+ array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
+ array_insert(value_base(node), value_size, nr_entries, index, value);
+ node->header.nr_entries = cpu_to_le32(nr_entries + 1);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We want 3n entries (for some n). This works more nicely for repeated
+ * insert remove loops than (2n + 1).
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t block_size)
+{
+ uint32_t total, n;
+ size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */
+
+ block_size -= sizeof(struct node_header);
+ total = block_size / elt_size;
+ n = total / 3; /* rounds down */
+
+ return 3 * n;
+}
+
+int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
+{
+ int r;
+ struct dm_block *b;
+ struct node *n;
+ size_t block_size;
+ uint32_t max_entries;
+
+ r = new_block(info, &b);
+ if (r < 0)
+ return r;
+
+ block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
+ max_entries = calc_max_entries(info->value_type.size, block_size);
+
+ n = dm_block_data(b);
+ memset(n, 0, block_size);
+ n->header.flags = cpu_to_le32(LEAF_NODE);
+ n->header.nr_entries = cpu_to_le32(0);
+ n->header.max_entries = cpu_to_le32(max_entries);
+ n->header.value_size = cpu_to_le32(info->value_type.size);
+
+ *root = dm_block_location(b);
+ return unlock_block(info, b);
+}
+EXPORT_SYMBOL_GPL(dm_btree_empty);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Deletion uses a recursive algorithm; since we have limited stack space
+ * we explicitly manage our own stack on the heap.
+ */
+#define MAX_SPINE_DEPTH 64
+struct frame {
+ struct dm_block *b;
+ struct node *n;
+ unsigned level;
+ unsigned nr_children;
+ unsigned current_child;
+};
+
+struct del_stack {
+ struct dm_transaction_manager *tm;
+ int top;
+ struct frame spine[MAX_SPINE_DEPTH];
+};
+
+static int top_frame(struct del_stack *s, struct frame **f)
+{
+ if (s->top < 0) {
+ DMERR("btree deletion stack empty");
+ return -EINVAL;
+ }
+
+ *f = s->spine + s->top;
+
+ return 0;
+}
+
+static int unprocessed_frames(struct del_stack *s)
+{
+ return s->top >= 0;
+}
+
+static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
+{
+ int r;
+ uint32_t ref_count;
+
+ if (s->top >= MAX_SPINE_DEPTH - 1) {
+ DMERR("btree deletion stack out of memory");
+ return -ENOMEM;
+ }
+
+ r = dm_tm_ref(s->tm, b, &ref_count);
+ if (r)
+ return r;
+
+ if (ref_count > 1)
+ /*
+ * This is a shared node, so we can just decrement its
+ * reference counter and leave the children.
+ */
+ dm_tm_dec(s->tm, b);
+
+ else {
+ struct frame *f = s->spine + ++s->top;
+
+ r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
+ if (r) {
+ s->top--;
+ return r;
+ }
+
+ f->n = dm_block_data(f->b);
+ f->level = level;
+ f->nr_children = le32_to_cpu(f->n->header.nr_entries);
+ f->current_child = 0;
+ }
+
+ return 0;
+}
+
+static void pop_frame(struct del_stack *s)
+{
+ struct frame *f = s->spine + s->top--;
+
+ dm_tm_dec(s->tm, dm_block_location(f->b));
+ dm_tm_unlock(s->tm, f->b);
+}
+
+int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
+{
+ int r;
+ struct del_stack *s;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+ s->tm = info->tm;
+ s->top = -1;
+
+ r = push_frame(s, root, 1);
+ if (r)
+ goto out;
+
+ while (unprocessed_frames(s)) {
+ uint32_t flags;
+ struct frame *f;
+ dm_block_t b;
+
+ r = top_frame(s, &f);
+ if (r)
+ goto out;
+
+ if (f->current_child >= f->nr_children) {
+ pop_frame(s);
+ continue;
+ }
+
+ flags = le32_to_cpu(f->n->header.flags);
+ if (flags & INTERNAL_NODE) {
+ b = value64(f->n, f->current_child);
+ f->current_child++;
+ r = push_frame(s, b, f->level);
+ if (r)
+ goto out;
+
+ } else if (f->level != (info->levels - 1)) {
+ b = value64(f->n, f->current_child);
+ f->current_child++;
+ r = push_frame(s, b, f->level + 1);
+ if (r)
+ goto out;
+
+ } else {
+ if (info->value_type.dec) {
+ unsigned i;
+
+ for (i = 0; i < f->nr_children; i++)
+ info->value_type.dec(info->value_type.context,
+ value_ptr(f->n, i, info->value_type.size));
+ }
+ f->current_child = f->nr_children;
+ }
+ }
+
+out:
+ kfree(s);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_del);
+
+int dm_btree_del_gt(struct dm_btree_info *info, dm_block_t root, uint64_t *key,
+ dm_block_t *new_root)
+{
+ /* FIXME: implement */
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_btree_del_gt);
+
+/*----------------------------------------------------------------*/
+
+static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
+ int (*search_fn)(struct node *, uint64_t),
+ uint64_t *result_key, void *v, size_t value_size)
+{
+ int i, r;
+ uint32_t flags, nr_entries;
+
+ do {
+ r = ro_step(s, block);
+ if (r < 0)
+ return r;
+
+ i = search_fn(ro_node(s), key);
+
+ flags = le32_to_cpu(ro_node(s)->header.flags);
+ nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
+ if (i < 0 || i >= nr_entries)
+ return -ENODATA;
+
+ if (flags & INTERNAL_NODE)
+ block = value64(ro_node(s), i);
+
+ } while (!(flags & LEAF_NODE));
+
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ memcpy(v, value_ptr(ro_node(s), i, value_size), value_size);
+
+ return 0;
+}
+
+int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value_le)
+{
+ unsigned level, last_level = info->levels - 1;
+ int r = -ENODATA;
+ uint64_t rkey;
+ __le64 internal_value_le;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ size_t size;
+ void *value_p;
+
+ if (level == last_level) {
+ value_p = value_le;
+ size = info->value_type.size;
+
+ } else {
+ value_p = &internal_value_le;
+ size = sizeof(uint64_t);
+ }
+
+ r = btree_lookup_raw(&spine, root, keys[level],
+ lower_bound, &rkey,
+ value_p, size);
+
+ if (!r) {
+ if (rkey != keys[level]) {
+ exit_ro_spine(&spine);
+ return -ENODATA;
+ }
+ } else {
+ exit_ro_spine(&spine);
+ return r;
+ }
+
+ root = le64_to_cpu(internal_value_le);
+ }
+ exit_ro_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_lookup);
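+
+/*
+ * A rough sketch of the calling convention: fill in a dm_btree_info,
+ * create an empty one-level tree, and (once entries have been inserted
+ * elsewhere) look a key up. tm is a transaction manager the caller
+ * already owns:
+ *
+ *	struct dm_btree_info info = {
+ *		.tm = tm,
+ *		.levels = 1,
+ *		.value_type = { .size = sizeof(__le64) },
+ *	};
+ *	dm_block_t root;
+ *	uint64_t key = 1234;
+ *	__le64 value_le;
+ *
+ *	if (!dm_btree_empty(&info, &root))
+ *		dm_btree_lookup(&info, root, &key, &value_le);
+ */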
+
+/*
+ * Splits a node by creating a sibling node and shifting half the node's
+ * contents across. Assumes there is a parent node, and it has room for
+ * another child.
+ *
+ * Before:
+ * +--------+
+ * | Parent |
+ * +--------+
+ * |
+ * v
+ * +----------+
+ * | A ++++++ |
+ * +----------+
+ *
+ *
+ * After:
+ * +--------+
+ * | Parent |
+ * +--------+
+ * | |
+ * v +------+
+ * +---------+ |
+ * | A* +++ | v
+ * +---------+ +-------+
+ * | B +++ |
+ * +-------+
+ *
+ * Where A* is a shadow of A.
+ */
+static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
+ unsigned parent_index, uint64_t key)
+{
+ int r;
+ size_t size;
+ unsigned nr_left, nr_right;
+ struct dm_block *left, *right, *parent;
+ struct node *ln, *rn, *pn;
+ __le64 location;
+
+ left = shadow_current(s);
+
+ r = new_block(s->info, &right);
+ if (r < 0)
+ return r;
+
+ ln = dm_block_data(left);
+ rn = dm_block_data(right);
+
+ nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
+ nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
+
+ ln->header.nr_entries = cpu_to_le32(nr_left);
+
+ rn->header.flags = ln->header.flags;
+ rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.max_entries = ln->header.max_entries;
+ rn->header.value_size = ln->header.value_size;
+ memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
+
+ size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
+ sizeof(uint64_t) : s->info->value_type.size;
+ memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size),
+ size * nr_right);
+
+ /*
+ * Patch up the parent
+ */
+ parent = shadow_parent(s);
+
+ pn = dm_block_data(parent);
+ location = cpu_to_le64(dm_block_location(left));
+ __dm_bless_for_disk(&location);
+ memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)),
+ &location, sizeof(__le64));
+
+ location = cpu_to_le64(dm_block_location(right));
+ __dm_bless_for_disk(&location);
+
+ r = insert_at(sizeof(__le64), pn, parent_index + 1,
+ le64_to_cpu(rn->keys[0]), &location);
+ if (r)
+ return r;
+
+ if (key < le64_to_cpu(rn->keys[0])) {
+ unlock_block(s->info, right);
+ s->nodes[1] = left;
+ } else {
+ unlock_block(s->info, left);
+ s->nodes[1] = right;
+ }
+
+ return 0;
+}
+
+/*
+ * Splits a node by creating two new children beneath the given node.
+ *
+ * Before:
+ * +----------+
+ * | A ++++++ |
+ * +----------+
+ *
+ *
+ * After:
+ * +------------+
+ * | A (shadow) |
+ * +------------+
+ * | |
+ * +------+ +----+
+ * | |
+ * v v
+ * +-------+ +-------+
+ * | B +++ | | C +++ |
+ * +-------+ +-------+
+ */
+static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
+{
+ int r;
+ size_t size;
+ unsigned nr_left, nr_right;
+ struct dm_block *left, *right, *new_parent;
+ struct node *pn, *ln, *rn;
+ __le64 val;
+
+ new_parent = shadow_current(s);
+
+ r = new_block(s->info, &left);
+ if (r < 0)
+ return r;
+
+ r = new_block(s->info, &right);
+ if (r < 0) {
+ /* FIXME: put left */
+ return r;
+ }
+
+ pn = dm_block_data(new_parent);
+ ln = dm_block_data(left);
+ rn = dm_block_data(right);
+
+ nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
+ nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
+
+ ln->header.flags = pn->header.flags;
+ ln->header.nr_entries = cpu_to_le32(nr_left);
+ ln->header.max_entries = pn->header.max_entries;
+ ln->header.value_size = pn->header.value_size;
+
+ rn->header.flags = pn->header.flags;
+ rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.max_entries = pn->header.max_entries;
+ rn->header.value_size = pn->header.value_size;
+
+ memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
+ memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
+
+ size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
+ sizeof(__le64) : s->info->value_type.size;
+ memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size);
+ memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size),
+ nr_right * size);
+
+ /* new_parent should just point to l and r now */
+ pn->header.flags = cpu_to_le32(INTERNAL_NODE);
+ pn->header.nr_entries = cpu_to_le32(2);
+ pn->header.max_entries = cpu_to_le32(
+ calc_max_entries(sizeof(__le64),
+ dm_bm_block_size(
+ dm_tm_get_bm(s->info->tm))));
+ pn->header.value_size = cpu_to_le32(sizeof(__le64));
+
+ val = cpu_to_le64(dm_block_location(left));
+ __dm_bless_for_disk(&val);
+ pn->keys[0] = ln->keys[0];
+ memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64));
+
+ val = cpu_to_le64(dm_block_location(right));
+ __dm_bless_for_disk(&val);
+ pn->keys[1] = rn->keys[0];
+ memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64));
+
+ /*
+ * Rejig the spine. This is ugly, since it knows too
+ * much about the spine.
+ */
+ if (s->nodes[0] != new_parent) {
+ unlock_block(s->info, s->nodes[0]);
+ s->nodes[0] = new_parent;
+ }
+ if (key < le64_to_cpu(rn->keys[0])) {
+ unlock_block(s->info, right);
+ s->nodes[1] = left;
+ } else {
+ unlock_block(s->info, left);
+ s->nodes[1] = right;
+ }
+ s->count = 2;
+
+ return 0;
+}
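A useful property of this variant is that the original block stays in place as the new parent, so the tree's root location never changes even though its contents are rewritten to hold just two (key, child) pairs. A toy continuation of the sketch above (names and block numbers invented) illustrates the resulting shape:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy node: in an internal node the values hold child block numbers. */
struct toy_node {
	unsigned nr_entries;
	uint64_t keys[4];
	uint64_t values[4];
};

/* Empty a full root into two children and leave it as a 2-entry parent. */
static void toy_split_root(struct toy_node *root,
			   struct toy_node *left, struct toy_node *right)
{
	unsigned nr_left = root->nr_entries / 2;
	unsigned nr_right = root->nr_entries - nr_left;

	memcpy(left->keys, root->keys, nr_left * sizeof(root->keys[0]));
	memcpy(left->values, root->values, nr_left * sizeof(root->values[0]));
	left->nr_entries = nr_left;

	memcpy(right->keys, root->keys + nr_left,
	       nr_right * sizeof(root->keys[0]));
	memcpy(right->values, root->values + nr_left,
	       nr_right * sizeof(root->values[0]));
	right->nr_entries = nr_right;

	root->keys[0] = left->keys[0];
	root->keys[1] = right->keys[0];
	root->values[0] = 100;	/* stand-in for left's block location */
	root->values[1] = 200;	/* stand-in for right's block location */
	root->nr_entries = 2;
}

int main(void)
{
	struct toy_node root = { 4, { 10, 20, 30, 40 }, { 1, 2, 3, 4 } };
	struct toy_node l = { 0 }, r = { 0 };

	toy_split_root(&root, &l, &r);
	printf("root now has %u entries; right subtree starts at key %llu\n",
	       root.nr_entries, (unsigned long long)root.keys[1]);
	return 0;
}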
+
+static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
+ struct dm_btree_value_type *vt,
+ uint64_t key, unsigned *index)
+{
+ int r, i = *index, inc, top = 1;
+ struct node *node;
+
+ for (;;) {
+ r = shadow_step(s, root, vt, &inc);
+ if (r < 0)
+ return r;
+
+ node = dm_block_data(shadow_current(s));
+ if (inc)
+ inc_children(s->info->tm, node, vt);
+
+ /*
+ * We have to patch up the parent node; it's ugly, but I don't
+ * see a way to do this automatically as part of the spine
+ * op.
+ */
+ if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unnecessary. */
+ __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+ __dm_bless_for_disk(&location);
+ memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
+ &location, sizeof(__le64));
+ }
+
+ node = dm_block_data(shadow_current(s));
+
+ if (node->header.nr_entries == node->header.max_entries) {
+ if (top)
+ r = btree_split_beneath(s, key);
+ else
+ r = btree_split_sibling(s, root, i, key);
+
+ if (r < 0)
+ return r;
+ }
+
+ node = dm_block_data(shadow_current(s));
+
+ i = lower_bound(node, key);
+
+ if (le32_to_cpu(node->header.flags) & LEAF_NODE)
+ break;
+
+ if (i < 0) {
+ /* change the bounds on the lowest key */
+ node->keys[0] = cpu_to_le64(key);
+ i = 0;
+ }
+
+ root = value64(node, i);
+ top = 0;
+ }
+
+ if (i < 0 || le64_to_cpu(node->keys[i]) != key)
+ i++;
+
+ /*
+ * We're about to overwrite this value, so undo the increment for it.
+ *
+ * FIXME: shame that inc information is leaking outside the spine.
+ * Plus inc is just plain wrong in the event of a split.
+ */
+ if (le64_to_cpu(node->keys[i]) == key && inc)
+ if (vt->dec)
+ vt->dec(vt->context, value_ptr(node, i, vt->size));
+
+ *index = i;
+ return 0;
+}
+
+static int insert(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root,
+ int *inserted)
+ __dm_written_to_disk(value)
+{
+ int r, need_insert;
+ unsigned level, index = -1, last_level = info->levels - 1;
+ dm_block_t block = root;
+ struct shadow_spine spine;
+ struct node *n;
+ struct dm_btree_value_type le64_type;
+
+ le64_type.context = NULL;
+ le64_type.size = sizeof(__le64);
+ le64_type.inc = NULL;
+ le64_type.dec = NULL;
+ le64_type.equal = NULL;
+
+ init_shadow_spine(&spine, info);
+
+ for (level = 0; level < (info->levels - 1); level++) {
+ r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
+ if (r < 0)
+ goto bad;
+
+ n = dm_block_data(shadow_current(&spine));
+ need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[index]) != keys[level]));
+
+ if (need_insert) {
+ dm_block_t new_tree;
+ __le64 new_le;
+
+ r = dm_btree_empty(info, &new_tree);
+ if (r < 0)
+ goto bad;
+
+ new_le = cpu_to_le64(new_tree);
+ __dm_bless_for_disk(&new_le);
+
+ r = insert_at(sizeof(uint64_t), n, index,
+ keys[level], &new_le);
+ if (r)
+ goto bad;
+ }
+
+ if (level < last_level)
+ block = value64(n, index);
+ }
+
+ r = btree_insert_raw(&spine, block, &info->value_type,
+ keys[level], &index);
+ if (r < 0)
+ goto bad;
+
+ n = dm_block_data(shadow_current(&spine));
+ need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[index]) != keys[level]));
+
+ if (need_insert) {
+ if (inserted)
+ *inserted = 1;
+
+ r = insert_at(info->value_type.size, n, index,
+ keys[level], value);
+ if (r)
+ goto bad_unblessed;
+ } else {
+ if (inserted)
+ *inserted = 0;
+
+ if (info->value_type.dec &&
+ (!info->value_type.equal ||
+ !info->value_type.equal(
+ info->value_type.context,
+ value_ptr(n, index, info->value_type.size),
+ value))) {
+ info->value_type.dec(info->value_type.context,
+ value_ptr(n, index, info->value_type.size));
+ }
+ memcpy_disk(value_ptr(n, index, info->value_type.size),
+ value, info->value_type.size);
+ }
+
+ *new_root = shadow_root(&spine);
+ exit_shadow_spine(&spine);
+
+ return 0;
+
+bad:
+ __dm_unbless_for_disk(value);
+bad_unblessed:
+ exit_shadow_spine(&spine);
+ return r;
+}
+
+int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value)
+{
+ return insert(info, root, keys, value, new_root, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert);
+
+int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root,
+ int *inserted)
+ __dm_written_to_disk(value)
+{
+ return insert(info, root, keys, value, new_root, inserted);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
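Taken together, dm_btree_insert() expects one key per tree level and a value that has already been blessed for disk. A minimal caller-side sketch for a one-level tree of __le64 values follows; the wrapper name is invented and the setup of 'info' (value_type, transaction manager, levels = 1) is assumed to happen elsewhere, so this is illustrative rather than compilable in isolation:

/* Hypothetical helper: map virt_block -> data_block in a one-level tree. */
static int example_map_block(struct dm_btree_info *info, dm_block_t *root,
			     uint64_t virt_block, dm_block_t data_block)
{
	uint64_t key = virt_block;
	__le64 value = cpu_to_le64(data_block);

	__dm_bless_for_disk(&value);

	/* On success *root is updated to the new, shadowed root. */
	return dm_btree_insert(info, *root, &key, &value, root);
}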
+
+/*----------------------------------------------------------------*/
+
+int dm_btree_clone(struct dm_btree_info *info, dm_block_t root,
+ dm_block_t *clone)
+{
+ int r;
+ struct dm_block *b, *orig_b;
+ struct node *b_node, *orig_node;
+
+ /* Copy the root node */
+ r = new_block(info, &b);
+ if (r < 0)
+ return r;
+
+ r = dm_tm_read_lock(info->tm, root, &btree_node_validator, &orig_b);
+ if (r < 0) {
+ dm_block_t location = dm_block_location(b);
+
+ unlock_block(info, b);
+ dm_tm_dec(info->tm, location);
+ return r;
+ }
+
+ *clone = dm_block_location(b);
+ b_node = dm_block_data(b);
+ orig_node = dm_block_data(orig_b);
+
+ memcpy(b_node, orig_node,
+ dm_bm_block_size(dm_tm_get_bm(info->tm)));
+ dm_tm_unlock(info->tm, orig_b);
+ inc_children(info->tm, b_node, &info->value_type);
+ dm_tm_unlock(info->tm, b);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_btree_clone);
+
+/*----------------------------------------------------------------*/
+
+static int find_highest_key(struct ro_spine *s, dm_block_t block,
+ uint64_t *result_key, dm_block_t *next_block)
+{
+ int i, r;
+ uint32_t flags;
+
+ do {
+ r = ro_step(s, block);
+ if (r < 0)
+ return r;
+
+ flags = le32_to_cpu(ro_node(s)->header.flags);
+ i = le32_to_cpu(ro_node(s)->header.nr_entries);
+ if (!i)
+ return -ENODATA;
+ else
+ i--;
+
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ if (next_block || flags & INTERNAL_NODE)
+ block = value64(ro_node(s), i);
+
+ } while (flags & INTERNAL_NODE);
+
+ if (next_block)
+ *next_block = block;
+ return 0;
+}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ int r = 0, count = 0, level;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ r = find_highest_key(&spine, root, result_keys + level,
+ level == info->levels - 1 ? NULL : &root);
+ if (r == -ENODATA) {
+ r = 0;
+ break;
+
+ } else if (r)
+ break;
+
+ count++;
+ }
+ exit_ro_spine(&spine);
+
+ return r ? r : count;
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
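Note that the function returns the number of levels for which a key was recovered (or a negative errno), so callers usually compare the result against info->levels rather than testing for zero. A hedged sketch, with the wrapper name invented for illustration:

/* Returns 0 with *highest filled in, -ENODATA if the tree is empty. */
static int example_highest_key(struct dm_btree_info *info, dm_block_t root,
			       uint64_t *highest)
{
	int r = dm_btree_find_highest_key(info, root, highest);

	if (r < 0)
		return r;

	return (unsigned)r == info->levels ? 0 : -ENODATA;
}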
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map-common.h"
+#include "dm-space-map-disk.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/device-mapper.h>
+#include <linux/export.h>
+
+#define DM_MSG_PREFIX "space map disk"
+
+/*
+ * Bitmap validator
+ */
+static void bitmap_prepare_for_write(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t block_size)
+{
+ struct disk_bitmap_header *disk_header = dm_block_data(b);
+
+ disk_header->blocknr = cpu_to_le64(dm_block_location(b));
+ disk_header->csum = cpu_to_le32(dm_block_csum_data(&disk_header->not_used, block_size - sizeof(__le32)));
+}
+
+static int bitmap_check(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t block_size)
+{
+ struct disk_bitmap_header *disk_header = dm_block_data(b);
+ __le32 csum_disk;
+
+ if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) {
+ DMERR("bitmap check failed blocknr %llu wanted %llu",
+ le64_to_cpu(disk_header->blocknr), dm_block_location(b));
+ return -ENOTBLK;
+ }
+
+ csum_disk = cpu_to_le32(dm_block_csum_data(&disk_header->not_used, block_size - sizeof(__le32)));
+ if (csum_disk != disk_header->csum) {
+ DMERR("bitmap check failed csum %u wanted %u",
+ le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum));
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+struct dm_block_validator dm_sm_bitmap_validator = {
+ .name = "sm_bitmap",
+ .prepare_for_write = bitmap_prepare_for_write,
+ .check = bitmap_check
+};
+
+/*----------------------------------------------------------------*/
+
+#define ENTRIES_PER_WORD 32
+#define ENTRIES_SHIFT 5
+
+void *dm_bitmap_data(struct dm_block *b)
+{
+ return dm_block_data(b) + sizeof(struct disk_bitmap_header);
+}
+
+#define WORD_MASK_LOW 0x5555555555555555ULL
+#define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
+#define WORD_MASK_ALL 0xFFFFFFFFFFFFFFFFULL
+
+static unsigned bitmap_word_used(void *addr, unsigned b)
+{
+ __le64 *words_le = addr;
+ __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+ uint64_t bits = le64_to_cpu(*w_le);
+
+ return ((bits & WORD_MASK_LOW) == WORD_MASK_LOW ||
+ (bits & WORD_MASK_HIGH) == WORD_MASK_HIGH ||
+ (bits & WORD_MASK_ALL) == WORD_MASK_ALL);
+}
+
+unsigned sm_lookup_bitmap(void *addr, unsigned b)
+{
+ __le64 *words_le = addr;
+ __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+ b = (b & (ENTRIES_PER_WORD - 1)) << 1;
+
+ return (!!test_bit_le(b, (void *) w_le) << 1) |
+ (!!test_bit_le(b + 1, (void *) w_le));
+}
+
+void sm_set_bitmap(void *addr, unsigned b, unsigned val)
+{
+ __le64 *words_le = addr;
+ __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
+
+ b = (b & (ENTRIES_PER_WORD - 1)) << 1;
+
+ if (val & 2)
+ __set_bit_le(b, (void *) w_le);
+ else
+ __clear_bit_le(b, (void *) w_le);
+
+ if (val & 1)
+ __set_bit_le(b + 1, (void *) w_le);
+ else
+ __clear_bit_le(b + 1, (void *) w_le);
+}
+
+int sm_find_free(void *addr, unsigned begin, unsigned end,
+ unsigned *result)
+{
+ while (begin < end) {
+ if (!(begin & (ENTRIES_PER_WORD - 1)) &&
+ bitmap_word_used(addr, begin)) {
+ begin += ENTRIES_PER_WORD;
+ continue;
+ }
+
+ if (!sm_lookup_bitmap(addr, begin)) {
+ *result = begin;
+ return 0;
+ }
+
+ begin++;
+ }
+
+ return -ENOSPC;
+}
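Each 64-bit bitmap word therefore packs 32 two-bit entries, with the value 3 reserved to mean "reference count continues in the ref-count btree". The standalone toy below shows the packing idea; note that it uses plain host-order shifts, whereas the code above goes through the little-endian bit helpers, so the exact on-disk bit positions differ:

#include <stdint.h>
#include <stdio.h>

#define TOY_ENTRIES_PER_WORD 32

/* Read the 2-bit entry for block 'b' (values 0-3). */
static unsigned toy_lookup(const uint64_t *words, unsigned b)
{
	uint64_t w = words[b / TOY_ENTRIES_PER_WORD];
	unsigned shift = (b % TOY_ENTRIES_PER_WORD) * 2;

	return (unsigned)((w >> shift) & 3);
}

/* Overwrite the 2-bit entry for block 'b' with 'val' (0-3). */
static void toy_set(uint64_t *words, unsigned b, unsigned val)
{
	uint64_t *w = &words[b / TOY_ENTRIES_PER_WORD];
	unsigned shift = (b % TOY_ENTRIES_PER_WORD) * 2;

	*w &= ~(3ULL << shift);
	*w |= (uint64_t)(val & 3) << shift;
}

int main(void)
{
	uint64_t words[2] = { 0, 0 };

	toy_set(words, 0, 2);	/* ref count 2, held in the bitmap */
	toy_set(words, 33, 3);	/* 3 == "see the ref-count btree" */
	printf("entry 0 = %u, entry 33 = %u\n",
	       toy_lookup(words, 0), toy_lookup(words, 33));
	return 0;
}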
+
+static int disk_ll_init(struct ll_disk *io, struct dm_transaction_manager *tm)
+{
+ io->tm = tm;
+ io->bitmap_info.tm = tm;
+ io->bitmap_info.levels = 1;
+
+ /*
+ * Because the new bitmap blocks are created via a shadow
+ * operation, the old entry has already had its reference count
+ * decremented and we don't need the btree to do any bookkeeping.
+ */
+ io->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
+ io->bitmap_info.value_type.inc = NULL;
+ io->bitmap_info.value_type.dec = NULL;
+ io->bitmap_info.value_type.equal = NULL;
+
+ io->ref_count_info.tm = tm;
+ io->ref_count_info.levels = 1;
+ io->ref_count_info.value_type.size = sizeof(uint32_t);
+ io->ref_count_info.value_type.inc = NULL;
+ io->ref_count_info.value_type.dec = NULL;
+ io->ref_count_info.value_type.equal = NULL;
+
+ io->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
+
+ if (io->block_size > (1 << 30)) {
+ DMERR("block size too big to hold bitmaps");
+ return -EINVAL;
+ }
+
+ io->entries_per_block = (io->block_size - sizeof(struct disk_bitmap_header)) *
+ ENTRIES_PER_BYTE;
+ io->nr_blocks = 0;
+ io->bitmap_root = 0;
+ io->ref_count_root = 0;
+
+ return 0;
+}
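To put entries_per_block into concrete terms: two bits per entry means four entries per byte, so, assuming 4 KiB metadata blocks and a 16-byte struct disk_bitmap_header (the csum, not_used and blocknr fields used above), each bitmap block indexes (4096 - 16) * 4 = 16320 data blocks. The exact header size is defined in dm-space-map-common.h and may differ; the arithmetic pattern is what matters here.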
+
+static int disk_ll_new(struct ll_disk *io, struct dm_transaction_manager *tm)
+{
+ int r;
+
+ r = disk_ll_init(io, tm);
+ if (r < 0)
+ return r;
+
+ io->nr_blocks = 0;
+ io->nr_allocated = 0;
+ r = dm_btree_empty(&io->bitmap_info, &io->bitmap_root);
+ if (r < 0)
+ return r;
+
+ r = dm_btree_empty(&io->ref_count_info, &io->ref_count_root);
+ if (r < 0) {
+ dm_btree_del(&io->bitmap_info, io->bitmap_root);
+ return r;
+ }
+
+ return 0;
+}
+
+static int disk_ll_extend(struct ll_disk *io, dm_block_t extra_blocks)
+{
+ int r;
+ dm_block_t i, nr_blocks;
+ unsigned old_blocks, blocks;
+
+ nr_blocks = io->nr_blocks + extra_blocks;
+ old_blocks = dm_sector_div_up(io->nr_blocks, io->entries_per_block);
+ blocks = dm_sector_div_up(nr_blocks, io->entries_per_block);
+
+ for (i = old_blocks; i < blocks; i++) {
+ struct dm_block *b;
+ struct disk_index_entry idx;
+
+ r = dm_tm_new_block(io->tm, &dm_sm_bitmap_validator, &b);
+ if (r < 0)
+ return r;
+ idx.blocknr = cpu_to_le64(dm_block_location(b));
+
+ r = dm_tm_unlock(io->tm, b);
+ if (r < 0)
+ return r;
+
+ idx.nr_free = cpu_to_le32(io->entries_per_block);
+ idx.none_free_before = 0;
+ __dm_bless_for_disk(&idx);
+
+ r = dm_btree_insert(&io->bitmap_info, io->bitmap_root,
+ &i, &idx, &io->bitmap_root);
+ if (r < 0)
+ return r;
+ }
+
+ io->nr_blocks = nr_blocks;
+ return 0;
+}
+
+static int disk_ll_open(struct ll_disk *ll, struct dm_transaction_manager *tm,
+ void *root_le, size_t len)
+{
+ int r;
+ struct disk_sm_root *smr = root_le;
+
+ if (len < sizeof(struct disk_sm_root)) {
+ DMERR("sm_disk root too small");
+ return -ENOMEM;
+ }
+
+ r = disk_ll_init(ll, tm);
+ if (r < 0)
+ return r;
+
+ ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
+ ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
+ ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
+ ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
+
+ return 0;
+}
+
+static int disk_ll_lookup_bitmap(struct ll_disk *io, dm_block_t b, uint32_t *result)
+{
+ int r;
+ dm_block_t index = b;
+ struct disk_index_entry ie_disk;
+ struct dm_block *blk;
+
+ do_div(index, io->entries_per_block);
+ r = dm_btree_lookup(&io->bitmap_info, io->bitmap_root, &index, &ie_disk);
+ if (r < 0)
+ return r;
+
+ r = dm_tm_read_lock(io->tm, le64_to_cpu(ie_disk.blocknr), &dm_sm_bitmap_validator, &blk);
+ if (r < 0)
+ return r;
+
+ *result = sm_lookup_bitmap(dm_bitmap_data(blk), do_div(b, io->entries_per_block));
+
+ return dm_tm_unlock(io->tm, blk);
+}
+
+static int disk_ll_lookup(struct ll_disk *io, dm_block_t b, uint32_t *result)
+{
+ __le32 rc_le;
+ int r = disk_ll_lookup_bitmap(io, b, result);
+
+ if (r)
+ return r;
+
+ if (*result != 3)
+ return r;
+
+ r = dm_btree_lookup(&io->ref_count_info, io->ref_count_root, &b, &rc_le);
+ if (r < 0)
+ return r;
+
+ *result = le32_to_cpu(rc_le);
+
+ return r;
+}
+
+static int disk_ll_find_free_block(struct ll_disk *io, dm_block_t begin,
+ dm_block_t end, dm_block_t *result)
+{
+ int r;
+ struct disk_index_entry ie_disk;
+ dm_block_t i, index_begin = begin;
+ dm_block_t index_end = dm_sector_div_up(end, io->entries_per_block);
+
+ begin = do_div(index_begin, io->entries_per_block);
+
+ for (i = index_begin; i < index_end; i++, begin = 0) {
+ struct dm_block *blk;
+ unsigned position;
+ uint32_t bit_end;
+
+ r = dm_btree_lookup(&io->bitmap_info, io->bitmap_root, &i, &ie_disk);
+ if (r < 0)
+ return r;
+
+ if (le32_to_cpu(ie_disk.nr_free) <= 0)
+ continue;
+
+ r = dm_tm_read_lock(io->tm, le64_to_cpu(ie_disk.blocknr),
+ &dm_sm_bitmap_validator, &blk);
+ if (r < 0)
+ return r;
+
+ bit_end = (i == index_end - 1) ?
+ do_div(end, io->entries_per_block) : io->entries_per_block;
+
+ r = sm_find_free(dm_bitmap_data(blk),
+ max((unsigned)begin, (unsigned)le32_to_cpu(ie_disk.none_free_before)),
+ bit_end, &position);
+ if (r < 0) {
+ dm_tm_unlock(io->tm, blk);
+ continue;
+ }
+
+ r = dm_tm_unlock(io->tm, blk);
+ if (r < 0)
+ return r;
+
+ *result = i * io->entries_per_block + (dm_block_t) position;
+
+ return 0;
+ }
+
+ return -ENOSPC;
+}
+
+static int disk_ll_insert(struct ll_disk *io, dm_block_t b, uint32_t ref_count)
+{
+ int r;
+ uint32_t bit, old;
+ struct dm_block *nb;
+ dm_block_t index = b;
+ struct disk_index_entry ie_disk;
+ void *bm_le;
+ int inc;
+
+ do_div(index, io->entries_per_block);
+ r = dm_btree_lookup(&io->bitmap_info, io->bitmap_root, &index, &ie_disk);
+ if (r < 0)
+ return r;
+
+ r = dm_tm_shadow_block(io->tm, le64_to_cpu(ie_disk.blocknr),
+ &dm_sm_bitmap_validator, &nb, &inc);
+ if (r < 0) {
+ DMERR("dm_tm_shadow_block() failed");
+ return r;
+ }
+ ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
+
+ bm_le = dm_bitmap_data(nb);
+ bit = do_div(b, io->entries_per_block);
+ old = sm_lookup_bitmap(bm_le, bit);
+
+ if (ref_count <= 2) {
+ sm_set_bitmap(bm_le, bit, ref_count);
+
+ if (old > 2) {
+ r = dm_btree_remove(&io->ref_count_info, io->ref_count_root,
+ &b, &io->ref_count_root);
+ if (r) {
+ dm_tm_unlock(io->tm, nb);
+ return r;
+ }
+ }
+ } else {
+ __le32 rc_le = cpu_to_le32(ref_count);
+
+ __dm_bless_for_disk(&rc_le);
+
+ sm_set_bitmap(bm_le, bit, 3);
+ r = dm_btree_insert(&io->ref_count_info, io->ref_count_root,
+ &b, &rc_le, &io->ref_count_root);
+ if (r < 0) {
+ dm_tm_unlock(io->tm, nb);
+ DMERR("ref count insert failed");
+ return r;
+ }
+ }
+
+ r = dm_tm_unlock(io->tm, nb);
+ if (r < 0)
+ return r;
+
+ if (ref_count && !old) {
+ io->nr_allocated++;
+ ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) - 1);
+ if (le32_to_cpu(ie_disk.none_free_before) == b)
+ ie_disk.none_free_before = cpu_to_le32(b + 1);
+
+ } else if (old && !ref_count) {
+ io->nr_allocated--;
+ ie_disk.nr_free = cpu_to_le32(le32_to_cpu(ie_disk.nr_free) + 1);
+ ie_disk.none_free_before = cpu_to_le32(min((dm_block_t) le32_to_cpu(ie_disk.none_free_before), b));
+ }
+
+ __dm_bless_for_disk(&ie_disk);
+
+ r = dm_btree_insert(&io->bitmap_info, io->bitmap_root, &index, &ie_disk, &io->bitmap_root);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
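disk_ll_insert() is where the "counts 0-2 live in the bitmap, 3 means overflow" rule is enforced: small counts are written straight into the shadowed bitmap block (removing any stale overflow entry), while larger counts store the marker 3 and push the real value into the ref-count btree. A self-contained toy of just that decision, with a plain array standing in for the btree:

#include <stdint.h>
#include <stdio.h>

#define TOY_OVERFLOW 3u

struct toy_sm {
	uint8_t bitmap[16];	/* one byte per block, for clarity */
	uint32_t overflow[16];	/* stands in for the ref-count btree */
};

static void toy_set_count(struct toy_sm *sm, unsigned b, uint32_t count)
{
	if (count <= 2) {
		sm->bitmap[b] = (uint8_t)count;
		sm->overflow[b] = 0;		/* like dm_btree_remove() */
	} else {
		sm->bitmap[b] = TOY_OVERFLOW;
		sm->overflow[b] = count;	/* like dm_btree_insert() */
	}
}

static uint32_t toy_get_count(const struct toy_sm *sm, unsigned b)
{
	return sm->bitmap[b] == TOY_OVERFLOW ? sm->overflow[b] : sm->bitmap[b];
}

int main(void)
{
	struct toy_sm sm = { { 0 }, { 0 } };

	toy_set_count(&sm, 5, 2);
	toy_set_count(&sm, 7, 100);
	printf("block 5 -> %u, block 7 -> %u\n",
	       toy_get_count(&sm, 5), toy_get_count(&sm, 7));
	return 0;
}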
+
+static int disk_ll_inc(struct ll_disk *ll, dm_block_t b)
+{
+ int r;
+ uint32_t rc;
+
+ r = disk_ll_lookup(ll, b, &rc);
+ if (r)
+ return r;
+
+ return disk_ll_insert(ll, b, rc + 1);
+}
+
+static int disk_ll_dec(struct ll_disk *ll, dm_block_t b)
+{
+ int r;
+ uint32_t rc;
+
+ r = disk_ll_lookup(ll, b, &rc);
+ if (r)
+ return r;
+
+ if (!rc)
+ return -EINVAL;
+
+ return disk_ll_insert(ll, b, rc - 1);
+}
+
+/*--------------------------------------------------------------*/
+
+/*
+ * Space map interface.
+ */
+struct sm_disk {
+ struct dm_space_map sm;
+
+ struct ll_disk ll;
+};
+
+static void sm_disk_destroy(struct dm_space_map *sm)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ kfree(smd);
+}
+
+static int sm_disk_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_extend(&smd->ll, extra_blocks);
+}
+
+static int sm_disk_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ *count = smd->ll.nr_blocks;
+
+ return 0;
+}
+
+static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ *count = smd->ll.nr_blocks - smd->ll.nr_allocated;
+
+ return 0;
+}
+
+static int sm_disk_get_count(struct dm_space_map *sm, dm_block_t b,
+ uint32_t *result)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_lookup(&smd->ll, b, result);
+}
+
+static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
+ int *result)
+{
+ int r;
+ uint32_t count;
+
+ r = sm_disk_get_count(sm, b, &count);
+ if (r)
+ return r;
+
+ *result = count > 1;
+
+ return 0;
+}
+
+static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
+ uint32_t count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_insert(&smd->ll, b, count);
+}
+
+static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_inc(&smd->ll, b);
+}
+
+static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return disk_ll_dec(&smd->ll, b);
+}
+
+static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+ int r;
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ /*
+ * FIXME: We should start the search where we left off.
+ */
+ r = disk_ll_find_free_block(&smd->ll, 0, smd->ll.nr_blocks, b);
+ if (r)
+ return r;
+
+ return disk_ll_inc(&smd->ll, *b);
+}
+
+static int sm_disk_commit(struct dm_space_map *sm)
+{
+ return 0;
+}
+
+static int sm_disk_root_size(struct dm_space_map *sm, size_t *result)
+{
+ *result = sizeof(struct disk_sm_root);
+
+ return 0;
+}
+
+static int sm_disk_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+ struct disk_sm_root root_le;
+
+ root_le.nr_blocks = cpu_to_le64(smd->ll.nr_blocks);
+ root_le.nr_allocated = cpu_to_le64(smd->ll.nr_allocated);
+ root_le.bitmap_root = cpu_to_le64(smd->ll.bitmap_root);
+ root_le.ref_count_root = cpu_to_le64(smd->ll.ref_count_root);
+
+ if (max < sizeof(root_le))
+ return -ENOSPC;
+
+ memcpy(where_le, &root_le, sizeof(root_le));
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_space_map ops = {
+ .destroy = sm_disk_destroy,
+ .extend = sm_disk_extend,
+ .get_nr_blocks = sm_disk_get_nr_blocks,
+ .get_nr_free = sm_disk_get_nr_free,
+ .get_count = sm_disk_get_count,
+ .count_is_more_than_one = sm_disk_count_is_more_than_one,
+ .set_count = sm_disk_set_count,
+ .inc_block = sm_disk_inc_block,
+ .dec_block = sm_disk_dec_block,
+ .new_block = sm_disk_new_block,
+ .commit = sm_disk_commit,
+ .root_size = sm_disk_root_size,
+ .copy_root = sm_disk_copy_root
+};
+
+struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
+ dm_block_t nr_blocks)
+{
+ int r;
+ struct sm_disk *smd;
+
+ smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+ if (!smd)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+ r = disk_ll_new(&smd->ll, tm);
+ if (r)
+ goto bad;
+
+ r = disk_ll_extend(&smd->ll, nr_blocks);
+ if (r)
+ goto bad;
+
+ r = sm_disk_commit(&smd->sm);
+ if (r)
+ goto bad;
+
+ return &smd->sm;
+
+bad:
+ kfree(smd);
+ return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_create);
+
+struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
+ void *root_le, size_t len)
+{
+ int r;
+ struct sm_disk *smd;
+
+ smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+ if (!smd)
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+ r = disk_ll_open(&smd->ll, tm, root_le, len);
+ if (r)
+ goto bad;
+
+ r = sm_disk_commit(&smd->sm);
+ if (r)
+ goto bad;
+
+ return &smd->sm;
+
+bad:
+ kfree(smd);
+ return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_open);
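From a caller's perspective the disk space map is created (or reopened from a saved root) and then driven entirely through the ops table. A hedged sketch of creation plus a single allocation; the block count of 128 and the helper name are invented, and error handling follows the ERR_PTR() convention used above:

/* Hypothetical helper: build a fresh space map and allocate one block. */
static struct dm_space_map *example_create_sm(struct dm_transaction_manager *tm)
{
	struct dm_space_map *sm;
	dm_block_t b;
	int r;

	sm = dm_sm_disk_create(tm, 128);
	if (IS_ERR(sm))
		return sm;

	r = sm->new_block(sm, &b);
	if (r) {
		sm->destroy(sm);
		return ERR_PTR(r);
	}

	return sm;
}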
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/gpio.h>
+#include <linux/module.h>
#include <linux/mmc/host.h>
#include <plat/sdhci.h>
#include <plat/regs-sdhci.h>
#include <linux/mod_devicetable.h>
#include <linux/power_supply.h>
#include <linux/power/max17042_battery.h>
+#include <linux/module.h>
-enum max17042_register {
- MAX17042_STATUS = 0x00,
- MAX17042_VALRT_Th = 0x01,
- MAX17042_TALRT_Th = 0x02,
- MAX17042_SALRT_Th = 0x03,
- MAX17042_AtRate = 0x04,
- MAX17042_RepCap = 0x05,
- MAX17042_RepSOC = 0x06,
- MAX17042_Age = 0x07,
- MAX17042_TEMP = 0x08,
- MAX17042_VCELL = 0x09,
- MAX17042_Current = 0x0A,
- MAX17042_AvgCurrent = 0x0B,
- MAX17042_Qresidual = 0x0C,
- MAX17042_SOC = 0x0D,
- MAX17042_AvSOC = 0x0E,
- MAX17042_RemCap = 0x0F,
- MAX17402_FullCAP = 0x10,
- MAX17042_TTE = 0x11,
- MAX17042_V_empty = 0x12,
-
- MAX17042_RSLOW = 0x14,
-
- MAX17042_AvgTA = 0x16,
- MAX17042_Cycles = 0x17,
- MAX17042_DesignCap = 0x18,
- MAX17042_AvgVCELL = 0x19,
- MAX17042_MinMaxTemp = 0x1A,
- MAX17042_MinMaxVolt = 0x1B,
- MAX17042_MinMaxCurr = 0x1C,
- MAX17042_CONFIG = 0x1D,
- MAX17042_ICHGTerm = 0x1E,
- MAX17042_AvCap = 0x1F,
- MAX17042_ManName = 0x20,
- MAX17042_DevName = 0x21,
- MAX17042_DevChem = 0x22,
-
- MAX17042_TempNom = 0x24,
- MAX17042_TempCold = 0x25,
- MAX17042_TempHot = 0x26,
- MAX17042_AIN = 0x27,
- MAX17042_LearnCFG = 0x28,
- MAX17042_SHFTCFG = 0x29,
- MAX17042_RelaxCFG = 0x2A,
- MAX17042_MiscCFG = 0x2B,
- MAX17042_TGAIN = 0x2C,
- MAx17042_TOFF = 0x2D,
- MAX17042_CGAIN = 0x2E,
- MAX17042_COFF = 0x2F,
-
- MAX17042_Q_empty = 0x33,
- MAX17042_T_empty = 0x34,
-
- MAX17042_RCOMP0 = 0x38,
- MAX17042_TempCo = 0x39,
- MAX17042_Rx = 0x3A,
- MAX17042_T_empty0 = 0x3B,
- MAX17042_TaskPeriod = 0x3C,
- MAX17042_FSTAT = 0x3D,
-
- MAX17042_SHDNTIMER = 0x3F,
-
- MAX17042_VFRemCap = 0x4A,
-
- MAX17042_QH = 0x4D,
- MAX17042_QL = 0x4E,
-};
-
struct max17042_chip {
struct i2c_client *client;
struct power_supply battery;
#include <linux/power_supply.h>
#include <linux/platform_device.h>
#include <linux/power/max8903_charger.h>
+#include <linux/module.h>
struct max8903_data {
- struct max8903_pdata *pdata;
+ struct max8903_pdata pdata;
struct device *dev;
struct power_supply psy;
bool fault;