#include <linux/delay.h>
#include <linux/io.h>
-#include <linux/clk.h>
#include <linux/err.h>
+ #include <linux/export.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
+#include <linux/clk.h>
#include <plat/common.h>
#include <linux/gpio.h>
#include <linux/platform_device.h>
+ #include <linux/slab.h>
#include <linux/io.h>
#include <linux/pwm_backlight.h>
+#include <linux/slab.h>
#include <plat/devs.h>
#include <plat/gpio-cfg.h>
--- /dev/null
+/* linux/arch/arm/plat-samsung/dma-ops.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ *
+ * Samsung DMA Operations
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/amba/pl330.h>
+#include <linux/scatterlist.h>
++#include <linux/export.h>
+
+#include <mach/dma.h>
+
+static inline bool pl330_filter(struct dma_chan *chan, void *param)
+{
+ struct dma_pl330_peri *peri = chan->private;
+ return peri->peri_id == (unsigned)param;
+}
+
+static unsigned samsung_dmadev_request(enum dma_ch dma_ch,
+ struct samsung_dma_info *info)
+{
+ struct dma_chan *chan;
+ dma_cap_mask_t mask;
+ struct dma_slave_config slave_config;
+
+ dma_cap_zero(mask);
+ dma_cap_set(info->cap, mask);
+
+ chan = dma_request_channel(mask, pl330_filter, (void *)dma_ch);
+
+ if (info->direction == DMA_FROM_DEVICE) {
+ memset(&slave_config, 0, sizeof(struct dma_slave_config));
+ slave_config.direction = info->direction;
+ slave_config.src_addr = info->fifo;
+ slave_config.src_addr_width = info->width;
+ slave_config.src_maxburst = 1;
+ dmaengine_slave_config(chan, &slave_config);
+ } else if (info->direction == DMA_TO_DEVICE) {
+ memset(&slave_config, 0, sizeof(struct dma_slave_config));
+ slave_config.direction = info->direction;
+ slave_config.dst_addr = info->fifo;
+ slave_config.dst_addr_width = info->width;
+ slave_config.dst_maxburst = 1;
+ dmaengine_slave_config(chan, &slave_config);
+ }
+
+ return (unsigned)chan;
+}
+
+static int samsung_dmadev_release(unsigned ch,
+ struct s3c2410_dma_client *client)
+{
+ dma_release_channel((struct dma_chan *)ch);
+
+ return 0;
+}
+
+static int samsung_dmadev_prepare(unsigned ch,
+ struct samsung_dma_prep_info *info)
+{
+ struct scatterlist sg;
+ struct dma_chan *chan = (struct dma_chan *)ch;
+ struct dma_async_tx_descriptor *desc;
+
+ switch (info->cap) {
+ case DMA_SLAVE:
+ sg_init_table(&sg, 1);
+ sg_dma_len(&sg) = info->len;
+ sg_set_page(&sg, pfn_to_page(PFN_DOWN(info->buf)),
+ info->len, offset_in_page(info->buf));
+ sg_dma_address(&sg) = info->buf;
+
+ desc = chan->device->device_prep_slave_sg(chan,
+ &sg, 1, info->direction, DMA_PREP_INTERRUPT);
+ break;
+ case DMA_CYCLIC:
+ desc = chan->device->device_prep_dma_cyclic(chan,
+ info->buf, info->len, info->period, info->direction);
+ break;
+ default:
+ dev_err(&chan->dev->device, "unsupported format\n");
+ return -EFAULT;
+ }
+
+ if (!desc) {
+ dev_err(&chan->dev->device, "cannot prepare cyclic dma\n");
+ return -EFAULT;
+ }
+
+ desc->callback = info->fp;
+ desc->callback_param = info->fp_param;
+
+ dmaengine_submit((struct dma_async_tx_descriptor *)desc);
+
+ return 0;
+}
+
+static inline int samsung_dmadev_trigger(unsigned ch)
+{
+ dma_async_issue_pending((struct dma_chan *)ch);
+
+ return 0;
+}
+
+static inline int samsung_dmadev_flush(unsigned ch)
+{
+ return dmaengine_terminate_all((struct dma_chan *)ch);
+}
+
+struct samsung_dma_ops dmadev_ops = {
+ .request = samsung_dmadev_request,
+ .release = samsung_dmadev_release,
+ .prepare = samsung_dmadev_prepare,
+ .trigger = samsung_dmadev_trigger,
+ .started = NULL,
+ .flush = samsung_dmadev_flush,
+ .stop = samsung_dmadev_flush,
+};
+
+void *samsung_dmadev_get_ops(void)
+{
+ return &dmadev_ops;
+}
+EXPORT_SYMBOL(samsung_dmadev_get_ops);
--- /dev/null
+/* linux/arch/arm/plat-samsung/s3c-dma-ops.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ *
+ * Samsung S3C-DMA Operations
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/types.h>
++#include <linux/export.h>
+
+#include <mach/dma.h>
+
+struct cb_data {
+ void (*fp) (void *);
+ void *fp_param;
+ unsigned ch;
+ struct list_head node;
+};
+
+static LIST_HEAD(dma_list);
+
+static void s3c_dma_cb(struct s3c2410_dma_chan *channel, void *param,
+ int size, enum s3c2410_dma_buffresult res)
+{
+ struct cb_data *data = param;
+
+ data->fp(data->fp_param);
+}
+
+static unsigned s3c_dma_request(enum dma_ch dma_ch,
+ struct samsung_dma_info *info)
+{
+ struct cb_data *data;
+
+ if (s3c2410_dma_request(dma_ch, info->client, NULL) < 0) {
+ s3c2410_dma_free(dma_ch, info->client);
+ return 0;
+ }
+
+ data = kzalloc(sizeof(struct cb_data), GFP_KERNEL);
+ data->ch = dma_ch;
+ list_add_tail(&data->node, &dma_list);
+
+ s3c2410_dma_devconfig(dma_ch, info->direction, info->fifo);
+
+ if (info->cap == DMA_CYCLIC)
+ s3c2410_dma_setflags(dma_ch, S3C2410_DMAF_CIRCULAR);
+
+ s3c2410_dma_config(dma_ch, info->width);
+
+ return (unsigned)dma_ch;
+}
+
+static int s3c_dma_release(unsigned ch, struct s3c2410_dma_client *client)
+{
+ struct cb_data *data;
+
+ list_for_each_entry(data, &dma_list, node)
+ if (data->ch == ch)
+ break;
+ list_del(&data->node);
+
+ s3c2410_dma_free(ch, client);
+ kfree(data);
+
+ return 0;
+}
+
+static int s3c_dma_prepare(unsigned ch, struct samsung_dma_prep_info *info)
+{
+ struct cb_data *data;
+ int len = (info->cap == DMA_CYCLIC) ? info->period : info->len;
+
+ list_for_each_entry(data, &dma_list, node)
+ if (data->ch == ch)
+ break;
+
+ if (!data->fp) {
+ s3c2410_dma_set_buffdone_fn(ch, s3c_dma_cb);
+ data->fp = info->fp;
+ data->fp_param = info->fp_param;
+ }
+
+ s3c2410_dma_enqueue(ch, (void *)data, info->buf, len);
+
+ return 0;
+}
+
+static inline int s3c_dma_trigger(unsigned ch)
+{
+ return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_START);
+}
+
+static inline int s3c_dma_started(unsigned ch)
+{
+ return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_STARTED);
+}
+
+static inline int s3c_dma_flush(unsigned ch)
+{
+ return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_FLUSH);
+}
+
+static inline int s3c_dma_stop(unsigned ch)
+{
+ return s3c2410_dma_ctrl(ch, S3C2410_DMAOP_STOP);
+}
+
+static struct samsung_dma_ops s3c_dma_ops = {
+ .request = s3c_dma_request,
+ .release = s3c_dma_release,
+ .prepare = s3c_dma_prepare,
+ .trigger = s3c_dma_trigger,
+ .started = s3c_dma_started,
+ .flush = s3c_dma_flush,
+ .stop = s3c_dma_stop,
+};
+
+void *s3c_dma_get_ops(void)
+{
+ return &s3c_dma_ops;
+}
+EXPORT_SYMBOL(s3c_dma_get_ops);
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/dma-debug.h>
+ #include <linux/export.h>
#include <asm/bug.h>
-#include <asm/cacheflush.h>
/*
* Generic direct DMA implementation
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
+#include <linux/topology.h>
#include <linux/node.h>
#include <linux/nodemask.h>
+ #include <linux/export.h>
static DEFINE_PER_CPU(struct cpu, cpu_devices);
*
*/
+ #include <linux/module.h>
#include <crypto/aes.h>
+#include <asm/aes.h>
asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/debugfs.h>
-#include <linux/edac_mce.h>
#include <linux/irq_work.h>
+ #include <linux/export.h>
#include <asm/processor.h>
#include <asm/mce.h>
#include <linux/errno.h>
#include <linux/err.h>
+ #include <linux/export.h>
#include <linux/string.h>
+#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
--- /dev/null
+/*
+ * Copyright (C) 2009-2011 Red Hat, Inc.
+ *
+ * Author: Mikulas Patocka <mpatocka@redhat.com>
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-bufio.h"
+
+#include <linux/device-mapper.h>
+#include <linux/dm-io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/version.h>
+#include <linux/shrinker.h>
++#include <linux/module.h>
+
+#define DM_MSG_PREFIX "bufio"
+
+/*
+ * Memory management policy:
+ * Limit the number of buffers to DM_BUFIO_MEMORY_PERCENT of main memory
+ * or DM_BUFIO_VMALLOC_PERCENT of vmalloc memory (whichever is lower).
+ * Always allocate at least DM_BUFIO_MIN_BUFFERS buffers.
+ * Start background writeback when there are DM_BUFIO_WRITEBACK_PERCENT
+ * dirty buffers.
+ */
+#define DM_BUFIO_MIN_BUFFERS 8
+
+#define DM_BUFIO_MEMORY_PERCENT 2
+#define DM_BUFIO_VMALLOC_PERCENT 25
+#define DM_BUFIO_WRITEBACK_PERCENT 75
+
+/*
+ * Check buffer ages in this interval (seconds)
+ */
+#define DM_BUFIO_WORK_TIMER_SECS 10
+
+/*
+ * Free buffers when they are older than this (seconds)
+ */
+#define DM_BUFIO_DEFAULT_AGE_SECS 60
+
+/*
+ * The number of bvec entries that are embedded directly in the buffer.
+ * If the chunk size is larger, dm-io is used to do the io.
+ */
+#define DM_BUFIO_INLINE_VECS 16
+
+/*
+ * Buffer hash
+ */
+#define DM_BUFIO_HASH_BITS 20
+#define DM_BUFIO_HASH(block) \
+ ((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \
+ ((1 << DM_BUFIO_HASH_BITS) - 1))
+
+/*
+ * Don't try to use kmem_cache_alloc for blocks larger than this.
+ * For explanation, see alloc_buffer_data below.
+ */
+#define DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT (PAGE_SIZE >> 1)
+#define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT (PAGE_SIZE << (MAX_ORDER - 1))
+
+/*
+ * dm_buffer->list_mode
+ */
+#define LIST_CLEAN 0
+#define LIST_DIRTY 1
+#define LIST_SIZE 2
+
+/*
+ * Linking of buffers:
+ * All buffers are linked to cache_hash with their hash_list field.
+ *
+ * Clean buffers that are not being written (B_WRITING not set)
+ * are linked to lru[LIST_CLEAN] with their lru_list field.
+ *
+ * Dirty and clean buffers that are being written are linked to
+ * lru[LIST_DIRTY] with their lru_list field. When the write
+ * finishes, the buffer cannot be relinked immediately (because we
+ * are in an interrupt context and relinking requires process
+ * context), so some clean-not-writing buffers can be held on
+ * dirty_lru too. They are later added to lru in the process
+ * context.
+ */
+struct dm_bufio_client {
+ struct mutex lock;
+
+ struct list_head lru[LIST_SIZE];
+ unsigned long n_buffers[LIST_SIZE];
+
+ struct block_device *bdev;
+ unsigned block_size;
+ unsigned char sectors_per_block_bits;
+ unsigned char pages_per_block_bits;
+ unsigned char blocks_per_page_bits;
+ unsigned aux_size;
+ void (*alloc_callback)(struct dm_buffer *);
+ void (*write_callback)(struct dm_buffer *);
+
+ struct dm_io_client *dm_io;
+
+ struct list_head reserved_buffers;
+ unsigned need_reserved_buffers;
+
+ struct hlist_head *cache_hash;
+ wait_queue_head_t free_buffer_wait;
+
+ int async_write_error;
+
+ struct list_head client_list;
+ struct shrinker shrinker;
+};
+
+/*
+ * Buffer state bits.
+ */
+#define B_READING 0
+#define B_WRITING 1
+#define B_DIRTY 2
+
+/*
+ * Describes how the block was allocated:
+ * kmem_cache_alloc(), __get_free_pages() or vmalloc().
+ * See the comment at alloc_buffer_data.
+ */
+enum data_mode {
+ DATA_MODE_SLAB = 0,
+ DATA_MODE_GET_FREE_PAGES = 1,
+ DATA_MODE_VMALLOC = 2,
+ DATA_MODE_LIMIT = 3
+};
+
+struct dm_buffer {
+ struct hlist_node hash_list;
+ struct list_head lru_list;
+ sector_t block;
+ void *data;
+ enum data_mode data_mode;
+ unsigned char list_mode; /* LIST_* */
+ unsigned hold_count;
+ int read_error;
+ int write_error;
+ unsigned long state;
+ unsigned long last_accessed;
+ struct dm_bufio_client *c;
+ struct bio bio;
+ struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
+};
+
+/*----------------------------------------------------------------*/
+
+static struct kmem_cache *dm_bufio_caches[PAGE_SHIFT - SECTOR_SHIFT];
+static char *dm_bufio_cache_names[PAGE_SHIFT - SECTOR_SHIFT];
+
+static inline int dm_bufio_cache_index(struct dm_bufio_client *c)
+{
+ unsigned ret = c->blocks_per_page_bits - 1;
+
+ BUG_ON(ret >= ARRAY_SIZE(dm_bufio_caches));
+
+ return ret;
+}
+
+#define DM_BUFIO_CACHE(c) (dm_bufio_caches[dm_bufio_cache_index(c)])
+#define DM_BUFIO_CACHE_NAME(c) (dm_bufio_cache_names[dm_bufio_cache_index(c)])
+
+#define dm_bufio_in_request() (!!current->bio_list)
+
+static void dm_bufio_lock(struct dm_bufio_client *c)
+{
+ mutex_lock_nested(&c->lock, dm_bufio_in_request());
+}
+
+static int dm_bufio_trylock(struct dm_bufio_client *c)
+{
+ return mutex_trylock(&c->lock);
+}
+
+static void dm_bufio_unlock(struct dm_bufio_client *c)
+{
+ mutex_unlock(&c->lock);
+}
+
+/*
+ * FIXME Move to sched.h?
+ */
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+# define dm_bufio_cond_resched() \
+do { \
+ if (unlikely(need_resched())) \
+ _cond_resched(); \
+} while (0)
+#else
+# define dm_bufio_cond_resched() do { } while (0)
+#endif
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Default cache size: available memory divided by the ratio.
+ */
+static unsigned long dm_bufio_default_cache_size;
+
+/*
+ * Total cache size set by the user.
+ */
+static unsigned long dm_bufio_cache_size;
+
+/*
+ * A copy of dm_bufio_cache_size because dm_bufio_cache_size can change
+ * at any time. If it disagrees, the user has changed cache size.
+ */
+static unsigned long dm_bufio_cache_size_latch;
+
+static DEFINE_SPINLOCK(param_spinlock);
+
+/*
+ * Buffers are freed after this timeout
+ */
+static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
+
+static unsigned long dm_bufio_peak_allocated;
+static unsigned long dm_bufio_allocated_kmem_cache;
+static unsigned long dm_bufio_allocated_get_free_pages;
+static unsigned long dm_bufio_allocated_vmalloc;
+static unsigned long dm_bufio_current_allocated;
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count
+ */
+static unsigned long dm_bufio_cache_size_per_client;
+
+/*
+ * The current number of clients.
+ */
+static int dm_bufio_client_count;
+
+/*
+ * The list of all clients.
+ */
+static LIST_HEAD(dm_bufio_all_clients);
+
+/*
+ * This mutex protects dm_bufio_cache_size_latch,
+ * dm_bufio_cache_size_per_client and dm_bufio_client_count
+ */
+static DEFINE_MUTEX(dm_bufio_clients_lock);
+
+/*----------------------------------------------------------------*/
+
+static void adjust_total_allocated(enum data_mode data_mode, long diff)
+{
+ static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
+ &dm_bufio_allocated_kmem_cache,
+ &dm_bufio_allocated_get_free_pages,
+ &dm_bufio_allocated_vmalloc,
+ };
+
+ spin_lock(¶m_spinlock);
+
+ *class_ptr[data_mode] += diff;
+
+ dm_bufio_current_allocated += diff;
+
+ if (dm_bufio_current_allocated > dm_bufio_peak_allocated)
+ dm_bufio_peak_allocated = dm_bufio_current_allocated;
+
+ spin_unlock(¶m_spinlock);
+}
+
+/*
+ * Change the number of clients and recalculate per-client limit.
+ */
+static void __cache_size_refresh(void)
+{
+ BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock));
+ BUG_ON(dm_bufio_client_count < 0);
+
+ dm_bufio_cache_size_latch = dm_bufio_cache_size;
+
+ barrier();
+
+ /*
+ * Use default if set to 0 and report the actual cache size used.
+ */
+ if (!dm_bufio_cache_size_latch) {
+ (void)cmpxchg(&dm_bufio_cache_size, 0,
+ dm_bufio_default_cache_size);
+ dm_bufio_cache_size_latch = dm_bufio_default_cache_size;
+ }
+
+ dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch /
+ (dm_bufio_client_count ? : 1);
+}
+
+/*
+ * Allocating buffer data.
+ *
+ * Small buffers are allocated with kmem_cache, to use space optimally.
+ *
+ * For large buffers, we choose between get_free_pages and vmalloc.
+ * Each has advantages and disadvantages.
+ *
+ * __get_free_pages can randomly fail if the memory is fragmented.
+ * __vmalloc won't randomly fail, but vmalloc space is limited (it may be
+ * as low as 128M) so using it for caching is not appropriate.
+ *
+ * If the allocation may fail we use __get_free_pages. Memory fragmentation
+ * won't have a fatal effect here, but it just causes flushes of some other
+ * buffers and more I/O will be performed. Don't use __get_free_pages if it
+ * always fails (i.e. order >= MAX_ORDER).
+ *
+ * If the allocation shouldn't fail we use __vmalloc. This is only for the
+ * initial reserve allocation, so there's no risk of wasting all vmalloc
+ * space.
+ */
+static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
+ enum data_mode *data_mode)
+{
+ if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
+ *data_mode = DATA_MODE_SLAB;
+ return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
+ }
+
+ if (c->block_size <= DM_BUFIO_BLOCK_SIZE_GFP_LIMIT &&
+ gfp_mask & __GFP_NORETRY) {
+ *data_mode = DATA_MODE_GET_FREE_PAGES;
+ return (void *)__get_free_pages(gfp_mask,
+ c->pages_per_block_bits);
+ }
+
+ *data_mode = DATA_MODE_VMALLOC;
+ return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+}
+
+/*
+ * Free buffer's data.
+ */
+static void free_buffer_data(struct dm_bufio_client *c,
+ void *data, enum data_mode data_mode)
+{
+ switch (data_mode) {
+ case DATA_MODE_SLAB:
+ kmem_cache_free(DM_BUFIO_CACHE(c), data);
+ break;
+
+ case DATA_MODE_GET_FREE_PAGES:
+ free_pages((unsigned long)data, c->pages_per_block_bits);
+ break;
+
+ case DATA_MODE_VMALLOC:
+ vfree(data);
+ break;
+
+ default:
+ DMCRIT("dm_bufio_free_buffer_data: bad data mode: %d",
+ data_mode);
+ BUG();
+ }
+}
+
+/*
+ * Allocate buffer and its data.
+ */
+static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
+{
+ struct dm_buffer *b = kmalloc(sizeof(struct dm_buffer) + c->aux_size,
+ gfp_mask);
+
+ if (!b)
+ return NULL;
+
+ b->c = c;
+
+ b->data = alloc_buffer_data(c, gfp_mask, &b->data_mode);
+ if (!b->data) {
+ kfree(b);
+ return NULL;
+ }
+
+ adjust_total_allocated(b->data_mode, (long)c->block_size);
+
+ return b;
+}
+
+/*
+ * Free buffer and its data.
+ */
+static void free_buffer(struct dm_buffer *b)
+{
+ struct dm_bufio_client *c = b->c;
+
+ adjust_total_allocated(b->data_mode, -(long)c->block_size);
+
+ free_buffer_data(c, b->data, b->data_mode);
+ kfree(b);
+}
+
+/*
+ * Link buffer to the hash list and clean or dirty queue.
+ */
+static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
+{
+ struct dm_bufio_client *c = b->c;
+
+ c->n_buffers[dirty]++;
+ b->block = block;
+ b->list_mode = dirty;
+ list_add(&b->lru_list, &c->lru[dirty]);
+ hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]);
+ b->last_accessed = jiffies;
+}
+
+/*
+ * Unlink buffer from the hash list and dirty or clean queue.
+ */
+static void __unlink_buffer(struct dm_buffer *b)
+{
+ struct dm_bufio_client *c = b->c;
+
+ BUG_ON(!c->n_buffers[b->list_mode]);
+
+ c->n_buffers[b->list_mode]--;
+ hlist_del(&b->hash_list);
+ list_del(&b->lru_list);
+}
+
+/*
+ * Place the buffer to the head of dirty or clean LRU queue.
+ */
+static void __relink_lru(struct dm_buffer *b, int dirty)
+{
+ struct dm_bufio_client *c = b->c;
+
+ BUG_ON(!c->n_buffers[b->list_mode]);
+
+ c->n_buffers[b->list_mode]--;
+ c->n_buffers[dirty]++;
+ b->list_mode = dirty;
+ list_del(&b->lru_list);
+ list_add(&b->lru_list, &c->lru[dirty]);
+}
+
+/*----------------------------------------------------------------
+ * Submit I/O on the buffer.
+ *
+ * Bio interface is faster but it has some problems:
+ * the vector list is limited (increasing this limit increases
+ * memory-consumption per buffer, so it is not viable);
+ *
+ * the memory must be direct-mapped, not vmalloced;
+ *
+ * the I/O driver can reject requests spuriously if it thinks that
+ * the requests are too big for the device or if they cross a
+ * controller-defined memory boundary.
+ *
+ * If the buffer is small enough (up to DM_BUFIO_INLINE_VECS pages) and
+ * it is not vmalloced, try using the bio interface.
+ *
+ * If the buffer is big, if it is vmalloced or if the underlying device
+ * rejects the bio because it is too large, use dm-io layer to do the I/O.
+ * The dm-io layer splits the I/O into multiple requests, avoiding the above
+ * shortcomings.
+ *--------------------------------------------------------------*/
+
+/*
+ * dm-io completion routine. It just calls b->bio.bi_end_io, pretending
+ * that the request was handled directly with bio interface.
+ */
+static void dmio_complete(unsigned long error, void *context)
+{
+ struct dm_buffer *b = context;
+
+ b->bio.bi_end_io(&b->bio, error ? -EIO : 0);
+}
+
+static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
+ bio_end_io_t *end_io)
+{
+ int r;
+ struct dm_io_request io_req = {
+ .bi_rw = rw,
+ .notify.fn = dmio_complete,
+ .notify.context = b,
+ .client = b->c->dm_io,
+ };
+ struct dm_io_region region = {
+ .bdev = b->c->bdev,
+ .sector = block << b->c->sectors_per_block_bits,
+ .count = b->c->block_size >> SECTOR_SHIFT,
+ };
+
+ if (b->data_mode != DATA_MODE_VMALLOC) {
+ io_req.mem.type = DM_IO_KMEM;
+ io_req.mem.ptr.addr = b->data;
+ } else {
+ io_req.mem.type = DM_IO_VMA;
+ io_req.mem.ptr.vma = b->data;
+ }
+
+ b->bio.bi_end_io = end_io;
+
+ r = dm_io(&io_req, 1, ®ion, NULL);
+ if (r)
+ end_io(&b->bio, r);
+}
+
+static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
+ bio_end_io_t *end_io)
+{
+ char *ptr;
+ int len;
+
+ bio_init(&b->bio);
+ b->bio.bi_io_vec = b->bio_vec;
+ b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
+ b->bio.bi_sector = block << b->c->sectors_per_block_bits;
+ b->bio.bi_bdev = b->c->bdev;
+ b->bio.bi_end_io = end_io;
+
+ /*
+ * We assume that if len >= PAGE_SIZE ptr is page-aligned.
+ * If len < PAGE_SIZE the buffer doesn't cross page boundary.
+ */
+ ptr = b->data;
+ len = b->c->block_size;
+
+ if (len >= PAGE_SIZE)
+ BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
+ else
+ BUG_ON((unsigned long)ptr & (len - 1));
+
+ do {
+ if (!bio_add_page(&b->bio, virt_to_page(ptr),
+ len < PAGE_SIZE ? len : PAGE_SIZE,
+ virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
+ BUG_ON(b->c->block_size <= PAGE_SIZE);
+ use_dmio(b, rw, block, end_io);
+ return;
+ }
+
+ len -= PAGE_SIZE;
+ ptr += PAGE_SIZE;
+ } while (len > 0);
+
+ submit_bio(rw, &b->bio);
+}
+
+static void submit_io(struct dm_buffer *b, int rw, sector_t block,
+ bio_end_io_t *end_io)
+{
+ if (rw == WRITE && b->c->write_callback)
+ b->c->write_callback(b);
+
+ if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
+ b->data_mode != DATA_MODE_VMALLOC)
+ use_inline_bio(b, rw, block, end_io);
+ else
+ use_dmio(b, rw, block, end_io);
+}
+
+/*----------------------------------------------------------------
+ * Writing dirty buffers
+ *--------------------------------------------------------------*/
+
+/*
+ * The endio routine for write.
+ *
+ * Set the error, clear B_WRITING bit and wake anyone who was waiting on
+ * it.
+ */
+static void write_endio(struct bio *bio, int error)
+{
+ struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
+
+ b->write_error = error;
+ if (error) {
+ struct dm_bufio_client *c = b->c;
+ (void)cmpxchg(&c->async_write_error, 0, error);
+ }
+
+ BUG_ON(!test_bit(B_WRITING, &b->state));
+
+ smp_mb__before_clear_bit();
+ clear_bit(B_WRITING, &b->state);
+ smp_mb__after_clear_bit();
+
+ wake_up_bit(&b->state, B_WRITING);
+}
+
+/*
+ * This function is called when wait_on_bit is actually waiting.
+ */
+static int do_io_schedule(void *word)
+{
+ io_schedule();
+
+ return 0;
+}
+
+/*
+ * Initiate a write on a dirty buffer, but don't wait for it.
+ *
+ * - If the buffer is not dirty, exit.
+ * - If there some previous write going on, wait for it to finish (we can't
+ * have two writes on the same buffer simultaneously).
+ * - Submit our write and don't wait on it. We set B_WRITING indicating
+ * that there is a write in progress.
+ */
+static void __write_dirty_buffer(struct dm_buffer *b)
+{
+ if (!test_bit(B_DIRTY, &b->state))
+ return;
+
+ clear_bit(B_DIRTY, &b->state);
+ wait_on_bit_lock(&b->state, B_WRITING,
+ do_io_schedule, TASK_UNINTERRUPTIBLE);
+
+ submit_io(b, WRITE, b->block, write_endio);
+}
+
+/*
+ * Wait until any activity on the buffer finishes. Possibly write the
+ * buffer if it is dirty. When this function finishes, there is no I/O
+ * running on the buffer and the buffer is not dirty.
+ */
+static void __make_buffer_clean(struct dm_buffer *b)
+{
+ BUG_ON(b->hold_count);
+
+ if (!b->state) /* fast case */
+ return;
+
+ wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+ __write_dirty_buffer(b);
+ wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * Find some buffer that is not held by anybody, clean it, unlink it and
+ * return it.
+ */
+static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
+{
+ struct dm_buffer *b;
+
+ list_for_each_entry_reverse(b, &c->lru[LIST_CLEAN], lru_list) {
+ BUG_ON(test_bit(B_WRITING, &b->state));
+ BUG_ON(test_bit(B_DIRTY, &b->state));
+
+ if (!b->hold_count) {
+ __make_buffer_clean(b);
+ __unlink_buffer(b);
+ return b;
+ }
+ dm_bufio_cond_resched();
+ }
+
+ list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
+ BUG_ON(test_bit(B_READING, &b->state));
+
+ if (!b->hold_count) {
+ __make_buffer_clean(b);
+ __unlink_buffer(b);
+ return b;
+ }
+ dm_bufio_cond_resched();
+ }
+
+ return NULL;
+}
+
+/*
+ * Wait until some other threads free some buffer or release hold count on
+ * some buffer.
+ *
+ * This function is entered with c->lock held, drops it and regains it
+ * before exiting.
+ */
+static void __wait_for_free_buffer(struct dm_bufio_client *c)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue(&c->free_buffer_wait, &wait);
+ set_task_state(current, TASK_UNINTERRUPTIBLE);
+ dm_bufio_unlock(c);
+
+ io_schedule();
+
+ set_task_state(current, TASK_RUNNING);
+ remove_wait_queue(&c->free_buffer_wait, &wait);
+
+ dm_bufio_lock(c);
+}
+
+/*
+ * Allocate a new buffer. If the allocation is not possible, wait until
+ * some other thread frees a buffer.
+ *
+ * May drop the lock and regain it.
+ */
+static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client *c)
+{
+ struct dm_buffer *b;
+
+ /*
+ * dm-bufio is resistant to allocation failures (it just keeps
+ * one buffer reserved in cases all the allocations fail).
+ * So set flags to not try too hard:
+ * GFP_NOIO: don't recurse into the I/O layer
+ * __GFP_NORETRY: don't retry and rather return failure
+ * __GFP_NOMEMALLOC: don't use emergency reserves
+ * __GFP_NOWARN: don't print a warning in case of failure
+ *
+ * For debugging, if we set the cache size to 1, no new buffers will
+ * be allocated.
+ */
+ while (1) {
+ if (dm_bufio_cache_size_latch != 1) {
+ b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ if (b)
+ return b;
+ }
+
+ if (!list_empty(&c->reserved_buffers)) {
+ b = list_entry(c->reserved_buffers.next,
+ struct dm_buffer, lru_list);
+ list_del(&b->lru_list);
+ c->need_reserved_buffers++;
+
+ return b;
+ }
+
+ b = __get_unclaimed_buffer(c);
+ if (b)
+ return b;
+
+ __wait_for_free_buffer(c);
+ }
+}
+
+static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
+{
+ struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
+
+ if (c->alloc_callback)
+ c->alloc_callback(b);
+
+ return b;
+}
+
+/*
+ * Free a buffer and wake other threads waiting for free buffers.
+ */
+static void __free_buffer_wake(struct dm_buffer *b)
+{
+ struct dm_bufio_client *c = b->c;
+
+ if (!c->need_reserved_buffers)
+ free_buffer(b);
+ else {
+ list_add(&b->lru_list, &c->reserved_buffers);
+ c->need_reserved_buffers--;
+ }
+
+ wake_up(&c->free_buffer_wait);
+}
+
+static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait)
+{
+ struct dm_buffer *b, *tmp;
+
+ list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
+ BUG_ON(test_bit(B_READING, &b->state));
+
+ if (!test_bit(B_DIRTY, &b->state) &&
+ !test_bit(B_WRITING, &b->state)) {
+ __relink_lru(b, LIST_CLEAN);
+ continue;
+ }
+
+ if (no_wait && test_bit(B_WRITING, &b->state))
+ return;
+
+ __write_dirty_buffer(b);
+ dm_bufio_cond_resched();
+ }
+}
+
+/*
+ * Get writeback threshold and buffer limit for a given client.
+ */
+static void __get_memory_limit(struct dm_bufio_client *c,
+ unsigned long *threshold_buffers,
+ unsigned long *limit_buffers)
+{
+ unsigned long buffers;
+
+ if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
+ mutex_lock(&dm_bufio_clients_lock);
+ __cache_size_refresh();
+ mutex_unlock(&dm_bufio_clients_lock);
+ }
+
+ buffers = dm_bufio_cache_size_per_client >>
+ (c->sectors_per_block_bits + SECTOR_SHIFT);
+
+ if (buffers < DM_BUFIO_MIN_BUFFERS)
+ buffers = DM_BUFIO_MIN_BUFFERS;
+
+ *limit_buffers = buffers;
+ *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
+}
+
+/*
+ * Check if we're over watermark.
+ * If we are over threshold_buffers, start freeing buffers.
+ * If we're over "limit_buffers", block until we get under the limit.
+ */
+static void __check_watermark(struct dm_bufio_client *c)
+{
+ unsigned long threshold_buffers, limit_buffers;
+
+ __get_memory_limit(c, &threshold_buffers, &limit_buffers);
+
+ while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] >
+ limit_buffers) {
+
+ struct dm_buffer *b = __get_unclaimed_buffer(c);
+
+ if (!b)
+ return;
+
+ __free_buffer_wake(b);
+ dm_bufio_cond_resched();
+ }
+
+ if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
+ __write_dirty_buffers_async(c, 1);
+}
+
+/*
+ * Find a buffer in the hash.
+ */
+static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
+{
+ struct dm_buffer *b;
+ struct hlist_node *hn;
+
+ hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)],
+ hash_list) {
+ dm_bufio_cond_resched();
+ if (b->block == block)
+ return b;
+ }
+
+ return NULL;
+}
+
+/*----------------------------------------------------------------
+ * Getting a buffer
+ *--------------------------------------------------------------*/
+
+enum new_flag {
+ NF_FRESH = 0,
+ NF_READ = 1,
+ NF_GET = 2
+};
+
+static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
+ enum new_flag nf, struct dm_buffer **bp,
+ int *need_submit)
+{
+ struct dm_buffer *b, *new_b = NULL;
+
+ *need_submit = 0;
+
+ b = __find(c, block);
+ if (b) {
+ b->hold_count++;
+ __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
+ test_bit(B_WRITING, &b->state));
+ return b;
+ }
+
+ if (nf == NF_GET)
+ return NULL;
+
+ new_b = __alloc_buffer_wait(c);
+
+ /*
+ * We've had a period where the mutex was unlocked, so need to
+ * recheck the hash table.
+ */
+ b = __find(c, block);
+ if (b) {
+ __free_buffer_wake(new_b);
+ b->hold_count++;
+ __relink_lru(b, test_bit(B_DIRTY, &b->state) ||
+ test_bit(B_WRITING, &b->state));
+ return b;
+ }
+
+ __check_watermark(c);
+
+ b = new_b;
+ b->hold_count = 1;
+ b->read_error = 0;
+ b->write_error = 0;
+ __link_buffer(b, block, LIST_CLEAN);
+
+ if (nf == NF_FRESH) {
+ b->state = 0;
+ return b;
+ }
+
+ b->state = 1 << B_READING;
+ *need_submit = 1;
+
+ return b;
+}
+
+/*
+ * The endio routine for reading: set the error, clear the bit and wake up
+ * anyone waiting on the buffer.
+ */
+static void read_endio(struct bio *bio, int error)
+{
+ struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
+
+ b->read_error = error;
+
+ BUG_ON(!test_bit(B_READING, &b->state));
+
+ smp_mb__before_clear_bit();
+ clear_bit(B_READING, &b->state);
+ smp_mb__after_clear_bit();
+
+ wake_up_bit(&b->state, B_READING);
+}
+
+/*
+ * A common routine for dm_bufio_new and dm_bufio_read. Operation of these
+ * functions is similar except that dm_bufio_new doesn't read the
+ * buffer from the disk (assuming that the caller overwrites all the data
+ * and uses dm_bufio_mark_buffer_dirty to write new data back).
+ */
+static void *new_read(struct dm_bufio_client *c, sector_t block,
+ enum new_flag nf, struct dm_buffer **bp)
+{
+ int need_submit;
+ struct dm_buffer *b;
+
+ dm_bufio_lock(c);
+ b = __bufio_new(c, block, nf, bp, &need_submit);
+ dm_bufio_unlock(c);
+
+ if (!b || IS_ERR(b))
+ return b;
+
+ if (need_submit)
+ submit_io(b, READ, b->block, read_endio);
+
+ wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+
+ if (b->read_error) {
+ int error = b->read_error;
+
+ dm_bufio_release(b);
+
+ return ERR_PTR(error);
+ }
+
+ *bp = b;
+
+ return b->data;
+}
+
+void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp)
+{
+ return new_read(c, block, NF_GET, bp);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get);
+
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp)
+{
+ BUG_ON(dm_bufio_in_request());
+
+ return new_read(c, block, NF_READ, bp);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_read);
+
+void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp)
+{
+ BUG_ON(dm_bufio_in_request());
+
+ return new_read(c, block, NF_FRESH, bp);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_new);
+
+void dm_bufio_release(struct dm_buffer *b)
+{
+ struct dm_bufio_client *c = b->c;
+
+ dm_bufio_lock(c);
+
+ BUG_ON(test_bit(B_READING, &b->state));
+ BUG_ON(!b->hold_count);
+
+ b->hold_count--;
+ if (!b->hold_count) {
+ wake_up(&c->free_buffer_wait);
+
+ /*
+ * If there were errors on the buffer, and the buffer is not
+ * to be written, free the buffer. There is no point in caching
+ * invalid buffer.
+ */
+ if ((b->read_error || b->write_error) &&
+ !test_bit(B_WRITING, &b->state) &&
+ !test_bit(B_DIRTY, &b->state)) {
+ __unlink_buffer(b);
+ __free_buffer_wake(b);
+ }
+ }
+
+ dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_release);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+{
+ struct dm_bufio_client *c = b->c;
+
+ dm_bufio_lock(c);
+
+ if (!test_and_set_bit(B_DIRTY, &b->state))
+ __relink_lru(b, LIST_DIRTY);
+
+ dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
+
+void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
+{
+ BUG_ON(dm_bufio_in_request());
+
+ dm_bufio_lock(c);
+ __write_dirty_buffers_async(c, 0);
+ dm_bufio_unlock(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
+
+/*
+ * For performance, it is essential that the buffers are written asynchronously
+ * and simultaneously (so that the block layer can merge the writes) and then
+ * waited upon.
+ *
+ * Finally, we flush hardware disk cache.
+ */
+int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
+{
+ int a, f;
+ unsigned long buffers_processed = 0;
+ struct dm_buffer *b, *tmp;
+
+ dm_bufio_lock(c);
+ __write_dirty_buffers_async(c, 0);
+
+again:
+ list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_DIRTY], lru_list) {
+ int dropped_lock = 0;
+
+ if (buffers_processed < c->n_buffers[LIST_DIRTY])
+ buffers_processed++;
+
+ BUG_ON(test_bit(B_READING, &b->state));
+
+ if (test_bit(B_WRITING, &b->state)) {
+ if (buffers_processed < c->n_buffers[LIST_DIRTY]) {
+ dropped_lock = 1;
+ b->hold_count++;
+ dm_bufio_unlock(c);
+ wait_on_bit(&b->state, B_WRITING,
+ do_io_schedule,
+ TASK_UNINTERRUPTIBLE);
+ dm_bufio_lock(c);
+ b->hold_count--;
+ } else
+ wait_on_bit(&b->state, B_WRITING,
+ do_io_schedule,
+ TASK_UNINTERRUPTIBLE);
+ }
+
+ if (!test_bit(B_DIRTY, &b->state) &&
+ !test_bit(B_WRITING, &b->state))
+ __relink_lru(b, LIST_CLEAN);
+
+ dm_bufio_cond_resched();
+
+ /*
+ * If we dropped the lock, the list is no longer consistent,
+ * so we must restart the search.
+ *
+ * In the most common case, the buffer just processed is
+ * relinked to the clean list, so we won't loop scanning the
+ * same buffer again and again.
+ *
+ * This may livelock if there is another thread simultaneously
+ * dirtying buffers, so we count the number of buffers walked
+ * and if it exceeds the total number of buffers, it means that
+ * someone is doing some writes simultaneously with us. In
+ * this case, stop, dropping the lock.
+ */
+ if (dropped_lock)
+ goto again;
+ }
+ wake_up(&c->free_buffer_wait);
+ dm_bufio_unlock(c);
+
+ a = xchg(&c->async_write_error, 0);
+ f = dm_bufio_issue_flush(c);
+ if (a)
+ return a;
+
+ return f;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
+
+/*
+ * Use dm-io to send and empty barrier flush the device.
+ */
+int dm_bufio_issue_flush(struct dm_bufio_client *c)
+{
+ struct dm_io_request io_req = {
+ .bi_rw = REQ_FLUSH,
+ .mem.type = DM_IO_KMEM,
+ .mem.ptr.addr = NULL,
+ .client = c->dm_io,
+ };
+ struct dm_io_region io_reg = {
+ .bdev = c->bdev,
+ .sector = 0,
+ .count = 0,
+ };
+
+ BUG_ON(dm_bufio_in_request());
+
+ return dm_io(&io_req, 1, &io_reg, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
+
+/*
+ * We first delete any other buffer that may be at that new location.
+ *
+ * Then, we write the buffer to the original location if it was dirty.
+ *
+ * Then, if we are the only one who is holding the buffer, relink the buffer
+ * in the hash queue for the new location.
+ *
+ * If there was someone else holding the buffer, we write it to the new
+ * location but not relink it, because that other user needs to have the buffer
+ * at the same place.
+ */
+void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block)
+{
+ struct dm_bufio_client *c = b->c;
+ struct dm_buffer *new;
+
+ BUG_ON(dm_bufio_in_request());
+
+ dm_bufio_lock(c);
+
+retry:
+ new = __find(c, new_block);
+ if (new) {
+ if (new->hold_count) {
+ __wait_for_free_buffer(c);
+ goto retry;
+ }
+
+ /*
+ * FIXME: Is there any point waiting for a write that's going
+ * to be overwritten in a bit?
+ */
+ __make_buffer_clean(new);
+ __unlink_buffer(new);
+ __free_buffer_wake(new);
+ }
+
+ BUG_ON(!b->hold_count);
+ BUG_ON(test_bit(B_READING, &b->state));
+
+ __write_dirty_buffer(b);
+ if (b->hold_count == 1) {
+ wait_on_bit(&b->state, B_WRITING,
+ do_io_schedule, TASK_UNINTERRUPTIBLE);
+ set_bit(B_DIRTY, &b->state);
+ __unlink_buffer(b);
+ __link_buffer(b, new_block, LIST_DIRTY);
+ } else {
+ sector_t old_block;
+ wait_on_bit_lock(&b->state, B_WRITING,
+ do_io_schedule, TASK_UNINTERRUPTIBLE);
+ /*
+ * Relink buffer to "new_block" so that write_callback
+ * sees "new_block" as a block number.
+ * After the write, link the buffer back to old_block.
+ * All this must be done in bufio lock, so that block number
+ * change isn't visible to other threads.
+ */
+ old_block = b->block;
+ __unlink_buffer(b);
+ __link_buffer(b, new_block, b->list_mode);
+ submit_io(b, WRITE, new_block, write_endio);
+ wait_on_bit(&b->state, B_WRITING,
+ do_io_schedule, TASK_UNINTERRUPTIBLE);
+ __unlink_buffer(b);
+ __link_buffer(b, old_block, b->list_mode);
+ }
+
+ dm_bufio_unlock(c);
+ dm_bufio_release(b);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_release_move);
+
+unsigned dm_bufio_get_block_size(struct dm_bufio_client *c)
+{
+ return c->block_size;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
+
+sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
+{
+ return i_size_read(c->bdev->bd_inode) >>
+ (SECTOR_SHIFT + c->sectors_per_block_bits);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
+
+sector_t dm_bufio_get_block_number(struct dm_buffer *b)
+{
+ return b->block;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_block_number);
+
+void *dm_bufio_get_block_data(struct dm_buffer *b)
+{
+ return b->data;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_block_data);
+
+void *dm_bufio_get_aux_data(struct dm_buffer *b)
+{
+ return b + 1;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_aux_data);
+
+struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b)
+{
+ return b->c;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_client);
+
+static void drop_buffers(struct dm_bufio_client *c)
+{
+ struct dm_buffer *b;
+ int i;
+
+ BUG_ON(dm_bufio_in_request());
+
+ /*
+ * An optimization so that the buffers are not written one-by-one.
+ */
+ dm_bufio_write_dirty_buffers_async(c);
+
+ dm_bufio_lock(c);
+
+ while ((b = __get_unclaimed_buffer(c)))
+ __free_buffer_wake(b);
+
+ for (i = 0; i < LIST_SIZE; i++)
+ list_for_each_entry(b, &c->lru[i], lru_list)
+ DMERR("leaked buffer %llx, hold count %u, list %d",
+ (unsigned long long)b->block, b->hold_count, i);
+
+ for (i = 0; i < LIST_SIZE; i++)
+ BUG_ON(!list_empty(&c->lru[i]));
+
+ dm_bufio_unlock(c);
+}
+
+/*
+ * Test if the buffer is unused and too old, and commit it.
+ * At if noio is set, we must not do any I/O because we hold
+ * dm_bufio_clients_lock and we would risk deadlock if the I/O gets rerouted to
+ * different bufio client.
+ */
+static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
+ unsigned long max_jiffies)
+{
+ if (jiffies - b->last_accessed < max_jiffies)
+ return 1;
+
+ if (!(gfp & __GFP_IO)) {
+ if (test_bit(B_READING, &b->state) ||
+ test_bit(B_WRITING, &b->state) ||
+ test_bit(B_DIRTY, &b->state))
+ return 1;
+ }
+
+ if (b->hold_count)
+ return 1;
+
+ __make_buffer_clean(b);
+ __unlink_buffer(b);
+ __free_buffer_wake(b);
+
+ return 0;
+}
+
+static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
+ struct shrink_control *sc)
+{
+ int l;
+ struct dm_buffer *b, *tmp;
+
+ for (l = 0; l < LIST_SIZE; l++) {
+ list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list)
+ if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) &&
+ !--nr_to_scan)
+ return;
+ dm_bufio_cond_resched();
+ }
+}
+
+static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
+{
+ struct dm_bufio_client *c =
+ container_of(shrinker, struct dm_bufio_client, shrinker);
+ unsigned long r;
+ unsigned long nr_to_scan = sc->nr_to_scan;
+
+ if (sc->gfp_mask & __GFP_IO)
+ dm_bufio_lock(c);
+ else if (!dm_bufio_trylock(c))
+ return !nr_to_scan ? 0 : -1;
+
+ if (nr_to_scan)
+ __scan(c, nr_to_scan, sc);
+
+ r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+ if (r > INT_MAX)
+ r = INT_MAX;
+
+ dm_bufio_unlock(c);
+
+ return r;
+}
+
+/*
+ * Create the buffering interface
+ */
+struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
+ unsigned reserved_buffers, unsigned aux_size,
+ void (*alloc_callback)(struct dm_buffer *),
+ void (*write_callback)(struct dm_buffer *))
+{
+ int r;
+ struct dm_bufio_client *c;
+ unsigned i;
+
+ BUG_ON(block_size < 1 << SECTOR_SHIFT ||
+ (block_size & (block_size - 1)));
+
+ c = kmalloc(sizeof(*c), GFP_KERNEL);
+ if (!c) {
+ r = -ENOMEM;
+ goto bad_client;
+ }
+ c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS);
+ if (!c->cache_hash) {
+ r = -ENOMEM;
+ goto bad_hash;
+ }
+
+ c->bdev = bdev;
+ c->block_size = block_size;
+ c->sectors_per_block_bits = ffs(block_size) - 1 - SECTOR_SHIFT;
+ c->pages_per_block_bits = (ffs(block_size) - 1 >= PAGE_SHIFT) ?
+ ffs(block_size) - 1 - PAGE_SHIFT : 0;
+ c->blocks_per_page_bits = (ffs(block_size) - 1 < PAGE_SHIFT ?
+ PAGE_SHIFT - (ffs(block_size) - 1) : 0);
+
+ c->aux_size = aux_size;
+ c->alloc_callback = alloc_callback;
+ c->write_callback = write_callback;
+
+ for (i = 0; i < LIST_SIZE; i++) {
+ INIT_LIST_HEAD(&c->lru[i]);
+ c->n_buffers[i] = 0;
+ }
+
+ for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
+ INIT_HLIST_HEAD(&c->cache_hash[i]);
+
+ mutex_init(&c->lock);
+ INIT_LIST_HEAD(&c->reserved_buffers);
+ c->need_reserved_buffers = reserved_buffers;
+
+ init_waitqueue_head(&c->free_buffer_wait);
+ c->async_write_error = 0;
+
+ c->dm_io = dm_io_client_create();
+ if (IS_ERR(c->dm_io)) {
+ r = PTR_ERR(c->dm_io);
+ goto bad_dm_io;
+ }
+
+ mutex_lock(&dm_bufio_clients_lock);
+ if (c->blocks_per_page_bits) {
+ if (!DM_BUFIO_CACHE_NAME(c)) {
+ DM_BUFIO_CACHE_NAME(c) = kasprintf(GFP_KERNEL, "dm_bufio_cache-%u", c->block_size);
+ if (!DM_BUFIO_CACHE_NAME(c)) {
+ r = -ENOMEM;
+ mutex_unlock(&dm_bufio_clients_lock);
+ goto bad_cache;
+ }
+ }
+
+ if (!DM_BUFIO_CACHE(c)) {
+ DM_BUFIO_CACHE(c) = kmem_cache_create(DM_BUFIO_CACHE_NAME(c),
+ c->block_size,
+ c->block_size, 0, NULL);
+ if (!DM_BUFIO_CACHE(c)) {
+ r = -ENOMEM;
+ mutex_unlock(&dm_bufio_clients_lock);
+ goto bad_cache;
+ }
+ }
+ }
+ mutex_unlock(&dm_bufio_clients_lock);
+
+ while (c->need_reserved_buffers) {
+ struct dm_buffer *b = alloc_buffer(c, GFP_KERNEL);
+
+ if (!b) {
+ r = -ENOMEM;
+ goto bad_buffer;
+ }
+ __free_buffer_wake(b);
+ }
+
+ mutex_lock(&dm_bufio_clients_lock);
+ dm_bufio_client_count++;
+ list_add(&c->client_list, &dm_bufio_all_clients);
+ __cache_size_refresh();
+ mutex_unlock(&dm_bufio_clients_lock);
+
+ c->shrinker.shrink = shrink;
+ c->shrinker.seeks = 1;
+ c->shrinker.batch = 0;
+ register_shrinker(&c->shrinker);
+
+ return c;
+
+bad_buffer:
+bad_cache:
+ while (!list_empty(&c->reserved_buffers)) {
+ struct dm_buffer *b = list_entry(c->reserved_buffers.next,
+ struct dm_buffer, lru_list);
+ list_del(&b->lru_list);
+ free_buffer(b);
+ }
+ dm_io_client_destroy(c->dm_io);
+bad_dm_io:
+ vfree(c->cache_hash);
+bad_hash:
+ kfree(c);
+bad_client:
+ return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_client_create);
+
+/*
+ * Free the buffering interface.
+ * It is required that there are no references on any buffers.
+ */
+void dm_bufio_client_destroy(struct dm_bufio_client *c)
+{
+ unsigned i;
+
+ drop_buffers(c);
+
+ unregister_shrinker(&c->shrinker);
+
+ mutex_lock(&dm_bufio_clients_lock);
+
+ list_del(&c->client_list);
+ dm_bufio_client_count--;
+ __cache_size_refresh();
+
+ mutex_unlock(&dm_bufio_clients_lock);
+
+ for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
+ BUG_ON(!hlist_empty(&c->cache_hash[i]));
+
+ BUG_ON(c->need_reserved_buffers);
+
+ while (!list_empty(&c->reserved_buffers)) {
+ struct dm_buffer *b = list_entry(c->reserved_buffers.next,
+ struct dm_buffer, lru_list);
+ list_del(&b->lru_list);
+ free_buffer(b);
+ }
+
+ for (i = 0; i < LIST_SIZE; i++)
+ if (c->n_buffers[i])
+ DMERR("leaked buffer count %d: %ld", i, c->n_buffers[i]);
+
+ for (i = 0; i < LIST_SIZE; i++)
+ BUG_ON(c->n_buffers[i]);
+
+ dm_io_client_destroy(c->dm_io);
+ vfree(c->cache_hash);
+ kfree(c);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
+
+static void cleanup_old_buffers(void)
+{
+ unsigned long max_age = dm_bufio_max_age;
+ struct dm_bufio_client *c;
+
+ barrier();
+
+ if (max_age > ULONG_MAX / HZ)
+ max_age = ULONG_MAX / HZ;
+
+ mutex_lock(&dm_bufio_clients_lock);
+ list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
+ if (!dm_bufio_trylock(c))
+ continue;
+
+ while (!list_empty(&c->lru[LIST_CLEAN])) {
+ struct dm_buffer *b;
+ b = list_entry(c->lru[LIST_CLEAN].prev,
+ struct dm_buffer, lru_list);
+ if (__cleanup_old_buffer(b, 0, max_age * HZ))
+ break;
+ dm_bufio_cond_resched();
+ }
+
+ dm_bufio_unlock(c);
+ dm_bufio_cond_resched();
+ }
+ mutex_unlock(&dm_bufio_clients_lock);
+}
+
+static struct workqueue_struct *dm_bufio_wq;
+static struct delayed_work dm_bufio_work;
+
+static void work_fn(struct work_struct *w)
+{
+ cleanup_old_buffers();
+
+ queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+ DM_BUFIO_WORK_TIMER_SECS * HZ);
+}
+
+/*----------------------------------------------------------------
+ * Module setup
+ *--------------------------------------------------------------*/
+
+/*
+ * This is called only once for the whole dm_bufio module.
+ * It initializes memory limit.
+ */
+static int __init dm_bufio_init(void)
+{
+ __u64 mem;
+
+ memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
+ memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
+
+ mem = (__u64)((totalram_pages - totalhigh_pages) *
+ DM_BUFIO_MEMORY_PERCENT / 100) << PAGE_SHIFT;
+
+ if (mem > ULONG_MAX)
+ mem = ULONG_MAX;
+
+#ifdef CONFIG_MMU
+ /*
+ * Get the size of vmalloc space the same way as VMALLOC_TOTAL
+ * in fs/proc/internal.h
+ */
+ if (mem > (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100)
+ mem = (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100;
+#endif
+
+ dm_bufio_default_cache_size = mem;
+
+ mutex_lock(&dm_bufio_clients_lock);
+ __cache_size_refresh();
+ mutex_unlock(&dm_bufio_clients_lock);
+
+ dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache");
+ if (!dm_bufio_wq)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&dm_bufio_work, work_fn);
+ queue_delayed_work(dm_bufio_wq, &dm_bufio_work,
+ DM_BUFIO_WORK_TIMER_SECS * HZ);
+
+ return 0;
+}
+
+/*
+ * This is called once when unloading the dm_bufio module.
+ */
+static void __exit dm_bufio_exit(void)
+{
+ int bug = 0;
+ int i;
+
+ cancel_delayed_work_sync(&dm_bufio_work);
+ destroy_workqueue(dm_bufio_wq);
+
+ for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) {
+ struct kmem_cache *kc = dm_bufio_caches[i];
+
+ if (kc)
+ kmem_cache_destroy(kc);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dm_bufio_cache_names); i++)
+ kfree(dm_bufio_cache_names[i]);
+
+ if (dm_bufio_client_count) {
+ DMCRIT("%s: dm_bufio_client_count leaked: %d",
+ __func__, dm_bufio_client_count);
+ bug = 1;
+ }
+
+ if (dm_bufio_current_allocated) {
+ DMCRIT("%s: dm_bufio_current_allocated leaked: %lu",
+ __func__, dm_bufio_current_allocated);
+ bug = 1;
+ }
+
+ if (dm_bufio_allocated_get_free_pages) {
+ DMCRIT("%s: dm_bufio_allocated_get_free_pages leaked: %lu",
+ __func__, dm_bufio_allocated_get_free_pages);
+ bug = 1;
+ }
+
+ if (dm_bufio_allocated_vmalloc) {
+ DMCRIT("%s: dm_bufio_vmalloc leaked: %lu",
+ __func__, dm_bufio_allocated_vmalloc);
+ bug = 1;
+ }
+
+ if (bug)
+ BUG();
+}
+
+module_init(dm_bufio_init)
+module_exit(dm_bufio_exit)
+
+module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
+
+module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
+
+module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
+
+module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, S_IRUGO);
+MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
+
+module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, S_IRUGO);
+MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
+
+module_param_named(allocated_vmalloc_bytes, dm_bufio_allocated_vmalloc, ulong, S_IRUGO);
+MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
+
+module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, S_IRUGO);
+MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
+
+MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
+MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
+MODULE_LICENSE("GPL");
--- /dev/null
- #include <linux/module.h>
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree.h"
+#include "dm-btree-internal.h"
+#include "dm-transaction-manager.h"
+
++#include <linux/export.h>
+
+/*
+ * Removing an entry from a btree
+ * ==============================
+ *
+ * A very important constraint for our btree is that no node, except the
+ * root, may have fewer than a certain number of entries.
+ * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES).
+ *
+ * Ensuring this is complicated by the way we want to only ever hold the
+ * locks on 2 nodes concurrently, and only change nodes in a top to bottom
+ * fashion.
+ *
+ * Each node may have a left or right sibling. When decending the spine,
+ * if a node contains only MIN_ENTRIES then we try and increase this to at
+ * least MIN_ENTRIES + 1. We do this in the following ways:
+ *
+ * [A] No siblings => this can only happen if the node is the root, in which
+ * case we copy the childs contents over the root.
+ *
+ * [B] No left sibling
+ * ==> rebalance(node, right sibling)
+ *
+ * [C] No right sibling
+ * ==> rebalance(left sibling, node)
+ *
+ * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD
+ * ==> delete node adding it's contents to left and right
+ *
+ * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD
+ * ==> rebalance(left, node, right)
+ *
+ * After these operations it's possible that the our original node no
+ * longer contains the desired sub tree. For this reason this rebalancing
+ * is performed on the children of the current node. This also avoids
+ * having a special case for the root.
+ *
+ * Once this rebalancing has occurred we can then step into the child node
+ * for internal nodes. Or delete the entry for leaf nodes.
+ */
+
+/*
+ * Some little utilities for moving node data around.
+ */
+static void node_shift(struct node *n, int shift)
+{
+ uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+ uint32_t value_size = le32_to_cpu(n->header.value_size);
+
+ if (shift < 0) {
+ shift = -shift;
+ BUG_ON(shift > nr_entries);
+ BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift, value_size));
+ memmove(key_ptr(n, 0),
+ key_ptr(n, shift),
+ (nr_entries - shift) * sizeof(__le64));
+ memmove(value_ptr(n, 0, value_size),
+ value_ptr(n, shift, value_size),
+ (nr_entries - shift) * value_size);
+ } else {
+ BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries));
+ memmove(key_ptr(n, shift),
+ key_ptr(n, 0),
+ nr_entries * sizeof(__le64));
+ memmove(value_ptr(n, shift, value_size),
+ value_ptr(n, 0, value_size),
+ nr_entries * value_size);
+ }
+}
+
+static void node_copy(struct node *left, struct node *right, int shift)
+{
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+ uint32_t value_size = le32_to_cpu(left->header.value_size);
+ BUG_ON(value_size != le32_to_cpu(right->header.value_size));
+
+ if (shift < 0) {
+ shift = -shift;
+ BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries));
+ memcpy(key_ptr(left, nr_left),
+ key_ptr(right, 0),
+ shift * sizeof(__le64));
+ memcpy(value_ptr(left, nr_left, value_size),
+ value_ptr(right, 0, value_size),
+ shift * value_size);
+ } else {
+ BUG_ON(shift > le32_to_cpu(right->header.max_entries));
+ memcpy(key_ptr(right, 0),
+ key_ptr(left, nr_left - shift),
+ shift * sizeof(__le64));
+ memcpy(value_ptr(right, 0, value_size),
+ value_ptr(left, nr_left - shift, value_size),
+ shift * value_size);
+ }
+}
+
+/*
+ * Delete a specific entry from a leaf node.
+ */
+static void delete_at(struct node *n, unsigned index)
+{
+ unsigned nr_entries = le32_to_cpu(n->header.nr_entries);
+ unsigned nr_to_copy = nr_entries - (index + 1);
+ uint32_t value_size = le32_to_cpu(n->header.value_size);
+ BUG_ON(index >= nr_entries);
+
+ if (nr_to_copy) {
+ memmove(key_ptr(n, index),
+ key_ptr(n, index + 1),
+ nr_to_copy * sizeof(__le64));
+
+ memmove(value_ptr(n, index, value_size),
+ value_ptr(n, index + 1, value_size),
+ nr_to_copy * value_size);
+ }
+
+ n->header.nr_entries = cpu_to_le32(nr_entries - 1);
+}
+
+static unsigned del_threshold(struct node *n)
+{
+ return le32_to_cpu(n->header.max_entries) / 3;
+}
+
+static unsigned merge_threshold(struct node *n)
+{
+ /*
+ * The extra one is because we know we're potentially going to
+ * delete an entry.
+ */
+ return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1;
+}
+
+struct child {
+ unsigned index;
+ struct dm_block *block;
+ struct node *n;
+};
+
+static struct dm_btree_value_type le64_type = {
+ .context = NULL,
+ .size = sizeof(__le64),
+ .inc = NULL,
+ .dec = NULL,
+ .equal = NULL
+};
+
+static int init_child(struct dm_btree_info *info, struct node *parent,
+ unsigned index, struct child *result)
+{
+ int r, inc;
+ dm_block_t root;
+
+ result->index = index;
+ root = value64(parent, index);
+
+ r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
+ &result->block, &inc);
+ if (r)
+ return r;
+
+ result->n = dm_block_data(result->block);
+
+ if (inc)
+ inc_children(info->tm, result->n, &le64_type);
+
+ *((__le64 *) value_ptr(parent, index, sizeof(__le64))) =
+ cpu_to_le64(dm_block_location(result->block));
+
+ return 0;
+}
+
+static int exit_child(struct dm_btree_info *info, struct child *c)
+{
+ return dm_tm_unlock(info->tm, c->block);
+}
+
+static void shift(struct node *left, struct node *right, int count)
+{
+ if (!count)
+ return;
+
+ if (count > 0) {
+ node_shift(right, count);
+ node_copy(left, right, count);
+ } else {
+ node_copy(left, right, count);
+ node_shift(right, count);
+ }
+
+ left->header.nr_entries =
+ cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count);
+ BUG_ON(le32_to_cpu(left->header.nr_entries) > le32_to_cpu(left->header.max_entries));
+
+ right->header.nr_entries =
+ cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count);
+ BUG_ON(le32_to_cpu(right->header.nr_entries) > le32_to_cpu(right->header.max_entries));
+}
+
+static void __rebalance2(struct dm_btree_info *info, struct node *parent,
+ struct child *l, struct child *r)
+{
+ struct node *left = l->n;
+ struct node *right = r->n;
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+ uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+
+ if (nr_left + nr_right <= merge_threshold(left)) {
+ /*
+ * Merge
+ */
+ node_copy(left, right, -nr_right);
+ left->header.nr_entries = cpu_to_le32(nr_left + nr_right);
+ delete_at(parent, r->index);
+
+ /*
+ * We need to decrement the right block, but not it's
+ * children, since they're still referenced by left.
+ */
+ dm_tm_dec(info->tm, dm_block_location(r->block));
+ } else {
+ /*
+ * Rebalance.
+ */
+ unsigned target_left = (nr_left + nr_right) / 2;
+ unsigned shift_ = nr_left - target_left;
+ BUG_ON(le32_to_cpu(left->header.max_entries) <= nr_left - shift_);
+ BUG_ON(le32_to_cpu(right->header.max_entries) <= nr_right + shift_);
+ shift(left, right, nr_left - target_left);
+ *key_ptr(parent, r->index) = right->keys[0];
+ }
+}
+
+static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
+ unsigned left_index)
+{
+ int r;
+ struct node *parent;
+ struct child left, right;
+
+ parent = dm_block_data(shadow_current(s));
+
+ r = init_child(info, parent, left_index, &left);
+ if (r)
+ return r;
+
+ r = init_child(info, parent, left_index + 1, &right);
+ if (r) {
+ exit_child(info, &left);
+ return r;
+ }
+
+ __rebalance2(info, parent, &left, &right);
+
+ r = exit_child(info, &left);
+ if (r) {
+ exit_child(info, &right);
+ return r;
+ }
+
+ return exit_child(info, &right);
+}
+
+static void __rebalance3(struct dm_btree_info *info, struct node *parent,
+ struct child *l, struct child *c, struct child *r)
+{
+ struct node *left = l->n;
+ struct node *center = c->n;
+ struct node *right = r->n;
+
+ uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
+ uint32_t nr_center = le32_to_cpu(center->header.nr_entries);
+ uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
+ uint32_t max_entries = le32_to_cpu(left->header.max_entries);
+
+ unsigned target;
+
+ BUG_ON(left->header.max_entries != center->header.max_entries);
+ BUG_ON(center->header.max_entries != right->header.max_entries);
+
+ if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) {
+ /*
+ * Delete center node:
+ *
+ * We dump as many entries from center as possible into
+ * left, then the rest in right, then rebalance2. This
+ * wastes some cpu, but I want something simple atm.
+ */
+ unsigned shift = min(max_entries - nr_left, nr_center);
+
+ BUG_ON(nr_left + shift > max_entries);
+ node_copy(left, center, -shift);
+ left->header.nr_entries = cpu_to_le32(nr_left + shift);
+
+ if (shift != nr_center) {
+ shift = nr_center - shift;
+ BUG_ON((nr_right + shift) >= max_entries);
+ node_shift(right, shift);
+ node_copy(center, right, shift);
+ right->header.nr_entries = cpu_to_le32(nr_right + shift);
+ }
+ *key_ptr(parent, r->index) = right->keys[0];
+
+ delete_at(parent, c->index);
+ r->index--;
+
+ dm_tm_dec(info->tm, dm_block_location(c->block));
+ __rebalance2(info, parent, l, r);
+
+ return;
+ }
+
+ /*
+ * Rebalance
+ */
+ target = (nr_left + nr_center + nr_right) / 3;
+ BUG_ON(target > max_entries);
+
+ /*
+ * Adjust the left node
+ */
+ shift(left, center, nr_left - target);
+
+ /*
+ * Adjust the right node
+ */
+ shift(center, right, target - nr_right);
+ *key_ptr(parent, c->index) = center->keys[0];
+ *key_ptr(parent, r->index) = right->keys[0];
+}
+
+static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
+ unsigned left_index)
+{
+ int r;
+ struct node *parent = dm_block_data(shadow_current(s));
+ struct child left, center, right;
+
+ /*
+ * FIXME: fill out an array?
+ */
+ r = init_child(info, parent, left_index, &left);
+ if (r)
+ return r;
+
+ r = init_child(info, parent, left_index + 1, ¢er);
+ if (r) {
+ exit_child(info, &left);
+ return r;
+ }
+
+ r = init_child(info, parent, left_index + 2, &right);
+ if (r) {
+ exit_child(info, &left);
+ exit_child(info, ¢er);
+ return r;
+ }
+
+ __rebalance3(info, parent, &left, ¢er, &right);
+
+ r = exit_child(info, &left);
+ if (r) {
+ exit_child(info, ¢er);
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, ¢er);
+ if (r) {
+ exit_child(info, &right);
+ return r;
+ }
+
+ r = exit_child(info, &right);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int get_nr_entries(struct dm_transaction_manager *tm,
+ dm_block_t b, uint32_t *result)
+{
+ int r;
+ struct dm_block *block;
+ struct node *n;
+
+ r = dm_tm_read_lock(tm, b, &btree_node_validator, &block);
+ if (r)
+ return r;
+
+ n = dm_block_data(block);
+ *result = le32_to_cpu(n->header.nr_entries);
+
+ return dm_tm_unlock(tm, block);
+}
+
+static int rebalance_children(struct shadow_spine *s,
+ struct dm_btree_info *info, uint64_t key)
+{
+ int i, r, has_left_sibling, has_right_sibling;
+ uint32_t child_entries;
+ struct node *n;
+
+ n = dm_block_data(shadow_current(s));
+
+ if (le32_to_cpu(n->header.nr_entries) == 1) {
+ struct dm_block *child;
+ dm_block_t b = value64(n, 0);
+
+ r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child);
+ if (r)
+ return r;
+
+ memcpy(n, dm_block_data(child),
+ dm_bm_block_size(dm_tm_get_bm(info->tm)));
+ r = dm_tm_unlock(info->tm, child);
+ if (r)
+ return r;
+
+ dm_tm_dec(info->tm, dm_block_location(child));
+ return 0;
+ }
+
+ i = lower_bound(n, key);
+ if (i < 0)
+ return -ENODATA;
+
+ r = get_nr_entries(info->tm, value64(n, i), &child_entries);
+ if (r)
+ return r;
+
+ if (child_entries > del_threshold(n))
+ return 0;
+
+ has_left_sibling = i > 0;
+ has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
+
+ if (!has_left_sibling)
+ r = rebalance2(s, info, i);
+
+ else if (!has_right_sibling)
+ r = rebalance2(s, info, i - 1);
+
+ else
+ r = rebalance3(s, info, i - 1);
+
+ return r;
+}
+
+static int do_leaf(struct node *n, uint64_t key, unsigned *index)
+{
+ int i = lower_bound(n, key);
+
+ if ((i < 0) ||
+ (i >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[i]) != key))
+ return -ENODATA;
+
+ *index = i;
+
+ return 0;
+}
+
+/*
+ * Prepares for removal from one level of the hierarchy. The caller must
+ * call delete_at() to remove the entry at index.
+ */
+static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
+ struct dm_btree_value_type *vt, dm_block_t root,
+ uint64_t key, unsigned *index)
+{
+ int i = *index, r;
+ struct node *n;
+
+ for (;;) {
+ r = shadow_step(s, root, vt);
+ if (r < 0)
+ break;
+
+ /*
+ * We have to patch up the parent node, ugly, but I don't
+ * see a way to do this automatically as part of the spine
+ * op.
+ */
+ if (shadow_has_parent(s)) {
+ __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+ memcpy(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(__le64)),
+ &location, sizeof(__le64));
+ }
+
+ n = dm_block_data(shadow_current(s));
+
+ if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+ return do_leaf(n, key, index);
+
+ r = rebalance_children(s, info, key);
+ if (r)
+ break;
+
+ n = dm_block_data(shadow_current(s));
+ if (le32_to_cpu(n->header.flags) & LEAF_NODE)
+ return do_leaf(n, key, index);
+
+ i = lower_bound(n, key);
+
+ /*
+ * We know the key is present, or else
+ * rebalance_children would have returned
+ * -ENODATA
+ */
+ root = value64(n, i);
+ }
+
+ return r;
+}
+
+int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, dm_block_t *new_root)
+{
+ unsigned level, last_level = info->levels - 1;
+ int index = 0, r = 0;
+ struct shadow_spine spine;
+ struct node *n;
+
+ init_shadow_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ r = remove_raw(&spine, info,
+ (level == last_level ?
+ &info->value_type : &le64_type),
+ root, keys[level], (unsigned *)&index);
+ if (r < 0)
+ break;
+
+ n = dm_block_data(shadow_current(&spine));
+ if (level != last_level) {
+ root = value64(n, index);
+ continue;
+ }
+
+ BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries));
+
+ if (info->value_type.dec)
+ info->value_type.dec(info->value_type.context,
+ value_ptr(n, index, info->value_type.size));
+
+ delete_at(n, index);
+ }
+
+ *new_root = shadow_root(&spine);
+ exit_shadow_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_remove);
--- /dev/null
- #include <linux/module.h>
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-btree-internal.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
++#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "btree"
+
+/*----------------------------------------------------------------
+ * Array manipulation
+ *--------------------------------------------------------------*/
+static void memcpy_disk(void *dest, const void *src, size_t len)
+ __dm_written_to_disk(src)
+{
+ memcpy(dest, src, len);
+ __dm_unbless_for_disk(src);
+}
+
+static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
+ unsigned index, void *elt)
+ __dm_written_to_disk(elt)
+{
+ if (index < nr_elts)
+ memmove(base + (elt_size * (index + 1)),
+ base + (elt_size * index),
+ (nr_elts - index) * elt_size);
+
+ memcpy_disk(base + (elt_size * index), elt, elt_size);
+}
+
+/*----------------------------------------------------------------*/
+
+/* makes the assumption that no two keys are the same. */
+static int bsearch(struct node *n, uint64_t key, int want_hi)
+{
+ int lo = -1, hi = le32_to_cpu(n->header.nr_entries);
+
+ while (hi - lo > 1) {
+ int mid = lo + ((hi - lo) / 2);
+ uint64_t mid_key = le64_to_cpu(n->keys[mid]);
+
+ if (mid_key == key)
+ return mid;
+
+ if (mid_key < key)
+ lo = mid;
+ else
+ hi = mid;
+ }
+
+ return want_hi ? hi : lo;
+}
+
+int lower_bound(struct node *n, uint64_t key)
+{
+ return bsearch(n, key, 0);
+}
+
+void inc_children(struct dm_transaction_manager *tm, struct node *n,
+ struct dm_btree_value_type *vt)
+{
+ unsigned i;
+ uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
+
+ if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
+ for (i = 0; i < nr_entries; i++)
+ dm_tm_inc(tm, value64(n, i));
+ else if (vt->inc)
+ for (i = 0; i < nr_entries; i++)
+ vt->inc(vt->context,
+ value_ptr(n, i, vt->size));
+}
+
+static int insert_at(size_t value_size, struct node *node, unsigned index,
+ uint64_t key, void *value)
+ __dm_written_to_disk(value)
+{
+ uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
+ __le64 key_le = cpu_to_le64(key);
+
+ if (index > nr_entries ||
+ index >= le32_to_cpu(node->header.max_entries)) {
+ DMERR("too many entries in btree node for insert");
+ __dm_unbless_for_disk(value);
+ return -ENOMEM;
+ }
+
+ __dm_bless_for_disk(&key_le);
+
+ array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
+ array_insert(value_base(node), value_size, nr_entries, index, value);
+ node->header.nr_entries = cpu_to_le32(nr_entries + 1);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
+ * We want 3n entries (for some n). This works more nicely for repeated
+ * insert remove loops than (2n + 1).
+ */
+static uint32_t calc_max_entries(size_t value_size, size_t block_size)
+{
+ uint32_t total, n;
+ size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */
+
+ block_size -= sizeof(struct node_header);
+ total = block_size / elt_size;
+ n = total / 3; /* rounds down */
+
+ return 3 * n;
+}
+
+int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
+{
+ int r;
+ struct dm_block *b;
+ struct node *n;
+ size_t block_size;
+ uint32_t max_entries;
+
+ r = new_block(info, &b);
+ if (r < 0)
+ return r;
+
+ block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
+ max_entries = calc_max_entries(info->value_type.size, block_size);
+
+ n = dm_block_data(b);
+ memset(n, 0, block_size);
+ n->header.flags = cpu_to_le32(LEAF_NODE);
+ n->header.nr_entries = cpu_to_le32(0);
+ n->header.max_entries = cpu_to_le32(max_entries);
+ n->header.value_size = cpu_to_le32(info->value_type.size);
+
+ *root = dm_block_location(b);
+ return unlock_block(info, b);
+}
+EXPORT_SYMBOL_GPL(dm_btree_empty);
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Deletion uses a recursive algorithm, since we have limited stack space
+ * we explicitly manage our own stack on the heap.
+ */
+#define MAX_SPINE_DEPTH 64
+struct frame {
+ struct dm_block *b;
+ struct node *n;
+ unsigned level;
+ unsigned nr_children;
+ unsigned current_child;
+};
+
+struct del_stack {
+ struct dm_transaction_manager *tm;
+ int top;
+ struct frame spine[MAX_SPINE_DEPTH];
+};
+
+static int top_frame(struct del_stack *s, struct frame **f)
+{
+ if (s->top < 0) {
+ DMERR("btree deletion stack empty");
+ return -EINVAL;
+ }
+
+ *f = s->spine + s->top;
+
+ return 0;
+}
+
+static int unprocessed_frames(struct del_stack *s)
+{
+ return s->top >= 0;
+}
+
+static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
+{
+ int r;
+ uint32_t ref_count;
+
+ if (s->top >= MAX_SPINE_DEPTH - 1) {
+ DMERR("btree deletion stack out of memory");
+ return -ENOMEM;
+ }
+
+ r = dm_tm_ref(s->tm, b, &ref_count);
+ if (r)
+ return r;
+
+ if (ref_count > 1)
+ /*
+ * This is a shared node, so we can just decrement it's
+ * reference counter and leave the children.
+ */
+ dm_tm_dec(s->tm, b);
+
+ else {
+ struct frame *f = s->spine + ++s->top;
+
+ r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
+ if (r) {
+ s->top--;
+ return r;
+ }
+
+ f->n = dm_block_data(f->b);
+ f->level = level;
+ f->nr_children = le32_to_cpu(f->n->header.nr_entries);
+ f->current_child = 0;
+ }
+
+ return 0;
+}
+
+static void pop_frame(struct del_stack *s)
+{
+ struct frame *f = s->spine + s->top--;
+
+ dm_tm_dec(s->tm, dm_block_location(f->b));
+ dm_tm_unlock(s->tm, f->b);
+}
+
+int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
+{
+ int r;
+ struct del_stack *s;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+ s->tm = info->tm;
+ s->top = -1;
+
+ r = push_frame(s, root, 1);
+ if (r)
+ goto out;
+
+ while (unprocessed_frames(s)) {
+ uint32_t flags;
+ struct frame *f;
+ dm_block_t b;
+
+ r = top_frame(s, &f);
+ if (r)
+ goto out;
+
+ if (f->current_child >= f->nr_children) {
+ pop_frame(s);
+ continue;
+ }
+
+ flags = le32_to_cpu(f->n->header.flags);
+ if (flags & INTERNAL_NODE) {
+ b = value64(f->n, f->current_child);
+ f->current_child++;
+ r = push_frame(s, b, f->level);
+ if (r)
+ goto out;
+
+ } else if (f->level != (info->levels - 1)) {
+ b = value64(f->n, f->current_child);
+ f->current_child++;
+ r = push_frame(s, b, f->level + 1);
+ if (r)
+ goto out;
+
+ } else {
+ if (info->value_type.dec) {
+ unsigned i;
+
+ for (i = 0; i < f->nr_children; i++)
+ info->value_type.dec(info->value_type.context,
+ value_ptr(f->n, i, info->value_type.size));
+ }
+ f->current_child = f->nr_children;
+ }
+ }
+
+out:
+ kfree(s);
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_del);
+
+/*----------------------------------------------------------------*/
+
+static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
+ int (*search_fn)(struct node *, uint64_t),
+ uint64_t *result_key, void *v, size_t value_size)
+{
+ int i, r;
+ uint32_t flags, nr_entries;
+
+ do {
+ r = ro_step(s, block);
+ if (r < 0)
+ return r;
+
+ i = search_fn(ro_node(s), key);
+
+ flags = le32_to_cpu(ro_node(s)->header.flags);
+ nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
+ if (i < 0 || i >= nr_entries)
+ return -ENODATA;
+
+ if (flags & INTERNAL_NODE)
+ block = value64(ro_node(s), i);
+
+ } while (!(flags & LEAF_NODE));
+
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ memcpy(v, value_ptr(ro_node(s), i, value_size), value_size);
+
+ return 0;
+}
+
+int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value_le)
+{
+ unsigned level, last_level = info->levels - 1;
+ int r = -ENODATA;
+ uint64_t rkey;
+ __le64 internal_value_le;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ size_t size;
+ void *value_p;
+
+ if (level == last_level) {
+ value_p = value_le;
+ size = info->value_type.size;
+
+ } else {
+ value_p = &internal_value_le;
+ size = sizeof(uint64_t);
+ }
+
+ r = btree_lookup_raw(&spine, root, keys[level],
+ lower_bound, &rkey,
+ value_p, size);
+
+ if (!r) {
+ if (rkey != keys[level]) {
+ exit_ro_spine(&spine);
+ return -ENODATA;
+ }
+ } else {
+ exit_ro_spine(&spine);
+ return r;
+ }
+
+ root = le64_to_cpu(internal_value_le);
+ }
+ exit_ro_spine(&spine);
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(dm_btree_lookup);
+
+/*
+ * Splits a node by creating a sibling node and shifting half the nodes
+ * contents across. Assumes there is a parent node, and it has room for
+ * another child.
+ *
+ * Before:
+ * +--------+
+ * | Parent |
+ * +--------+
+ * |
+ * v
+ * +----------+
+ * | A ++++++ |
+ * +----------+
+ *
+ *
+ * After:
+ * +--------+
+ * | Parent |
+ * +--------+
+ * | |
+ * v +------+
+ * +---------+ |
+ * | A* +++ | v
+ * +---------+ +-------+
+ * | B +++ |
+ * +-------+
+ *
+ * Where A* is a shadow of A.
+ */
+static int btree_split_sibling(struct shadow_spine *s, dm_block_t root,
+ unsigned parent_index, uint64_t key)
+{
+ int r;
+ size_t size;
+ unsigned nr_left, nr_right;
+ struct dm_block *left, *right, *parent;
+ struct node *ln, *rn, *pn;
+ __le64 location;
+
+ left = shadow_current(s);
+
+ r = new_block(s->info, &right);
+ if (r < 0)
+ return r;
+
+ ln = dm_block_data(left);
+ rn = dm_block_data(right);
+
+ nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
+ nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
+
+ ln->header.nr_entries = cpu_to_le32(nr_left);
+
+ rn->header.flags = ln->header.flags;
+ rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.max_entries = ln->header.max_entries;
+ rn->header.value_size = ln->header.value_size;
+ memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
+
+ size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
+ sizeof(uint64_t) : s->info->value_type.size;
+ memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size),
+ size * nr_right);
+
+ /*
+ * Patch up the parent
+ */
+ parent = shadow_parent(s);
+
+ pn = dm_block_data(parent);
+ location = cpu_to_le64(dm_block_location(left));
+ __dm_bless_for_disk(&location);
+ memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)),
+ &location, sizeof(__le64));
+
+ location = cpu_to_le64(dm_block_location(right));
+ __dm_bless_for_disk(&location);
+
+ r = insert_at(sizeof(__le64), pn, parent_index + 1,
+ le64_to_cpu(rn->keys[0]), &location);
+ if (r)
+ return r;
+
+ if (key < le64_to_cpu(rn->keys[0])) {
+ unlock_block(s->info, right);
+ s->nodes[1] = left;
+ } else {
+ unlock_block(s->info, left);
+ s->nodes[1] = right;
+ }
+
+ return 0;
+}
+
+/*
+ * Splits a node by creating two new children beneath the given node.
+ *
+ * Before:
+ * +----------+
+ * | A ++++++ |
+ * +----------+
+ *
+ *
+ * After:
+ * +------------+
+ * | A (shadow) |
+ * +------------+
+ * | |
+ * +------+ +----+
+ * | |
+ * v v
+ * +-------+ +-------+
+ * | B +++ | | C +++ |
+ * +-------+ +-------+
+ */
+static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
+{
+ int r;
+ size_t size;
+ unsigned nr_left, nr_right;
+ struct dm_block *left, *right, *new_parent;
+ struct node *pn, *ln, *rn;
+ __le64 val;
+
+ new_parent = shadow_current(s);
+
+ r = new_block(s->info, &left);
+ if (r < 0)
+ return r;
+
+ r = new_block(s->info, &right);
+ if (r < 0) {
+ /* FIXME: put left */
+ return r;
+ }
+
+ pn = dm_block_data(new_parent);
+ ln = dm_block_data(left);
+ rn = dm_block_data(right);
+
+ nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
+ nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;
+
+ ln->header.flags = pn->header.flags;
+ ln->header.nr_entries = cpu_to_le32(nr_left);
+ ln->header.max_entries = pn->header.max_entries;
+ ln->header.value_size = pn->header.value_size;
+
+ rn->header.flags = pn->header.flags;
+ rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.max_entries = pn->header.max_entries;
+ rn->header.value_size = pn->header.value_size;
+
+ memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
+ memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));
+
+ size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
+ sizeof(__le64) : s->info->value_type.size;
+ memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size);
+ memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size),
+ nr_right * size);
+
+ /* new_parent should just point to l and r now */
+ pn->header.flags = cpu_to_le32(INTERNAL_NODE);
+ pn->header.nr_entries = cpu_to_le32(2);
+ pn->header.max_entries = cpu_to_le32(
+ calc_max_entries(sizeof(__le64),
+ dm_bm_block_size(
+ dm_tm_get_bm(s->info->tm))));
+ pn->header.value_size = cpu_to_le32(sizeof(__le64));
+
+ val = cpu_to_le64(dm_block_location(left));
+ __dm_bless_for_disk(&val);
+ pn->keys[0] = ln->keys[0];
+ memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64));
+
+ val = cpu_to_le64(dm_block_location(right));
+ __dm_bless_for_disk(&val);
+ pn->keys[1] = rn->keys[0];
+ memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64));
+
+ /*
+ * rejig the spine. This is ugly, since it knows too
+ * much about the spine
+ */
+ if (s->nodes[0] != new_parent) {
+ unlock_block(s->info, s->nodes[0]);
+ s->nodes[0] = new_parent;
+ }
+ if (key < le64_to_cpu(rn->keys[0])) {
+ unlock_block(s->info, right);
+ s->nodes[1] = left;
+ } else {
+ unlock_block(s->info, left);
+ s->nodes[1] = right;
+ }
+ s->count = 2;
+
+ return 0;
+}
+
+static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
+ struct dm_btree_value_type *vt,
+ uint64_t key, unsigned *index)
+{
+ int r, i = *index, top = 1;
+ struct node *node;
+
+ for (;;) {
+ r = shadow_step(s, root, vt);
+ if (r < 0)
+ return r;
+
+ node = dm_block_data(shadow_current(s));
+
+ /*
+ * We have to patch up the parent node, ugly, but I don't
+ * see a way to do this automatically as part of the spine
+ * op.
+ */
+ if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */
+ __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+ __dm_bless_for_disk(&location);
+ memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)),
+ &location, sizeof(__le64));
+ }
+
+ node = dm_block_data(shadow_current(s));
+
+ if (node->header.nr_entries == node->header.max_entries) {
+ if (top)
+ r = btree_split_beneath(s, key);
+ else
+ r = btree_split_sibling(s, root, i, key);
+
+ if (r < 0)
+ return r;
+ }
+
+ node = dm_block_data(shadow_current(s));
+
+ i = lower_bound(node, key);
+
+ if (le32_to_cpu(node->header.flags) & LEAF_NODE)
+ break;
+
+ if (i < 0) {
+ /* change the bounds on the lowest key */
+ node->keys[0] = cpu_to_le64(key);
+ i = 0;
+ }
+
+ root = value64(node, i);
+ top = 0;
+ }
+
+ if (i < 0 || le64_to_cpu(node->keys[i]) != key)
+ i++;
+
+ *index = i;
+ return 0;
+}
+
+static int insert(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root,
+ int *inserted)
+ __dm_written_to_disk(value)
+{
+ int r, need_insert;
+ unsigned level, index = -1, last_level = info->levels - 1;
+ dm_block_t block = root;
+ struct shadow_spine spine;
+ struct node *n;
+ struct dm_btree_value_type le64_type;
+
+ le64_type.context = NULL;
+ le64_type.size = sizeof(__le64);
+ le64_type.inc = NULL;
+ le64_type.dec = NULL;
+ le64_type.equal = NULL;
+
+ init_shadow_spine(&spine, info);
+
+ for (level = 0; level < (info->levels - 1); level++) {
+ r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
+ if (r < 0)
+ goto bad;
+
+ n = dm_block_data(shadow_current(&spine));
+ need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[index]) != keys[level]));
+
+ if (need_insert) {
+ dm_block_t new_tree;
+ __le64 new_le;
+
+ r = dm_btree_empty(info, &new_tree);
+ if (r < 0)
+ goto bad;
+
+ new_le = cpu_to_le64(new_tree);
+ __dm_bless_for_disk(&new_le);
+
+ r = insert_at(sizeof(uint64_t), n, index,
+ keys[level], &new_le);
+ if (r)
+ goto bad;
+ }
+
+ if (level < last_level)
+ block = value64(n, index);
+ }
+
+ r = btree_insert_raw(&spine, block, &info->value_type,
+ keys[level], &index);
+ if (r < 0)
+ goto bad;
+
+ n = dm_block_data(shadow_current(&spine));
+ need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
+ (le64_to_cpu(n->keys[index]) != keys[level]));
+
+ if (need_insert) {
+ if (inserted)
+ *inserted = 1;
+
+ r = insert_at(info->value_type.size, n, index,
+ keys[level], value);
+ if (r)
+ goto bad_unblessed;
+ } else {
+ if (inserted)
+ *inserted = 0;
+
+ if (info->value_type.dec &&
+ (!info->value_type.equal ||
+ !info->value_type.equal(
+ info->value_type.context,
+ value_ptr(n, index, info->value_type.size),
+ value))) {
+ info->value_type.dec(info->value_type.context,
+ value_ptr(n, index, info->value_type.size));
+ }
+ memcpy_disk(value_ptr(n, index, info->value_type.size),
+ value, info->value_type.size);
+ }
+
+ *new_root = shadow_root(&spine);
+ exit_shadow_spine(&spine);
+
+ return 0;
+
+bad:
+ __dm_unbless_for_disk(value);
+bad_unblessed:
+ exit_shadow_spine(&spine);
+ return r;
+}
+
+int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root)
+ __dm_written_to_disk(value)
+{
+ return insert(info, root, keys, value, new_root, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert);
+
+int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *keys, void *value, dm_block_t *new_root,
+ int *inserted)
+ __dm_written_to_disk(value)
+{
+ return insert(info, root, keys, value, new_root, inserted);
+}
+EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
+
+/*----------------------------------------------------------------*/
+
+static int find_highest_key(struct ro_spine *s, dm_block_t block,
+ uint64_t *result_key, dm_block_t *next_block)
+{
+ int i, r;
+ uint32_t flags;
+
+ do {
+ r = ro_step(s, block);
+ if (r < 0)
+ return r;
+
+ flags = le32_to_cpu(ro_node(s)->header.flags);
+ i = le32_to_cpu(ro_node(s)->header.nr_entries);
+ if (!i)
+ return -ENODATA;
+ else
+ i--;
+
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ if (next_block || flags & INTERNAL_NODE)
+ block = value64(ro_node(s), i);
+
+ } while (flags & INTERNAL_NODE);
+
+ if (next_block)
+ *next_block = block;
+ return 0;
+}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ int r = 0, count = 0, level;
+ struct ro_spine spine;
+
+ init_ro_spine(&spine, info);
+ for (level = 0; level < info->levels; level++) {
+ r = find_highest_key(&spine, root, result_keys + level,
+ level == info->levels - 1 ? NULL : &root);
+ if (r == -ENODATA) {
+ r = 0;
+ break;
+
+ } else if (r)
+ break;
+
+ count++;
+ }
+ exit_ro_spine(&spine);
+
+ return r ? r : count;
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
--- /dev/null
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map-checker.h"
+
+#include <linux/device-mapper.h>
++#include <linux/export.h>
+
+#ifdef CONFIG_DM_DEBUG_SPACE_MAPS
+
+#define DM_MSG_PREFIX "space map checker"
+
+/*----------------------------------------------------------------*/
+
+struct count_array {
+ dm_block_t nr;
+ dm_block_t nr_free;
+
+ uint32_t *counts;
+};
+
+static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count)
+{
+ if (b >= ca->nr)
+ return -EINVAL;
+
+ *count = ca->counts[b];
+ return 0;
+}
+
+static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r)
+{
+ if (b >= ca->nr)
+ return -EINVAL;
+
+ *r = ca->counts[b] > 1;
+ return 0;
+}
+
+static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count)
+{
+ uint32_t old_count;
+
+ if (b >= ca->nr)
+ return -EINVAL;
+
+ old_count = ca->counts[b];
+
+ if (!count && old_count)
+ ca->nr_free++;
+
+ else if (count && !old_count)
+ ca->nr_free--;
+
+ ca->counts[b] = count;
+ return 0;
+}
+
+static int ca_inc_block(struct count_array *ca, dm_block_t b)
+{
+ if (b >= ca->nr)
+ return -EINVAL;
+
+ ca_set_count(ca, b, ca->counts[b] + 1);
+ return 0;
+}
+
+static int ca_dec_block(struct count_array *ca, dm_block_t b)
+{
+ if (b >= ca->nr)
+ return -EINVAL;
+
+ BUG_ON(ca->counts[b] == 0);
+ ca_set_count(ca, b, ca->counts[b] - 1);
+ return 0;
+}
+
+static int ca_create(struct count_array *ca, struct dm_space_map *sm)
+{
+ int r;
+ dm_block_t nr_blocks;
+
+ r = dm_sm_get_nr_blocks(sm, &nr_blocks);
+ if (r)
+ return r;
+
+ ca->nr = nr_blocks;
+ ca->nr_free = nr_blocks;
+ ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL);
+ if (!ca->counts)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ca_load(struct count_array *ca, struct dm_space_map *sm)
+{
+ int r;
+ uint32_t count;
+ dm_block_t nr_blocks, i;
+
+ r = dm_sm_get_nr_blocks(sm, &nr_blocks);
+ if (r)
+ return r;
+
+ BUG_ON(ca->nr != nr_blocks);
+
+ DMWARN("Loading debug space map from disk. This may take some time");
+ for (i = 0; i < nr_blocks; i++) {
+ r = dm_sm_get_count(sm, i, &count);
+ if (r) {
+ DMERR("load failed");
+ return r;
+ }
+
+ ca_set_count(ca, i, count);
+ }
+ DMWARN("Load complete");
+
+ return 0;
+}
+
+static int ca_extend(struct count_array *ca, dm_block_t extra_blocks)
+{
+ dm_block_t nr_blocks = ca->nr + extra_blocks;
+ uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL);
+ if (!counts)
+ return -ENOMEM;
+
+ memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
+ kfree(ca->counts);
+ ca->nr = nr_blocks;
+ ca->nr_free += extra_blocks;
+ ca->counts = counts;
+ return 0;
+}
+
+static int ca_commit(struct count_array *old, struct count_array *new)
+{
+ if (old->nr != new->nr) {
+ BUG_ON(old->nr > new->nr);
+ ca_extend(old, new->nr - old->nr);
+ }
+
+ BUG_ON(old->nr != new->nr);
+ old->nr_free = new->nr_free;
+ memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr);
+ return 0;
+}
+
+static void ca_destroy(struct count_array *ca)
+{
+ kfree(ca->counts);
+}
+
+/*----------------------------------------------------------------*/
+
+struct sm_checker {
+ struct dm_space_map sm;
+
+ struct count_array old_counts;
+ struct count_array counts;
+
+ struct dm_space_map *real_sm;
+};
+
+static void sm_checker_destroy(struct dm_space_map *sm)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+
+ dm_sm_destroy(smc->real_sm);
+ ca_destroy(&smc->old_counts);
+ ca_destroy(&smc->counts);
+ kfree(smc);
+}
+
+static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int r = dm_sm_get_nr_blocks(smc->real_sm, count);
+ if (!r)
+ BUG_ON(smc->old_counts.nr != *count);
+ return r;
+}
+
+static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int r = dm_sm_get_nr_free(smc->real_sm, count);
+ if (!r) {
+ /*
+ * Slow, but we know it's correct.
+ */
+ dm_block_t b, n = 0;
+ for (b = 0; b < smc->old_counts.nr; b++)
+ if (smc->old_counts.counts[b] == 0 &&
+ smc->counts.counts[b] == 0)
+ n++;
+
+ if (n != *count)
+ DMERR("free block counts differ, checker %u, sm-disk:%u",
+ (unsigned) n, (unsigned) *count);
+ }
+ return r;
+}
+
+static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int r = dm_sm_new_block(smc->real_sm, b);
+
+ if (!r) {
+ BUG_ON(*b >= smc->old_counts.nr);
+ BUG_ON(smc->old_counts.counts[*b] != 0);
+ BUG_ON(*b >= smc->counts.nr);
+ BUG_ON(smc->counts.counts[*b] != 0);
+ ca_set_count(&smc->counts, *b, 1);
+ }
+
+ return r;
+}
+
+static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int r = dm_sm_inc_block(smc->real_sm, b);
+ int r2 = ca_inc_block(&smc->counts, b);
+ BUG_ON(r != r2);
+ return r;
+}
+
+static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int r = dm_sm_dec_block(smc->real_sm, b);
+ int r2 = ca_dec_block(&smc->counts, b);
+ BUG_ON(r != r2);
+ return r;
+}
+
+static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ uint32_t result2 = 0;
+ int r = dm_sm_get_count(smc->real_sm, b, result);
+ int r2 = ca_get_count(&smc->counts, b, &result2);
+
+ BUG_ON(r != r2);
+ if (!r)
+ BUG_ON(*result != result2);
+ return r;
+}
+
+static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int result2 = 0;
+ int r = dm_sm_count_is_more_than_one(smc->real_sm, b, result);
+ int r2 = ca_count_more_than_one(&smc->counts, b, &result2);
+
+ BUG_ON(r != r2);
+ if (!r)
+ BUG_ON(!(*result) && result2);
+ return r;
+}
+
+static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ uint32_t old_rc;
+ int r = dm_sm_set_count(smc->real_sm, b, count);
+ int r2;
+
+ BUG_ON(b >= smc->counts.nr);
+ old_rc = smc->counts.counts[b];
+ r2 = ca_set_count(&smc->counts, b, count);
+ BUG_ON(r != r2);
+
+ return r;
+}
+
+static int sm_checker_commit(struct dm_space_map *sm)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int r;
+
+ r = dm_sm_commit(smc->real_sm);
+ if (r)
+ return r;
+
+ r = ca_commit(&smc->old_counts, &smc->counts);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ int r = dm_sm_extend(smc->real_sm, extra_blocks);
+ if (r)
+ return r;
+
+ return ca_extend(&smc->counts, extra_blocks);
+}
+
+static int sm_checker_root_size(struct dm_space_map *sm, size_t *result)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ return dm_sm_root_size(smc->real_sm, result);
+}
+
+static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len)
+{
+ struct sm_checker *smc = container_of(sm, struct sm_checker, sm);
+ return dm_sm_copy_root(smc->real_sm, copy_to_here_le, len);
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_space_map ops_ = {
+ .destroy = sm_checker_destroy,
+ .get_nr_blocks = sm_checker_get_nr_blocks,
+ .get_nr_free = sm_checker_get_nr_free,
+ .inc_block = sm_checker_inc_block,
+ .dec_block = sm_checker_dec_block,
+ .new_block = sm_checker_new_block,
+ .get_count = sm_checker_get_count,
+ .count_is_more_than_one = sm_checker_count_more_than_one,
+ .set_count = sm_checker_set_count,
+ .commit = sm_checker_commit,
+ .extend = sm_checker_extend,
+ .root_size = sm_checker_root_size,
+ .copy_root = sm_checker_copy_root
+};
+
+struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
+{
+ int r;
+ struct sm_checker *smc;
+
+ if (!sm)
+ return NULL;
+
+ smc = kmalloc(sizeof(*smc), GFP_KERNEL);
+ if (!smc)
+ return NULL;
+
+ memcpy(&smc->sm, &ops_, sizeof(smc->sm));
+ r = ca_create(&smc->old_counts, sm);
+ if (r) {
+ kfree(smc);
+ return NULL;
+ }
+
+ r = ca_create(&smc->counts, sm);
+ if (r) {
+ ca_destroy(&smc->old_counts);
+ kfree(smc);
+ return NULL;
+ }
+
+ smc->real_sm = sm;
+
+ r = ca_load(&smc->counts, sm);
+ if (r) {
+ ca_destroy(&smc->counts);
+ ca_destroy(&smc->old_counts);
+ kfree(smc);
+ return NULL;
+ }
+
+ r = ca_commit(&smc->old_counts, &smc->counts);
+ if (r) {
+ ca_destroy(&smc->counts);
+ ca_destroy(&smc->old_counts);
+ kfree(smc);
+ return NULL;
+ }
+
+ return &smc->sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create);
+
+struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
+{
+ int r;
+ struct sm_checker *smc;
+
+ if (!sm)
+ return NULL;
+
+ smc = kmalloc(sizeof(*smc), GFP_KERNEL);
+ if (!smc)
+ return NULL;
+
+ memcpy(&smc->sm, &ops_, sizeof(smc->sm));
+ r = ca_create(&smc->old_counts, sm);
+ if (r) {
+ kfree(smc);
+ return NULL;
+ }
+
+ r = ca_create(&smc->counts, sm);
+ if (r) {
+ ca_destroy(&smc->old_counts);
+ kfree(smc);
+ return NULL;
+ }
+
+ smc->real_sm = sm;
+ return &smc->sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh);
+
+/*----------------------------------------------------------------*/
+
+#else
+
+struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
+{
+ return sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create);
+
+struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
+{
+ return sm;
+}
+EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh);
+
+/*----------------------------------------------------------------*/
+
+#endif
--- /dev/null
- #include <linux/module.h>
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-space-map-checker.h"
+#include "dm-space-map-common.h"
+#include "dm-space-map-disk.h"
+#include "dm-space-map.h"
+#include "dm-transaction-manager.h"
+
+#include <linux/list.h>
+#include <linux/slab.h>
++#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "space map disk"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * Space map interface.
+ */
+struct sm_disk {
+ struct dm_space_map sm;
+
+ struct ll_disk ll;
+ struct ll_disk old_ll;
+
+ dm_block_t begin;
+ dm_block_t nr_allocated_this_transaction;
+};
+
+static void sm_disk_destroy(struct dm_space_map *sm)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ kfree(smd);
+}
+
+static int sm_disk_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ return sm_ll_extend(&smd->ll, extra_blocks);
+}
+
+static int sm_disk_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+ *count = smd->old_ll.nr_blocks;
+
+ return 0;
+}
+
+static int sm_disk_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+ *count = (smd->old_ll.nr_blocks - smd->old_ll.nr_allocated) - smd->nr_allocated_this_transaction;
+
+ return 0;
+}
+
+static int sm_disk_get_count(struct dm_space_map *sm, dm_block_t b,
+ uint32_t *result)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+ return sm_ll_lookup(&smd->ll, b, result);
+}
+
+static int sm_disk_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b,
+ int *result)
+{
+ int r;
+ uint32_t count;
+
+ r = sm_disk_get_count(sm, b, &count);
+ if (r)
+ return r;
+
+ return count > 1;
+}
+
+static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
+ uint32_t count)
+{
+ int r;
+ uint32_t old_count;
+ enum allocation_event ev;
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ r = sm_ll_insert(&smd->ll, b, count, &ev);
+ if (!r) {
+ switch (ev) {
+ case SM_NONE:
+ break;
+
+ case SM_ALLOC:
+ /*
+ * This _must_ be free in the prior transaction
+ * otherwise we've lost atomicity.
+ */
+ smd->nr_allocated_this_transaction++;
+ break;
+
+ case SM_FREE:
+ /*
+ * It's only free if it's also free in the last
+ * transaction.
+ */
+ r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+ if (r)
+ return r;
+
+ if (!old_count)
+ smd->nr_allocated_this_transaction--;
+ break;
+ }
+ }
+
+ return r;
+}
+
+static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+{
+ int r;
+ enum allocation_event ev;
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ r = sm_ll_inc(&smd->ll, b, &ev);
+ if (!r && (ev == SM_ALLOC))
+ /*
+ * This _must_ be free in the prior transaction
+ * otherwise we've lost atomicity.
+ */
+ smd->nr_allocated_this_transaction++;
+
+ return r;
+}
+
+static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+{
+ int r;
+ uint32_t old_count;
+ enum allocation_event ev;
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ r = sm_ll_dec(&smd->ll, b, &ev);
+ if (!r && (ev == SM_FREE)) {
+ /*
+ * It's only free if it's also free in the last
+ * transaction.
+ */
+ r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+ if (r)
+ return r;
+
+ if (!old_count)
+ smd->nr_allocated_this_transaction--;
+ }
+
+ return r;
+}
+
+static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
+{
+ int r;
+ enum allocation_event ev;
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ /* FIXME: we should loop round a couple of times */
+ r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b);
+ if (r)
+ return r;
+
+ smd->begin = *b + 1;
+ r = sm_ll_inc(&smd->ll, *b, &ev);
+ if (!r) {
+ BUG_ON(ev != SM_ALLOC);
+ smd->nr_allocated_this_transaction++;
+ }
+
+ return r;
+}
+
+static int sm_disk_commit(struct dm_space_map *sm)
+{
+ int r;
+ dm_block_t nr_free;
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+
+ r = sm_disk_get_nr_free(sm, &nr_free);
+ if (r)
+ return r;
+
+ r = sm_ll_commit(&smd->ll);
+ if (r)
+ return r;
+
+ memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll));
+ smd->begin = 0;
+ smd->nr_allocated_this_transaction = 0;
+
+ r = sm_disk_get_nr_free(sm, &nr_free);
+ if (r)
+ return r;
+
+ return 0;
+}
+
+static int sm_disk_root_size(struct dm_space_map *sm, size_t *result)
+{
+ *result = sizeof(struct disk_sm_root);
+
+ return 0;
+}
+
+static int sm_disk_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
+{
+ struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
+ struct disk_sm_root root_le;
+
+ root_le.nr_blocks = cpu_to_le64(smd->ll.nr_blocks);
+ root_le.nr_allocated = cpu_to_le64(smd->ll.nr_allocated);
+ root_le.bitmap_root = cpu_to_le64(smd->ll.bitmap_root);
+ root_le.ref_count_root = cpu_to_le64(smd->ll.ref_count_root);
+
+ if (max < sizeof(root_le))
+ return -ENOSPC;
+
+ memcpy(where_le, &root_le, sizeof(root_le));
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_space_map ops = {
+ .destroy = sm_disk_destroy,
+ .extend = sm_disk_extend,
+ .get_nr_blocks = sm_disk_get_nr_blocks,
+ .get_nr_free = sm_disk_get_nr_free,
+ .get_count = sm_disk_get_count,
+ .count_is_more_than_one = sm_disk_count_is_more_than_one,
+ .set_count = sm_disk_set_count,
+ .inc_block = sm_disk_inc_block,
+ .dec_block = sm_disk_dec_block,
+ .new_block = sm_disk_new_block,
+ .commit = sm_disk_commit,
+ .root_size = sm_disk_root_size,
+ .copy_root = sm_disk_copy_root
+};
+
+static struct dm_space_map *dm_sm_disk_create_real(
+ struct dm_transaction_manager *tm,
+ dm_block_t nr_blocks)
+{
+ int r;
+ struct sm_disk *smd;
+
+ smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+ if (!smd)
+ return ERR_PTR(-ENOMEM);
+
+ smd->begin = 0;
+ smd->nr_allocated_this_transaction = 0;
+ memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+ r = sm_ll_new_disk(&smd->ll, tm);
+ if (r)
+ goto bad;
+
+ r = sm_ll_extend(&smd->ll, nr_blocks);
+ if (r)
+ goto bad;
+
+ r = sm_disk_commit(&smd->sm);
+ if (r)
+ goto bad;
+
+ return &smd->sm;
+
+bad:
+ kfree(smd);
+ return ERR_PTR(r);
+}
+
+struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
+ dm_block_t nr_blocks)
+{
+ struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks);
+ return dm_sm_checker_create_fresh(sm);
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_create);
+
+static struct dm_space_map *dm_sm_disk_open_real(
+ struct dm_transaction_manager *tm,
+ void *root_le, size_t len)
+{
+ int r;
+ struct sm_disk *smd;
+
+ smd = kmalloc(sizeof(*smd), GFP_KERNEL);
+ if (!smd)
+ return ERR_PTR(-ENOMEM);
+
+ smd->begin = 0;
+ smd->nr_allocated_this_transaction = 0;
+ memcpy(&smd->sm, &ops, sizeof(smd->sm));
+
+ r = sm_ll_open_disk(&smd->ll, tm, root_le, len);
+ if (r)
+ goto bad;
+
+ r = sm_disk_commit(&smd->sm);
+ if (r)
+ goto bad;
+
+ return &smd->sm;
+
+bad:
+ kfree(smd);
+ return ERR_PTR(r);
+}
+
+struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm,
+ void *root_le, size_t len)
+{
+ return dm_sm_checker_create(
+ dm_sm_disk_open_real(tm, root_le, len));
+}
+EXPORT_SYMBOL_GPL(dm_sm_disk_open);
+
+/*----------------------------------------------------------------*/
--- /dev/null
- #include <linux/module.h>
+/*
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+#include "dm-transaction-manager.h"
+#include "dm-space-map.h"
+#include "dm-space-map-checker.h"
+#include "dm-space-map-disk.h"
+#include "dm-space-map-metadata.h"
+#include "dm-persistent-data-internal.h"
+
++#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "transaction manager"
+
+/*----------------------------------------------------------------*/
+
+struct shadow_info {
+ struct hlist_node hlist;
+ dm_block_t where;
+};
+
+/*
+ * It would be nice if we scaled with the size of transaction.
+ */
+#define HASH_SIZE 256
+#define HASH_MASK (HASH_SIZE - 1)
+
+struct dm_transaction_manager {
+ int is_clone;
+ struct dm_transaction_manager *real;
+
+ struct dm_block_manager *bm;
+ struct dm_space_map *sm;
+
+ spinlock_t lock;
+ struct hlist_head buckets[HASH_SIZE];
+};
+
+/*----------------------------------------------------------------*/
+
+static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
+{
+ int r = 0;
+ unsigned bucket = dm_hash_block(b, HASH_MASK);
+ struct shadow_info *si;
+ struct hlist_node *n;
+
+ spin_lock(&tm->lock);
+ hlist_for_each_entry(si, n, tm->buckets + bucket, hlist)
+ if (si->where == b) {
+ r = 1;
+ break;
+ }
+ spin_unlock(&tm->lock);
+
+ return r;
+}
+
+/*
+ * This can silently fail if there's no memory. We're ok with this since
+ * creating redundant shadows causes no harm.
+ */
+static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
+{
+ unsigned bucket;
+ struct shadow_info *si;
+
+ si = kmalloc(sizeof(*si), GFP_NOIO);
+ if (si) {
+ si->where = b;
+ bucket = dm_hash_block(b, HASH_MASK);
+ spin_lock(&tm->lock);
+ hlist_add_head(&si->hlist, tm->buckets + bucket);
+ spin_unlock(&tm->lock);
+ }
+}
+
+static void wipe_shadow_table(struct dm_transaction_manager *tm)
+{
+ struct shadow_info *si;
+ struct hlist_node *n, *tmp;
+ struct hlist_head *bucket;
+ int i;
+
+ spin_lock(&tm->lock);
+ for (i = 0; i < HASH_SIZE; i++) {
+ bucket = tm->buckets + i;
+ hlist_for_each_entry_safe(si, n, tmp, bucket, hlist)
+ kfree(si);
+
+ INIT_HLIST_HEAD(bucket);
+ }
+
+ spin_unlock(&tm->lock);
+}
+
+/*----------------------------------------------------------------*/
+
+static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
+ struct dm_space_map *sm)
+{
+ int i;
+ struct dm_transaction_manager *tm;
+
+ tm = kmalloc(sizeof(*tm), GFP_KERNEL);
+ if (!tm)
+ return ERR_PTR(-ENOMEM);
+
+ tm->is_clone = 0;
+ tm->real = NULL;
+ tm->bm = bm;
+ tm->sm = sm;
+
+ spin_lock_init(&tm->lock);
+ for (i = 0; i < HASH_SIZE; i++)
+ INIT_HLIST_HEAD(tm->buckets + i);
+
+ return tm;
+}
+
+struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transaction_manager *real)
+{
+ struct dm_transaction_manager *tm;
+
+ tm = kmalloc(sizeof(*tm), GFP_KERNEL);
+ if (tm) {
+ tm->is_clone = 1;
+ tm->real = real;
+ }
+
+ return tm;
+}
+EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone);
+
+void dm_tm_destroy(struct dm_transaction_manager *tm)
+{
+ kfree(tm);
+}
+EXPORT_SYMBOL_GPL(dm_tm_destroy);
+
+int dm_tm_pre_commit(struct dm_transaction_manager *tm)
+{
+ int r;
+
+ if (tm->is_clone)
+ return -EWOULDBLOCK;
+
+ r = dm_sm_commit(tm->sm);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
+
+int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
+{
+ if (tm->is_clone)
+ return -EWOULDBLOCK;
+
+ wipe_shadow_table(tm);
+
+ return dm_bm_flush_and_unlock(tm->bm, root);
+}
+EXPORT_SYMBOL_GPL(dm_tm_commit);
+
+int dm_tm_new_block(struct dm_transaction_manager *tm,
+ struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r;
+ dm_block_t new_block;
+
+ if (tm->is_clone)
+ return -EWOULDBLOCK;
+
+ r = dm_sm_new_block(tm->sm, &new_block);
+ if (r < 0)
+ return r;
+
+ r = dm_bm_write_lock_zero(tm->bm, new_block, v, result);
+ if (r < 0) {
+ dm_sm_dec_block(tm->sm, new_block);
+ return r;
+ }
+
+ /*
+ * New blocks count as shadows in that they don't need to be
+ * shadowed again.
+ */
+ insert_shadow(tm, new_block);
+
+ return 0;
+}
+
+static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
+ struct dm_block_validator *v,
+ struct dm_block **result)
+{
+ int r;
+ dm_block_t new;
+ struct dm_block *orig_block;
+
+ r = dm_sm_new_block(tm->sm, &new);
+ if (r < 0)
+ return r;
+
+ r = dm_sm_dec_block(tm->sm, orig);
+ if (r < 0)
+ return r;
+
+ r = dm_bm_read_lock(tm->bm, orig, v, &orig_block);
+ if (r < 0)
+ return r;
+
+ r = dm_bm_unlock_move(orig_block, new);
+ if (r < 0) {
+ dm_bm_unlock(orig_block);
+ return r;
+ }
+
+ return dm_bm_write_lock(tm->bm, new, v, result);
+}
+
+int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig,
+ struct dm_block_validator *v, struct dm_block **result,
+ int *inc_children)
+{
+ int r;
+
+ if (tm->is_clone)
+ return -EWOULDBLOCK;
+
+ r = dm_sm_count_is_more_than_one(tm->sm, orig, inc_children);
+ if (r < 0)
+ return r;
+
+ if (is_shadow(tm, orig) && !*inc_children)
+ return dm_bm_write_lock(tm->bm, orig, v, result);
+
+ r = __shadow_block(tm, orig, v, result);
+ if (r < 0)
+ return r;
+ insert_shadow(tm, dm_block_location(*result));
+
+ return r;
+}
+
+int dm_tm_read_lock(struct dm_transaction_manager *tm, dm_block_t b,
+ struct dm_block_validator *v,
+ struct dm_block **blk)
+{
+ if (tm->is_clone)
+ return dm_bm_read_try_lock(tm->real->bm, b, v, blk);
+
+ return dm_bm_read_lock(tm->bm, b, v, blk);
+}
+
+int dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b)
+{
+ return dm_bm_unlock(b);
+}
+EXPORT_SYMBOL_GPL(dm_tm_unlock);
+
+void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
+{
+ /*
+ * The non-blocking clone doesn't support this.
+ */
+ BUG_ON(tm->is_clone);
+
+ dm_sm_inc_block(tm->sm, b);
+}
+EXPORT_SYMBOL_GPL(dm_tm_inc);
+
+void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
+{
+ /*
+ * The non-blocking clone doesn't support this.
+ */
+ BUG_ON(tm->is_clone);
+
+ dm_sm_dec_block(tm->sm, b);
+}
+EXPORT_SYMBOL_GPL(dm_tm_dec);
+
+int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
+ uint32_t *result)
+{
+ if (tm->is_clone)
+ return -EWOULDBLOCK;
+
+ return dm_sm_get_count(tm->sm, b, result);
+}
+
+struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm)
+{
+ return tm->bm;
+}
+
+/*----------------------------------------------------------------*/
+
+static int dm_tm_create_internal(struct dm_block_manager *bm,
+ dm_block_t sb_location,
+ struct dm_block_validator *sb_validator,
+ size_t root_offset, size_t root_max_len,
+ struct dm_transaction_manager **tm,
+ struct dm_space_map **sm,
+ struct dm_block **sblock,
+ int create)
+{
+ int r;
+ struct dm_space_map *inner;
+
+ inner = dm_sm_metadata_init();
+ if (IS_ERR(inner))
+ return PTR_ERR(inner);
+
+ *tm = dm_tm_create(bm, inner);
+ if (IS_ERR(*tm)) {
+ dm_sm_destroy(inner);
+ return PTR_ERR(*tm);
+ }
+
+ if (create) {
+ r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location,
+ sb_validator, sblock);
+ if (r < 0) {
+ DMERR("couldn't lock superblock");
+ goto bad1;
+ }
+
+ r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm),
+ sb_location);
+ if (r) {
+ DMERR("couldn't create metadata space map");
+ goto bad2;
+ }
+
+ *sm = dm_sm_checker_create(inner);
+ if (!*sm)
+ goto bad2;
+
+ } else {
+ r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location,
+ sb_validator, sblock);
+ if (r < 0) {
+ DMERR("couldn't lock superblock");
+ goto bad1;
+ }
+
+ r = dm_sm_metadata_open(inner, *tm,
+ dm_block_data(*sblock) + root_offset,
+ root_max_len);
+ if (r) {
+ DMERR("couldn't open metadata space map");
+ goto bad2;
+ }
+
+ *sm = dm_sm_checker_create(inner);
+ if (!*sm)
+ goto bad2;
+ }
+
+ return 0;
+
+bad2:
+ dm_tm_unlock(*tm, *sblock);
+bad1:
+ dm_tm_destroy(*tm);
+ dm_sm_destroy(inner);
+ return r;
+}
+
+int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
+ struct dm_block_validator *sb_validator,
+ struct dm_transaction_manager **tm,
+ struct dm_space_map **sm, struct dm_block **sblock)
+{
+ return dm_tm_create_internal(bm, sb_location, sb_validator,
+ 0, 0, tm, sm, sblock, 1);
+}
+EXPORT_SYMBOL_GPL(dm_tm_create_with_sm);
+
+int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location,
+ struct dm_block_validator *sb_validator,
+ size_t root_offset, size_t root_max_len,
+ struct dm_transaction_manager **tm,
+ struct dm_space_map **sm, struct dm_block **sblock)
+{
+ return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset,
+ root_max_len, tm, sm, sblock, 0);
+}
+EXPORT_SYMBOL_GPL(dm_tm_open_with_sm);
+
+/*----------------------------------------------------------------*/
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <media/saa7146.h>
+ #include <linux/module.h>
LIST_HEAD(saa7146_devices);
DEFINE_MUTEX(saa7146_devices_lock);
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <media/saa7146_vv.h>
+ #include <linux/module.h>
/****************************************************************************/
/* resource management functions, shamelessly stolen from saa7134 driver */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
+ #include <linux/export.h>
#include <media/saa7146_vv.h>
static void calculate_output_format_register(struct saa7146_dev* saa, u32 palette, u32* clip_format)
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <media/saa7146_vv.h>
#include <media/v4l2-chip-ident.h>
+ #include <linux/module.h>
static int max_memory = 32;
#include <linux/i2c.h>
+#include <linux/mutex.h>
+ #include <linux/module.h>
#include "dibx000_common.h"
*/
#include <linux/delay.h>
+ #include <linux/module.h>
#include <linux/i2c.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/version.h>
#include <media/adp1653.h>
#include <linux/delay.h>
#include <linux/i2c.h>
+#include <linux/v4l2-mediabus.h>
#include <linux/slab.h>
#include <linux/videodev2.h>
+ #include <linux/module.h>
#include <media/soc_camera.h>
-#include <media/soc_mediabus.h>
#include <media/v4l2-subdev.h>
#include <media/v4l2-chip-ident.h>
#include <linux/slab.h>
#include <linux/i2c.h>
#include <linux/log2.h>
+ #include <linux/module.h>
+#include <media/soc_camera.h>
+#include <media/soc_mediabus.h>
#include <media/v4l2-subdev.h>
#include <media/v4l2-chip-ident.h>
-#include <media/soc_camera.h>
+#include <media/v4l2-ctrls.h>
/*
* mt9m001 i2c address 0x5d
#include <linux/log2.h>
#include <linux/gpio.h>
#include <linux/delay.h>
+#include <linux/v4l2-mediabus.h>
+ #include <linux/module.h>
+#include <media/soc_camera.h>
#include <media/v4l2-common.h>
+#include <media/v4l2-ctrls.h>
#include <media/v4l2-chip-ident.h>
-#include <media/soc_camera.h>
/*
* MT9M111, MT9M112 and MT9M131:
#include <linux/log2.h>
#include <linux/pm.h>
#include <linux/slab.h>
+#include <linux/v4l2-mediabus.h>
#include <linux/videodev2.h>
+ #include <linux/module.h>
#include <media/soc_camera.h>
#include <media/v4l2-chip-ident.h>
#include <linux/i2c.h>
#include <linux/delay.h>
#include <linux/log2.h>
+ #include <linux/module.h>
+#include <media/soc_camera.h>
+#include <media/soc_mediabus.h>
#include <media/v4l2-subdev.h>
#include <media/v4l2-chip-ident.h>
-#include <media/soc_camera.h>
+#include <media/v4l2-ctrls.h>
/*
* mt9v022 i2c address 0x48, 0x4c, 0x58, 0x5c
#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/slab.h>
+#include <linux/v4l2-mediabus.h>
+ #include <linux/module.h>
#include <media/soc_camera.h>
#include <media/v4l2-chip-ident.h>
#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/slab.h>
+#include <linux/v4l2-mediabus.h>
#include <linux/videodev2.h>
+ #include <linux/module.h>
#include <media/rj54n1cb0c.h>
#include <media/soc_camera.h>
#include <linux/types.h>
#include <linux/ioctl.h>
+ #include <linux/module.h>
#include <linux/i2c.h>
+#include <linux/slab.h>
#if defined(CONFIG_SPI)
#include <linux/spi/spi.h>
#endif
#include <linux/slab.h>
#include <linux/i2c.h>
+#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
+ #include <linux/module.h>
#include <linux/mutex.h>
#include <linux/mfd/core.h>
#include <linux/mfd/max8997.h>
#define KMSG_COMPONENT "vmur"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-#include <linux/kernel_stat.h>
#include <linux/cdev.h>
#include <linux/slab.h>
+ #include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/cio.h>
--- /dev/null
+/*
+ * opal driver interface to hvc_console.c
+ *
+ * Copyright 2011 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/console.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
++#include <linux/export.h>
+
+#include <asm/hvconsole.h>
+#include <asm/prom.h>
+#include <asm/firmware.h>
+#include <asm/hvsi.h>
+#include <asm/udbg.h>
+#include <asm/opal.h>
+
+#include "hvc_console.h"
+
+static const char hvc_opal_name[] = "hvc_opal";
+
+static struct of_device_id hvc_opal_match[] __devinitdata = {
+ { .name = "serial", .compatible = "ibm,opal-console-raw" },
+ { .name = "serial", .compatible = "ibm,opal-console-hvsi" },
+ { },
+};
+
+typedef enum hv_protocol {
+ HV_PROTOCOL_RAW,
+ HV_PROTOCOL_HVSI
+} hv_protocol_t;
+
+struct hvc_opal_priv {
+ hv_protocol_t proto; /* Raw data or HVSI packets */
+ struct hvsi_priv hvsi; /* HVSI specific data */
+};
+static struct hvc_opal_priv *hvc_opal_privs[MAX_NR_HVC_CONSOLES];
+
+/* For early boot console */
+static struct hvc_opal_priv hvc_opal_boot_priv;
+static u32 hvc_opal_boot_termno;
+
+static const struct hv_ops hvc_opal_raw_ops = {
+ .get_chars = opal_get_chars,
+ .put_chars = opal_put_chars,
+ .notifier_add = notifier_add_irq,
+ .notifier_del = notifier_del_irq,
+ .notifier_hangup = notifier_hangup_irq,
+};
+
+static int hvc_opal_hvsi_get_chars(uint32_t vtermno, char *buf, int count)
+{
+ struct hvc_opal_priv *pv = hvc_opal_privs[vtermno];
+
+ if (WARN_ON(!pv))
+ return -ENODEV;
+
+ return hvsilib_get_chars(&pv->hvsi, buf, count);
+}
+
+static int hvc_opal_hvsi_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+ struct hvc_opal_priv *pv = hvc_opal_privs[vtermno];
+
+ if (WARN_ON(!pv))
+ return -ENODEV;
+
+ return hvsilib_put_chars(&pv->hvsi, buf, count);
+}
+
+static int hvc_opal_hvsi_open(struct hvc_struct *hp, int data)
+{
+ struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
+ int rc;
+
+ pr_devel("HVSI@%x: do open !\n", hp->vtermno);
+
+ rc = notifier_add_irq(hp, data);
+ if (rc)
+ return rc;
+
+ return hvsilib_open(&pv->hvsi, hp);
+}
+
+static void hvc_opal_hvsi_close(struct hvc_struct *hp, int data)
+{
+ struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
+
+ pr_devel("HVSI@%x: do close !\n", hp->vtermno);
+
+ hvsilib_close(&pv->hvsi, hp);
+
+ notifier_del_irq(hp, data);
+}
+
+void hvc_opal_hvsi_hangup(struct hvc_struct *hp, int data)
+{
+ struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
+
+ pr_devel("HVSI@%x: do hangup !\n", hp->vtermno);
+
+ hvsilib_close(&pv->hvsi, hp);
+
+ notifier_hangup_irq(hp, data);
+}
+
+static int hvc_opal_hvsi_tiocmget(struct hvc_struct *hp)
+{
+ struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
+
+ if (!pv)
+ return -EINVAL;
+ return pv->hvsi.mctrl;
+}
+
+static int hvc_opal_hvsi_tiocmset(struct hvc_struct *hp, unsigned int set,
+ unsigned int clear)
+{
+ struct hvc_opal_priv *pv = hvc_opal_privs[hp->vtermno];
+
+ pr_devel("HVSI@%x: Set modem control, set=%x,clr=%x\n",
+ hp->vtermno, set, clear);
+
+ if (set & TIOCM_DTR)
+ hvsilib_write_mctrl(&pv->hvsi, 1);
+ else if (clear & TIOCM_DTR)
+ hvsilib_write_mctrl(&pv->hvsi, 0);
+
+ return 0;
+}
+
+static const struct hv_ops hvc_opal_hvsi_ops = {
+ .get_chars = hvc_opal_hvsi_get_chars,
+ .put_chars = hvc_opal_hvsi_put_chars,
+ .notifier_add = hvc_opal_hvsi_open,
+ .notifier_del = hvc_opal_hvsi_close,
+ .notifier_hangup = hvc_opal_hvsi_hangup,
+ .tiocmget = hvc_opal_hvsi_tiocmget,
+ .tiocmset = hvc_opal_hvsi_tiocmset,
+};
+
+static int __devinit hvc_opal_probe(struct platform_device *dev)
+{
+ const struct hv_ops *ops;
+ struct hvc_struct *hp;
+ struct hvc_opal_priv *pv;
+ hv_protocol_t proto;
+ unsigned int termno, boot = 0;
+ const __be32 *reg;
+
+ if (of_device_is_compatible(dev->dev.of_node, "ibm,opal-console-raw")) {
+ proto = HV_PROTOCOL_RAW;
+ ops = &hvc_opal_raw_ops;
+ } else if (of_device_is_compatible(dev->dev.of_node,
+ "ibm,opal-console-hvsi")) {
+ proto = HV_PROTOCOL_HVSI;
+ ops = &hvc_opal_hvsi_ops;
+ } else {
+ pr_err("hvc_opal: Unkown protocol for %s\n",
+ dev->dev.of_node->full_name);
+ return -ENXIO;
+ }
+
+ reg = of_get_property(dev->dev.of_node, "reg", NULL);
+ termno = reg ? be32_to_cpup(reg) : 0;
+
+ /* Is it our boot one ? */
+ if (hvc_opal_privs[termno] == &hvc_opal_boot_priv) {
+ pv = hvc_opal_privs[termno];
+ boot = 1;
+ } else if (hvc_opal_privs[termno] == NULL) {
+ pv = kzalloc(sizeof(struct hvc_opal_priv), GFP_KERNEL);
+ if (!pv)
+ return -ENOMEM;
+ pv->proto = proto;
+ hvc_opal_privs[termno] = pv;
+ if (proto == HV_PROTOCOL_HVSI)
+ hvsilib_init(&pv->hvsi, opal_get_chars, opal_put_chars,
+ termno, 0);
+
+ /* Instanciate now to establish a mapping index==vtermno */
+ hvc_instantiate(termno, termno, ops);
+ } else {
+ pr_err("hvc_opal: Device %s has duplicate terminal number #%d\n",
+ dev->dev.of_node->full_name, termno);
+ return -ENXIO;
+ }
+
+ pr_info("hvc%d: %s protocol on %s%s\n", termno,
+ proto == HV_PROTOCOL_RAW ? "raw" : "hvsi",
+ dev->dev.of_node->full_name,
+ boot ? " (boot console)" : "");
+
+ /* We don't do IRQ yet */
+ hp = hvc_alloc(termno, 0, ops, MAX_VIO_PUT_CHARS);
+ if (IS_ERR(hp))
+ return PTR_ERR(hp);
+ dev_set_drvdata(&dev->dev, hp);
+
+ return 0;
+}
+
+static int __devexit hvc_opal_remove(struct platform_device *dev)
+{
+ struct hvc_struct *hp = dev_get_drvdata(&dev->dev);
+ int rc, termno;
+
+ termno = hp->vtermno;
+ rc = hvc_remove(hp);
+ if (rc == 0) {
+ if (hvc_opal_privs[termno] != &hvc_opal_boot_priv)
+ kfree(hvc_opal_privs[termno]);
+ hvc_opal_privs[termno] = NULL;
+ }
+ return rc;
+}
+
+static struct platform_driver hvc_opal_driver = {
+ .probe = hvc_opal_probe,
+ .remove = __devexit_p(hvc_opal_remove),
+ .driver = {
+ .name = hvc_opal_name,
+ .owner = THIS_MODULE,
+ .of_match_table = hvc_opal_match,
+ }
+};
+
+static int __init hvc_opal_init(void)
+{
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return -ENODEV;
+
+ /* Register as a vio device to receive callbacks */
+ return platform_driver_register(&hvc_opal_driver);
+}
+module_init(hvc_opal_init);
+
+static void __exit hvc_opal_exit(void)
+{
+ platform_driver_unregister(&hvc_opal_driver);
+}
+module_exit(hvc_opal_exit);
+
+static void udbg_opal_putc(char c)
+{
+ unsigned int termno = hvc_opal_boot_termno;
+ int count = -1;
+
+ if (c == '\n')
+ udbg_opal_putc('\r');
+
+ do {
+ switch(hvc_opal_boot_priv.proto) {
+ case HV_PROTOCOL_RAW:
+ count = opal_put_chars(termno, &c, 1);
+ break;
+ case HV_PROTOCOL_HVSI:
+ count = hvc_opal_hvsi_put_chars(termno, &c, 1);
+ break;
+ }
+ } while(count == 0 || count == -EAGAIN);
+}
+
+static int udbg_opal_getc_poll(void)
+{
+ unsigned int termno = hvc_opal_boot_termno;
+ int rc = 0;
+ char c;
+
+ switch(hvc_opal_boot_priv.proto) {
+ case HV_PROTOCOL_RAW:
+ rc = opal_get_chars(termno, &c, 1);
+ break;
+ case HV_PROTOCOL_HVSI:
+ rc = hvc_opal_hvsi_get_chars(termno, &c, 1);
+ break;
+ }
+ if (!rc)
+ return -1;
+ return c;
+}
+
+static int udbg_opal_getc(void)
+{
+ int ch;
+ for (;;) {
+ ch = udbg_opal_getc_poll();
+ if (ch == -1) {
+ /* This shouldn't be needed...but... */
+ volatile unsigned long delay;
+ for (delay=0; delay < 2000000; delay++)
+ ;
+ } else {
+ return ch;
+ }
+ }
+}
+
+static void udbg_init_opal_common(void)
+{
+ udbg_putc = udbg_opal_putc;
+ udbg_getc = udbg_opal_getc;
+ udbg_getc_poll = udbg_opal_getc_poll;
+ tb_ticks_per_usec = 0x200; /* Make udelay not suck */
+}
+
+void __init hvc_opal_init_early(void)
+{
+ struct device_node *stdout_node = NULL;
+ const u32 *termno;
+ const char *name = NULL;
+ const struct hv_ops *ops;
+ u32 index;
+
+ /* find the boot console from /chosen/stdout */
+ if (of_chosen)
+ name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (name) {
+ stdout_node = of_find_node_by_path(name);
+ if (!stdout_node) {
+ pr_err("hvc_opal: Failed to locate default console!\n");
+ return;
+ }
+ } else {
+ struct device_node *opal, *np;
+
+ /* Current OPAL takeover doesn't provide the stdout
+ * path, so we hard wire it
+ */
+ opal = of_find_node_by_path("/ibm,opal/consoles");
+ if (opal)
+ pr_devel("hvc_opal: Found consoles in new location\n");
+ if (!opal) {
+ opal = of_find_node_by_path("/ibm,opal");
+ if (opal)
+ pr_devel("hvc_opal: "
+ "Found consoles in old location\n");
+ }
+ if (!opal)
+ return;
+ for_each_child_of_node(opal, np) {
+ if (!strcmp(np->name, "serial")) {
+ stdout_node = np;
+ break;
+ }
+ }
+ of_node_put(opal);
+ }
+ if (!stdout_node)
+ return;
+ termno = of_get_property(stdout_node, "reg", NULL);
+ index = termno ? *termno : 0;
+ if (index >= MAX_NR_HVC_CONSOLES)
+ return;
+ hvc_opal_privs[index] = &hvc_opal_boot_priv;
+
+ /* Check the protocol */
+ if (of_device_is_compatible(stdout_node, "ibm,opal-console-raw")) {
+ hvc_opal_boot_priv.proto = HV_PROTOCOL_RAW;
+ ops = &hvc_opal_raw_ops;
+ pr_devel("hvc_opal: Found RAW console\n");
+ }
+ else if (of_device_is_compatible(stdout_node,"ibm,opal-console-hvsi")) {
+ hvc_opal_boot_priv.proto = HV_PROTOCOL_HVSI;
+ ops = &hvc_opal_hvsi_ops;
+ hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars,
+ opal_put_chars, index, 1);
+ /* HVSI, perform the handshake now */
+ hvsilib_establish(&hvc_opal_boot_priv.hvsi);
+ pr_devel("hvc_opal: Found HVSI console\n");
+ } else
+ goto out;
+ hvc_opal_boot_termno = index;
+ udbg_init_opal_common();
+ add_preferred_console("hvc", index, NULL);
+ hvc_instantiate(index, index, ops);
+out:
+ of_node_put(stdout_node);
+}
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL_RAW
+void __init udbg_init_debug_opal(void)
+{
+ u32 index = CONFIG_PPC_EARLY_DEBUG_OPAL_VTERMNO;
+ hvc_opal_privs[index] = &hvc_opal_boot_priv;
+ hvc_opal_boot_priv.proto = HV_PROTOCOL_RAW;
+ hvc_opal_boot_termno = index;
+ udbg_init_opal_common();
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_OPAL_RAW */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI
+void __init udbg_init_debug_opal_hvsi(void)
+{
+ u32 index = CONFIG_PPC_EARLY_DEBUG_OPAL_VTERMNO;
+ hvc_opal_privs[index] = &hvc_opal_boot_priv;
+ hvc_opal_boot_termno = index;
+ udbg_init_opal_common();
+ hvsilib_init(&hvc_opal_boot_priv.hvsi, opal_get_chars, opal_put_chars,
+ index, 1);
+ hvsilib_establish(&hvc_opal_boot_priv.hvsi);
+}
+#endif /* CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI */
#include <linux/device.h>
#include <linux/uio.h>
#include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
+ #include <linux/bitmap.h>
+ #include <asm/page.h>
-struct scatterlist;
-
/**
* typedef dma_cookie_t - an opaque DMA cookie
*
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/netpoll.h>
+ #include <linux/export.h>
#include "vlan.h"
-bool vlan_do_receive(struct sk_buff **skbp)
+bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
{
struct sk_buff *skb = *skbp;
u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;